-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunMultipleAgents.m
More file actions
113 lines (91 loc) · 5.82 KB
/
Copy pathrunMultipleAgents.m
File metadata and controls
113 lines (91 loc) · 5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
function multiAgentResults = runMultipleAgents(nAgents,agent,trial)
% RUNMULTIPLEAGENTS Simulates multiple agents that learn to intercept a target.
%
%   multiAgentResults = RUNMULTIPLEAGENTS(nAgents,agent,trial) runs
%   multiple simulated agents in parallel, and stores the results in the
%   output structure 'multiAgentResults'.
%
%   Inputs:
%     nAgents - number of independent agents to simulate (one parfor
%               iteration, i.e. one call to runSingleAgent, per agent)
%     agent   - agent structure forwarded unchanged to runSingleAgent;
%               this function reads agent.belief.cacheSize and
%               agent.belief.baseEntropy
%     trial   - trial-protocol structure forwarded unchanged to
%               runSingleAgent; this function reads trial.nTrials
%
%   Output:
%     multiAgentResults - structure with fields
%       .nAgents, .trialProtocol : copies of the inputs nAgents and trial
%       .trajectory              : per-agent results concatenated
%                                  horizontally in agent order, plus the
%                                  interior anchors of each agent's first
%                                  and last executed path
%       .belief                  : per-agent target/context belief results
%                                  concatenated horizontally in agent
%                                  order; context.posteriors is a
%                                  cacheSize x nTrials x nAgents array
%
%   Final anchors are only recorded for agents whose last trial is not
%   boundary-flagged. When no anchors are recorded at all, the
%   corresponding anchor fields are returned as 1x0 empty arrays.

%---------------------- extract agent structures -------------------------%
belief = agent.belief;

%------------------------- initialize arrays ----------------------------%
% every array below is a parfor reduction variable grown by concatenation
[rewardRate,nAnchors,obstHits,boundary,...
    rewardProb,outcomeSurprise,resetFlag,cacheFlag,...
    targetPosteriorEntropy,contextPosteriorEntropy,...
    initAnchorLocs,finalAnchorLocs,distance,latency,...
    contextPosterior,sampledContext,estimatedContext ] = deal([]);

nTrials = trial.nTrials;

parfor i=1:nAgents
    disp(['running agent ',num2str(i)]);
    singleAgentResults = runSingleAgent(agent,trial);

    % pad the context posteriors with NaN out to nTrials columns, so that
    % every agent contributes a block of the same width
    contextPosteriorTemp = nan(belief.cacheSize,nTrials);
    np = size(singleAgentResults.belief.context.posteriors,2);
    contextPosteriorTemp(:,1:np) = singleAgentResults.belief.context.posteriors;

    % store trajectory properties
    rewardRate = [rewardRate, singleAgentResults.trajectory.rewards            ];
    nAnchors   = [nAnchors,   singleAgentResults.trajectory.augmented.nAnchors ];
    obstHits   = [obstHits,   singleAgentResults.trajectory.obstacleHits       ];
    boundary   = [boundary,   singleAgentResults.trajectory.boundaryFlag       ];
    distance   = [distance,   singleAgentResults.trajectory.executed.distance  ];
    latency    = [latency,    singleAgentResults.trajectory.executed.latency   ];

    % store belief properties
    rewardProb              = [rewardProb,              singleAgentResults.belief.target.rewardProb        ];
    outcomeSurprise         = [outcomeSurprise,         singleAgentResults.belief.target.outcomeSurprise   ];
    resetFlag               = [resetFlag,               singleAgentResults.belief.target.resetFlag         ];
    cacheFlag               = [cacheFlag,               singleAgentResults.belief.target.cacheFlag         ];
    targetPosteriorEntropy  = [targetPosteriorEntropy,  singleAgentResults.belief.target.posteriorEntropy  ];
    contextPosteriorEntropy = [contextPosteriorEntropy, singleAgentResults.belief.context.posteriorEntropy ];
    contextPosterior        = [contextPosterior,        contextPosteriorTemp                               ];
    estimatedContext        = [estimatedContext,        singleAgentResults.belief.context.toWrite          ];
    sampledContext          = [sampledContext,          singleAgentResults.belief.context.toRead           ];

    % extract anchor properties of the first and last executed paths
    firstPath    = singleAgentResults.trajectory.executed.path{1};
    lastPath     = singleAgentResults.trajectory.executed.path{nTrials};
    firstAnchors = firstPath.anchors;
    lastAnchors  = lastPath.anchors;

    % anchors are stored in polar form; convert to Cartesian coordinates
    [xx0,yy0] = pol2cart(firstAnchors.thCoords,firstAnchors.rCoords);
    [xxF,yyF] = pol2cart(lastAnchors.thCoords, lastAnchors.rCoords);
    n0   = firstAnchors.N;
    nF   = lastAnchors.N;
    aug0 = firstAnchors.augmented;
    augF = lastAnchors.augmented;

    % only interior anchors (2:end-1) are kept; the first and last anchor
    % of each path are dropped.
    % rows: x, y, number of interior anchors, agent index, augmented
    initAnchorLocs = [initAnchorLocs, ...
        [xx0(2:end-1); yy0(2:end-1); repmat(n0-2,[1,n0-2]); ...
         repmat(i,[1,n0-2]); aug0(2:end-1)] ];

    % final anchors are only recorded if the last trial was not flagged as
    % a boundary run.
    % rows: x, y, number of interior anchors, agent index, mean reward
    % over the second half of the trials, augmented
    if ~lastPath.boundaryFlag
        avgLateReward = mean(singleAgentResults.trajectory.rewards(ceil(nTrials/2)+1:nTrials));
        finalAnchorLocs = [finalAnchorLocs, ...
            [xxF(2:end-1); yyF(2:end-1); repmat(nF-2,[1,nF-2]); ...
             repmat(i,[1,nF-2]); repmat(avgLateReward,[1,nF-2]); augF(2:end-1)] ];
    end
end

% If nothing was appended (e.g. every agent finished on a boundary run, or
% nAgents is 0) the accumulators are still 0x0, and the row indexing below
% would error. Substitute empty matrices with the expected number of rows.
if isempty(initAnchorLocs)
    initAnchorLocs = zeros(5,0);
end
if isempty(finalAnchorLocs)
    finalAnchorLocs = zeros(6,0);
end

% reformat posteriors: split the horizontally concatenated per-agent blocks
% (nTrials columns each) into the pages of a 3-D array
reshapedPosterior = reshape(contextPosterior(:,1:nTrials*nAgents), ...
    belief.cacheSize,nTrials,nAgents);

multiAgentResults.nAgents       = nAgents;
multiAgentResults.trialProtocol = trial;

% store trajectory properties
multiAgentResults.trajectory.rewardRate   = rewardRate;
multiAgentResults.trajectory.nAnchors     = nAnchors;
multiAgentResults.trajectory.distance     = distance;
multiAgentResults.trajectory.latency      = latency;
multiAgentResults.trajectory.obstacleHits = obstHits;
multiAgentResults.trajectory.boundaryRuns = boundary;

% store anchor properties
multiAgentResults.trajectory.initialAnchors.xCoords    = initAnchorLocs(1,:);
multiAgentResults.trajectory.initialAnchors.yCoords    = initAnchorLocs(2,:);
multiAgentResults.trajectory.initialAnchors.N          = initAnchorLocs(3,:);
multiAgentResults.trajectory.initialAnchors.agentIndex = initAnchorLocs(4,:);
multiAgentResults.trajectory.initialAnchors.augmented  = initAnchorLocs(5,:);

multiAgentResults.trajectory.finalAnchors.xCoords      = finalAnchorLocs(1,:);
multiAgentResults.trajectory.finalAnchors.yCoords      = finalAnchorLocs(2,:);
multiAgentResults.trajectory.finalAnchors.N            = finalAnchorLocs(3,:);
multiAgentResults.trajectory.finalAnchors.agentIndex   = finalAnchorLocs(4,:);
multiAgentResults.trajectory.finalAnchors.avgReward    = finalAnchorLocs(5,:);
multiAgentResults.trajectory.finalAnchors.augmented    = finalAnchorLocs(6,:);

% store belief properties
multiAgentResults.belief.target.rewardProb        = rewardProb;
multiAgentResults.belief.target.outcomeSurprise   = outcomeSurprise;
multiAgentResults.belief.target.resetFlag         = resetFlag;
multiAgentResults.belief.target.cacheFlag         = cacheFlag;
multiAgentResults.belief.target.posteriorEntropy  = targetPosteriorEntropy;

multiAgentResults.belief.context.posteriors       = reshapedPosterior;
multiAgentResults.belief.context.estimated        = estimatedContext;
multiAgentResults.belief.context.sampled          = sampledContext;
multiAgentResults.belief.context.posteriorEntropy = contextPosteriorEntropy;

% store entropy of flat prior, for plotting
multiAgentResults.belief.target.posteriorEntropyFlat = belief.baseEntropy;