-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomputeRL_UCB.m
50 lines (38 loc) · 2.2 KB
/
computeRL_UCB.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
function [WSCEst, WSCN] = computeRL_UCB(WSCEst, hJ, RJ, WSCN, angleUAV, ...
    A, E, dAB_R, gammaA, gammaJ, c, channelParam, i, alpha, nUAV, precode, typeA)
%COMPUTERL_UCB One step of Upper-Confidence-Bound (UCB) action selection.
%   [WSCEst, WSCN] = computeRL_UCB(WSCEst, hJ, RJ, WSCN, angleUAV, A, E, ...
%       dAB_R, gammaA, gammaJ, c, channelParam, i, alpha, nUAV, precode, typeA)
%   selects one action, evaluates its reward (WSC) and updates the
%   action-value estimate and visit counter of that action.
%
%   Inputs
%     WSCEst       - action-value estimates, one entry per action.
%     WSCN         - visit counters, one entry per action.
%     angleUAV     - indexed with the selected action to obtain the value
%                    forwarded to setNewPos_N.
%     hJ, RJ, nUAV, typeA
%                  - forwarded unchanged to setNewPos_N.
%     A, E, dAB_R, gammaA, gammaJ, channelParam
%                  - forwarded unchanged to the reward function; the reward
%                    is additionally divided by numel(E).
%     c            - weight of the exploration bonus sqrt(log(i)./WSCN).
%     i            - iteration index; at i == 1 the action is drawn uniformly
%                    at random instead of using the UCB score.
%     alpha        - if > 0, fixed step size of the value update; otherwise
%                    the sample-average step 1/WSCN(ind) is used.
%     precode      - selects the reward function:
%                      1 -> computeWSC_ZF_NUAV
%                      2 -> computeWSC_MRT_NUAV
%                      3 -> computeWSC_NOP_NUAV
%                      4 -> computeWSC_NSMRT_NUAV
%
%   Outputs
%     WSCEst, WSCN - the inputs with the entry of the selected action updated.
%
%   Raises
%     computeRL_UCB:unsupportedPrecode when precode matches none of 1..4.

    % Resolve the reward function first so that an unsupported precode fails
    % with a clear message instead of leaving WSC undefined further down.
    if precode == 1
        rewardFcn = @computeWSC_ZF_NUAV;
    elseif precode == 2
        rewardFcn = @computeWSC_MRT_NUAV;
    elseif precode == 3
        rewardFcn = @computeWSC_NOP_NUAV;
    elseif precode == 4
        rewardFcn = @computeWSC_NSMRT_NUAV;
    else
        error('computeRL_UCB:unsupportedPrecode', ...
            'Unsupported precode value: expected 1, 2, 3 or 4.');
    end

    % Upper-Confidence-Bound action selection
    if i == 1
        % log(1) == 0, so the bonus carries no information on the first
        % iteration (and would be 0/0 for zero counters -- presumably the
        % counters start at zero; verify against caller).
        ind = randi(length(WSCEst));
    else
        % Entries with WSCN == 0 get an infinite bonus here.
        WSCEst_UCB = WSCEst + c*sqrt( log(i)./WSCN );
        maxInds_UCB = find( WSCEst_UCB == max( WSCEst_UCB ) ); % Indices of all maximal scores
        if numel(maxInds_UCB) > 1
            % Several actions share the maximum: break the tie at random.
            ind = maxInds_UCB(randi(numel(maxInds_UCB)));
        else
            % Unique maximum: take it (no random draw is consumed).
            ind = maxInds_UCB;
        end
    end

    % Update virtual UAVs positions
    % Legacy alternatives kept for reference:
    %   indAng = getNewAng(WSCEst);
    %   [UAV1, UAV2] = setNewPos(Angle, hJ, RJ);
    indAng = ind;
    Angle = angleUAV(indAng);
    UAVs = setNewPos_N(nUAV, Angle, hJ, RJ, typeA);

    % Compute reward (WSC) of action ind, normalised by the number of
    % elements of E.
    WSC = rewardFcn(A, E, UAVs, dAB_R, gammaA, gammaJ, channelParam) / numel(E);

    % Action-value updates
    WSCN(ind) = WSCN(ind) + 1; % Update the occurrences
    if alpha > 0
        stepSize = alpha;          % Fixed step size
    else
        stepSize = 1/WSCN(ind);    % Sample-average step size
    end
    WSCEst(ind) = WSCEst(ind) + stepSize*(WSC - WSCEst(ind));
end