-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomputeRL_UCB.m
35 lines (31 loc) · 1.93 KB
/
computeRL_UCB.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
function [WSCEst, WSCN] = computeRL_UCB(WSCEst, hJ, RJ, WSCN, angleUAV, ...
    A, E, dAB_R, gammaA, gammaJ, c, channelParam, i, alpha, nUAV, precode, typeA)
% computeRL_UCB  One step of Upper-Confidence-Bound (UCB) action selection.
%
%   [WSCEst, WSCN] = computeRL_UCB(WSCEst, hJ, RJ, WSCN, angleUAV, A, E, ...
%       dAB_R, gammaA, gammaJ, c, channelParam, i, alpha, nUAV, precode, typeA)
%
%   Selects one action, evaluates its reward (WSC) and updates the value
%   estimate and the visit counter of that action only.
%
%   Inputs used directly by this function:
%     WSCEst   - action-value estimates, one entry per action
%     WSCN     - number of times each action has been selected so far
%     angleUAV - angle associated with each action (indexed by the action)
%     c        - weight of the UCB exploration bonus
%     i        - current step; for i == 1 the action is drawn uniformly at
%                random, otherwise the UCB rule is applied
%     alpha    - alpha > 0 selects a fixed step size alpha; otherwise the
%                sample-average step 1/WSCN(ind) is used
%     precode  - 1 evaluates the reward with computeWSC_ZF_NUAV,
%                3 evaluates it with computeWSC_NOP_NUAV
%   Inputs only forwarded to helpers:
%     nUAV, hJ, RJ, typeA                        -> setNewPos_N
%     A, E, dAB_R, gammaA, gammaJ, channelParam  -> reward routine
%   The reward returned by the helper is divided by numel(E).
%
%   Outputs:
%     WSCEst, WSCN - the inputs with the entry of the chosen action updated
%
%   Raises computeRL_UCB:unsupportedPrecode if precode is neither 1 nor 3.

    % Resolve the reward routine first so that an unsupported precode fails
    % fast with a clear message instead of an "undefined variable WSC" error
    % after the positions have already been computed.
    if precode == 1         % NS precoder (label taken from the original comment)
        computeWSC = @computeWSC_ZF_NUAV;
    elseif precode == 3     % No precoder
        computeWSC = @computeWSC_NOP_NUAV;
    else
        error('computeRL_UCB:unsupportedPrecode', ...
            'Unsupported precode value: expected 1 or 3.');
    end

    % Upper-Confidence-Bound action selection
    if i == 1
        % log(1) = 0 would give 0/0 for unvisited actions, so the very first
        % action is simply drawn at random.
        ind = randi(length(WSCEst));
    else
        bonus = c*sqrt( log(i)./WSCN );
        % An action that has never been tried is treated as maximizing.
        % Setting this explicitly avoids 0*Inf = NaN when c == 0, which
        % would silently exclude unvisited actions (or leave no candidate).
        bonus(WSCN == 0) = Inf;
        WSCEst_UCB = WSCEst + bonus;

        maxInds_UCB = find( WSCEst_UCB == max( WSCEst_UCB ) ); % All maximizers
        if length(maxInds_UCB) > 1
            % Break ties uniformly at random
            ind = maxInds_UCB(randi(length(maxInds_UCB)));
        else
            % Unique maximizer of the UCB score
            ind = maxInds_UCB;
        end
    end

    % Update virtual UAV positions for the angle of the chosen action
    Angle = angleUAV(ind);
    UAVs = setNewPos_N(nUAV, Angle, hJ, RJ, typeA);

    % Reward (WSC) of the chosen action, divided by the number of entries of E
    WSC = computeWSC(A, E, UAVs, dAB_R, gammaA, gammaJ, channelParam)/numel(E);

    % Action-value update
    WSCN(ind) = WSCN(ind) + 1;          % One more occurrence of this action
    if alpha > 0
        stepSize = alpha;               % Fixed step size
    else
        stepSize = 1/WSCN(ind);         % Sample-average step size
    end
    WSCEst(ind) = WSCEst(ind) + stepSize*(WSC - WSCEst(ind));
end