-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQLHEFT.py
49 lines (34 loc) · 1.88 KB
/
QLHEFT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -*- coding: utf-8 -*-
from class_DAG import one_entry_DAG
from class_Q_learning import Q_learning
def QLHEFT(dag, target):
    """Build a task scheduling list via Q-learning over a single-entry DAG.

    Converts *dag* into a one-entry DAG, averages each communication cost
    with its ``target.inout_ratio``-scaled value, recomputes the upward
    ranks (ranku), trains a Q-learning agent, then greedily follows the
    learned Q-values to produce a full node ordering.

    Parameters
    ----------
    dag : object
        DAG-like object exposing a ``file_name`` attribute; passed to
        ``one_entry_DAG`` for conversion.
    target : object
        Target platform descriptor exposing an ``inout_ratio`` attribute.

    Returns
    -------
    list
        Node indices in scheduling order, with the virtual entry node
        removed.
    """
    one_entry_dag = one_entry_DAG(dag.file_name)  # convert to one_entry_DAG
    # Average each communication time with its ratio-scaled counterpart.
    for i in range(one_entry_dag.num_of_node):
        for j in range(one_entry_dag.num_of_node):
            one_entry_dag.edge[i][j] = int((one_entry_dag.edge[i][j] + (one_entry_dag.edge[i][j] * target.inout_ratio)) / 2)
    # Recompute ranku on the modified edge costs.
    one_entry_dag.ranku = [0] * one_entry_dag.num_of_node  # reset
    one_entry_dag.ranku_calc(one_entry_dag.ve_index)
    q_learning = Q_learning(1.0, 0.8, one_entry_dag)
    q_learning.learning()
    current_state = one_entry_dag.ve_index  # initial state is the virtual entry node
    finish_nodes = [one_entry_dag.ve_index]  # virtual entry node is pre-selected
    wait_nodes = []
    wait_nodes += q_learning.check_succ(current_state, finish_nodes, wait_nodes)
    for _ in range(one_entry_dag.num_of_node - 1):
        # BUG FIX: start the running maximum at -inf (was 0) so a real
        # waiting node is always selected even when every Q-value for this
        # state is negative; with 0, max_value_action silently stayed at
        # the dummy node 0. The `>=` keeps the original last-wins tie-break.
        max_q_value = float("-inf")  # best action value seen for current_state
        max_value_action = 0  # node whose action value is maximal
        for wait_n in wait_nodes:
            if q_learning.q_sa[current_state][wait_n] >= max_q_value:
                max_q_value = q_learning.q_sa[current_state][wait_n]
                max_value_action = wait_n
        selected_node = max_value_action  # pick the action with the highest Q-value
        wait_nodes.remove(selected_node)
        finish_nodes.append(selected_node)
        current_state = selected_node
        wait_nodes += q_learning.check_succ(current_state, finish_nodes, wait_nodes)
    scheduling_list = finish_nodes
    scheduling_list.remove(one_entry_dag.ve_index)  # drop the virtual entry node
    return scheduling_list