You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: HRM/hrm.py
+1 -21. Lines changed: 1 addition & 21 deletions
Original file line number
Diff line number
Diff line change
@@ -66,10 +66,6 @@ def __init__(
66
66
num_tokens,
67
67
reasoning_steps=2, # N in the paper - the number of forward evals for the last network (highest hierarchy) above
68
68
relative_period: int|tuple[int, ...] =2, # the relative period for each network evaluation call to the one just previous - in the paper, they do 2 networks with a period of 2
69
-
min_reasoning_steps_epsilon_prob=0.5, # they stochastically choose the minimum segment from 2 .. max with this probability, and 1 step the rest of the time
70
-
max_reasoning_steps=10,
71
-
act_loss_weight=1.,
72
-
discount_factor=1.,
73
69
ignore_index=-1,
74
70
):
75
71
super().__init__()
@@ -106,7 +102,7 @@ def __init__(
106
102
if len(relative_period) == (self.num_networks - 1):
107
103
relative_period= (1, *relative_period)
108
104
109
-
# for the paper, they did (low: 1, high: 2) -
105
+
# for the paper, they did (low: 1, high: 2) - read as low evaluated every step, high evaluated every 2 steps
0 commit comments