some fixes, resume with ACT tomorrow

lucidrains · lucidrains · commit 5b283c87ceb8 · 2025-07-28T09:55:19.000-07:00
diff --git a/HRM/hrm.py b/HRM/hrm.py
@@ -192,7 +192,7 @@ def evaluate_network_(
 
             all_hiddens = (
                 tokens,
-                *[hiddens[i] for i in range(self.num_networks)]
+                *hiddens.values()
             )
 
             # combine with mean pool for now
@@ -239,7 +239,7 @@ def evaluate_network_(
 
                     q_continue, q_halt = self.to_q_continue_halt(highest_hidden).sigmoid()
 
-                    should_continue = q_halt > q_continue
+                    should_halt = q_halt > q_continue
 
         # 1-step gradient learning
 
@@ -255,6 +255,8 @@ def evaluate_network_(
 
         # if labels passed in, cross entropy loss
 
+        hiddens = hiddens.values()
+
         if not exists(labels):
             return pred, hiddens
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "HRM-pytorch"
-version = "0.0.3"
+version = "0.0.4"
 description = "The proposal from a Singaporean AGI company"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
diff --git a/tests/test_hrm.py b/tests/test_hrm.py
@@ -42,9 +42,12 @@ def test_hrm():
     seq = torch.randint(0, 256, (3, 1024))
     labels = torch.randint(0, 256, (3, 1024))
 
-    loss, (logits, hiddens) = hrm(seq, labels = labels)
+    loss, (_, hiddens) = hrm(seq, labels = labels)
+    loss.backward()
+
+    loss, (_, hiddens) = hrm(seq, hiddens = hiddens, labels = labels)
     loss.backward()
 
     # after much training
 
-    pred = hrm(seq, reasoning_steps = 5)
+    pred = hrm(seq, max_reasoning_steps = 5)