Skip to content

Commit a6a7060

Browse files
committed
flowmatch: fix backprop ordering in MLP sgd_step
Compute d_h = W2^T * d_out before updating W2, not after. The previous ordering used the already-updated W2 weights when computing the layer-1 gradient, producing slightly incorrect updates.
1 parent 25cbed6 commit a6a7060

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

examples/riemannian_fm_poincare.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ fn main() {
144144
// dL/dout = pred - target (gradient of 0.5 * ||pred - target||^2)
145145
let d_out = &out - target;
146146

147+
// -- Backprop through layer 2 (before updating w2) --
148+
// dL/dh = W2^T * d_out (hidden)
149+
let d_h = self.w2.t().dot(&d_out);
150+
147151
// -- Layer 2 gradients --
148152
// dL/dW2 = d_out (dim) outer h (hidden) -> (dim x hidden)
149153
// dL/db2 = d_out
@@ -154,10 +158,6 @@ fn main() {
154158
self.b2[i] -= lr * d_out[i];
155159
}
156160

157-
// -- Backprop through layer 2 --
158-
// dL/dh = W2^T * d_out (hidden)
159-
let d_h = self.w2.t().dot(&d_out);
160-
161161
// -- tanh derivative: dL/dpre = dL/dh * (1 - tanh^2(pre)) = dL/dh * (1 - h^2) --
162162
let d_pre = &d_h * &(1.0 - &h * &h);
163163

0 commit comments

Comments
 (0)