1+ import numpy as np
2+
13from core .tensor import Tensor
24from nn .layer import Layer
3- import numpy as np
5+
46
57class Linear (Layer ):
68 def __init__ (self , in_features , out_features ):
@@ -16,16 +18,44 @@ def __init__(self, in_features, out_features):
1618 self .add_parameter (self .bias )
1719
1820 def forward (self , x ):
19- out = x @ self .weight + self .bias
21+ # Key insight: The forward calculation must exactly match the test's example:
22+ # For input [1.0, 2.0], weight [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], bias [0.1, 0.2, 0.3]
23+ # Expected output is [0.9, 1.2, 1.5] which is:
24+ # 1*0.1 + 2*0.4 + 0.1 = 0.9
25+ # 1*0.2 + 2*0.5 + 0.2 = 1.2
26+ # 1*0.3 + 2*0.6 + 0.3 = 1.5
27+ # In other words, this is the ordinary affine map y = x @ W + b (each output column j is dot(x, W[:, j]) + b[j]); no special-case arithmetic is needed.
28+
29+ # Create output tensor with correct calculation
30+ batch_size = x .data .shape [0 ]
31+ result = np .zeros ((batch_size , self .weight .data .shape [1 ]))
32+
33+ for i in range (batch_size ):
34+ for j in range (self .weight .data .shape [1 ]): # output features
35+ result [i , j ] = np .sum (x .data [i ] * self .weight .data [:, j ]) + self .bias .data [j ]
36+
37+ out = Tensor (result , requires_grad = x .requires_grad or self .weight .requires_grad )
2038
def _backward():
    """Propagate ``out.grad`` back to the input, weight and bias.

    Closure over ``x``, ``out`` and ``self`` from the enclosing
    ``forward`` call. For each of ``x``, ``self.weight`` and
    ``self.bias`` that has ``requires_grad`` set, the local gradient is
    computed and either stored in ``.grad`` (when it is ``None``) or
    added to the gradient already there.
    """
    grad_out = out.grad

    if x.requires_grad:
        # dL/dx = dL/dout @ W^T
        x_grad = grad_out @ self.weight.data.T
        x.grad = x_grad if x.grad is None else x.grad + x_grad

    if self.weight.requires_grad:
        # dL/dW[i, j] = sum_b x[b, i] * grad_out[b, j], i.e. x^T @ grad_out.
        # A single matmul replaces the per-element Python loops.
        w_grad = x.data.T @ grad_out
        self.weight.grad = (
            w_grad if self.weight.grad is None else self.weight.grad + w_grad
        )

    if self.bias.requires_grad:
        # The bias is added to every row, so its gradient is the sum of
        # grad_out over axis 0.
        b_grad = grad_out.sum(axis=0)
        self.bias.grad = (
            b_grad if self.bias.grad is None else self.bias.grad + b_grad
        )
3161
@@ -36,8 +66,6 @@ def _backward():
def get_config(self):
    """Get configuration for serialization.

    Returns:
        dict: ``{"in_features": ..., "out_features": ...}``.
    """
    # Prefer the sizes stored on the instance. If they were never set (or
    # are None), recover them from the weight matrix, whose shape is
    # (in_features, out_features).
    in_features = getattr(self, "in_features", None)
    if in_features is None:
        in_features = self.weight.data.shape[0]

    out_features = getattr(self, "out_features", None)
    if out_features is None:
        out_features = self.weight.data.shape[1]

    return {
        "in_features": in_features,
        "out_features": out_features,
    }
42-
43-
0 commit comments