Skip to content

Commit ecc302a

Browse files
committed
Fix MNIST example and mean() handling: drop the Softmax layer before CrossEntropyLoss in examples/basic/mnist.py, handle NaN/array axis values in autograd Mean.apply, and compute Tensor.mean directly with numpy plus a manual backward pass.
1 parent 583d14b commit ecc302a

File tree

3 files changed

+37
-10
lines changed

3 files changed

+37
-10
lines changed

examples/basic/mnist.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,18 +79,19 @@ def train_and_evaluate_mnist():
7979

8080
# Create model: 784 -> 128 -> 64 -> 10
8181
model = Sequential(
82-
Linear(784, 128), ReLU(), Linear(128, 64), ReLU(), Linear(64, 10), Softmax()
82+
Linear(784, 128), ReLU(),
83+
Linear(128, 64), ReLU(),
84+
Linear(64, 10)
8385
)
84-
85-
print("Model architecture:")
86-
print("784 (input) -> 128 -> ReLU -> 64 -> ReLU -> 10 -> Softmax (output)")
87-
86+
8887
# Create dataset and dataloader
8988
train_dataset = Dataset(X_train_subset, y_train_subset)
9089
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
9190

9291
# Create loss function and optimizer
9392
criterion = CrossEntropyLoss()
93+
94+
9495
optimizer = Adam(model.parameters(), lr=0.001)
9596

9697
print(f"\nStarting training with batch size 32...")

fit/core/autograd.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,8 +353,17 @@ class Mean(Function):
353353
def apply(ctx: Dict[str, Any], a: np.ndarray, axis=None, keepdims=False) -> np.ndarray:
354354
ctx["input_shape"] = a.shape
355355

356-
# Convert 0-d array to None
357-
if hasattr(axis, 'ndim') and axis.ndim == 0:
356+
# Handle problematic axis values
357+
if isinstance(axis, np.ndarray):
358+
if axis.ndim == 0: # 0-d array
359+
axis_val = axis.item() # Extract the scalar value
360+
if np.isnan(axis_val):
361+
axis = None
362+
else:
363+
axis = int(axis_val)
364+
else:
365+
axis = None # Multi-dimensional axis arrays not supported
366+
elif axis is not None and np.isnan(axis):
358367
axis = None
359368

360369
ctx["axis"] = axis

fit/core/tensor.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,26 @@ def mean(self, axis=None, keepdims=False):
225225
Returns:
226226
A new tensor containing the mean value
227227
"""
228-
# Use the Mean function from autograd
229-
mean_fn = get_function("mean")
230-
return mean_fn.forward(self, axis, keepdims)
228+
# Just do it directly with numpy, forget the autograd for now
229+
result_data = np.mean(self.data, axis=axis, keepdims=keepdims)
230+
result = Tensor(result_data, requires_grad=self.requires_grad)
231+
232+
if self.requires_grad:
233+
def _backward():
234+
if result.grad is not None:
235+
# Gradient of mean is 1/n
236+
if axis is None:
237+
grad = np.full_like(self.data, result.grad / self.data.size)
238+
else:
239+
# Handle axis case
240+
grad = np.full_like(self.data, result.grad / self.data.shape[axis])
241+
242+
self.grad = grad if self.grad is None else self.grad + grad
243+
244+
result._backward = _backward
245+
result._prev = {self}
246+
247+
return result
231248

232249
def exp(self):
233250
"""

0 commit comments

Comments
 (0)