SubaashNair
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/tests.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎core.py‎
Lines changed: 72 additions & 0 deletions b/‎core.py‎
Lines changed: 72 additions & 0 deletions
diff --git a/‎optimrl/__init__.py‎
Lines changed: 1 addition & 4 deletions b/‎optimrl/__init__.py‎
Lines changed: 1 addition & 4 deletions
@@ -36,6 +36,7 @@ jobs:
 
     - name: Build Windows Extension
       if: runner.os == 'Windows'
+      shell : cmd
       run: |
         # Optional: ensure the c_src directory exists
         if not exist "optimrl\c_src" mkdir "optimrl\c_src"
 
@@ -0,0 +1,72 @@
+# Removed all C dependencies and library loading.
+# Introducing GRPO class that uses PyTorch's autograd and vectorized operations.
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch import Tensor
+from typing import Any
+
+class GRPO:
+    """
+    A GRPO model that leverages PyTorch for automatic gradient computation and vectorized operations.
+    This replaces the previous C-based implementation.
+    """
+
+    def __init__(self, model: nn.Module, optimizer: optim.Optimizer, epsilon: float = 1e-8) -> None:
+        """
+        Initializes the GRPO instance.
+
+        Args:
+            model (nn.Module): The PyTorch model to be trained.
+            optimizer (optim.Optimizer): The optimizer for training the model.
+            epsilon (float): A small value added for numerical stability.
+        """
+        self.model = model
+        self.optimizer = optimizer
+        self.epsilon = epsilon
+
+    def forward(self, x: Tensor) -> Tensor:
+        """
+        Forward pass through the model.
+
+        Args:
+            x (Tensor): Input tensor.
+
+        Returns:
+            Tensor: The model's output.
+        """
+        return self.model(x)
+
+    def compute_loss(self, predictions: Tensor, targets: Tensor) -> Tensor:
+        """
+        Compute the loss between predictions and targets.
+        Uses mean squared error loss with an epsilon offset for numerical stability.
+
+        Args:
+            predictions (Tensor): The predicted outputs.
+            targets (Tensor): The ground-truth values.
+
+        Returns:
+            Tensor: The computed loss.
+        """
+        loss = torch.mean((predictions - targets) ** 2 + self.epsilon)
+        return loss
+
+    def training_step(self, x: Tensor, targets: Tensor) -> float:
+        """
+        Executes one training step: forward pass, loss computation, backpropagation, and optimizer update.
+
+        Args:
+            x (Tensor): Input batch.
+            targets (Tensor): Target values for the batch.
+
+        Returns:
+            float: The scalar loss value for this step.
+        """
+        self.optimizer.zero_grad()
+        predictions = self.forward(x)
+        loss = self.compute_loss(predictions, targets)
+        loss.backward()
+        self.optimizer.step()
+        return loss.item() 
@@ -1,9 +1,6 @@
 from .core import GRPO
-
-__version__ = "0.1.0"
-__all__ = ["GRPO"]
-from . import _version
 from ._version import get_versions
 
+
 __version__ = _version.get_versions()["version"]
 del get_versions