diff --git a/.gitignore b/.gitignore
index 87620ac7..3e4db9d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 .ipynb_checkpoints/
+__pycache__/
+test/*.json
diff --git a/micrograd/engine.py b/micrograd/engine.py
index afd82cc5..bfd731ba 100644
--- a/micrograd/engine.py
+++ b/micrograd/engine.py
@@ -1,67 +1,74 @@
-
 class Value:
-    """ stores a single scalar value and its gradient """
+    """stores a single scalar value and its gradient"""
 
-    def __init__(self, data, _children=(), _op=''):
+    def __init__(self, data, _children=(), _op=""):
         self.data = data
         self.grad = 0
         # internal variables used for autograd graph construction
         self._backward = lambda: None
         self._prev = set(_children)
-        self._op = _op # the op that produced this node, for graphviz / debugging / etc
+        self._op = _op  # the op that produced this node, for graphviz / debugging / etc
 
     def __add__(self, other):
         other = other if isinstance(other, Value) else Value(other)
-        out = Value(self.data + other.data, (self, other), '+')
+        out = Value(self.data + other.data, (self, other), "+")
 
         def _backward():
             self.grad += out.grad
             other.grad += out.grad
+
         out._backward = _backward
 
         return out
 
     def __mul__(self, other):
         other = other if isinstance(other, Value) else Value(other)
-        out = Value(self.data * other.data, (self, other), '*')
+        out = Value(self.data * other.data, (self, other), "*")
 
         def _backward():
             self.grad += other.data * out.grad
             other.grad += self.data * out.grad
+
         out._backward = _backward
 
         return out
 
     def __pow__(self, other):
-        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
-        out = Value(self.data**other, (self,), f'**{other}')
+        assert isinstance(other, (int, float)), (
+            "only supporting int/float powers for now"
+        )
+        # keep the exponent in _op: Value.data_ parses it back to rebuild _backward
+        out = Value(self.data**other, (self,), f"**{other}")
 
         def _backward():
-            self.grad += (other * self.data**(other-1)) * out.grad
+            self.grad += (other * self.data ** (other - 1)) * out.grad
+
         out._backward = _backward
 
         return out
 
     def relu(self):
-        out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')
+        out = Value(0 if self.data < 0 else self.data, (self,), "ReLU")
 
         def _backward():
             self.grad += (out.data > 0) * out.grad
+
         out._backward = _backward
 
         return out
 
     def backward(self):
-
         # topological order all of the children in the graph
         topo = []
         visited = set()
+
         def build_topo(v):
             if v not in visited:
                 visited.add(v)
                 for child in v._prev:
                     build_topo(child)
                 topo.append(v)
+
         build_topo(self)
 
         # go one variable at a time and apply the chain rule to get its gradient
@@ -69,26 +76,98 @@ def build_topo(v):
         for v in reversed(topo):
             v._backward()
 
-    def __neg__(self): # -self
+    def __neg__(self):  # -self
         return self * -1
 
-    def __radd__(self, other): # other + self
+    def __radd__(self, other):  # other + self
         return self + other
 
-    def __sub__(self, other): # self - other
+    def __sub__(self, other):  # self - other
         return self + (-other)
 
-    def __rsub__(self, other): # other - self
+    def __rsub__(self, other):  # other - self
         return other + (-self)
 
-    def __rmul__(self, other): # other * self
+    def __rmul__(self, other):  # other * self
         return self * other
 
-    def __truediv__(self, other): # self / other
+    def __truediv__(self, other):  # self / other
         return self * other**-1
 
-    def __rtruediv__(self, other): # other / self
+    def __rtruediv__(self, other):  # other / self
         return other * self**-1
 
     def __repr__(self):
         return f"Value(data={self.data}, grad={self.grad})"
+
+    # SAVE: base object structure as json
+    def _data(self):
+        """
+        Return a dict representing this Value and its computation graph.
+        """
+        return {
+            "d": self.data,
+            "o": self._op,
+            "c": [child._data() for child in self._prev],
+            "g": self.grad,
+        }
+
+    @staticmethod
+    def data_(data):
+        """
+        Create a Value from a saved dictionary (as from _data).
+
+        Children are rebuilt recursively, so a node shared by several parents
+        in the saved graph comes back as independent copies.
+        """
+        inst = Value(data["d"], [], data["o"])
+        inst.grad = data["g"]
+        if not data["c"]:
+            return inst
+        inst._prev = [Value.data_(cd) for cd in data["c"]]
+
+        # recreate the _backward closure from the operator and the children
+        op = data["o"]
+        lhs = inst._prev[0]
+        # _prev is a set when saving, so `a + a` / `a * a` store a single child;
+        # both operands are then the same node
+        rhs = inst._prev[-1]
+
+        if op == "*":
+
+            def backward():
+                lhs.grad += rhs.data * inst.grad
+                rhs.grad += lhs.data * inst.grad
+
+        elif op == "+":
+
+            def backward():
+                lhs.grad += inst.grad
+                rhs.grad += inst.grad
+
+        elif op.startswith("**"):
+            if op == "**":
+                raise ValueError("saved '**' node is missing its exponent")
+            # __pow__ stores the op as "**" followed by the exponent
+            exponent = float(op[2:])
+
+            def backward():
+                lhs.grad += (exponent * lhs.data ** (exponent - 1)) * inst.grad
+
+        elif op == "tanh":
+
+            def backward():
+                lhs.grad += (1 - inst.data**2) * inst.grad
+
+        elif op == "ReLU":
+
+            def backward():
+                lhs.grad += (inst.data > 0) * inst.grad
+
+        else:
+            # unknown op: keep the node but do not propagate gradients through it
+            def backward():
+                return None
+
+        inst._backward = backward
+        return inst
diff --git a/micrograd/nn.py b/micrograd/nn.py
index 30d5d777..88ea98b3 100644
--- a/micrograd/nn.py
+++ b/micrograd/nn.py
@@ -1,8 +1,11 @@
+import json
+import os
 import random
+
 from micrograd.engine import Value
 
-class Module:
 
+class Module:
     def zero_grad(self):
         for p in self.parameters():
             p.grad = 0
@@ -10,25 +13,42 @@ def zero_grad(self):
     def parameters(self):
         return []
 
-class Neuron(Module):
 
+class Neuron(Module):
     def __init__(self, nin, nonlin=True):
-        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
+        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
         self.b = Value(0)
         self.nonlin = nonlin
 
     def __call__(self, x):
-        act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
+        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)
         return act.relu() if self.nonlin else act
 
     def parameters(self):
         return self.w + [self.b]
 
+    # SAVE neuron data
+    def _data(self):
+        return {
+            "b": self.b._data(),
+            "il": self.nonlin,
+            "w": [cw._data() for cw in self.w],
+        }
+
+    # LOAD neuron from saved data
+    @staticmethod
+    def data_(data):
+        inst = Neuron(0, nonlin=data["il"])
+        inst.w = [Value.data_(rw) for rw in data["w"]]
+        inst.b = Value.data_(data["b"])
+
+        return inst
+
     def __repr__(self):
         return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"
 
-class Layer(Module):
 
+class Layer(Module):
     def __init__(self, nin, nout, **kwargs):
         self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]
 
@@ -39,14 +59,29 @@ def __call__(self, x):
     def parameters(self):
         return [p for n in self.neurons for p in n.parameters()]
 
+    # SAVE layer
+    def _data(self):
+        return {"ns": [n._data() for n in self.neurons]}
+
+    # LOAD layer
+    @staticmethod
+    def data_(data):
+        inst = Layer(0, 0)
+        inst.neurons = [Neuron.data_(nr) for nr in data["ns"]]
+
+        return inst
+
     def __repr__(self):
         return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"
 
-class MLP(Module):
 
+class MLP(Module):
     def __init__(self, nin, nouts):
         sz = [nin] + nouts
-        self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(nouts)-1) for i in range(len(nouts))]
+        self.layers = [
+            Layer(sz[i], sz[i + 1], nonlin=i != len(nouts) - 1)
+            for i in range(len(nouts))
+        ]
 
     def __call__(self, x):
         for layer in self.layers:
@@ -58,3 +93,49 @@ def parameters(self):
 
     def __repr__(self):
         return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"
+
+    # optional to automate training
+    def fit(self, X, y, epochs=1000, lr=0.001):
+        for epoch in range(epochs):
+            # Forward pass
+            out = [self(x) for x in X]  # Each `self(x)` should return a `Value`
+
+            loss = sum((ya - yp) ** 2 for yp, ya in zip(out, y)) / len(y)
+
+            loss.backward()  # This works now because `loss` is a `Value`
+
+            for param in self.parameters():
+                param.data -= lr * param.grad  # Gradient descent update
+                param.grad = 0  # Reset gradients for next iteration
+
+            if epoch % 100 == 0:
+                print(f"Epoch: {epoch}, Loss: {loss.data}")
+
+    # SAVE mlp
+    def _data(self):
+        return {"ls": [ly._data() for ly in self.layers]}
+
+    # LOAD mlp
+    @staticmethod
+    def data_(data):
+        inst = MLP(0, [])
+        inst.layers = [Layer.data_(ly) for ly in data["ls"]]
+        return inst
+
+    # Interface point for saving model
+    def save(self, fp: str):
+        if not fp:
+            raise ValueError("File path is not specified")
+
+        with open(fp, "w", encoding="utf-8") as f:
+            json.dump(self._data(), f, indent=2)
+        print("SUCCESS! Model saved!")
+
+    # Interface point for loading model
+    @staticmethod
+    def load(fp: str):
+        if not os.path.exists(fp):
+            raise FileNotFoundError(f"Model file not found: {fp}")
+        with open(fp, "r", encoding="utf-8") as f:
+            contents = json.load(f)
+        return MLP.data_(contents)
diff --git a/test/test_saving.py b/test/test_saving.py
new file mode 100644
index 00000000..307d2632
--- /dev/null
+++ b/test/test_saving.py
@@ -0,0 +1,87 @@
+# Testing saving of the model
+# and loading
+
+import json
+import os
+import sys
+
+# to be able to import base dir modules
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from micrograd import nn
+from micrograd.engine import Value
+
+
+# Debug: print tree like structure from parent to child
+# for small debugging
+def log_root(root, level=0, offset=5):
+    indent = " " * level * offset
+    print(f"{indent}|__" if level else "", root)
+
+    if root._prev:
+        for child in root._prev:
+            log_root(child, level + 1, offset)
+
+
+def test_val():
+    a = Value(-4.0)
+    b = Value(2.0)
+    c = a + b
+    d = a * b + b**3
+    c += c + 1
+    c += 1 + c + (-a)
+    d += d * 2 + (b + a).relu()
+    d += 3 * d + (b - a).relu()
+    e = c - d
+    f = e**2
+    g = f / 2.0
+    g += 10.0 / f
+
+    log_root(g)
+
+
+def save_neuron():
+    n = nn.Neuron(2)
+    x = [Value(1.0), Value(-2.0)]
+    y = n(x)
+
+    y.backward()
+    with open("test.json", "w", encoding="utf-8") as f:
+        json.dump(n._data(), f, indent=2)
+    print(n)
+
+
+def load_neuron():
+    with open("test.json", "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    n = nn.Neuron.data_(data)
+    return n
+
+
+if __name__ == "__main__":
+    FILENAME = "test/test.json"
+    epochs = 1000
+    lr = 0.01
+
+    X = [
+        [2.4, 5.0, 7.0, 8.8],
+        [4.1, 1.1, 5.7, 7.8],
+        [2.1, 1.7, 2.7, 1.8],
+        [1.8, 0.1, 2.3, 7.8],
+    ]
+    y = [1, 4, 2, 4]
+
+    # train and save a new model, or keep improving a previously saved one
+    if not os.path.isfile(FILENAME):
+        model = nn.MLP(4, [4, 4, 1])
+        print(f"Initial:{[model(x).data for x in X]}")
+        model.fit(X, y, lr=lr, epochs=epochs)
+        model.save(FILENAME)
+        print(f"After:{[model(x).data for x in X]}")
+        print(f"Actual:{y}")
+    else:
+        model = nn.MLP.load(FILENAME)
+        model.fit(X, y, lr=lr, epochs=epochs)
+        print(f"Loaded:{[model(x).data for x in X]}")
+        print(f"Actual:{y}")