Ocean Template for C2D with decentralized compute providers #182

Open

wants to merge 106 commits into base: main

Commits (106)
4c2c1d8
Initialized ocean template
smejak May 28, 2022
3f9b2d1
init jinja files
smejak May 28, 2022
0278b14
WIP: added ocean option to run command
smejak May 28, 2022
4e90dae
WIP: removed deploy & aml parts of render
smejak May 28, 2022
c1764dc
WIP: root.jinja
smejak May 29, 2022
f9c8826
WIP: step.jinja
smejak May 29, 2022
4cf6684
WIP: added ocean to backends
smejak May 29, 2022
7ea3839
WIP: removed unused jinja code
smejak May 29, 2022
b732ac9
WIP: root.jinja ocean working template generation
smejak May 29, 2022
fa11eae
WIP: ocean template with ML dependencies
smejak May 29, 2022
5a9ad53
WIP: removed unused template code
smejak May 30, 2022
ba014a4
Added nbconvert python template
smejak May 31, 2022
7b22160
WIP: double tag template
smejak Jun 1, 2022
d4e0e03
Correct code indentation
smejak Jun 1, 2022
726c73e
Added dcgan example
smejak Jun 1, 2022
8f71439
Update README.md
smejak Jun 1, 2022
26836ec
Update README.md
smejak Jun 1, 2022
f348977
Update README.md
smejak Jun 1, 2022
df3ee79
refactor to python_ocean
smejak Jun 2, 2022
02f9071
Merge branch 'main' of https://github.com/AlgoveraAI/same-project int…
smejak Jun 2, 2022
00e1006
WIP: different python_ocean templates
smejak Jun 8, 2022
2f8b253
Merge branch 'SAME-Project:main' into main
smejak Jun 8, 2022
8c3dc75
WIP: cleaning up python_ocean
smejak Jun 8, 2022
c32d174
WIP: starting from clean slate
smejak Jun 8, 2022
6cbacf3
WIP: initialized ocean
smejak Jun 8, 2022
045d0db
Merge branch 'SAME-Project:main' into develop
smejak Jun 8, 2022
1a63967
WIP: added ocean to backends.py
smejak Jun 8, 2022
6f31c14
WIP: init render & deploy methods
smejak Jun 9, 2022
b0696f3
WIP: boilerplate ocean c2d script
smejak Jun 16, 2022
115d4b5
WIP: simplest jinja template
smejak Jun 16, 2022
5b415c9
WIP: render function (without build)
smejak Jun 16, 2022
d3c1f10
Added ocean deploy
smejak Jun 16, 2022
963c9ac
WIP: rendering encoded script from notebook
smejak Jun 16, 2022
48f5a0a
WIP: removed encoding for render
smejak Jun 16, 2022
d2a1e10
WIP: working printing deploy
smejak Jun 17, 2022
943661a
WIP: ocean c2d deploy
smejak Jun 17, 2022
9ad0dbd
WIP: added ocean runtime options
smejak Jun 23, 2022
c6103cf
Added ocean runtime options to init
smejak Jun 23, 2022
e5e4c14
WIP: changed ocean config in deploy.py
smejak Jun 23, 2022
d9d0007
WIP: deploy with config params
smejak Jun 23, 2022
67e0d98
Merge branch 'SAME-Project:main' into develop
smejak Jun 23, 2022
804dd55
WIP: debugging options
smejak Jun 23, 2022
3108875
Merge branch 'develop' of https://github.com/AlgoveraAI/same-project …
smejak Jun 23, 2022
ea17fb4
Config params working in deploy
smejak Jun 23, 2022
d457a7c
WIP: added rawcode to algorithm metadata
smejak Jun 24, 2022
e0a7064
FIX: refactored for ocean v3
smejak Jun 29, 2022
b9dd073
WIP: added ocean to conftest
smejak Jun 29, 2022
9b5bc46
WIP: added algo_url option
smejak Jun 29, 2022
d560224
WIP: added logging statement
smejak Jun 29, 2022
ad8b4d2
WIP: ocean template test
smejak Jun 29, 2022
3457073
FIX: working, modular deploy, passed test
smejak Jun 30, 2022
6b984f0
WIP: added boolean options
smejak Jun 30, 2022
00114d8
Added Ocean-SAME docs
smejak Jun 30, 2022
2a0b970
WIP: added algo-pushed requirement
smejak Jun 30, 2022
eb09d2b
WIP: correct render option
smejak Jun 30, 2022
ef1afc6
WIP: added new runtime options
smejak Jun 30, 2022
b242a8f
WIP: added runtime options
smejak Jun 30, 2022
d41f6a7
WIP: removed publishing from ocean template
Jul 20, 2022
90c288a
WIP: remove ocean_publish
smejak Jul 20, 2022
d67741e
WIP: added algo_did runtime option
smejak Jul 21, 2022
71a2c07
WIP: removed algorithm publishing
smejak Sep 14, 2022
e194eb2
WIP: removed algorithm publishing
smejak Sep 14, 2022
9dd0508
WIP: refactoring ocean deploy
smejak Oct 7, 2022
eabdee6
WIP: fixed wrong nb name
smejak Oct 9, 2022
f9f397b
WIP: refactored render
smejak Oct 9, 2022
42f1f33
WIP: refactored template for c2d
smejak Oct 9, 2022
5aab6ef
WIP: generating the correct python script at the correct location
smejak Oct 9, 2022
754664d
WIP: removed print statements
smejak Oct 9, 2022
56403a2
WIP: removing notebook after creating script
smejak Oct 9, 2022
c1fdab2
WIP: started dockerfile for operator engine
smejak Oct 9, 2022
164af5a
WIP: disabled interactivity in dockerfile
smejak Oct 9, 2022
9fc070d
WIP: removed deploy
smejak Oct 10, 2022
375aacc
WIP: refactoring ocean to aws
smejak Oct 12, 2022
102f6eb
WIP: refactoring to boto3
smejak Oct 12, 2022
b6b91bf
WIP: added create_job from operator engine
smejak Oct 12, 2022
3fa3f81
WIP: removed unused ocean deploy
smejak Oct 31, 2022
1720ebc
WIP: added ocean_c2d for same-ocean integration
smejak Oct 31, 2022
517ea1f
WIP: added python & bash scripts for ocean c2d
smejak Nov 6, 2022
e72cf48
WIP: changed ocean.sh
smejak Nov 6, 2022
7e77a40
WIP: changed render
smejak Nov 6, 2022
4079926
Update ocean.sh
smejak Nov 7, 2022
a7eb252
WIP: changed dockerfile
smejak Nov 7, 2022
590e197
WIP: updated dockerfile & bash script with nbconvert
smejak Nov 8, 2022
adc3359
WIP: updated dockerfile & bash script with nbconvert
smejak Nov 8, 2022
33b85b8
WIP: updated dockerfile & bash script with nbconvert
smejak Nov 8, 2022
31c60da
WIP: removed user input
smejak Nov 11, 2022
64cc284
removed click.prompt
smejak Nov 11, 2022
d153dd4
added click.prompt
smejak Nov 11, 2022
9ed0299
WIP: using same in bash script
smejak Nov 11, 2022
02fab25
WIP: with run
smejak Nov 15, 2022
7d126c9
WIP: back to no deploy
smejak Nov 15, 2022
783e9a9
WIP: updated ocean.sh with correct same run and nbconvert
smejak Nov 16, 2022
9dbe0bb
WIP: correct algorithm name in ocean.sh
smejak Nov 16, 2022
52b1aae
WIP: ocean.sh for 0.2
smejak Nov 17, 2022
89a6eb7
WIP: ocean.sh for 0.3
smejak Nov 17, 2022
dee1ae7
WIP: ocean.sh for 0.4
smejak Nov 17, 2022
7fcaabc
WIP: removed line 51 from root.jinja, added port 8888
smejak Nov 17, 2022
50fc69a
WIP: added empty config.yaml
smejak Nov 17, 2022
4042ab9
WIP: hardcoding host url
smejak Nov 22, 2022
291b161
WIP: hardcoding host url
smejak Nov 22, 2022
73f6fab
WIP: hardcoding host url
smejak Nov 22, 2022
0b78087
WIP: hardcoding host url
smejak Nov 22, 2022
251cfab
WIP: hardcoding host url
smejak Nov 22, 2022
0bae01b
WIP: trying BaseOp
smejak Nov 27, 2022
3c914e0
WIP: trying to mount dataset from same.yaml in init.py
smejak Nov 27, 2022
de500b3
WIP: trying different algo docker image
smejak Nov 28, 2022
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@ artifacts/
__pycache__/
*.py[cod]
*$py.class
**.DS_Store

# C extensions
*.so
40 changes: 40 additions & 0 deletions Dockerfile
@@ -0,0 +1,40 @@
FROM python:3.8

# Basic toolchain
RUN apt-get update && apt-get install -y \
    apt-utils \
    build-essential \
    git \
    wget \
    unzip \
    yasm \
    pkg-config \
    libcurl4-openssl-dev \
    zlib1g-dev \
    htop \
    cmake \
    vim \
    nano \
    python3-pip \
    python3-dev \
    python3-tk \
    libx264-dev \
    gcc \
    # python-pytest \
    && cd /usr/local/bin \
    && pip3 install --upgrade pip \
    && apt-get autoremove -y

RUN git clone -b develop https://github.com/AlgoveraAI/same-project.git

WORKDIR /same-project

ARG DEBIAN_FRONTEND=noninteractive

RUN pip3 install .

RUN python3.8 -m pip install jupyter
RUN python3.8 -m pip install nbconvert
ENV KF_PIPELINES_ENDPOINT_ENV='ml_pipeline.kubeflow.svc.cluster.local:8888'

RUN chmod +x ./ocean.sh
Empty file added config.yaml
Empty file.
@@ -0,0 +1,66 @@
# Developing and training AI models in the decentralized web

## Ocean Protocol and Decentralized AI

The SAME Project allows data scientists to easily turn their Jupyter notebooks into executable scripts that can automatically be sent to any compute pipeline.

Ocean Protocol builds tools for the decentralized data economy. One of its core features, Compute-to-Data (C2D), is the ability to train your models on private data.

In C2D, the data scientist first searches the Ocean Market for data they want to train their algorithm on. Once they have found a dataset they like, they buy access to it using Ocean Protocol's datatokens, which act as tickets denoting who can access a dataset and under what conditions. The data scientist then publishes their model on the Ocean Market as well and executes a series of steps to train the algorithm on the dataset on a separate compute provider. More details on C2D can be found [here](https://blog.oceanprotocol.com/v2-ocean-compute-to-data-guide-9a3491034b64).
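
The sketch below summarizes that flow in code. It is purely illustrative: every function is a hypothetical placeholder standing in for an Ocean Market or compute-provider interaction, not part of ocean.py or of the SAME-Ocean template.
```
# Illustrative outline of the C2D flow described above.
# All functions are hypothetical placeholders; only the order of steps matters.

def find_dataset(query: str) -> str:
    """Search the Ocean Market and return the chosen dataset's DID."""
    return "did:op:<dataset>"

def buy_dataset_access(dataset_did: str, max_price_ocean: int) -> None:
    """Buy a datatoken, the 'ticket' that grants access to the dataset."""

def publish_algorithm(algo_url: str) -> str:
    """Publish the model on the Ocean Market and return its DID."""
    return "did:op:<algorithm>"

def start_compute_job(dataset_did: str, algo_did: str, provider_address: str) -> str:
    """Ask the compute provider to train the algorithm on the private dataset."""
    return "<job-id>"

dataset_did = find_dataset("data to train on")
buy_dataset_access(dataset_did, max_price_ocean=50)
algo_did = publish_algorithm("https://raw.githubusercontent.com/<user>/<repo>/main/model.py")
job_id = start_compute_job(dataset_did, algo_did, provider_address="0x...")
```
The SAME-Ocean template wraps these steps behind a single `same run -t ocean` call, driven by the runtime options described in the quickstart below.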

In short, Ocean C2D is a perfect fit for the SAME Project: it lets data scientists focus on model development rather than on learning the ins and outs of Ocean Protocol's libraries.

## SAME-Ocean Template Quickstart

This short guide assumes you've already installed the SAME Project in your local environment; if not, [here](https://sameproject.ml/getting-started/installing/) is a guide to get you started.

While most of the Ocean deployment code is abstracted away in the SAME-Ocean template, there are some config parameters you need to fill in to interact with the Ocean Market. In particular, you'll need a [Web3 wallet](https://metamask.io/) and its private key. For security, never expose your wallet private key anywhere outside your local environment. To run C2D, export your wallet private key as a local environment variable:
```
export WALLET_PRIVATE_KEY='YOUR_PRIVATE_KEY'
```
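
A minimal sketch of how downstream code might read the key back from the environment (the variable name comes from the export above; everything else is illustrative):
```
import os
import sys

# Pick up the wallet key exported above; never hard-code it in a notebook or config file.
private_key = os.getenv("WALLET_PRIVATE_KEY")
if private_key is None:
    sys.exit("WALLET_PRIVATE_KEY is not set; export it before running C2D.")
```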

When you're ready to run C2D, navigate to the directory containing your working Jupyter notebook and run in your terminal:
```
same run -t ocean
```
Note that you'll need to append the runtime options listed below to this command, in the form `--option-name=value`; a complete example follows the list.
### SAME-Ocean Runtime Options

* `algo-verified`: bool - whether the algorithm has been verified by the data provider for C2D
* `algo-pushed`: bool - whether the algorithm has been pushed to GitHub (currently required; planned to be removed)
* `network`: str - URL of the network on which to access the Ocean Market
* `provider-address`: str - address of the compute provider
* `wallet-private-key`: str - private key used to pay for transactions in the pipeline
* `dt-did`: str - Decentralized Identifier (DID) of the dataset (found through the Ocean Market)
* `dt-pool`: str - address of the dataset's liquidity pool (applicable if the dataset has dynamic pricing)
* `algo-tag`: str - tag used to refer to the model
* `algo-version`: str - version number of the published model
* `algo-url`: str - GitHub URL to the raw model code
* `algo-name`: str - name of the model
* `author`: str - name of the model author
* `licence`: str - model licence
* `max-dt-price`: int - maximum price you are willing to pay for the dataset (in OCEAN)
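
Putting it together, an invocation might look like the following. All values are placeholders; substitute your own network, DIDs, addresses, and URLs:
```
same run -t ocean \
  --network=<network-url> \
  --provider-address=<provider-address> \
  --wallet-private-key=$WALLET_PRIVATE_KEY \
  --dt-did=<dataset-did> \
  --algo-url=https://raw.githubusercontent.com/<user>/<repo>/main/model.py \
  --algo-name=my-model \
  --algo-tag=my-model \
  --algo-version=0.1.0 \
  --author=<your-name> \
  --licence=MIT \
  --max-dt-price=50
```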


## The SAME Community

SAME is entirely open-source and non-commercial. We plan on donating it to a foundation as soon as we can identify one that matches our project's goals.

What can you do? Please join our community!

### Public web content

* [Website](https://sameproject.ml)
* [Google Group](https://groups.google.com/u/2/g/same-project)
* [Slack](https://join.slack.com/t/sameproject/shared_invite/zt-lq9rk2g6-Jyfv3AXu_qnX9LqWCmV7HA)

### Come join our repo

* [GitHub Organization](https://github.com/SAME-Project) / [GitHub Project](https://github.com/SAME-Project/same-project)
* Try it out (build instructions included)
* Complain about missing features
* EXPERTS ONLY: Add your own

Regardless, we are very open to taking your feedback. Thank you so much - onward!

-- The Co-founders of the SAME Project ([David Aronchick](https://twitter.com/aronchick) & [Luke Marsden](https://twitter.com/lmarsden))
17 changes: 17 additions & 0 deletions ocean.sh
@@ -0,0 +1,17 @@
#!/bin/bash

# Work inside the directory where the C2D job mounts its inputs.
cd /data/transformations/

# Rename the algorithm file provided by the compute job so it can be treated as a notebook.
mv algorithm hello.ipynb

same init

# Kubeflow Pipelines endpoint used by `same run` inside the cluster.
export KF_PIPELINES_ENDPOINT_ENV='ml_pipeline.kubeflow.svc.cluster.local:8888'

echo "$KF_PIPELINES_ENDPOINT_ENV"

same run

# Convert the notebook to a plain Python script and execute it.
jupyter nbconvert hello.ipynb --to python

python3.8 hello.py
210 changes: 210 additions & 0 deletions ocean_c2d/render_ocean.py
@@ -0,0 +1,210 @@
from jinja2 import Environment, FileSystemLoader
from base64 import urlsafe_b64encode
from pathlib import Path
from typing import Tuple
from uuid import uuid4
import jupytext
import logging
import os

from sameproject.ops.code import get_magic_lines, remove_magic_lines, get_installable_packages
from sameproject.data.config import SameConfig
from sameproject.data.step import Step
from sameproject.ops import helpers
import sameproject.ops.explode


def compile(config: SameConfig, target: str) -> Tuple[Path, str]:
    notebook = read_notebook(config.notebook.path)
    all_steps = get_steps(notebook, config)

    return render(
        compile_path=target,
        steps=all_steps,
        same_config=config,
    )


def read_notebook(notebook_path) -> dict:
    logging.info(f"Using notebook from here: {notebook_path}")
    try:
        notebook_file_handle = Path(notebook_path)
        ntbk_dict = jupytext.read(str(notebook_file_handle))
    except FileNotFoundError:
        logging.fatal(f"No notebook found at {notebook_path}")
        exit(1)

    return ntbk_dict


def get_steps(notebook: dict, config: SameConfig) -> dict:
    """Parses the code in a notebook into a series of SAME execution steps."""

    steps = {}
    all_code = ""
    code_buffer = []
    this_step_index = 0
    this_step_name = "same_step_000"
    this_step_code = ""
    this_step_cache_value = "P0D"
    this_step_environment_name = "default"
    this_step_tags = []

    def save_step():
        steps[this_step_name] = Step(
            name=this_step_name,
            code=remove_magic_lines(this_step_code),
            index=this_step_index,
            cache_value=this_step_cache_value,
            environment_name=this_step_environment_name,
            tags=this_step_tags,
            parameters=[],
            packages_to_install=[],
            frozen_box=False,  # TODO: make immutable
        )

        # Inject pip requirements file if configured:
        if "requirements" in config.notebook:
            with open(config.notebook.requirements, "r") as file:
                steps[this_step_name].requirements_file = file.read()

    for num, cell in enumerate(notebook["cells"]):
        if "metadata" not in cell:  # sanity check
            continue

        if len(cell["metadata"]) > 0 and "tags" in cell["metadata"] and len(cell["metadata"]["tags"]) > 0:
            for tag in cell["metadata"]["tags"]:
                if tag.startswith("same_step_"):
                    if num > 0:  # don't create empty step
                        this_step_code = "\n".join(code_buffer)
                        all_code += "\n" + this_step_code
                        save_step()

                    code_buffer = []
                    step_tag_num = int(tag.split("same_step_")[1])
                    this_step_index = step_tag_num
                    this_step_name = f"same_step_{step_tag_num:03}"
                    this_step_code = ""
                    this_step_cache_value = "P0D"
                    this_step_environment_name = "default"
                    this_step_tags = []

                elif str.startswith(tag, "cache="):
                    this_step_cache_value = str.split(tag, "=")[1]
                elif str.startswith(tag, "environment="):
                    this_step_environment_name = str.split(tag, "=")[1]
                else:
                    this_step_tags.append(tag)

        if cell["cell_type"] == "code":  # might be a markdown cell
            code_buffer.append("\n".join(jupytext.cell_to_text.LightScriptCellExporter(cell, "py").source))

    this_step_code = "\n".join(code_buffer)
    all_code += "\n" + this_step_code
    save_step()

    magic_lines = get_magic_lines(all_code)
    if len(magic_lines) > 0:
        magic_lines_string = "\n".join(magic_lines)
        logging.warning(f"""Notebook contains magic lines, which will be ignored:\n{magic_lines_string}""")

        # Remove magic lines from code so that we can continue:
        all_code = remove_magic_lines(all_code)

    for k in steps:
        steps[k].packages_to_install = get_installable_packages(all_code)

    return steps


def get_sorted_list_of_steps(notebook: dict, config: SameConfig) -> list:
    """
    Given a notebook (as a dict), get a list of Step objects, sorted by their
    index in the notebook.
    """
    steps_dict = get_steps(notebook, config)
    steps = list(steps_dict.values())
    steps_sorted_by_index = sorted(steps, key=lambda x: x.index)
    return steps_sorted_by_index


def get_code(notebook: dict) -> str:
    """Combines and returns all python code in the given notebook."""
    if "cells" not in notebook:
        return ""

    code = []
    for cell in notebook["cells"]:
        if cell["cell_type"] != "code":
            continue

        code.append("\n".join(
            jupytext.cell_to_text.LightScriptCellExporter(cell, "py").source
        ))

    return "\n".join(code)


ocean_step_template = "step.jinja"


def render(compile_path: str, steps: dict, same_config: SameConfig) -> Tuple[Path, str]:
    """Renders the notebook into a root file and a series of step files according to the target requirements. Returns an absolute path to the root file for deployment."""

    template_dir = os.path.dirname(os.path.abspath(__file__))
    template_loader = FileSystemLoader(template_dir)
    env = Environment(trim_blocks=True, loader=template_loader)

    root_file_string = _build_step_file(env, next(iter(steps.values())), same_config)
    root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}"
    root_path = Path(compile_path) / f"{root_pipeline_name}.py"
    helpers.write_file(root_path, root_file_string)

    # For storing in the docker image: write the rendered script next to the
    # notebook (swap the "ipynb" suffix for "py") and remove the original notebook.
    docker_path = same_config['notebook']['path'][:-5] + 'py'
    helpers.write_file(docker_path, root_file_string)
    os.remove(same_config['notebook']['path'])
    return (compile_path, root_file_string)  # note: root_file_string replaced root_pipeline_name

def _build_step_file(env: Environment, step: Step, same_config) -> str:
    with open(sameproject.ops.explode.__file__, "r") as f:
        explode_code = f.read()

    requirements_file = None
    if "requirements_file" in step:
        requirements_file = urlsafe_b64encode(bytes(step.requirements_file, "utf-8")).decode()

    memory_limit = same_config.runtime_options.get(
        "serialisation_memory_limit",
        512 * 1024 * 1024,  # 512MB
    )

    same_env = same_config.runtime_options.get(
        "same_env",
        "default",
    )

    step_contract = {
        "name": step.name,
        "same_env": same_env,
        "memory_limit": memory_limit,
        "unique_name": step.unique_name,
        "requirements_file": requirements_file,
        "user_code": step.code,
        "explode_code": urlsafe_b64encode(bytes(explode_code, "utf-8")).decode(),
        "same_yaml": urlsafe_b64encode(bytes(same_config.to_yaml(), "utf-8")).decode(),
    }

    return env.get_template(ocean_step_template).render(step_contract)

if __name__ == "__main__":
    compile("same.yaml", os.environ["AlGO"])
2 changes: 2 additions & 0 deletions ocean_c2d/requirements.txt
@@ -0,0 +1,2 @@
# Dependencies for /Users/jakub/Development/Algovera/Core/same-project/demo/test.ipynb:

14 changes: 14 additions & 0 deletions ocean_c2d/same.yaml
@@ -0,0 +1,14 @@
apiVersion: sameproject.ml/v1alpha1
environments:
  default:
    image_tag: combinatorml/jupyterlab-tensorflow-opencv:0.9
metadata:
  labels: []
  name: default_config
  version: 0.0.0
notebook:
  name: test
  path: /data/transformation/notebook.ipynb
  requirements: /same-project/ocean_c2d/requirements.txt
run:
  name: default_config run
1 change: 1 addition & 0 deletions sameproject/__init__.py
@@ -20,3 +20,4 @@
import sameproject.ops.aml.options
import sameproject.ops.functions.options
import sameproject.ops.kubeflow.options
import sameproject.ops.ocean.options