From 4c2c1d8f2193563fd2ca449ee1a93a709d440a40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sat, 28 May 2022 19:35:33 +0200 Subject: [PATCH 01/99] Initialized ocean template --- .gitignore | 1 + sameproject/ops/ocean/__init__.py | 2 ++ sameproject/ops/ocean/deploy.py | 0 sameproject/ops/ocean/render.py | 0 4 files changed, 3 insertions(+) create mode 100644 sameproject/ops/ocean/__init__.py create mode 100644 sameproject/ops/ocean/deploy.py create mode 100644 sameproject/ops/ocean/render.py diff --git a/.gitignore b/.gitignore index e9395bc2..f3f3a677 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ artifacts/ __pycache__/ *.py[cod] *$py.class +**.DS_Store # C extensions *.so diff --git a/sameproject/ops/ocean/__init__.py b/sameproject/ops/ocean/__init__.py new file mode 100644 index 00000000..3cd4721c --- /dev/null +++ b/sameproject/ops/ocean/__init__.py @@ -0,0 +1,2 @@ +from .render import render +from .deploy import deploy \ No newline at end of file diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py new file mode 100644 index 00000000..e69de29b diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py new file mode 100644 index 00000000..e69de29b From 3f9b2d157bc2c4b998505eba362ee71684b572db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sat, 28 May 2022 20:01:06 +0200 Subject: [PATCH 02/99] init jinja files --- sameproject/ops/ocean/deploy.py | 9 ++++ sameproject/ops/ocean/root.jinja | 78 ++++++++++++++++++++++++++++++++ sameproject/ops/ocean/step.jinja | 0 3 files changed, 87 insertions(+) create mode 100644 sameproject/ops/ocean/root.jinja create mode 100644 sameproject/ops/ocean/step.jinja diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index e69de29b..b00ad180 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -0,0 +1,9 @@ +from sameproject.data.config import SameConfig +from sameproject.ops import helpers +import importlib + + +def deploy(base_path: str, root_name: str, config: SameConfig): + with helpers.add_path(str(base_path)): + root_module = importlib.import_module(root_name) + root_module.root() diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja new file mode 100644 index 00000000..cacb4f7a --- /dev/null +++ b/sameproject/ops/ocean/root.jinja @@ -0,0 +1,78 @@ +import matplotlib.pyplot as plt +import numpy as np +import os +from pathlib import Path +import pickle +import random +import sys + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils + +def test_dcgan(local=False): + + results_dir = Path('results') + + if not results_dir.exists(): + results_dir.mkdir() + + weights_path = Path("netG.pth") + + if not weights_path.exists(): + os.system("wget https://www.dropbox.com/s/p3pjgmpiki7w0ur/netG.pth") + + nc = 3 + nz = 100 + ngf = 64 + ngpu = 1 + + # Decide which device we want to run on + device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") + + # Network + network = {% network %} + + # Create the generator + netG = network(ngpu).to(device) + + # Handle multi-gpu if desired + if (device.type == 'cuda') and (ngpu > 1): + netG = nn.DataParallel(netG, list(range(ngpu))) + + netG.load_state_dict(torch.load(weights_path, 
map_location=torch.device('cpu'))) + + netG.eval() + + # Create batch of latent vectors that we will use to visualize + # the progression of the generator + fixed_noise = torch.randn(1, nz, 1, 1, device=device) + + with torch.no_grad(): + img = netG(fixed_noise).detach().cpu() + + def denormalize_image(image): + """Reverse to normalize_image() function""" + max_ = image.max() + min_ = image.min() + return (image - min_)/(max_ - min_) + + img = denormalize_image(img) + + img = img.squeeze().permute(1,2,0) + + filename = results_dir / 'test.pickle' if local else "/data/outputs/result" + + with open(filename, 'wb') as pickle_file: + print(f"Pickling results in {filename}") + pickle.dump(img, pickle_file) + +if __name__ == "__main__": + local = (len(sys.argv) == 2 and sys.argv[1] == "local") + test_dcgan(local) \ No newline at end of file diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja new file mode 100644 index 00000000..e69de29b From 0278b1486bbb2501aebc6fe164c50b48c706b49f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sat, 28 May 2022 20:31:22 +0200 Subject: [PATCH 03/99] WIP: added ocean option to run command --- sameproject/cli/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sameproject/cli/run.py b/sameproject/cli/run.py index 3e6c72ce..d1ea228e 100644 --- a/sameproject/cli/run.py +++ b/sameproject/cli/run.py @@ -26,7 +26,7 @@ "-t", "--target", default="kubeflow", - type=click.Choice(["aml", "kubeflow", "functions"]), + type=click.Choice(["aml", "kubeflow", "functions", "ocean"]), ) @click.option( "--persist-temp-files", From 4e90dae71fcaa1c6bfa268069c0c37fad8e83ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sat, 28 May 2022 20:40:27 +0200 Subject: [PATCH 04/99] WIP: removed deploy & aml parts of render --- sameproject/ops/ocean/deploy.py | 7 +- sameproject/ops/ocean/render.py | 196 ++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 3 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index b00ad180..ceba42fc 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -4,6 +4,7 @@ def deploy(base_path: str, root_name: str, config: SameConfig): - with helpers.add_path(str(base_path)): - root_module = importlib.import_module(root_name) - root_module.root() + return + # with helpers.add_path(str(base_path)): + # root_module = importlib.import_module(root_name) + # root_module.root() diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index e69de29b..f5151d25 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -0,0 +1,196 @@ +from jinja2 import Environment, FileSystemLoader, select_autoescape +from sameproject.data.step import Step +from sameproject.ops import helpers +from typing import Tuple +from pathlib import Path +from uuid import uuid4 +import logging +import os + + +root_template = "root.jinja" +step_template = "step.jinja" + + +def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: + """Renders the notebook into a root file and a series of step files according to the target requirements. 
Returns an absolute path to the root file for deployment.""" + templateDir = os.path.dirname(os.path.abspath(__file__)) + templateLoader = FileSystemLoader(templateDir) + env = Environment(loader=templateLoader) + same_config["compile_path"] = compile_path + root_file_string = _build_root_file(env, steps, same_config) + + root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" + root_path = Path(compile_path) / f"{root_pipeline_name}.py" + helpers.write_file(root_path, root_file_string) + + for step_name in steps: + # Need a unique name so that libraries don't conflict in sys.modules. This is MOSTLY a test issue, but could be the case generally. + step_file_string = _build_step_file(env, steps[step_name], steps[step_name].unique_name) + (Path(compile_path) / steps[step_name].unique_name).mkdir() + helpers.write_file(Path(compile_path) / steps[step_name].unique_name / f"{steps[step_name].unique_name}.py", step_file_string) + + return (compile_path, root_pipeline_name) + + +def _build_root_file(env: Environment, all_steps: list, same_config: dict) -> str: + template = env.get_template(root_template) + + root_contract = { + "root_parameters_as_string": "", + "comma_delim_list_of_packages_as_string": "", + "list_of_steps": [], + "comma_delim_list_of_step_names_as_str": "", + "secrets_to_create_as_dict": {}, + "experiment_name": "", + "experiment_name_safe": "", + "list_of_environments": {}, + "image_pull_secrets": {}, + "aml_workspace_credentials": {}, + "compile_dir": "", + } + + params_to_merge = [] + + # Do i need to check if run and run.parameters are required fields? + try: + run_parameters = same_config.run.parameters + except Exception: + run_parameters = {} + + for k in run_parameters: + # Is this necessary? Could we support complex datatypes as parameters? + # Probably - but we'll need to serialize to pass as a param and then deserialize in the template + if isinstance(run_parameters[k], (int, float, str)): + params_to_merge.append(f"{k}='{run_parameters[k]}'") + else: + logging.warning(f"We only support numeric, bool and strings as default parameters (no dicts or lists). We're setting the default value for '{k}' to ''.") + + root_contract["root_parameters_as_string"] = ", ".join(params_to_merge) + + root_contract["list_of_environments"]["default"] = {} + root_contract["list_of_environments"]["default"]["image_tag"] = "library/python:3.10-slim-buster" + root_contract["list_of_environments"]["default"]["private_registry"] = False + + for name in same_config.environments: + root_contract["list_of_environments"][name] = {} + root_contract["list_of_environments"][name]["image_tag"] = same_config.environments[name].image_tag + + # Need to convert to string here because yaml parsing automatically converts (so we need to normalize) + # to string, in case the user didn't write True/False in a compliant way (e.g. 'true' lowercase) + private_registry_bool = str(same_config.environments[name].get("private_registry", False)) + root_contract["list_of_environments"][name]["private_registry"] = private_registry_bool.lower() == "true" + root_contract["list_of_environments"][name]["secret_name"] = "" + + if root_contract["list_of_environments"][name]["private_registry"]: + + # This is starting to have quite a lot of code smell - root_contract requires a bit of massaging (instead of + # just passing through same_config to the jinja template nakedly) but i'm starting to dislike everything here. 
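+ # Illustrative sketch (assumed shape, not a documented schema) of the environment
+ # entry this branch reads; my_private_env and all values are placeholders:
+ #   environments:
+ #     my_private_env:
+ #       image_tag: registry.example.com/my/image:tag
+ #       private_registry: true
+ #       credentials:
+ #         image_pull_secret_name: my-secret
+ #         image_pull_secret_registry_uri: registry.example.com
+ #         image_pull_secret_username: user
+ #         image_pull_secret_password: pass
+ #         image_pull_secret_email: user@example.com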
+ if "credentials" in same_config.environments[name]: + # Someone COULD set this to be a 'private_registry' but did not set credentials. This may be ok! + # They could have already mounted the secret in the cluster, so we should let it go ahead. + # However, because jinja doesn't like it when we parse through a struct without anything being set (even empty) + # We're going to go ahead and set it up now, and populate it only if there are values + + # TODO: # same_config.environments[name].get("credentials", {}) <- would something like this work? + # It COULD autopopulate the entire dict, but not sure because if it's empty, then do all the fields + # get created? + these_credentials = {} + these_credentials["image_pull_secret_name"] = same_config.environments[name].credentials.get("image_pull_secret_name", "") + these_credentials["image_pull_secret_registry_uri"] = same_config.environments[name].credentials.get("image_pull_secret_registry_uri", "") + these_credentials["image_pull_secret_username"] = same_config.environments[name].credentials.get("image_pull_secret_username", "") + these_credentials["image_pull_secret_password"] = same_config.environments[name].credentials.get("image_pull_secret_password", "") + these_credentials["image_pull_secret_email"] = same_config.environments[name].credentials.get("image_pull_secret_email", "") + + root_contract["secrets_to_create_as_dict"][name] = these_credentials + + if same_config.get("ocean"): + root_contract["ocean_workspace_credentials"] = { + # "AML_SP_PASSWORD_VALUE": same_config.aml.AML_SP_PASSWORD_VALUE, + # "AML_SP_TENANT_ID": same_config.aml.AML_SP_TENANT_ID, + # "AML_SP_APP_ID": same_config.aml.AML_SP_APP_ID, + # "WORKSPACE_SUBSCRIPTION_ID": same_config.aml.WORKSPACE_SUBSCRIPTION_ID, + # "WORKSPACE_RESOURCE_GROUP": same_config.aml.WORKSPACE_RESOURCE_GROUP, + # "WORKSPACE_NAME": same_config.aml.WORKSPACE_NAME, + # "AML_COMPUTE_NAME": same_config.aml.AML_COMPUTE_NAME, + } + + # Until we get smarter, we're just going to combine inject EVERY package into every step. + # This is not IDEAL, but it's not as bad as it sounds because it'll allow systems to cache + # containers more readily, even between steps, and package downloads are pretty small. + # Using a dict so that we it'll remove dupes. + # Also, we should probably swap this out for conda_environment.yaml (somehow). + global_package_list = {} + for step in all_steps: + for package in all_steps[step].packages_to_install: + global_package_list[package] = "" + + if global_package_list: + # First merge all the packages together and delimit with ' and , + joined_string = "', '".join(list(global_package_list.keys())) + + # Then bound it with one more single quote on each side + root_contract["comma_delim_list_of_packages_as_string"] = f"'{joined_string}'" + + # If someone does something hinky, like name their steps out of alpha order, we're just not + # going to care, and parse them in the order they gave them to us. 
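+ # Concretely: steps are wired into a single linear chain in the order given, with
+ # each entry carrying the unique_name of the step before it (previous_step /
+ # previous_step_name), which the templates then use to build the DAG.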
+ previous_step_name = "" + for step_name in all_steps: + + step_content = all_steps[step_name] + env_name = step_content.environment_name + + step_to_append = {} + step_to_append["name"] = step_content.name + step_to_append["unique_name"] = step_content.unique_name + step_to_append["package_string"] = root_contract["comma_delim_list_of_packages_as_string"] + step_to_append["cache_value"] = step_content.cache_value + step_to_append["previous_step"] = previous_step_name + + if root_contract["list_of_environments"].get(env_name, None) is None: + error_message = f"'{env_name}'' was listed as an environment in the notebook, but no such environment is listed in your SAME configuration file." + logging.fatal(error_message) + raise ValueError(error_message) + + step_to_append["environment_name"] = env_name + step_to_append["image_tag"] = root_contract["list_of_environments"][env_name]["image_tag"] + step_to_append["private_registry"] = root_contract["list_of_environments"][env_name]["private_registry"] + step_to_append["secret_name"] = root_contract["list_of_environments"][env_name]["secret_name"] + + if previous_step_name != "": + step_to_append["previous_step_name"] = previous_step_name + root_contract["list_of_steps"].append(step_to_append) + + previous_step_name = step_content.unique_name + + # Text manipulation in jinja is pretty weak, we'll do both of these cleanings in python. + + # experiment_name is often displayed to the user, so try to keep it as close to the original as possible + root_contract["experiment_name"] = helpers.removeIllegalExperimentNameCharacters(same_config.metadata.name) + + # However, often there's a backup, internal only name that needs much stricter character restrictions + # We'll create that here. + root_contract["experiment_name_safe"] = helpers.lowerAlphaNumericOnly(same_config.metadata.name) + + # List manipulation is also pretty weak in jinja (plus I like views being very non-functional). We'll + # create the comma delim list of steps (which we need for DAG description) in python as well. + + # For AML, each "step" needs to have '_step' attached (this may be historical) + # and not necessary - look at it when we combine all these step rendering functions into one + root_contract["comma_delim_list_of_step_names_as_str"] = ", ".join([f"{all_steps[this_step_name].unique_name}_step" for this_step_name in all_steps]) + + root_contract["compile_path"] = same_config["compile_path"] + + return template.render(root_contract) + + +def _build_step_file(env: Environment, step: Step, step_name: str) -> str: + template = env.get_template(step_template) + + # Create a parameter_string for putting in each step function + # default is to be a serialized empty dict. We should probably + # handle this a different way (allowing custom params to be passed in) + # but haven't found this requirement from a customer yet. 
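+ # For reference, the "gAR9lC4=" constant below is just the urlsafe base64 of a
+ # dill/pickle-serialised empty dict, i.e. roughly:
+ #   import dill
+ #   from base64 import urlsafe_b64encode
+ #   str(urlsafe_b64encode(dill.dumps({}, protocol=4)), encoding="ascii")  # -> 'gAR9lC4='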
+ parameter_string = '__context="gAR9lC4=", __run_info="gAR9lC4=", __metadata_url=""' + step_contract = {"name": step_name, "inner_code": step.code, "parameter_string": parameter_string} + return template.render(step_contract) From c1764dcec8b0b26a0155a0e1240bd79adbad38bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 29 May 2022 10:47:03 +0200 Subject: [PATCH 05/99] WIP: root.jinja --- sameproject/ops/ocean/root.jinja | 69 +++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja index cacb4f7a..5d7627cc 100644 --- a/sameproject/ops/ocean/root.jinja +++ b/sameproject/ops/ocean/root.jinja @@ -1,3 +1,5 @@ +{% autoescape off %} + import matplotlib.pyplot as plt import numpy as np import os @@ -16,6 +18,65 @@ import torchvision.datasets as dset import torchvision.transforms as transforms import torchvision.utils as vutils +def root( + {{ root_parameters_as_string }}, + context="", + metadata_url="", +): + # The below is base64 encoding of an empty locals() output + __original_context = "" + if context == '': + __original_context = "gAR9lC4=" + else: + __original_context = context + + experiment = Experiment(ws, "{{ experiment_name }}") + + run_info_dict = { + "experiment_id": experiment.id, + "step_id": "run_info_step", + } + + output = {} + output["run_info"] = str( + base64.urlsafe_b64encode(dill.dumps(run_info_dict)), encoding="ascii" + ) + +{% for step in list_of_steps %} + entry_point = "{{step.unique_name}}.py" + __pipelinedata_context_{{step.unique_name}} = PipelineData( + "__pipelinedata_context_{{step.unique_name}}", output_mode="mount" + ) + + {{step.unique_name}}_step = PythonScriptStep( + source_directory="{{compile_path}}/{{step.unique_name}}", + script_name=entry_point, + arguments=[ + "--input_context", + {% if step.previous_step_name %}__pipelinedata_context_{{step.previous_step_name}}{% else %}__original_context_param{% endif %}, + "--run_info", + output["run_info"], + "--metadata_url", + metadata_url, + "--output_context", + __pipelinedata_context_{{step.unique_name}}, + ], + + {% if step.previous_step_name %}inputs=[__pipelinedata_context_{{step.previous_step_name}}],{% endif %} + outputs=[__pipelinedata_context_{{step.unique_name}}], + compute_target=compute_target, + runconfig=config_{{step.environment_name}}, + allow_reuse=False, + ) + +{% endfor %} + + run_pipeline_definition = [{{comma_delim_list_of_step_names_as_str}}] + + built_pipeline = Pipeline(workspace=ws, steps=[run_pipeline_definition]) + pipeline_run = experiment.submit(built_pipeline) + + def test_dcgan(local=False): results_dir = Path('results') @@ -75,4 +136,10 @@ def test_dcgan(local=False): if __name__ == "__main__": local = (len(sys.argv) == 2 and sys.argv[1] == "local") - test_dcgan(local) \ No newline at end of file + test_dcgan(local) + + # execute only if run as a script + root( + context="gAR9lC4=", metadata_url="" + ) +{% endautoescape %} \ No newline at end of file From f9c8826c6260c031207d193b8a063e9745432777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 29 May 2022 10:47:18 +0200 Subject: [PATCH 06/99] WIP: step.jinja --- sameproject/ops/ocean/step.jinja | 150 +++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index e69de29b..78e29e3b 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -0,0 +1,150 @@ +{% 
autoescape off %} + +import argparse as __argparse +from multiprocessing import context +import pathlib +from typing import NamedTuple +from azureml.core import Run +from pprint import pprint as __pp +import os +from pathlib import Path as __Path +from azureml.pipeline.core import ( + PipelineData as __PipelineData, + PipelineParameter as __PipelineParameter, +) +import dill +from base64 import ( + urlsafe_b64encode as __urlsafe_b64encode, + urlsafe_b64decode as __urlsafe_b64decode, +) + +def main({{ parameter_string }}) -> NamedTuple('FuncOutput',[('context', str),]): + import dill + import base64 + from base64 import urlsafe_b64encode, urlsafe_b64decode + from copy import copy as __copy + from types import ModuleType as __ModuleType + from pprint import pprint as __pp + import datetime as __datetime + import requests + + __run_info_dict = dill.loads(urlsafe_b64decode(__run_info)) + __base64_decode = urlsafe_b64decode(__context) + __context_import_dict = dill.loads(__base64_decode) + + __variables_to_mount = {} + __loc = {} + + for __k in __context_import_dict: + __variables_to_mount[__k] = dill.loads(__context_import_dict[__k]) + + __json_data = { + "experiment_id": __run_info_dict["experiment_id"], + "run_id": __run_info_dict["run_id"], + "step_id": "{{ name }}", + "metadata_type": "input", + "metadata_value": __context, + "metadata_time": __datetime.datetime.now().isoformat(), + } + + print(f"Metadata url: {__metadata_url}") + if __metadata_url != '': + print("Found metadata URL - executing.") + __pp(__json_data) + try: + __r = requests.post(__metadata_url, json=__json_data,) + __r.raise_for_status() + except requests.exceptions.HTTPError as __err: + print(f"Error: {__err}") + + __inner_code_to_execute = """ +import dill +import base64 +from base64 import urlsafe_b64encode, urlsafe_b64decode +from types import ModuleType as __ModuleType + +{{ inner_code | replace("\\", "\\\\") | replace("\"", "\\\"") }} + +__locals_keys = frozenset(locals().keys()) +__globals_keys = frozenset(globals().keys()) +__context_export = {} + +for val in __globals_keys: + if not val.startswith("_") and not isinstance(val, __ModuleType): + __context_export[val] = dill.dumps(globals()[val]) + +# Locals needs to come after globals in case we made changes +for val in __locals_keys: + if not val.startswith("_") and not isinstance(val, __ModuleType): + __context_export[val] = dill.dumps(locals()[val]) + +__b64_string = str(urlsafe_b64encode(dill.dumps(__context_export)), encoding="ascii") + +""" + exec(__inner_code_to_execute, __variables_to_mount, __loc) + + __json_output_data = { + "experiment_id": __run_info_dict["experiment_id"], + "run_id": __run_info_dict["run_id"], + "step_id": "%v", + "metadata_type": "output", + "metadata_value": __loc["__b64_string"], + "metadata_time": __datetime.datetime.now().isoformat(), + } + + print(f"Metadata url: {__metadata_url}") + if __metadata_url != '': + print("Found metadata URL - executing.") + __pp(__json_data) + try: + __r = requests.post(__metadata_url, json=__json_output_data,) + __r.raise_for_status() + except requests.exceptions.HTTPError as err: + print(f"Error: {err}") + + from collections import namedtuple + output = namedtuple("FuncOutput", ["context"]) + return output(__loc["__b64_string"]) + + +if __name__ == "__main__": + __run = Run.get_context() + __parser = __argparse.ArgumentParser("cleanse") + __parser.add_argument("--input_context", type=str, help="Context to run as string") + __parser.add_argument("--run_info", type=str, help="Run info") + 
__parser.add_argument("--output_context_path", type=str, help="Output context path") + __parser.add_argument("--metadata_url", type=str, help="Metadata URL") + + __args = __parser.parse_args() + + __input_context_string = "gAR9lC4=" + __context_filename = "context.txt" + if "__pipelinedata_context" in __args.input_context: + context_full_path = __Path(__args.input_context) / __context_filename + print(f"reading file: {context_full_path}") + __input_context_string = context_full_path.read_text() + elif __args.input_context and __args.input_context.strip(): + __input_context_string = __args.input_context.strip() + + # Need to unpack and do this here, because AML only gives + # us the run id inside the container. Unpacking and repacking so + # bulk of the code is unchanged. + __run_info_dict = dill.loads(__urlsafe_b64decode(__args.run_info)) + __run_info_dict["run_id"] = __run.get_details()["runId"] + + # Returns a tuple, where the zeroth index is the string + __output_context_tuple = main( + __context=__input_context_string, + __run_info=str( + __urlsafe_b64encode(dill.dumps(__run_info_dict)), encoding="ascii" + ), + __metadata_url=__args.metadata_url, + ) + + __p = __Path(__args.output_context_path) + __p.mkdir(parents=True, exist_ok=True) + __filepath = __p / __context_filename + with __filepath.open("w+") as __f: + __f.write(__output_context_tuple[0]) + +{% endautoescape %} \ No newline at end of file From 4cf6684df72cfea30610e37f31ae1b761e9d96fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 29 May 2022 11:06:49 +0200 Subject: [PATCH 07/99] WIP: added ocean to backends --- sameproject/ops/backends.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sameproject/ops/backends.py b/sameproject/ops/backends.py index 93962919..f58136c3 100644 --- a/sameproject/ops/backends.py +++ b/sameproject/ops/backends.py @@ -5,6 +5,7 @@ import sameproject.ops.functions as functions import sameproject.ops.kubeflow as kubeflow import sameproject.ops.aml as aml +import sameproject.ops.ocean as ocean import sameproject.ops.helpers import tempfile import click @@ -15,6 +16,7 @@ def render(target: str, steps: list, config: SameConfig, compile_path: str = Non "aml": aml.render, "kubeflow": kubeflow.render, "functions": functions.render, + "ocean": ocean.render } render_function = target_renderers.get(target, None) @@ -33,6 +35,7 @@ def deploy(target: str, base_path: Path, root_file: str, config: SameConfig): "aml": aml.deploy, "kubeflow": kubeflow.deploy, "functions": functions.deploy, + "ocean": ocean.deploy } deploy_function = target_deployers.get(target, None) From 7ea3839b941b9e2834095e4e9d2e5ef2f76fb04b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 29 May 2022 14:43:58 +0200 Subject: [PATCH 08/99] WIP: removed unused jinja code --- sameproject/ops/ocean/root.jinja | 37 -------------------------------- 1 file changed, 37 deletions(-) diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja index 5d7627cc..804cd581 100644 --- a/sameproject/ops/ocean/root.jinja +++ b/sameproject/ops/ocean/root.jinja @@ -97,43 +97,6 @@ def test_dcgan(local=False): # Decide which device we want to run on device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") - # Network - network = {% network %} - - # Create the generator - netG = network(ngpu).to(device) - - # Handle multi-gpu if desired - if (device.type == 'cuda') and (ngpu > 1): - netG = nn.DataParallel(netG, list(range(ngpu))) - - 
netG.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu'))) - - netG.eval() - - # Create batch of latent vectors that we will use to visualize - # the progression of the generator - fixed_noise = torch.randn(1, nz, 1, 1, device=device) - - with torch.no_grad(): - img = netG(fixed_noise).detach().cpu() - - def denormalize_image(image): - """Reverse to normalize_image() function""" - max_ = image.max() - min_ = image.min() - return (image - min_)/(max_ - min_) - - img = denormalize_image(img) - - img = img.squeeze().permute(1,2,0) - - filename = results_dir / 'test.pickle' if local else "/data/outputs/result" - - with open(filename, 'wb') as pickle_file: - print(f"Pickling results in {filename}") - pickle.dump(img, pickle_file) - if __name__ == "__main__": local = (len(sys.argv) == 2 and sys.argv[1] == "local") test_dcgan(local) From b732ac9e6f6d18266b2184352c48ee433aa6c10e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 29 May 2022 16:07:56 +0200 Subject: [PATCH 09/99] WIP: root.jinja ocean working template generation --- sameproject/ops/ocean/root.jinja | 219 +++++++++++++++++-------------- 1 file changed, 123 insertions(+), 96 deletions(-) diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja index 804cd581..127d1c9b 100644 --- a/sameproject/ops/ocean/root.jinja +++ b/sameproject/ops/ocean/root.jinja @@ -1,108 +1,135 @@ -{% autoescape off %} +{% autoescape off %}from kfp.components import create_component_from_func +from kubernetes.client.models import V1EnvVar +from kubernetes import client, config +from typing import NamedTuple +from base64 import b64encode +import kfp.dsl as dsl +import kubernetes +import json +import kfp + +from run_info import run_info_fn +{% for step in list_of_steps %} +from {{ step.unique_name }} import {{ step.unique_name }}_fn +{% endfor %} + -import matplotlib.pyplot as plt -import numpy as np -import os -from pathlib import Path -import pickle -import random -import sys +run_info_comp = kfp.components.create_component_from_func( + func=run_info_fn, + packages_to_install=[ + "dill==0.3.5.1", + "kfp==1.8.12", + ], +) -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils +{% for step in list_of_steps %} +{{ step.unique_name }}_comp = create_component_from_func( + func={{ step.unique_name }}_fn, + base_image="{{ step.image_tag }}", + packages_to_install=[ + "dill==0.3.5.1", + "pympler==1.0.1", + "requests==2.27.1", + {{ step.package_string }} # TODO: make this a loop + ], +) +{% endfor %} +# TODO: support kubeflow-specific config like aws secrets, mlflow endpoints. 
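+# Usage sketch (assumption, for illustration only): a rendered root() like the one
+# below would typically be compiled or submitted with the kfp v1 SDK, e.g.
+#   kfp.compiler.Compiler().compile(root, "root_pipeline.yaml")
+# or
+#   kfp.Client().create_run_from_pipeline_func(root, arguments={})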
+@dsl.pipeline(name="Compilation of pipelines",) def root( - {{ root_parameters_as_string }}, - context="", - metadata_url="", + context='', metadata_url='', + AWS_ACCESS_KEY_ID: str = "minio", + AWS_SECRET_ACCESS_KEY: str = "minio123", + MLFLOW_S3_ENDPOINT_URL: str = "http://combinator-minio.mlflow.svc.cluster.local:9000", + MLFLOW_TRACKING_URI: str = "http://combinator-mlflow.mlflow.svc.cluster.local:5000", ): - # The below is base64 encoding of an empty locals() output - __original_context = "" - if context == '': - __original_context = "gAR9lC4=" - else: - __original_context = context + # Generate secrets (if not already created) + secrets_by_env = {} +{% for env_name in secrets_to_create_as_dict %} +{% set secret = secrets_to_create_as_dict[env_name] %} + config.load_kube_config() + v1 = client.CoreV1Api() + namespace = "kubeflow" + name = "{{ experiment_name_safe }}" + metadata = {"name": name, "namespace": "kubeflow"} + api_version = "v1" + kind = "Secret" + type = "kubernetes.io/dockerconfigjson" + + cred_payload = { + "auths": { + "{{secret.image_pull_secret_registry_uri}}": { + "username": "{{secret.image_pull_secret_username}}", + "password": "{{secret.image_pull_secret_password}}", + "email": "{{secret.image_pull_secret_email}}", + "auth": b64encode( + f"{{secret.image_pull_secret_username}}:{{secret.image_pull_secret_password}}".encode() + ).decode(), + } + } + } + + data = { + ".dockerconfigjson": b64encode(json.dumps(cred_payload).encode()).decode() + } + + body = client.V1Secret( + api_version="v1", + data=data, + kind="Secret", + metadata=metadata, + type=type, + ) + api_response = None + try: + api_response = v1.create_namespaced_secret(namespace, body) + except kubernetes.client.rest.ApiException as e: + if e.status == 409: + if ( + cred_payload["auths"] + and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"] + and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"]["username"] + and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"]["password"] + and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"]["email"] + ): + api_response = v1.replace_namespaced_secret(name, namespace, body) + else: + print(f"Missing value") + else: + raise e + + dsl.get_pipeline_conf().set_image_pull_secrets([client.V1LocalObjectReference(name=name)]) +{% endfor %} - experiment = Experiment(ws, "{{ experiment_name }}") + # TODO: need a way to configure and handle env vars for backends properly + env_vars = { + "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID, + "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY, + "MLFLOW_S3_ENDPOINT_URL": MLFLOW_S3_ENDPOINT_URL, + "MLFLOW_TRACKING_URI": MLFLOW_TRACKING_URI, + } - run_info_dict = { - "experiment_id": experiment.id, - "step_id": "run_info_step", - } + run_info = run_info_comp(run_id=kfp.dsl.RUN_ID_PLACEHOLDER) - output = {} - output["run_info"] = str( - base64.urlsafe_b64encode(dill.dumps(run_info_dict)), encoding="ascii" - ) {% for step in list_of_steps %} - entry_point = "{{step.unique_name}}.py" - __pipelinedata_context_{{step.unique_name}} = PipelineData( - "__pipelinedata_context_{{step.unique_name}}", output_mode="mount" - ) - - {{step.unique_name}}_step = PythonScriptStep( - source_directory="{{compile_path}}/{{step.unique_name}}", - script_name=entry_point, - arguments=[ - "--input_context", - {% if step.previous_step_name %}__pipelinedata_context_{{step.previous_step_name}}{% else %}__original_context_param{% endif %}, - "--run_info", - output["run_info"], - "--metadata_url", - 
metadata_url, - "--output_context", - __pipelinedata_context_{{step.unique_name}}, - ], - - {% if step.previous_step_name %}inputs=[__pipelinedata_context_{{step.previous_step_name}}],{% endif %} - outputs=[__pipelinedata_context_{{step.unique_name}}], - compute_target=compute_target, - runconfig=config_{{step.environment_name}}, - allow_reuse=False, - ) - + {{ step.unique_name }} = {{ step.unique_name }}_comp( +{% if step.previous_step_name %} + input_context={{ step.previous_step_name }}.outputs["output_context"], +{% else %} + input_context="", +{% endif %} + run_info=run_info.outputs["run_info"], + metadata_url=metadata_url + ) + +{% if step.previous_step_name %} + {{ step.unique_name }}.after({{ step.previous_step_name }}) +{% endif %} + {{ step.unique_name }}.execution_options.caching_strategy.max_cache_staleness = "P0D" + for k in env_vars: + {{ step.unique_name }}.add_env_variable(V1EnvVar(name=k, value=env_vars[k])) {% endfor %} - run_pipeline_definition = [{{comma_delim_list_of_step_names_as_str}}] - - built_pipeline = Pipeline(workspace=ws, steps=[run_pipeline_definition]) - pipeline_run = experiment.submit(built_pipeline) - - -def test_dcgan(local=False): - - results_dir = Path('results') - - if not results_dir.exists(): - results_dir.mkdir() - - weights_path = Path("netG.pth") - - if not weights_path.exists(): - os.system("wget https://www.dropbox.com/s/p3pjgmpiki7w0ur/netG.pth") - - nc = 3 - nz = 100 - ngf = 64 - ngpu = 1 - - # Decide which device we want to run on - device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") - -if __name__ == "__main__": - local = (len(sys.argv) == 2 and sys.argv[1] == "local") - test_dcgan(local) - - # execute only if run as a script - root( - context="gAR9lC4=", metadata_url="" - ) -{% endautoescape %} \ No newline at end of file +{% endautoescape %} From fa11eae25e29fbca68458ac51edc5a9f085fdc95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 29 May 2022 16:43:54 +0200 Subject: [PATCH 10/99] WIP: ocean template with ML dependencies --- ...peline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py | 80 +++ ...ep_000_afdeddf09e474ffdbe0543fd0d775bbd.py | 526 ++++++++++++++++++ sameproject/ops/ocean/root.jinja | 85 +-- sameproject/ops/ocean/step.jinja | 26 +- 4 files changed, 648 insertions(+), 69 deletions(-) create mode 100644 examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py create mode 100644 examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py diff --git a/examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py b/examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py new file mode 100644 index 00000000..ca4b13eb --- /dev/null +++ b/examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py @@ -0,0 +1,80 @@ + +from typing import NamedTuple +from base64 import b64encode +import json +import logging +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import matplotlib.image as mpimg +import numpy as np +import random +from pathlib import Path +import pickle +import sys +import time +import kfp + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils +from run_info import run_info_fn + + +from same_step_000_afdeddf09e474ffdbe0543fd0d775bbd import same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_fn + + 
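+# Assumed context for this generated example: create_component_from_func, dsl and
+# V1EnvVar would come from kfp.components, kfp.dsl and kubernetes.client.models
+# respectively, matching the imports of the kubeflow-style template this backend
+# was derived from.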
+ +run_info_comp = kfp.components.create_component_from_func( + func=run_info_fn, + packages_to_install=[ + "dill==0.3.5.1", + "kfp==1.8.12", + ], +) + + +same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_comp = create_component_from_func( + func=same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_fn, + base_image="combinatorml/jupyterlab-tensorflow-opencv:0.9", + packages_to_install=[ + "dill==0.3.5.1", + "pympler==1.0.1", + "requests==2.27.1", + 'matplotlib', 'numpy', 'Pillow', 'torch', 'torchvision' # TODO: make this a loop + ], +) + + +# TODO: support kubeflow-specific config like aws secrets, mlflow endpoints. +@dsl.pipeline(name="Compilation of pipelines",) +def root( + context='', metadata_url='', +): + # Generate secrets (if not already created) + secrets_by_env = {} + + + run_info = run_info_comp(run_id=kfp.dsl.RUN_ID_PLACEHOLDER) + + + + same_step_000_afdeddf09e474ffdbe0543fd0d775bbd = same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_comp( + + input_context="", + + run_info=run_info.outputs["run_info"], + metadata_url=metadata_url + ) + + + same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.execution_options.caching_strategy.max_cache_staleness = "P0D" + for k in env_vars: + same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.add_env_variable(V1EnvVar(name=k, value=env_vars[k])) + + diff --git a/examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py b/examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py new file mode 100644 index 00000000..dfccc2d6 --- /dev/null +++ b/examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py @@ -0,0 +1,526 @@ + + +import argparse as __argparse +from multiprocessing import context +import pathlib +from typing import NamedTuple +from pprint import pprint as __pp +import os +from pathlib import Path as __Path +import dill +import json +import logging +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import matplotlib.image as mpimg +import numpy as np +import random +from pathlib import Path +import pickle +import sys +import time + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils +from base64 import ( + urlsafe_b64encode as __urlsafe_b64encode, + urlsafe_b64decode as __urlsafe_b64decode, +) + +def main(__context="gAR9lC4=", __run_info="gAR9lC4=", __metadata_url="") -> NamedTuple('FuncOutput',[('context', str),]): + import dill + import base64 + from base64 import urlsafe_b64encode, urlsafe_b64decode + from copy import copy as __copy + from types import ModuleType as __ModuleType + from pprint import pprint as __pp + import datetime as __datetime + import requests + + __run_info_dict = dill.loads(urlsafe_b64decode(__run_info)) + __base64_decode = urlsafe_b64decode(__context) + __context_import_dict = dill.loads(__base64_decode) + + __variables_to_mount = {} + __loc = {} + + for __k in __context_import_dict: + __variables_to_mount[__k] = dill.loads(__context_import_dict[__k]) + + __json_data = { + "experiment_id": __run_info_dict["experiment_id"], + "run_id": __run_info_dict["run_id"], + "step_id": "same_step_000_afdeddf09e474ffdbe0543fd0d775bbd", + "metadata_type": "input", + "metadata_value": __context, + "metadata_time": __datetime.datetime.now().isoformat(), + } + + 
print(f"Metadata url: {__metadata_url}") + if __metadata_url != '': + print("Found metadata URL - executing.") + __pp(__json_data) + try: + __r = requests.post(__metadata_url, json=__json_data,) + __r.raise_for_status() + except requests.exceptions.HTTPError as __err: + print(f"Error: {__err}") + + __inner_code_to_execute = """ +import dill +import base64 +from base64 import urlsafe_b64encode, urlsafe_b64decode +from types import ModuleType as __ModuleType + +# The code for the DCGAN generative model is taken from the official PyTorch docs https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html. + +import argparse +import json +import logging +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import matplotlib.image as mpimg +import numpy as np +import os +import random +from pathlib import Path +import pickle +import sys +import time + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils +def get_input(local=False): + if local: + print(\"Reading local punks directory.\") + + # Root directory for dataset + filename = Path('data/punks/tealpunks') + # filename = Path('data/punks-sample') + # filename = Path('data/celeba') + + return filename + + dids = os.getenv('DIDS', None) + + if not dids: + print(\"No DIDs found in environment. Aborting.\") + return + + dids = json.loads(dids) + + cwd = os.getcwd() + print('cwd', cwd) + + + for did in dids: + print('ls', f'/data/inputs/{did}/0') + print('ls2', os.listdir(f'/data/inputs/')) + filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service + print(f\"Reading asset file {filename}.\") + # print('type', type(os.listdir(f'/data/inputs/{did}/0/')[0])) + + + return filename +def run_dcgan(local=False): + + t0 = time.time() + + filename = get_input(local) + if not filename: + print(\"Could not retrieve filename.\") + return + + from PIL import Image + with open(filename) as datafile: + print(type(datafile)) + print(datafile) + datafile.seek(0) + img = Image.open(datafile) + print('@@@', img) + + + teal_images = sorted(list(filename.glob('*'))) + + print(teal_images) + + results_dir = Path('results') + + if not results_dir.exists(): + results_dir.mkdir() + + if local: + img0 = mpimg.imread(teal_images[0]) + img1 = mpimg.imread(teal_images[1]) + fig, ax = plt.subplots(1,2) + ax[0].imshow(img0) + ax[1].imshow(img1) + [a.axis('off') for a in ax] + plt.savefig(results_dir / \"sample.png\") + + # Set random seed for reproducibility + manualSeed = 999 + #manualSeed = random.randint(1, 10000) # use if you want new results + print(\"Random Seed: \", manualSeed) + random.seed(manualSeed) + torch.manual_seed(manualSeed) + + # Training parameters + workers = 2 + batch_size = 128 + image_size = 64 + nc = 3 + nz = 100 + ngf = 64 + ndf = 64 + num_epochs = 5 + lr = 0.0002 + beta1 = 0.5 + ngpu = 1 + + # We can use an image folder dataset the way we have it setup. 
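+ # Note: torchvision's ImageFolder expects root/<class_name>/<image>, which is why
+ # root=filename.parent is passed below - the punks directory itself then acts as
+ # the single (dummy) class label.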
+ # Create the dataset + dataset = dset.ImageFolder(root=filename.parent, + transform=transforms.Compose([ + transforms.Resize(image_size), + transforms.CenterCrop(image_size), + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), + ])) + # Create the dataloader + dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, + shuffle=True, num_workers=workers) + + # Decide which device we want to run on + device = torch.device(\"cuda:0\" if (torch.cuda.is_available() and ngpu > 0) else \"cpu\") + + # custom weights initialization called on netG and netD + def weights_init(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find('BatchNorm') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0) + + # Generator Code + class Generator(nn.Module): + def __init__(self, ngpu): + super(Generator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is Z, going into a convolution + nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), + nn.BatchNorm2d(ngf * 8), + nn.ReLU(True), + # state size. (ngf*8) x 4 x 4 + nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf * 4), + nn.ReLU(True), + # state size. (ngf*4) x 8 x 8 + nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf * 2), + nn.ReLU(True), + # state size. (ngf*2) x 16 x 16 + nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf), + nn.ReLU(True), + # state size. (ngf) x 32 x 32 + nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), + nn.Tanh() + # state size. (nc) x 64 x 64 + ) + + def forward(self, input): + return self.main(input) + + # Create the generator + netG = Generator(ngpu).to(device) + + # Handle multi-gpu if desired + if (device.type == 'cuda') and (ngpu > 1): + netG = nn.DataParallel(netG, list(range(ngpu))) + + # Apply the weights_init function to randomly initialize all weights + # to mean=0, stdev=0.02. + netG.apply(weights_init) + + # Print the model + print(netG) + + class Discriminator(nn.Module): + def __init__(self, ngpu): + super(Discriminator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is (nc) x 64 x 64 + nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf) x 32 x 32 + nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * 2), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*2) x 16 x 16 + nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * 4), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*4) x 8 x 8 + nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * 8), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*8) x 4 x 4 + nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), + nn.Sigmoid() + ) + + def forward(self, input): + return self.main(input) + + # Create the Discriminator + netD = Discriminator(ngpu).to(device) + + # Handle multi-gpu if desired + if (device.type == 'cuda') and (ngpu > 1): + netD = nn.DataParallel(netD, list(range(ngpu))) + + # Apply the weights_init function to randomly initialize all weights + # to mean=0, stdev=0.2. 
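+ # (weights_init above actually draws from N(0, 0.02) for both netG and netD; the
+ # 0.2 in the preceding comment appears to be a typo carried over from the
+ # upstream tutorial.)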
+ netD.apply(weights_init) + + # Print the model + print(netD) + + # Initialize BCELoss function + criterion = nn.BCELoss() + + # Create batch of latent vectors that we will use to visualize + # the progression of the generator + fixed_noise = torch.randn(64, nz, 1, 1, device=device) + + # Establish convention for real and fake labels during training + real_label = 1. + fake_label = 0. + + # Setup Adam optimizers for both G and D + optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999)) + optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999)) + + # Training Loop + + # Lists to keep track of progress + img_list = [] + G_losses = [] + D_losses = [] + iters = 0 + + print(\"Starting Training Loop...\") + # For each epoch + for epoch in range(num_epochs): + # For each batch in the dataloader + for i, data in enumerate(dataloader, 0): + + ############################ + # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) + ########################### + ## Train with all-real batch + netD.zero_grad() + # Format batch + real_cpu = data[0].to(device) + b_size = real_cpu.size(0) + label = torch.full((b_size,), real_label, dtype=torch.float, device=device) + # Forward pass real batch through D + output = netD(real_cpu).view(-1) + # Calculate loss on all-real batch + errD_real = criterion(output, label) + # Calculate gradients for D in backward pass + errD_real.backward() + D_x = output.mean().item() + + ## Train with all-fake batch + # Generate batch of latent vectors + noise = torch.randn(b_size, nz, 1, 1, device=device) + # Generate fake image batch with G + fake = netG(noise) + label.fill_(fake_label) + # Classify all fake batch with D + output = netD(fake.detach()).view(-1) + # Calculate D's loss on the all-fake batch + errD_fake = criterion(output, label) + # Calculate the gradients for this batch, accumulated (summed) with previous gradients + errD_fake.backward() + D_G_z1 = output.mean().item() + # Compute error of D as sum over the fake and the real batches + errD = errD_real + errD_fake + # Update D + optimizerD.step() + + ############################ + # (2) Update G network: maximize log(D(G(z))) + ########################### + netG.zero_grad() + label.fill_(real_label) # fake labels are real for generator cost + # Since we just updated D, perform another forward pass of all-fake batch through D + output = netD(fake).view(-1) + # Calculate G's loss based on this output + errG = criterion(output, label) + # Calculate gradients for G + errG.backward() + D_G_z2 = output.mean().item() + # Update G + optimizerG.step() + + # Output training stats + if i % 50 == 0: + print('[%d/%d][%d/%d]\\tLoss_D: %.4f\\tLoss_G: %.4f\\tD(x): %.4f\\tD(G(z)): %.4f / %.4f' + % (epoch, num_epochs, i, len(dataloader), + errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) + + # Save Losses for plotting later + G_losses.append(errG.item()) + D_losses.append(errD.item()) + + # Check how the generator is doing by saving G's output on fixed_noise + if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)): + with torch.no_grad(): + fake = netG(fixed_noise).detach().cpu() + img_list.append(vutils.make_grid(fake, padding=2, normalize=True)) + + iters += 1 + + if local: + plt.figure(figsize=(10,5)) + plt.title(\"Generator and Discriminator Loss During Training\") + plt.plot(G_losses,label=\"G\") + plt.plot(D_losses,label=\"D\") + plt.xlabel(\"iterations\") + plt.ylabel(\"Loss\") + plt.legend() + plt.savefig(results_dir / \"loss.png\") + + fig = 
plt.figure(figsize=(20,20)) + plt.axis(\"off\") + ims = [[plt.imshow(np.transpose(i,(1,2,0)), animated=True)] for i in img_list] + # ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True) + + fig = plt.figure(figsize=(20,20)) + plt.axis(\"off\") + plt.imshow(img_list[-1].permute(1,2,0)) + plt.savefig(results_dir / \"gen.png\") + + filename = results_dir / 'punks.pickle' if local else \"/data/outputs/result\" + with open(filename, 'wb') as pickle_file: + print(f\"Pickling results in {filename}\") + pickle.dump(img_list[-1], pickle_file) + + t1 = time.time() + total = t1-t0 + + print('Time: ', total) + +if __name__ == \"__main__\": + local = (len(sys.argv) == 2 and sys.argv[1] == \"local\") + run_dcgan(local) + + + + + + + + +__locals_keys = frozenset(locals().keys()) +__globals_keys = frozenset(globals().keys()) +__context_export = {} + +for val in __globals_keys: + if not val.startswith("_") and not isinstance(val, __ModuleType): + __context_export[val] = dill.dumps(globals()[val]) + +# Locals needs to come after globals in case we made changes +for val in __locals_keys: + if not val.startswith("_") and not isinstance(val, __ModuleType): + __context_export[val] = dill.dumps(locals()[val]) + +__b64_string = str(urlsafe_b64encode(dill.dumps(__context_export)), encoding="ascii") + +""" + exec(__inner_code_to_execute, __variables_to_mount, __loc) + + __json_output_data = { + "experiment_id": __run_info_dict["experiment_id"], + "run_id": __run_info_dict["run_id"], + "step_id": "%v", + "metadata_type": "output", + "metadata_value": __loc["__b64_string"], + "metadata_time": __datetime.datetime.now().isoformat(), + } + + print(f"Metadata url: {__metadata_url}") + if __metadata_url != '': + print("Found metadata URL - executing.") + __pp(__json_data) + try: + __r = requests.post(__metadata_url, json=__json_output_data,) + __r.raise_for_status() + except requests.exceptions.HTTPError as err: + print(f"Error: {err}") + + from collections import namedtuple + output = namedtuple("FuncOutput", ["context"]) + return output(__loc["__b64_string"]) + + +if __name__ == "__main__": + __run = Run.get_context() + __parser = __argparse.ArgumentParser("cleanse") + __parser.add_argument("--input_context", type=str, help="Context to run as string") + __parser.add_argument("--run_info", type=str, help="Run info") + __parser.add_argument("--output_context_path", type=str, help="Output context path") + __parser.add_argument("--metadata_url", type=str, help="Metadata URL") + + __args = __parser.parse_args() + + __input_context_string = "gAR9lC4=" + __context_filename = "context.txt" + if "__pipelinedata_context" in __args.input_context: + context_full_path = __Path(__args.input_context) / __context_filename + print(f"reading file: {context_full_path}") + __input_context_string = context_full_path.read_text() + elif __args.input_context and __args.input_context.strip(): + __input_context_string = __args.input_context.strip() + + # Need to unpack and do this here, because AML only gives + # us the run id inside the container. Unpacking and repacking so + # bulk of the code is unchanged. 
+ __run_info_dict = dill.loads(__urlsafe_b64decode(__args.run_info)) + __run_info_dict["run_id"] = __run.get_details()["runId"] + + # Returns a tuple, where the zeroth index is the string + __output_context_tuple = main( + __context=__input_context_string, + __run_info=str( + __urlsafe_b64encode(dill.dumps(__run_info_dict)), encoding="ascii" + ), + __metadata_url=__args.metadata_url, + ) + + __p = __Path(__args.output_context_path) + __p.mkdir(parents=True, exist_ok=True) + __filepath = __p / __context_filename + with __filepath.open("w+") as __f: + __f.write(__output_context_tuple[0]) + diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja index 127d1c9b..acca9f1f 100644 --- a/sameproject/ops/ocean/root.jinja +++ b/sameproject/ops/ocean/root.jinja @@ -1,14 +1,30 @@ -{% autoescape off %}from kfp.components import create_component_from_func -from kubernetes.client.models import V1EnvVar -from kubernetes import client, config +{% autoescape off %} from typing import NamedTuple from base64 import b64encode -import kfp.dsl as dsl -import kubernetes import json +import logging +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import matplotlib.image as mpimg +import numpy as np +import random +from pathlib import Path +import pickle +import sys +import time import kfp +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils from run_info import run_info_fn + {% for step in list_of_steps %} from {{ step.unique_name }} import {{ step.unique_name }}_fn {% endfor %} @@ -39,77 +55,18 @@ run_info_comp = kfp.components.create_component_from_func( @dsl.pipeline(name="Compilation of pipelines",) def root( context='', metadata_url='', - AWS_ACCESS_KEY_ID: str = "minio", - AWS_SECRET_ACCESS_KEY: str = "minio123", - MLFLOW_S3_ENDPOINT_URL: str = "http://combinator-minio.mlflow.svc.cluster.local:9000", - MLFLOW_TRACKING_URI: str = "http://combinator-mlflow.mlflow.svc.cluster.local:5000", ): # Generate secrets (if not already created) secrets_by_env = {} {% for env_name in secrets_to_create_as_dict %} {% set secret = secrets_to_create_as_dict[env_name] %} - config.load_kube_config() - v1 = client.CoreV1Api() - namespace = "kubeflow" - name = "{{ experiment_name_safe }}" - metadata = {"name": name, "namespace": "kubeflow"} - api_version = "v1" - kind = "Secret" - type = "kubernetes.io/dockerconfigjson" - - cred_payload = { - "auths": { - "{{secret.image_pull_secret_registry_uri}}": { - "username": "{{secret.image_pull_secret_username}}", - "password": "{{secret.image_pull_secret_password}}", - "email": "{{secret.image_pull_secret_email}}", - "auth": b64encode( - f"{{secret.image_pull_secret_username}}:{{secret.image_pull_secret_password}}".encode() - ).decode(), - } - } - } data = { ".dockerconfigjson": b64encode(json.dumps(cred_payload).encode()).decode() } - body = client.V1Secret( - api_version="v1", - data=data, - kind="Secret", - metadata=metadata, - type=type, - ) - api_response = None - try: - api_response = v1.create_namespaced_secret(namespace, body) - except kubernetes.client.rest.ApiException as e: - if e.status == 409: - if ( - cred_payload["auths"] - and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"] - and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"]["username"] - and 
cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"]["password"] - and cred_payload["auths"]["{{secret.image_pull_secret_registry_uri}}"]["email"] - ): - api_response = v1.replace_namespaced_secret(name, namespace, body) - else: - print(f"Missing value") - else: - raise e - - dsl.get_pipeline_conf().set_image_pull_secrets([client.V1LocalObjectReference(name=name)]) {% endfor %} - # TODO: need a way to configure and handle env vars for backends properly - env_vars = { - "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID, - "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY, - "MLFLOW_S3_ENDPOINT_URL": MLFLOW_S3_ENDPOINT_URL, - "MLFLOW_TRACKING_URI": MLFLOW_TRACKING_URI, - } - run_info = run_info_comp(run_id=kfp.dsl.RUN_ID_PLACEHOLDER) diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index 78e29e3b..dafce34a 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -4,15 +4,31 @@ import argparse as __argparse from multiprocessing import context import pathlib from typing import NamedTuple -from azureml.core import Run from pprint import pprint as __pp import os from pathlib import Path as __Path -from azureml.pipeline.core import ( - PipelineData as __PipelineData, - PipelineParameter as __PipelineParameter, -) import dill +import json +import logging +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import matplotlib.image as mpimg +import numpy as np +import random +from pathlib import Path +import pickle +import sys +import time + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils from base64 import ( urlsafe_b64encode as __urlsafe_b64encode, urlsafe_b64decode as __urlsafe_b64decode, From 5a9ad530e8d52260cb3359f5629d54fdf97c65fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 30 May 2022 19:16:18 +0200 Subject: [PATCH 11/99] WIP: removed unused template code --- sameproject/ops/ocean/deploy.py | 3 -- sameproject/ops/ocean/render.py | 7 --- sameproject/ops/ocean/step.jinja | 77 +------------------------------- 3 files changed, 1 insertion(+), 86 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index ceba42fc..c7782710 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -5,6 +5,3 @@ def deploy(base_path: str, root_name: str, config: SameConfig): return - # with helpers.add_path(str(base_path)): - # root_module = importlib.import_module(root_name) - # root_module.root() diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index f5151d25..d2a570b5 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -106,13 +106,6 @@ def _build_root_file(env: Environment, all_steps: list, same_config: dict) -> st if same_config.get("ocean"): root_contract["ocean_workspace_credentials"] = { - # "AML_SP_PASSWORD_VALUE": same_config.aml.AML_SP_PASSWORD_VALUE, - # "AML_SP_TENANT_ID": same_config.aml.AML_SP_TENANT_ID, - # "AML_SP_APP_ID": same_config.aml.AML_SP_APP_ID, - # "WORKSPACE_SUBSCRIPTION_ID": same_config.aml.WORKSPACE_SUBSCRIPTION_ID, - # "WORKSPACE_RESOURCE_GROUP": same_config.aml.WORKSPACE_RESOURCE_GROUP, - # "WORKSPACE_NAME": same_config.aml.WORKSPACE_NAME, - # "AML_COMPUTE_NAME": same_config.aml.AML_COMPUTE_NAME, } # Until we get smarter, 
we're just going to combine inject EVERY package into every step. diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index dafce34a..0feb9f91 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -17,6 +17,7 @@ import numpy as np import random from pathlib import Path import pickle +from azureml.core import Run import sys import time @@ -35,44 +36,6 @@ from base64 import ( ) def main({{ parameter_string }}) -> NamedTuple('FuncOutput',[('context', str),]): - import dill - import base64 - from base64 import urlsafe_b64encode, urlsafe_b64decode - from copy import copy as __copy - from types import ModuleType as __ModuleType - from pprint import pprint as __pp - import datetime as __datetime - import requests - - __run_info_dict = dill.loads(urlsafe_b64decode(__run_info)) - __base64_decode = urlsafe_b64decode(__context) - __context_import_dict = dill.loads(__base64_decode) - - __variables_to_mount = {} - __loc = {} - - for __k in __context_import_dict: - __variables_to_mount[__k] = dill.loads(__context_import_dict[__k]) - - __json_data = { - "experiment_id": __run_info_dict["experiment_id"], - "run_id": __run_info_dict["run_id"], - "step_id": "{{ name }}", - "metadata_type": "input", - "metadata_value": __context, - "metadata_time": __datetime.datetime.now().isoformat(), - } - - print(f"Metadata url: {__metadata_url}") - if __metadata_url != '': - print("Found metadata URL - executing.") - __pp(__json_data) - try: - __r = requests.post(__metadata_url, json=__json_data,) - __r.raise_for_status() - except requests.exceptions.HTTPError as __err: - print(f"Error: {__err}") - __inner_code_to_execute = """ import dill import base64 @@ -81,47 +44,9 @@ from types import ModuleType as __ModuleType {{ inner_code | replace("\\", "\\\\") | replace("\"", "\\\"") }} -__locals_keys = frozenset(locals().keys()) -__globals_keys = frozenset(globals().keys()) -__context_export = {} - -for val in __globals_keys: - if not val.startswith("_") and not isinstance(val, __ModuleType): - __context_export[val] = dill.dumps(globals()[val]) - -# Locals needs to come after globals in case we made changes -for val in __locals_keys: - if not val.startswith("_") and not isinstance(val, __ModuleType): - __context_export[val] = dill.dumps(locals()[val]) - __b64_string = str(urlsafe_b64encode(dill.dumps(__context_export)), encoding="ascii") """ - exec(__inner_code_to_execute, __variables_to_mount, __loc) - - __json_output_data = { - "experiment_id": __run_info_dict["experiment_id"], - "run_id": __run_info_dict["run_id"], - "step_id": "%v", - "metadata_type": "output", - "metadata_value": __loc["__b64_string"], - "metadata_time": __datetime.datetime.now().isoformat(), - } - - print(f"Metadata url: {__metadata_url}") - if __metadata_url != '': - print("Found metadata URL - executing.") - __pp(__json_data) - try: - __r = requests.post(__metadata_url, json=__json_output_data,) - __r.raise_for_status() - except requests.exceptions.HTTPError as err: - print(f"Error: {err}") - - from collections import namedtuple - output = namedtuple("FuncOutput", ["context"]) - return output(__loc["__b64_string"]) - if __name__ == "__main__": __run = Run.get_context() From ba014a4298ed14362caf77dc51b1e97ba96d417b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 31 May 2022 16:50:27 +0200 Subject: [PATCH 12/99] Added nbconvert python template --- sameproject/ops/python_ocean/README.md | 5 +++ sameproject/ops/python_ocean/conf.json | 6 ++++ 
sameproject/ops/python_ocean/index.py.j2 | 41 ++++++++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 sameproject/ops/python_ocean/README.md create mode 100644 sameproject/ops/python_ocean/conf.json create mode 100644 sameproject/ops/python_ocean/index.py.j2 diff --git a/sameproject/ops/python_ocean/README.md b/sameproject/ops/python_ocean/README.md new file mode 100644 index 00000000..864d13b0 --- /dev/null +++ b/sameproject/ops/python_ocean/README.md @@ -0,0 +1,5 @@ +## Jinja template for Ocean C2D + +> Note: work-in-progress + +A template for easily converting jupyter notebooks to Ocean Protocol C2D script. Use tags to specify different parts of your jupyter notebooks. diff --git a/sameproject/ops/python_ocean/conf.json b/sameproject/ops/python_ocean/conf.json new file mode 100644 index 00000000..5aeb837f --- /dev/null +++ b/sameproject/ops/python_ocean/conf.json @@ -0,0 +1,6 @@ +{ + "base_template": "base", + "mimetypes": { + "text/x-python": true + } +} diff --git a/sameproject/ops/python_ocean/index.py.j2 b/sameproject/ops/python_ocean/index.py.j2 new file mode 100644 index 00000000..708d059a --- /dev/null +++ b/sameproject/ops/python_ocean/index.py.j2 @@ -0,0 +1,41 @@ +{%- extends 'null.j2' -%} +import os + +{%- block header -%} +#!/usr/bin/env python +# coding: utf-8 +{% endblock header %} + +{% block in_prompt %} +{% if resources.global_content_filter.include_input_prompt -%} + # In[{{ cell.execution_count if cell.execution_count else ' ' }}]: +{% endif %} +{% endblock in_prompt %} + +{% block input %} +{{ cell.source | ipython2python }} +{% if 'input' in cell['metadata'].get('tags', []): -%} +print(cell['metadata'].get('tags', [])) +dids = os.getenv('DIDS', None) + +if not dids: + assert 1=0, "No DIDs found in environment. Aborting." + +dids = json.loads(dids) + +cwd = os.getcwd() +print('cwd', cwd) + + +for did in dids: + print('ls', f'/data/inputs/{did}/0') + print('ls2', os.listdir(f'/data/inputs/')) + filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service + print(f"Reading asset file {filename}.") + +{% endif %} +{% endblock input %} + +{% block markdowncell scoped %} +{{ cell.source | comment_lines }} +{% endblock markdowncell %} From 7b2216092c53c5ccbe498f3ff7d702a6b1d9eec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 1 Jun 2022 10:09:05 +0200 Subject: [PATCH 13/99] WIP: double tag template --- sameproject/ops/python_ocean/index.py.j2 | 43 ++++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/sameproject/ops/python_ocean/index.py.j2 b/sameproject/ops/python_ocean/index.py.j2 index 708d059a..cc9a2b99 100644 --- a/sameproject/ops/python_ocean/index.py.j2 +++ b/sameproject/ops/python_ocean/index.py.j2 @@ -13,25 +13,40 @@ import os {% endblock in_prompt %} {% block input %} -{{ cell.source | ipython2python }} {% if 'input' in cell['metadata'].get('tags', []): -%} -print(cell['metadata'].get('tags', [])) -dids = os.getenv('DIDS', None) +def get_input(local=False): + if local: + {{ cell.source | ipython2python }} + return filename + print(cell['metadata'].get('tags', [])) + dids = os.getenv('DIDS', None) + + if not dids: + print("No DIDs found in environment. Aborting.") + return + + dids = json.loads(dids) -if not dids: - assert 1=0, "No DIDs found in environment. Aborting." 
+ cwd = os.getcwd() + print('cwd', cwd) + -dids = json.loads(dids) + for did in dids: + print('ls', f'/data/inputs/{did}/0') + print('ls2', os.listdir(f'/data/inputs/')) + filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service + print(f"Reading asset file {filename}.") + return filename -cwd = os.getcwd() -print('cwd', cwd) - +{% elif 'train' in cell['metadata'].get('tags', []): -%} +def run_model(local=False): -for did in dids: - print('ls', f'/data/inputs/{did}/0') - print('ls2', os.listdir(f'/data/inputs/')) - filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service - print(f"Reading asset file {filename}.") + t0 = time.time() + + {{ cell.source | ipython2python }} + +{% else -%} +{{ cell.source | ipython2python }} {% endif %} {% endblock input %} From d4e0e03602ccd9e6ecc29cdd2b2ac9efc1f191aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 1 Jun 2022 10:26:22 +0200 Subject: [PATCH 14/99] Correct code indentation --- sameproject/ops/python_ocean/index.py.j2 | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/python_ocean/index.py.j2 b/sameproject/ops/python_ocean/index.py.j2 index cc9a2b99..da0329f8 100644 --- a/sameproject/ops/python_ocean/index.py.j2 +++ b/sameproject/ops/python_ocean/index.py.j2 @@ -16,7 +16,10 @@ import os {% if 'input' in cell['metadata'].get('tags', []): -%} def get_input(local=False): if local: + {% filter indent(8) %} {{ cell.source | ipython2python }} + {% endfilter %} + return filename print(cell['metadata'].get('tags', [])) dids = os.getenv('DIDS', None) @@ -42,14 +45,17 @@ def get_input(local=False): def run_model(local=False): t0 = time.time() - + {% filter indent(4) %} {{ cell.source | ipython2python }} + {% endfilter %} {% else -%} {{ cell.source | ipython2python }} {% endif %} {% endblock input %} +if __name__ == "__main__": + run_model() {% block markdowncell scoped %} {{ cell.source | comment_lines }} From 726c73ea4983f1cb3141e55d7167cfaf41d50760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 1 Jun 2022 10:29:02 +0200 Subject: [PATCH 15/99] Added dcgan example --- sameproject/ops/python_ocean/dcgan.ipynb | 463 +++++++++++++++++++++++ sameproject/ops/python_ocean/dcgan.py | 418 ++++++++++++++++++++ 2 files changed, 881 insertions(+) create mode 100644 sameproject/ops/python_ocean/dcgan.ipynb create mode 100644 sameproject/ops/python_ocean/dcgan.py diff --git a/sameproject/ops/python_ocean/dcgan.ipynb b/sameproject/ops/python_ocean/dcgan.ipynb new file mode 100644 index 00000000..84de87f1 --- /dev/null +++ b/sameproject/ops/python_ocean/dcgan.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4a889dd1-edf1-4807-b699-8862aa5dbc01", + "metadata": {}, + "source": [ + "# SAME DCGAN" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e3f9e0b5-b612-416f-b585-5f5e33195174", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'matplotlib'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/xj/yxwtvrv95n77ycc9hpngfr700000gn/T/ipykernel_1222/3860654574.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m 
\u001b[0mjson\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mlogging\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0manimation\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0manimation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimage\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmpimg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" + ] + } + ], + "source": [ + "# The code for the DCGAN generative model is taken from the official PyTorch docs https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html.\n", + "\n", + "import argparse\n", + "import json\n", + "import logging\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.animation as animation\n", + "import matplotlib.image as mpimg\n", + "import numpy as np\n", + "import os\n", + "import random\n", + "from pathlib import Path\n", + "import pickle\n", + "import sys\n", + "import time\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.parallel\n", + "import torch.backends.cudnn as cudnn\n", + "import torch.optim as optim\n", + "import torch.utils.data\n", + "import torchvision.datasets as dset\n", + "import torchvision.transforms as transforms\n", + "import torchvision.utils as vutils" + ] + }, + { + "cell_type": "markdown", + "id": "5a762015-0879-42fc-879d-3779fb4cf4d6", + "metadata": {}, + "source": [ + "## Fetching data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4c4791a-45d2-4158-bdd0-9e53ee07e1a0", + "metadata": { + "tags": [ + "input" + ] + }, + "outputs": [], + "source": [ + "#\n", + "print(\"Reading local punks directory.\")\n", + "\n", + "# Root directory for dataset\n", + "filename = Path('data/punks/tealpunks')\n", + "# filename = Path('data/punks-sample')\n", + "# filename = Path('data/celeba')" + ] + }, + { + "cell_type": "markdown", + "id": "574e7016-8221-4283-ac18-da1137b5e02f", + "metadata": {}, + "source": [ + "# Train & test model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c891692-9a3c-4981-8f26-9d086e215055", + "metadata": { + "tags": [ + "train" + ] + }, + "outputs": [], + "source": [ + "#\n", + "filename = get_input(local)\n", + "if not filename:\n", + " print(\"Could not retrieve filename.\")\n", + " return\n", + "\n", + "from PIL import Image\n", + "with open(filename) as datafile:\n", + " print(type(datafile))\n", + " print(datafile)\n", + " datafile.seek(0)\n", + " img = Image.open(datafile)\n", + " print('@@@', img)\n", + "\n", + "\n", + "teal_images = sorted(list(filename.glob('*')))\n", + "\n", + "print(teal_images)\n", + "\n", + "results_dir = Path('results')\n", + "\n", + "if not results_dir.exists():\n", + " results_dir.mkdir()\n", + "\n", + "if local:\n", + " img0 = mpimg.imread(teal_images[0])\n", + " img1 = mpimg.imread(teal_images[1])\n", + " fig, ax = plt.subplots(1,2)\n", + " ax[0].imshow(img0)\n", + " ax[1].imshow(img1)\n", + " [a.axis('off') for a in 
ax]\n", + " plt.savefig(results_dir / \"sample.png\")\n", + "\n", + "# Set random seed for reproducibility\n", + "manualSeed = 999\n", + "#manualSeed = random.randint(1, 10000) # use if you want new results\n", + "print(\"Random Seed: \", manualSeed)\n", + "random.seed(manualSeed)\n", + "torch.manual_seed(manualSeed)\n", + "\n", + "# Training parameters\n", + "workers = 2\n", + "batch_size = 128\n", + "image_size = 64\n", + "nc = 3\n", + "nz = 100\n", + "ngf = 64\n", + "ndf = 64\n", + "num_epochs = 5\n", + "lr = 0.0002\n", + "beta1 = 0.5\n", + "ngpu = 1\n", + "\n", + "# We can use an image folder dataset the way we have it setup.\n", + "# Create the dataset\n", + "dataset = dset.ImageFolder(root=filename.parent,\n", + " transform=transforms.Compose([\n", + " transforms.Resize(image_size),\n", + " transforms.CenterCrop(image_size),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n", + " ]))\n", + "# Create the dataloader\n", + "dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,\n", + " shuffle=True, num_workers=workers)\n", + "\n", + "# Decide which device we want to run on\n", + "device = torch.device(\"cuda:0\" if (torch.cuda.is_available() and ngpu > 0) else \"cpu\")\n", + "\n", + "# custom weights initialization called on netG and netD\n", + "def weights_init(m):\n", + " classname = m.__class__.__name__\n", + " if classname.find('Conv') != -1:\n", + " nn.init.normal_(m.weight.data, 0.0, 0.02)\n", + " elif classname.find('BatchNorm') != -1:\n", + " nn.init.normal_(m.weight.data, 1.0, 0.02)\n", + " nn.init.constant_(m.bias.data, 0)\n", + "\n", + "# Generator Code\n", + "class Generator(nn.Module):\n", + " def __init__(self, ngpu):\n", + " super(Generator, self).__init__()\n", + " self.ngpu = ngpu\n", + " self.main = nn.Sequential(\n", + " # input is Z, going into a convolution\n", + " nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False),\n", + " nn.BatchNorm2d(ngf * 8),\n", + " nn.ReLU(True),\n", + " # state size. (ngf*8) x 4 x 4\n", + " nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ngf * 4),\n", + " nn.ReLU(True),\n", + " # state size. (ngf*4) x 8 x 8\n", + " nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ngf * 2),\n", + " nn.ReLU(True),\n", + " # state size. (ngf*2) x 16 x 16\n", + " nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ngf),\n", + " nn.ReLU(True),\n", + " # state size. (ngf) x 32 x 32\n", + " nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),\n", + " nn.Tanh()\n", + " # state size. (nc) x 64 x 64\n", + " )\n", + "\n", + " def forward(self, input):\n", + " return self.main(input)\n", + "\n", + "# Create the generator\n", + "netG = Generator(ngpu).to(device)\n", + "\n", + "# Handle multi-gpu if desired\n", + "if (device.type == 'cuda') and (ngpu > 1):\n", + " netG = nn.DataParallel(netG, list(range(ngpu)))\n", + "\n", + "# Apply the weights_init function to randomly initialize all weights\n", + "# to mean=0, stdev=0.02.\n", + "netG.apply(weights_init)\n", + "\n", + "# Print the model\n", + "print(netG)\n", + "\n", + "class Discriminator(nn.Module):\n", + " def __init__(self, ngpu):\n", + " super(Discriminator, self).__init__()\n", + " self.ngpu = ngpu\n", + " self.main = nn.Sequential(\n", + " # input is (nc) x 64 x 64\n", + " nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. 
(ndf) x 32 x 32\n", + " nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ndf * 2),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. (ndf*2) x 16 x 16\n", + " nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ndf * 4),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. (ndf*4) x 8 x 8\n", + " nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ndf * 8),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. (ndf*8) x 4 x 4\n", + " nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),\n", + " nn.Sigmoid()\n", + " )\n", + "\n", + " def forward(self, input):\n", + " return self.main(input)\n", + "\n", + " # Create the Discriminator\n", + "netD = Discriminator(ngpu).to(device)\n", + "\n", + " # Handle multi-gpu if desired\n", + "if (device.type == 'cuda') and (ngpu > 1):\n", + " netD = nn.DataParallel(netD, list(range(ngpu)))\n", + "\n", + " # Apply the weights_init function to randomly initialize all weights\n", + " # to mean=0, stdev=0.2.\n", + "netD.apply(weights_init)\n", + "\n", + " # Print the model\n", + "print(netD)\n", + "\n", + " # Initialize BCELoss function\n", + "criterion = nn.BCELoss()\n", + "\n", + " # Create batch of latent vectors that we will use to visualize\n", + " # the progression of the generator\n", + "fixed_noise = torch.randn(64, nz, 1, 1, device=device)\n", + "\n", + " # Establish convention for real and fake labels during training\n", + "real_label = 1.\n", + "fake_label = 0.\n", + "\n", + " # Setup Adam optimizers for both G and D\n", + "optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))\n", + "optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))\n", + "\n", + " # Training Loop\n", + "\n", + " # Lists to keep track of progress\n", + "img_list = []\n", + "G_losses = []\n", + "D_losses = []\n", + "iters = 0\n", + "\n", + "print(\"Starting Training Loop...\")\n", + " # For each epoch\n", + "for epoch in range(num_epochs):\n", + " # For each batch in the dataloader\n", + " for i, data in enumerate(dataloader, 0):\n", + "\n", + " ############################\n", + " # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))\n", + " ###########################\n", + " ## Train with all-real batch\n", + " netD.zero_grad()\n", + " # Format batch\n", + " real_cpu = data[0].to(device)\n", + " b_size = real_cpu.size(0)\n", + " label = torch.full((b_size,), real_label, dtype=torch.float, device=device)\n", + " # Forward pass real batch through D\n", + " output = netD(real_cpu).view(-1)\n", + " # Calculate loss on all-real batch\n", + " errD_real = criterion(output, label)\n", + " # Calculate gradients for D in backward pass\n", + " errD_real.backward()\n", + " D_x = output.mean().item()\n", + "\n", + " ## Train with all-fake batch\n", + " # Generate batch of latent vectors\n", + " noise = torch.randn(b_size, nz, 1, 1, device=device)\n", + " # Generate fake image batch with G\n", + " fake = netG(noise)\n", + " label.fill_(fake_label)\n", + " # Classify all fake batch with D\n", + " output = netD(fake.detach()).view(-1)\n", + " # Calculate D's loss on the all-fake batch\n", + " errD_fake = criterion(output, label)\n", + " # Calculate the gradients for this batch, accumulated (summed) with previous gradients\n", + " errD_fake.backward()\n", + " D_G_z1 = output.mean().item()\n", + " # Compute error of D as sum over the fake and the real batches\n", + " errD = errD_real + errD_fake\n", + " # Update D\n", + " optimizerD.step()\n", + "\n", + " 
############################\n", + " # (2) Update G network: maximize log(D(G(z)))\n", + " ###########################\n", + " netG.zero_grad()\n", + " label.fill_(real_label) # fake labels are real for generator cost\n", + " # Since we just updated D, perform another forward pass of all-fake batch through D\n", + " output = netD(fake).view(-1)\n", + " # Calculate G's loss based on this output\n", + " errG = criterion(output, label)\n", + " # Calculate gradients for G\n", + " errG.backward()\n", + " D_G_z2 = output.mean().item()\n", + " # Update G\n", + " optimizerG.step()\n", + "\n", + " # Output training stats\n", + " if i % 50 == 0:\n", + " print('[%d/%d][%d/%d]\\tLoss_D: %.4f\\tLoss_G: %.4f\\tD(x): %.4f\\tD(G(z)): %.4f / %.4f'\n", + " % (epoch, num_epochs, i, len(dataloader),\n", + " errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))\n", + "\n", + " # Save Losses for plotting later\n", + " G_losses.append(errG.item())\n", + " D_losses.append(errD.item())\n", + "\n", + " # Check how the generator is doing by saving G's output on fixed_noise\n", + " if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):\n", + " with torch.no_grad():\n", + " fake = netG(fixed_noise).detach().cpu()\n", + " img_list.append(vutils.make_grid(fake, padding=2, normalize=True))\n", + "\n", + " iters += 1\n", + "\n", + "\n", + "filename = results_dir / 'punks.pickle' if local else \"/data/outputs/result\"\n", + "with open(filename, 'wb') as pickle_file:\n", + " print(f\"Pickling results in {filename}\")\n", + " pickle.dump(img_list[-1], pickle_file)\n", + "\n", + "t1 = time.time()\n", + "total = t1-t0\n", + "\n", + "print('Time: ', total)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c5b6c6d-e9c0-4050-9315-e7dc0044b207", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54ba5c7b-e934-421c-8ad6-55c8d7405bcc", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc81f87d-ea83-4723-abaa-d6f03e9a88e0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "256823c5-9442-42ec-8a1b-eccfbfb21afb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a7cc023-aed3-4dd2-a7d2-454f05e94cd7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad964626-62f6-45e9-bcdd-cb037ee75d13", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbcc7e3e-886e-4e62-84ff-b8b549c9cf95", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sameproject/ops/python_ocean/dcgan.py b/sameproject/ops/python_ocean/dcgan.py new file mode 100644 index 00000000..701699a5 --- /dev/null +++ b/sameproject/ops/python_ocean/dcgan.py @@ -0,0 +1,418 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # SAME DCGAN + +# In[1]: + + +# The code for the DCGAN generative model is taken from 
the official PyTorch docs https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html. + +import argparse +import json +import logging +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import matplotlib.image as mpimg +import numpy as np +import os +import random +from pathlib import Path +import pickle +import sys +import time + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils + + + +# ## Fetching data + +# In[ ]: + + +def get_input(local=False): + if local: + + # + print("Reading local punks directory.") + + # Root directory for dataset + filename = Path('data/punks/tealpunks') + # filename = Path('data/punks-sample') + # filename = Path('data/celeba') + + + + return filename + print(cell['metadata'].get('tags', [])) + dids = os.getenv('DIDS', None) + + if not dids: + print("No DIDs found in environment. Aborting.") + return + + dids = json.loads(dids) + + cwd = os.getcwd() + print('cwd', cwd) + + + for did in dids: + print('ls', f'/data/inputs/{did}/0') + print('ls2', os.listdir(f'/data/inputs/')) + filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service + print(f"Reading asset file {filename}.") + return filename + + + +# # Train & test model + +# In[ ]: + + +def run_model(local=False): + + t0 = time.time() + + # + filename = get_input(local) + if not filename: + print("Could not retrieve filename.") + return + + from PIL import Image + with open(filename) as datafile: + print(type(datafile)) + print(datafile) + datafile.seek(0) + img = Image.open(datafile) + print('@@@', img) + + + teal_images = sorted(list(filename.glob('*'))) + + print(teal_images) + + results_dir = Path('results') + + if not results_dir.exists(): + results_dir.mkdir() + + if local: + img0 = mpimg.imread(teal_images[0]) + img1 = mpimg.imread(teal_images[1]) + fig, ax = plt.subplots(1,2) + ax[0].imshow(img0) + ax[1].imshow(img1) + [a.axis('off') for a in ax] + plt.savefig(results_dir / "sample.png") + + # Set random seed for reproducibility + manualSeed = 999 + #manualSeed = random.randint(1, 10000) # use if you want new results + print("Random Seed: ", manualSeed) + random.seed(manualSeed) + torch.manual_seed(manualSeed) + + # Training parameters + workers = 2 + batch_size = 128 + image_size = 64 + nc = 3 + nz = 100 + ngf = 64 + ndf = 64 + num_epochs = 5 + lr = 0.0002 + beta1 = 0.5 + ngpu = 1 + + # We can use an image folder dataset the way we have it setup. 
+ # Create the dataset + dataset = dset.ImageFolder(root=filename.parent, + transform=transforms.Compose([ + transforms.Resize(image_size), + transforms.CenterCrop(image_size), + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), + ])) + # Create the dataloader + dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, + shuffle=True, num_workers=workers) + + # Decide which device we want to run on + device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") + + # custom weights initialization called on netG and netD + def weights_init(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find('BatchNorm') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0) + + # Generator Code + class Generator(nn.Module): + def __init__(self, ngpu): + super(Generator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is Z, going into a convolution + nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), + nn.BatchNorm2d(ngf * 8), + nn.ReLU(True), + # state size. (ngf*8) x 4 x 4 + nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf * 4), + nn.ReLU(True), + # state size. (ngf*4) x 8 x 8 + nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf * 2), + nn.ReLU(True), + # state size. (ngf*2) x 16 x 16 + nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf), + nn.ReLU(True), + # state size. (ngf) x 32 x 32 + nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), + nn.Tanh() + # state size. (nc) x 64 x 64 + ) + + def forward(self, input): + return self.main(input) + + # Create the generator + netG = Generator(ngpu).to(device) + + # Handle multi-gpu if desired + if (device.type == 'cuda') and (ngpu > 1): + netG = nn.DataParallel(netG, list(range(ngpu))) + + # Apply the weights_init function to randomly initialize all weights + # to mean=0, stdev=0.02. + netG.apply(weights_init) + + # Print the model + print(netG) + + class Discriminator(nn.Module): + def __init__(self, ngpu): + super(Discriminator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is (nc) x 64 x 64 + nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf) x 32 x 32 + nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * 2), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*2) x 16 x 16 + nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * 4), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*4) x 8 x 8 + nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * 8), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*8) x 4 x 4 + nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), + nn.Sigmoid() + ) + + def forward(self, input): + return self.main(input) + + # Create the Discriminator + netD = Discriminator(ngpu).to(device) + + # Handle multi-gpu if desired + if (device.type == 'cuda') and (ngpu > 1): + netD = nn.DataParallel(netD, list(range(ngpu))) + + # Apply the weights_init function to randomly initialize all weights + # to mean=0, stdev=0.2. 
+ netD.apply(weights_init) + + # Print the model + print(netD) + + # Initialize BCELoss function + criterion = nn.BCELoss() + + # Create batch of latent vectors that we will use to visualize + # the progression of the generator + fixed_noise = torch.randn(64, nz, 1, 1, device=device) + + # Establish convention for real and fake labels during training + real_label = 1. + fake_label = 0. + + # Setup Adam optimizers for both G and D + optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999)) + optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999)) + + # Training Loop + + # Lists to keep track of progress + img_list = [] + G_losses = [] + D_losses = [] + iters = 0 + + print("Starting Training Loop...") + # For each epoch + for epoch in range(num_epochs): + # For each batch in the dataloader + for i, data in enumerate(dataloader, 0): + + ############################ + # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) + ########################### + ## Train with all-real batch + netD.zero_grad() + # Format batch + real_cpu = data[0].to(device) + b_size = real_cpu.size(0) + label = torch.full((b_size,), real_label, dtype=torch.float, device=device) + # Forward pass real batch through D + output = netD(real_cpu).view(-1) + # Calculate loss on all-real batch + errD_real = criterion(output, label) + # Calculate gradients for D in backward pass + errD_real.backward() + D_x = output.mean().item() + + ## Train with all-fake batch + # Generate batch of latent vectors + noise = torch.randn(b_size, nz, 1, 1, device=device) + # Generate fake image batch with G + fake = netG(noise) + label.fill_(fake_label) + # Classify all fake batch with D + output = netD(fake.detach()).view(-1) + # Calculate D's loss on the all-fake batch + errD_fake = criterion(output, label) + # Calculate the gradients for this batch, accumulated (summed) with previous gradients + errD_fake.backward() + D_G_z1 = output.mean().item() + # Compute error of D as sum over the fake and the real batches + errD = errD_real + errD_fake + # Update D + optimizerD.step() + + ############################ + # (2) Update G network: maximize log(D(G(z))) + ########################### + netG.zero_grad() + label.fill_(real_label) # fake labels are real for generator cost + # Since we just updated D, perform another forward pass of all-fake batch through D + output = netD(fake).view(-1) + # Calculate G's loss based on this output + errG = criterion(output, label) + # Calculate gradients for G + errG.backward() + D_G_z2 = output.mean().item() + # Update G + optimizerG.step() + + # Output training stats + if i % 50 == 0: + print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f' + % (epoch, num_epochs, i, len(dataloader), + errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) + + # Save Losses for plotting later + G_losses.append(errG.item()) + D_losses.append(errD.item()) + + # Check how the generator is doing by saving G's output on fixed_noise + if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)): + with torch.no_grad(): + fake = netG(fixed_noise).detach().cpu() + img_list.append(vutils.make_grid(fake, padding=2, normalize=True)) + + iters += 1 + + + filename = results_dir / 'punks.pickle' if local else "/data/outputs/result" + with open(filename, 'wb') as pickle_file: + print(f"Pickling results in {filename}") + pickle.dump(img_list[-1], pickle_file) + + t1 = time.time() + total = t1-t0 + + print('Time: ', total) + + + + + +# In[ ]: + + + + + + + +# In[ ]: + 
+ + + + + + +# In[ ]: + + + + + + + +# In[ ]: + + + + + + + +# In[ ]: + + + + + + + +# In[ ]: + + + + + + + +# In[ ]: + + + + + + From 8f71439d280884d9060e4d44e41c3775e9cc8640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 1 Jun 2022 09:42:46 +0100 Subject: [PATCH 16/99] Update README.md --- sameproject/ops/python_ocean/README.md | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/python_ocean/README.md b/sameproject/ops/python_ocean/README.md index 864d13b0..d2c32570 100644 --- a/sameproject/ops/python_ocean/README.md +++ b/sameproject/ops/python_ocean/README.md @@ -1,5 +1,38 @@ -## Jinja template for Ocean C2D +# Jinja template for Ocean C2D > Note: work-in-progress A template for easily converting jupyter notebooks to Ocean Protocol C2D script. Use tags to specify different parts of your jupyter notebooks. + +## 🏗 Initial Setup + +### Set up environment +``` +#clone repo +git clone https://github.com/AlgoveraAI/same-project.git +cd same-project + +#create a virtual environment +python3 -m venv venv + +#activate env +source venv/bin/activate + +#Install the dependencies +pip install -e . +pip install jupyter +``` + +### Guide to using the template +Open up a jupyter notebook. This is where you will do all your data analysis and model development. Make sure to write a comment at the start of each cell (this is necessary otherwise the template will generate incorrectly indented code). + +When you are done developing locally and want to publish to ocean, you will need to tag specific cells of your notebook to be read by the template. To do this click *View/Cell Toolbar/Tags*. +Tag the cells where you did your data preparation with "input" and the cells with your model and training loop "train". + +When you're ready execute the following command in your terminal: +``` +jupyter nbconvert path_to_notebook --to python --template=./sameproject/ops/python_ocean +``` + +Check that the generated script has no syntax errors. + From 26836ecda05eb3b32f33cca4aedb677b8e601eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 1 Jun 2022 09:43:57 +0100 Subject: [PATCH 17/99] Update README.md --- sameproject/ops/python_ocean/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sameproject/ops/python_ocean/README.md b/sameproject/ops/python_ocean/README.md index d2c32570..89fc20e3 100644 --- a/sameproject/ops/python_ocean/README.md +++ b/sameproject/ops/python_ocean/README.md @@ -36,3 +36,4 @@ jupyter nbconvert path_to_notebook --to python --template=./sameproject/ops/pyth Check that the generated script has no syntax errors. +See the `dcgan.ipynb` and `dcgan.py` examples for more details. From f3489774b5a456c371b563c205fd0f1beb48ca84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 1 Jun 2022 09:45:13 +0100 Subject: [PATCH 18/99] Update README.md --- sameproject/ops/python_ocean/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/python_ocean/README.md b/sameproject/ops/python_ocean/README.md index 89fc20e3..b96d8479 100644 --- a/sameproject/ops/python_ocean/README.md +++ b/sameproject/ops/python_ocean/README.md @@ -36,4 +36,11 @@ jupyter nbconvert path_to_notebook --to python --template=./sameproject/ops/pyth Check that the generated script has no syntax errors. -See the `dcgan.ipynb` and `dcgan.py` examples for more details. +See `dcgan.ipynb` and `dcgan.py` for more details and reach out if you run into issues. 
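A quick way to confirm the tags are wired up before converting is to inspect the notebook with `nbformat` (installed alongside jupyter). The snippet below is only a sketch for local sanity-checking, not part of the template itself; the notebook path and the `input`/`train` tag names follow the guide above.

```python
# Sanity-check that the cells the template keys on are actually tagged.
# Assumes nbformat (bundled with jupyter) and the dcgan.ipynb example notebook.
import nbformat

nb = nbformat.read("sameproject/ops/python_ocean/dcgan.ipynb", as_version=4)
tags = [tag for cell in nb.cells for tag in cell.metadata.get("tags", [])]

for required in ("input", "train"):
    if required not in tags:
        print(f"Warning: no cell tagged '{required}'; the generated script will be missing that section.")
```

If both tags are present, the `jupyter nbconvert` command above should emit a script with `get_input` and `run_model` defined, mirroring `dcgan.py`.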
+ +## Algovera + +Algovera is a community of individuals working to facilitate and accelerate the development of decentralised AI products and research. + +[Website](https://www.algovera.ai/) | [Notion](https://algovera.notion.site/) | [Discord](https://discord.gg/e65RuHSDS5) | [Calendar](https://calendar.google.com/calendar/embed?src=c_4qajdfj4imie9cpnkbvkrc7ri4%40group.calendar.google.com) | [Twitter](https://twitter.com/AlgoveraAI) | [YouTube](https://www.youtube.com/channel/UC2A5iUpP6k52ZZmC8LFj1IA) | + From df3ee79913185a4ef59b6a3e1e9e195dc4c38afe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 2 Jun 2022 16:41:57 +0200 Subject: [PATCH 19/99] refactor to python_ocean --- sameproject/ops/backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sameproject/ops/backends.py b/sameproject/ops/backends.py index f58136c3..3be61b12 100644 --- a/sameproject/ops/backends.py +++ b/sameproject/ops/backends.py @@ -5,7 +5,7 @@ import sameproject.ops.functions as functions import sameproject.ops.kubeflow as kubeflow import sameproject.ops.aml as aml -import sameproject.ops.ocean as ocean +import sameproject.ops.python_ocean as python_ocean import sameproject.ops.helpers import tempfile import click @@ -16,7 +16,7 @@ def render(target: str, steps: list, config: SameConfig, compile_path: str = Non "aml": aml.render, "kubeflow": kubeflow.render, "functions": functions.render, - "ocean": ocean.render + "ocean": python_ocean.render } render_function = target_renderers.get(target, None) @@ -35,7 +35,7 @@ def deploy(target: str, base_path: Path, root_file: str, config: SameConfig): "aml": aml.deploy, "kubeflow": kubeflow.deploy, "functions": functions.deploy, - "ocean": ocean.deploy + "ocean": python_ocean.deploy } deploy_function = target_deployers.get(target, None) From 00e10069c69a7dd7cabc6bf2aba9cda71c6ce284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 8 Jun 2022 10:24:59 +0200 Subject: [PATCH 20/99] WIP: different python_ocean templates --- sameproject/ops/ocean/render.py | 189 ------- sameproject/ops/ocean/root.jinja | 92 ---- sameproject/ops/ocean/step.jinja | 91 ---- .../ops/{ocean => python_ocean}/__init__.py | 0 sameproject/ops/python_ocean/conf.json | 2 +- sameproject/ops/python_ocean/dcgan.ipynb | 463 ------------------ sameproject/ops/python_ocean/dcgan.py | 418 ---------------- .../ops/{ocean => python_ocean}/deploy.py | 0 sameproject/ops/python_ocean/render.py | 28 ++ sameproject/ops/python_ocean/requirements.txt | 6 + sameproject/ops/python_ocean/root.jinja | 0 sameproject/ops/python_ocean/same.yaml | 14 + sameproject/vendor/conda | 2 +- vendor/conda | 2 +- 14 files changed, 51 insertions(+), 1256 deletions(-) delete mode 100644 sameproject/ops/ocean/render.py delete mode 100644 sameproject/ops/ocean/root.jinja delete mode 100644 sameproject/ops/ocean/step.jinja rename sameproject/ops/{ocean => python_ocean}/__init__.py (100%) delete mode 100644 sameproject/ops/python_ocean/dcgan.ipynb delete mode 100644 sameproject/ops/python_ocean/dcgan.py rename sameproject/ops/{ocean => python_ocean}/deploy.py (100%) create mode 100644 sameproject/ops/python_ocean/render.py create mode 100644 sameproject/ops/python_ocean/requirements.txt create mode 100644 sameproject/ops/python_ocean/root.jinja create mode 100644 sameproject/ops/python_ocean/same.yaml diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py deleted file mode 100644 index d2a570b5..00000000 --- a/sameproject/ops/ocean/render.py +++ 
/dev/null @@ -1,189 +0,0 @@ -from jinja2 import Environment, FileSystemLoader, select_autoescape -from sameproject.data.step import Step -from sameproject.ops import helpers -from typing import Tuple -from pathlib import Path -from uuid import uuid4 -import logging -import os - - -root_template = "root.jinja" -step_template = "step.jinja" - - -def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: - """Renders the notebook into a root file and a series of step files according to the target requirements. Returns an absolute path to the root file for deployment.""" - templateDir = os.path.dirname(os.path.abspath(__file__)) - templateLoader = FileSystemLoader(templateDir) - env = Environment(loader=templateLoader) - same_config["compile_path"] = compile_path - root_file_string = _build_root_file(env, steps, same_config) - - root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" - root_path = Path(compile_path) / f"{root_pipeline_name}.py" - helpers.write_file(root_path, root_file_string) - - for step_name in steps: - # Need a unique name so that libraries don't conflict in sys.modules. This is MOSTLY a test issue, but could be the case generally. - step_file_string = _build_step_file(env, steps[step_name], steps[step_name].unique_name) - (Path(compile_path) / steps[step_name].unique_name).mkdir() - helpers.write_file(Path(compile_path) / steps[step_name].unique_name / f"{steps[step_name].unique_name}.py", step_file_string) - - return (compile_path, root_pipeline_name) - - -def _build_root_file(env: Environment, all_steps: list, same_config: dict) -> str: - template = env.get_template(root_template) - - root_contract = { - "root_parameters_as_string": "", - "comma_delim_list_of_packages_as_string": "", - "list_of_steps": [], - "comma_delim_list_of_step_names_as_str": "", - "secrets_to_create_as_dict": {}, - "experiment_name": "", - "experiment_name_safe": "", - "list_of_environments": {}, - "image_pull_secrets": {}, - "aml_workspace_credentials": {}, - "compile_dir": "", - } - - params_to_merge = [] - - # Do i need to check if run and run.parameters are required fields? - try: - run_parameters = same_config.run.parameters - except Exception: - run_parameters = {} - - for k in run_parameters: - # Is this necessary? Could we support complex datatypes as parameters? - # Probably - but we'll need to serialize to pass as a param and then deserialize in the template - if isinstance(run_parameters[k], (int, float, str)): - params_to_merge.append(f"{k}='{run_parameters[k]}'") - else: - logging.warning(f"We only support numeric, bool and strings as default parameters (no dicts or lists). We're setting the default value for '{k}' to ''.") - - root_contract["root_parameters_as_string"] = ", ".join(params_to_merge) - - root_contract["list_of_environments"]["default"] = {} - root_contract["list_of_environments"]["default"]["image_tag"] = "library/python:3.10-slim-buster" - root_contract["list_of_environments"]["default"]["private_registry"] = False - - for name in same_config.environments: - root_contract["list_of_environments"][name] = {} - root_contract["list_of_environments"][name]["image_tag"] = same_config.environments[name].image_tag - - # Need to convert to string here because yaml parsing automatically converts (so we need to normalize) - # to string, in case the user didn't write True/False in a compliant way (e.g. 
'true' lowercase) - private_registry_bool = str(same_config.environments[name].get("private_registry", False)) - root_contract["list_of_environments"][name]["private_registry"] = private_registry_bool.lower() == "true" - root_contract["list_of_environments"][name]["secret_name"] = "" - - if root_contract["list_of_environments"][name]["private_registry"]: - - # This is starting to have quite a lot of code smell - root_contract requires a bit of massaging (instead of - # just passing through same_config to the jinja template nakedly) but i'm starting to dislike everything here. - if "credentials" in same_config.environments[name]: - # Someone COULD set this to be a 'private_registry' but did not set credentials. This may be ok! - # They could have already mounted the secret in the cluster, so we should let it go ahead. - # However, because jinja doesn't like it when we parse through a struct without anything being set (even empty) - # We're going to go ahead and set it up now, and populate it only if there are values - - # TODO: # same_config.environments[name].get("credentials", {}) <- would something like this work? - # It COULD autopopulate the entire dict, but not sure because if it's empty, then do all the fields - # get created? - these_credentials = {} - these_credentials["image_pull_secret_name"] = same_config.environments[name].credentials.get("image_pull_secret_name", "") - these_credentials["image_pull_secret_registry_uri"] = same_config.environments[name].credentials.get("image_pull_secret_registry_uri", "") - these_credentials["image_pull_secret_username"] = same_config.environments[name].credentials.get("image_pull_secret_username", "") - these_credentials["image_pull_secret_password"] = same_config.environments[name].credentials.get("image_pull_secret_password", "") - these_credentials["image_pull_secret_email"] = same_config.environments[name].credentials.get("image_pull_secret_email", "") - - root_contract["secrets_to_create_as_dict"][name] = these_credentials - - if same_config.get("ocean"): - root_contract["ocean_workspace_credentials"] = { - } - - # Until we get smarter, we're just going to combine inject EVERY package into every step. - # This is not IDEAL, but it's not as bad as it sounds because it'll allow systems to cache - # containers more readily, even between steps, and package downloads are pretty small. - # Using a dict so that we it'll remove dupes. - # Also, we should probably swap this out for conda_environment.yaml (somehow). - global_package_list = {} - for step in all_steps: - for package in all_steps[step].packages_to_install: - global_package_list[package] = "" - - if global_package_list: - # First merge all the packages together and delimit with ' and , - joined_string = "', '".join(list(global_package_list.keys())) - - # Then bound it with one more single quote on each side - root_contract["comma_delim_list_of_packages_as_string"] = f"'{joined_string}'" - - # If someone does something hinky, like name their steps out of alpha order, we're just not - # going to care, and parse them in the order they gave them to us. 
- previous_step_name = "" - for step_name in all_steps: - - step_content = all_steps[step_name] - env_name = step_content.environment_name - - step_to_append = {} - step_to_append["name"] = step_content.name - step_to_append["unique_name"] = step_content.unique_name - step_to_append["package_string"] = root_contract["comma_delim_list_of_packages_as_string"] - step_to_append["cache_value"] = step_content.cache_value - step_to_append["previous_step"] = previous_step_name - - if root_contract["list_of_environments"].get(env_name, None) is None: - error_message = f"'{env_name}'' was listed as an environment in the notebook, but no such environment is listed in your SAME configuration file." - logging.fatal(error_message) - raise ValueError(error_message) - - step_to_append["environment_name"] = env_name - step_to_append["image_tag"] = root_contract["list_of_environments"][env_name]["image_tag"] - step_to_append["private_registry"] = root_contract["list_of_environments"][env_name]["private_registry"] - step_to_append["secret_name"] = root_contract["list_of_environments"][env_name]["secret_name"] - - if previous_step_name != "": - step_to_append["previous_step_name"] = previous_step_name - root_contract["list_of_steps"].append(step_to_append) - - previous_step_name = step_content.unique_name - - # Text manipulation in jinja is pretty weak, we'll do both of these cleanings in python. - - # experiment_name is often displayed to the user, so try to keep it as close to the original as possible - root_contract["experiment_name"] = helpers.removeIllegalExperimentNameCharacters(same_config.metadata.name) - - # However, often there's a backup, internal only name that needs much stricter character restrictions - # We'll create that here. - root_contract["experiment_name_safe"] = helpers.lowerAlphaNumericOnly(same_config.metadata.name) - - # List manipulation is also pretty weak in jinja (plus I like views being very non-functional). We'll - # create the comma delim list of steps (which we need for DAG description) in python as well. - - # For AML, each "step" needs to have '_step' attached (this may be historical) - # and not necessary - look at it when we combine all these step rendering functions into one - root_contract["comma_delim_list_of_step_names_as_str"] = ", ".join([f"{all_steps[this_step_name].unique_name}_step" for this_step_name in all_steps]) - - root_contract["compile_path"] = same_config["compile_path"] - - return template.render(root_contract) - - -def _build_step_file(env: Environment, step: Step, step_name: str) -> str: - template = env.get_template(step_template) - - # Create a parameter_string for putting in each step function - # default is to be a serialized empty dict. We should probably - # handle this a different way (allowing custom params to be passed in) - # but haven't found this requirement from a customer yet. 
- parameter_string = '__context="gAR9lC4=", __run_info="gAR9lC4=", __metadata_url=""' - step_contract = {"name": step_name, "inner_code": step.code, "parameter_string": parameter_string} - return template.render(step_contract) diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja deleted file mode 100644 index acca9f1f..00000000 --- a/sameproject/ops/ocean/root.jinja +++ /dev/null @@ -1,92 +0,0 @@ -{% autoescape off %} -from typing import NamedTuple -from base64 import b64encode -import json -import logging -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import matplotlib.image as mpimg -import numpy as np -import random -from pathlib import Path -import pickle -import sys -import time -import kfp - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -from run_info import run_info_fn - -{% for step in list_of_steps %} -from {{ step.unique_name }} import {{ step.unique_name }}_fn -{% endfor %} - - -run_info_comp = kfp.components.create_component_from_func( - func=run_info_fn, - packages_to_install=[ - "dill==0.3.5.1", - "kfp==1.8.12", - ], -) - -{% for step in list_of_steps %} -{{ step.unique_name }}_comp = create_component_from_func( - func={{ step.unique_name }}_fn, - base_image="{{ step.image_tag }}", - packages_to_install=[ - "dill==0.3.5.1", - "pympler==1.0.1", - "requests==2.27.1", - {{ step.package_string }} # TODO: make this a loop - ], -) -{% endfor %} - -# TODO: support kubeflow-specific config like aws secrets, mlflow endpoints. -@dsl.pipeline(name="Compilation of pipelines",) -def root( - context='', metadata_url='', -): - # Generate secrets (if not already created) - secrets_by_env = {} -{% for env_name in secrets_to_create_as_dict %} -{% set secret = secrets_to_create_as_dict[env_name] %} - - data = { - ".dockerconfigjson": b64encode(json.dumps(cred_payload).encode()).decode() - } - -{% endfor %} - - run_info = run_info_comp(run_id=kfp.dsl.RUN_ID_PLACEHOLDER) - - -{% for step in list_of_steps %} - {{ step.unique_name }} = {{ step.unique_name }}_comp( -{% if step.previous_step_name %} - input_context={{ step.previous_step_name }}.outputs["output_context"], -{% else %} - input_context="", -{% endif %} - run_info=run_info.outputs["run_info"], - metadata_url=metadata_url - ) - -{% if step.previous_step_name %} - {{ step.unique_name }}.after({{ step.previous_step_name }}) -{% endif %} - {{ step.unique_name }}.execution_options.caching_strategy.max_cache_staleness = "P0D" - for k in env_vars: - {{ step.unique_name }}.add_env_variable(V1EnvVar(name=k, value=env_vars[k])) -{% endfor %} - -{% endautoescape %} diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja deleted file mode 100644 index 0feb9f91..00000000 --- a/sameproject/ops/ocean/step.jinja +++ /dev/null @@ -1,91 +0,0 @@ -{% autoescape off %} - -import argparse as __argparse -from multiprocessing import context -import pathlib -from typing import NamedTuple -from pprint import pprint as __pp -import os -from pathlib import Path as __Path -import dill -import json -import logging -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import matplotlib.image as mpimg -import numpy as np -import random -from pathlib import Path -import pickle -from azureml.core import Run -import sys -import time - -import torch 
-import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -from base64 import ( - urlsafe_b64encode as __urlsafe_b64encode, - urlsafe_b64decode as __urlsafe_b64decode, -) - -def main({{ parameter_string }}) -> NamedTuple('FuncOutput',[('context', str),]): - __inner_code_to_execute = """ -import dill -import base64 -from base64 import urlsafe_b64encode, urlsafe_b64decode -from types import ModuleType as __ModuleType - -{{ inner_code | replace("\\", "\\\\") | replace("\"", "\\\"") }} - -__b64_string = str(urlsafe_b64encode(dill.dumps(__context_export)), encoding="ascii") - -""" - -if __name__ == "__main__": - __run = Run.get_context() - __parser = __argparse.ArgumentParser("cleanse") - __parser.add_argument("--input_context", type=str, help="Context to run as string") - __parser.add_argument("--run_info", type=str, help="Run info") - __parser.add_argument("--output_context_path", type=str, help="Output context path") - __parser.add_argument("--metadata_url", type=str, help="Metadata URL") - - __args = __parser.parse_args() - - __input_context_string = "gAR9lC4=" - __context_filename = "context.txt" - if "__pipelinedata_context" in __args.input_context: - context_full_path = __Path(__args.input_context) / __context_filename - print(f"reading file: {context_full_path}") - __input_context_string = context_full_path.read_text() - elif __args.input_context and __args.input_context.strip(): - __input_context_string = __args.input_context.strip() - - # Need to unpack and do this here, because AML only gives - # us the run id inside the container. Unpacking and repacking so - # bulk of the code is unchanged. 
- __run_info_dict = dill.loads(__urlsafe_b64decode(__args.run_info)) - __run_info_dict["run_id"] = __run.get_details()["runId"] - - # Returns a tuple, where the zeroth index is the string - __output_context_tuple = main( - __context=__input_context_string, - __run_info=str( - __urlsafe_b64encode(dill.dumps(__run_info_dict)), encoding="ascii" - ), - __metadata_url=__args.metadata_url, - ) - - __p = __Path(__args.output_context_path) - __p.mkdir(parents=True, exist_ok=True) - __filepath = __p / __context_filename - with __filepath.open("w+") as __f: - __f.write(__output_context_tuple[0]) - -{% endautoescape %} \ No newline at end of file diff --git a/sameproject/ops/ocean/__init__.py b/sameproject/ops/python_ocean/__init__.py similarity index 100% rename from sameproject/ops/ocean/__init__.py rename to sameproject/ops/python_ocean/__init__.py diff --git a/sameproject/ops/python_ocean/conf.json b/sameproject/ops/python_ocean/conf.json index 5aeb837f..68a24825 100644 --- a/sameproject/ops/python_ocean/conf.json +++ b/sameproject/ops/python_ocean/conf.json @@ -1,5 +1,5 @@ { - "base_template": "base", + "base_template": "index.py.j2", "mimetypes": { "text/x-python": true } diff --git a/sameproject/ops/python_ocean/dcgan.ipynb b/sameproject/ops/python_ocean/dcgan.ipynb deleted file mode 100644 index 84de87f1..00000000 --- a/sameproject/ops/python_ocean/dcgan.ipynb +++ /dev/null @@ -1,463 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4a889dd1-edf1-4807-b699-8862aa5dbc01", - "metadata": {}, - "source": [ - "# SAME DCGAN" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "e3f9e0b5-b612-416f-b585-5f5e33195174", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, - "tags": [] - }, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'matplotlib'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/xj/yxwtvrv95n77ycc9hpngfr700000gn/T/ipykernel_1222/3860654574.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mlogging\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0manimation\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0manimation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimage\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmpimg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" - ] - } - ], - "source": [ - "# The code for the DCGAN generative model is taken from the official PyTorch docs https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html.\n", - "\n", - "import argparse\n", - "import json\n", - "import logging\n", - "import matplotlib.pyplot as 
plt\n", - "import matplotlib.animation as animation\n", - "import matplotlib.image as mpimg\n", - "import numpy as np\n", - "import os\n", - "import random\n", - "from pathlib import Path\n", - "import pickle\n", - "import sys\n", - "import time\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.parallel\n", - "import torch.backends.cudnn as cudnn\n", - "import torch.optim as optim\n", - "import torch.utils.data\n", - "import torchvision.datasets as dset\n", - "import torchvision.transforms as transforms\n", - "import torchvision.utils as vutils" - ] - }, - { - "cell_type": "markdown", - "id": "5a762015-0879-42fc-879d-3779fb4cf4d6", - "metadata": {}, - "source": [ - "## Fetching data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4c4791a-45d2-4158-bdd0-9e53ee07e1a0", - "metadata": { - "tags": [ - "input" - ] - }, - "outputs": [], - "source": [ - "#\n", - "print(\"Reading local punks directory.\")\n", - "\n", - "# Root directory for dataset\n", - "filename = Path('data/punks/tealpunks')\n", - "# filename = Path('data/punks-sample')\n", - "# filename = Path('data/celeba')" - ] - }, - { - "cell_type": "markdown", - "id": "574e7016-8221-4283-ac18-da1137b5e02f", - "metadata": {}, - "source": [ - "# Train & test model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c891692-9a3c-4981-8f26-9d086e215055", - "metadata": { - "tags": [ - "train" - ] - }, - "outputs": [], - "source": [ - "#\n", - "filename = get_input(local)\n", - "if not filename:\n", - " print(\"Could not retrieve filename.\")\n", - " return\n", - "\n", - "from PIL import Image\n", - "with open(filename) as datafile:\n", - " print(type(datafile))\n", - " print(datafile)\n", - " datafile.seek(0)\n", - " img = Image.open(datafile)\n", - " print('@@@', img)\n", - "\n", - "\n", - "teal_images = sorted(list(filename.glob('*')))\n", - "\n", - "print(teal_images)\n", - "\n", - "results_dir = Path('results')\n", - "\n", - "if not results_dir.exists():\n", - " results_dir.mkdir()\n", - "\n", - "if local:\n", - " img0 = mpimg.imread(teal_images[0])\n", - " img1 = mpimg.imread(teal_images[1])\n", - " fig, ax = plt.subplots(1,2)\n", - " ax[0].imshow(img0)\n", - " ax[1].imshow(img1)\n", - " [a.axis('off') for a in ax]\n", - " plt.savefig(results_dir / \"sample.png\")\n", - "\n", - "# Set random seed for reproducibility\n", - "manualSeed = 999\n", - "#manualSeed = random.randint(1, 10000) # use if you want new results\n", - "print(\"Random Seed: \", manualSeed)\n", - "random.seed(manualSeed)\n", - "torch.manual_seed(manualSeed)\n", - "\n", - "# Training parameters\n", - "workers = 2\n", - "batch_size = 128\n", - "image_size = 64\n", - "nc = 3\n", - "nz = 100\n", - "ngf = 64\n", - "ndf = 64\n", - "num_epochs = 5\n", - "lr = 0.0002\n", - "beta1 = 0.5\n", - "ngpu = 1\n", - "\n", - "# We can use an image folder dataset the way we have it setup.\n", - "# Create the dataset\n", - "dataset = dset.ImageFolder(root=filename.parent,\n", - " transform=transforms.Compose([\n", - " transforms.Resize(image_size),\n", - " transforms.CenterCrop(image_size),\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n", - " ]))\n", - "# Create the dataloader\n", - "dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,\n", - " shuffle=True, num_workers=workers)\n", - "\n", - "# Decide which device we want to run on\n", - "device = torch.device(\"cuda:0\" if (torch.cuda.is_available() and ngpu > 0) else \"cpu\")\n", - "\n", - "# 
custom weights initialization called on netG and netD\n", - "def weights_init(m):\n", - " classname = m.__class__.__name__\n", - " if classname.find('Conv') != -1:\n", - " nn.init.normal_(m.weight.data, 0.0, 0.02)\n", - " elif classname.find('BatchNorm') != -1:\n", - " nn.init.normal_(m.weight.data, 1.0, 0.02)\n", - " nn.init.constant_(m.bias.data, 0)\n", - "\n", - "# Generator Code\n", - "class Generator(nn.Module):\n", - " def __init__(self, ngpu):\n", - " super(Generator, self).__init__()\n", - " self.ngpu = ngpu\n", - " self.main = nn.Sequential(\n", - " # input is Z, going into a convolution\n", - " nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False),\n", - " nn.BatchNorm2d(ngf * 8),\n", - " nn.ReLU(True),\n", - " # state size. (ngf*8) x 4 x 4\n", - " nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),\n", - " nn.BatchNorm2d(ngf * 4),\n", - " nn.ReLU(True),\n", - " # state size. (ngf*4) x 8 x 8\n", - " nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),\n", - " nn.BatchNorm2d(ngf * 2),\n", - " nn.ReLU(True),\n", - " # state size. (ngf*2) x 16 x 16\n", - " nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),\n", - " nn.BatchNorm2d(ngf),\n", - " nn.ReLU(True),\n", - " # state size. (ngf) x 32 x 32\n", - " nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),\n", - " nn.Tanh()\n", - " # state size. (nc) x 64 x 64\n", - " )\n", - "\n", - " def forward(self, input):\n", - " return self.main(input)\n", - "\n", - "# Create the generator\n", - "netG = Generator(ngpu).to(device)\n", - "\n", - "# Handle multi-gpu if desired\n", - "if (device.type == 'cuda') and (ngpu > 1):\n", - " netG = nn.DataParallel(netG, list(range(ngpu)))\n", - "\n", - "# Apply the weights_init function to randomly initialize all weights\n", - "# to mean=0, stdev=0.02.\n", - "netG.apply(weights_init)\n", - "\n", - "# Print the model\n", - "print(netG)\n", - "\n", - "class Discriminator(nn.Module):\n", - " def __init__(self, ngpu):\n", - " super(Discriminator, self).__init__()\n", - " self.ngpu = ngpu\n", - " self.main = nn.Sequential(\n", - " # input is (nc) x 64 x 64\n", - " nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " # state size. (ndf) x 32 x 32\n", - " nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),\n", - " nn.BatchNorm2d(ndf * 2),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " # state size. (ndf*2) x 16 x 16\n", - " nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),\n", - " nn.BatchNorm2d(ndf * 4),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " # state size. (ndf*4) x 8 x 8\n", - " nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),\n", - " nn.BatchNorm2d(ndf * 8),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " # state size. 
(ndf*8) x 4 x 4\n", - " nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),\n", - " nn.Sigmoid()\n", - " )\n", - "\n", - " def forward(self, input):\n", - " return self.main(input)\n", - "\n", - " # Create the Discriminator\n", - "netD = Discriminator(ngpu).to(device)\n", - "\n", - " # Handle multi-gpu if desired\n", - "if (device.type == 'cuda') and (ngpu > 1):\n", - " netD = nn.DataParallel(netD, list(range(ngpu)))\n", - "\n", - " # Apply the weights_init function to randomly initialize all weights\n", - " # to mean=0, stdev=0.2.\n", - "netD.apply(weights_init)\n", - "\n", - " # Print the model\n", - "print(netD)\n", - "\n", - " # Initialize BCELoss function\n", - "criterion = nn.BCELoss()\n", - "\n", - " # Create batch of latent vectors that we will use to visualize\n", - " # the progression of the generator\n", - "fixed_noise = torch.randn(64, nz, 1, 1, device=device)\n", - "\n", - " # Establish convention for real and fake labels during training\n", - "real_label = 1.\n", - "fake_label = 0.\n", - "\n", - " # Setup Adam optimizers for both G and D\n", - "optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))\n", - "optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))\n", - "\n", - " # Training Loop\n", - "\n", - " # Lists to keep track of progress\n", - "img_list = []\n", - "G_losses = []\n", - "D_losses = []\n", - "iters = 0\n", - "\n", - "print(\"Starting Training Loop...\")\n", - " # For each epoch\n", - "for epoch in range(num_epochs):\n", - " # For each batch in the dataloader\n", - " for i, data in enumerate(dataloader, 0):\n", - "\n", - " ############################\n", - " # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))\n", - " ###########################\n", - " ## Train with all-real batch\n", - " netD.zero_grad()\n", - " # Format batch\n", - " real_cpu = data[0].to(device)\n", - " b_size = real_cpu.size(0)\n", - " label = torch.full((b_size,), real_label, dtype=torch.float, device=device)\n", - " # Forward pass real batch through D\n", - " output = netD(real_cpu).view(-1)\n", - " # Calculate loss on all-real batch\n", - " errD_real = criterion(output, label)\n", - " # Calculate gradients for D in backward pass\n", - " errD_real.backward()\n", - " D_x = output.mean().item()\n", - "\n", - " ## Train with all-fake batch\n", - " # Generate batch of latent vectors\n", - " noise = torch.randn(b_size, nz, 1, 1, device=device)\n", - " # Generate fake image batch with G\n", - " fake = netG(noise)\n", - " label.fill_(fake_label)\n", - " # Classify all fake batch with D\n", - " output = netD(fake.detach()).view(-1)\n", - " # Calculate D's loss on the all-fake batch\n", - " errD_fake = criterion(output, label)\n", - " # Calculate the gradients for this batch, accumulated (summed) with previous gradients\n", - " errD_fake.backward()\n", - " D_G_z1 = output.mean().item()\n", - " # Compute error of D as sum over the fake and the real batches\n", - " errD = errD_real + errD_fake\n", - " # Update D\n", - " optimizerD.step()\n", - "\n", - " ############################\n", - " # (2) Update G network: maximize log(D(G(z)))\n", - " ###########################\n", - " netG.zero_grad()\n", - " label.fill_(real_label) # fake labels are real for generator cost\n", - " # Since we just updated D, perform another forward pass of all-fake batch through D\n", - " output = netD(fake).view(-1)\n", - " # Calculate G's loss based on this output\n", - " errG = criterion(output, label)\n", - " # Calculate gradients for G\n", - " errG.backward()\n", - " D_G_z2 = 
output.mean().item()\n", - " # Update G\n", - " optimizerG.step()\n", - "\n", - " # Output training stats\n", - " if i % 50 == 0:\n", - " print('[%d/%d][%d/%d]\\tLoss_D: %.4f\\tLoss_G: %.4f\\tD(x): %.4f\\tD(G(z)): %.4f / %.4f'\n", - " % (epoch, num_epochs, i, len(dataloader),\n", - " errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))\n", - "\n", - " # Save Losses for plotting later\n", - " G_losses.append(errG.item())\n", - " D_losses.append(errD.item())\n", - "\n", - " # Check how the generator is doing by saving G's output on fixed_noise\n", - " if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):\n", - " with torch.no_grad():\n", - " fake = netG(fixed_noise).detach().cpu()\n", - " img_list.append(vutils.make_grid(fake, padding=2, normalize=True))\n", - "\n", - " iters += 1\n", - "\n", - "\n", - "filename = results_dir / 'punks.pickle' if local else \"/data/outputs/result\"\n", - "with open(filename, 'wb') as pickle_file:\n", - " print(f\"Pickling results in {filename}\")\n", - " pickle.dump(img_list[-1], pickle_file)\n", - "\n", - "t1 = time.time()\n", - "total = t1-t0\n", - "\n", - "print('Time: ', total)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c5b6c6d-e9c0-4050-9315-e7dc0044b207", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "54ba5c7b-e934-421c-8ad6-55c8d7405bcc", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc81f87d-ea83-4723-abaa-d6f03e9a88e0", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "256823c5-9442-42ec-8a1b-eccfbfb21afb", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a7cc023-aed3-4dd2-a7d2-454f05e94cd7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad964626-62f6-45e9-bcdd-cb037ee75d13", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cbcc7e3e-886e-4e62-84ff-b8b549c9cf95", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Tags", - "kernelspec": { - "display_name": "dev", - "language": "python", - "name": "dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/sameproject/ops/python_ocean/dcgan.py b/sameproject/ops/python_ocean/dcgan.py deleted file mode 100644 index 701699a5..00000000 --- a/sameproject/ops/python_ocean/dcgan.py +++ /dev/null @@ -1,418 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # SAME DCGAN - -# In[1]: - - -# The code for the DCGAN generative model is taken from the official PyTorch docs https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html. 
- -import argparse -import json -import logging -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import matplotlib.image as mpimg -import numpy as np -import os -import random -from pathlib import Path -import pickle -import sys -import time - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils - - - -# ## Fetching data - -# In[ ]: - - -def get_input(local=False): - if local: - - # - print("Reading local punks directory.") - - # Root directory for dataset - filename = Path('data/punks/tealpunks') - # filename = Path('data/punks-sample') - # filename = Path('data/celeba') - - - - return filename - print(cell['metadata'].get('tags', [])) - dids = os.getenv('DIDS', None) - - if not dids: - print("No DIDs found in environment. Aborting.") - return - - dids = json.loads(dids) - - cwd = os.getcwd() - print('cwd', cwd) - - - for did in dids: - print('ls', f'/data/inputs/{did}/0') - print('ls2', os.listdir(f'/data/inputs/')) - filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service - print(f"Reading asset file {filename}.") - return filename - - - -# # Train & test model - -# In[ ]: - - -def run_model(local=False): - - t0 = time.time() - - # - filename = get_input(local) - if not filename: - print("Could not retrieve filename.") - return - - from PIL import Image - with open(filename) as datafile: - print(type(datafile)) - print(datafile) - datafile.seek(0) - img = Image.open(datafile) - print('@@@', img) - - - teal_images = sorted(list(filename.glob('*'))) - - print(teal_images) - - results_dir = Path('results') - - if not results_dir.exists(): - results_dir.mkdir() - - if local: - img0 = mpimg.imread(teal_images[0]) - img1 = mpimg.imread(teal_images[1]) - fig, ax = plt.subplots(1,2) - ax[0].imshow(img0) - ax[1].imshow(img1) - [a.axis('off') for a in ax] - plt.savefig(results_dir / "sample.png") - - # Set random seed for reproducibility - manualSeed = 999 - #manualSeed = random.randint(1, 10000) # use if you want new results - print("Random Seed: ", manualSeed) - random.seed(manualSeed) - torch.manual_seed(manualSeed) - - # Training parameters - workers = 2 - batch_size = 128 - image_size = 64 - nc = 3 - nz = 100 - ngf = 64 - ndf = 64 - num_epochs = 5 - lr = 0.0002 - beta1 = 0.5 - ngpu = 1 - - # We can use an image folder dataset the way we have it setup. 
- # Create the dataset - dataset = dset.ImageFolder(root=filename.parent, - transform=transforms.Compose([ - transforms.Resize(image_size), - transforms.CenterCrop(image_size), - transforms.ToTensor(), - transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), - ])) - # Create the dataloader - dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, - shuffle=True, num_workers=workers) - - # Decide which device we want to run on - device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") - - # custom weights initialization called on netG and netD - def weights_init(m): - classname = m.__class__.__name__ - if classname.find('Conv') != -1: - nn.init.normal_(m.weight.data, 0.0, 0.02) - elif classname.find('BatchNorm') != -1: - nn.init.normal_(m.weight.data, 1.0, 0.02) - nn.init.constant_(m.bias.data, 0) - - # Generator Code - class Generator(nn.Module): - def __init__(self, ngpu): - super(Generator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is Z, going into a convolution - nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), - nn.BatchNorm2d(ngf * 8), - nn.ReLU(True), - # state size. (ngf*8) x 4 x 4 - nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), - nn.BatchNorm2d(ngf * 4), - nn.ReLU(True), - # state size. (ngf*4) x 8 x 8 - nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), - nn.BatchNorm2d(ngf * 2), - nn.ReLU(True), - # state size. (ngf*2) x 16 x 16 - nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False), - nn.BatchNorm2d(ngf), - nn.ReLU(True), - # state size. (ngf) x 32 x 32 - nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), - nn.Tanh() - # state size. (nc) x 64 x 64 - ) - - def forward(self, input): - return self.main(input) - - # Create the generator - netG = Generator(ngpu).to(device) - - # Handle multi-gpu if desired - if (device.type == 'cuda') and (ngpu > 1): - netG = nn.DataParallel(netG, list(range(ngpu))) - - # Apply the weights_init function to randomly initialize all weights - # to mean=0, stdev=0.02. - netG.apply(weights_init) - - # Print the model - print(netG) - - class Discriminator(nn.Module): - def __init__(self, ngpu): - super(Discriminator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is (nc) x 64 x 64 - nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf) x 32 x 32 - nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), - nn.BatchNorm2d(ndf * 2), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*2) x 16 x 16 - nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), - nn.BatchNorm2d(ndf * 4), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*4) x 8 x 8 - nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), - nn.BatchNorm2d(ndf * 8), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*8) x 4 x 4 - nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), - nn.Sigmoid() - ) - - def forward(self, input): - return self.main(input) - - # Create the Discriminator - netD = Discriminator(ngpu).to(device) - - # Handle multi-gpu if desired - if (device.type == 'cuda') and (ngpu > 1): - netD = nn.DataParallel(netD, list(range(ngpu))) - - # Apply the weights_init function to randomly initialize all weights - # to mean=0, stdev=0.2. 
- netD.apply(weights_init) - - # Print the model - print(netD) - - # Initialize BCELoss function - criterion = nn.BCELoss() - - # Create batch of latent vectors that we will use to visualize - # the progression of the generator - fixed_noise = torch.randn(64, nz, 1, 1, device=device) - - # Establish convention for real and fake labels during training - real_label = 1. - fake_label = 0. - - # Setup Adam optimizers for both G and D - optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999)) - optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999)) - - # Training Loop - - # Lists to keep track of progress - img_list = [] - G_losses = [] - D_losses = [] - iters = 0 - - print("Starting Training Loop...") - # For each epoch - for epoch in range(num_epochs): - # For each batch in the dataloader - for i, data in enumerate(dataloader, 0): - - ############################ - # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) - ########################### - ## Train with all-real batch - netD.zero_grad() - # Format batch - real_cpu = data[0].to(device) - b_size = real_cpu.size(0) - label = torch.full((b_size,), real_label, dtype=torch.float, device=device) - # Forward pass real batch through D - output = netD(real_cpu).view(-1) - # Calculate loss on all-real batch - errD_real = criterion(output, label) - # Calculate gradients for D in backward pass - errD_real.backward() - D_x = output.mean().item() - - ## Train with all-fake batch - # Generate batch of latent vectors - noise = torch.randn(b_size, nz, 1, 1, device=device) - # Generate fake image batch with G - fake = netG(noise) - label.fill_(fake_label) - # Classify all fake batch with D - output = netD(fake.detach()).view(-1) - # Calculate D's loss on the all-fake batch - errD_fake = criterion(output, label) - # Calculate the gradients for this batch, accumulated (summed) with previous gradients - errD_fake.backward() - D_G_z1 = output.mean().item() - # Compute error of D as sum over the fake and the real batches - errD = errD_real + errD_fake - # Update D - optimizerD.step() - - ############################ - # (2) Update G network: maximize log(D(G(z))) - ########################### - netG.zero_grad() - label.fill_(real_label) # fake labels are real for generator cost - # Since we just updated D, perform another forward pass of all-fake batch through D - output = netD(fake).view(-1) - # Calculate G's loss based on this output - errG = criterion(output, label) - # Calculate gradients for G - errG.backward() - D_G_z2 = output.mean().item() - # Update G - optimizerG.step() - - # Output training stats - if i % 50 == 0: - print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f' - % (epoch, num_epochs, i, len(dataloader), - errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) - - # Save Losses for plotting later - G_losses.append(errG.item()) - D_losses.append(errD.item()) - - # Check how the generator is doing by saving G's output on fixed_noise - if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)): - with torch.no_grad(): - fake = netG(fixed_noise).detach().cpu() - img_list.append(vutils.make_grid(fake, padding=2, normalize=True)) - - iters += 1 - - - filename = results_dir / 'punks.pickle' if local else "/data/outputs/result" - with open(filename, 'wb') as pickle_file: - print(f"Pickling results in {filename}") - pickle.dump(img_list[-1], pickle_file) - - t1 = time.time() - total = t1-t0 - - print('Time: ', total) - - - - - -# In[ ]: - - - - - - - -# In[ ]: - 
- - - - - - -# In[ ]: - - - - - - - -# In[ ]: - - - - - - - -# In[ ]: - - - - - - - -# In[ ]: - - - - - - - -# In[ ]: - - - - - - diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/python_ocean/deploy.py similarity index 100% rename from sameproject/ops/ocean/deploy.py rename to sameproject/ops/python_ocean/deploy.py diff --git a/sameproject/ops/python_ocean/render.py b/sameproject/ops/python_ocean/render.py new file mode 100644 index 00000000..3e96eb95 --- /dev/null +++ b/sameproject/ops/python_ocean/render.py @@ -0,0 +1,28 @@ +from traitlets.config import Config +import nbformat as nbf +from nbconvert.exporters import PythonExporter +from nbconvert.preprocessors import TagRemovePreprocessor +from nbconvert.exporters.templateexporter import TemplateExporter +from pathlib import Path +from uuid import uuid4 +from typing import Tuple +import time + + +template = "root.jinja" +config = { + 'Exporter': {'template_file': template, + 'template_path': ['./']}, + 'ExtractOutputPreprocessor': {'enabled': True}, + } + +exporter = PythonExporter(config) + +def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: + body, resources = exporter.from_notebook_node( + compile_path, resources={'output_files_dir': compile_path}) + same_config["compile_path"] = compile_path + root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" + root_path = Path(compile_path) / f"{root_pipeline_name}.py" + with open(root_path, 'w') as f: + f.write(body) \ No newline at end of file diff --git a/sameproject/ops/python_ocean/requirements.txt b/sameproject/ops/python_ocean/requirements.txt new file mode 100644 index 00000000..9af1fff2 --- /dev/null +++ b/sameproject/ops/python_ocean/requirements.txt @@ -0,0 +1,6 @@ +# Dependencies for /Users/jakubsmekal/Development/Algovera/core_dev/c2d/same-project/sameproject/ops/python_ocean/dcgan.ipynb: +matplotlib==3.5.2 +numpy==1.22.4 +Pillow==9.1.1 +torch==1.11.0 +torchvision==0.12.0 diff --git a/sameproject/ops/python_ocean/root.jinja b/sameproject/ops/python_ocean/root.jinja new file mode 100644 index 00000000..e69de29b diff --git a/sameproject/ops/python_ocean/same.yaml b/sameproject/ops/python_ocean/same.yaml new file mode 100644 index 00000000..14535cdb --- /dev/null +++ b/sameproject/ops/python_ocean/same.yaml @@ -0,0 +1,14 @@ +apiVersion: sameproject.ml/v1alpha1 +environments: + default: + image_tag: combinatorml/jupyterlab-tensorflow-opencv:0.9 +metadata: + labels: [] + name: dcgan pipeline + version: 0.0.0 +notebook: + name: dcgan + path: dcgan.ipynb + requirements: requirements.txt +run: + name: dcgan run diff --git a/sameproject/vendor/conda b/sameproject/vendor/conda index d8ddda5e..0adcd595 160000 --- a/sameproject/vendor/conda +++ b/sameproject/vendor/conda @@ -1 +1 @@ -Subproject commit d8ddda5e1df107a7437884353e868678b6e7042b +Subproject commit 0adcd595c97d0c4e3b2645aebd50ded8d771d5eb diff --git a/vendor/conda b/vendor/conda index 0b1312ce..0adcd595 160000 --- a/vendor/conda +++ b/vendor/conda @@ -1 +1 @@ -Subproject commit 0b1312ce65bf0fbb8cbea3750f07c32a2492c57a +Subproject commit 0adcd595c97d0c4e3b2645aebd50ded8d771d5eb From 8c3dc7557ecaffa63d2f86441f2447e7dc3614d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 8 Jun 2022 10:34:33 +0200 Subject: [PATCH 21/99] WIP: cleaning up python_ocean --- sameproject/cli/run.py | 2 +- sameproject/ops/python_ocean/conf.json | 6 ------ sameproject/ops/python_ocean/requirements.txt | 6 ------ sameproject/ops/python_ocean/same.yaml | 14 -------------- 4 files 
changed, 1 insertion(+), 27 deletions(-) delete mode 100644 sameproject/ops/python_ocean/conf.json delete mode 100644 sameproject/ops/python_ocean/requirements.txt delete mode 100644 sameproject/ops/python_ocean/same.yaml diff --git a/sameproject/cli/run.py b/sameproject/cli/run.py index e1f9e832..ce2b2deb 100644 --- a/sameproject/cli/run.py +++ b/sameproject/cli/run.py @@ -26,7 +26,7 @@ "-t", "--target", default="kubeflow", - type=click.Choice(["aml", "kubeflow", "functions", "ocean"]), + type=click.Choice(["aml", "kubeflow", "functions", "python_ocean"]), ) @click.option( "--persist-temp-files", diff --git a/sameproject/ops/python_ocean/conf.json b/sameproject/ops/python_ocean/conf.json deleted file mode 100644 index 68a24825..00000000 --- a/sameproject/ops/python_ocean/conf.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "base_template": "index.py.j2", - "mimetypes": { - "text/x-python": true - } -} diff --git a/sameproject/ops/python_ocean/requirements.txt b/sameproject/ops/python_ocean/requirements.txt deleted file mode 100644 index 9af1fff2..00000000 --- a/sameproject/ops/python_ocean/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Dependencies for /Users/jakubsmekal/Development/Algovera/core_dev/c2d/same-project/sameproject/ops/python_ocean/dcgan.ipynb: -matplotlib==3.5.2 -numpy==1.22.4 -Pillow==9.1.1 -torch==1.11.0 -torchvision==0.12.0 diff --git a/sameproject/ops/python_ocean/same.yaml b/sameproject/ops/python_ocean/same.yaml deleted file mode 100644 index 14535cdb..00000000 --- a/sameproject/ops/python_ocean/same.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: sameproject.ml/v1alpha1 -environments: - default: - image_tag: combinatorml/jupyterlab-tensorflow-opencv:0.9 -metadata: - labels: [] - name: dcgan pipeline - version: 0.0.0 -notebook: - name: dcgan - path: dcgan.ipynb - requirements: requirements.txt -run: - name: dcgan run From c32d1745a9af92a88ef72b0c4d463cb33633c103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 8 Jun 2022 10:35:38 +0200 Subject: [PATCH 22/99] WIP: starting from clean slate --- ...peline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py | 80 --- ...ep_000_afdeddf09e474ffdbe0543fd0d775bbd.py | 526 ------------------ sameproject/cli/run.py | 2 +- sameproject/ops/backends.py | 3 - sameproject/ops/python_ocean/README.md | 46 -- sameproject/ops/python_ocean/__init__.py | 2 - sameproject/ops/python_ocean/deploy.py | 7 - sameproject/ops/python_ocean/index.py.j2 | 62 --- sameproject/ops/python_ocean/render.py | 28 - sameproject/ops/python_ocean/root.jinja | 0 10 files changed, 1 insertion(+), 755 deletions(-) delete mode 100644 examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py delete mode 100644 examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py delete mode 100644 sameproject/ops/python_ocean/README.md delete mode 100644 sameproject/ops/python_ocean/__init__.py delete mode 100644 sameproject/ops/python_ocean/deploy.py delete mode 100644 sameproject/ops/python_ocean/index.py.j2 delete mode 100644 sameproject/ops/python_ocean/render.py delete mode 100644 sameproject/ops/python_ocean/root.jinja diff --git a/examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py b/examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py deleted file mode 100644 index ca4b13eb..00000000 --- a/examples/root_pipeline_3ea7c8df9e7d45fc8aebaa1857a7d08f.py +++ /dev/null @@ -1,80 +0,0 @@ - -from typing import NamedTuple -from base64 import b64encode -import json -import logging -import matplotlib.pyplot as plt 
-import matplotlib.animation as animation -import matplotlib.image as mpimg -import numpy as np -import random -from pathlib import Path -import pickle -import sys -import time -import kfp - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -from run_info import run_info_fn - - -from same_step_000_afdeddf09e474ffdbe0543fd0d775bbd import same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_fn - - - -run_info_comp = kfp.components.create_component_from_func( - func=run_info_fn, - packages_to_install=[ - "dill==0.3.5.1", - "kfp==1.8.12", - ], -) - - -same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_comp = create_component_from_func( - func=same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_fn, - base_image="combinatorml/jupyterlab-tensorflow-opencv:0.9", - packages_to_install=[ - "dill==0.3.5.1", - "pympler==1.0.1", - "requests==2.27.1", - 'matplotlib', 'numpy', 'Pillow', 'torch', 'torchvision' # TODO: make this a loop - ], -) - - -# TODO: support kubeflow-specific config like aws secrets, mlflow endpoints. -@dsl.pipeline(name="Compilation of pipelines",) -def root( - context='', metadata_url='', -): - # Generate secrets (if not already created) - secrets_by_env = {} - - - run_info = run_info_comp(run_id=kfp.dsl.RUN_ID_PLACEHOLDER) - - - - same_step_000_afdeddf09e474ffdbe0543fd0d775bbd = same_step_000_afdeddf09e474ffdbe0543fd0d775bbd_comp( - - input_context="", - - run_info=run_info.outputs["run_info"], - metadata_url=metadata_url - ) - - - same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.execution_options.caching_strategy.max_cache_staleness = "P0D" - for k in env_vars: - same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.add_env_variable(V1EnvVar(name=k, value=env_vars[k])) - - diff --git a/examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py b/examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py deleted file mode 100644 index dfccc2d6..00000000 --- a/examples/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd/same_step_000_afdeddf09e474ffdbe0543fd0d775bbd.py +++ /dev/null @@ -1,526 +0,0 @@ - - -import argparse as __argparse -from multiprocessing import context -import pathlib -from typing import NamedTuple -from pprint import pprint as __pp -import os -from pathlib import Path as __Path -import dill -import json -import logging -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import matplotlib.image as mpimg -import numpy as np -import random -from pathlib import Path -import pickle -import sys -import time - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -from base64 import ( - urlsafe_b64encode as __urlsafe_b64encode, - urlsafe_b64decode as __urlsafe_b64decode, -) - -def main(__context="gAR9lC4=", __run_info="gAR9lC4=", __metadata_url="") -> NamedTuple('FuncOutput',[('context', str),]): - import dill - import base64 - from base64 import urlsafe_b64encode, urlsafe_b64decode - from copy import copy as __copy - from types import ModuleType as __ModuleType - from pprint import pprint as __pp - import datetime as __datetime - import requests - 
- __run_info_dict = dill.loads(urlsafe_b64decode(__run_info)) - __base64_decode = urlsafe_b64decode(__context) - __context_import_dict = dill.loads(__base64_decode) - - __variables_to_mount = {} - __loc = {} - - for __k in __context_import_dict: - __variables_to_mount[__k] = dill.loads(__context_import_dict[__k]) - - __json_data = { - "experiment_id": __run_info_dict["experiment_id"], - "run_id": __run_info_dict["run_id"], - "step_id": "same_step_000_afdeddf09e474ffdbe0543fd0d775bbd", - "metadata_type": "input", - "metadata_value": __context, - "metadata_time": __datetime.datetime.now().isoformat(), - } - - print(f"Metadata url: {__metadata_url}") - if __metadata_url != '': - print("Found metadata URL - executing.") - __pp(__json_data) - try: - __r = requests.post(__metadata_url, json=__json_data,) - __r.raise_for_status() - except requests.exceptions.HTTPError as __err: - print(f"Error: {__err}") - - __inner_code_to_execute = """ -import dill -import base64 -from base64 import urlsafe_b64encode, urlsafe_b64decode -from types import ModuleType as __ModuleType - -# The code for the DCGAN generative model is taken from the official PyTorch docs https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html. - -import argparse -import json -import logging -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import matplotlib.image as mpimg -import numpy as np -import os -import random -from pathlib import Path -import pickle -import sys -import time - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -def get_input(local=False): - if local: - print(\"Reading local punks directory.\") - - # Root directory for dataset - filename = Path('data/punks/tealpunks') - # filename = Path('data/punks-sample') - # filename = Path('data/celeba') - - return filename - - dids = os.getenv('DIDS', None) - - if not dids: - print(\"No DIDs found in environment. 
Aborting.\") - return - - dids = json.loads(dids) - - cwd = os.getcwd() - print('cwd', cwd) - - - for did in dids: - print('ls', f'/data/inputs/{did}/0') - print('ls2', os.listdir(f'/data/inputs/')) - filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service - print(f\"Reading asset file {filename}.\") - # print('type', type(os.listdir(f'/data/inputs/{did}/0/')[0])) - - - return filename -def run_dcgan(local=False): - - t0 = time.time() - - filename = get_input(local) - if not filename: - print(\"Could not retrieve filename.\") - return - - from PIL import Image - with open(filename) as datafile: - print(type(datafile)) - print(datafile) - datafile.seek(0) - img = Image.open(datafile) - print('@@@', img) - - - teal_images = sorted(list(filename.glob('*'))) - - print(teal_images) - - results_dir = Path('results') - - if not results_dir.exists(): - results_dir.mkdir() - - if local: - img0 = mpimg.imread(teal_images[0]) - img1 = mpimg.imread(teal_images[1]) - fig, ax = plt.subplots(1,2) - ax[0].imshow(img0) - ax[1].imshow(img1) - [a.axis('off') for a in ax] - plt.savefig(results_dir / \"sample.png\") - - # Set random seed for reproducibility - manualSeed = 999 - #manualSeed = random.randint(1, 10000) # use if you want new results - print(\"Random Seed: \", manualSeed) - random.seed(manualSeed) - torch.manual_seed(manualSeed) - - # Training parameters - workers = 2 - batch_size = 128 - image_size = 64 - nc = 3 - nz = 100 - ngf = 64 - ndf = 64 - num_epochs = 5 - lr = 0.0002 - beta1 = 0.5 - ngpu = 1 - - # We can use an image folder dataset the way we have it setup. - # Create the dataset - dataset = dset.ImageFolder(root=filename.parent, - transform=transforms.Compose([ - transforms.Resize(image_size), - transforms.CenterCrop(image_size), - transforms.ToTensor(), - transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), - ])) - # Create the dataloader - dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, - shuffle=True, num_workers=workers) - - # Decide which device we want to run on - device = torch.device(\"cuda:0\" if (torch.cuda.is_available() and ngpu > 0) else \"cpu\") - - # custom weights initialization called on netG and netD - def weights_init(m): - classname = m.__class__.__name__ - if classname.find('Conv') != -1: - nn.init.normal_(m.weight.data, 0.0, 0.02) - elif classname.find('BatchNorm') != -1: - nn.init.normal_(m.weight.data, 1.0, 0.02) - nn.init.constant_(m.bias.data, 0) - - # Generator Code - class Generator(nn.Module): - def __init__(self, ngpu): - super(Generator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is Z, going into a convolution - nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), - nn.BatchNorm2d(ngf * 8), - nn.ReLU(True), - # state size. (ngf*8) x 4 x 4 - nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), - nn.BatchNorm2d(ngf * 4), - nn.ReLU(True), - # state size. (ngf*4) x 8 x 8 - nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), - nn.BatchNorm2d(ngf * 2), - nn.ReLU(True), - # state size. (ngf*2) x 16 x 16 - nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False), - nn.BatchNorm2d(ngf), - nn.ReLU(True), - # state size. (ngf) x 32 x 32 - nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), - nn.Tanh() - # state size. 
(nc) x 64 x 64 - ) - - def forward(self, input): - return self.main(input) - - # Create the generator - netG = Generator(ngpu).to(device) - - # Handle multi-gpu if desired - if (device.type == 'cuda') and (ngpu > 1): - netG = nn.DataParallel(netG, list(range(ngpu))) - - # Apply the weights_init function to randomly initialize all weights - # to mean=0, stdev=0.02. - netG.apply(weights_init) - - # Print the model - print(netG) - - class Discriminator(nn.Module): - def __init__(self, ngpu): - super(Discriminator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is (nc) x 64 x 64 - nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf) x 32 x 32 - nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), - nn.BatchNorm2d(ndf * 2), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*2) x 16 x 16 - nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), - nn.BatchNorm2d(ndf * 4), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*4) x 8 x 8 - nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), - nn.BatchNorm2d(ndf * 8), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*8) x 4 x 4 - nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), - nn.Sigmoid() - ) - - def forward(self, input): - return self.main(input) - - # Create the Discriminator - netD = Discriminator(ngpu).to(device) - - # Handle multi-gpu if desired - if (device.type == 'cuda') and (ngpu > 1): - netD = nn.DataParallel(netD, list(range(ngpu))) - - # Apply the weights_init function to randomly initialize all weights - # to mean=0, stdev=0.2. - netD.apply(weights_init) - - # Print the model - print(netD) - - # Initialize BCELoss function - criterion = nn.BCELoss() - - # Create batch of latent vectors that we will use to visualize - # the progression of the generator - fixed_noise = torch.randn(64, nz, 1, 1, device=device) - - # Establish convention for real and fake labels during training - real_label = 1. - fake_label = 0. 
- - # Setup Adam optimizers for both G and D - optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999)) - optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999)) - - # Training Loop - - # Lists to keep track of progress - img_list = [] - G_losses = [] - D_losses = [] - iters = 0 - - print(\"Starting Training Loop...\") - # For each epoch - for epoch in range(num_epochs): - # For each batch in the dataloader - for i, data in enumerate(dataloader, 0): - - ############################ - # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) - ########################### - ## Train with all-real batch - netD.zero_grad() - # Format batch - real_cpu = data[0].to(device) - b_size = real_cpu.size(0) - label = torch.full((b_size,), real_label, dtype=torch.float, device=device) - # Forward pass real batch through D - output = netD(real_cpu).view(-1) - # Calculate loss on all-real batch - errD_real = criterion(output, label) - # Calculate gradients for D in backward pass - errD_real.backward() - D_x = output.mean().item() - - ## Train with all-fake batch - # Generate batch of latent vectors - noise = torch.randn(b_size, nz, 1, 1, device=device) - # Generate fake image batch with G - fake = netG(noise) - label.fill_(fake_label) - # Classify all fake batch with D - output = netD(fake.detach()).view(-1) - # Calculate D's loss on the all-fake batch - errD_fake = criterion(output, label) - # Calculate the gradients for this batch, accumulated (summed) with previous gradients - errD_fake.backward() - D_G_z1 = output.mean().item() - # Compute error of D as sum over the fake and the real batches - errD = errD_real + errD_fake - # Update D - optimizerD.step() - - ############################ - # (2) Update G network: maximize log(D(G(z))) - ########################### - netG.zero_grad() - label.fill_(real_label) # fake labels are real for generator cost - # Since we just updated D, perform another forward pass of all-fake batch through D - output = netD(fake).view(-1) - # Calculate G's loss based on this output - errG = criterion(output, label) - # Calculate gradients for G - errG.backward() - D_G_z2 = output.mean().item() - # Update G - optimizerG.step() - - # Output training stats - if i % 50 == 0: - print('[%d/%d][%d/%d]\\tLoss_D: %.4f\\tLoss_G: %.4f\\tD(x): %.4f\\tD(G(z)): %.4f / %.4f' - % (epoch, num_epochs, i, len(dataloader), - errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) - - # Save Losses for plotting later - G_losses.append(errG.item()) - D_losses.append(errD.item()) - - # Check how the generator is doing by saving G's output on fixed_noise - if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)): - with torch.no_grad(): - fake = netG(fixed_noise).detach().cpu() - img_list.append(vutils.make_grid(fake, padding=2, normalize=True)) - - iters += 1 - - if local: - plt.figure(figsize=(10,5)) - plt.title(\"Generator and Discriminator Loss During Training\") - plt.plot(G_losses,label=\"G\") - plt.plot(D_losses,label=\"D\") - plt.xlabel(\"iterations\") - plt.ylabel(\"Loss\") - plt.legend() - plt.savefig(results_dir / \"loss.png\") - - fig = plt.figure(figsize=(20,20)) - plt.axis(\"off\") - ims = [[plt.imshow(np.transpose(i,(1,2,0)), animated=True)] for i in img_list] - # ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True) - - fig = plt.figure(figsize=(20,20)) - plt.axis(\"off\") - plt.imshow(img_list[-1].permute(1,2,0)) - plt.savefig(results_dir / \"gen.png\") - - filename = results_dir / 'punks.pickle' if 
local else \"/data/outputs/result\" - with open(filename, 'wb') as pickle_file: - print(f\"Pickling results in {filename}\") - pickle.dump(img_list[-1], pickle_file) - - t1 = time.time() - total = t1-t0 - - print('Time: ', total) - -if __name__ == \"__main__\": - local = (len(sys.argv) == 2 and sys.argv[1] == \"local\") - run_dcgan(local) - - - - - - - - -__locals_keys = frozenset(locals().keys()) -__globals_keys = frozenset(globals().keys()) -__context_export = {} - -for val in __globals_keys: - if not val.startswith("_") and not isinstance(val, __ModuleType): - __context_export[val] = dill.dumps(globals()[val]) - -# Locals needs to come after globals in case we made changes -for val in __locals_keys: - if not val.startswith("_") and not isinstance(val, __ModuleType): - __context_export[val] = dill.dumps(locals()[val]) - -__b64_string = str(urlsafe_b64encode(dill.dumps(__context_export)), encoding="ascii") - -""" - exec(__inner_code_to_execute, __variables_to_mount, __loc) - - __json_output_data = { - "experiment_id": __run_info_dict["experiment_id"], - "run_id": __run_info_dict["run_id"], - "step_id": "%v", - "metadata_type": "output", - "metadata_value": __loc["__b64_string"], - "metadata_time": __datetime.datetime.now().isoformat(), - } - - print(f"Metadata url: {__metadata_url}") - if __metadata_url != '': - print("Found metadata URL - executing.") - __pp(__json_data) - try: - __r = requests.post(__metadata_url, json=__json_output_data,) - __r.raise_for_status() - except requests.exceptions.HTTPError as err: - print(f"Error: {err}") - - from collections import namedtuple - output = namedtuple("FuncOutput", ["context"]) - return output(__loc["__b64_string"]) - - -if __name__ == "__main__": - __run = Run.get_context() - __parser = __argparse.ArgumentParser("cleanse") - __parser.add_argument("--input_context", type=str, help="Context to run as string") - __parser.add_argument("--run_info", type=str, help="Run info") - __parser.add_argument("--output_context_path", type=str, help="Output context path") - __parser.add_argument("--metadata_url", type=str, help="Metadata URL") - - __args = __parser.parse_args() - - __input_context_string = "gAR9lC4=" - __context_filename = "context.txt" - if "__pipelinedata_context" in __args.input_context: - context_full_path = __Path(__args.input_context) / __context_filename - print(f"reading file: {context_full_path}") - __input_context_string = context_full_path.read_text() - elif __args.input_context and __args.input_context.strip(): - __input_context_string = __args.input_context.strip() - - # Need to unpack and do this here, because AML only gives - # us the run id inside the container. Unpacking and repacking so - # bulk of the code is unchanged. 
- __run_info_dict = dill.loads(__urlsafe_b64decode(__args.run_info)) - __run_info_dict["run_id"] = __run.get_details()["runId"] - - # Returns a tuple, where the zeroth index is the string - __output_context_tuple = main( - __context=__input_context_string, - __run_info=str( - __urlsafe_b64encode(dill.dumps(__run_info_dict)), encoding="ascii" - ), - __metadata_url=__args.metadata_url, - ) - - __p = __Path(__args.output_context_path) - __p.mkdir(parents=True, exist_ok=True) - __filepath = __p / __context_filename - with __filepath.open("w+") as __f: - __f.write(__output_context_tuple[0]) - diff --git a/sameproject/cli/run.py b/sameproject/cli/run.py index ce2b2deb..8e94c27c 100644 --- a/sameproject/cli/run.py +++ b/sameproject/cli/run.py @@ -26,7 +26,7 @@ "-t", "--target", default="kubeflow", - type=click.Choice(["aml", "kubeflow", "functions", "python_ocean"]), + type=click.Choice(["aml", "kubeflow", "functions"]), ) @click.option( "--persist-temp-files", diff --git a/sameproject/ops/backends.py b/sameproject/ops/backends.py index 3be61b12..93962919 100644 --- a/sameproject/ops/backends.py +++ b/sameproject/ops/backends.py @@ -5,7 +5,6 @@ import sameproject.ops.functions as functions import sameproject.ops.kubeflow as kubeflow import sameproject.ops.aml as aml -import sameproject.ops.python_ocean as python_ocean import sameproject.ops.helpers import tempfile import click @@ -16,7 +15,6 @@ def render(target: str, steps: list, config: SameConfig, compile_path: str = Non "aml": aml.render, "kubeflow": kubeflow.render, "functions": functions.render, - "ocean": python_ocean.render } render_function = target_renderers.get(target, None) @@ -35,7 +33,6 @@ def deploy(target: str, base_path: Path, root_file: str, config: SameConfig): "aml": aml.deploy, "kubeflow": kubeflow.deploy, "functions": functions.deploy, - "ocean": python_ocean.deploy } deploy_function = target_deployers.get(target, None) diff --git a/sameproject/ops/python_ocean/README.md b/sameproject/ops/python_ocean/README.md deleted file mode 100644 index b96d8479..00000000 --- a/sameproject/ops/python_ocean/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# Jinja template for Ocean C2D - -> Note: work-in-progress - -A template for easily converting jupyter notebooks to Ocean Protocol C2D script. Use tags to specify different parts of your jupyter notebooks. - -## 🏗 Initial Setup - -### Set up environment -``` -#clone repo -git clone https://github.com/AlgoveraAI/same-project.git -cd same-project - -#create a virtual environment -python3 -m venv venv - -#activate env -source venv/bin/activate - -#Install the dependencies -pip install -e . -pip install jupyter -``` - -### Guide to using the template -Open up a jupyter notebook. This is where you will do all your data analysis and model development. Make sure to write a comment at the start of each cell (this is necessary otherwise the template will generate incorrectly indented code). - -When you are done developing locally and want to publish to ocean, you will need to tag specific cells of your notebook to be read by the template. To do this click *View/Cell Toolbar/Tags*. -Tag the cells where you did your data preparation with "input" and the cells with your model and training loop "train". - -When you're ready execute the following command in your terminal: -``` -jupyter nbconvert path_to_notebook --to python --template=./sameproject/ops/python_ocean -``` - -Check that the generated script has no syntax errors. 
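The tagging step described in the guide above can also be scripted instead of done through the Jupyter "Cell Toolbar/Tags" UI. A minimal sketch using nbformat follows; the notebook filename and the cell indices are illustrative assumptions only, not part of the original guide:

import nbformat

# Load the notebook, attach the tags the template looks for, and save it back.
nb = nbformat.read("dcgan.ipynb", as_version=4)
nb.cells[2].metadata["tags"] = ["input"]   # assumed index of the data-preparation cell
nb.cells[4].metadata["tags"] = ["train"]   # assumed index of the model/training-loop cell
nbformat.write(nb, "dcgan.ipynb")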
- -See `dcgan.ipynb` and `dcgan.py` for more details and reach out if you run into issues. - -## Algovera - -Algovera is a community of individuals working to facilitate and accelerate the development of decentralised AI products and research. - -[Website](https://www.algovera.ai/) | [Notion](https://algovera.notion.site/) | [Discord](https://discord.gg/e65RuHSDS5) | [Calendar](https://calendar.google.com/calendar/embed?src=c_4qajdfj4imie9cpnkbvkrc7ri4%40group.calendar.google.com) | [Twitter](https://twitter.com/AlgoveraAI) | [YouTube](https://www.youtube.com/channel/UC2A5iUpP6k52ZZmC8LFj1IA) | - diff --git a/sameproject/ops/python_ocean/__init__.py b/sameproject/ops/python_ocean/__init__.py deleted file mode 100644 index 3cd4721c..00000000 --- a/sameproject/ops/python_ocean/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .render import render -from .deploy import deploy \ No newline at end of file diff --git a/sameproject/ops/python_ocean/deploy.py b/sameproject/ops/python_ocean/deploy.py deleted file mode 100644 index c7782710..00000000 --- a/sameproject/ops/python_ocean/deploy.py +++ /dev/null @@ -1,7 +0,0 @@ -from sameproject.data.config import SameConfig -from sameproject.ops import helpers -import importlib - - -def deploy(base_path: str, root_name: str, config: SameConfig): - return diff --git a/sameproject/ops/python_ocean/index.py.j2 b/sameproject/ops/python_ocean/index.py.j2 deleted file mode 100644 index da0329f8..00000000 --- a/sameproject/ops/python_ocean/index.py.j2 +++ /dev/null @@ -1,62 +0,0 @@ -{%- extends 'null.j2' -%} -import os - -{%- block header -%} -#!/usr/bin/env python -# coding: utf-8 -{% endblock header %} - -{% block in_prompt %} -{% if resources.global_content_filter.include_input_prompt -%} - # In[{{ cell.execution_count if cell.execution_count else ' ' }}]: -{% endif %} -{% endblock in_prompt %} - -{% block input %} -{% if 'input' in cell['metadata'].get('tags', []): -%} -def get_input(local=False): - if local: - {% filter indent(8) %} - {{ cell.source | ipython2python }} - {% endfilter %} - - return filename - print(cell['metadata'].get('tags', [])) - dids = os.getenv('DIDS', None) - - if not dids: - print("No DIDs found in environment. 
Aborting.") - return - - dids = json.loads(dids) - - cwd = os.getcwd() - print('cwd', cwd) - - - for did in dids: - print('ls', f'/data/inputs/{did}/0') - print('ls2', os.listdir(f'/data/inputs/')) - filename = Path(f'/data/inputs/{did}/0') # 0 for metadata service - print(f"Reading asset file {filename}.") - return filename - -{% elif 'train' in cell['metadata'].get('tags', []): -%} -def run_model(local=False): - - t0 = time.time() - {% filter indent(4) %} - {{ cell.source | ipython2python }} - {% endfilter %} - -{% else -%} -{{ cell.source | ipython2python }} - -{% endif %} -{% endblock input %} -if __name__ == "__main__": - run_model() - -{% block markdowncell scoped %} -{{ cell.source | comment_lines }} -{% endblock markdowncell %} diff --git a/sameproject/ops/python_ocean/render.py b/sameproject/ops/python_ocean/render.py deleted file mode 100644 index 3e96eb95..00000000 --- a/sameproject/ops/python_ocean/render.py +++ /dev/null @@ -1,28 +0,0 @@ -from traitlets.config import Config -import nbformat as nbf -from nbconvert.exporters import PythonExporter -from nbconvert.preprocessors import TagRemovePreprocessor -from nbconvert.exporters.templateexporter import TemplateExporter -from pathlib import Path -from uuid import uuid4 -from typing import Tuple -import time - - -template = "root.jinja" -config = { - 'Exporter': {'template_file': template, - 'template_path': ['./']}, - 'ExtractOutputPreprocessor': {'enabled': True}, - } - -exporter = PythonExporter(config) - -def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: - body, resources = exporter.from_notebook_node( - compile_path, resources={'output_files_dir': compile_path}) - same_config["compile_path"] = compile_path - root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" - root_path = Path(compile_path) / f"{root_pipeline_name}.py" - with open(root_path, 'w') as f: - f.write(body) \ No newline at end of file diff --git a/sameproject/ops/python_ocean/root.jinja b/sameproject/ops/python_ocean/root.jinja deleted file mode 100644 index e69de29b..00000000 From 6cbacf33dbb8ba6aff24e78cf6697ac32846601d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 8 Jun 2022 10:59:54 +0200 Subject: [PATCH 23/99] WIP: initialized ocean --- sameproject/cli/run.py | 2 +- sameproject/ops/ocean/__init__.py | 2 ++ sameproject/ops/ocean/deploy.py | 0 sameproject/ops/ocean/render.py | 0 sameproject/ops/ocean/root.jinja | 0 sameproject/ops/ocean/step.jinja | 0 6 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 sameproject/ops/ocean/__init__.py create mode 100644 sameproject/ops/ocean/deploy.py create mode 100644 sameproject/ops/ocean/render.py create mode 100644 sameproject/ops/ocean/root.jinja create mode 100644 sameproject/ops/ocean/step.jinja diff --git a/sameproject/cli/run.py b/sameproject/cli/run.py index 8e94c27c..e1f9e832 100644 --- a/sameproject/cli/run.py +++ b/sameproject/cli/run.py @@ -26,7 +26,7 @@ "-t", "--target", default="kubeflow", - type=click.Choice(["aml", "kubeflow", "functions"]), + type=click.Choice(["aml", "kubeflow", "functions", "ocean"]), ) @click.option( "--persist-temp-files", diff --git a/sameproject/ops/ocean/__init__.py b/sameproject/ops/ocean/__init__.py new file mode 100644 index 00000000..3cd4721c --- /dev/null +++ b/sameproject/ops/ocean/__init__.py @@ -0,0 +1,2 @@ +from .render import render +from .deploy import deploy \ No newline at end of file diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py new file mode 100644 
index 00000000..e69de29b diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py new file mode 100644 index 00000000..e69de29b diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja new file mode 100644 index 00000000..e69de29b diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja new file mode 100644 index 00000000..e69de29b From 1a639678032ce55e9882b6044901ab275c2058c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 8 Jun 2022 16:29:57 +0200 Subject: [PATCH 24/99] WIP: added ocean to backends.py --- sameproject/ops/backends.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sameproject/ops/backends.py b/sameproject/ops/backends.py index 93962919..221732b3 100644 --- a/sameproject/ops/backends.py +++ b/sameproject/ops/backends.py @@ -5,6 +5,7 @@ import sameproject.ops.functions as functions import sameproject.ops.kubeflow as kubeflow import sameproject.ops.aml as aml +import sameproject.ops.ocean as ocean import sameproject.ops.helpers import tempfile import click @@ -15,6 +16,7 @@ def render(target: str, steps: list, config: SameConfig, compile_path: str = Non "aml": aml.render, "kubeflow": kubeflow.render, "functions": functions.render, + "ocean": ocean.render } render_function = target_renderers.get(target, None) From 6f31c14bbbc8f211df0c054e71ef3f2aefd863ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 9 Jun 2022 13:44:32 +0200 Subject: [PATCH 25/99] WIP: init render & deploy methods --- sameproject/ops/kubeflow/__init__.py | 2 +- sameproject/ops/ocean/deploy.py | 5 +++++ sameproject/ops/ocean/render.py | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/kubeflow/__init__.py b/sameproject/ops/kubeflow/__init__.py index 3edf3212..3cd4721c 100644 --- a/sameproject/ops/kubeflow/__init__.py +++ b/sameproject/ops/kubeflow/__init__.py @@ -1,2 +1,2 @@ from .render import render -from .deploy import deploy +from .deploy import deploy \ No newline at end of file diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index e69de29b..7c19b557 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -0,0 +1,5 @@ +from sameproject.data.config import SameConfig +from pathlib import Path + +def deploy(base_path: Path, root_file: str, config: SameConfig): + return \ No newline at end of file diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index e69de29b..bfe05d00 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -0,0 +1,5 @@ +from pathlib import Path +from typing import Tuple + +def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: + return \ No newline at end of file From b0696f36179d7213d2550cf44ed5da6a1bfe3bca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 16 Jun 2022 08:47:01 +0100 Subject: [PATCH 26/99] WIP: boilerplate ocean c2d script --- sameproject/ops/ocean/script.py | 200 ++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 sameproject/ops/ocean/script.py diff --git a/sameproject/ops/ocean/script.py b/sameproject/ops/ocean/script.py new file mode 100644 index 00000000..7bd54ba6 --- /dev/null +++ b/sameproject/ops/ocean/script.py @@ -0,0 +1,200 @@ +"""Boilerplate Ocean publishing and running c2d""" + +import os +import _init_paths +from ocean_lib.data_provider.data_service_provider import DataServiceProvider +from 
ocean_lib.common.agreements.service_types import ServiceTypes +from ocean_lib.web3_internal.constants import ZERO_ADDRESS +from ocean_lib.web3_internal.currency import to_wei +from ocean_lib.web3_internal.wallet import Wallet +from ocean_lib.assets import trusted_algorithms +from ocean_lib.services.service import Service +from ocean_lib.models.btoken import BToken #BToken is ERC20 +from ocean_lib.ocean.ocean import Ocean +from ocean_lib.config import Config + + +config = Config('config.ini') # Ocean requires a config file with network, metadata, block, and provider info +ocean = Ocean(config) +OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) +provider_url = DataServiceProvider.get_url(ocean.config) + + +""" +Algorithm publishing + +Requirements: + +- Model script on GitHub +- wallet private key as environment variable +- dataset we want to train on specified +- model metadata (name, date, compute, etc.) +""" + +wallet = Wallet(ocean.web3, os.getenv('TEST_PRIVATE_KEY1'), transaction_timeout=20, block_confirmations=config.block_confirmations) +print(f"wallet.address = '{wallet.address}'") +assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" + + +# Publish ALG datatoken +ALG_datatoken = ocean.create_data_token('ALG1', 'ALG1', wallet, blob=ocean.config.metadata_cache_uri) +ALG_datatoken.mint(wallet.address, to_wei(100), wallet) +print(f"ALG_datatoken.address = '{ALG_datatoken.address}'") + +# Specify metadata and service attributes, for "GPR" algorithm script. +# In same location as Branin test dataset. GPR = Gaussian Process Regression. +ALG_metadata = { + "main": { + "type": "algorithm", + "algorithm": { + "language": "python", + "format": "docker-image", + "version": "0.1", # project-specific + "container": { + "entrypoint": "python $ALGO", + "image": "oceanprotocol/algo_dockers", + "tag": "python-branin" # project-specific + } + }, + "files": [ + { + "url": "https://raw.githubusercontent.com/trentmc/branin/main/gpr.py", # project-specific + "index": 0, + "contentType": "text/text", + } + ], + "name": "gpr", "author": "Trent", "license": "CC0", # project-specific + "dateCreated": "2020-01-28T10:55:11Z" # project-specific + } +} +ALG_service_attributes = { + "main": { + "name": "ALG_dataAssetAccessServiceAgreement", + "creator": wallet.address, + "timeout": 3600 * 24, + "datePublished": "2020-01-28T10:55:11Z", + "cost": 1.0, # + } + } + +# Calc ALG service access descriptor. 
We use the same service provider as DATA +ALG_access_service = Service( + service_endpoint=provider_url, + service_type=ServiceTypes.CLOUD_COMPUTE, + attributes=ALG_service_attributes +) + +# Publish metadata and service info on-chain +ALG_ddo = ocean.assets.create( + metadata=ALG_metadata, # {"main" : {"type" : "algorithm", ..}, ..} + publisher_wallet=wallet, + services=[ALG_access_service], + data_token_address=ALG_datatoken.address) + +trusted_algorithms.add_publisher_trusted_algorithm('DATA_ddo', ALG_ddo.did, config.metadata_cache_uri) # project-specific +ocean.assets.update('DATA_ddo', publisher_wallet=wallet) # project-specific + +""" +Datatoken buying + +Requirements: +- wallet from previous step +- datatoken DID and pool address +""" + +did = 'SPECIFY' +pool_address = 'SPECIFY' + +wallet = Wallet(ocean.web3, private_key=private_key, transaction_timeout=20, block_confirmations=0) +assert wallet is not None, "Wallet error, initialize app again" +# Get asset, datatoken_address +asset = ocean.assets.resolve(did) +data_token_address = f'0x{did[7:]}' + +print('Executing Transaction') +#my wallet +print(f"Environment Wallet Address = '{wallet.address}'") +print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") +print(f"Wallet ETH = {pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") +#Verify that Bob has ETH +assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" +#Verify that Bob has OCEAN +assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" +# print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") +# assert data_token.balanceOf(wallet.address) >= to_wei(1), "Bob didn't get 1.0 datatokens" +#Bob points to the service object +fee_receiver = ZERO_ADDRESS # could also be market address +#Bob buys 1.0 datatokens - the amount needed to consume the dataset. 
+data_token = ocean.get_data_token(data_token_address) +print('Buying Data Token') +ocean.pool.buy_data_tokens( + pool_address, + amount=to_wei(1), # buy 1.0 datatoken + max_OCEAN_amount=to_wei(10), # pay up to 10.0 OCEAN + from_wallet=wallet +) +print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") + + +""" +Running C2D +""" + +DATA_did = DATA_ddo.did # for convenience +ALG_did = ALG_ddo.did +DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions +ALG_DDO = ocean.assets.resolve(ALG_did) + +compute_service = DATA_DDO.get_service('compute') +algo_service = ALG_DDO.get_service('access') + +from ocean_lib.web3_internal.constants import ZERO_ADDRESS +from ocean_lib.models.compute_input import ComputeInput + +# order & pay for dataset +dataset_order_requirements = ocean.assets.order( + DATA_did, wallet.address, service_type=compute_service.type +) + +DATA_order_tx_id = ocean.assets.pay_for_service( + ocean.web3, + dataset_order_requirements.amount, + dataset_order_requirements.data_token_address, + DATA_did, + compute_service.index, + ZERO_ADDRESS, + wallet, + dataset_order_requirements.computeAddress, + ) + +# order & pay for algo +algo_order_requirements = ocean.assets.order( + ALG_did, wallet.address, service_type=algo_service.type +) +ALG_order_tx_id = ocean.assets.pay_for_service( + ocean.web3, + algo_order_requirements.amount, + algo_order_requirements.data_token_address, + ALG_did, + algo_service.index, + ZERO_ADDRESS, + wallet, + algo_order_requirements.computeAddress, +) + +compute_inputs = [ComputeInput(DATA_did, DATA_order_tx_id, compute_service.index)] +job_id = ocean.compute.start( + compute_inputs, + wallet, + algorithm_did=ALG_did, + algorithm_tx_id=ALG_order_tx_id, + algorithm_data_token=ALG_datatoken.address +) +print(f"Started compute job with id: {job_id}") + +# for monitoring C2D status +print(ocean.compute.status(DATA_did, job_id, wallet)) + +# retrieving result +result = ocean.compute.result_file(DATA_did, job_id, 0, wallet) # 0 index, means we retrieve the results from the first dataset index + From 115d4b50eea95c9c0d9d885a02a3eab322147dfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 16 Jun 2022 09:29:45 +0100 Subject: [PATCH 27/99] WIP: simplest jinja template --- sameproject/ops/ocean/root.jinja | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja index e69de29b..7cf1e5ed 100644 --- a/sameproject/ops/ocean/root.jinja +++ b/sameproject/ops/ocean/root.jinja @@ -0,0 +1,6 @@ +{% autoescape off %} + +# Run the user's notebook code: +{urlsafe_b64decode("{{ user_code }}").decode()} + +{% endautoescape %} \ No newline at end of file From 5b415c9bd06287e521269fe8d5710adca921ea5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 16 Jun 2022 09:43:23 +0100 Subject: [PATCH 28/99] WIP: render function (without build) --- sameproject/ops/ocean/render.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index bfe05d00..790d8d90 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -1,5 +1,24 @@ +from jinja2 import Environment, FileSystemLoader, select_autoescape from pathlib import Path from typing import Tuple +from uuid import uuid4 +import os -def render(compile_path: str, steps: list, same_config: dict) -> 
Tuple[Path, str]: - return \ No newline at end of file +from sameproject.ops import helpers + +ocean_root_template = "root.jinja" + + +def render(compile_path: str, same_config: dict) -> Tuple[Path, str]: + """Renders the notebook into a root file and a series of step files according to the target requirements. Returns an absolute path to the root file for deployment.""" + templateDir = os.path.dirname(os.path.abspath(__file__)) + templateLoader = FileSystemLoader(templateDir) + print(f"Template dir {templateDir}") + env = Environment(trim_blocks=True, loader=templateLoader) + + root_file_string = _build_root_file(env, same_config) + root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" + root_path = Path(compile_path) / f"{root_pipeline_name}.py" + helpers.write_file(root_path, root_file_string) + + return (compile_path, root_pipeline_name) \ No newline at end of file From d3c1f105b7aa5c5f21b2562ea4a83314f426740d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 16 Jun 2022 10:41:30 +0100 Subject: [PATCH 29/99] Added ocean deploy --- sameproject/ops/backends.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sameproject/ops/backends.py b/sameproject/ops/backends.py index 221732b3..f58136c3 100644 --- a/sameproject/ops/backends.py +++ b/sameproject/ops/backends.py @@ -35,6 +35,7 @@ def deploy(target: str, base_path: Path, root_file: str, config: SameConfig): "aml": aml.deploy, "kubeflow": kubeflow.deploy, "functions": functions.deploy, + "ocean": ocean.deploy } deploy_function = target_deployers.get(target, None) From 963c9ac58b5e89dfc9baf0c8089f640c11618452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 16 Jun 2022 11:00:09 +0100 Subject: [PATCH 30/99] WIP: rendering encoded script from notebook --- sameproject/ops/ocean/render.py | 45 +++++++++++++++++++++++++++++--- sameproject/ops/ocean/root.jinja | 6 ----- sameproject/ops/ocean/step.jinja | 9 +++++++ 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 790d8d90..2db0b730 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -2,23 +2,60 @@ from pathlib import Path from typing import Tuple from uuid import uuid4 +from base64 import urlsafe_b64encode +import logging import os +import time +from sameproject.data.step import Step from sameproject.ops import helpers +import sameproject.ops.explode -ocean_root_template = "root.jinja" +ocean_step_template = "step.jinja" -def render(compile_path: str, same_config: dict) -> Tuple[Path, str]: +def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: """Renders the notebook into a root file and a series of step files according to the target requirements. 
Returns an absolute path to the root file for deployment.""" templateDir = os.path.dirname(os.path.abspath(__file__)) templateLoader = FileSystemLoader(templateDir) print(f"Template dir {templateDir}") env = Environment(trim_blocks=True, loader=templateLoader) - root_file_string = _build_root_file(env, same_config) + root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" root_path = Path(compile_path) / f"{root_pipeline_name}.py" helpers.write_file(root_path, root_file_string) - return (compile_path, root_pipeline_name) \ No newline at end of file + return (compile_path, root_pipeline_name) + + +def _build_step_file(env: Environment, step: Step, same_config) -> str: + with open(sameproject.ops.explode.__file__, "r") as f: + explode_code = f.read() + + requirements_file = None + if "requirements_file" in step: + requirements_file = urlsafe_b64encode(bytes(step.requirements_file, "utf-8")).decode() + + memory_limit = same_config.runtime_options.get( + "serialisation_memory_limit", + 512 * 1024 * 1024, # 512MB + ) + + same_env = same_config.runtime_options.get( + "same_env", + "default", + ) + + step_contract = { + "name": step.name, + "same_env": same_env, + "memory_limit": memory_limit, + "unique_name": step.unique_name, + "requirements_file": requirements_file, + "user_code": urlsafe_b64encode(bytes(step.code, "utf-8")).decode(), + "explode_code": urlsafe_b64encode(bytes(explode_code, "utf-8")).decode(), + "same_yaml": urlsafe_b64encode(bytes(same_config.to_yaml(), "utf-8")).decode(), + } + + return env.get_template(ocean_step_template).render(step_contract) \ No newline at end of file diff --git a/sameproject/ops/ocean/root.jinja b/sameproject/ops/ocean/root.jinja index 7cf1e5ed..e69de29b 100644 --- a/sameproject/ops/ocean/root.jinja +++ b/sameproject/ops/ocean/root.jinja @@ -1,6 +0,0 @@ -{% autoescape off %} - -# Run the user's notebook code: -{urlsafe_b64decode("{{ user_code }}").decode()} - -{% endautoescape %} \ No newline at end of file diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index e69de29b..f9ebff71 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -0,0 +1,9 @@ +{% autoescape off %} + +# User code for step, which we run in its own execution frame. 
+user_code = f""" +# Run the user's notebook code: +{urlsafe_b64decode("{{ user_code }}").decode()} +""" + +{% endautoescape %} From 48f5a0afa75a0c5c450e79b850310bc787a9bb8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 16 Jun 2022 11:12:06 +0100 Subject: [PATCH 31/99] WIP: removed encoding for render --- sameproject/ops/ocean/render.py | 2 +- sameproject/ops/ocean/step.jinja | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 2db0b730..7f125dc4 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -53,7 +53,7 @@ def _build_step_file(env: Environment, step: Step, same_config) -> str: "memory_limit": memory_limit, "unique_name": step.unique_name, "requirements_file": requirements_file, - "user_code": urlsafe_b64encode(bytes(step.code, "utf-8")).decode(), + "user_code": step.code, "explode_code": urlsafe_b64encode(bytes(explode_code, "utf-8")).decode(), "same_yaml": urlsafe_b64encode(bytes(same_config.to_yaml(), "utf-8")).decode(), } diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index f9ebff71..2305d58a 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -1,9 +1,6 @@ {% autoescape off %} # User code for step, which we run in its own execution frame. -user_code = f""" # Run the user's notebook code: -{urlsafe_b64decode("{{ user_code }}").decode()} -""" - +{{ user_code }} {% endautoescape %} From d2a1e10a01e4a04913601f4aec23d0f6001f1581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 17 Jun 2022 08:36:35 +0100 Subject: [PATCH 32/99] WIP: working printing deploy --- sameproject/ops/ocean/deploy.py | 6 ++++++ sameproject/ops/ocean/step.jinja | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 7c19b557..830b5977 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -1,5 +1,11 @@ from sameproject.data.config import SameConfig +from sameproject.ops import helpers from pathlib import Path +import importlib + def deploy(base_path: Path, root_file: str, config: SameConfig): + with helpers.add_path(str(base_path)): + root_module = importlib.import_module(root_file) # python module + print(f"Root module is {root_module}") return \ No newline at end of file diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index 2305d58a..e45d062a 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -2,5 +2,8 @@ # User code for step, which we run in its own execution frame. 
# Run the user's notebook code: -{{ user_code }} +def root(): +{% filter indent(width=4) %} + {{ user_code }} +{% endfilter %} {% endautoescape %} From 943661ac339be679389f2f7d1fdcd617c25a16c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 17 Jun 2022 13:03:24 +0100 Subject: [PATCH 33/99] WIP: ocean c2d deploy --- sameproject/ops/ocean/deploy.py | 202 +++++++++++++++++++++++++++++++- 1 file changed, 200 insertions(+), 2 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 830b5977..c411fd5d 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -2,10 +2,208 @@ from sameproject.ops import helpers from pathlib import Path import importlib +"""Boilerplate Ocean publishing and running c2d""" + +import os +import _init_paths +from ocean_lib.data_provider.data_service_provider import DataServiceProvider +from ocean_lib.common.agreements.service_types import ServiceTypes +from ocean_lib.web3_internal.constants import ZERO_ADDRESS +from ocean_lib.web3_internal.currency import to_wei +from ocean_lib.web3_internal.wallet import Wallet +from ocean_lib.assets import trusted_algorithms +from ocean_lib.services.service import Service +from ocean_lib.models.btoken import BToken #BToken is ERC20 +from ocean_lib.ocean.ocean import Ocean +from ocean_lib.config import Config def deploy(base_path: Path, root_file: str, config: SameConfig): with helpers.add_path(str(base_path)): root_module = importlib.import_module(root_file) # python module - print(f"Root module is {root_module}") - return \ No newline at end of file + print(f"Root module is {root_module.root}") + + config = Config('config.ini') # Ocean requires a config file with network, metadata, block, and provider info + ocean = Ocean(config) + OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) + provider_url = DataServiceProvider.get_url(ocean.config) + + + """ + Algorithm publishing + + Requirements: + + - Model script on GitHub + - wallet private key as environment variable + - dataset we want to train on specified + - model metadata (name, date, compute, etc.) + """ + + wallet = Wallet(ocean.web3, os.getenv('TEST_PRIVATE_KEY1'), transaction_timeout=20, block_confirmations=config.block_confirmations) + print(f"wallet.address = '{wallet.address}'") + assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" + + + # Publish ALG datatoken + ALG_datatoken = ocean.create_data_token('ALG1', 'ALG1', wallet, blob=ocean.config.metadata_cache_uri) + ALG_datatoken.mint(wallet.address, to_wei(100), wallet) + print(f"ALG_datatoken.address = '{ALG_datatoken.address}'") + + # Specify metadata and service attributes, for "GPR" algorithm script. + # In same location as Branin test dataset. GPR = Gaussian Process Regression. 
+ ALG_metadata = { + "main": { + "type": "algorithm", + "algorithm": { + "language": "python", + "format": "docker-image", + "version": "0.1", # project-specific + "container": { + "entrypoint": "python $ALGO", + "image": "oceanprotocol/algo_dockers", + "tag": "python-branin" # project-specific + } + }, + "files": [ + { + "url": "https://raw.githubusercontent.com/trentmc/branin/main/gpr.py", # project-specific + "index": 0, + "contentType": "text/text", + } + ], + "name": "gpr", "author": "Trent", "license": "CC0", # project-specific + "dateCreated": "2020-01-28T10:55:11Z" # project-specific + } + } + ALG_service_attributes = { + "main": { + "name": "ALG_dataAssetAccessServiceAgreement", + "creator": wallet.address, + "timeout": 3600 * 24, + "datePublished": "2020-01-28T10:55:11Z", + "cost": 1.0, # + } + } + + # Calc ALG service access descriptor. We use the same service provider as DATA + ALG_access_service = Service( + service_endpoint=provider_url, + service_type=ServiceTypes.CLOUD_COMPUTE, + attributes=ALG_service_attributes + ) + + # Publish metadata and service info on-chain + ALG_ddo = ocean.assets.create( + metadata=ALG_metadata, # {"main" : {"type" : "algorithm", ..}, ..} + publisher_wallet=wallet, + services=[ALG_access_service], + data_token_address=ALG_datatoken.address) + + trusted_algorithms.add_publisher_trusted_algorithm('DATA_ddo', ALG_ddo.did, config.metadata_cache_uri) # project-specific + ocean.assets.update('DATA_ddo', publisher_wallet=wallet) # project-specific + + """ + Datatoken buying + + Requirements: + - wallet from previous step + - datatoken DID and pool address + """ + + did = 'SPECIFY' + pool_address = 'SPECIFY' + + wallet = Wallet(ocean.web3, private_key=private_key, transaction_timeout=20, block_confirmations=0) + assert wallet is not None, "Wallet error, initialize app again" + # Get asset, datatoken_address + asset = ocean.assets.resolve(did) + data_token_address = f'0x{did[7:]}' + + print('Executing Transaction') + #my wallet + print(f"Environment Wallet Address = '{wallet.address}'") + print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") + print(f"Wallet ETH = {pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") + #Verify that Bob has ETH + assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" + #Verify that Bob has OCEAN + assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" + # print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") + # assert data_token.balanceOf(wallet.address) >= to_wei(1), "Bob didn't get 1.0 datatokens" + #Bob points to the service object + fee_receiver = ZERO_ADDRESS # could also be market address + #Bob buys 1.0 datatokens - the amount needed to consume the dataset. 
+ data_token = ocean.get_data_token(data_token_address) + print('Buying Data Token') + ocean.pool.buy_data_tokens( + pool_address, + amount=to_wei(1), # buy 1.0 datatoken + max_OCEAN_amount=to_wei(10), # pay up to 10.0 OCEAN + from_wallet=wallet + ) + print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") + + + """ + Running C2D + """ + + DATA_did = DATA_ddo.did # for convenience + ALG_did = ALG_ddo.did + DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions + ALG_DDO = ocean.assets.resolve(ALG_did) + + compute_service = DATA_DDO.get_service('compute') + algo_service = ALG_DDO.get_service('access') + + from ocean_lib.web3_internal.constants import ZERO_ADDRESS + from ocean_lib.models.compute_input import ComputeInput + + # order & pay for dataset + dataset_order_requirements = ocean.assets.order( + DATA_did, wallet.address, service_type=compute_service.type + ) + + DATA_order_tx_id = ocean.assets.pay_for_service( + ocean.web3, + dataset_order_requirements.amount, + dataset_order_requirements.data_token_address, + DATA_did, + compute_service.index, + ZERO_ADDRESS, + wallet, + dataset_order_requirements.computeAddress, + ) + + # order & pay for algo + algo_order_requirements = ocean.assets.order( + ALG_did, wallet.address, service_type=algo_service.type + ) + ALG_order_tx_id = ocean.assets.pay_for_service( + ocean.web3, + algo_order_requirements.amount, + algo_order_requirements.data_token_address, + ALG_did, + algo_service.index, + ZERO_ADDRESS, + wallet, + algo_order_requirements.computeAddress, + ) + + compute_inputs = [ComputeInput(DATA_did, DATA_order_tx_id, compute_service.index)] + job_id = ocean.compute.start( + compute_inputs, + wallet, + algorithm_did=ALG_did, + algorithm_tx_id=ALG_order_tx_id, + algorithm_data_token=ALG_datatoken.address + ) + print(f"Started compute job with id: {job_id}") + + # for monitoring C2D status + print(ocean.compute.status(DATA_did, job_id, wallet)) + + # retrieving result + result = ocean.compute.result_file(DATA_did, job_id, 0, wallet) # 0 index, means we retrieve the results from the first dataset index + return result \ No newline at end of file From 9ad0dbd6241a273f7a6fbc939241a1d0176a61bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 23 Jun 2022 09:18:23 +0100 Subject: [PATCH 34/99] WIP: added ocean runtime options --- sameproject/ops/ocean/options.py | 76 ++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 sameproject/ops/ocean/options.py diff --git a/sameproject/ops/ocean/options.py b/sameproject/ops/ocean/options.py new file mode 100644 index 00000000..b95afe39 --- /dev/null +++ b/sameproject/ops/ocean/options.py @@ -0,0 +1,76 @@ +from sameproject.ops.runtime_options import register_option + +register_option( + "network", + "The network to use for publishing algorithm and getting the dataset.", + backend="ocean", +) + +register_option( + "provider_address", + "Address of compute provider", + backend="ocean", +) + +register_option( + "wallet_private_key", + "Private key of user wallet", + backend="ocean", +) + +register_option( + "dt_did", + "Datatoken DID", + backend="ocean", +) + +register_option( + "dt_pool", + "Pool address for datatoken", + backend="ocean", +) + +register_option( + "algo_tag", + "Tag to refer to algorithm by", + backend="ocean", +) + +register_option( + "algo_version", + "Version of algorithm", + backend="ocean", +) + +register_option( + "algo_url", + 
"URL where Algorithm is stored", + backend="ocean", +) + +register_option( + "algo_name", + "Name of algorithm", + backend="ocean", +) + +register_option( + "author", + "Author of algorithm", + backend="ocean", +) + +register_option( + "licence", + "Algorithm Licence", + backend="ocean", +) + +register_option( + "max_dt_price", + "Maximum price willing to spend on datatokens.", + backend="ocean", +) + + + From c6103cf1e6fe02e7710ed8394d24d533e7806029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 23 Jun 2022 09:19:44 +0100 Subject: [PATCH 35/99] Added ocean runtime options to init --- sameproject/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sameproject/__init__.py b/sameproject/__init__.py index 1677a825..34bcf63f 100644 --- a/sameproject/__init__.py +++ b/sameproject/__init__.py @@ -20,3 +20,4 @@ import sameproject.ops.aml.options import sameproject.ops.functions.options import sameproject.ops.kubeflow.options +import sameproject.ops.ocean.options From e5e4c14f74743a1b1ec621237e52b2384b4c91cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 23 Jun 2022 09:20:25 +0100 Subject: [PATCH 36/99] WIP: changed ocean config in deploy.py --- sameproject/ops/ocean/deploy.py | 45 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index c411fd5d..4d1a05f6 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -5,15 +5,15 @@ """Boilerplate Ocean publishing and running c2d""" import os -import _init_paths from ocean_lib.data_provider.data_service_provider import DataServiceProvider -from ocean_lib.common.agreements.service_types import ServiceTypes +from ocean_lib.agreements.service_types import ServiceTypes +from ocean_lib.web3_internal.currency import pretty_ether_and_wei from ocean_lib.web3_internal.constants import ZERO_ADDRESS +from ocean_lib.models.compute_input import ComputeInput from ocean_lib.web3_internal.currency import to_wei from ocean_lib.web3_internal.wallet import Wallet -from ocean_lib.assets import trusted_algorithms from ocean_lib.services.service import Service -from ocean_lib.models.btoken import BToken #BToken is ERC20 +from ocean_lib.models.btoken import BTokenBase #BToken is ERC20 from ocean_lib.ocean.ocean import Ocean from ocean_lib.config import Config @@ -23,9 +23,17 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): root_module = importlib.import_module(root_file) # python module print(f"Root module is {root_module.root}") - config = Config('config.ini') # Ocean requires a config file with network, metadata, block, and provider info - ocean = Ocean(config) - OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) + d = { + 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), + 'BLOCK_CONFIRMATIONS': 0, + 'metadataCacheUri' : 'https://aquarius.oceanprotocol.com', + 'providerUri' : 'https://provider.rinkeby.oceanprotocol.com', + 'PROVIDER_ADDRESS': '0x00bd138abd70e2f00903268f3db08f2d25677c9e' if config.runtime_options.get("provider_address") is None else config.runtime_options.get("provider_address"), + 'downloads.path': 'consume-downloads', + } + + ocean = Ocean(d) + OCEAN_token = BTokenBase(ocean.web3, ocean.OCEAN_address) provider_url = DataServiceProvider.get_url(ocean.config) @@ -100,7 +108,15 @@ def deploy(base_path: Path, root_file: 
str, config: SameConfig): services=[ALG_access_service], data_token_address=ALG_datatoken.address) - trusted_algorithms.add_publisher_trusted_algorithm('DATA_ddo', ALG_ddo.did, config.metadata_cache_uri) # project-specific + DATA_did = config.DATA_ddo.did # for convenience + ALG_did = ALG_ddo.did + DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions + ALG_DDO = ocean.assets.resolve(ALG_did) + + compute_service = DATA_DDO.get_service('compute') + algo_service = ALG_DDO.get_service('access') + + compute_service.add_publisher_trusted_algorithm(ALG_ddo) ocean.assets.update('DATA_ddo', publisher_wallet=wallet) # project-specific """ @@ -114,7 +130,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): did = 'SPECIFY' pool_address = 'SPECIFY' - wallet = Wallet(ocean.web3, private_key=private_key, transaction_timeout=20, block_confirmations=0) + wallet = Wallet(ocean.web3, private_key=os.getenv('PRIVATE_KEY'), transaction_timeout=20, block_confirmations=0) assert wallet is not None, "Wallet error, initialize app again" # Get asset, datatoken_address asset = ocean.assets.resolve(did) @@ -149,17 +165,6 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): Running C2D """ - DATA_did = DATA_ddo.did # for convenience - ALG_did = ALG_ddo.did - DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions - ALG_DDO = ocean.assets.resolve(ALG_did) - - compute_service = DATA_DDO.get_service('compute') - algo_service = ALG_DDO.get_service('access') - - from ocean_lib.web3_internal.constants import ZERO_ADDRESS - from ocean_lib.models.compute_input import ComputeInput - # order & pay for dataset dataset_order_requirements = ocean.assets.order( DATA_did, wallet.address, service_type=compute_service.type From d9d000784ee3b38d7436e1674e5d9424964d10c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 23 Jun 2022 09:26:01 +0100 Subject: [PATCH 37/99] WIP: deploy with config params --- sameproject/ops/ocean/deploy.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 4d1a05f6..216b2192 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -48,7 +48,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): - model metadata (name, date, compute, etc.) 
""" - wallet = Wallet(ocean.web3, os.getenv('TEST_PRIVATE_KEY1'), transaction_timeout=20, block_confirmations=config.block_confirmations) + wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=config.block_confirmations) print(f"wallet.address = '{wallet.address}'") assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" @@ -66,22 +66,22 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): "algorithm": { "language": "python", "format": "docker-image", - "version": "0.1", # project-specific + "version": config.runtime_options.get("algo_version"), # project-specific "container": { "entrypoint": "python $ALGO", "image": "oceanprotocol/algo_dockers", - "tag": "python-branin" # project-specific + "tag": config.runtime_options.get("algo_tag") # project-specific } }, "files": [ { - "url": "https://raw.githubusercontent.com/trentmc/branin/main/gpr.py", # project-specific + "url": config.runtime_options.get("algo_url"), # project-specific "index": 0, "contentType": "text/text", } ], - "name": "gpr", "author": "Trent", "license": "CC0", # project-specific - "dateCreated": "2020-01-28T10:55:11Z" # project-specific + "name": config.runtime_options.get("algo_name"), "author": config.runtime_options.get("author"), "license": config.runtime_options.get("licence"), + "dateCreated": "2022" # project-specific } } ALG_service_attributes = { @@ -108,7 +108,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): services=[ALG_access_service], data_token_address=ALG_datatoken.address) - DATA_did = config.DATA_ddo.did # for convenience + DATA_did = config.runtime_options.get("dt_did") ALG_did = ALG_ddo.did DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions ALG_DDO = ocean.assets.resolve(ALG_did) @@ -127,14 +127,12 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): - datatoken DID and pool address """ - did = 'SPECIFY' - pool_address = 'SPECIFY' + pool_address = config.runtime_options.get("dt_pool") - wallet = Wallet(ocean.web3, private_key=os.getenv('PRIVATE_KEY'), transaction_timeout=20, block_confirmations=0) assert wallet is not None, "Wallet error, initialize app again" # Get asset, datatoken_address - asset = ocean.assets.resolve(did) - data_token_address = f'0x{did[7:]}' + asset = ocean.assets.resolve(DATA_did) + data_token_address = f'0x{DATA_did[7:]}' print('Executing Transaction') #my wallet @@ -155,7 +153,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): ocean.pool.buy_data_tokens( pool_address, amount=to_wei(1), # buy 1.0 datatoken - max_OCEAN_amount=to_wei(10), # pay up to 10.0 OCEAN + max_OCEAN_amount=to_wei(config.runtime_options.get("max_dt_price")), # pay up to 10.0 OCEAN from_wallet=wallet ) print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") From 804dd55f97cbc9b6dce10e323ecbf9ba81393181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 23 Jun 2022 17:14:36 +0100 Subject: [PATCH 38/99] WIP: debugging options --- sameproject/data/config.py | 1 + sameproject/ops/ocean/deploy.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sameproject/data/config.py b/sameproject/data/config.py index 2593114c..27c667a7 100644 --- a/sameproject/data/config.py +++ b/sameproject/data/config.py @@ -146,6 +146,7 @@ def inject_runtime_options(self): data.runtime_options = {} for opt in list_options(): + 
print(f"OPT IS {get_option_value(opt)}") if get_option_value(opt) is not None: data.runtime_options[opt] = get_option_value(opt) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 216b2192..6dbfa355 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -15,10 +15,10 @@ from ocean_lib.services.service import Service from ocean_lib.models.btoken import BTokenBase #BToken is ERC20 from ocean_lib.ocean.ocean import Ocean -from ocean_lib.config import Config def deploy(base_path: Path, root_file: str, config: SameConfig): + print(f'Config is {config}') with helpers.add_path(str(base_path)): root_module = importlib.import_module(root_file) # python module print(f"Root module is {root_module.root}") @@ -48,7 +48,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): - model metadata (name, date, compute, etc.) """ - wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=config.block_confirmations) + wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) print(f"wallet.address = '{wallet.address}'") assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" From ea17fb4f7f1f2330d97184c22860ff9efbbf9563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 23 Jun 2022 17:26:00 +0100 Subject: [PATCH 39/99] Config params working in deploy --- sameproject/data/config.py | 1 - sameproject/ops/ocean/deploy.py | 1 - 2 files changed, 2 deletions(-) diff --git a/sameproject/data/config.py b/sameproject/data/config.py index 27c667a7..2593114c 100644 --- a/sameproject/data/config.py +++ b/sameproject/data/config.py @@ -146,7 +146,6 @@ def inject_runtime_options(self): data.runtime_options = {} for opt in list_options(): - print(f"OPT IS {get_option_value(opt)}") if get_option_value(opt) is not None: data.runtime_options[opt] = get_option_value(opt) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 6dbfa355..e2945277 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -18,7 +18,6 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): - print(f'Config is {config}') with helpers.add_path(str(base_path)): root_module = importlib.import_module(root_file) # python module print(f"Root module is {root_module.root}") From d457a7cd56f085c594513f340fb6280785aebb01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 24 Jun 2022 09:01:41 +0100 Subject: [PATCH 40/99] WIP: added rawcode to algorithm metadata --- sameproject/ops/ocean/deploy.py | 12 ++++++------ sameproject/ops/ocean/render.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index e2945277..0ef70e74 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -17,11 +17,10 @@ from ocean_lib.ocean.ocean import Ocean -def deploy(base_path: Path, root_file: str, config: SameConfig): - with helpers.add_path(str(base_path)): - root_module = importlib.import_module(root_file) # python module - print(f"Root module is {root_module.root}") - +def deploy(base_path: Path, + root_file: str, # root function with notebook code (string) + config: SameConfig): + d = { 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else 
config.runtime_options.get("network"), 'BLOCK_CONFIRMATIONS': 0, @@ -74,7 +73,8 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): }, "files": [ { - "url": config.runtime_options.get("algo_url"), # project-specific + # "url": config.runtime_options.get("algo_url"), # project-specific + "rawcode": root_file, # not sure whether this works yet "index": 0, "contentType": "text/text", } diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 7f125dc4..daed1ca4 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -26,7 +26,7 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str root_path = Path(compile_path) / f"{root_pipeline_name}.py" helpers.write_file(root_path, root_file_string) - return (compile_path, root_pipeline_name) + return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name def _build_step_file(env: Environment, step: Step, same_config) -> str: From e0a706435d37cfa99d8758aea6c268fa9751c253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 29 Jun 2022 19:28:02 +0100 Subject: [PATCH 41/99] FIX: refactored for ocean v3 --- sameproject/ops/ocean/deploy.py | 74 +++++++++++++++++---------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 0ef70e74..534f7d8b 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -1,27 +1,27 @@ -from sameproject.data.config import SameConfig -from sameproject.ops import helpers -from pathlib import Path -import importlib -"""Boilerplate Ocean publishing and running c2d""" - -import os from ocean_lib.data_provider.data_service_provider import DataServiceProvider -from ocean_lib.agreements.service_types import ServiceTypes +from ocean_lib.common.agreements.service_types import ServiceTypes from ocean_lib.web3_internal.currency import pretty_ether_and_wei from ocean_lib.web3_internal.constants import ZERO_ADDRESS from ocean_lib.models.compute_input import ComputeInput from ocean_lib.web3_internal.currency import to_wei from ocean_lib.web3_internal.wallet import Wallet +from ocean_lib.assets import trusted_algorithms from ocean_lib.services.service import Service -from ocean_lib.models.btoken import BTokenBase #BToken is ERC20 +from ocean_lib.models.btoken import BToken from ocean_lib.ocean.ocean import Ocean +from sameproject.data.config import SameConfig +from sameproject.ops import helpers +from pathlib import Path +import importlib +import os def deploy(base_path: Path, root_file: str, # root function with notebook code (string) config: SameConfig): + print(f'Config is {config}') - d = { + conf = { 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), 'BLOCK_CONFIRMATIONS': 0, 'metadataCacheUri' : 'https://aquarius.oceanprotocol.com', @@ -30,8 +30,8 @@ def deploy(base_path: Path, 'downloads.path': 'consume-downloads', } - ocean = Ocean(d) - OCEAN_token = BTokenBase(ocean.web3, ocean.OCEAN_address) + ocean = Ocean(conf) + OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) provider_url = DataServiceProvider.get_url(ocean.config) @@ -56,9 +56,8 @@ def deploy(base_path: Path, ALG_datatoken.mint(wallet.address, to_wei(100), wallet) print(f"ALG_datatoken.address = '{ALG_datatoken.address}'") - # Specify metadata and service attributes, for "GPR" algorithm script. 
- # In same location as Branin test dataset. GPR = Gaussian Process Regression. - ALG_metadata = { + # Specify metadata and service attributes for algorithm script. + ALG_metadata = { "main": { "type": "algorithm", "algorithm": { @@ -68,19 +67,18 @@ def deploy(base_path: Path, "container": { "entrypoint": "python $ALGO", "image": "oceanprotocol/algo_dockers", - "tag": config.runtime_options.get("algo_tag") # project-specific - } + "tag": config.runtime_options.get("algo_tag"), # project-specific + }, }, "files": [ { - # "url": config.runtime_options.get("algo_url"), # project-specific - "rawcode": root_file, # not sure whether this works yet + "url": config.runtime_options.get("algo_url"), # not sure whether this works yet "index": 0, "contentType": "text/text", - } + }, ], "name": config.runtime_options.get("algo_name"), "author": config.runtime_options.get("author"), "license": config.runtime_options.get("licence"), - "dateCreated": "2022" # project-specific + "dateCreated": "2022", # project-specific } } ALG_service_attributes = { @@ -111,13 +109,16 @@ def deploy(base_path: Path, ALG_did = ALG_ddo.did DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions ALG_DDO = ocean.assets.resolve(ALG_did) + while ALG_DDO == None: + ALG_DDO = ocean.assets.resolve(ALG_did) + print("Waiting for algorithm DDO") + pass compute_service = DATA_DDO.get_service('compute') algo_service = ALG_DDO.get_service('access') - compute_service.add_publisher_trusted_algorithm(ALG_ddo) - ocean.assets.update('DATA_ddo', publisher_wallet=wallet) # project-specific - + trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') + ocean.assets.update(DATA_DDO, publisher_wallet=wallet) """ Datatoken buying @@ -130,24 +131,25 @@ def deploy(base_path: Path, assert wallet is not None, "Wallet error, initialize app again" # Get asset, datatoken_address - asset = ocean.assets.resolve(DATA_did) data_token_address = f'0x{DATA_did[7:]}' + algo_token_address = f'0x{ALG_did[7:]}' print('Executing Transaction') - #my wallet + + # Wallet status print(f"Environment Wallet Address = '{wallet.address}'") print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") print(f"Wallet ETH = {pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") - #Verify that Bob has ETH + + # Verify wallet has ETH assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" - #Verify that Bob has OCEAN + #Verify wallet has OCEAN assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" - # print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") - # assert data_token.balanceOf(wallet.address) >= to_wei(1), "Bob didn't get 1.0 datatokens" - #Bob points to the service object - fee_receiver = ZERO_ADDRESS # could also be market address - #Bob buys 1.0 datatokens - the amount needed to consume the dataset. + #Buy 1.0 datatoken - the amount needed to consume the dataset. 
data_token = ocean.get_data_token(data_token_address) + algo_token = ocean.get_data_token(algo_token_address) + print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") + print('Buying Data Token') ocean.pool.buy_data_tokens( pool_address, @@ -155,7 +157,7 @@ def deploy(base_path: Path, max_OCEAN_amount=to_wei(config.runtime_options.get("max_dt_price")), # pay up to 10.0 OCEAN from_wallet=wallet ) - print(f"I have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") + print(f"You have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") """ @@ -204,7 +206,9 @@ def deploy(base_path: Path, print(f"Started compute job with id: {job_id}") # for monitoring C2D status - print(ocean.compute.status(DATA_did, job_id, wallet)) + while ocean.compute.status(DATA_did, job_id, wallet)['statusText'] != 'Job finished': + print(ocean.compute.status(DATA_did, job_id, wallet)['statusText']) + pass # retrieving result result = ocean.compute.result_file(DATA_did, job_id, 0, wallet) # 0 index, means we retrieve the results from the first dataset index From b9dd073569314976b81f859b02b8b92db6d601de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 29 Jun 2022 19:28:26 +0100 Subject: [PATCH 42/99] WIP: added ocean to conftest --- test/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/conftest.py b/test/conftest.py index c548d3ce..5be3fd5c 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -23,6 +23,11 @@ "default": False, "help": "include pytorch, tensorflow and sklearn notebooks in backend tests", }], + ["ocean", { + "action": "store_true", + "default": False, + "help": "run ocean backend tests, requires ocean installation", + }], ] From 9b5bc4601be3dc866d36293fd0bb56785ef9a710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 29 Jun 2022 19:29:45 +0100 Subject: [PATCH 43/99] WIP: added algo_url option --- sameproject/ops/ocean/options.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sameproject/ops/ocean/options.py b/sameproject/ops/ocean/options.py index b95afe39..a7c418e7 100644 --- a/sameproject/ops/ocean/options.py +++ b/sameproject/ops/ocean/options.py @@ -72,5 +72,11 @@ backend="ocean", ) +register_option( + "algo_url", + "URL where the algorithm script is stored", + backend="ocean", +) + From d56022478970a53550b88c672eaaed8304683ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 29 Jun 2022 20:29:55 +0100 Subject: [PATCH 44/99] WIP: added logging statement --- sameproject/ops/ocean/deploy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 534f7d8b..e36ca883 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -19,7 +19,6 @@ def deploy(base_path: Path, root_file: str, # root function with notebook code (string) config: SameConfig): - print(f'Config is {config}') conf = { 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), @@ -113,9 +112,9 @@ def deploy(base_path: Path, ALG_DDO = ocean.assets.resolve(ALG_did) print("Waiting for algorithm DDO") pass - compute_service = DATA_DDO.get_service('compute') algo_service = ALG_DDO.get_service('access') + print(f'Algorithm DDO is {ALG_DDO}') 
trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') ocean.assets.update(DATA_DDO, publisher_wallet=wallet) From ad8b4d21c9eb6f82cf896b255f546b45cab6570f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 29 Jun 2022 20:30:06 +0100 Subject: [PATCH 45/99] WIP: ocean template test --- test/ops/ocean/test_ocean.py | 11 ++++++++ test/testdata/__init__.py | 10 +++++++ .../oceandata/arithmetic/arithmetic.ipynb | 21 +++++++++++++++ .../oceandata/arithmetic/requirements.txt | 0 test/testdata/oceandata/arithmetic/same.yaml | 27 +++++++++++++++++++ 5 files changed, 69 insertions(+) create mode 100644 test/ops/ocean/test_ocean.py create mode 100644 test/testdata/oceandata/arithmetic/arithmetic.ipynb create mode 100644 test/testdata/oceandata/arithmetic/requirements.txt create mode 100644 test/testdata/oceandata/arithmetic/same.yaml diff --git a/test/ops/ocean/test_ocean.py b/test/ops/ocean/test_ocean.py new file mode 100644 index 00000000..287e4fb1 --- /dev/null +++ b/test/ops/ocean/test_ocean.py @@ -0,0 +1,11 @@ +from sameproject.ops.backends import deploy +import test.testdata +import pytest +import yaml + + +@pytest.mark.ocean +@test.testdata.notebooks("oceandata") +def test_ocean_deploy(config, notebook, requirements, validation_fn): + deployment = deploy("ocean", "", "", config) + assert deployment == b'' \ No newline at end of file diff --git a/test/testdata/__init__.py b/test/testdata/__init__.py index e4d42885..f5162105 100644 --- a/test/testdata/__init__.py +++ b/test/testdata/__init__.py @@ -38,6 +38,8 @@ def notebooks(*args) -> Callable: entries.append(entry) if len(entries) == 0: + print(*args) + # print(_registry) raise Exception("Attempted to fetch non-existent testdata groups '{args}'.") return _get_decorator(entries) @@ -331,3 +333,11 @@ def _validate_features_datasets(res): "tensorflow", Path(__file__).parent / "tensorflow/variational_auto_encoder/same.yaml", ) + +# A selection of Ocean notebooks +_register_notebook( + "arithmetic", + "Returns a string.", + "oceandata", + Path(__file__).parent / "oceandata/arithmetic/same.yaml", +) \ No newline at end of file diff --git a/test/testdata/oceandata/arithmetic/arithmetic.ipynb b/test/testdata/oceandata/arithmetic/arithmetic.ipynb new file mode 100644 index 00000000..f5ccf5a8 --- /dev/null +++ b/test/testdata/oceandata/arithmetic/arithmetic.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Hello\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/testdata/oceandata/arithmetic/requirements.txt b/test/testdata/oceandata/arithmetic/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/test/testdata/oceandata/arithmetic/same.yaml b/test/testdata/oceandata/arithmetic/same.yaml new file mode 100644 index 00000000..a14ac37e --- /dev/null +++ b/test/testdata/oceandata/arithmetic/same.yaml @@ -0,0 +1,27 @@ +apiVersion: sameproject.ml/v1alpha1 +environments: + default: + image_tag: combinatorml/jupyterlab-tensorflow-opencv:0.9 +metadata: + labels: [] + name: default_config + version: 0.0.0 +notebook: + name: arithmetic + path: arithmetic.ipynb + requirements: requirements.txt +run: + name: default_config run +runtime_options: + network: https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80 + provider_address: 
'0x00bd138abd70e2f00903268f3db08f2d25677c9e' + wallet_private_key: '0xef4b441145c1d0f3b4bc6d61d29f5c6e502359481152f869247c7a4244d45209' + dt_did: 'did:op:d29293A09B8e5871b9028fc3CE232963050E9f69' + dt_pool: '0x35e256beA68eca220e0A71f14f2e1D5924bcf4a0' + algo_tag: ari + algo_version: "1.0.0" + algo_name: arithmetic + author: Algovera + licence: MIT + max_dt_price: 20 + algo_url: "https://raw.githubusercontent.com/smejak/test/main/test.py" From 345707345d6eee0548cf7ebadd5602e5112f373d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 09:37:27 +0100 Subject: [PATCH 46/99] FIX: working, modular deploy, passed test --- sameproject/ops/ocean/deploy.py | 83 +++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index e36ca883..2bcb9267 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -20,6 +20,44 @@ def deploy(base_path: Path, root_file: str, # root function with notebook code (string) config: SameConfig): + ocean, OCEAN_token, provider_url = configure_ocean(config=config) + wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) + print(f"wallet.address = '{wallet.address}'") + assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" + + ALG_ddo, ALG_datatoken = algo_publish(config, wallet, ocean, provider_url) + + DATA_did = config.runtime_options.get("dt_did") + ALG_did = ALG_ddo.did + DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions + ALG_DDO = ocean.assets.resolve(ALG_did) + while ALG_DDO == None: + ALG_DDO = ocean.assets.resolve(ALG_did) + print("Waiting for algorithm DDO") + pass + compute_service = DATA_DDO.get_service('compute') + algo_service = ALG_DDO.get_service('access') + print(f'Algorithm DDO is {ALG_DDO}') + + trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') + ocean.assets.update(DATA_DDO, publisher_wallet=wallet) + + + # Datatoken buying + data_token_address = f'0x{DATA_did[7:]}' + data_token = ocean.get_data_token(data_token_address) + if data_token.balanceOf(wallet.address) < to_wei(1): + print('Not enough datatokens in wallet, buying...') + buy_dt(config, wallet, ocean, OCEAN_token, DATA_did) + + algo_token_address = f'0x{ALG_did[7:]}' + algo_token = ocean.get_data_token(algo_token_address) + print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") + + result = run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service) + return result + +def configure_ocean(config): conf = { 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), 'BLOCK_CONFIRMATIONS': 0, @@ -33,7 +71,9 @@ def deploy(base_path: Path, OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) provider_url = DataServiceProvider.get_url(ocean.config) + return ocean, OCEAN_token, provider_url +def algo_publish(config, wallet, ocean, provider_url): """ Algorithm publishing @@ -45,11 +85,6 @@ def deploy(base_path: Path, - model metadata (name, date, compute, etc.) 
""" - wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) - print(f"wallet.address = '{wallet.address}'") - assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" - - # Publish ALG datatoken ALG_datatoken = ocean.create_data_token('ALG1', 'ALG1', wallet, blob=ocean.config.metadata_cache_uri) ALG_datatoken.mint(wallet.address, to_wei(100), wallet) @@ -104,20 +139,9 @@ def deploy(base_path: Path, services=[ALG_access_service], data_token_address=ALG_datatoken.address) - DATA_did = config.runtime_options.get("dt_did") - ALG_did = ALG_ddo.did - DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions - ALG_DDO = ocean.assets.resolve(ALG_did) - while ALG_DDO == None: - ALG_DDO = ocean.assets.resolve(ALG_did) - print("Waiting for algorithm DDO") - pass - compute_service = DATA_DDO.get_service('compute') - algo_service = ALG_DDO.get_service('access') - print(f'Algorithm DDO is {ALG_DDO}') + return ALG_ddo, ALG_datatoken - trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') - ocean.assets.update(DATA_DDO, publisher_wallet=wallet) +def buy_dt(config, wallet, ocean, OCEAN_token, did): """ Datatoken buying @@ -125,30 +149,22 @@ def deploy(base_path: Path, - wallet from previous step - datatoken DID and pool address """ - pool_address = config.runtime_options.get("dt_pool") - - assert wallet is not None, "Wallet error, initialize app again" + assert wallet is not None, "Wallet error, run pipeline again" # Get asset, datatoken_address - data_token_address = f'0x{DATA_did[7:]}' - algo_token_address = f'0x{ALG_did[7:]}' + asset = ocean.assets.resolve(did) + data_token_address = f'0x{did[7:]}' print('Executing Transaction') - - # Wallet status + #my wallet print(f"Environment Wallet Address = '{wallet.address}'") print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") print(f"Wallet ETH = {pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") - - # Verify wallet has ETH + #Verify that wallet has ETH assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" - #Verify wallet has OCEAN + #Verify that wallet has OCEAN assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" - #Buy 1.0 datatoken - the amount needed to consume the dataset. 
data_token = ocean.get_data_token(data_token_address) - algo_token = ocean.get_data_token(algo_token_address) - print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") - print('Buying Data Token') ocean.pool.buy_data_tokens( pool_address, @@ -158,11 +174,10 @@ def deploy(base_path: Path, ) print(f"You have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") - +def run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service): """ Running C2D """ - # order & pay for dataset dataset_order_requirements = ocean.assets.order( DATA_did, wallet.address, service_type=compute_service.type From 6b984f0df19de2b35d60f5216b5ef48f6447337e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 20:34:53 +0100 Subject: [PATCH 47/99] WIP: added boolean options --- sameproject/ops/ocean/options.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/sameproject/ops/ocean/options.py b/sameproject/ops/ocean/options.py index a7c418e7..caac07df 100644 --- a/sameproject/ops/ocean/options.py +++ b/sameproject/ops/ocean/options.py @@ -1,5 +1,18 @@ from sameproject.ops.runtime_options import register_option +register_option( + "algo_verified", + "Boolean specifying if published algorithm was accepted by the data publisher", + backend="ocean", +) + +register_option( + "algo_pushed", + "Boolean specifying if algorithm has already been pushed to GitHub.", + backend="ocean", +) + + register_option( "network", "The network to use for publishing algorithm and getting the dataset.", @@ -72,11 +85,5 @@ backend="ocean", ) -register_option( - "algo_url", - "URL where the algorithm script is stored", - backend="ocean", -) - From 00114d8aa6c083b02ed36b4ab6f27d9b4295a802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 20:35:05 +0100 Subject: [PATCH 48/99] Added Ocean-SAME docs --- ...-Decentralized-AI-with-The-SAME-Project.md | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 docs/docs/blog/2022-xx-xx-Decentralized-AI-with-The-SAME-Project.md diff --git a/docs/docs/blog/2022-xx-xx-Decentralized-AI-with-The-SAME-Project.md b/docs/docs/blog/2022-xx-xx-Decentralized-AI-with-The-SAME-Project.md new file mode 100644 index 00000000..2654ed2a --- /dev/null +++ b/docs/docs/blog/2022-xx-xx-Decentralized-AI-with-The-SAME-Project.md @@ -0,0 +1,66 @@ +# Developing and training AI models in the decentralized web + +## Ocean Protocol and Decentralized AI + +The SAME Project allows data scientists to easily turn their Jupyter notebooks into executable scripts that can automatically be sent to any compute pipeline. + +Ocean Protocol builds tools for the decentralized data economy, particularly, one of the core features of Ocean Protocol is the ability to train your models on private data, called Compute-to-Data (C2D). + +In C2D, the data scientist first searches the Ocean Market for data they want to traain their algorithm on. Once they found a dataset they like, they would buy access to that dataset through Ocean Protocol's data tokens, which act as tickets denoting who can access some dataset and under what conditions. The data scientist must then publish their model on the Ocean Market as well and execute a series of steps to train their algorithm on the dataset on a separate Compute Provider. 
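In practice those steps boil down to a handful of ocean.py calls, which the SAME-Ocean template wires together for you. The sketch below condenses the flow, assuming the `ocean`, `wallet`, DID strings, datatoken and service objects have already been obtained as described above (the function and variable names here are illustrative, not part of any public SAME API):
```
from ocean_lib.models.compute_input import ComputeInput
from ocean_lib.web3_internal.constants import ZERO_ADDRESS


def run_c2d_sketch(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service):
    # 1. Order and pay for the dataset's compute service.
    data_order = ocean.assets.order(DATA_did, wallet.address, service_type=compute_service.type)
    data_tx_id = ocean.assets.pay_for_service(
        ocean.web3, data_order.amount, data_order.data_token_address,
        DATA_did, compute_service.index, ZERO_ADDRESS, wallet, data_order.computeAddress,
    )

    # 2. Order and pay for access to the algorithm.
    algo_order = ocean.assets.order(ALG_did, wallet.address, service_type=algo_service.type)
    algo_tx_id = ocean.assets.pay_for_service(
        ocean.web3, algo_order.amount, algo_order.data_token_address,
        ALG_did, algo_service.index, ZERO_ADDRESS, wallet, algo_order.computeAddress,
    )

    # 3. Start the compute job on the provider and poll until it finishes.
    job_id = ocean.compute.start(
        [ComputeInput(DATA_did, data_tx_id, compute_service.index)],
        wallet,
        algorithm_did=ALG_did,
        algorithm_tx_id=algo_tx_id,
        algorithm_data_token=ALG_datatoken.address,
    )
    while ocean.compute.status(DATA_did, job_id, wallet)["statusText"] != "Job finished":
        pass

    # 4. Retrieve the result file from the first dataset index.
    return ocean.compute.result_file(DATA_did, job_id, 0, wallet)
```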
More details on C2D can be found [here](https://blog.oceanprotocol.com/v2-ocean-compute-to-data-guide-9a3491034b64). + +Long-story short, the Ocean C2D is a perfect fit for the SAME Project, allowing data scientists to focus more on their model development rather than learning the ins and outs of Ocean Protocol's libraries. + +## SAME-Ocean Template Quickstart + +This short guide assumes you've already installed the SAME Project in your local environment, [here](https://sameproject.ml/getting-started/installing/) is a guide to get you started. + +While most of the Ocean deployment code is abstracted away in the SAME-Ocean template, there are some config parameters that you need to fill in to interact with the Ocean Market, in particular, you'll need a [Web3 wallet](https://metamask.io/) and a wallet private key. To ensure security, make sure to never expose your wallet private key anywhere outside your local environment. For running C2D, export your wallet private key as a local environment variable: +``` +export WALLET_PRIVATE_KEY=='YOUR_PRIVATE_KEY' +``` + +When you're ready to run C2D, navigate to your working Jupyter notebook and in your terminal run +``` +same run -t ocean +``` +Note that at the end of the command, you'll have to add the options shown below. This is done by adding `--option-name=value` +### SAME-Ocean Runtime Options + +* `algo-verified`: bool - specify whether algorithm was verified by the data provider for C2D +* `algo-pushed`: bool - specify whether algorithm was published to GitHub (currently required, aimed to be removed) +* `network`: str - network URL to access Ocean Market on +* `provider-address`: str - address of compute provider +* `wallet-private-key`: str - private key for paying transactions in the pipeline +* `dt-did`: str - Decentralized Identifier of the dataset (found through Ocean Market) +* `dt-pool`: str - address of the dataset liquidity pool (applicable if dataset has dynamic pricing) +* `algo-tag`: str - tag to refer to the model as +* `algo-version`: str - version number of the published model +* `algo-url`: str - GitHub URL to raw model code +* `algo-name`: str - name of model +* `author`: str - model author name +* `licence`: str - model licence +* `max-dt-price`: int - max price willing to pay for dataset (in OCEAN) + + +## The SAME Community + +SAME is entirely open-source and non-commercial. We plan on donating it to a foundation as soon as we can identify one that matches our project's goals. + +What can you do? Please join our community! + +### Public web content + +* [Website](https://sameproject.ml) +* [Google Group](https://groups.google.com/u/2/g/same-project) +* [Slack](https://join.slack.com/t/sameproject/shared_invite/zt-lq9rk2g6-Jyfv3AXu_qnX9LqWCmV7HA) + +### Come join our repo + +* [GitHub Organization](https://github.com/SAME-Project) / [GitHub Project](https://github.com/SAME-Project/same-project) +* Try it out (build instructions included) +* Complain about missing features +* EXPERTS ONLY: Add your own + +Regardless, we are very open to taking your feedback. Thank you so much - onward! 
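One last practical note on the SAME-Ocean runtime options listed above: they can also be pinned in the `runtime_options` block of your `same.yaml` instead of being passed as CLI flags (the YAML keys use underscores where the flags use dashes). A minimal illustrative block with placeholder values only; keep the private key in the `WALLET_PRIVATE_KEY` environment variable rather than committing it here:
```
runtime_options:
  algo_verified: False
  algo_pushed: True
  network: <JSON-RPC endpoint URL>
  provider_address: '<compute provider address>'
  wallet_private_key: '<injected from WALLET_PRIVATE_KEY>'
  dt_did: 'did:op:<dataset DID>'
  dt_pool: '0x<dataset pool address>'
  algo_did: 'did:op:<algorithm DID>'
  algo_name: my-model
  author: <author name>
  licence: MIT
  max_dt_price: 20
```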
+ +-- The Co-founders of the SAME Project ([David Aronchick](https://twitter.com/aronchick) & [Luke Marsden](https://twitter.com/lmarsden)) From 2a0b9701a7692f37f23ff301653267e6ae8ab008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 20:38:31 +0100 Subject: [PATCH 49/99] WIP: added algo-pushed requirement --- sameproject/ops/ocean/render.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index daed1ca4..656d919b 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -16,18 +16,19 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: """Renders the notebook into a root file and a series of step files according to the target requirements. Returns an absolute path to the root file for deployment.""" - templateDir = os.path.dirname(os.path.abspath(__file__)) - templateLoader = FileSystemLoader(templateDir) - print(f"Template dir {templateDir}") - env = Environment(trim_blocks=True, loader=templateLoader) - - root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) - root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" - root_path = Path(compile_path) / f"{root_pipeline_name}.py" - helpers.write_file(root_path, root_file_string) - - return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name - + if same_config.runtime_options.get("algo_pushed") == "False": + templateDir = os.path.dirname(os.path.abspath(__file__)) + templateLoader = FileSystemLoader(templateDir) + print(f"Template dir {templateDir}") + env = Environment(trim_blocks=True, loader=templateLoader) + + root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) + root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" + root_path = Path(compile_path) / f"{root_pipeline_name}.py" + helpers.write_file(root_path, root_file_string) + + return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name + print("Model already rendered and published.") def _build_step_file(env: Environment, step: Step, same_config) -> str: with open(sameproject.ops.explode.__file__, "r") as f: From eb09d2b235d023532c0bf08c7afc0e522d47b5b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 20:43:22 +0100 Subject: [PATCH 50/99] WIP: correct render option --- sameproject/ops/ocean/render.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 656d919b..8bbc6ede 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -16,7 +16,7 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: """Renders the notebook into a root file and a series of step files according to the target requirements. 
Returns an absolute path to the root file for deployment.""" - if same_config.runtime_options.get("algo_pushed") == "False": + if same_config.runtime_options.get("algo_pushed") == False: templateDir = os.path.dirname(os.path.abspath(__file__)) templateLoader = FileSystemLoader(templateDir) print(f"Template dir {templateDir}") From ef1afc6962ce38d4ec081dadabc423cc80de67ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 20:59:04 +0100 Subject: [PATCH 51/99] WIP: added new runtime options --- sameproject/ops/ocean/deploy.py | 43 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 2bcb9267..5c2e3298 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -24,24 +24,29 @@ def deploy(base_path: Path, wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) print(f"wallet.address = '{wallet.address}'") assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" - - ALG_ddo, ALG_datatoken = algo_publish(config, wallet, ocean, provider_url) + if config.runtime_options.get("algo_pushed") == True: + ALG_ddo, ALG_datatoken = algo_publish(config, wallet, ocean, provider_url) - DATA_did = config.runtime_options.get("dt_did") - ALG_did = ALG_ddo.did - DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions - ALG_DDO = ocean.assets.resolve(ALG_did) - while ALG_DDO == None: + DATA_did = config.runtime_options.get("dt_did") + ALG_did = ALG_ddo.did + DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions ALG_DDO = ocean.assets.resolve(ALG_did) - print("Waiting for algorithm DDO") - pass - compute_service = DATA_DDO.get_service('compute') - algo_service = ALG_DDO.get_service('access') - print(f'Algorithm DDO is {ALG_DDO}') - - trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') - ocean.assets.update(DATA_DDO, publisher_wallet=wallet) - + while ALG_DDO == None: + ALG_DDO = ocean.assets.resolve(ALG_did) + print("Waiting for algorithm DDO") + pass + compute_service = DATA_DDO.get_service('compute') + algo_service = ALG_DDO.get_service('access') + print(f'Algorithm DDO is {ALG_DDO}') + + if config.runtime_options.get("algo_verified") == False: + try: + trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') + ocean.assets.update(DATA_DDO, publisher_wallet=wallet) + verified = True + except: + verified = False + pass # Datatoken buying data_token_address = f'0x{DATA_did[7:]}' @@ -53,9 +58,9 @@ def deploy(base_path: Path, algo_token_address = f'0x{ALG_did[7:]}' algo_token = ocean.get_data_token(algo_token_address) print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") - - result = run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service) - return result + if config.runtime_options.get("algo_verified") == True or verified == True: + result = run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service) + return result def configure_ocean(config): conf = { From b242a8f704f6a902c77e31ac3ba16768a4a230a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 30 Jun 2022 20:59:18 +0100 
Subject: [PATCH 52/99] WIP: added runtime options --- test/testdata/oceandata/arithmetic/same.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/testdata/oceandata/arithmetic/same.yaml b/test/testdata/oceandata/arithmetic/same.yaml index a14ac37e..32820b13 100644 --- a/test/testdata/oceandata/arithmetic/same.yaml +++ b/test/testdata/oceandata/arithmetic/same.yaml @@ -13,6 +13,8 @@ notebook: run: name: default_config run runtime_options: + algo_verified: False + algo_pushed: True network: https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80 provider_address: '0x00bd138abd70e2f00903268f3db08f2d25677c9e' wallet_private_key: '0xef4b441145c1d0f3b4bc6d61d29f5c6e502359481152f869247c7a4244d45209' From d41f6a7e7ea9bd75f6e8c9afa38e957d6d8cd533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 20 Jul 2022 08:02:09 +0200 Subject: [PATCH 53/99] WIP: removed publishing from ocean template --- sameproject/ops/ocean/deploy.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 5c2e3298..ac128835 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -24,20 +24,14 @@ def deploy(base_path: Path, wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) print(f"wallet.address = '{wallet.address}'") assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" - if config.runtime_options.get("algo_pushed") == True: - ALG_ddo, ALG_datatoken = algo_publish(config, wallet, ocean, provider_url) - DATA_did = config.runtime_options.get("dt_did") - ALG_did = ALG_ddo.did - DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions - ALG_DDO = ocean.assets.resolve(ALG_did) - while ALG_DDO == None: - ALG_DDO = ocean.assets.resolve(ALG_did) - print("Waiting for algorithm DDO") - pass - compute_service = DATA_DDO.get_service('compute') - algo_service = ALG_DDO.get_service('access') - print(f'Algorithm DDO is {ALG_DDO}') + DATA_did = config.runtime_options.get("dt_did") + ALG_did = config.runtime_options.get("algo_did") + DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions + ALG_DDO = ocean.assets.resolve(ALG_did) + compute_service = DATA_DDO.get_service('compute') + algo_service = ALG_DDO.get_service('access') + print(f'Algorithm DDO is {ALG_DDO}') if config.runtime_options.get("algo_verified") == False: try: From 90c288a8ce44475d963fa70589b301c47dd6cd64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 20 Jul 2022 13:02:04 +0200 Subject: [PATCH 54/99] WIP: remove ocean_publish --- sameproject/ops/ocean/deploy.py | 68 --------------------------------- 1 file changed, 68 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index ac128835..860991f5 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -72,74 +72,6 @@ def configure_ocean(config): return ocean, OCEAN_token, provider_url -def algo_publish(config, wallet, ocean, provider_url): - """ - Algorithm publishing - - Requirements: - - - Model script on GitHub - - wallet private key as environment variable - - dataset we want to train on specified - - model metadata (name, date, compute, etc.) 
- """ - - # Publish ALG datatoken - ALG_datatoken = ocean.create_data_token('ALG1', 'ALG1', wallet, blob=ocean.config.metadata_cache_uri) - ALG_datatoken.mint(wallet.address, to_wei(100), wallet) - print(f"ALG_datatoken.address = '{ALG_datatoken.address}'") - - # Specify metadata and service attributes for algorithm script. - ALG_metadata = { - "main": { - "type": "algorithm", - "algorithm": { - "language": "python", - "format": "docker-image", - "version": config.runtime_options.get("algo_version"), # project-specific - "container": { - "entrypoint": "python $ALGO", - "image": "oceanprotocol/algo_dockers", - "tag": config.runtime_options.get("algo_tag"), # project-specific - }, - }, - "files": [ - { - "url": config.runtime_options.get("algo_url"), # not sure whether this works yet - "index": 0, - "contentType": "text/text", - }, - ], - "name": config.runtime_options.get("algo_name"), "author": config.runtime_options.get("author"), "license": config.runtime_options.get("licence"), - "dateCreated": "2022", # project-specific - } - } - ALG_service_attributes = { - "main": { - "name": "ALG_dataAssetAccessServiceAgreement", - "creator": wallet.address, - "timeout": 3600 * 24, - "datePublished": "2020-01-28T10:55:11Z", - "cost": 1.0, # - } - } - - # Calc ALG service access descriptor. We use the same service provider as DATA - ALG_access_service = Service( - service_endpoint=provider_url, - service_type=ServiceTypes.CLOUD_COMPUTE, - attributes=ALG_service_attributes - ) - - # Publish metadata and service info on-chain - ALG_ddo = ocean.assets.create( - metadata=ALG_metadata, # {"main" : {"type" : "algorithm", ..}, ..} - publisher_wallet=wallet, - services=[ALG_access_service], - data_token_address=ALG_datatoken.address) - - return ALG_ddo, ALG_datatoken - def buy_dt(config, wallet, ocean, OCEAN_token, did): """ Datatoken buying From d67741eef861e8dc875a26d1c8d6e7efa81be150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 21 Jul 2022 06:45:19 +0200 Subject: [PATCH 55/99] WIP: added algo_did runtime option --- test/testdata/oceandata/arithmetic/same.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/testdata/oceandata/arithmetic/same.yaml b/test/testdata/oceandata/arithmetic/same.yaml index 32820b13..5e447682 100644 --- a/test/testdata/oceandata/arithmetic/same.yaml +++ b/test/testdata/oceandata/arithmetic/same.yaml @@ -20,6 +20,7 @@ runtime_options: wallet_private_key: '0xef4b441145c1d0f3b4bc6d61d29f5c6e502359481152f869247c7a4244d45209' dt_did: 'did:op:d29293A09B8e5871b9028fc3CE232963050E9f69' dt_pool: '0x35e256beA68eca220e0A71f14f2e1D5924bcf4a0' + algo_did: 'did:op:46Dcfe856CcBe6C9ebF40753360F18A914F04Ca7' algo_tag: ari algo_version: "1.0.0" algo_name: arithmetic From 71a2c076bdde9c47311faddc5ab9078f0722cea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 14 Sep 2022 10:00:10 +0200 Subject: [PATCH 56/99] WIP: removed algorithm publishing --- sameproject/ops/ocean/deploy.py | 69 --------------------------------- 1 file changed, 69 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 5c2e3298..2c8b92c2 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -25,7 +25,6 @@ def deploy(base_path: Path, print(f"wallet.address = '{wallet.address}'") assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" if config.runtime_options.get("algo_pushed") == True: - ALG_ddo, ALG_datatoken = algo_publish(config, wallet, ocean, provider_url) DATA_did = 
config.runtime_options.get("dt_did") ALG_did = ALG_ddo.did @@ -78,74 +77,6 @@ def configure_ocean(config): return ocean, OCEAN_token, provider_url -def algo_publish(config, wallet, ocean, provider_url): - """ - Algorithm publishing - - Requirements: - - - Model script on GitHub - - wallet private key as environment variable - - dataset we want to train on specified - - model metadata (name, date, compute, etc.) - """ - - # Publish ALG datatoken - ALG_datatoken = ocean.create_data_token('ALG1', 'ALG1', wallet, blob=ocean.config.metadata_cache_uri) - ALG_datatoken.mint(wallet.address, to_wei(100), wallet) - print(f"ALG_datatoken.address = '{ALG_datatoken.address}'") - - # Specify metadata and service attributes for algorithm script. - ALG_metadata = { - "main": { - "type": "algorithm", - "algorithm": { - "language": "python", - "format": "docker-image", - "version": config.runtime_options.get("algo_version"), # project-specific - "container": { - "entrypoint": "python $ALGO", - "image": "oceanprotocol/algo_dockers", - "tag": config.runtime_options.get("algo_tag"), # project-specific - }, - }, - "files": [ - { - "url": config.runtime_options.get("algo_url"), # not sure whether this works yet - "index": 0, - "contentType": "text/text", - }, - ], - "name": config.runtime_options.get("algo_name"), "author": config.runtime_options.get("author"), "license": config.runtime_options.get("licence"), - "dateCreated": "2022", # project-specific - } - } - ALG_service_attributes = { - "main": { - "name": "ALG_dataAssetAccessServiceAgreement", - "creator": wallet.address, - "timeout": 3600 * 24, - "datePublished": "2020-01-28T10:55:11Z", - "cost": 1.0, # - } - } - - # Calc ALG service access descriptor. We use the same service provider as DATA - ALG_access_service = Service( - service_endpoint=provider_url, - service_type=ServiceTypes.CLOUD_COMPUTE, - attributes=ALG_service_attributes - ) - - # Publish metadata and service info on-chain - ALG_ddo = ocean.assets.create( - metadata=ALG_metadata, # {"main" : {"type" : "algorithm", ..}, ..} - publisher_wallet=wallet, - services=[ALG_access_service], - data_token_address=ALG_datatoken.address) - - return ALG_ddo, ALG_datatoken - def buy_dt(config, wallet, ocean, OCEAN_token, did): """ Datatoken buying From 9dd0508d2094b6560c3618f09259d02226f1b823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 7 Oct 2022 15:26:49 +0100 Subject: [PATCH 57/99] WIP: refactoring ocean deploy --- sameproject/ops/ocean/deploy.py | 288 +++++++++++++++----------------- 1 file changed, 138 insertions(+), 150 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 860991f5..c1968be2 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -1,15 +1,3 @@ -from ocean_lib.data_provider.data_service_provider import DataServiceProvider -from ocean_lib.common.agreements.service_types import ServiceTypes -from ocean_lib.web3_internal.currency import pretty_ether_and_wei -from ocean_lib.web3_internal.constants import ZERO_ADDRESS -from ocean_lib.models.compute_input import ComputeInput -from ocean_lib.web3_internal.currency import to_wei -from ocean_lib.web3_internal.wallet import Wallet -from ocean_lib.assets import trusted_algorithms -from ocean_lib.services.service import Service -from ocean_lib.models.btoken import BToken -from ocean_lib.ocean.ocean import Ocean - from sameproject.data.config import SameConfig from sameproject.ops import helpers from pathlib import Path @@ -19,142 +7,142 @@ def 
deploy(base_path: Path, root_file: str, # root function with notebook code (string) config: SameConfig): - - ocean, OCEAN_token, provider_url = configure_ocean(config=config) - wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) - print(f"wallet.address = '{wallet.address}'") - assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" + return +# ocean, OCEAN_token, provider_url = configure_ocean(config=config) +# wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) +# print(f"wallet.address = '{wallet.address}'") +# assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" - DATA_did = config.runtime_options.get("dt_did") - ALG_did = config.runtime_options.get("algo_did") - DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions - ALG_DDO = ocean.assets.resolve(ALG_did) - compute_service = DATA_DDO.get_service('compute') - algo_service = ALG_DDO.get_service('access') - print(f'Algorithm DDO is {ALG_DDO}') - - if config.runtime_options.get("algo_verified") == False: - try: - trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') - ocean.assets.update(DATA_DDO, publisher_wallet=wallet) - verified = True - except: - verified = False - pass - - # Datatoken buying - data_token_address = f'0x{DATA_did[7:]}' - data_token = ocean.get_data_token(data_token_address) - if data_token.balanceOf(wallet.address) < to_wei(1): - print('Not enough datatokens in wallet, buying...') - buy_dt(config, wallet, ocean, OCEAN_token, DATA_did) - - algo_token_address = f'0x{ALG_did[7:]}' - algo_token = ocean.get_data_token(algo_token_address) - print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") - if config.runtime_options.get("algo_verified") == True or verified == True: - result = run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service) - return result - -def configure_ocean(config): - conf = { - 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), - 'BLOCK_CONFIRMATIONS': 0, - 'metadataCacheUri' : 'https://aquarius.oceanprotocol.com', - 'providerUri' : 'https://provider.rinkeby.oceanprotocol.com', - 'PROVIDER_ADDRESS': '0x00bd138abd70e2f00903268f3db08f2d25677c9e' if config.runtime_options.get("provider_address") is None else config.runtime_options.get("provider_address"), - 'downloads.path': 'consume-downloads', - } - - ocean = Ocean(conf) - OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) - provider_url = DataServiceProvider.get_url(ocean.config) - - return ocean, OCEAN_token, provider_url - -def buy_dt(config, wallet, ocean, OCEAN_token, did): - """ - Datatoken buying - - Requirements: - - wallet from previous step - - datatoken DID and pool address - """ - pool_address = config.runtime_options.get("dt_pool") - assert wallet is not None, "Wallet error, run pipeline again" - # Get asset, datatoken_address - asset = ocean.assets.resolve(did) - data_token_address = f'0x{did[7:]}' - - print('Executing Transaction') - #my wallet - print(f"Environment Wallet Address = '{wallet.address}'") - print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") - print(f"Wallet ETH = 
{pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") - #Verify that wallet has ETH - assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" - #Verify that wallet has OCEAN - assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" - data_token = ocean.get_data_token(data_token_address) - print('Buying Data Token') - ocean.pool.buy_data_tokens( - pool_address, - amount=to_wei(1), # buy 1.0 datatoken - max_OCEAN_amount=to_wei(config.runtime_options.get("max_dt_price")), # pay up to 10.0 OCEAN - from_wallet=wallet - ) - print(f"You have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") - -def run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service): - """ - Running C2D - """ - # order & pay for dataset - dataset_order_requirements = ocean.assets.order( - DATA_did, wallet.address, service_type=compute_service.type - ) - - DATA_order_tx_id = ocean.assets.pay_for_service( - ocean.web3, - dataset_order_requirements.amount, - dataset_order_requirements.data_token_address, - DATA_did, - compute_service.index, - ZERO_ADDRESS, - wallet, - dataset_order_requirements.computeAddress, - ) - - # order & pay for algo - algo_order_requirements = ocean.assets.order( - ALG_did, wallet.address, service_type=algo_service.type - ) - ALG_order_tx_id = ocean.assets.pay_for_service( - ocean.web3, - algo_order_requirements.amount, - algo_order_requirements.data_token_address, - ALG_did, - algo_service.index, - ZERO_ADDRESS, - wallet, - algo_order_requirements.computeAddress, - ) - - compute_inputs = [ComputeInput(DATA_did, DATA_order_tx_id, compute_service.index)] - job_id = ocean.compute.start( - compute_inputs, - wallet, - algorithm_did=ALG_did, - algorithm_tx_id=ALG_order_tx_id, - algorithm_data_token=ALG_datatoken.address - ) - print(f"Started compute job with id: {job_id}") - - # for monitoring C2D status - while ocean.compute.status(DATA_did, job_id, wallet)['statusText'] != 'Job finished': - print(ocean.compute.status(DATA_did, job_id, wallet)['statusText']) - pass - - # retrieving result - result = ocean.compute.result_file(DATA_did, job_id, 0, wallet) # 0 index, means we retrieve the results from the first dataset index - return result \ No newline at end of file +# DATA_did = config.runtime_options.get("dt_did") +# ALG_did = config.runtime_options.get("algo_did") +# DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions +# ALG_DDO = ocean.assets.resolve(ALG_did) +# compute_service = DATA_DDO.get_service('compute') +# algo_service = ALG_DDO.get_service('access') +# print(f'Algorithm DDO is {ALG_DDO}') + +# if config.runtime_options.get("algo_verified") == False: +# try: +# trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') +# ocean.assets.update(DATA_DDO, publisher_wallet=wallet) +# verified = True +# except: +# verified = False +# pass + +# # Datatoken buying +# data_token_address = f'0x{DATA_did[7:]}' +# data_token = ocean.get_data_token(data_token_address) +# if data_token.balanceOf(wallet.address) < to_wei(1): +# print('Not enough datatokens in wallet, buying...') +# buy_dt(config, wallet, ocean, OCEAN_token, DATA_did) + +# algo_token_address = f'0x{ALG_did[7:]}' +# algo_token = ocean.get_data_token(algo_token_address) +# print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") +# if 
config.runtime_options.get("algo_verified") == True or verified == True: +# result = run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service) +# return result + +# def configure_ocean(config): +# conf = { +# 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), +# 'BLOCK_CONFIRMATIONS': 0, +# 'metadataCacheUri' : 'https://aquarius.oceanprotocol.com', +# 'providerUri' : 'https://provider.rinkeby.oceanprotocol.com', +# 'PROVIDER_ADDRESS': '0x00bd138abd70e2f00903268f3db08f2d25677c9e' if config.runtime_options.get("provider_address") is None else config.runtime_options.get("provider_address"), +# 'downloads.path': 'consume-downloads', +# } + +# ocean = Ocean(conf) +# OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) +# provider_url = DataServiceProvider.get_url(ocean.config) + +# return ocean, OCEAN_token, provider_url + +# def buy_dt(config, wallet, ocean, OCEAN_token, did): +# """ +# Datatoken buying + +# Requirements: +# - wallet from previous step +# - datatoken DID and pool address +# """ +# pool_address = config.runtime_options.get("dt_pool") +# assert wallet is not None, "Wallet error, run pipeline again" +# # Get asset, datatoken_address +# asset = ocean.assets.resolve(did) +# data_token_address = f'0x{did[7:]}' + +# print('Executing Transaction') +# #my wallet +# print(f"Environment Wallet Address = '{wallet.address}'") +# print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") +# print(f"Wallet ETH = {pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") +# #Verify that wallet has ETH +# assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" +# #Verify that wallet has OCEAN +# assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" +# data_token = ocean.get_data_token(data_token_address) +# print('Buying Data Token') +# ocean.pool.buy_data_tokens( +# pool_address, +# amount=to_wei(1), # buy 1.0 datatoken +# max_OCEAN_amount=to_wei(config.runtime_options.get("max_dt_price")), # pay up to 10.0 OCEAN +# from_wallet=wallet +# ) +# print(f"You have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") + +# def run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service): +# """ +# Running C2D +# """ +# # order & pay for dataset +# dataset_order_requirements = ocean.assets.order( +# DATA_did, wallet.address, service_type=compute_service.type +# ) + +# DATA_order_tx_id = ocean.assets.pay_for_service( +# ocean.web3, +# dataset_order_requirements.amount, +# dataset_order_requirements.data_token_address, +# DATA_did, +# compute_service.index, +# ZERO_ADDRESS, +# wallet, +# dataset_order_requirements.computeAddress, +# ) + +# # order & pay for algo +# algo_order_requirements = ocean.assets.order( +# ALG_did, wallet.address, service_type=algo_service.type +# ) +# ALG_order_tx_id = ocean.assets.pay_for_service( +# ocean.web3, +# algo_order_requirements.amount, +# algo_order_requirements.data_token_address, +# ALG_did, +# algo_service.index, +# ZERO_ADDRESS, +# wallet, +# algo_order_requirements.computeAddress, +# ) + +# compute_inputs = [ComputeInput(DATA_did, DATA_order_tx_id, compute_service.index)] +# job_id = ocean.compute.start( +# compute_inputs, +# wallet, +# algorithm_did=ALG_did, +# algorithm_tx_id=ALG_order_tx_id, +# algorithm_data_token=ALG_datatoken.address +# ) +# print(f"Started compute job with id: 
{job_id}") + +# # for monitoring C2D status +# while ocean.compute.status(DATA_did, job_id, wallet)['statusText'] != 'Job finished': +# print(ocean.compute.status(DATA_did, job_id, wallet)['statusText']) +# pass + +# # retrieving result +# result = ocean.compute.result_file(DATA_did, job_id, 0, wallet) # 0 index, means we retrieve the results from the first dataset index +# return result \ No newline at end of file From eabdee61461e4fdb0fe48b1846df0cfc8b04873d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 13:11:39 +0100 Subject: [PATCH 58/99] WIP: fixed wrong nb name --- sameproject/cli/init.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sameproject/cli/init.py b/sameproject/cli/init.py index 58e94b0c..3d1a5b71 100644 --- a/sameproject/cli/init.py +++ b/sameproject/cli/init.py @@ -71,6 +71,7 @@ def init(): click.echo(f"No such file found: {nb_path}", err=True) exit(1) nb_dict = read_notebook(nb_path) + nb_name = str(nb_path.name).replace(".ipynb", "") nb_name = click.prompt("Notebook name", default=nb_name, type=str) # Docker image data: From f9f397bfdb483749949cd9e99e38c4396fb8e8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 13:36:58 +0100 Subject: [PATCH 59/99] WIP: refactored render --- sameproject/ops/ocean/render.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 8bbc6ede..11664fa7 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -16,19 +16,18 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: """Renders the notebook into a root file and a series of step files according to the target requirements. 
Returns an absolute path to the root file for deployment.""" - if same_config.runtime_options.get("algo_pushed") == False: - templateDir = os.path.dirname(os.path.abspath(__file__)) - templateLoader = FileSystemLoader(templateDir) - print(f"Template dir {templateDir}") - env = Environment(trim_blocks=True, loader=templateLoader) - - root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) - root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" - root_path = Path(compile_path) / f"{root_pipeline_name}.py" - helpers.write_file(root_path, root_file_string) - - return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name - print("Model already rendered and published.") + + templateDir = os.path.dirname(os.path.abspath(__file__)) + templateLoader = FileSystemLoader(templateDir) + print(f"Template dir {templateDir}") + env = Environment(trim_blocks=True, loader=templateLoader) + + root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) + root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" + root_path = Path(compile_path) / f"{root_pipeline_name}.py" + helpers.write_file(root_path, root_file_string) + + return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name def _build_step_file(env: Environment, step: Step, same_config) -> str: with open(sameproject.ops.explode.__file__, "r") as f: From 42f1f33136e609b090ad37dccd392ad11c90937b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 13:38:51 +0100 Subject: [PATCH 60/99] WIP: refactored template for c2d --- sameproject/ops/ocean/step.jinja | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sameproject/ops/ocean/step.jinja b/sameproject/ops/ocean/step.jinja index e45d062a..851d636d 100644 --- a/sameproject/ops/ocean/step.jinja +++ b/sameproject/ops/ocean/step.jinja @@ -6,4 +6,7 @@ def root(): {% filter indent(width=4) %} {{ user_code }} {% endfilter %} + +if __name__ == "__main__": + root() {% endautoescape %} From 5aab6efa08461419f03bd3d6cec91cc0aa912d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 14:01:14 +0100 Subject: [PATCH 61/99] WIP: generating the correct python script at the correct location --- sameproject/ops/ocean/render.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 11664fa7..6483cf36 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -19,14 +19,19 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str templateDir = os.path.dirname(os.path.abspath(__file__)) templateLoader = FileSystemLoader(templateDir) - print(f"Template dir {templateDir}") + print(f"Same config {same_config}") + print(same_config['notebook']['path']) env = Environment(trim_blocks=True, loader=templateLoader) root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" root_path = Path(compile_path) / f"{root_pipeline_name}.py" helpers.write_file(root_path, root_file_string) - + + # for storing in the docker image + docker_path = same_config['notebook']['path'][:-5] + 'py' + print(f"docker path {docker_path}") + helpers.write_file(docker_path, root_file_string) return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name def _build_step_file(env: Environment, step: Step, same_config) -> 
str: From 754664d20d1eefa9ac29308df7f01d7c0faab941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 15:01:59 +0100 Subject: [PATCH 62/99] WIP: removed print statements --- sameproject/ops/ocean/render.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 6483cf36..6ddfd4f3 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -19,8 +19,6 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str templateDir = os.path.dirname(os.path.abspath(__file__)) templateLoader = FileSystemLoader(templateDir) - print(f"Same config {same_config}") - print(same_config['notebook']['path']) env = Environment(trim_blocks=True, loader=templateLoader) root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) @@ -30,7 +28,6 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str # for storing in the docker image docker_path = same_config['notebook']['path'][:-5] + 'py' - print(f"docker path {docker_path}") helpers.write_file(docker_path, root_file_string) return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name From 56403a2260f4f034d452006b957e6939ac9a565d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 15:05:37 +0100 Subject: [PATCH 63/99] WIP: removing notebook after creating script --- sameproject/ops/ocean/render.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sameproject/ops/ocean/render.py b/sameproject/ops/ocean/render.py index 6ddfd4f3..c9db0ce1 100644 --- a/sameproject/ops/ocean/render.py +++ b/sameproject/ops/ocean/render.py @@ -29,6 +29,7 @@ def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str # for storing in the docker image docker_path = same_config['notebook']['path'][:-5] + 'py' helpers.write_file(docker_path, root_file_string) + os.remove(same_config['notebook']['path']) return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name def _build_step_file(env: Environment, step: Step, same_config) -> str: From c1fdab2ae700c090e38508fa0692f27e4c279b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 15:33:27 +0100 Subject: [PATCH 64/99] WIP: started dockerfile for operator engine --- Dockerfile | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..9a3a434b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python + +COPY . . + +RUN pip install . + +RUN cd /data/transformation + +RUN same init + +CMD ["same", "run", "--persist-temp-files", "--no-deploy", "-t", "ocean"] \ No newline at end of file From 164af5a8d57b04194dd60b9a4d0b770c953589e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 9 Oct 2022 16:45:32 +0100 Subject: [PATCH 65/99] WIP: disabled interactivity in dockerfile --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 9a3a434b..80d85824 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,8 @@ RUN pip install . 
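+# NOTE (reviewer comment, not part of the original patch): `RUN cd /data/transformation`
+# below only changes the working directory for that single layer; if `same init` and the
+# CMD are meant to run from /data/transformation, the usual Dockerfile idiom is
+# `WORKDIR /data/transformation`.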
RUN cd /data/transformation +ARG DEBIAN_FRONTEND=noninteractive + RUN same init CMD ["same", "run", "--persist-temp-files", "--no-deploy", "-t", "ocean"] \ No newline at end of file From 9fc070dc2f0275c765b3a2bbd7c86fc8a3cbd952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 10 Oct 2022 12:57:10 +0100 Subject: [PATCH 66/99] WIP: removed deploy --- sameproject/ops/ocean/deploy.py | 140 +------------------------------- 1 file changed, 1 insertion(+), 139 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index c1968be2..6725f3c4 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -7,142 +7,4 @@ def deploy(base_path: Path, root_file: str, # root function with notebook code (string) config: SameConfig): - return -# ocean, OCEAN_token, provider_url = configure_ocean(config=config) -# wallet = Wallet(ocean.web3, config.runtime_options.get("wallet_private_key"), transaction_timeout=20, block_confirmations=0) -# print(f"wallet.address = '{wallet.address}'") -# assert wallet.web3.eth.get_balance(wallet.address) > 0, "need ETH" - -# DATA_did = config.runtime_options.get("dt_did") -# ALG_did = config.runtime_options.get("algo_did") -# DATA_DDO = ocean.assets.resolve(DATA_did) # make sure we operate on the updated and indexed metadata_cache_uri versions -# ALG_DDO = ocean.assets.resolve(ALG_did) -# compute_service = DATA_DDO.get_service('compute') -# algo_service = ALG_DDO.get_service('access') -# print(f'Algorithm DDO is {ALG_DDO}') - -# if config.runtime_options.get("algo_verified") == False: -# try: -# trusted_algorithms.add_publisher_trusted_algorithm(DATA_DDO, ALG_DDO.did, 'https://aquarius.oceanprotocol.com') -# ocean.assets.update(DATA_DDO, publisher_wallet=wallet) -# verified = True -# except: -# verified = False -# pass - -# # Datatoken buying -# data_token_address = f'0x{DATA_did[7:]}' -# data_token = ocean.get_data_token(data_token_address) -# if data_token.balanceOf(wallet.address) < to_wei(1): -# print('Not enough datatokens in wallet, buying...') -# buy_dt(config, wallet, ocean, OCEAN_token, DATA_did) - -# algo_token_address = f'0x{ALG_did[7:]}' -# algo_token = ocean.get_data_token(algo_token_address) -# print(f"You have {pretty_ether_and_wei(algo_token.balanceOf(wallet.address), algo_token.symbol())} algorithm tokens.") -# if config.runtime_options.get("algo_verified") == True or verified == True: -# result = run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service) -# return result - -# def configure_ocean(config): -# conf = { -# 'network' : 'https://rinkeby.infura.io/v3/d163c48816434b0bbb3ac3925d6c6c80' if config.runtime_options.get("network") is None else config.runtime_options.get("network"), -# 'BLOCK_CONFIRMATIONS': 0, -# 'metadataCacheUri' : 'https://aquarius.oceanprotocol.com', -# 'providerUri' : 'https://provider.rinkeby.oceanprotocol.com', -# 'PROVIDER_ADDRESS': '0x00bd138abd70e2f00903268f3db08f2d25677c9e' if config.runtime_options.get("provider_address") is None else config.runtime_options.get("provider_address"), -# 'downloads.path': 'consume-downloads', -# } - -# ocean = Ocean(conf) -# OCEAN_token = BToken(ocean.web3, ocean.OCEAN_address) -# provider_url = DataServiceProvider.get_url(ocean.config) - -# return ocean, OCEAN_token, provider_url - -# def buy_dt(config, wallet, ocean, OCEAN_token, did): -# """ -# Datatoken buying - -# Requirements: -# - wallet from previous step -# - datatoken DID and pool address -# """ -# pool_address = 
config.runtime_options.get("dt_pool") -# assert wallet is not None, "Wallet error, run pipeline again" -# # Get asset, datatoken_address -# asset = ocean.assets.resolve(did) -# data_token_address = f'0x{did[7:]}' - -# print('Executing Transaction') -# #my wallet -# print(f"Environment Wallet Address = '{wallet.address}'") -# print(f"Wallet OCEAN = {pretty_ether_and_wei(OCEAN_token.balanceOf(wallet.address))}") -# print(f"Wallet ETH = {pretty_ether_and_wei(ocean.web3.eth.get_balance(wallet.address))}") -# #Verify that wallet has ETH -# assert ocean.web3.eth.get_balance(wallet.address) > 0, "need test ETH" -# #Verify that wallet has OCEAN -# assert OCEAN_token.balanceOf(wallet.address) > 0, "need test OCEAN" -# data_token = ocean.get_data_token(data_token_address) -# print('Buying Data Token') -# ocean.pool.buy_data_tokens( -# pool_address, -# amount=to_wei(1), # buy 1.0 datatoken -# max_OCEAN_amount=to_wei(config.runtime_options.get("max_dt_price")), # pay up to 10.0 OCEAN -# from_wallet=wallet -# ) -# print(f"You have {pretty_ether_and_wei(data_token.balanceOf(wallet.address), data_token.symbol())}.") - -# def run_c2d(ocean, wallet, DATA_did, ALG_did, ALG_datatoken, compute_service, algo_service): -# """ -# Running C2D -# """ -# # order & pay for dataset -# dataset_order_requirements = ocean.assets.order( -# DATA_did, wallet.address, service_type=compute_service.type -# ) - -# DATA_order_tx_id = ocean.assets.pay_for_service( -# ocean.web3, -# dataset_order_requirements.amount, -# dataset_order_requirements.data_token_address, -# DATA_did, -# compute_service.index, -# ZERO_ADDRESS, -# wallet, -# dataset_order_requirements.computeAddress, -# ) - -# # order & pay for algo -# algo_order_requirements = ocean.assets.order( -# ALG_did, wallet.address, service_type=algo_service.type -# ) -# ALG_order_tx_id = ocean.assets.pay_for_service( -# ocean.web3, -# algo_order_requirements.amount, -# algo_order_requirements.data_token_address, -# ALG_did, -# algo_service.index, -# ZERO_ADDRESS, -# wallet, -# algo_order_requirements.computeAddress, -# ) - -# compute_inputs = [ComputeInput(DATA_did, DATA_order_tx_id, compute_service.index)] -# job_id = ocean.compute.start( -# compute_inputs, -# wallet, -# algorithm_did=ALG_did, -# algorithm_tx_id=ALG_order_tx_id, -# algorithm_data_token=ALG_datatoken.address -# ) -# print(f"Started compute job with id: {job_id}") - -# # for monitoring C2D status -# while ocean.compute.status(DATA_did, job_id, wallet)['statusText'] != 'Job finished': -# print(ocean.compute.status(DATA_did, job_id, wallet)['statusText']) -# pass - -# # retrieving result -# result = ocean.compute.result_file(DATA_did, job_id, 0, wallet) # 0 index, means we retrieve the results from the first dataset index -# return result \ No newline at end of file + return \ No newline at end of file From 375aacc67fa480084a81b4fd36465d882aa97ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 12 Oct 2022 09:51:25 +0100 Subject: [PATCH 67/99] WIP: refactoring ocean to aws --- sameproject/ops/ocean/deploy.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 6725f3c4..39cab877 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -2,9 +2,15 @@ from sameproject.ops import helpers from pathlib import Path import importlib -import os +import kfp -def deploy(base_path: Path, - root_file: str, # root function with notebook code (string) - config: 
SameConfig): - return \ No newline at end of file + +def deploy(base_path: Path, root_file: str, config: SameConfig): + with helpers.add_path(str(base_path)): + root_module = importlib.import_module(root_file) # python module + + kfp_client = kfp.Client() # only supporting 'kubeflow' namespace + return kfp_client.create_run_from_pipeline_func( + root_module.root, + arguments={}, + ) From 102f6ebc9b52127b00988390d4976c9dc2caeecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 12 Oct 2022 14:38:17 +0100 Subject: [PATCH 68/99] WIP: refactoring to boto3 --- sameproject/ops/ocean/deploy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 39cab877..29b26f7d 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -2,15 +2,15 @@ from sameproject.ops import helpers from pathlib import Path import importlib -import kfp +import boto3 def deploy(base_path: Path, root_file: str, config: SameConfig): with helpers.add_path(str(base_path)): root_module = importlib.import_module(root_file) # python module - kfp_client = kfp.Client() # only supporting 'kubeflow' namespace - return kfp_client.create_run_from_pipeline_func( + client = boto3.client('ec2') + return client.create_run_from_pipeline_func( root_module.root, arguments={}, ) From b6b91bfece09b0ec1ce1a5602e3786e19af06f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 12 Oct 2022 15:14:45 +0100 Subject: [PATCH 69/99] WIP: added create_job from operator engine --- sameproject/ops/ocean/deploy.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 29b26f7d..6d5a34a6 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -2,8 +2,17 @@ from sameproject.ops import helpers from pathlib import Path import importlib +import kubernetes import boto3 +def create_job(logger, body, job): + try: + logger.debug(f"Creating job {job}") + batch_client = kubernetes.client.BatchV1Api() + obj = batch_client.create_namespaced_job(body["metadata"]["namespace"], job) + logger.info(f"{obj.kind} {obj.metadata.name} created") + except ApiException as e: + logger.debug(f"Exception when calling BatchV1Api->create_namespaced_job: {e}\n") def deploy(base_path: Path, root_file: str, config: SameConfig): with helpers.add_path(str(base_path)): From 3fa3f8167ed140a506bbccedfbfbf02b3647d5f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 31 Oct 2022 13:25:16 +0000 Subject: [PATCH 70/99] WIP: removed unused ocean deploy --- sameproject/ops/ocean/deploy.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sameproject/ops/ocean/deploy.py b/sameproject/ops/ocean/deploy.py index 6d5a34a6..6683a1e7 100644 --- a/sameproject/ops/ocean/deploy.py +++ b/sameproject/ops/ocean/deploy.py @@ -3,7 +3,6 @@ from pathlib import Path import importlib import kubernetes -import boto3 def create_job(logger, body, job): try: @@ -15,11 +14,12 @@ def create_job(logger, body, job): logger.debug(f"Exception when calling BatchV1Api->create_namespaced_job: {e}\n") def deploy(base_path: Path, root_file: str, config: SameConfig): - with helpers.add_path(str(base_path)): - root_module = importlib.import_module(root_file) # python module + return + # with helpers.add_path(str(base_path)): + # root_module = importlib.import_module(root_file) # python module - client = 
boto3.client('ec2') - return client.create_run_from_pipeline_func( - root_module.root, - arguments={}, - ) + # client = boto3.client('ec2') + # return client.create_run_from_pipeline_func( + # root_module.root, + # arguments={}, + # ) From 1720ebcae3d3ad386b45cc2ca57f2a0fdfc30861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 31 Oct 2022 21:25:29 +0000 Subject: [PATCH 71/99] WIP: added ocean_c2d for same-ocean integration --- ocean_c2d/render_ocean.py | 207 +++++++++++++++++++++++++++++++++++++ ocean_c2d/requirements.txt | 2 + ocean_c2d/same.yaml | 14 +++ 3 files changed, 223 insertions(+) create mode 100644 ocean_c2d/render_ocean.py create mode 100644 ocean_c2d/requirements.txt create mode 100644 ocean_c2d/same.yaml diff --git a/ocean_c2d/render_ocean.py b/ocean_c2d/render_ocean.py new file mode 100644 index 00000000..ca22cd87 --- /dev/null +++ b/ocean_c2d/render_ocean.py @@ -0,0 +1,207 @@ +from jinja2 import Environment, FileSystemLoader, select_autoescape +from pathlib import Path +from typing import Tuple +from uuid import uuid4 +from base64 import urlsafe_b64encode +import logging +import os +import time + +from sameproject.data.step import Step +from sameproject.ops import helpers +import sameproject.ops.explode + + +from sameproject.ops.code import get_magic_lines, remove_magic_lines, get_installable_packages +from sameproject.data.config import SameConfig +from sameproject.data import Step +from typing import Tuple, List +from io import BufferedReader +from pathlib import Path +import jupytext +import logging +import click + + +def compile(config: SameConfig, target: str) -> Tuple[Path, str]: + notebook = read_notebook(config.notebook.path) + all_steps = get_steps(notebook, config) + + return render( + target=target, + steps=all_steps, + config=config + ) + + +def read_notebook(notebook_path) -> dict: + logging.info(f"Using notebook from here: {notebook_path}") + try: + notebook_file_handle = Path(notebook_path) + ntbk_dict = jupytext.read(str(notebook_file_handle)) + except FileNotFoundError: + logging.fatal(f"No notebook found at {notebook_path}") + exit(1) + + return ntbk_dict + + +def get_steps(notebook: dict, config: SameConfig) -> dict: + """Parses the code in a notebook into a series of SAME execution steps.""" + + steps = {} + all_code = "" + code_buffer = [] + this_step_index = 0 + this_step_name = "same_step_000" + this_step_code = "" + this_step_cache_value = "P0D" + this_step_environment_name = "default" + this_step_tags = [] + + def save_step(): + steps[this_step_name] = Step( + name=this_step_name, + code=remove_magic_lines(this_step_code), + index=this_step_index, + cache_value=this_step_cache_value, + environment_name=this_step_environment_name, + tags=this_step_tags, + parameters=[], + packages_to_install=[], + frozen_box=False, # TODO: make immutable + ) + + # Inject pip requirements file if configured: + if "requirements" in config.notebook: + with open(config.notebook.requirements, "r") as file: + steps[this_step_name].requirements_file = file.read() + + for num, cell in enumerate(notebook["cells"]): + if "metadata" not in cell: # sanity check + continue + + if len(cell["metadata"]) > 0 and "tags" in cell["metadata"] and len(cell["metadata"]["tags"]) > 0: + for tag in cell["metadata"]["tags"]: + if tag.startswith("same_step_"): + if num > 0: # don't create empty step + this_step_code = "\n".join(code_buffer) + all_code += "\n" + this_step_code + save_step() + + code_buffer = [] + step_tag_num = int(tag.split("same_step_")[1]) + 
this_step_index = step_tag_num + this_step_name = f"same_step_{step_tag_num:03}" + this_step_code = "" + this_step_cache_value = "P0D" + this_step_environment_name = "default" + this_step_tags = [] + + elif str.startswith(tag, "cache="): + this_step_cache_value = str.split(tag, "=")[1] + elif str.startswith(tag, "environment="): + this_step_environment_name = str.split(tag, "=")[1] + else: + this_step_tags.append(tag) + + if cell["cell_type"] == "code": # might be a markdown cell + code_buffer.append("\n".join(jupytext.cell_to_text.LightScriptCellExporter(cell, "py").source)) + + this_step_code = "\n".join(code_buffer) + all_code += "\n" + this_step_code + save_step() + + magic_lines = get_magic_lines(all_code) + if len(magic_lines) > 0: + magic_lines_string = "\n".join(magic_lines) + logging.warning(f"""Notebook contains magic lines, which will be ignored:\n{magic_lines_string}""") + + # Remove magic lines from code so that we can continue: + all_code = remove_magic_lines(all_code) + + for k in steps: + steps[k].packages_to_install = get_installable_packages(all_code) + + return steps + + +def get_sorted_list_of_steps(notebook: dict, config: SameConfig) -> list: + """ + Given a notebook (as a dict), get a list of Step objects, sorted by their + index in the notebook. + """ + steps_dict = get_steps(notebook, config) + steps = list(steps_dict.values()) + steps_sorted_by_index = sorted(steps, key=lambda x: x.index) + return steps_sorted_by_index + + +def get_code(notebook: dict) -> str: + """Combines and returns all python code in the given notebook.""" + if "cells" not in notebook: + return "" + + code = [] + for cell in notebook["cells"]: + if cell["cell_type"] != "code": + continue + + code.append("\n".join( + jupytext.cell_to_text.LightScriptCellExporter(cell, "py").source + )) + + return "\n".join(code) + + +ocean_step_template = "step.jinja" + + +def render(compile_path: str, steps: list, same_config: dict) -> Tuple[Path, str]: + """Renders the notebook into a root file and a series of step files according to the target requirements. 
Returns an absolute path to the root file for deployment.""" + + templateDir = os.path.dirname(os.path.abspath(__file__)) + templateLoader = FileSystemLoader(templateDir) + env = Environment(trim_blocks=True, loader=templateLoader) + + root_file_string = _build_step_file(env, next(iter(steps.values())), same_config) + root_pipeline_name = f"root_pipeline_{uuid4().hex.lower()}" + root_path = Path(compile_path) / f"{root_pipeline_name}.py" + helpers.write_file(root_path, root_file_string) + + # for storing in the docker image + docker_path = same_config['notebook']['path'][:-5] + 'py' + helpers.write_file(docker_path, root_file_string) + os.remove(same_config['notebook']['path']) + return (compile_path, root_file_string) # note: root_file_string replaced root_pipeline_name + +def _build_step_file(env: Environment, step: Step, same_config) -> str: + with open(sameproject.ops.explode.__file__, "r") as f: + explode_code = f.read() + + requirements_file = None + if "requirements_file" in step: + requirements_file = urlsafe_b64encode(bytes(step.requirements_file, "utf-8")).decode() + + memory_limit = same_config.runtime_options.get( + "serialisation_memory_limit", + 512 * 1024 * 1024, # 512MB + ) + + same_env = same_config.runtime_options.get( + "same_env", + "default", + ) + + step_contract = { + "name": step.name, + "same_env": same_env, + "memory_limit": memory_limit, + "unique_name": step.unique_name, + "requirements_file": requirements_file, + "user_code": step.code, + "explode_code": urlsafe_b64encode(bytes(explode_code, "utf-8")).decode(), + "same_yaml": urlsafe_b64encode(bytes(same_config.to_yaml(), "utf-8")).decode(), + } + + return env.get_template(ocean_step_template).render(step_contract) \ No newline at end of file diff --git a/ocean_c2d/requirements.txt b/ocean_c2d/requirements.txt new file mode 100644 index 00000000..f1b2e920 --- /dev/null +++ b/ocean_c2d/requirements.txt @@ -0,0 +1,2 @@ +# Dependencies for /Users/jakub/Development/Algovera/Core/same-project/demo/test.ipynb: + diff --git a/ocean_c2d/same.yaml b/ocean_c2d/same.yaml new file mode 100644 index 00000000..64b82e05 --- /dev/null +++ b/ocean_c2d/same.yaml @@ -0,0 +1,14 @@ +apiVersion: sameproject.ml/v1alpha1 +environments: + default: + image_tag: combinatorml/jupyterlab-tensorflow-opencv:0.9 +metadata: + labels: [] + name: default_config + version: 0.0.0 +notebook: + name: test + path: /data/transformation/notebook.ipynb + requirements: /same-project/ocean_c2d/requirements.txt +run: + name: default_config run From 517ea1fb5fe9a7685a3dfc0558ced2630bd1ffd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 6 Nov 2022 10:49:31 +0000 Subject: [PATCH 72/99] WIP: added python & bash scripts for ocean c2d --- ocean.sh | 11 +++++++++++ ocean_c2d/render_ocean.py | 5 ++++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 ocean.sh diff --git a/ocean.sh b/ocean.sh new file mode 100644 index 00000000..6ae22e2d --- /dev/null +++ b/ocean.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +cd /data/transformations/ + +cli_execute_transform_command algorithm converted_script.py + +python3.8 converted_script.py + +same init + +same run -t ocean --no-deploy --persist-temp-files \ No newline at end of file diff --git a/ocean_c2d/render_ocean.py b/ocean_c2d/render_ocean.py index ca22cd87..370d8f7a 100644 --- a/ocean_c2d/render_ocean.py +++ b/ocean_c2d/render_ocean.py @@ -204,4 +204,7 @@ def _build_step_file(env: Environment, step: Step, same_config) -> str: "same_yaml": urlsafe_b64encode(bytes(same_config.to_yaml(), 
"utf-8")).decode(), } - return env.get_template(ocean_step_template).render(step_contract) \ No newline at end of file + return env.get_template(ocean_step_template).render(step_contract) + +if __name__ == "__main__": + compile("same.yaml", "notebook.ipynb") \ No newline at end of file From e72cf483ae8be4e9589476ab1cd778e08f507854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 6 Nov 2022 11:35:43 +0000 Subject: [PATCH 73/99] WIP: changed ocean.sh --- ocean.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocean.sh b/ocean.sh index 6ae22e2d..3cc27e0f 100644 --- a/ocean.sh +++ b/ocean.sh @@ -1,6 +1,6 @@ #!/bin/bash -cd /data/transformations/ +cd ../data/transformations/ cli_execute_transform_command algorithm converted_script.py From 7e77a40a0026d71d4a65366acd1fa1d4b880f990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 6 Nov 2022 11:37:10 +0000 Subject: [PATCH 74/99] WIP: changed render --- ocean_c2d/render_ocean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocean_c2d/render_ocean.py b/ocean_c2d/render_ocean.py index 370d8f7a..77adca6c 100644 --- a/ocean_c2d/render_ocean.py +++ b/ocean_c2d/render_ocean.py @@ -207,4 +207,4 @@ def _build_step_file(env: Environment, step: Step, same_config) -> str: return env.get_template(ocean_step_template).render(step_contract) if __name__ == "__main__": - compile("same.yaml", "notebook.ipynb") \ No newline at end of file + compile("same.yaml", os.environ("AlGO")) \ No newline at end of file From 40799268c81881cc8354cdd847d8435b72a3f44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 7 Nov 2022 13:45:08 +0100 Subject: [PATCH 75/99] Update ocean.sh --- ocean.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ocean.sh b/ocean.sh index 3cc27e0f..f9dba8e3 100644 --- a/ocean.sh +++ b/ocean.sh @@ -2,10 +2,6 @@ cd ../data/transformations/ -cli_execute_transform_command algorithm converted_script.py - -python3.8 converted_script.py - same init -same run -t ocean --no-deploy --persist-temp-files \ No newline at end of file +same run -t ocean --no-deploy --persist-temp-files From a7eb252a78e7b76d6536913e74b671a7d0652f14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 7 Nov 2022 12:52:12 +0000 Subject: [PATCH 76/99] WIP: changed dockerfile --- Dockerfile | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 80d85824..1d503b1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,32 @@ -FROM python +FROM python:3.8 -COPY . . +# Basic toolchain +RUN apt-get update && apt-get install -y \ + apt-utils \ + build-essential \ + git \ + wget \ + unzip \ + yasm \ + pkg-config \ + libcurl4-openssl-dev \ + zlib1g-dev \ + htop \ + cmake \ + vim \ + nano \ + python3-pip \ + python3-dev \ + python3-tk \ + libx264-dev \ + gcc \ + # python-pytest \ + && cd /usr/local/bin \ + && pip3 install --upgrade pip \ + && apt-get autoremove -y -RUN pip install . +RUN git clone -b develop https://github.com/AlgoveraAI/same-project.git -RUN cd /data/transformation +WORKDIR /same-project -ARG DEBIAN_FRONTEND=noninteractive - -RUN same init - -CMD ["same", "run", "--persist-temp-files", "--no-deploy", "-t", "ocean"] \ No newline at end of file +RUN pip3 install . 
\ No newline at end of file From 590e19785653fedd36affbdab2e2191b7af64f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 8 Nov 2022 08:46:31 +0000 Subject: [PATCH 77/99] WIP: updated dockerfile & bash script with nbconvert --- Dockerfile | 8 +++++++- ocean.sh | 10 ++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1d503b1f..c88aac17 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,4 +29,10 @@ RUN git clone -b develop https://github.com/AlgoveraAI/same-project.git WORKDIR /same-project -RUN pip3 install . \ No newline at end of file +ARG DEBIAN_FRONTEND=noninteractive + +RUN pip3 install . + +RUN python3.8 -m pip install jupyter +RUN python3.8 -m pip install nbconvert +RUN chmod +x ./ocean.sh diff --git a/ocean.sh b/ocean.sh index 3cc27e0f..8207b60b 100644 --- a/ocean.sh +++ b/ocean.sh @@ -1,11 +1,9 @@ #!/bin/bash -cd ../data/transformations/ +cd /data/transformations/ -cli_execute_transform_command algorithm converted_script.py +mv 0 hello.ipynb -python3.8 converted_script.py +jupyter nbconvert hello.ipynb --to python -same init - -same run -t ocean --no-deploy --persist-temp-files \ No newline at end of file +python3.8 hello.py From 33b85b8e9c334c0e0a258dcd4d8fb77ac0e6745f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 8 Nov 2022 08:51:24 +0000 Subject: [PATCH 78/99] WIP: updated dockerfile & bash script with nbconvert --- ocean.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ocean.sh b/ocean.sh index 0b2803f5..8207b60b 100644 --- a/ocean.sh +++ b/ocean.sh @@ -2,14 +2,8 @@ cd /data/transformations/ -<<<<<<< HEAD mv 0 hello.ipynb jupyter nbconvert hello.ipynb --to python python3.8 hello.py -======= -same init - -same run -t ocean --no-deploy --persist-temp-files ->>>>>>> 40799268c81881cc8354cdd847d8435b72a3f44b From 31c60dabff54de2ebba5a500395e7684f586a91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 11 Nov 2022 10:31:00 +0100 Subject: [PATCH 79/99] WIP: removed user input --- sameproject/cli/init.py | 96 ++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 60 deletions(-) diff --git a/sameproject/cli/init.py b/sameproject/cli/init.py index 3d1a5b71..ace29f69 100644 --- a/sameproject/cli/init.py +++ b/sameproject/cli/init.py @@ -47,75 +47,54 @@ def init(): # Start by looking for an existing same config in the current directory. 
cfg = find_same_config(recurse=False) if cfg is not None: - click.echo("An existing SAME config file was found at the following path:") - click.echo(f"\t{cfg}") - if not click.confirm("Do you want to replace it?", default=False): - exit(0) + print("An existing SAME config file was found at the following path:") + print(f"\t{cfg}") + exit(0) else: cfg = Path("./same.yaml") # Name of the pipeline: - pl_name = click.prompt( - "Name of this config:", - default="default_config", - type=name_type - ) + pl_name = "default_config" # Notebook data: - nb_path = click.prompt( - "Notebook path", - default=find_notebook(recurse=True), - type=file_type, - ) + nb_path = find_notebook(recurse=True) if not nb_path.exists(): - click.echo(f"No such file found: {nb_path}", err=True) + print(f"No such file found: {nb_path}") exit(1) nb_dict = read_notebook(nb_path) nb_name = str(nb_path.name).replace(".ipynb", "") - nb_name = click.prompt("Notebook name", default=nb_name, type=str) # Docker image data: - image = click.prompt( - "Default docker image", - default="combinatorml/jupyterlab-tensorflow-opencv:0.9", - type=image_type - ) + image = "combinatorml/jupyterlab-tensorflow-opencv:0.9" # Requirements.txt data: req = find_requirements(recurse=False) if req is None: - if click.confirm("No requirements.txt found in current directory - would you like to create one?", default=True): - req_contents = f"# Dependencies for {nb_path.resolve()}:\n" - - writing_reqs = False - if click.confirm("Would you like SAME to fill in the requirements.txt for you?", default=True): - code = remove_magic_lines(get_code(nb_dict)) - modules = get_imported_modules(code) - pkg_info = get_package_info(modules) - - if len(pkg_info) > 0: - writing_reqs = True - click.echo("Found the following requirements for the notebook:") - for pkg in pkg_info: - click.echo(f"\t{pkg_info[pkg].name}=={pkg_info[pkg].version}") - else: - click.echo("No requirements found for the notebook.") - req_contents += render_package_info(pkg_info) + "\n" - - req = Path("requirements.txt") - with req.open("w") as file: - file.write(req_contents) - - if writing_reqs: - click.echo(f"Wrote requirements to {req.resolve()}.") - else: - click.echo(f"Wrote empty requirements file to {req.resolve()}.") + req_contents = f"# Dependencies for {nb_path.resolve()}:\n" + + writing_reqs = False + code = remove_magic_lines(get_code(nb_dict)) + modules = get_imported_modules(code) + pkg_info = get_package_info(modules) + + if len(pkg_info) > 0: + writing_reqs = True + print("Found the following requirements for the notebook:") + for pkg in pkg_info: + print(f"{pkg_info[pkg].name}=={pkg_info[pkg].version}") + else: + print("No requirements found for the notebook.") + req_contents += render_package_info(pkg_info) + "\n" + + req = Path("requirements.txt") + with req.open("w") as file: + file.write(req_contents) + + if writing_reqs: + print(f"Wrote requirements to {req.resolve()}.") + else: + print(f"Wrote empty requirements file to {req.resolve()}.") else: - req = click.prompt( - "Requirements.txt", - default=req, - type=file_type, - ) if req == "": req = None @@ -145,14 +124,11 @@ def init(): if req is not None: same_config.notebook.requirements = str(req) - click.echo(f"About to write to {cfg.absolute()}:") - click.echo() - click.echo(same_config.to_yaml()) - if click.confirm("Is this okay?", default=True): - cfg.write_text(same_config.to_yaml()) - click.echo(f"Wrote config file to {cfg.absolute()}.") - click.echo() - click.echo("""You can now run 'same verify' to check that 
everything is configured correctly + print(f"About to write to {cfg.absolute()}:") + print(same_config.to_yaml()) + cfg.write_text(same_config.to_yaml()) + print(f"Wrote config file to {cfg.absolute()}.") + print("""You can now run 'same verify' to check that everything is configured correctly (requires docker locally), or you can run 'same run' to deploy the pipeline to a configured backend (e.g. Kubeflow Pipelines in a Kubernetes cluster file pointed to by ~/.kube/config or set in the KUBECONFIG environment variable). From 64cc28495e6ce9d03dc77b35aba583c1b0b49e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 11 Nov 2022 10:34:53 +0100 Subject: [PATCH 80/99] removed click.prompt --- sameproject/cli/init.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sameproject/cli/init.py b/sameproject/cli/init.py index ace29f69..03302508 100644 --- a/sameproject/cli/init.py +++ b/sameproject/cli/init.py @@ -40,7 +40,6 @@ def __call__(self, value, param=None, ctx=None): ) -@click.command() def init(): """Creates a new SAME config file.""" From d153dd4d31a7fb8e5be04cf51b7d273795ef3bf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 11 Nov 2022 10:35:56 +0100 Subject: [PATCH 81/99] added click.prompt --- sameproject/cli/init.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sameproject/cli/init.py b/sameproject/cli/init.py index 03302508..ace29f69 100644 --- a/sameproject/cli/init.py +++ b/sameproject/cli/init.py @@ -40,6 +40,7 @@ def __call__(self, value, param=None, ctx=None): ) +@click.command() def init(): """Creates a new SAME config file.""" From 9ed0299cda1567d71118dd6d73684cf74e1b2912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Fri, 11 Nov 2022 10:37:45 +0100 Subject: [PATCH 82/99] WIP: using same in bash script --- ocean.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocean.sh b/ocean.sh index 8207b60b..a950f145 100644 --- a/ocean.sh +++ b/ocean.sh @@ -4,6 +4,8 @@ cd /data/transformations/ mv 0 hello.ipynb -jupyter nbconvert hello.ipynb --to python +same init + +same run --no-deploy --persist-temp-files -t ocean python3.8 hello.py From 02fab2558e49e7db8f5b7120c75db993e9f3b6f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 15 Nov 2022 10:59:16 +0000 Subject: [PATCH 83/99] WIP: with run --- ocean.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocean.sh b/ocean.sh index a950f145..7dba034b 100644 --- a/ocean.sh +++ b/ocean.sh @@ -6,6 +6,6 @@ mv 0 hello.ipynb same init -same run --no-deploy --persist-temp-files -t ocean +same run --persist-temp-files -t ocean python3.8 hello.py From 7d126c97b7245db988907c67bc75dcaafcbb23e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 15 Nov 2022 15:46:03 +0000 Subject: [PATCH 84/99] WIP: back to no deploy --- ocean.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocean.sh b/ocean.sh index 7dba034b..a950f145 100644 --- a/ocean.sh +++ b/ocean.sh @@ -6,6 +6,6 @@ mv 0 hello.ipynb same init -same run --persist-temp-files -t ocean +same run --no-deploy --persist-temp-files -t ocean python3.8 hello.py From 783e9a9f0a9fdedd5fec656bd776f2503554065c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 16 Nov 2022 09:08:57 +0000 Subject: [PATCH 85/99] WIP: updated ocean.sh with correct same run and nbconvert --- ocean.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocean.sh b/ocean.sh index a950f145..4570f8fe 100644 --- a/ocean.sh 
+++ b/ocean.sh @@ -6,6 +6,8 @@ mv 0 hello.ipynb same init -same run --no-deploy --persist-temp-files -t ocean +same run + +jupyter nbconvert hello.ipynb --to python python3.8 hello.py From 9dbe0bb0ffa5a2d552ab4ab29601b54f1d3b514d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Wed, 16 Nov 2022 19:32:03 +0000 Subject: [PATCH 86/99] WIP: correct algorithm name in ocean.sh --- ocean.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocean.sh b/ocean.sh index 4570f8fe..3621d265 100644 --- a/ocean.sh +++ b/ocean.sh @@ -2,7 +2,7 @@ cd /data/transformations/ -mv 0 hello.ipynb +mv algorithm hello.ipynb same init From 52b1aaea46eddcf40d26163665e3448006ea72ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 17 Nov 2022 09:31:24 +0000 Subject: [PATCH 87/99] WIP: ocean.sh for 0.2 --- ocean.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ocean.sh b/ocean.sh index 3621d265..be430941 100644 --- a/ocean.sh +++ b/ocean.sh @@ -4,10 +4,6 @@ cd /data/transformations/ mv algorithm hello.ipynb -same init - -same run - jupyter nbconvert hello.ipynb --to python python3.8 hello.py From 89a6eb76469c936b25184c43d1da0da83f2685b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 17 Nov 2022 10:17:25 +0000 Subject: [PATCH 88/99] WIP: ocean.sh for 0.3 --- ocean.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocean.sh b/ocean.sh index be430941..3621d265 100644 --- a/ocean.sh +++ b/ocean.sh @@ -4,6 +4,10 @@ cd /data/transformations/ mv algorithm hello.ipynb +same init + +same run + jupyter nbconvert hello.ipynb --to python python3.8 hello.py From dee1ae760b361dfef48789e492f1903fb285e9e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 17 Nov 2022 11:05:18 +0000 Subject: [PATCH 89/99] WIP: ocean.sh for 0.4 --- ocean.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocean.sh b/ocean.sh index 3621d265..a35b6170 100644 --- a/ocean.sh +++ b/ocean.sh @@ -6,6 +6,10 @@ mv algorithm hello.ipynb same init +export KF_PIPELINES_ENDPOINT_ENV='ml_pipeline.kubeflow.svc.cluster.local' + +echo KF_PIPELINES_ENDPOINT_ENV + same run jupyter nbconvert hello.ipynb --to python From 7fcaabc29dcfd4f3e2a9c609655673f4d203299f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 17 Nov 2022 14:08:12 +0000 Subject: [PATCH 90/99] WIP: removed line 51 from root.jinja, added port 8888 --- Dockerfile | 2 ++ ocean.sh | 2 +- sameproject/ops/kubeflow/root.jinja | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index c88aac17..b6812340 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,4 +35,6 @@ RUN pip3 install . 
RUN python3.8 -m pip install jupyter RUN python3.8 -m pip install nbconvert +ENV KF_PIPELINES_ENDPOINT_ENV='ml_pipeline.kubeflow.svc.cluster.local:8888' + RUN chmod +x ./ocean.sh diff --git a/ocean.sh b/ocean.sh index a35b6170..6d570b7a 100644 --- a/ocean.sh +++ b/ocean.sh @@ -6,7 +6,7 @@ mv algorithm hello.ipynb same init -export KF_PIPELINES_ENDPOINT_ENV='ml_pipeline.kubeflow.svc.cluster.local' +export KF_PIPELINES_ENDPOINT_ENV='ml_pipeline.kubeflow.svc.cluster.local:8888' echo KF_PIPELINES_ENDPOINT_ENV diff --git a/sameproject/ops/kubeflow/root.jinja b/sameproject/ops/kubeflow/root.jinja index 127d1c9b..cc21228f 100644 --- a/sameproject/ops/kubeflow/root.jinja +++ b/sameproject/ops/kubeflow/root.jinja @@ -48,7 +48,7 @@ def root( secrets_by_env = {} {% for env_name in secrets_to_create_as_dict %} {% set secret = secrets_to_create_as_dict[env_name] %} - config.load_kube_config() + v1 = client.CoreV1Api() namespace = "kubeflow" name = "{{ experiment_name_safe }}" From 50fc69a239e4c25487cd50de31c103fb627cc4c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Thu, 17 Nov 2022 15:19:59 +0000 Subject: [PATCH 91/99] WIP: added empty config.yaml --- config.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 config.yaml diff --git a/config.yaml b/config.yaml new file mode 100644 index 00000000..e69de29b From 4042ab974dda25077630f49ea98a0337b6afd236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 22 Nov 2022 08:10:38 +0000 Subject: [PATCH 92/99] WIP: hardcoding host url --- sameproject/ops/kubeflow/deploy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sameproject/ops/kubeflow/deploy.py b/sameproject/ops/kubeflow/deploy.py index 39cab877..42277f4c 100644 --- a/sameproject/ops/kubeflow/deploy.py +++ b/sameproject/ops/kubeflow/deploy.py @@ -9,7 +9,9 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): with helpers.add_path(str(base_path)): root_module = importlib.import_module(root_file) # python module - kfp_client = kfp.Client() # only supporting 'kubeflow' namespace + print("getting kfp_client") + kfp_client = kfp.Client(host="ml_pipeline.kubeflow.svc.cluster.local:8888") # only supporting 'kubeflow' namespace + print("got kfp_client") return kfp_client.create_run_from_pipeline_func( root_module.root, arguments={}, From 291b161070e0407288eb212b90ab6c57199a8845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 22 Nov 2022 08:49:42 +0000 Subject: [PATCH 93/99] WIP: hardcoding host url --- sameproject/ops/kubeflow/deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sameproject/ops/kubeflow/deploy.py b/sameproject/ops/kubeflow/deploy.py index 42277f4c..9a8bcd3a 100644 --- a/sameproject/ops/kubeflow/deploy.py +++ b/sameproject/ops/kubeflow/deploy.py @@ -10,7 +10,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): root_module = importlib.import_module(root_file) # python module print("getting kfp_client") - kfp_client = kfp.Client(host="ml_pipeline.kubeflow.svc.cluster.local:8888") # only supporting 'kubeflow' namespace + kfp_client = kfp.Client(host="http://ml_pipeline.kubeflow.svc.cluster.local:8888") # only supporting 'kubeflow' namespace print("got kfp_client") return kfp_client.create_run_from_pipeline_func( root_module.root, From 73f6fab946258202bd2c23b540afda6b4897a8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 22 Nov 2022 10:15:47 +0000 Subject: [PATCH 94/99] WIP: hardcoding host url --- 
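Note on the host handling in this and the two follow-up patches: the Kubeflow
Pipelines endpoint is pinned by writing an ELB hostname directly into
deploy.py and run_info.jinja. A minimal sketch of an environment-driven
alternative, reusing the KF_PIPELINES_ENDPOINT_ENV variable that ocean.sh and
the Dockerfile already export (the fallback address below is a placeholder,
not a verified endpoint):

    import os
    import kfp

    def get_kfp_client() -> kfp.Client:
        # Prefer the endpoint exported by ocean.sh / the Dockerfile;
        # fall back to a placeholder in-cluster service address.
        host = os.environ.get(
            "KF_PIPELINES_ENDPOINT_ENV",
            "http://ml-pipeline.kubeflow.svc.cluster.local:8888",
        )
        return kfp.Client(host=host)

deploy() and the rendered run_info step could then build their clients through
a helper like this instead of repeating the hardcoded URL.
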
sameproject/ops/kubeflow/deploy.py | 2 +- sameproject/ops/kubeflow/run_info.jinja | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sameproject/ops/kubeflow/deploy.py b/sameproject/ops/kubeflow/deploy.py index 9a8bcd3a..54f27c5e 100644 --- a/sameproject/ops/kubeflow/deploy.py +++ b/sameproject/ops/kubeflow/deploy.py @@ -10,7 +10,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): root_module = importlib.import_module(root_file) # python module print("getting kfp_client") - kfp_client = kfp.Client(host="http://ml_pipeline.kubeflow.svc.cluster.local:8888") # only supporting 'kubeflow' namespace + kfp_client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com/") # only supporting 'kubeflow' namespace print("got kfp_client") return kfp_client.create_run_from_pipeline_func( root_module.root, diff --git a/sameproject/ops/kubeflow/run_info.jinja b/sameproject/ops/kubeflow/run_info.jinja index 0a970d5f..ea210093 100644 --- a/sameproject/ops/kubeflow/run_info.jinja +++ b/sameproject/ops/kubeflow/run_info.jinja @@ -15,7 +15,8 @@ def run_info_fn( import dill import kfp - client = kfp.Client(host="http://ml-pipeline:8888") + client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com/ +") run_info = client.get_run(run_id=run_id) run_info_dict = { From 0b78087eb766894f1c782ebc56ae76656e4e87ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 22 Nov 2022 10:44:29 +0000 Subject: [PATCH 95/99] WIP: hardcoding host url --- sameproject/ops/kubeflow/run_info.jinja | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sameproject/ops/kubeflow/run_info.jinja b/sameproject/ops/kubeflow/run_info.jinja index ea210093..ad3d70e5 100644 --- a/sameproject/ops/kubeflow/run_info.jinja +++ b/sameproject/ops/kubeflow/run_info.jinja @@ -15,8 +15,7 @@ def run_info_fn( import dill import kfp - client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com/ -") + client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com/") run_info = client.get_run(run_id=run_id) run_info_dict = { From 251cfab8af6fefa25a3d094e1f2291f770ca22a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Tue, 22 Nov 2022 10:46:00 +0000 Subject: [PATCH 96/99] WIP: hardcoding host url --- sameproject/ops/kubeflow/deploy.py | 2 +- sameproject/ops/kubeflow/run_info.jinja | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sameproject/ops/kubeflow/deploy.py b/sameproject/ops/kubeflow/deploy.py index 54f27c5e..b70ed4f7 100644 --- a/sameproject/ops/kubeflow/deploy.py +++ b/sameproject/ops/kubeflow/deploy.py @@ -10,7 +10,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): root_module = importlib.import_module(root_file) # python module print("getting kfp_client") - kfp_client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com/") # only supporting 'kubeflow' namespace + kfp_client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com") # only supporting 'kubeflow' namespace print("got kfp_client") return kfp_client.create_run_from_pipeline_func( root_module.root, diff --git a/sameproject/ops/kubeflow/run_info.jinja b/sameproject/ops/kubeflow/run_info.jinja index ad3d70e5..5cfae7db 100644 --- a/sameproject/ops/kubeflow/run_info.jinja 
+++ b/sameproject/ops/kubeflow/run_info.jinja @@ -15,7 +15,7 @@ def run_info_fn( import dill import kfp - client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com/") + client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com") run_info = client.get_run(run_id=run_id) run_info_dict = { From 0bae01b01b25690605415f19d5a4e39a81f4e53c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 27 Nov 2022 11:25:31 +0000 Subject: [PATCH 97/99] WIP: trying BaseOp --- sameproject/ops/kubeflow/deploy.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sameproject/ops/kubeflow/deploy.py b/sameproject/ops/kubeflow/deploy.py index b70ed4f7..70b48e61 100644 --- a/sameproject/ops/kubeflow/deploy.py +++ b/sameproject/ops/kubeflow/deploy.py @@ -2,9 +2,12 @@ from sameproject.ops import helpers from pathlib import Path import importlib + +import kfp.dsl as dsl import kfp + def deploy(base_path: Path, root_file: str, config: SameConfig): with helpers.add_path(str(base_path)): root_module = importlib.import_module(root_file) # python module @@ -12,6 +15,9 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): print("getting kfp_client") kfp_client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com") # only supporting 'kubeflow' namespace print("got kfp_client") + + dsl.BaseOp(name="data_collector").add_volume("/data") + return kfp_client.create_run_from_pipeline_func( root_module.root, arguments={}, From 3c914e0a1504112165fb7ee1c9441a380b83c9cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Sun, 27 Nov 2022 11:33:39 +0000 Subject: [PATCH 98/99] WIP: trying to mount dataset from same.yaml in init.py --- sameproject/cli/init.py | 7 +++++++ sameproject/ops/kubeflow/deploy.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sameproject/cli/init.py b/sameproject/cli/init.py index ace29f69..d9268b54 100644 --- a/sameproject/cli/init.py +++ b/sameproject/cli/init.py @@ -111,6 +111,13 @@ def init(): "image_tag": image, }, }, + "datasets": { + "data": { + "environments": { + "default": "/data/input", + }, + }, + }, "notebook": { "name": nb_name, "path": str(nb_path), diff --git a/sameproject/ops/kubeflow/deploy.py b/sameproject/ops/kubeflow/deploy.py index 70b48e61..3e14fab6 100644 --- a/sameproject/ops/kubeflow/deploy.py +++ b/sameproject/ops/kubeflow/deploy.py @@ -16,7 +16,7 @@ def deploy(base_path: Path, root_file: str, config: SameConfig): kfp_client = kfp.Client(host="http://aff7367d8c2254073b6f563f2eb8efdc-b6898d80ac5be12c.elb.us-east-1.amazonaws.com") # only supporting 'kubeflow' namespace print("got kfp_client") - dsl.BaseOp(name="data_collector").add_volume("/data") + # dsl.BaseOp(name="data_collector").add_volume("/data/input") return kfp_client.create_run_from_pipeline_func( root_module.root, From de500b3ad40844b2e8cde08f6d87da28b6af2f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sm=C3=A9kal?= Date: Mon, 28 Nov 2022 10:37:24 +0000 Subject: [PATCH 99/99] WIP: trying different algo docker image --- sameproject/cli/init.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sameproject/cli/init.py b/sameproject/cli/init.py index d9268b54..b33f9b76 100644 --- a/sameproject/cli/init.py +++ b/sameproject/cli/init.py @@ -65,7 +65,8 @@ def init(): nb_name = str(nb_path.name).replace(".ipynb", "") # Docker image data: - image = 
"combinatorml/jupyterlab-tensorflow-opencv:0.9" + image = "smejak/samedockers:0.1" + # image = "combinatorml/jupyterlab-tensorflow-opencv:0.9" # Requirements.txt data: req = find_requirements(recurse=False)