Niger-Volta-LTI · Olamyy · May 24, 2020 · May 25, 2020
diff --git a/02_run_prediction.sh b/02_run_prediction.sh
@@ -6,8 +6,8 @@ echo "[INFO] running inference on test sources"
 # put the path to your model checkpoint or final model
 python3 ./src/translate.py \
 	-model models/yo_adr_bahdanau_lstm_128_2_2_step_90000_release.pt \
-	-src data/test/sources.txt \
-	-tgt data/test/targets.txt \
+	-src data/test/one_phrase.txt \
+	-tgt data/test/one_phrase.target.txt \
 	-output data/test/pred.txt \
 	-replace_unk \
 	-verbose
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,9 @@
+# Base image: the official Python image. The repository name ("python") is
+# required; a bare tag such as "3.8.3-alpine3.11" cannot be resolved.
+FROM python:3.8.3-alpine3.11
+
+# Install dependencies from the project's requirements file.
+COPY requirements.txt /tmp/custom-requirements.txt
+
+# NOTE(review): 02_run_prediction.sh runs ./src/translate.py, so this COPY
+# presumably needs the build context to contain translate.py at its root -- confirm.
+COPY translate.py /tmp/translate.py
+
+RUN pip3 install --upgrade -r /tmp/custom-requirements.txt
+
+# Run the translation entry point when the container starts.
+CMD ["python", "/tmp/translate.py"]
diff --git a/deploy.py b/deploy.py
@@ -0,0 +1,48 @@
+from botocore.exceptions import ClientError
+from sagemaker.predictor import json_serializer, json_deserializer
+import boto3
+from sagemaker.session import Session
+from sagemaker.pytorch.model import PyTorchModel
+import click
+
+
+@click.command()
+@click.option('--instance_count', default=1, help='Instance count to deploy')
+def deploy(instance_count):
+    """Deploy the model archive to a SageMaker endpoint, replacing any existing one.
+
+    The endpoint (and its endpoint config) is named after the instance count.
+    If an endpoint with that name already exists it is deleted first, then a
+    fresh PyTorch model is deployed with JSON (de)serialisation.
+    """
+    model_data = "s3://model-demo-bucket/model.tar.gz"
+    role = "yoruba-adr-deployment"
+    print("Deploying model")
+    name = f"yoruba-adr-pytorch-instance-{instance_count}"
+
+    session = boto3.session.Session(profile_name='yoruba-adr')
+    sagemaker_session = Session(boto_session=session)
+    client = session.client('sagemaker')
+
+    # Only the existence probe is guarded: a ClientError here is taken to mean
+    # "no such endpoint". Errors from the delete calls are allowed to propagate
+    # instead of being reported as "Endpoint does not exist".
+    try:
+        client.describe_endpoint(EndpointName=name)
+    except ClientError:
+        print("Endpoint does not exist")
+    else:
+        print("Endpoint exists. Deleting.")
+        client.delete_endpoint(EndpointName=name)
+        client.delete_endpoint_config(EndpointConfigName=name)
+        print("Deleted old endpoint. Creating new endpoint")
+
+    # Deployment is deliberately outside any ``finally`` so it does not run
+    # after an unexpected failure above.
+    pytorch_model = PyTorchModel(
+        model_data=model_data,
+        name=name,
+        sagemaker_session=sagemaker_session,
+        framework_version='1.5.0',
+        role=role,
+        entry_point='translate.py')
+
+    predictor = pytorch_model.deploy(
+        instance_type="ml.t2.medium",  # Smallest instance type that doesn't raise a size error during deployment.
+        # update_endpoint = update_endpoint_if_exists() isn't working so
+        # https://github.com/aws/sagemaker-python-sdk/issues/101#issuecomment-607376320 is a work around.
+        initial_instance_count=instance_count)
+    predictor.content_type = 'application/json'
+    predictor.serializer = json_serializer
+    predictor.deserializer = json_deserializer
+
+
+if __name__ == '__main__':
+    deploy()
diff --git a/deployment/config.yaml b/deployment/config.yaml
@@ -0,0 +1,11 @@
+# Per-environment deployment settings. env.py selects the active section via
+# the ENVIRONMENT variable and falls back to "local" when it is unset.
+environments:
+  # Shared values merged into the concrete environments below.
+  default: &default
+    model_name: yoruba-adr-demo-deploy
+  local:
+    <<: *default
+    # NOTE(review): deployment/deploy.py passes its own instance_type to
+    # deploy() and does not appear to read this key -- confirm.
+    instance_type: local
+    # NOTE(review): absolute path on one developer's machine; it will not
+    # resolve on other hosts.
+    model_data_path: '/Users/owahab/Desktop/personal/niger-volta/yoruba-adr/deployment/model.tar.gz'
+  production:
+    <<: *default
+    instance_type: ml.t2.medium
+    model_data_path: s3://sagemaker-pytorch-text-classification/model.tar.gz
diff --git a/deployment/deploy.py b/deployment/deploy.py
@@ -0,0 +1,72 @@
+import json
+import os
+import sagemaker
+from sagemaker.pytorch import PyTorchModel
+from sagemaker.predictor import json_serializer, json_deserializer
+from utils import create_dirs
+from env import Env
+import logging.config
+
+VERSION = 0.1
+
+# Settings for the active environment, read from config.yaml by env.Env.
+env = Env()
+
+# BASE_DIR is the working directory at import time, so the logs directory is
+# created wherever the script is launched from.
+BASE_DIR = os.getcwd()  # project root
+APP_DIR = os.path.dirname(__file__)  # app root
+LOGS_DIR = os.path.join(BASE_DIR, 'logs')
+
+# Make sure the log directory exists before logging is configured.
+create_dirs(LOGS_DIR)
+
+
+def load_json(filepath):
+    """Load and return the parsed contents of a JSON file.
+
+    Args:
+        filepath: Path of the JSON document to read.
+
+    Returns:
+        The decoded JSON value.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+    """
+    # Read as UTF-8 explicitly rather than relying on the platform default.
+    with open(filepath, "r", encoding="utf-8") as fp:
+        return json.load(fp)
+
+
+# Resolve logging.json next to this module rather than through a
+# machine-specific absolute path, so the script works from any checkout.
+log_config = load_json(
+    filepath=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logging.json'))
+logging.config.dictConfig(log_config)
+logger = logging.getLogger('logger')
+
+
+def update_endpoint_if_exists():
+    """Return whatever ``env.model_exists()`` reports for the configured model."""
+    already_deployed = env.model_exists()
+    return already_deployed
+
+
+def delete_endpoint_and_config():
+    """
+    Delete the endpoint and the endpoint config named after ``model_name``.
+
+    Need to manually delete the endpoint and config because of
+    https://github.com/aws/sagemaker-python-sdk/issues/101#issuecomment-607376320.
+    """
+    # Env provides get_client(); it defines no client() method.
+    model_name = env.get('model_name')
+    client = env.get_client()
+    client.delete_endpoint(EndpointName=model_name)
+    client.delete_endpoint_config(EndpointConfigName=model_name)
+
+
+def deploy():
+    """Build the PyTorch model for the active environment and deploy it.
+
+    An already-deployed endpoint with the same name is removed first; the new
+    predictor is then set up to exchange JSON.
+    """
+    logger.info(f"Deploying model_name={env.get('model_name')}")
+
+    endpoint_name = env.get('model_name')
+    execution_role = sagemaker.get_execution_role()
+    model_settings = dict(
+        model_data=env.get('model_data_path'),
+        name=endpoint_name,
+        framework_version='1.5.0',
+        role=execution_role,
+        source_dir="../src",
+        entry_point='translate.py',
+    )
+    pytorch_model = PyTorchModel(**model_settings)
+
+    # Clear out a previous deployment before creating the new endpoint.
+    if env.model_exists():
+        delete_endpoint_and_config()
+
+    # NOTE(review): the instance type is fixed here; config.yaml also defines
+    # an instance_type per environment -- confirm which one is intended.
+    predictor = pytorch_model.deploy(
+        instance_type="ml.m4.xlarge",
+        initial_instance_count=1,
+    )
+    predictor.content_type = 'application/json'
+    predictor.serializer = json_serializer
+    predictor.deserializer = json_deserializer
+
+
+if __name__ == '__main__':
+    deploy()
diff --git a/deployment/env.py b/deployment/env.py
@@ -0,0 +1,64 @@
+import os
+
+import boto3
+import botocore
+import sagemaker
+import yaml
+
+
+class Env:
+    """Deployment settings and SageMaker clients for the active environment.
+
+    Settings are read from ``config.yaml`` located next to this module. The
+    active environment is taken from the ``ENVIRONMENT`` variable and defaults
+    to ``"local"``.
+    """
+
+    def __init__(self):
+        # NOTE: these two attributes are initialised here but not read by any
+        # method of this class.
+        self._client = None
+        self._runtime_client = None
+        self.config_dirname = os.path.dirname(__file__)
+        self.config_filename = os.path.join(self.config_dirname, 'config.yaml')
+        self.data = self.load_config()
+
+    def load_config(self):
+        """Parse and return the contents of the YAML config file.
+
+        Raises:
+            OSError: If the config file cannot be opened.
+            yaml.YAMLError: If the file is not valid YAML. The error is left to
+                propagate so a broken config fails here rather than later as
+                an unrelated ``TypeError`` inside ``get()``.
+        """
+        with open(self.config_filename, 'r') as stream:
+            return yaml.safe_load(stream)
+
+    @property
+    def current_env(self):
+        """Name of the active environment (``ENVIRONMENT``, default ``"local"``)."""
+        return os.environ.get("ENVIRONMENT", "local")
+
+    def get(self, name):
+        """Return setting ``name`` for the active environment.
+
+        Raises:
+            KeyError: If the environment or the setting is not in the config.
+        """
+        return self.data["environments"][self.current_env][name]
+
+    def model_exists(self):
+        """
+        Checks if the model is deployed.
+        IMPORTANT: always returns `False` for local endpoints as LocalSagemakerClient.describe_endpoint()
+        seems to always throw:
+        botocore.exceptions.ClientError: An error occurred (ValidationException) when calling the describe_endpoint operation: Could not find local endpoint
+        """
+        client = self.get_client()
+        try:
+            client.describe_endpoint(EndpointName=self.get("model_name"))
+        except botocore.exceptions.ClientError:
+            # Any client error from the probe is treated as "not deployed".
+            return False
+        return True
+
+    def runtime_client(self):
+        """Return a new runtime client: local for ``"local"``, boto3 otherwise."""
+        if self.current_env == "local":
+            return sagemaker.local.LocalSagemakerRuntimeClient()
+        return boto3.client('sagemaker-runtime')
+
+    def get_client(self):
+        """Return a new SageMaker client: local for ``"local"``, boto3 otherwise."""
+        if self.current_env == "local":
+            return sagemaker.local.LocalSagemakerClient()
+        return boto3.client('sagemaker')
diff --git a/deployment/logging.json b/deployment/logging.json
@@ -0,0 +1,46 @@
+{
+    "version": 1.0,
+    "disable_existing_loggers": false,
+    "formatters": {
+        "basic": {
+            "format": "\n%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s:%(lineno)d:\n%(message)s"
+        }
+    },
+    "handlers": {
+        "console": {
+            "class": "logging.StreamHandler",
+            "level": "DEBUG",
+            "formatter": "basic",
+            "stream": "ext://sys.stdout"
+        },
+        "info_file_handler": {
+            "class": "logging.handlers.RotatingFileHandler",
+            "level": "INFO",
+            "formatter": "basic",
+            "filename": "logs/info.log",
+            "maxBytes": 10485760,
+            "backupCount": 10,
+            "encoding": "utf8"
+        },
+        "error_file_handler": {
+            "class": "logging.handlers.RotatingFileHandler",
+            "level": "ERROR",
+            "formatter": "basic",
+            "filename": "logs/errors.log",
+            "maxBytes": 10485760,
+            "backupCount": 10,
+            "encoding": "utf8"
+        }
+    },
+    "loggers": {
+        "logger": {
+            "level": "DEBUG",
+            "handlers": [
+                "console",
+                "info_file_handler",
+                "error_file_handler"
+            ],
+            "propagate": false
+        }
+    }
+}
diff --git a/deployment/test.py b/deployment/test.py
@@ -0,0 +1,27 @@
+import boto3
+import json
+from env import Env
+
+# Smoke test: send one JSON request to the deployed endpoint and print the reply.
+env = Env()
+
+# NOTE(review): this always talks to AWS in eu-central-1; Env.runtime_client()
+# is not used here -- confirm that is intended for the "local" environment.
+runtime = boto3.Session().client(service_name='sagemaker-runtime', region_name='eu-central-1')
+
+endpoint_name = env.get('model_name')
+print("Attempting to invoke model_name=%s / env=%s..." % (endpoint_name, env.current_env))
+
+# A single request object naming the source, target and output files.
+request = {
+    'src': '../data/test/one_phrase.txt',
+    'tgt': '../data/test/one_phrase.target.txt',
+    'output': '../data/test/pred.txt',
+}
+payload = [request]
+
+response = runtime.invoke_endpoint(
+    EndpointName=endpoint_name,
+    ContentType="application/json",
+    Accept="application/json",
+    Body=json.dumps(payload),
+)
+
+print("Response=", response)
+raw_body = response['Body'].read()
+response_body = json.loads(raw_body)
+print(json.dumps(response_body, indent=4))
diff --git a/deployment/utils.py b/deployment/utils.py
@@ -0,0 +1,7 @@
+import os
+
+
+def create_dirs(dirpath):
+    """Create ``dirpath`` and any missing parents; do nothing if it already exists.
+
+    Args:
+        dirpath: Directory path to create.
+
+    Raises:
+        FileExistsError: If ``dirpath`` exists but is not a directory.
+    """
+    # exist_ok avoids the check-then-create race of a separate exists() test.
+    os.makedirs(dirpath, exist_ok=True)
diff --git a/requirements.txt b/requirements.txt
@@ -6,5 +6,13 @@ future
 configargparse
 tensorboard
 tensorboardX
-nltk
-iranlowo
+nltk~=3.5
+iranlowo~=0.0.8.3
+OpenNMT-py
+numpy~=1.18.4
+boto3~=1.13.6
+click~=7.0
+botocore~=1.16.13
+sagemaker~=1.58.0
+# PyYAML is the distribution that provides the `yaml` module; there is no
+# separate `yaml` distribution for pip to install.
+pyyaml~=5.3.1
diff --git a/src/onmt/__init__.py b/src/onmt/__init__.py
diff --git a/src/onmt/bin/__init__.py b/src/onmt/bin/__init__.py