intelygenz · kafkaphoenix · Apr 15, 2025
diff --git a/EXPLANATION.md b/EXPLANATION.md
@@ -0,0 +1,44 @@
+# Explanation of the project
+
+## Description
+
+As MLEs, we take responsibility for the entire ML pipeline, from data ingestion to model deployment. In this case, we use Docker to containerize our API so that it is easy to deploy and run in production. For testing purposes, we use Docker Compose to run the API and the model in a local environment, and a Makefile to automate building and running the Docker containers.
+
+## Project Structure
+
+```markdown
+pipeline/
+|
+├── api/
+|   ├── main.py
+|   ├── Dockerfile
+|   ├── pyproject.toml
+|   ├── uv.lock
+├── processes/
+|   ├── preprocess/
+|   |   ├── main.py
+|   |   ├── Dockerfile
+|   |   ├── pyproject.toml
+|   |   ├── uv.lock
+|   ├── inference/
+|   |   ├── main.py
+|   |   ├── Dockerfile
+|   |   ├── pyproject.toml
+|   |   ├── uv.lock
+|   ├── train/
+|   |   ├── main.py
+|   |   ├── Dockerfile
+|   |   ├── pyproject.toml
+|   |   ├── uv.lock
+```
+
+`pipeline/` is the root folder of the project. It contains the following folders:
+- `api/`: contains the API code and its Dockerfile
+- `processes/`: contains the code for the individual processes (preprocess, inference, train), each with its own Dockerfile
+
+## Requirements
+- Python 3.13 or newer (the `pyproject.toml` files declare `requires-python = "~=3.13"`)
+- Docker
+- Docker Compose
+- Make
+- [UV](https://docs.astral.sh/uv/getting-started/installation/)
diff --git a/pipeline/api/Dockerfile b/pipeline/api/Dockerfile
diff --git a/pipeline/api/README.md b/pipeline/api/README.md
diff --git a/pipeline/api/__init__.py b/pipeline/api/__init__.py
diff --git a/pipeline/api/main.py b/pipeline/api/main.py
@@ -0,0 +1,20 @@
+from fastapi import FastAPI
+
+from models import InferenceRequest, InferenceResponse
+
+# FastAPI application instance; the route decorators below register handlers on it.
+app = FastAPI()
+
+@app.get("/health")
+def health_check():
+    """Return a fixed ``{"status": "ok"}`` payload for ``GET /health``."""
+    payload = {"status": "ok"}
+    return payload
+
+@app.post("/inference")
+def inference(input: InferenceRequest) -> InferenceResponse:
+    """Return a placeholder prediction for the submitted request.
+
+    The route is registered as POST because ``input`` is a Pydantic model and
+    is therefore read from the request body; sending a body with GET is
+    discouraged and is not documented by the interactive API docs.
+
+    Args:
+        input: The inference request. It is not used yet.
+
+    Returns:
+        A hard-coded ``InferenceResponse`` with status "success", a single
+        prediction and a model-version entry in the metadata.
+    """
+    # Placeholder for inference logic
+    # Replace with actual model inference code
+    return InferenceResponse(
+        status="success",
+        predictions=[{"label": "cat", "confidence": 0.95}],
+        metadata={"model_version": "1.0"},
+    )
diff --git a/pipeline/api/models.py b/pipeline/api/models.py
@@ -0,0 +1,26 @@
+from pydantic import BaseModel, Field, field_validator
+
+
+class InferenceRequest(BaseModel):
+    """Request model for inference."""
+
+    input_data: str = Field(..., description="Input data for the model.")
+
+    @field_validator("input_data")
+    @classmethod
+    def validate_input_data(cls, value: str) -> str:
+        """Reject non-string input and return the value unchanged.
+
+        Field validators run in "after" mode by default, so Pydantic has
+        already enforced the ``str`` annotation when this is called; the
+        check is kept as an explicit guard.
+
+        Raises:
+            ValueError: If ``value`` is not a string.
+        """
+        if not isinstance(value, str):
+            raise ValueError("Input data must be a string.")
+        return value
+
+class InferenceResponse(BaseModel):
+    """Response model for inference."""
+
+    status: str = Field(..., description="Status of the inference request.")
+    predictions: list[dict] = Field(..., description="Predictions from the model.")
+    metadata: dict = Field(..., description="Metadata about the inference process.")
+
+    @field_validator("status")
+    @classmethod
+    def validate_status(cls, value: str) -> str:
+        """Ensure ``status`` is one of the two accepted values.
+
+        Raises:
+            ValueError: If ``value`` is neither "success" nor "failure".
+        """
+        if value not in ("success", "failure"):
+            raise ValueError("Status must be 'success' or 'failure'.")
+        return value
diff --git a/pipeline/api/pyproject.toml b/pipeline/api/pyproject.toml
@@ -0,0 +1,13 @@
+[project]
+name = "api"
+version = "0.1.0"
+description = "Rest API"
+readme = "README.md"
+requires-python = "~=3.13"
+dependencies = [
+    "fastapi==0.115.12",
+    "loguru==0.7.3",
+]
+
+[dependency-groups]
+dev = ["pytest==8.3.5", "ruff==0.11.5"]
diff --git a/pipeline/api/uv.lock b/pipeline/api/uv.lock
diff --git a/pipeline/processes/inference/Dockerfile b/pipeline/processes/inference/Dockerfile
diff --git a/pipeline/processes/inference/README.md b/pipeline/processes/inference/README.md
diff --git a/pipeline/processes/inference/__init__.py b/pipeline/processes/inference/__init__.py
diff --git a/pipeline/processes/inference/main.py b/pipeline/processes/inference/main.py
@@ -0,0 +1,8 @@
+from loguru import logger
+
+def main() -> None:
+    """Log the start, progress and end markers of the inference process."""
+    logger.info("Inference started")
+
+    logger.info("Inferencing...")
+
+    logger.info("Inference finished")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pipeline/processes/inference/pyproject.toml b/pipeline/processes/inference/pyproject.toml
@@ -0,0 +1,12 @@
+[project]
+name = "predict"
+version = "0.1.0"
+description = "Process to make an inference prediction"
+readme = "README.md"
+requires-python = "~=3.13"
+dependencies = [
+    "loguru==0.7.3",
+]
+
+[dependency-groups]
+dev = ["pytest==8.3.5", "ruff==0.11.5"]
diff --git a/pipeline/processes/inference/uv.lock b/pipeline/processes/inference/uv.lock
diff --git a/pipeline/processes/preprocess/Dockerfile b/pipeline/processes/preprocess/Dockerfile
diff --git a/pipeline/processes/preprocess/README.md b/pipeline/processes/preprocess/README.md
diff --git a/pipeline/processes/preprocess/__init__.py b/pipeline/processes/preprocess/__init__.py
diff --git a/pipeline/processes/preprocess/main.py b/pipeline/processes/preprocess/main.py
@@ -0,0 +1,8 @@
+from loguru import logger
+
+def main() -> None:
+    """Log the start, progress and end markers of the preprocess step."""
+    logger.info("Preprocess started")
+
+    logger.info("Preprocessing...")
+
+    logger.info("Preprocess finished")
+
+
+if __name__ == "__main__":
+    main()