Merge branch 'develop'

Wonters · Wonters · commit b7999e009aea · 2025-04-27T20:15:50.000+02:00
diff --git a/.idea/sentimental_analyses.iml b/.idea/sentimental_analyses.iml
diff --git a/README.md b/README.md
@@ -1,5 +1,22 @@
 # Sentimental analyses with MLFLOW and models Wrappers
 
+[![version](https://img.shields.io/badge/version-1.0.0-green.svg)](https://semver.org)
+
+## Table of contents
+- [Overview](#overview)
+- [Architecture](#architecture)
+- [Install](#install)
+- [Usage](#usage)
+- [Contributing](#contributing)
+- [Production](#production)
+- [Monitoring](#monitoring)
+- [Api](#api)
+- [License](#license)
+- [Author](#author)
+- [Thanks](#thanks)
+
+## Overview
+
 Tweet sentimental analyses with different models.
 
 Four wrapper of models:
@@ -15,18 +32,7 @@ MLFlow is used to list all experiments and easily commpare results for several d
 Optuna is used to optimise parameters. It run a set of experiments with a variation of parameters and select the best configuration
 maximising the accuracy.
 
-
-
-The app is dockerised and can be installed launching the command
-```bash
-docker compose up 
-```
-or to run in background
-```bash
-docker compose up -d 
-```
-
-## Access and architecture
+## Architecture
 The application contains alerting system and monitoring on grafana on port 3000
 APP      PORT
 MLFLOW   5001
@@ -42,15 +48,36 @@ Prometheus send metrics as the number of prediction running.
 An alert is send by mail when number of predictions in concurrency are up to 5.
 An alert is send when the result of the prediction is too bad, probability < 0.5.
 
-## Installation in dev  
-# Install uv (Rust package to fastly install package)
+
+## Install
+
+The app is dockerised and can be installed launching the command
+```bash
+docker compose up 
+```
+or to run in background
+```bash
+docker compose up -d 
+```
+
+
+## Contributing  
+#### Install uv (a fast Python package installer written in Rust)
 ```bash
 curl -Ls https://astral.sh/uv/install.sh | bash
 export PATH="$HOME/.cargo/bin:$PATH"
 ```
+#### Mount the source code in the container
+Modify `docker-compose.yaml` to mount the source code as volumes:
+```yaml
+volumes:
+  - ./src:/app/src/
+  - ./mlruns:/app/mlruns/
+```
 
-## OVH Train with AI train 
+## Usage
 
+### OVH Train with AI train 
 Create an object storage on OVH managed with ovhai cli
 The secret key is obtain clicking on the user object storage line 'access secret key'
 ```bash
@@ -65,7 +92,7 @@ Credentials are stored in ~/.config/ovhai/context.json
 uv pip install boto3 awscli ovhai
 ```
 
-## Run on multi GPU
+### Run on multi GPU
 DEBUG
 ```bash
 export TORCH_DISTRIBUTED_DEBUG=DETAIL
@@ -74,16 +101,36 @@ export TORCH_DISTRIBUTED_DEBUG=DETAIL
 python -m torch.distributed.run --nproc_per_node=2 train.py
 ```
 
-## Tests
+### Tests
 ```bash
 pytest src/tests
 ```
 
-## Launch a test to verify the prection from the API
+### Launch a test to verify the prediction from the API
 Go on 127.0.0.1:5000, tap your tweet and click on predict button
 
-Par requête http
+
+## Production
+An example deployment is available at https://tweetsentiment.shift.python.software.fr
+
+## Monitoring
+Add alerts, monitoring rules and dashboards in Grafana on your local instance
+and save them in the `grafana` folder.
+Reload Grafana and they will be available on http://localhost:3000 as provisioning templates.
+
+## Api
+You can query the example API deployment,
+or change the URL in the script `predict_client.py` to test your own instance:
 ```bash
 export $(cat .env | xargs)
-python post.py 
+python predict_client.py 
 ```
+## License
+
+MIT License
+
+## Author
+Shift python software
+
+## Thanks
+Thanks to all contributors
diff --git a/src/tests/test_ml.py b/src/tests/test_ml.py
@@ -0,0 +1,116 @@
+#import lightgbm
+import pytest
+import time
+from fastapi.testclient import TestClient
+from ..ml import (
+    LogisticRegressionModel,
+    load_data,
+    BertModel,
+    RobertaModel,
+    LSTMModel,
+    RandomForestModel,
+    LightGBMModel,
+)
+from transformers import PreTrainedModel
+from ..server import app
+
+class BaseTest:  # Shared test base: subclasses set class_model and inherit the tests below.
+    file = "data/tweets_test_train.csv"  # CSV path handed to load_data() in setup_class
+    class_model = None  # model wrapper class to instantiate; each subclass overrides it
+
+    @classmethod
+    def setup_class(cls):  # pytest xunit-style class setup: loads the data and stores the model on cls.model
+        df = load_data(cls.file)
+        cls.model = cls.class_model(dataset=df)  # NOTE(review): would fail on BaseTest itself (class_model is None); presumably never collected because the name lacks the Test prefix - confirm
+
+    def test_train(self):  # smoke test: passes as long as train() does not raise
+        self.model.train()
+
+    def test_tokenizer(self):  # smoke test: tokenizer.transform() on x_train must not raise
+        self.model.tokenizer.transform(self.model.x_train)
+
+    def test_preprocessing(self):  # smoke test: preprocessing() on x_train must not raise
+        self.model.preprocessing(self.model.x_train)
+
+
+class TestLogisticRegressionModel(BaseTest):  # runs the inherited BaseTest suite against LogisticRegressionModel
+    class_model = LogisticRegressionModel
+
+    def test_predict(self):  # pins the predictions for x_test to a hard-coded label list
+        result = self.model.predict(list(self.model.x_test))
+        print(result, self.model.y_test.values)  # debug output only; y_test is not asserted against
+        assert result.tolist() == [0, 1, 0, 0, 0, 0]  # NOTE(review): presumably relies on deterministic training of the shared cls.model - confirm
+
+
+class TestLightGBMModel(BaseTest):  # runs the inherited BaseTest suite against LightGBMModel
+    class_model = LightGBMModel
+
+    def test_train(self):  # NOTE(review): same body as BaseTest.test_train, so this override looks redundant
+        self.model.train()
+
+
+class TestBertModel(BaseTest):  # runs the inherited BaseTest suite against BertModel, plus the tests below
+    class_model = BertModel
+
+    def test_predict(self):  # predict() results are indexed by 'prediction' here; the labels are pinned to a hard-coded list
+        result = self.model.predict(list(self.model.x_test))
+        assert [r['prediction'] for r in result] == [1, 1, 0, 0, 0, 0]  # NOTE(review): presumably relies on deterministic training - confirm
+
+    def test_confusion_matrix(self):  # smoke test: confusion_matrix() must not raise; its output is not checked
+        self.model.confusion_matrix()
+
+    def test_optuna_train(self):  # smoke test: optuna_train() is called with n_trials=5; nothing is asserted
+        self.model.optuna_train(n_trials=5)
+
+class TestRobertaModel(BaseTest):  # runs the inherited BaseTest suite against RobertaModel, plus the tests below
+    class_model = RobertaModel
+
+    def test_optuna_train(self):  # smoke test: optuna_train() is called with n_trials=5; nothing is asserted
+        self.model.optuna_train(n_trials=5)
+
+    def test_predict(self):  # predict() results are indexed by 'prediction' here; the labels are pinned to a hard-coded list
+        result = self.model.predict(list(self.model.x_test))
+        print(result, self.model.y_test.values)  # debug output only; y_test is not asserted against
+        assert [r['prediction'] for r in result] == [0, 0, 1, 0, 0, 0]  # NOTE(review): presumably relies on deterministic training - confirm
+
+
+class TestLSTMModel(BaseTest):  # runs the inherited BaseTest suite against LSTMModel, plus the tests below
+    class_model = LSTMModel
+
+    def test_size_vocab(self):  # only prints tokenizer.vocab_size; there is no assertion
+        print(self.model.tokenizer.vocab_size)
+
+
+    def test_predict(self):  # pins the predictions for x_test to a hard-coded label list
+        result = self.model.predict(list(self.model.x_test))
+        assert result.tolist() == [1, 0, 0, 0, 0, 0]  # NOTE(review): presumably relies on deterministic training - confirm
+
+
+class TestRandomForestModel(BaseTest):  # runs only the inherited BaseTest tests against RandomForestModel
+    class_model = RandomForestModel
+
+
+class TestServer:  # drives `app` (imported from ..server) through fastapi.testclient.TestClient
+
+    @classmethod
+    def setup_class(cls):  # creates one TestClient and stores it on the class as cls.client
+        cls.client = TestClient(app)
+
+    def test_main(self):  # GET / must answer with status 200
+        rep = self.client.get("/")
+        assert rep.status_code == 200
+
+    def test_predict(self):  # posts one text to /predict, then polls /get_result/{task_id} until the status leaves "processing"
+        response = self.client.post("/predict", json=[{"text": "hello world"}])
+        assert response.status_code == 200
+        payload = response.json()
+        assert payload["status"] == "processing"
+        task_id = payload["task_id"]
+        response = self.client.get(f"/get_result/{task_id}")
+        payload = response.json()
+        while payload["status"] == "processing":  # NOTE(review): no timeout - loops forever if the task never leaves "processing"
+            response = self.client.get(f"/get_result/{task_id}")
+            time.sleep(1)  # the sleep comes after the request, so the payload read below is about 1 s old
+            payload = response.json()
+            print(payload)
+        # assert response.json() == {}  # NOTE(review): disabled, so the final payload is never checked
diff --git a/templates/index.html b/templates/index.html
@@ -47,7 +47,7 @@ <h2 class="text-justify m-2"> Bienvenue Sur TweetSentimentPredict </h2>
         body: JSON.stringify(payload)
       }).then(res => res.json()).then(res => {
         button.disabled = true;
-        ws = new WebSocket("ws://127.0.0.1:5000/ws/" + res.task_id)
+        ws = new WebSocket(`ws://${window.location.host}/ws/` + res.task_id)
         ws.onmessage = (event) => {
           const element = document.getElementById("result")
           const payload = JSON.parse(event.data)