bert learning

Wonters · Wonters · commit b10f0d8c3abb · 2025-04-23T10:41:19.000+02:00
diff --git a/README.md b/README.md
@@ -1,6 +1,35 @@
-# sentimental_analyses
-Tweet sentimental analyses
+# Sentiment analysis with MLflow and model wrappers
 
+Tweet sentiment analysis with different models.
+
+Six model wrappers:
+ - Logistic Regression
+ - Random Forest
+ - LightGBM
+ - Bert 
+ - Roberta 
+ - LSTM
+
+MLflow is used to list all experiments, easily compare results across several different configurations, and select the best ones.
+
+Optuna is used to optimise parameters. It runs a set of experiments with varying parameters and selects the configuration
+that maximises accuracy.
+
+
+The app is dockerised and can be installed by launching the command
+```bash
+docker compose up 
+```
+or to run in background
+```bash
+docker compose up -d 
+```
+
+## Access and architecture
+The application includes an alerting system and Grafana monitoring on port 3000.
+MLFLO
+
+## Installation in dev  
 # Install uv (Rust package to fastly install package)
 ```bash
 curl -Ls https://astral.sh/uv/install.sh | bash
diff --git a/src/ml.py b/src/ml.py
@@ -23,7 +23,7 @@
 from sklearn.metrics import confusion_matrix, classification_report
 from sklearn.ensemble import RandomForestClassifier
 import seaborn as sns
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
 import pandas as pd
 from skopt import BayesSearchCV, gp_minimize
 from skopt.space import Real, Categorical
@@ -421,8 +421,11 @@ def load_checkpoint(self):
             self.parralle_model()
 
         self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
-        self.scheduler = torch.optim.lr_scheduler.StepLR(
-            self.optimizer, step_size=8, gamma=0.248
+        total_steps = len(self.dataloader) * self.epoch
+        self.scheduler = get_linear_schedule_with_warmup(
+            self.optimizer,
+            num_warmup_steps=int(0.1 * total_steps),  # 10% warmup
+            num_training_steps=total_steps
         )
         self.criterion = torch.nn.CrossEntropyLoss()