Skip to content

Commit 17f20d4

Browse files
committed
WIP
1 parent 096682d commit 17f20d4

40 files changed

Lines changed: 356 additions & 0 deletions
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: Deploy alerting alerting
run-name: ${{ github.actor }} is testing out GitHub Actions 🚀

# Jobs have no `branch:` key, so branch filtering is done here (which pushes
# start the workflow) and with a job-level `if:` on the deploy job.
on:
  push:
    branches:
      - develop
      - main

jobs:
  test:
    # NOTE(review): `python` is not a GitHub-hosted runner label; this needs a
    # self-hosted runner registered with that label - confirm.
    runs-on: python
    steps:
      # Fetch the repository so requirements.txt and tests.py exist on the runner.
      - uses: actions/checkout@v4
      - run: pip install -r requirements.txt
      - run: pytest --version
      - run: pytest tests.py

  deploy:
    # Deploy only after the tests passed, and only for pushes to main.
    needs: test
    if: github.ref == 'refs/heads/main'
    # NOTE(review): `docker` must likewise be a label of a self-hosted runner.
    runs-on: docker
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
      - run: echo "🖥️ The workflow is now ready to test your code on the runner."
      # NOTE(review): assumes the private key is already provisioned at
      # ~/.ssh/id_rsa on the runner; consider a repository secret instead.
      - run: ssh -i ~/.ssh/id_rsa root@38.0.101.76 "cd /home/ubuntu/sentimental_analyses && git pull origin main && ./install.sh"

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__pycache__/

Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# The official image is `python`; `slim` is a tag variant. 3.11 matches the
# python_version recorded in the MLflow model metadata of this commit.
FROM python:3.11-slim
LABEL authors="wonters"

# Install procps and drop the apt package lists so they do not bloat the layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends procps \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Port gunicorn binds to below.
EXPOSE 5000
ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:5000", "app:server"]
13.7 MB
Binary file not shown.
4.5 MB
Binary file not shown.

install.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
# Build the application image and (re)start the `server` container.
# Abort on the first failing command, unset variable or failed pipeline stage.
set -euo pipefail

docker build -t sentimental_analyses:latest .

# Remove a container left over from a previous deploy so the name is free;
# ignore the error when no such container exists.
docker rm -f server >/dev/null 2>&1 || true

# Run detached: this script is invoked over non-interactive ssh, where `-it`
# fails because no TTY is attached. The long option is `--name`, not `-name`.
docker run -d --name server -p 5000:5000 sentimental_analyses:latest

ml.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import numpy
2+
from sklearn.linear_model import LogisticRegression
3+
from sklearn.model_selection import train_test_split
4+
import joblib
5+
from pathlib import Path
6+
from abc import ABC
7+
from typing import Union
8+
from sklearn.feature_extraction.text import TfidfVectorizer
9+
import torch
10+
# Pick the best available accelerator and fall back to the CPU so that DEVICE
# is always defined, including on machines with neither CUDA nor MPS.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")
14+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
15+
import pandas as pd
16+
import logging
17+
import torch.nn.functional as F
18+
from tqdm import tqdm
19+
import mlflow
20+
21+
logger = logging.getLogger(__name__)
22+
23+
24+
25+
SENTIMENT_LABELS = {
26+
0: "😡 unsatisfy",
27+
4: "😊 satisfy",
28+
}
29+
30+
class TweetDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(tweet, inputs, label)`` triples.

    Args:
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors="pt",
            truncation=True, padding="max_length", max_length=...)`` and
            returning a mapping of tensors with a leading batch dimension.
        tweets: Iterable of tweet texts.
        labels: Iterable of targets, one per tweet.
        max_length: Fixed length every encoded tweet is padded/truncated to.

    Raises:
        ValueError: If ``tweets`` and ``labels`` differ in length.
    """

    def __init__(self, tokenizer, tweets, labels, max_length=128):
        self.tokenizer = tokenizer
        # Materialise as lists so that ``self.tweets[idx]`` is positional.
        # A pandas Series indexes by label, which raises KeyError once the
        # index has been shuffled (e.g. by train_test_split).
        self.tweets = list(tweets)
        self.labels = list(labels)
        if len(self.tweets) != len(self.labels):
            raise ValueError(
                f"tweets and labels differ in length: "
                f"{len(self.tweets)} != {len(self.labels)}"
            )
        self.max_length = max_length

    def __len__(self):
        """Return the number of tweets."""
        return len(self.tweets)

    def __getitem__(self, idx):
        """Return the raw tweet, its encoded tensors and its label."""
        tweet = self.tweets[idx]
        label = self.labels[idx]
        encoded = self.tokenizer(
            tweet,
            return_tensors="pt",
            truncation=True,
            # Pad to a fixed length: padding a single text to "longest" is a
            # no-op and leaves items of different lengths, which the default
            # DataLoader collation cannot stack.
            padding="max_length",
            max_length=self.max_length,
        )
        # Drop the batch dimension of size 1 so collation produces
        # (batch, seq) tensors instead of (batch, 1, seq).
        inputs = {key: value.squeeze(0) for key, value in encoded.items()}
        return tweet, inputs, label
44+
45+
46+
class BaseModel(ABC):
    """Common interface for the sentiment models defined in this module.

    Subclasses set ``checkpoint`` (path of the persisted model) and
    ``tokenizer`` and implement :meth:`train` and :meth:`predict`.
    """

    checkpoint: str = ""
    tokenizer = None

    def __init__(self):
        self.model = None
        self.dataset = None

    def mlflow_record(self, params: dict, metrics: dict, model, model_name: str, **kwargs):
        """Log one training run to MLflow.

        Args:
            params: Hyper-parameters passed to ``mlflow.log_params``.
            metrics: Metric name -> value, logged one by one.
            model: Fitted estimator, logged through ``mlflow.sklearn``.
            model_name: Artifact path used for the logged model.
            **kwargs: Accepted but currently unused.
        """
        with mlflow.start_run():
            mlflow.log_params(params)
            for k, v in metrics.items():
                mlflow.log_metric(k, v)
            mlflow.sklearn.log_model(model, model_name)
            # The checkpoint may not have been written yet (or not configured
            # at all); logging a missing file would abort the whole run.
            if self.checkpoint and Path(self.checkpoint).is_file():
                mlflow.log_artifact(self.checkpoint)

    def train(self, x_train, y_train):
        """Fit the model on ``x_train`` / ``y_train``; subclasses must override.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        # Fail loudly instead of silently returning None when a subclass
        # forgets to implement the method.
        raise NotImplementedError(f"{type(self).__name__} must implement train()")

    def predict(self, x: Union[pd.Series, numpy.ndarray]):
        """Predict sentiment labels for ``x``; subclasses must override.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement predict()")
66+
67+
68+
class LogisticRegressionModel(BaseModel):
    """TF-IDF + logistic-regression sentiment classifier persisted with joblib."""

    checkpoint = "checkpoints/logistic_regression.pkl"
    checkpoint_tokenizer = "checkpoints/Logistic_regression_tokenizer.pkl"

    def __init__(self):
        """Load the persisted model/vectorizer pair, or start from scratch."""
        super().__init__()
        model_path = Path(self.checkpoint)
        vectorizer_path = Path(self.checkpoint_tokenizer)
        if model_path.exists() and vectorizer_path.exists():
            # NOTE: joblib.load unpickles the files; only load checkpoints
            # produced by this project.
            self.model = joblib.load(model_path)
            self.tokenizer = joblib.load(vectorizer_path)
        else:
            if model_path.exists() != vectorizer_path.exists():
                # A model is only usable with the vectorizer it was fitted
                # with, so a lone checkpoint is ignored rather than paired
                # with an unfitted counterpart.
                logger.warning(
                    "Incomplete checkpoint (%s / %s); starting untrained",
                    model_path,
                    vectorizer_path,
                )
            self.model = LogisticRegression()
            # Per-instance vectorizer: a class-level instance would be shared
            # and refitted in place by every model object.
            self.tokenizer = TfidfVectorizer()

    def predict(self, x: Union[pd.Series, numpy.ndarray]):
        """Return the display label for every text in ``x``.

        Args:
            x: Iterable of raw texts; a single string is treated as one text.

        Returns:
            List of ``SENTIMENT_LABELS`` values, one per input text.
        """
        if isinstance(x, str):
            # The vectorizer rejects a bare string; wrap it as one document.
            x = [x]
        features = self.tokenizer.transform(x)
        predicted_class = self.model.predict(features)
        logger.info("predicted %s", features.shape)
        return [SENTIMENT_LABELS[p] for p in predicted_class]

    def train(self, x_train, y_train):
        """Fit vectorizer and classifier, persist both, then log to MLflow.

        Args:
            x_train: Iterable of raw training texts.
            y_train: Target ids aligned with ``x_train``.
        """
        features = self.tokenizer.fit_transform(x_train)
        self.model.fit(features, y_train)

        # Persist before recording the run so that the checkpoint exists when
        # mlflow_record attaches it, and survives a failing MLflow call.
        for target in (self.checkpoint, self.checkpoint_tokenizer):
            Path(target).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(self.model, self.checkpoint)
        joblib.dump(self.tokenizer, self.checkpoint_tokenizer)

        params = self.model.get_params()
        # Accuracy on the training data itself, not a held-out score.
        metrics = {"score": self.model.score(features, y_train)}
        self.mlflow_record(params, metrics, self.model, "logistic_regression")
99+
100+
101+
class BertModel(BaseModel):
    """BERT sequence classifier fine-tuned to the classes in SENTIMENT_LABELS."""

    pretrained_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    checkpoint = "checkpoints/bert.pkl"
    # NOTE(review): evaluated when the class body runs, i.e. at import time.
    tokenizer = AutoTokenizer.from_pretrained(pretrained_name)
    # Dataset target ids in logit order: logit i <-> class_ids[i]. The loss
    # needs contiguous indices 0..n-1, while the dataset targets are 0 and 4.
    class_ids = tuple(sorted(SENTIMENT_LABELS))

    def __init__(self):
        """Build the model, restore a saved checkpoint if one exists."""
        super().__init__()
        self.device = self._select_device()
        # Ask for a head with one logit per class; the pretrained head has a
        # different size, so its weights are skipped and re-initialised.
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.pretrained_name,
            num_labels=len(self.class_ids),
            ignore_mismatched_sizes=True,
        )
        if Path(self.checkpoint).exists():
            # NOTE: torch.load unpickles the file; only load checkpoints
            # written by train() below.
            state = torch.load(self.checkpoint, map_location="cpu")
            self.model.load_state_dict(state)
        self.model.to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=2e-5)
        self.criterion = torch.nn.CrossEntropyLoss()

    @staticmethod
    def _select_device():
        """Return CUDA, then MPS, then CPU - whichever is available first."""
        if torch.cuda.is_available():
            return torch.device("cuda")
        mps = getattr(torch.backends, "mps", None)
        if mps is not None and mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    def _collate(self, batch):
        """Tokenise a list of ``(text, class_index)`` pairs as one batch."""
        texts, targets = zip(*batch)
        # Tokenising per batch lets padding=True pad to the longest text of
        # the batch, giving rectangular (batch, seq) tensors.
        encoded = self.tokenizer(
            list(texts), return_tensors="pt", truncation=True, padding=True
        )
        return encoded, torch.tensor(targets, dtype=torch.long)

    def train(self, x_train, y_train):
        """Fine-tune for three epochs and save the weights to ``checkpoint``.

        Args:
            x_train: Iterable of raw texts.
            y_train: Target ids aligned with ``x_train``; every id must be a
                key of ``SENTIMENT_LABELS``.

        Raises:
            ValueError: If ``y_train`` contains an unknown target id.
        """
        index_of = {class_id: i for i, class_id in enumerate(self.class_ids)}
        texts = [str(text) for text in x_train]
        try:
            targets = [index_of[int(label)] for label in y_train]
        except KeyError as err:
            raise ValueError(
                f"unknown target id {err.args[0]!r}; expected one of {self.class_ids}"
            ) from err

        dataloader = torch.utils.data.DataLoader(
            list(zip(texts, targets)),
            batch_size=32,
            shuffle=True,
            collate_fn=self._collate,
        )
        self.model.to(self.device)
        self.model.train()
        for epoch in range(3):
            for encoded, target in tqdm(dataloader):
                # Model and batch must live on the same device.
                encoded = {k: v.to(self.device) for k, v in encoded.items()}
                target = target.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(**encoded)
                loss = self.criterion(outputs.logits, target)
                loss.backward()
                self.optimizer.step()

        # Save the weights where __init__ looks for them.
        Path(self.checkpoint).parent.mkdir(parents=True, exist_ok=True)
        torch.save(self.model.state_dict(), self.checkpoint)

    def predict(self, x):
        """Return the display label(s) for ``x``.

        Args:
            x: A single text, or an iterable of texts.

        Returns:
            One ``SENTIMENT_LABELS`` value for a single string, otherwise a
            list with one value per text.
        """
        single = isinstance(x, str)
        texts = [x] if single else [str(text) for text in x]
        if not texts:
            return []
        inputs = self.tokenizer(
            texts, return_tensors="pt", truncation=True, padding=True
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        self.model.to(self.device)
        # Disable dropout for inference.
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(**inputs)
        # argmax over logits equals argmax over their softmax.
        predicted = torch.argmax(outputs.logits, dim=1).tolist()
        labels = [SENTIMENT_LABELS[self.class_ids[i]] for i in predicted]
        return labels[0] if single else labels
134+
135+
136+
def load_data(path, test_size=0.2, random_state=42):
    """Read the tweet CSV at ``path`` and split it into train and test parts.

    The file is read as latin-1 with no header row; columns are named
    ``target, ids, date, flag, user, text``.

    Args:
        path: Location of the CSV file.
        test_size: Share of rows held out for testing (passed to
            ``train_test_split``).
        random_state: Seed making the split reproducible.

    Returns:
        ``(train, test, y_train, y_test)``: the ``text`` column split into
        train/test, followed by the matching ``target`` values.
    """
    headers = ['target', 'ids', 'date', 'flag', 'user', 'text']
    df_tweets = pd.read_csv(path, names=headers, encoding="latin-1")
    train, test, y_train, y_test = train_test_split(
        df_tweets['text'],
        df_tweets['target'],
        test_size=test_size,
        random_state=random_state,
    )
    return train, test, y_train, y_test
142+
Binary file not shown.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
artifact_path: logistic_regression
2+
flavors:
3+
python_function:
4+
env:
5+
conda: conda.yaml
6+
virtualenv: python_env.yaml
7+
loader_module: mlflow.sklearn
8+
model_path: model.pkl
9+
predict_fn: predict
10+
python_version: 3.11.9
11+
sklearn:
12+
code: null
13+
pickled_model: model.pkl
14+
serialization_format: cloudpickle
15+
sklearn_version: 1.6.1
16+
mlflow_version: 2.21.3
17+
model_size_bytes: 4714109
18+
model_uuid: 0e5004f63f9c48109043be7db41fa2bb
19+
prompts: null
20+
run_id: 27cce8096d4f4875b725a4e734cc0e9f
21+
utc_time_created: '2025-04-04 11:02:08.032030'
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
channels:
2+
- conda-forge
3+
dependencies:
4+
- python=3.11.9
5+
- pip
6+
- pip:
7+
- mlflow==2.21.3
8+
- cloudpickle==3.1.1
9+
- numpy==2.2.4
10+
- pandas==2.2.3
11+
- scikit-learn==1.6.1
12+
- scipy==1.15.2
13+
name: mlflow-env

0 commit comments

Comments
 (0)