-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
77 lines (61 loc) · 2.24 KB
/
app.py
File metadata and controls
77 lines (61 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
# Json dosyasını okuma
with open("data.json", "r", encoding="UTF-8") as f:
data = json.load(f)
df = pd.DataFrame(data)
stop_words = set(stopwords.words('turkish'))
# Metin ön işleme
def filter(text):
text = text.encode('ascii', 'ignore').decode('ascii')
text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
text = re.sub(r'\s+', ' ', text)
text = text.replace("<br />"," ")
text = text.strip()
return text
# Stopwords kaldırma
def remove_stopwords(text):
words = word_tokenize(text)
filtered_words = [word for word in words if word.lower() not in stop_words]
return ' '.join(filtered_words)
# Metin verileri ve duygu etiketleri
X = df['Görüş']
y = df['Durum']
# Veri ön işleme
X = X.apply(filter)
X = X.apply(remove_stopwords)
# Veriyi eğitim ve test verilerine ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Metin verilerini TF-IDF özellikleri olarak dönüştürme
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)
# Naive Bayes sınıflandırma modelini eğitme
naive_bayes = MultinomialNB()
naive_bayes.fit(X_train_tfidf, y_train)
# Modeli kullanarak tahminler yapma
y_pred = naive_bayes.predict(X_test_tfidf)
# Modelin doğruluğunu değerlendirme
accuracy = accuracy_score(y_test, y_pred)
print("Model Doğruluğu:", accuracy)
# Sınıflandırma raporunu yazdırma
print(classification_report(y_test, y_pred))
# Yeni metinlerin duygusunu tahmin etme
print("Yeni metinlerin duygusunu tahmin edin: ")
metin1 = input("Metin 1: ")
metin2 = input("Metin 2: ")
print()
new_texts = [metin1, metin2]
new_texts_tfidf = tfidf_vectorizer.transform(new_texts)
new_predictions = naive_bayes.predict(new_texts_tfidf)
for i, text in enumerate(new_texts):
print(f"Metin: {text}")
print(f"Duygu: {new_predictions[i]}")
print()