PassOrder-Review-Analysis/visualization_with_wordfillter.py at main · Yewooony/PassOrder-Review-Analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter

# 1. Load the review data and define an aggressive stop-word list
df = pd.read_csv(
    'passorder_final_data.csv',
    encoding='utf-8-sig',  # 'utf-8-sig' skips a leading UTF-8 byte-order mark if present
)

# Words to leave out of the visualization; keep appending to this list as needed.
refined_stop_words = [
    '패스', '오더', '진짜', '너무', '정말',
    '사용', '이용', '카페', '커피', '앱',
    '어플', '주문', '매장', '결제', '하나',
    '생각', '사람', '기능', '등록', '확인',
    '그냥', '항상', '자주', '매일', '다시',
    '조금', '포인트', '혜택', '최고', '완전',
    '매번',
]

# 2. Word filter for positive/negative reviews
def get_filtered_words(sentiment):
    """Return the ``words`` values for one sentiment with stop words removed.

    Reads the module-level ``df`` and ``refined_stop_words``.

    Args:
        sentiment: Value compared for equality against the ``sentiment``
            column of ``df`` (this script passes 'Positive' and 'Negative').

    Returns:
        A list of the non-null ``words`` cell values of the matching rows,
        in row order, excluding every value found in ``refined_stop_words``.
    """
    # Build the lookup set once per call so each membership test is O(1)
    # instead of a scan over the whole stop-word list.
    stop_words = set(refined_stop_words)

    # NOTE(review): every cell of 'words' is treated as a single token; this
    # assumes the CSV stores one word per row -- confirm against the data.
    words = df.loc[df['sentiment'] == sentiment, 'words'].dropna()

    # Drop stop words.
    return [w for w in words if w not in stop_words]

# 3. Visualization (one word cloud per sentiment, each with its own colormap)

# Malgun Gothic, used to render the Korean words. This is an absolute Windows
# path, so the script needs a different font file on other platforms.
FONT_PATH = 'C:/Windows/Fonts/malgun.ttf'


def _draw_word_cloud(position, words, colormap, title):
    """Draw a word cloud for ``words`` in one panel of the current figure.

    Args:
        position: 1-based subplot index within the 1x2 grid.
        words: List of words; their frequencies determine the cloud.
        colormap: Matplotlib colormap name passed to ``WordCloud``.
        title: Title shown above the panel.

    Returns:
        The generated ``WordCloud``, or ``None`` when ``words`` is empty.
    """
    ax = plt.subplot(1, 2, position)
    frequencies = Counter(words)

    cloud = None
    if frequencies:
        cloud = WordCloud(
            font_path=FONT_PATH,
            background_color='white',
            width=800,
            height=800,
            colormap=colormap,
        ).generate_from_frequencies(frequencies)
        plt.imshow(cloud)
    else:
        # generate_from_frequencies raises ValueError when given no words;
        # show a placeholder so the other panel can still be rendered.
        ax.text(0.5, 0.5, 'No words to display', ha='center', va='center',
                fontsize=20, transform=ax.transAxes)

    plt.title(title, fontsize=25, pad=20)
    plt.axis('off')
    return cloud


plt.figure(figsize=(18, 9))

# Left: positive reviews ('GnBu' colormap)
pos_list = get_filtered_words('Positive')
wc_pos = _draw_word_cloud(1, pos_list, 'GnBu', 'Positive Reviews')

# Right: negative reviews ('YlOrRd' colormap, chosen to convey a warning tone)
neg_list = get_filtered_words('Negative')
wc_neg = _draw_word_cloud(2, neg_list, 'YlOrRd', 'Negative Reviews')

plt.tight_layout()
plt.show()