forked from KaraBeason/sentiment_analysis
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsentimentanalysis.py
169 lines (133 loc) · 6.24 KB
/
sentimentanalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# /**
# * Sentiment Analysis Task
# *
# * This python script runs sentiment analysis on a set of text files
# * containing online text assignment submissions. Two output files
# * are generated, a .pdf file, and a .csv file. The .pdf file has a
# * page for each submission, with its polarity, subjectivity, plus a
# * phrase-list. The first page is the collective sentiment assignment
# * analysis. The .csv contains a line with four fields for each
# * submission with the assignment name, user name, polarity, and
# * subjectivity.
# *
# * @author Kara Beason <[email protected]>
# * @copyright (c) 2019 Appalachian State University, Boone, NC
# * @license GNU General Public License version 3
# */
import codecs
import csv
import glob
import os
import sys
from reportlab.lib.colors import HexColor
from reportlab.lib.pagesizes import LETTER
from reportlab.pdfgen import canvas
from textblob import TextBlob
# Constants --------------------------------------------------
# Page-layout values below are passed straight to the reportlab canvas
# calls (presumably PDF points, reportlab's default unit -- confirm).

# x position of page headings; also the margin kept free at the top and
# bottom of every page (see TOP_OF_PAGE and the page-overflow check in
# build_reports).
STD_MARGIN = 100
# x position of the indented detail lines printed below a heading.
STD_INDENT = 125
# Vertical distance between two consecutive lines of text.
LINE_HEIGHT = 18
# NOTE(review): not referenced anywhere in the visible code.
HEADER_POS = 225
# y position of the first line on a page: page height minus the margin.
TOP_OF_PAGE = LETTER[1] - STD_MARGIN
# Default text color.
COLOR_BLACK = HexColor('#000000')
# Colors returned by get_polarity_color for negative, positive and
# neutral polarity scores respectively.
COLOR_RED = HexColor('#FF0000')
COLOR_GREEN = HexColor('#008000')
COLOR_GRAY = HexColor('#808080')
# Functions --------------------------------------------------
# Map a polarity score (a number; the callers in this file pass a blob's
# polarity value) to the color used to print it:
#   above  0.05 -> COLOR_GREEN (positive)
#   below -0.05 -> COLOR_RED   (negative)
#   otherwise   -> COLOR_GRAY  (neutral)
def get_polarity_color(polarity):
    # Guard clauses: the two ranges cannot overlap, so the order in which
    # they are tested does not matter.
    if polarity > 0.05:
        return COLOR_GREEN
    if polarity < -0.05:
        return COLOR_RED
    return COLOR_GRAY
# get_polarity_color
# Write the rows of the .csv report to `path`, UTF-8 encoded.
#
# The csv module wants a different kind of file object on Python 2 and on
# Python 3, so the interpreter version picks the branch:
#   Python 3 - text file opened with newline='' so the csv writer alone
#              controls the line endings.
#   Python 2 - binary file; unicode cells are encoded by hand because the
#              Python 2 csv writer cannot encode non-ASCII unicode itself.
def _write_csv_report(path, rows):
    if sys.version_info[0] >= 3:
        with open(path, "w", newline="", encoding="utf-8") as csvFile:
            csv.writer(csvFile).writerows(rows)
        return

    text_type = type(u"")
    with open(path, "wb") as csvFile:
        writer = csv.writer(csvFile)
        for row in rows:
            writer.writerow([
                cell.encode("utf-8") if isinstance(cell, text_type) else cell
                for cell in row
            ])
# _write_csv_report


# Build the output reports.
#
# Two files are written into the current working directory:
#   output.pdf - first page: overall sentiment of the assignment; after
#                that one page per submission (more when the assessment
#                list does not fit), ordered by submission name.
#   output.csv - a header row plus one row per submission with the fields
#                assignment, user, polarity, subjectivity.
#
# Parameters:
#   assignName     - assignment name, printed on the first PDF page and
#                    repeated in every CSV row.
#   collectiveBlob - blob of all submissions combined; its polarity and
#                    subjectivity attributes are read.
#   submissionDict - maps a display name to that submission's blob; each
#                    blob must provide polarity, subjectivity and
#                    sentiment_assessments.assessments.
def build_reports(assignName, collectiveBlob, submissionDict):
    pdfFile = canvas.Canvas("output.pdf", pagesize=LETTER)

    # Print the overall sentiment analysis on the first page.
    y = TOP_OF_PAGE
    pdfFile.drawString(STD_MARGIN, y, "Assignment: " + assignName)
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_MARGIN, y, "Overall Sentiment: ")
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_INDENT, y, "Polarity:")
    # Only the polarity value itself is printed in color.
    pdfFile.setFillColor(get_polarity_color(collectiveBlob.polarity))
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_INDENT, y, str(collectiveBlob.polarity))
    pdfFile.setFillColor(COLOR_BLACK)
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_INDENT, y, "Subjectivity:")
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_INDENT, y, str(collectiveBlob.subjectivity))
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_INDENT, y, "Green=Positive, Grey=Neutral, Red=Negative")
    y -= LINE_HEIGHT
    pdfFile.drawString(STD_INDENT, y, "Subjectivity: Scale 0=Objective, 1=Very Subjective ")

    # Close current page.
    pdfFile.showPage()

    # Create list for output to csv.
    csvList = [["assignment", "user", "polarity", "subjectivity"]]

    # Sentiment analysis by text file/student, each student on their own
    # page. Iterating the sorted keys works on Python 2 and 3 alike
    # (dict.iteritems() exists on Python 2 only).
    for name in sorted(submissionDict):
        blob = submissionDict[name]

        # Unicode format strings: on Python 2 a byte-string template
        # raises UnicodeEncodeError for names with non-ASCII characters.
        y = TOP_OF_PAGE
        pdfFile.drawString(STD_MARGIN, y, u"Student Name: {}".format(name))
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, "")
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, "Polarity:")
        pdfFile.setFillColor(get_polarity_color(blob.polarity))
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, str(blob.polarity))
        pdfFile.setFillColor(COLOR_BLACK)
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, "Subjectivity:")
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, str(blob.subjectivity))
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, "Green=Positive, Grey=Neutral, Red=Negative")
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, "Subjectivity: Scale 0=Objective, 1=Very Subjective")
        y -= LINE_HEIGHT
        pdfFile.drawString(STD_INDENT, y, "Assessments:")
        y -= LINE_HEIGHT

        for word in blob.sentiment_assessments.assessments:
            y -= LINE_HEIGHT
            if y < STD_MARGIN:
                # Bottom margin reached: continue on a fresh page that
                # repeats the student's name.
                pdfFile.showPage()
                y = TOP_OF_PAGE
                pdfFile.drawString(
                    STD_MARGIN, y,
                    u"Student Name: {} continued..".format(name))
                y -= 2 * LINE_HEIGHT
            pdfFile.drawString(STD_INDENT, y, str(word))
        # for word ...

        pdfFile.showPage()
        csvList.append([assignName, name, blob.polarity, blob.subjectivity])
    # for name ...

    pdfFile.save()
    _write_csv_report('output.csv', csvList)
# build_reports
# ------------------------------ Execution begins here ------------------------------

# Check that a command line argument is present. The script name in the
# usage text is taken from argv so it always matches the file being run.
if len(sys.argv) != 2:
    print("Usage: python {} <directory>".format(os.path.basename(sys.argv[0])))
    sys.exit(1)

# Check that the command line argument is indeed a valid directory, and
# make that the working dir. The output files are written there as well.
inputDir = sys.argv[1]
if not os.path.isdir(inputDir):
    print("Directory not valid: {}".format(inputDir))
    sys.exit(1)

os.chdir(inputDir)
if not os.path.isfile('collective.txt'):
    print("Input file not found in directory: {}".format(inputDir))
    sys.exit(1)

# Expect to find any number of text files in this dir named [n].txt
# where [n] is the unique submission id value. The first line of each
# submission file will be the associated username, the second is the
# user's formatted [lastname, firstname], and the remaining lines are
# the online text submission. Also present in the dir is a file named
# collective.txt, the first line of which has the assignment name, and
# the remaining lines are the collective submissions.

# Start with the collective file
with codecs.open('collective.txt', 'r', 'utf-8', 'strict') as infile:
    lines = infile.readlines()

if not lines:
    # Without a first line there is no assignment name to report on.
    print("Input file is empty: collective.txt")
    sys.exit(1)

# codecs.open does no newline translation, so strip '\r' as well as '\n';
# otherwise a file with CRLF line endings leaves a stray '\r' in the name.
assignName = lines.pop(0).rstrip('\r\n')
collectiveBlob = TextBlob(u''.join(lines).strip())

# Now the individual submissions. Store them in a dictionary keyed by
# the formatted name plus username
submissionDict = {}
for filename in glob.iglob('[0-9]*.txt'):
    with codecs.open(filename, 'r', 'utf-8', 'strict') as infile:
        lines = infile.readlines()

    if len(lines) < 2:
        # The username and formatted-name header lines are both required.
        print("Skipping malformed submission file: {}".format(filename))
        continue

    username = lines.pop(0).rstrip('\r\n')
    formattedName = lines.pop(0).rstrip('\r\n')
    # Unicode template: on Python 2 a byte-string template raises
    # UnicodeEncodeError for names with non-ASCII characters.
    submissionKey = u"{0} ({1})".format(formattedName, username)
    submissionDict[submissionKey] = TextBlob(u''.join(lines).strip())

# Build the .pdf, and .csv report files
build_reports(assignName, collectiveBlob, submissionDict)
sys.exit(0)