# calc_sent_scores.py
# Scores Yelp review text against the AFINN-111 sentiment word list and
# writes the per-review scores to a CSV file.
import sys
import json
import csv
import time
def loadSentimentData(filePath):
    """Build a term -> sentiment score lookup from a tab-delimited lexicon.

    Args:
        filePath: Despite the name, an already-open file (or any iterable
            of text lines). Each line holds a term, a tab, and an integer
            score.

    Returns:
        dict mapping each term to its score as an int.

    Raises:
        ValueError: if a non-blank line does not consist of exactly two
            tab-separated fields, or its score is not an integer.
    """
    termScores = {}
    for line in filePath:
        # Skip blank lines (e.g. a trailing newline at end of file) rather
        # than failing on the two-field unpack below.
        if not line.strip():
            continue
        term, score = line.split("\t")  # The file is tab-delimited.
        termScores[term] = int(score)  # int() ignores the trailing newline.
    return termScores
def getJSONData(filePath):
    """Parse line-delimited JSON records and index them by review ID.

    Args:
        filePath: Despite the name, an already-open file (or any iterable
            of text lines) with one JSON object per line.

    Returns:
        dict mapping each record's 'review_id' value to the decoded record.
        A record without a 'review_id' is stored under the key None, and a
        later record replaces an earlier one that has the same ID.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
    """
    jsonData = {}
    for line in filePath:
        # Blank lines carry no record; json.loads would reject them.
        if not line.strip():
            continue
        jsonLine = json.loads(line)
        jsonData[jsonLine.get('review_id')] = jsonLine
    return jsonData
def calcReviewSentimentScore(termScores, jsonReviews):
    """Score every review by summing the lexicon scores of its words.

    The review text is split on whitespace and each resulting token is
    looked up verbatim in termScores; tokens that are not present add
    nothing to the total.

    Args:
        termScores: dict mapping a term to its integer sentiment score.
        jsonReviews: dict mapping a review ID to a review record (a dict)
            whose 'text' entry holds the review body.

    Returns:
        dict mapping each review ID to a (sentimentScore, reviewID) tuple.
        Every review gets an entry carrying its own ID, including reviews
        in which no word was scored (their score is 0).
    """
    scoredReviews = {}
    for reviewID, review in jsonReviews.items():
        # A missing or null 'text' is treated as an empty review.
        words = (review.get('text') or '').split()
        # Look words up in the dict directly: testing membership against
        # termScores.keys() scans a list on Python 2.
        sentimentScore = sum(termScores.get(word, 0) for word in words)
        scoredReviews[reviewID] = (sentimentScore, reviewID)
    return scoredReviews
def writeCSV(dataDict, outPath='Data/yelp_training_set_sent_score.csv'):
    """Write each value of dataDict as one row of a CSV file.

    Args:
        dataDict: dict whose values are row sequences (for example
            (score, reviewID) tuples). The keys are not written.
        outPath: destination file, overwritten if it exists. Defaults to
            the path this script has always written to.
    """
    # The csv module expects a binary-mode file on Python 2 and a text-mode
    # file opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(outPath, 'w', newline='')
    else:
        csvfile = open(outPath, 'wb')
    with csvfile:
        writer = csv.writer(csvfile)
        for key in dataDict:
            writer.writerow(dataDict[key])
def findMin(field, jsonData):
    """Return the smallest value stored under `field` across all records.

    Args:
        field: key to read from every record.
        jsonData: dict mapping an ID to a record (a dict).

    Returns:
        The minimum of the collected values. A record that lacks `field`
        contributes None to the comparison.

    Raises:
        ValueError: if jsonData is empty.
    """
    return min([jsonData[recordKey].get(field) for recordKey in jsonData])
def findMax(field, jsonData):
    """Return the largest value stored under `field` across all records.

    Args:
        field: key to read from every record.
        jsonData: dict mapping an ID to a record (a dict).

    Returns:
        The maximum of the collected values. A record that lacks `field`
        contributes None to the comparison.

    Raises:
        ValueError: if jsonData is empty.
    """
    return max([jsonData[recordKey].get(field) for recordKey in jsonData])
def main():
    """Run the sentiment-scoring pipeline from input files to CSV output.

    Reads the reviews from "Data/yelp_training_set_review.json" and the
    term scores from "AFINN-111.txt", scores every review, writes the
    result with writeCSV, and prints timestamped progress messages along
    the way.
    """
    def log(message):
        # Prefix the message with the current local time. A single-argument
        # print(...) produces the same output on Python 2 and Python 3.
        print(time.asctime() + message)

    start_time = time.time()
    log(' --- Program started')

    # Context managers make sure both input files are closed once loaded.
    with open("Data/yelp_training_set_review.json") as json_file:
        jsonData = getJSONData(json_file)
    with open("AFINN-111.txt") as sent_file:
        termScores = loadSentimentData(sent_file)

    log(' --- Calculating sentiment scores...')
    scoredReviews = calcReviewSentimentScore(termScores, jsonData)

    log(' --- Writing csv output...')
    writeCSV(scoredReviews)

    execution_time = time.time() - start_time
    log(' --- Program completed. Execution time (mins): ' + str(execution_time / 60))
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()