forked from ronilp/Finding-Influencers-in-Social-Networks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutilities.py
32 lines (29 loc) · 1.08 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re
# Base URL of the Facebook Graph API.
url = "https://graph.facebook.com"
# SECURITY NOTE(review): a Graph API access token is hard-coded and committed
# here. Treat it as leaked: revoke it, and load the token from the environment
# or an untracked config file instead of source control.
access_token = "CAACEdEose0cBAJw4PuZBNWC4BnoT3yEsv0TOVszOxipHLk6x8GVLzKfW9ZBmoT1ZBturw7jGS6jt2cmanDsexxwIPZBoh2A7z9ny4hAPvepzB6RWTmjZA96LCuZBEvuKFRdA0wJkjSepkPFAF1mDToWBZBJk6BTkRFrOM3f6NN1xwGZCe595NWrw2z5tzZCeh6BWJrvb5t8xytXZAnOW53oVbf9jqoVO4eSB0ZD"
# NOTE(review): the three constants below are not used anywhere in this file;
# their meaning must be confirmed against the modules that import them.
# Presumably a tuning parameter "k" -- TODO confirm.
KVAL = 7
# Presumably the numeric id of the Facebook page/profile to start from -- TODO confirm.
PID = 449082841792177
# Presumably an upper bound on some fan-out or output count -- TODO confirm.
MAXOUT = 3
def getAppendString(document):
    """Flatten the text fields of *document* into one cleaned string.

    Every field with a truthy value is stripped, lower-cased and appended,
    except ``'_id'`` and ``'were_here_count'`` which are always skipped.
    The ``'posts'`` field is treated as an iterable of strings; falsy
    entries in it are ignored.  All other fields are expected to hold a
    string (anything without ``.strip()`` raises ``AttributeError``).

    The combined text is then cleaned in two passes: tokens starting with
    ``@`` or ``http://`` / ``https://`` are blanked out up to the next
    whitespace, and every remaining non-word character becomes a space.
    Finally the text is passed through ``stopWordRemoval``.

    Returns:
        A dict ``{'_id': document['_id'], 'data': <cleaned text>}``.

    Raises:
        KeyError: if *document* has no ``'_id'`` entry.
    """
    ignored_fields = ('_id', 'were_here_count')
    fragments = []

    for field in document:
        value = document[field]
        # Empty values and bookkeeping fields contribute no text.
        if not value or field in ignored_fields:
            continue
        if field == 'posts':
            fragments.extend(
                post.strip().lower() + ' ' for post in value if post
            )
        else:
            fragments.append(value.strip().lower() + ' ')

    text = ''.join(fragments)
    # Drop @mentions and URLs first, before punctuation stripping would
    # break them into word-like pieces.
    text = re.sub(r"(?:\@|https?\://)\S+", ' ', text, flags=re.MULTILINE)
    text = re.sub(r"[^\w]", " ", text, flags=re.MULTILINE)

    return {'_id': document['_id'], 'data': stopWordRemoval(text)}
def stopWordRemoval(document, stopwords_path='stopwords.txt'):
    """Return *document* with all stop words removed.

    The text is split on whitespace, every word listed in the stop-word
    file is dropped, and the remaining words are joined with single spaces.
    Matching is exact and case-sensitive, so callers should normalise case
    before calling.

    Args:
        document: Text to filter.
        stopwords_path: Path of a text file with one stop word per line.
            Defaults to ``'stopwords.txt'``, resolved against the current
            working directory.

    Returns:
        The surviving words joined by single spaces (``''`` if none remain).

    Raises:
        IOError / OSError: if the stop-word file cannot be opened.
    """
    # Build a real set instead of keeping the result of ``map``: on Python 3
    # ``map`` is a one-shot iterator, so the first failed ``in`` test would
    # exhaust it and every later stop word would slip through.  A set also
    # gives O(1) membership tests, and ``with`` closes the file handle.
    with open(stopwords_path) as handle:
        stop_words = {line.strip() for line in handle}

    return ' '.join(
        word for word in document.split() if word not in stop_words
    )