-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkovify-transcripts.py
executable file
·36 lines (27 loc) · 1.06 KB
/
markovify-transcripts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import markovify
import nltk
import re
import json
import pickle
# Fetch the tagger data set by name; nltk.pos_tag (used by POSifiedText below)
# is expected to need it -- confirm against the installed NLTK version.
nltk.download('averaged_perceptron_tagger')

# Absolute directory containing this script.  os.path.split(...)[0] is the
# directory part of the path, i.e. the same value os.path.dirname returns.
basepath = os.path.split(os.path.abspath(__file__))[0]
class POSifiedText(markovify.Text):
    """markovify text model whose tokens carry a part-of-speech tag.

    Every word is stored as ``"<word>::<TAG>"`` so that the same spelling
    used with different parts of speech becomes a different chain state.
    ``word_join`` strips the tags again when a sentence is rebuilt.
    """

    def word_split(self, sentence):
        """Split *sentence* into a list of ``"<word>::<TAG>"`` tokens.

        The sentence is split with ``self.word_split_pattern`` and each piece
        is joined to the tag that ``nltk.pos_tag`` returns for it.
        """
        words = re.split(self.word_split_pattern, sentence)
        return ["::".join(pair) for pair in nltk.pos_tag(words)]

    def word_join(self, words):
        """Return the plain sentence for a list of ``"<word>::<TAG>"`` tokens.

        Only the trailing ``::<TAG>`` suffix is removed.  Cutting at the
        *first* "::" (as ``split("::")[0]`` does) drops the final colon of a
        word such as ``"thing:"`` (token ``"thing:::NN"``) and truncates any
        word that itself contains "::".
        """
        plain = []
        for token in words:
            head, sep, tag = token.rpartition("::")
            if not sep:
                # No separator at all: pass the token through unchanged.
                word = token
            elif not tag:
                # The tag itself was ":" (token looks like "<word>:::"), so
                # the last "::" overlaps the tag and `head` kept one colon of
                # the separator.  Assumes ":" is the only tag containing a
                # colon, as in the Penn Treebank tag set -- TODO confirm if a
                # different tagger/tagset is ever used.
                word = head[:-1]
            else:
                word = head
            plain.append(word)
        return " ".join(plain)
# File stems of the transcripts to process: '<stem>.txt' is read from the
# "text" directory next to this script.  The last entry, 'all-lines', is not
# written like the speaker names -- presumably a combined corpus; verify
# against the data files.
characters = [
    'ZACH', 'TAYLOR', 'SOOKIE', 'RORY', 'RICHARD', 'PARIS', 'MISS PATTY',
    'MICHEL', 'MAX', 'LUKE', 'LORELAI', 'LOGAN', 'LANE', 'KIRK', 'JESS', 'JASON',
    'JACKSON', 'EMILY', 'DEAN', 'CHRISTOPHER', 'all-lines',
]

# Build one POS-tagged Markov model per transcript and pickle it alongside
# the source text as '<stem>-markov-model.pickle'.
text_dir = os.path.join(basepath, 'text')
for character in characters:
    source_path = os.path.join(text_dir, '{}.txt'.format(character))
    model_path = os.path.join(
        text_dir, '{}-markov-model.pickle'.format(character))

    # NOTE(review): no encoding is passed, so the platform default decides how
    # the transcript is decoded -- confirm the files' encoding and pin it.
    with open(source_path) as f:
        corpus = f.read()

    model = POSifiedText(corpus)

    # NOTE(review): pickle records the class by reference, so whatever loads
    # this file must have a compatible POSifiedText available under the same
    # module name it had here.
    with open(model_path, 'wb') as handle:
        pickle.dump(model, handle)

    print('{} done'.format(character))