-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtxtToPickle.py
More file actions
31 lines (25 loc) · 1.08 KB
/
txtToPickle.py
File metadata and controls
31 lines (25 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# conda install -c conda-forge pdfminer3k
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams, LTTextBox, LTTextLine
import numpy as np
import glob, os, pickle
import math
def txtToTextDict(filename):
    """Read every ``.txt`` file matching a path prefix and return the contents.

    Args:
        filename: Path prefix to which ``'*.txt'`` is appended before
            globbing. Pass a directory path ending in a path separator to
            collect every ``.txt`` file directly inside that directory.

    Returns:
        A list (despite the function's name, not a dict) holding the raw
        ``bytes`` of each matching file, ordered by sorted file path. The
        list is empty when no file matches.
    """
    textDict = []
    # Sort the matches so the output order is deterministic; glob itself
    # returns paths in arbitrary order.
    for txt in sorted(glob.glob(filename + '*.txt')):
        # Files are read in binary mode, so the contents are stored
        # undecoded. The context manager closes each handle promptly
        # instead of leaking one descriptor per input file.
        with open(txt, 'rb') as fp:
            textDict.append(fp.read())
    # NOTE(review): indentation was lost in the reviewed copy; this assumes
    # the list is printed once after all files are read -- confirm.
    print(textDict)
    return textDict
# Input directories. The trailing slash matters: txtToTextDict appends
# '*.txt' directly to the prefix it is given.
f1 = '/Users/liammeier/moral-reasoning/AppliedConsequentialism/'
f2 = '/Users/liammeier/moral-reasoning/AppliedDeontology/'
f3 = '/Users/liammeier/moral-reasoning/NewAppliedVirtue/'
f4 = '/Users/liammeier/moral-reasoning/ControlArticles/'

# Only the f4 directory is pickled on this run; the dumps for the other
# directories are kept commented out below.
#pickle.dump(txtToTextDict(f1), open("consPapersNewest.pkl", "wb"))
#pickle.dump(txtToTextDict(f2), open("deonPapersNewest.pkl", "wb"))
#pickle.dump(txtToTextDict(f3), open("virtuePapers.pkl", "wb"))

# Read the inputs before opening the output so a failed read cannot leave a
# truncated pickle behind, then close the output explicitly so the data is
# flushed to disk rather than relying on garbage collection.
controlTexts = txtToTextDict(f4)
with open("controlPapers.pkl", "wb") as pickleFile:
    pickle.dump(controlTexts, pickleFile)