-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse.py
More file actions
executable file
·60 lines (48 loc) · 2.09 KB
/
Copy pathparse.py
File metadata and controls
executable file
·60 lines (48 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python
"""
Parse the given sentences using a parser such as Stanford Parser, PyStatParser or AllenNLP Parser.
Takes input in the form of a file of newline-separated sentences and outputs the parses in a similar format.
The parse notation of various parsers may be significantly different.
"""
import nltk.tree
class stanford:
    """Parse sentences by querying the online Stanford Parser demo page.

    Each call to :meth:`parse` issues one HTTP GET request and scrapes the
    first ``<pre>`` element out of the returned HTML.
    """

    # Endpoint queried by ``parse``; the sentence is sent as the ``query``
    # URL parameter.
    url = "http://nlp.stanford.edu:8080/parser"

    # Seconds to wait for the server. Without a timeout ``requests`` may
    # block forever if the server never answers.
    timeout = 60

    def parse(self, line):
        """Return the server's parse of *line* as a single-line string.

        Args:
            line: The sentence to parse.

        Returns:
            The text of the first ``<pre>`` element of the response with
            every run of whitespace collapsed to a single space.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-2xx HTTP status.
            ValueError: If the response contains no ``<pre>`` element.
        """
        # Imported lazily so the other parsers can be used without these
        # third-party packages being installed.
        from urllib.parse import urlencode, quote_plus
        import bs4
        import requests

        q = urlencode({'query': line}, quote_via=quote_plus)
        r = requests.get(self.url + "?" + q, timeout=self.timeout)
        # Fail loudly on HTTP errors instead of scraping an error page.
        r.raise_for_status()
        soup = bs4.BeautifulSoup(r.content, 'html5lib')
        pre = soup.find('pre')
        if pre is None:
            raise ValueError(
                "no <pre> element found in the parser response for: %r" % (line,)
            )
        return ' '.join(pre.text.split())
class pystat:
    """Parse sentences with the ``stat_parser`` package's ``Parser``."""

    def __init__(self):
        """Create the underlying ``stat_parser.Parser`` instance."""
        # Imported lazily so the dependency is only needed when this parser
        # is actually selected.
        import stat_parser
        self.parser = stat_parser.Parser()

    def parse(self, line):
        """Return the parse of *line* flattened onto a single line.

        The parser's result is converted with ``str`` and every run of
        whitespace (including newlines) is collapsed to one space.
        """
        rendered = str(self.parser.parse(line))
        tokens = rendered.split()
        return ' '.join(tokens)
class allen:
    """Parse sentences with the AllenNLP constituency-parser predictor."""

    def __init__(self):
        """Load the model archive and build a ``constituency-parser`` predictor."""
        # Imported lazily so the dependency is only needed when this parser
        # is actually selected.
        from allennlp.models.archival import load_archive
        from allennlp.service.predictors import Predictor
        archive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo-constituency-parser-2018.03.14.tar.gz"
        )
        self.predictor = Predictor.from_archive(archive, 'constituency-parser')

    def parse(self, line):
        """Return the parse of *line* flattened onto a single line.

        This is the method the command-line driver calls on every parser
        class, so it must be named ``parse``.

        Args:
            line: The sentence to parse.

        Returns:
            The predicted tree as a string with every run of whitespace
            collapsed to a single space.
        """
        result = self.predictor.predict_json({"sentence": line})
        # predict_json returns a JSON dict rather than a string; the bracketed
        # tree is expected under the "trees" key.
        # NOTE(review): key name taken from the AllenNLP constituency-parser
        # predictor output -- confirm against the installed version.
        if isinstance(result, dict):
            result = result["trees"]
        return ' '.join(str(result).split())

    def parse_allen(self, line):
        """Backward-compatible alias for :meth:`parse`."""
        return self.parse(line)
def parsed_output_path(path, parser_name):
    """Return the output file path for input *path* and *parser_name*.

    ``_parsed_<parser_name>`` is inserted between the file stem and its
    extension, e.g. ``sents.txt`` -> ``sents_parsed_stanford.txt``. The
    extension is found with ``os.path.splitext``, so extensions of any
    length (or none at all) are handled.
    """
    # Local import keeps this block self-contained.
    import os.path

    stem, ext = os.path.splitext(path)
    return stem + '_parsed_' + parser_name + ext


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('file', type=str,
                        help='input file')
    parser.add_argument('-p', '--parser', type=str, choices=['pystat', 'stanford', 'allen'],
                        default='stanford',
                        help='parser to be used')
    args = parser.parse_args()
    # Explicit dispatch table instead of looking the class up in vars().
    parser_classes = {'pystat': pystat, 'stanford': stanford, 'allen': allen}
    sent_parser = parser_classes[args.parser]()
    with open(args.file) as infile, open(parsed_output_path(args.file, args.parser), 'w') as outfile:
        # Iterate the file lazily rather than loading every line up front.
        for line in infile:
            # Drop the trailing newline / surrounding whitespace before parsing.
            sentence = line.strip()
            # A blank input line yields a blank output line, so output line k
            # still corresponds to input line k and no empty query is sent.
            parsed = sent_parser.parse(sentence) if sentence else ''
            outfile.write(parsed)
            outfile.write('\n')