forked from CLARIAH/grlc
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgrlc.py
More file actions
executable file
·347 lines (293 loc) · 13.6 KB
/
grlc.py
File metadata and controls
executable file
·347 lines (293 loc) · 13.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/usr/bin/env python
from flask import Flask, request, jsonify, render_template
import urllib
import urllib2
import json
import StringIO
import logging
import re
import yaml
from rdflib.plugins.sparql.parser import Query
from rdflib.plugins.sparql.processor import translateQuery
from pyparsing import ParseException
import traceback
import cgi
# Local names of the XML Schema datatypes recognised as a type suffix of a
# query variable; get_parameters() turns a matching suffix into 'xsd:<name>'.
XSD_DATATYPES = [
    # Numeric types
    "decimal", "float", "double",
    "integer", "positiveInteger", "negativeInteger",
    "nonPositiveInteger", "nonNegativeInteger",
    "long", "int", "short", "byte",
    "unsignedLong", "unsignedInt", "unsignedShort", "unsignedByte",
    # Date and time types
    "dateTime", "date", "gYearMonth", "gYear",
    "duration", "gMonthDay", "gDay", "gMonth",
    # String-like types
    "string", "normalizedString", "token", "language",
    "NMTOKEN", "NMTOKENS", "Name", "NCName",
    "ID", "IDREFS", "ENTITY", "ENTITIES", "QName",
    # Miscellaneous types
    "boolean", "hexBinary", "base64Binary", "anyURI", "notation",
]
# Flask application object; the route handlers in this file are registered on it via @app.route.
app = Flask(__name__)
def guess_endpoint_uri(rq, ru):
    '''
    Guesses the endpoint URI from (in this order):
    - An #+endpoint decorator in the query text
    - An endpoint.txt file in the repo
    Otherwise assigns a default one

    rq -- raw text of the query file, decorators included
    ru -- base URI of the raw repository contents; 'endpoint.txt' is
          appended to it as-is, so it is expected to end with '/'

    Returns the endpoint URI. A missing/unusable decorator or file is never
    an error: the next source in the list is tried instead.
    '''
    endpoint = 'http://dbpedia.org/sparql'

    # Decorator
    # The value is kept in a separate name so that an unusable decorator can
    # never overwrite the default that is returned as a last resort.
    try:
        decorated = get_metadata(rq).get('endpoint')
    except Exception:
        # Best effort: an unparsable query or decorator block simply means
        # "no decorator"; the details are kept in the debug log.
        app.logger.debug("Could not read endpoint decorator", exc_info=True)
        decorated = None
    if decorated:
        app.logger.info("Decorator guessed endpoint: %s", decorated)
        return decorated

    # File
    try:
        stream = urllib2.urlopen(ru + "endpoint.txt")
        try:
            from_file = stream.read().strip()
        finally:
            stream.close()
    except Exception:
        # Best effort: the file is optional, so any fetch error means "absent".
        app.logger.debug("Could not read endpoint.txt", exc_info=True)
        from_file = ''
    # An empty endpoint.txt is treated like a missing one
    if from_file:
        app.logger.info("File guessed endpoint: %s", from_file)
        return from_file

    # Default
    app.logger.warning("No endpoint specified, using default ({})".format(endpoint))
    return endpoint
def get_parameters(rq):
    """
    Extracts the API parameters declared by the variables of the SPARQL query 'rq'.

    Variable naming convention (as implemented below):
    ?_name                  Mandatory parameter 'name'. Without a type suffix the type defaults to 'iri'.
    ?__name                 Same, but the parameter is optional.
    ?_name_iri              Type 'iri' (also accepted as-is: 'number', 'literal').
    ?_name_en               Literal with the language tag 'en' (any two-character suffix is taken as a language).
    ?_name_integer          Literal with the datatype 'xsd:integer' (any suffix listed in XSD_DATATYPES).
    ?_name_prefix_datatype  Literal with the datatype 'prefix:datatype'.

    Returns a dict keyed by parameter name; each value is a dict with the keys
    'original' (the variable as '?var'), 'required', 'name', 'type',
    'datatype' and 'lang'. Variables that do not start with an underscore
    are not parameters and are left out.

    Raises whatever the rdflib parser raises when 'rq' is not a valid query.
    """
    variables = translateQuery(Query.parseString(rq, parseAll=True)).algebra['_vars']

    ## Aggregates (variables named __agg_N__ are skipped)
    internal_matcher = re.compile(r"__agg_\d+__")
    ## Basil-style variables
    variable_matcher = re.compile(r"(?P<required>[_]{1,2})(?P<name>[^_]+)_?(?P<type>[a-zA-Z0-9]+)?_?(?P<userdefined>[a-zA-Z0-9]+)?.*$")

    parameters = {}
    for v in variables:
        if internal_matcher.match(v):
            continue

        match = variable_matcher.match(v)
        if not match:
            continue

        vname = match.group('name')
        # One leading underscore: required; two: optional
        vrequired = match.group('required') == '_'
        vtype = 'iri'
        vlang = None
        vdatatype = None

        mtype = match.group('type')
        muserdefined = match.group('userdefined')

        if mtype in ['iri', 'number', 'literal']:
            vtype = mtype
        elif mtype:
            # Any other suffix makes the parameter a literal; the suffix then
            # selects an XSD datatype, a language tag or a custom datatype.
            # NOTE(review): the source lost its indentation; this assumes the
            # datatype/language detection applies only in this branch.
            vtype = 'literal'
            if mtype in XSD_DATATYPES:
                vdatatype = 'xsd:{}'.format(mtype)
            elif len(mtype) == 2:
                vlang = mtype
            elif muserdefined:
                vdatatype = '{}:{}'.format(mtype, muserdefined)

        parameters[vname] = {
            'original': '?{}'.format(v),
            'required': vrequired,
            'name': vname,
            'type': vtype,
            'datatype': vdatatype,
            'lang': vlang
        }

    return parameters
def get_metadata(rq):
    '''
    Returns the metadata parsed from the raw query file 'rq'.

    Lines starting with '#+' are decorators: with the marker removed they are
    read together as one YAML document (e.g. 'endpoint', 'tags', 'summary').
    The result is that mapping, extended with:
    - 'query':     the query text without the decorator lines
    - 'type':      the name of the parsed query algebra (e.g. 'SelectQuery')
    - 'variables': the projected variables (only for 'SelectQuery')

    Raises pyparsing.ParseException when 'rq' is not a valid SPARQL query.
    '''
    rows = rq.split('\n')
    yaml_string = "\n".join([row.lstrip('#+') for row in rows if row.startswith('#+')])
    query_string = "\n".join([row for row in rows if not row.startswith('#+')])

    # SECURITY: the query file comes from an arbitrary repository, so the
    # decorators are untrusted input. safe_load only builds plain YAML types,
    # whereas yaml.load can instantiate arbitrary Python objects.
    query_metadata = yaml.safe_load(yaml_string)
    # If there is no YAML string
    if query_metadata is None:
        query_metadata = {}
    query_metadata['query'] = query_string

    try:
        parsed_query = translateQuery(Query.parseString(rq, parseAll=True))
    except ParseException:
        # logger.exception attaches the traceback to the log record
        app.logger.exception("Could not parse query")
        app.logger.error(query_string)
        # Re-raise: there is no parsed query to describe, and carrying on
        # would only fail below with an unrelated UnboundLocalError.
        raise

    query_metadata['type'] = parsed_query.algebra.name
    if query_metadata['type'] == 'SelectQuery':
        query_metadata['variables'] = parsed_query.algebra['PV']

    return query_metadata
def _escape_sparql_literal(value):
    """Escapes 'value' so it can be placed between double quotes in a SPARQL query."""
    # The backslash must be handled first, or the escapes added below would
    # themselves be escaped again.
    return (value.replace('\\', '\\\\')
                 .replace('"', '\\"')
                 .replace('\n', '\\n')
                 .replace('\r', '\\r'))


def rewrite_query(query, get_args):
    """
    Substitutes the parameter variables of 'query' with the request values.

    query    -- text of the SPARQL query
    get_args -- mapping of parameter name to value (the GET arguments);
                only .get() is used

    Parameters without a (non-empty) value are left in the query as
    variables. Returns the rewritten query text.

    NOTE: values of type 'iri' and 'number' are inserted verbatim, exactly as
    supplied by the client; only literal values are quoted and escaped.
    """
    parameters = get_parameters(query)
    app.logger.debug("Query parameters")
    app.logger.debug(parameters)

    for pname, p in parameters.items():
        # Get the parameter value from the GET request
        v = get_args.get(pname, None)
        # Nothing to substitute if the parameter has no value
        if not v:
            continue

        # IRI
        if p['type'] == 'iri':
            replacement = "{}".format(v)
        # A number (without a datatype)
        elif p['type'] == 'number':
            replacement = v
        # Literals
        elif p['type'] == 'literal':
            # The value is untrusted: escape it so it cannot close the
            # quoted string and inject SPARQL of its own.
            escaped = _escape_sparql_literal(v)
            # If there is a language tag
            if p['lang']:
                replacement = "\"{}\"@{}".format(escaped, p['lang'])
            elif p['datatype']:
                replacement = "\"{}\"^^{}".format(escaped, p['datatype'])
            else:
                replacement = "\"{}\"".format(escaped)
        else:
            continue

        # Match the whole variable only ('?_a' must not rewrite the start of
        # '?_ab'), in either of its SPARQL spellings ('?var' and '$var').
        pattern = r'[?$]' + re.escape(p['original'][1:]) + r'(?![A-Za-z0-9_])'
        # A function is used as replacement so that backslashes in the value
        # are not interpreted as regex group references.
        query = re.sub(pattern, lambda _match: replacement, query)

    app.logger.debug("Query rewritten as: " + query)

    return query
@app.route('/')
def hello():
    """Serves the page rendered from the 'index.html' template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/<user>/<repo>/<query>', methods=['GET'])
def query(user, repo, query):
    """
    Runs the query '<query>.rq' of the GitHub repository <user>/<repo>.

    The query file is fetched from the master branch, its parameters are
    substituted with the GET arguments of the request, and the result is
    sent to the guessed SPARQL endpoint using the Accept header of the
    incoming request.

    Returns the endpoint's response body, served with the endpoint's
    Content-Type when it reports one. Errors raised by urllib2 while
    fetching the query or contacting the endpoint are not handled here.
    """
    # A client is not obliged to send an Accept header; '*/*' is what its
    # absence means in HTTP.
    accept = request.headers.get('Accept', '*/*')
    app.logger.debug("Got request at /" + user + "/" + repo + "/" + query)
    app.logger.debug("Request accept header: " + accept)

    raw_repo_uri = 'https://raw.githubusercontent.com/' + user + '/' + repo + '/master/'
    raw_query_uri = raw_repo_uri + query + '.rq'
    stream = urllib2.urlopen(raw_query_uri)
    try:
        raw_query = stream.read()
    finally:
        stream.close()

    endpoint = guess_endpoint_uri(raw_query, raw_repo_uri)
    app.logger.debug("Sending query to endpoint: " + endpoint)

    rewritten_query = rewrite_query(raw_query, request.args)

    # Prepare HTTP request
    headers = {
        'Accept': accept
    }
    data_encoded = urllib.urlencode({'query': rewritten_query})
    # Passing data makes urllib2 send the query as a POST body
    req = urllib2.Request(endpoint, data_encoded, headers)
    app.logger.debug("Sending request: " + req.get_full_url() + "?" + data_encoded)

    response = urllib2.urlopen(req)
    try:
        body = response.read()
        content_type = response.info().get('Content-Type')
    finally:
        response.close()

    # Pass the endpoint's media type on; otherwise Flask would label every
    # result (CSV, JSON, ...) with its own default content type.
    if content_type:
        return body, 200, {'Content-Type': content_type}
    return body
@app.route('/<user>/<repo>/api-docs')
def api_docs(user, repo):
    """Renders the 'api-docs.html' template for the repository <user>/<repo>."""
    template_context = {'user': user, 'repo': repo}
    return render_template('api-docs.html', **template_context)
def _read_url(uri):
    """Fetches 'uri' and returns the response body, closing the connection."""
    stream = urllib2.urlopen(uri)
    try:
        return stream.read()
    finally:
        stream.close()


@app.route('/<user>/<repo>/spec')
def swagger_spec(user, repo):
    """
    Builds the Swagger 2.0 specification of the repository <user>/<repo>.

    The repository description and its top-level file listing are read from
    the GitHub API. Every '*.rq' file becomes one GET path, named after the
    file without its extension and described by the query's decorators,
    parameters and (for SELECT queries) projected variables. Queries whose
    metadata or parameters cannot be parsed are logged and left out.

    Returns the specification as a JSON response.
    """
    app.logger.debug("Generating swagger spec for /" + user + "/" + repo)

    api_repo_uri = 'https://api.github.com/repos/' + user + '/' + repo
    raw_repo_uri = 'https://raw.githubusercontent.com/' + user + '/' + repo + '/master/'

    repo_info = json.loads(_read_url(api_repo_uri))

    swag = {}
    swag['swagger'] = '2.0'
    swag['info'] = {
        'version': '1.0',
        'title': repo_info['name'],
        'contact': {
            'name': repo_info['owner']['login'],
            'url': repo_info['owner']['html_url']
        },
        'license': {
            'name': 'License',
            'url': raw_repo_uri + 'LICENSE'
        }
    }
    # SERVER_NAME is unset unless configured; fall back to the host this
    # request was addressed to rather than publishing a null host.
    swag['host'] = app.config['SERVER_NAME'] or request.host
    swag['basePath'] = '/' + user + '/' + repo + '/'
    swag['schemes'] = ['http']
    swag['paths'] = {}

    repo_contents = json.loads(_read_url(api_repo_uri + '/contents'))

    # Fetch all .rq files
    for c in repo_contents:
        file_name = c['name']
        # Match on the extension only: 'foo.rq.bak' is not a query
        if not file_name.endswith('.rq'):
            continue
        # Drop just the extension, so that 'a.b.rq' is published as 'a.b',
        # the name under which the query route will look the file up again.
        call_name = file_name[:-len('.rq')]

        # Retrieve extra metadata from the query decorators
        raw_query_uri = raw_repo_uri + file_name
        raw_query = _read_url(raw_query_uri)
        try:
            query_metadata = get_metadata(raw_query)
        except Exception as e:
            app.logger.error("Could not parse query " + raw_query_uri)
            app.logger.error(e)
            continue

        tags = query_metadata.get('tags', [])
        app.logger.debug("Read query tags: " + ', '.join(tags))

        summary = query_metadata.get('summary', "")
        app.logger.debug("Read query summary: " + summary)

        description = query_metadata.get('description', "")
        app.logger.debug("Read query description: " + description)

        endpoint = query_metadata.get('endpoint', "")
        app.logger.debug("Read query endpoint: " + endpoint)

        try:
            parameters = get_parameters(query_metadata['query'])
        except Exception:
            # logger.exception attaches the traceback to the log record
            app.logger.exception("Could not parse parameters")
            continue

        app.logger.debug("Read parameters")
        app.logger.debug(parameters)

        # TODO: do something intelligent with the parameters!
        params = []
        for p in parameters.values():
            params.append({
                'name': p['name'],
                'type': "string",
                'required': p['required'],
                'in': "query",
                'description': "A value of type {} that will substitute {} in the original query".format(p['type'], p['original'])
            })

        item_properties = {}
        if query_metadata['type'] != 'SelectQuery':
            # TODO: Turn this into a nicer thingamajim
            app.logger.warning("This is not a SelectQuery, don't really know what to do!")
            summary += "WARNING: non-SELECT queries are not really treated properly yet"
            # just continue with empty item_properties
        else:
            # We now know it is a SELECT query
            for pv in query_metadata['variables']:
                item_properties[pv] = {
                    "name": pv,
                    "type": "object",
                    "required": ["type", "value"],
                    "properties": {
                        "type": {
                            "type": "string"
                        },
                        "value": {
                            "type": "string"
                        },
                        "xml:lang": {
                            "type": "string"
                        },
                        "datatype": {
                            "type": "string"
                        }
                    }
                }

        swag['paths'][call_name] = {
            "get": {
                "tags": tags,
                "summary": summary,
                # The query text is shown verbatim, hence HTML-escaped
                "description": description + "\n<pre>\n{}\n</pre>".format(cgi.escape(query_metadata['query'])),
                "produces": ["text/csv", "application/json", "text/html"],
                "parameters": params,
                "responses": {
                    "200": {
                        "description": "SPARQL query response",
                        "schema": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": item_properties
                            },
                        }
                    },
                    "default": {
                        "description": "Unexpected error",
                        "schema": {
                            "$ref": "#/definitions/Message"
                        }
                    }
                }
            }
        }

    return jsonify(swag)
# DEPRECATED
# Do something on github pushes?
# @app.route('/sparql', methods = ['POST'])
# def sparql():
# push = json.loads(request.data)
# # One push may contain many commits
# for c in push['commits']:
# # We only look for .rq files
# for a in c['added']:
# if '.rq' in a:
# # New query added
# add_query(push['repository']['full_name'], a)
# print c['added']
# print c['removed']
# print c['modified']
# return 'foo'
if __name__ == '__main__':
    # Script entry point: start the application's own server on port 8088
    # with debug=True.
    server_options = {'port': 8088, 'debug': True}
    app.run(**server_options)