Skip to content

Commit 08cc23d

Browse files
author
MartinMikita
committed
Added filter with list of types/date/lang/tags with multiple arguments.
Rewritten SphinxSearch function to use SphinxQL with mysql41 protocol. (#9)
1 parent 50e6c6d commit 08cc23d

File tree

3 files changed

+161
-4
lines changed

3 files changed

+161
-4
lines changed

Dockerfile

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN apt-get -qq update && apt-get install -qq -y --no-install-recommends \
1414
python-crypto \
1515
python-flask \
1616
python-pil \
17+
python-mysqldb \
1718
unixodbc \
1819
uwsgi \
1920
uwsgi-plugin-python \

conf/sphinx/sphinx.conf

+3-1
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,10 @@ indexer
262262
263263
searchd
264264
{
265-
listen = 9312
265+
listen = 127.0.0.1:9312
266+
listen = 127.0.0.1:9306:mysql41
266267
log = /var/log/sphinxsearch/searchd.log
268+
query_log_format = sphinxql
267269
query_log = /var/log/sphinxsearch/query.log
268270
read_timeout = 5
269271
client_timeout = 300

web/websearch.py

+157-3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import datetime
2222
import time
2323
import sys
24+
import MySQLdb
2425

2526

2627
app = Flask(__name__, template_folder='templates/')
@@ -166,16 +167,165 @@ def process_query(index, query, query_filter, start=0, count=0):
166167

167168

168169
# ---------------------------------------------------------
169-
def prepareResultJson(result, query_filter):
170-
from pprint import pprint
170+
"""
171+
Process query to Sphinx searchd with mysql
172+
"""
173+
def process_query_mysql(index, query, query_filter, start=0, count=0):
    """Run a full-text query against Sphinx searchd over SphinxQL (mysql41).

    The server address defaults to 127.0.0.1:9306 and can be overridden with
    the WEBSEARCH_SERVER and WEBSEARCH_SERVER_PORT environment variables.

    Args:
        index: Name of the Sphinx index. It is interpolated into the SQL text
            as-is, because identifiers cannot be bound as parameters.
            NOTE(review): confirm callers never pass user-controlled text.
        query: Full-text expression bound to MATCH().
        query_filter: Dict of optional filters. 'date', 'type', 'lang' and
            'tags' hold lists of accepted values; 'sortBy' holds a list of
            'column' or 'column-desc' strings; 'datestart' and 'dateend'
            hold ISO 8601 date strings. Missing, None or empty entries are
            ignored.
        start: Offset of the first row to return.
        count: Number of rows to return; 0 selects SEARCH_DEFAULT_COUNT and
            the value is capped at SEARCH_MAX_COUNT.

    Returns:
        A (status, response) tuple. status is False when the connection or
        the query failed; response is the output of prepareResultJson() and
        includes the error text under 'message' in that case.
    """
    # Default server configuration, overridable from the environment.
    host = getenv('WEBSEARCH_SERVER') or '127.0.0.1'
    port = int(getenv('WEBSEARCH_SERVER_PORT') or 9306)

    try:
        db = MySQLdb.connect(host=host, port=port, user='root')
    except Exception as ex:
        result = {
            'total_found': 0,
            'matches': [],
            'message': str(ex),
            'status': False,
            'count': 0,
            'startIndex': start,
        }
        # Same response shape as every other exit path of this function.
        return False, prepareResultJson(result, query_filter)

    if count == 0:
        count = SEARCH_DEFAULT_COUNT
    count = min(SEARCH_MAX_COUNT, count)

    # WHERE conditions and their bound arguments are kept in lockstep.
    where_filter = ['MATCH(%s)']
    args_filter = [query]

    # Attribute filters: "<column> in (%s, %s, ...)".
    for column in ('date', 'type', 'lang', 'tags'):
        values = query_filter.get(column)
        if not values:
            # None means "no filter"; an empty list would yield the invalid
            # SQL "column in ()", so it is skipped as well.
            continue
        values = list(values)
        placeholders = ', '.join(['%s'] * len(values))
        where_filter.append('{} in ({})'.format(column, placeholders))
        args_filter.extend(values)

    # Sorting: only whitelisted column names reach the SQL text, because
    # ORDER BY columns cannot be passed as bound parameters.
    sort_by = []
    for attr in query_filter.get('sortBy') or []:
        parts = attr.split('-')
        if parts[0] not in ('date', 'lang', 'type', 'weight', 'id'):
            sys.stderr.write('Invalid sortBy column ' + parts[0] + '\n')
            continue
        direction = 'ASC'
        if len(parts) > 1 and parts[1].lower() == 'desc':
            direction = 'DESC'
        sort_by.append('{} {}'.format(parts[0], direction))
    if not sort_by:
        sort_by.append('weight DESC')

    # Date range on the date_filter attribute. The upper bound defaults to
    # the current time, so it is applied even when 'dateend' is not given.
    # NOTE(review): time.mktime() interprets the tuple as local time although
    # the tuples here are UTC; the result is only a true UTC epoch when the
    # process runs with TZ=UTC. calendar.timegm() would be timezone-safe, but
    # confirm how date_filter is populated at indexing time before changing.
    datestart = 0
    dateend = 0
    try:
        dateend = int(time.mktime(datetime.datetime.utcnow().utctimetuple()))
        if query_filter.get('datestart') is not None:
            ds = iso8601.parse_date(query_filter['datestart']).utctimetuple()
            datestart = int(time.mktime(ds))
        if query_filter.get('dateend') is not None:
            de = iso8601.parse_date(query_filter['dateend']).utctimetuple()
            dateend = int(time.mktime(de))

        if datestart > 0:
            where_filter.append('%s < date_filter')
            args_filter.append(datestart)
        if dateend > 0:
            where_filter.append('date_filter < %s')
            args_filter.append(dateend)
    except Exception as ex:
        # Best effort: an unparsable date drops the range filter instead of
        # failing the whole search.
        sys.stderr.write(
            'Cannot prepare filter range on date: '
            + str(ex) + str(query_filter) + '\n'
        )

    # Field weights and other options
    # ranker=expr('sum(lcs*user_weight)*1000+bm25') == SPH_RANK_PROXIMITY_BM25
    # ranker=expr('sum((4*lcs+2*(min_hit_pos==1)+exact_hit)*user_weight)*1000+bm25') == SPH_RANK_SPH04
    # ranker=expr('sum((4*lcs+2*(min_hit_pos==1)+100*exact_hit)*user_weight)*1000+bm25') == SPH_RANK_SPH04 boosted with exact_hit
    # select @weight+IF(fieldcrc==$querycrc,10000,0) AS weight
    option = "field_weights = (title = 500, content = 1), ranker = sph04, retry_count = 3, retry_delay = 200"
    sql = "SELECT WEIGHT() as weight, * FROM {} WHERE {} ORDER BY {} LIMIT %s, %s OPTION {};".format(
        index,
        ' AND '.join(where_filter),
        ', '.join(sort_by),
        option
    )

    status = True
    result = {
        'total_found': 0,
        'matches': [],
        'message': None,
    }

    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, args_filter + [start, count])
            # Each description entry is a tuple whose first item is the
            # column name.
            columns = [col[0] for col in (cursor.description or ())]
            matches = []
            for row in cursor:
                match = {
                    'weight': 0,
                    'attrs': {},
                    'id': 0,
                }
                for col, value in zip(columns, row):
                    if col in ('id', 'weight'):
                        match[col] = value
                    else:
                        match['attrs'][col] = value
                matches.append(match)
            result['matches'] = matches

            # SHOW META reports statistics of the previous query on this
            # connection as (name, value) rows.
            cursor.execute('SHOW META LIKE %s', ('total_found',))
            for row in cursor:
                result['total_found'] = row[1]
        finally:
            cursor.close()
    except Exception as ex:
        status = False
        result['message'] = str(ex)
    finally:
        # Always release the connection; one is opened per call.
        db.close()

    result['count'] = count
    result['startIndex'] = start
    result['status'] = status
    return status, prepareResultJson(result, query_filter)
314+
315+
316+
317+
# ---------------------------------------------------------
318+
def prepareResultJson(result, query_filter):
172319
count = result['count']
173320
response = {
174321
'results': [],
175322
'startIndex': result['startIndex'],
176323
'count': count,
177324
'totalResults': result['total_found'],
178325
}
326+
if 'message' in result and result['message']:
327+
response['message'] = result['message']
328+
179329
for row in result['matches']:
180330
r = row['attrs']
181331
res = {'rank': row['weight'], 'id': row['id']}
@@ -283,6 +433,8 @@ def search():
283433
vl = request.args.getlist(f)
284434
if len(vl) == 1:
285435
v = vl[0].encode('utf-8')
436+
# This argument can be list separated by comma
437+
v = v.split(',')
286438
elif len(vl) > 1:
287439
v = [x.encode('utf-8') for x in vl]
288440
if v is None:
@@ -307,7 +459,7 @@ def search():
307459
data['url'] = request.url
308460

309461
rc = False
310-
rc, result = process_query(index, q, query_filter, start, count)
462+
rc, result = process_query_mysql(index, q, query_filter, start, count)
311463
if rc:
312464
code = 200
313465

@@ -317,6 +469,8 @@ def search():
317469
args = dict(request.args)
318470
if 'startIndex' in args:
319471
del(args['startIndex'])
472+
if 'count' in result:
473+
args['count'] = result['count']
320474
# pprint(request.url)
321475

322476
data['previous_page_url'] = data['next_page_url'] = '#'

0 commit comments

Comments
 (0)