21
21
import datetime
22
22
import time
23
23
import sys
24
+ import MySQLdb
24
25
25
26
26
27
# Flask application object for this module; template_folder is set to 'templates/'.
app = Flask (__name__ , template_folder = 'templates/' )
@@ -166,16 +167,165 @@ def process_query(index, query, query_filter, start=0, count=0):
166
167
167
168
168
169
# ---------------------------------------------------------
169
- def prepareResultJson (result , query_filter ):
170
- from pprint import pprint
170
+ """
171
+ Process query to Sphinx searchd with mysql
172
+ """
173
def process_query_mysql(index, query, query_filter, start=0, count=0):
    """
    Run a full-text query against Sphinx searchd over its MySQL interface.

    Parameters:
        index        -- name of the index to select from; it is interpolated
                        into the SQL text as-is.
        query        -- full-text expression, bound to MATCH(%s).
        query_filter -- mapping read with the keys 'date', 'type', 'lang',
                        'tags', 'sortBy', 'datestart' and 'dateend'; a value
                        of None means "no restriction".
        start        -- offset of the first row (LIMIT offset).
        count        -- number of rows wanted; 0 selects SEARCH_DEFAULT_COUNT,
                        and the value is capped at SEARCH_MAX_COUNT.

    Returns a tuple (status, result):
        * connection failure: (False, raw result dict with 'message' set);
          this dict is NOT passed through prepareResultJson.
        * otherwise: (status, prepareResultJson(result, query_filter)), where
          status is False when executing the query raised.
    """
    # default server configuration, overridable from the environment
    host = '127.0.0.1'
    port = 9306
    if getenv('WEBSEARCH_SERVER'):
        host = getenv('WEBSEARCH_SERVER')
    if getenv('WEBSEARCH_SERVER_PORT'):
        port = int(getenv('WEBSEARCH_SERVER_PORT'))

    db = None
    try:
        db = MySQLdb.connect(host=host, port=port, user='root')
        cursor = db.cursor()
    except Exception as ex:
        # Do not leak a half-opened connection when only cursor() failed.
        if db is not None:
            try:
                db.close()
            except Exception:
                pass
        result = {
            'total_found': 0,
            'matches': [],
            'message': str(ex),
            'status': False,
            'count': 0,
            'startIndex': start,
        }
        return False, result

    if count == 0:
        count = SEARCH_DEFAULT_COUNT
    count = min(SEARCH_MAX_COUNT, count)

    # Values are always passed as bound parameters (args_filter); only
    # fixed or whitelisted text goes into the SQL string itself.
    args_filter = []
    where_filter = []

    # Prepare query
    where_filter.append('MATCH(%s)')
    args_filter.append(query)

    # Prepare filter for query
    for column in ['date', 'type', 'lang', 'tags']:
        values = query_filter[column]
        # None means "not filtered"; an empty list would otherwise render
        # as "column in ()", which is not valid SQL, so skip it as well.
        if not values:
            continue
        placeholders = []
        for val in values:
            args_filter.append(val)
            placeholders.append('%s')
        # Creates where condition: column in (%s, %s, %s...)
        where_filter.append('{} in ({})'.format(column, ', '.join(placeholders)))

    # Prepare sorting by custom or default
    sort_by = []
    if query_filter['sortBy'] is not None:
        for attr in query_filter['sortBy']:
            parts = attr.split('-')
            # List of supported sortBy columns - to prevent SQL injection
            if parts[0] not in ('date', 'lang', 'type', 'weight', 'id'):
                sys.stderr.write('Invalid sortBy column ' + parts[0] + '\n')
                continue
            direction = 'ASC'
            if len(parts) > 1 and parts[1] in ('desc', 'DESC'):
                direction = 'DESC'
            sort_by.append('{} {}'.format(parts[0], direction))

    if not sort_by:
        sort_by.append('weight DESC')

    # Prepare date filtering in where clause.
    # dateend defaults to "now", so an upper bound is applied whenever the
    # date values can be computed; a parse error drops both bounds.
    # NOTE(review): time.mktime() interprets its argument as local time while
    # the tuples here are UTC; calendar.timegm() may be what is intended.
    # Left unchanged - confirm against how date_filter is indexed.
    datestart = 0
    dateend = 0
    try:
        de = datetime.datetime.utcnow().utctimetuple()
        dateend = int(time.mktime(de))
        if query_filter['datestart'] is not None:
            ds = iso8601.parse_date(query_filter['datestart']).utctimetuple()
            datestart = int(time.mktime(ds))
        if query_filter['dateend'] is not None:
            de = iso8601.parse_date(query_filter['dateend']).utctimetuple()
            dateend = int(time.mktime(de))

        if datestart > 0:
            where_filter.append('%s < date_filter')
            args_filter.append(datestart)
        if dateend > 0:
            where_filter.append('date_filter < %s')
            args_filter.append(dateend)
    except Exception as ex:
        # Best effort: the search still runs, just without the date range.
        sys.stderr.write(
            'Cannot prepare filter range on date: ' + str(ex) + str(query_filter) + '\n'
        )

    # Field weights and other options
    # ranker=expr('sum(lcs*user_weight)*1000+bm25') == SPH_RANK_PROXIMITY_BM25
    # ranker=expr('sum((4*lcs+2*(min_hit_pos==1)+exact_hit)*user_weight)*1000+bm25') == SPH_RANK_SPH04
    # ranker=expr('sum((4*lcs+2*(min_hit_pos==1)+100*exact_hit)*user_weight)*1000+bm25') == SPH_RANK_SPH04 boosted with exact_hit
    # select @weight+IF(fieldcrc==$querycrc,10000,0) AS weight
    option = "field_weights = (title = 500, content = 1), ranker = sph04, retry_count = 3, retry_delay = 200"
    # NOTE(review): index is formatted into the statement unescaped; callers
    # are presumed to pass a trusted index name - confirm.
    sql = "SELECT WEIGHT() as weight, * FROM {} WHERE {} ORDER BY {} LIMIT %s, %s OPTION {};".format(
        index,
        ' AND '.join(where_filter),
        ', '.join(sort_by),
        option
    )

    status = True
    result = {
        'total_found': 0,
        'matches': [],
        'message': None,
    }

    try:
        cursor.execute(sql, args_filter + [start, count])
        # First element of each description entry is the column name.
        columns = [col[0] for col in (cursor.description or ())]
        matches = []
        for row in cursor:
            match = {
                'weight': 0,
                'attrs': {},
                'id': 0,
            }
            for (col, value) in zip(columns, row):
                if col == 'id' or col == 'weight':
                    match[col] = value
                else:
                    match['attrs'][col] = value
            matches.append(match)
        result['matches'] = matches

        # SHOW META refers to the statement executed just before it.
        cursor.execute('SHOW META LIKE %s', ('total_found',))
        for row in cursor:
            result['total_found'] = row[1]
    except Exception as ex:
        status = False
        result['message'] = str(ex)
    finally:
        # Always release the cursor and connection; a failure while closing
        # must not replace the search result.
        try:
            cursor.close()
            db.close()
        except Exception as ex:
            sys.stderr.write('Cannot close searchd connection: ' + str(ex) + '\n')

    result['count'] = count
    result['startIndex'] = start
    result['status'] = status
    return status, prepareResultJson(result, query_filter)
314
+
315
+
316
+
317
+ # ---------------------------------------------------------
318
+ def prepareResultJson (result , query_filter ):
172
319
count = result ['count' ]
173
320
response = {
174
321
'results' : [],
175
322
'startIndex' : result ['startIndex' ],
176
323
'count' : count ,
177
324
'totalResults' : result ['total_found' ],
178
325
}
326
+ if 'message' in result and result ['message' ]:
327
+ response ['message' ] = result ['message' ]
328
+
179
329
for row in result ['matches' ]:
180
330
r = row ['attrs' ]
181
331
res = {'rank' : row ['weight' ], 'id' : row ['id' ]}
@@ -283,6 +433,8 @@ def search():
283
433
vl = request .args .getlist (f )
284
434
if len (vl ) == 1 :
285
435
v = vl [0 ].encode ('utf-8' )
436
+ # This argument can be list separated by comma
437
+ v = v .split (',' )
286
438
elif len (vl ) > 1 :
287
439
v = [x .encode ('utf-8' ) for x in vl ]
288
440
if v is None :
@@ -307,7 +459,7 @@ def search():
307
459
data ['url' ] = request .url
308
460
309
461
rc = False
310
- rc , result = process_query (index , q , query_filter , start , count )
462
+ rc , result = process_query_mysql (index , q , query_filter , start , count )
311
463
if rc :
312
464
code = 200
313
465
@@ -317,6 +469,8 @@ def search():
317
469
args = dict (request .args )
318
470
if 'startIndex' in args :
319
471
del (args ['startIndex' ])
472
+ if 'count' in result :
473
+ args ['count' ] = result ['count' ]
320
474
# pprint(request.url)
321
475
322
476
data ['previous_page_url' ] = data ['next_page_url' ] = '#'
0 commit comments