Skip to content

Commit 70da302

Browse files
committed
Added a uniqueness check for domain_id: duplicate domains are skipped, and the search and update handlers verify the domain was not skipped before proceeding.
1 parent b56d1dc commit 70da302

File tree

3 files changed, with 23 additions and 5 deletions.

conf/sphinx/sphinx.conf

+5
Original file line number | Diff line number | Diff line change
@@ -161,13 +161,18 @@ def get_domain_id(domain):
161161
# Split domains by comma and prepare source/index for this domain:
162162
# Input data /data/<domain>/search.tsv
163163
domains = domains.split(',')
164+
domain_ids = {}
164165

165166
index_config = ''
166167

167168
for domain in domains:
168169
# continue
169170
domain_config = ''
170171
domain_id = get_domain_id(domain)
172+
# Check uniqueness and skip duplicates
173+
if domain_id in domain_ids:
174+
continue
175+
domain_ids[domain_id] = domain
171176
global_search_cols = ''
172177
format_args = {
173178
'domain': domain,

sphinx-reindex.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ if [ ! "$DOMAINS" = "" ]; then
1414
for domain in $list_domain
1515
do
1616
if [ ! -f /data/$domain/search.tsv ]; then
17-
echo "Downloading $domain"
17+
echo "Downloading $domain/search.tsv"
1818
mkdir -p /data/$domain/
1919
curl -Ls http://$domain/search.tsv -o /data/$domain/search.tsv
2020
REINDEX=1

web/websearch.py

+17-4
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,14 @@
3535

3636
# Split domains by comma and prepare source/index for this domain:
3737
# Input data /data/<domain>/search.tsv
38+
# Prepare domain IDs
3839
domains = domains.split(',')
40+
domain_ids = {}
41+
for domain in domains:
42+
# Check uniqueness and skip duplicates
43+
if domain_id in domain_ids.values():
44+
continue
45+
domain_ids[domain] = domain_id
3946

4047

4148
# Return maximal number of results
@@ -416,7 +423,7 @@ def displayName():
416423
"""
417424
@app.route('/search')
418425
def search():
419-
global domains
426+
global domains, domain_ids
420427
code = 400
421428

422429
data = {'query': '', 'route': '/search', 'template': 'answer.html'}
@@ -426,7 +433,10 @@ def search():
426433
if domain not in domains:
427434
data['result'] = {'error': 'Domain not allowed!'}
428435
return formatResponse(data, 403)
429-
domain_id = get_domain_id(domain)
436+
if domain not in domain_ids:
437+
data['result'] = {'error': 'Duplicated domain is skipped!'}
438+
return formatResponse(data, 404)
439+
domain_id = domain_ids[domain]
430440
data['domain'] = domain
431441

432442
index = 'search_{}_index'.format(domain_id)
@@ -505,15 +515,18 @@ def search():
505515
"""
506516
@app.route('/update/<path:domain>', methods=['POST'])
507517
def update(domain):
508-
global domains
518+
global domains, domain_ids
509519
data = {'route': '/update', 'template': None}
510520

511521
domain = unquote(domain)
512522
if domain not in domains:
513523
data['result'] = {'error': 'Domain not allowed!'}
514524
return formatResponse(data, 403)
525+
if domain not in domain_ids:
526+
data['result'] = {'error': 'Duplicated domain is skipped!'}
527+
return formatResponse(data, 404)
515528

516-
domain_id = get_domain_id(domain).encode('utf-8')
529+
domain_id = domain_ids[domain].encode('utf-8')
517530
data['domain'] = domain.encode('utf-8')
518531
data['protocol'] = 'http'
519532
if request.args.get('https', None):

0 commit comments

Comments
 (0)