Skip to content

Commit 5a1b797

Browse files
authored
Merge pull request #42 from ycharts/fix-score-deserialize
Fix score deserialize
2 parents 20e89f5 + bc08228 commit 5a1b797

File tree

2 files changed

+32
-22
lines changed

2 files changed

+32
-22
lines changed

autocompleter/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
VERSION = (1, 1, 0)
1+
VERSION = (1, 1, 1)
2+
23

34
from autocompleter.registry import registry, signal_registry
45
from autocompleter.base import (

autocompleter/base.py

+30-21
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,14 @@ def _serialize_data(cls, data):
5353

5454
@classmethod
5555
def _deserialize_data(cls, raw):
56-
return json.loads(raw.decode("utf-8"))
56+
# The scores that are inserted into Redis are actually 1/score. On the other hand, we
57+
# have securities which have their score set to 0. When that happens, we store the score
58+
# as inf, which Redis knows about and can handle, but the JSON spec does not so we get
59+
# an error when trying to deserialize it.
60+
# Here, we check to see if the redis stored value is b"inf" and only deserialize it when it's
61+
# not. If it is, we leave it as the string "inf", which float() can still parse.
62+
63+
return json.loads(raw.decode("utf-8")) if raw != b"inf" else "inf"
5764

5865
@staticmethod
5966
def _get_prefixes_set(norm_terms_list):
@@ -1249,14 +1256,7 @@ def _facet_list_to_set(facet_list):
12491256
scores_map_key = SCORE_MAP_BASE_NAME % provider_name
12501257
scores_db_map = {}
12511258
for obj_id, score in REDIS.hgetall(scores_map_key).items():
1252-
# The scores that are inserted into Redis are actually 1/score. On the other hand, we
1253-
# have securities which have their score set to 0. When that happens, we store the score
1254-
# as inf, which Redis knows about and can handle, but the JSON spec does not so we get
1255-
# an error when trying to deserialize it.
1256-
# Here, we check to see if the redis stored is b"inf" and only deserialize it when it's
1257-
# not. If it is, we leave it as "inf" because later we convert it into float and can
1258-
# handle it normally
1259-
parsed_score = self._deserialize_data(score) if score != b"inf" else "inf"
1259+
parsed_score = self._deserialize_data(score)
12601260
obj_id = str(obj_id.decode("utf-8"))
12611261
scores_db_map[obj_id] = float(parsed_score)
12621262

@@ -1294,12 +1294,15 @@ def _facet_list_to_set(facet_list):
12941294
if len(term.split(" ")) <= max_word_count:
12951295
exact_sorted_set_key = EXACT_BASE_NAME % (provider_name, term)
12961296
pipe.zadd(exact_sorted_set_key, {obj_id: scores_live_map[obj_id]})
1297-
self.log.info(f"Added 1 entry to {exact_sorted_set_key}")
1298-
# Terms in the DB but not in the live set are terms that got removed
1299-
for term in db_obj_terms - live_obj_terms:
1297+
self.log.info(f"Added {len(terms_to_add)} entries to {EXACT_BASE_NAME}")
1298+
# Terms in the DB but not in the live set are terms that got removed
1299+
terms_to_remove = db_obj_terms - live_obj_terms
1300+
for term in terms_to_remove:
13001301
exact_sorted_set_key = EXACT_BASE_NAME % (provider_name, term)
13011302
pipe.zrem(exact_sorted_set_key, obj_id)
1302-
self.log.info(f"Removed 1 entry from {exact_sorted_set_key}")
1303+
self.log.info(
1304+
f"Removed {len(terms_to_remove)} entries from {EXACT_BASE_NAME}"
1305+
)
13031306

13041307
# Repeat the same logic for prefixes
13051308
live_obj_prefixes = frozenset(
@@ -1318,13 +1321,16 @@ def _facet_list_to_set(facet_list):
13181321
for prefix in prefixes_to_add:
13191322
prefix_sorted_set_key = PREFIX_BASE_NAME % (provider_name, prefix)
13201323
pipe.zadd(prefix_sorted_set_key, {obj_id: scores_live_map[obj_id]})
1321-
self.log.info(f"Added 1 entry to {prefix_sorted_set_key}")
1324+
self.log.info(f"Added {len(prefixes_to_add)} entries to {PREFIX_BASE_NAME}")
13221325

13231326
# Prefixes in the DB but not in the live set are prefixes that got removed
1324-
for prefix in db_obj_prefixes - live_obj_prefixes:
1327+
prefixes_to_remove = db_obj_prefixes - live_obj_prefixes
1328+
for prefix in prefixes_to_remove:
13251329
prefix_sorted_set_key = PREFIX_BASE_NAME % (provider_name, prefix)
13261330
pipe.zrem(prefix_sorted_set_key, obj_id)
1327-
self.log.info(f"Removed 1 entry to {prefix_sorted_set_key}")
1331+
self.log.info(
1332+
f"Removed {len(prefixes_to_remove)} entries to {PREFIX_BASE_NAME}"
1333+
)
13281334

13291335
# Update exact terms sets
13301336
# Build a single set of all terms in each data set
@@ -1391,11 +1397,14 @@ def _facet_list_to_set(facet_list):
13911397
for key, value in facets_to_add:
13921398
facet_sorted_set_key = FACET_SET_BASE_NAME % (provider_name, key, value)
13931399
pipe.zadd(facet_sorted_set_key, {obj_id: scores_live_map[obj_id]})
1394-
self.log.info(f"Added 1 entry to {facet_sorted_set_key}")
1395-
for key, value in db_obj_facets - live_obj_facets:
1400+
self.log.info(f"Added {len(facets_to_add)} entries to {FACET_BASE_NAME}")
1401+
facets_to_remove = db_obj_facets - live_obj_facets
1402+
for key, value in facets_to_remove:
13961403
facet_sorted_set_key = FACET_SET_BASE_NAME % (provider_name, key, value)
13971404
pipe.zrem(facet_sorted_set_key, obj_id)
1398-
self.log.info(f"Removed 1 entry to {facet_sorted_set_key}")
1405+
self.log.info(
1406+
f"Removed {len(facets_to_remove)} entries to {FACET_SET_BASE_NAME}"
1407+
)
13991408

14001409
# Bulk update the facets hash map with all needed facets in a single operation
14011410
if facets_with_updates := facets_live_set - facets_db_set:
@@ -1439,10 +1448,10 @@ def _facet_list_to_set(facet_list):
14391448
obj_id: scores_live_map[obj_id] for obj_id in objs_with_updated_scores
14401449
}:
14411450
pipe.hset(scores_map_key, mapping=updated_scores)
1442-
self.log.info(f"Added {len(updated_scores)} entries to {updated_scores}")
1451+
self.log.info(f"Added {len(updated_scores)} entries to {scores_map_key}")
14431452
if objs_removed := set(scores_db_map.keys()) - set(scores_live_map.keys()):
14441453
pipe.hdel(scores_map_key, *objs_removed)
1445-
self.log.info(f"Removed {len(objs_removed)} entries from {objs_removed}")
1454+
self.log.info(f"Removed {len(objs_removed)} entries from {scores_map_key}")
14461455
# Execute all the additions and deletions in a single connection
14471456
pipe.execute()
14481457
self.log.info(f"End update of provider {provider_name}")

0 commit comments

Comments
 (0)