Skip to content

Commit eb72f5d

Browse files
authored
Merge pull request #108 from alirezamika/codex/make-rule-id-deterministic-across-runs
Remove unused random string helper
2 parents (996f06e + eec3339), commit eb72f5d

2 files changed

Lines changed: 1 addition & 9 deletions

File tree

autoscraper/auto_scraper.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
FuzzyText,
1212
ResultItem,
1313
get_non_rec_text,
14-
get_random_str,
1514
normalize,
1615
text_match,
1716
unique_hashable,
@@ -294,7 +293,7 @@ def _build_stack(cls, child, url):
294293
)
295294
stack["url"] = url if is_full_url else ""
296295
stack["hash"] = hashlib.sha256(str(stack).encode("utf-8")).hexdigest()
297-
stack["stack_id"] = "rule_" + get_random_str(4)
296+
stack["stack_id"] = "rule_" + stack["hash"][:8]
298297
return stack
299298

300299
def _get_result_for_child(self, child, soup, url):

autoscraper/utils.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
from collections import OrderedDict
22

3-
import random
4-
import string
53
import unicodedata
64

75
from difflib import SequenceMatcher
@@ -24,11 +22,6 @@ def unique_hashable(hashable_items):
2422
return list(OrderedDict.fromkeys(hashable_items))
2523

2624

27-
def get_random_str(n):
28-
chars = string.ascii_lowercase + string.digits
29-
return ''.join(random.choice(chars) for i in range(n))
30-
31-
3225
def get_non_rec_text(element):
3326
return ''.join(element.find_all(text=True, recursive=False)).strip()
3427

0 commit comments

Comments
 (0)