Skip to content

Commit b7e8081

Browse files
committed
👽️ Apply default sha256
1 parent 12205ce commit b7e8081

File tree

6 files changed

+494
-283
lines changed

6 files changed

+494
-283
lines changed

poetry.lock

Lines changed: 487 additions & 276 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ sphinx-rtd-theme = "^3.0.2"
7777
recommonmark = "^0.7.1"
7878
mkdocs-typer2 = "^0.1.6"
7979
pytest-asyncio = "^1.3.0"
80+
griffe = "<2"
8081

8182
[[tool.poetry.packages]]
8283
include = "memorious"

tests/test_fetch.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -228,8 +228,8 @@ def test_fetch_archives_content(self, httpbin_url):
228228
"""Test that fetch() stores content in archive."""
229229
response = fetch(f"{httpbin_url}/json", dataset="fetch-archive")
230230
assert response.content_hash is not None
231-
# Content hash should be a SHA1 hex digest (40 chars)
232-
assert len(response.content_hash) == 40
231+
# Content hash should be a SHA256 hex digest (64 chars)
232+
assert len(response.content_hash) == 64
233233

234234

235235
class TestFetchCaching:

tests/test_http.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def test_fetch_response(self, http, httpbin_url):
134134
content_hash = context_http_response.fetch()
135135
# fetch() now returns content_hash, not file_path
136136
assert isinstance(content_hash, str)
137-
assert len(content_hash) == 40 # SHA1 hex digest length
137+
assert len(content_hash) == 64 # SHA256 hex digest length
138138

139139
def test_contenttype(self, http, httpbin_url):
140140
request = httpx.Request("GET", f"{httpbin_url}/get")
@@ -376,7 +376,7 @@ def test_fetch_with_partial_content_resumes(self, http, httpbin_url):
376376
# the 200 response will reset partial state and download full content)
377377
content_hash = response.fetch()
378378
assert content_hash is not None
379-
assert len(content_hash) == 40 # SHA1 hex digest length
379+
assert len(content_hash) == 64 # SHA256 hex digest length
380380
finally:
381381
# Cleanup
382382
if partial_path.exists():

tests/test_operations.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -420,8 +420,6 @@ def test_directory_same_content(context, mocker, httpbin_url):
420420

421421
def test_lakehouse_default(context, mocker, httpbin_url):
422422
"""Test lakehouse store with default archive."""
423-
from ftm_lakehouse import get_lakehouse
424-
425423
url = f"{httpbin_url}/user-agent"
426424
result = context.http.get(url, headers={"User-Agent": "Memorious Test"})
427425
data = result.serialize()
@@ -440,6 +438,7 @@ def test_lakehouse_default(context, mocker, httpbin_url):
440438

441439
# Verify entity was created with origin=crawl
442440
entities_repo = get_entities(context.crawler.name)
441+
entities_repo.flush()
443442
entities = [
444443
e
445444
for e in entities_repo.query(origin="crawl")

tests/test_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ def test_pipeline_auth(httpbin_auth_crawler, auth_output_dir):
113113
meta_files = [
114114
f
115115
for f in auth_output_dir.glob("**/*.json")
116-
if re.match(r"^[a-f0-9]{40}$", f.stem)
116+
if re.match(r"^[a-f0-9]{64}$", f.stem)
117117
]
118118
assert len(meta_files) > 0, "Expected metadata JSON file (named by content_hash)"
119119

0 commit comments

Comments
 (0)