Skip to content

Commit 7e8ea2e

Browse files
committed
🔥 (operations) Remove cleanup_archive
1 parent de6eeb7 commit 7e8ea2e

File tree

3 files changed

+0
-43
lines changed

3 files changed

+0
-43
lines changed

docs/reference/operations.md

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,6 @@ Operations for storing data.
8888

8989
::: memorious.operations.store.lakehouse
9090

91-
::: memorious.operations.store.cleanup_archive
92-
9391
---
9492

9593
## Debug

memorious/operations/store.py

Lines changed: 0 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -378,36 +378,6 @@ def lakehouse(context: Context, data: dict[str, Any]) -> None:
378378
context.emit(data=data)
379379

380380

381-
@register("cleanup_archive")
382-
def cleanup_archive(context: Context, data: dict[str, Any]) -> None:
383-
"""Remove a blob from the archive.
384-
385-
Deletes a file from the archive after processing is complete.
386-
Useful for cleaning up temporary files.
387-
388-
Args:
389-
context: The crawler context.
390-
data: Must contain content_hash of file to delete.
391-
392-
Example:
393-
```yaml
394-
pipeline:
395-
cleanup:
396-
method: cleanup_archive
397-
```
398-
"""
399-
content_hash = data.get("content_hash")
400-
if content_hash is None:
401-
context.emit_warning("No content hash in data.")
402-
return
403-
file_info = context.archive.get(content_hash)
404-
if file_info:
405-
try:
406-
context.archive.delete(file_info)
407-
except NotImplementedError:
408-
context.log.warning("File deletion not supported by storage backend")
409-
410-
411381
@register("store")
412382
def store(context: Context, data: dict[str, Any]) -> None:
413383
"""Store with configurable backend and incremental marking.

tests/test_operations.py

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from memorious.operations.parse import parse
99
from memorious.operations.store import (
1010
_compute_file_path,
11-
cleanup_archive,
1211
directory,
1312
lakehouse,
1413
)
@@ -284,16 +283,6 @@ def test_directory_same_content(context, mocker, httpbin_url):
284283
assert len(json_files) == 1, "Should only have one content file"
285284

286285

287-
def test_cleanup_archive(context, httpbin_url):
288-
url = f"{httpbin_url}/user-agent"
289-
result = context.http.get(url, headers={"User-Agent": "Memorious Test"})
290-
data = result.serialize()
291-
assert context.archive.get(data["content_hash"]) is not None
292-
# cleanup_archive may not actually delete due to NotImplementedError in ftm_lakehouse
293-
cleanup_archive(context, data)
294-
# NOTE: File may still exist if storage backend doesn't support deletion
295-
296-
297286
def test_lakehouse_default(context, mocker, httpbin_url):
298287
"""Test lakehouse store with default archive."""
299288
from ftm_lakehouse import get_lakehouse

0 commit comments

Comments
 (0)