@instances_bp.route("/<string:manifestation_id>", methods=["DELETE"], strict_slashes=False)
def delete_instance(manifestation_id: str):
    """Delete a manifestation and all its owned nodes (cascade).

    The graph delete runs first through ``Neo4JDatabase.delete_manifestation``;
    nothing here catches its exceptions, so they propagate to the caller.
    Two follow-up steps then run against the returned summary:

    * the base-text blob is removed through ``Storage.delete_base_text``
      (best-effort: failures are logged and swallowed);
    * when the summary carries segment ids, they are handed to
      ``_trigger_delete_search_segments``.

    Args:
        manifestation_id: ID of the manifestation to delete. Flask only passes
            URL variables to the view, so the route rule must declare a
            variable with exactly this name.

    Returns:
        ``(response, 200)`` where the JSON body holds ``message``,
        ``instance_id``, ``expression_id``, ``annotations`` and
        ``deleted_counts``.
    """
    logger.info("Deleting manifestation with ID: %s", manifestation_id)

    db = Neo4JDatabase()
    result = db.delete_manifestation(manifestation_id=manifestation_id)

    expression_id = result["expression_id"]
    if expression_id:
        try:
            Storage().delete_base_text(expression_id=expression_id, manifestation_id=manifestation_id)
        except Exception as exc:  # pylint: disable=broad-exception-caught
            # Storage cleanup is best-effort: the request must not fail just because the
            # blob is missing or storage is temporarily unreachable. `exc_info=True`
            # attaches the traceback so non-NotFound errors (auth, network, programming
            # bugs) stay diagnosable from the log record, not just their message.
            logger.warning(
                "Failed to delete base text for expression %s / manifestation %s: %s — skipping",
                expression_id,
                manifestation_id,
                exc,
                exc_info=True,
            )

    if result["segment_ids"]:
        _trigger_delete_search_segments(result["segment_ids"])

    return jsonify({
        "message": "Instance deleted successfully",
        "instance_id": manifestation_id,
        "expression_id": expression_id,
        "annotations": result["annotations"],
        "deleted_counts": result["deleted_counts"],
    }), 200
+ - Notifies the external search-segmenter service to drop search-index entries + for the deleted segmentation / search_segmentation segments (fire-and-forget). + + The parent Expression is intentionally left intact. + + **Note:** This operation is irreversible. + tags: + - Instances + operationId: deleteInstance + parameters: + - name: instance_id + in: path + required: true + schema: + type: string + description: The ID of the instance to delete + responses: + "200": + description: Instance deleted successfully + content: + application/json: + schema: + type: object + required: + - message + - instance_id + - annotations + - deleted_counts + properties: + message: + type: string + example: "Instance deleted successfully" + instance_id: + type: string + description: The ID of the deleted instance + example: "I12345678" + annotations: + type: array + description: | + Summary of every annotation that was deleted on this instance, + grouped by annotation type. Each entry has exactly one key. + Alignment entries also report the counterpart annotation IDs on + the paired manifestation under `aligned_to_id`. 
+ items: + type: object + additionalProperties: + type: object + properties: + id: + type: array + description: Annotation IDs (for non-alignment types) + items: + type: string + aligned_from_id: + type: array + description: Alignment annotation IDs on this instance + items: + type: string + aligned_to_id: + type: array + description: Counterpart alignment annotation IDs on the paired instance + items: + type: string + deleted_counts: + type: object + required: + - manifestations + - annotations + - segments + - references + properties: + manifestations: + type: integer + minimum: 1 + description: Always 1 for this endpoint + annotations: + type: integer + minimum: 0 + description: Number of annotations deleted on this instance + segments: + type: integer + minimum: 0 + description: Number of segments deleted across all annotation types + references: + type: integer + minimum: 0 + description: Number of Reference nodes deleted + examples: + success_with_alignment: + summary: Critical instance with segmentation, alignment, and bibliography + value: + message: "Instance deleted successfully" + instance_id: "I12345678" + annotations: + - segmentation: + id: ["A1001"] + - alignment: + aligned_from_id: ["A1002", "A1003"] + aligned_to_id: ["A2001", "A2002"] + - bibliography: + id: ["A1004"] + deleted_counts: + manifestations: 1 + annotations: 4 + segments: 152 + references: 0 + success_diplomatic: + summary: Diplomatic instance with pagination only + value: + message: "Instance deleted successfully" + instance_id: "I87654321" + annotations: + - pagination: + id: ["A2010"] + deleted_counts: + manifestations: 1 + annotations: 1 + segments: 240 + references: 240 + "400": + $ref: '#/components/responses/InvalidRequest' + "404": + description: Instance not found + content: + application/json: + schema: + type: object + properties: + error: + type: string + examples: + not_found: + value: + error: "Manifestation with ID 'I12345678' not found" + "500": + $ref: 
'#/components/responses/ServerError' /v2/instances/{instance_id}/related: get: @@ -2227,6 +2387,220 @@ paths: $ref: '#/components/responses/NotFound' "500": $ref: '#/components/responses/ServerError' + delete: + summary: Delete a text (cascade) + description: | + Delete a text (Expression) and every entity it exclusively owns. + + **Cascading deletes** + - Every Manifestation linked via `MANIFESTATION_OF`. For each manifestation, + the same cascade as `DELETE /v2/instances/{instance_id}` runs: + - Annotations (segmentation, pagination, alignment, bibliography, + search_segmentation, table_of_contents, durchen). + - Segments, References, TOC Sections, Durchen Notes. + - Incipit title Nomens / alt Nomens / LocalizedTexts. + - Alignment counterpart annotations on paired manifestations. + - The Expression's title primary Nomen + alt Nomens + their LocalizedTexts. + - Contribution nodes (one per contributor link). Person, AI, and RoleType + nodes are shared and **never deleted**, only the relationships to them. + - The parent Work iff no other Expression still references it. The Work's + `BELONGS_TO` Category link is removed by the cascade; the Category itself + is shared and preserved. + - The Expression node itself. + + **Relationships removed (target nodes preserved — they are shared)** + - `EXPRESSION_OF` to Work (Work is removed only if orphaned, see above) + - `HAS_LANGUAGE` to Language + - `HAS_LICENSE` to License + - `HAS_COPYRIGHT` to CopyrightStatus + - `TRANSLATION_OF` / `COMMENTARY_OF` to other Expressions in either + direction. Other Expressions that pointed to this text via these + relationships **are NOT cascade-deleted** — they survive but lose those + links. + + **Side effects** + - Removes the base-text blob from storage at + `base_texts/{text_id}/{instance_id}.txt` for every deleted manifestation. + Failures (e.g. blob already missing) are logged and swallowed. 
+ - Notifies the external search-segmenter service to drop search-index + entries for every search-indexed segment across all deleted manifestations + (single fire-and-forget batched call). + + **Note:** This operation is irreversible. + tags: + - Texts + operationId: deleteText + parameters: + - name: id + in: path + required: true + schema: + type: string + description: The expression ID of the text to delete + responses: + "200": + description: Text deleted successfully + content: + application/json: + schema: + type: object + required: + - message + - text_id + - instances + - deleted_counts + - work_deleted + properties: + message: + type: string + example: "Text deleted successfully" + text_id: + type: string + description: The ID of the deleted text + example: "T12345678" + title: + type: object + nullable: true + description: | + Localized primary title of the deleted text in the form + `{lang_code: text}`. May be `null` if the text had no title. + additionalProperties: + type: string + instances: + type: array + description: | + Per-manifestation deletion summaries, in the order they were + cascade-deleted. Each entry mirrors the shape returned by + `DELETE /v2/instances/{instance_id}` (without `deleted_counts` + and `message`). + items: + type: object + required: + - instance_id + - annotations + properties: + instance_id: + type: string + description: The ID of the deleted instance + annotations: + type: array + description: | + Annotation summary for this instance, grouped by type. + See `DELETE /v2/instances/{instance_id}` for shape. + items: + type: object + additionalProperties: + type: object + deleted_counts: + type: object + description: | + Aggregated counts across this expression and ALL of its + cascaded manifestations. 
+ required: + - expressions + - manifestations + - annotations + - segments + - references + - contributions + properties: + expressions: + type: integer + minimum: 1 + description: Always 1 for this endpoint + manifestations: + type: integer + minimum: 0 + description: Total Manifestation nodes deleted + annotations: + type: integer + minimum: 0 + description: Total Annotation nodes deleted (across all manifestations) + segments: + type: integer + minimum: 0 + description: Total Segment nodes deleted (across all manifestations) + references: + type: integer + minimum: 0 + description: Total Reference nodes deleted (across all manifestations) + contributions: + type: integer + minimum: 0 + description: Number of Contribution nodes deleted on the expression + work_deleted: + type: boolean + description: | + `true` iff the parent Work was orphaned (no remaining + Expressions linked to it) and was therefore deleted. + examples: + success_with_translations: + summary: Root text with two manifestations and one orphaned Work + value: + message: "Text deleted successfully" + text_id: "T12345678" + title: + en: "The Great Commentary on Madhyamika Philosophy" + bo: "དབུ་མའི་འགྲེལ་པ་ཆེན་མོ།" + instances: + - instance_id: "I10000001" + annotations: + - segmentation: + id: ["A1001"] + - alignment: + aligned_from_id: ["A1002"] + aligned_to_id: ["A2001"] + - instance_id: "I10000002" + annotations: + - pagination: + id: ["A1010"] + - bibliography: + id: ["A1011"] + deleted_counts: + expressions: 1 + manifestations: 2 + annotations: 4 + segments: 320 + references: 200 + contributions: 3 + work_deleted: true + success_translation_shared_work: + summary: Translation deleted, parent Work kept (still has the original) + value: + message: "Text deleted successfully" + text_id: "T22222222" + title: + en: "English Translation" + instances: + - instance_id: "I20000001" + annotations: + - segmentation: + id: ["A3001"] + deleted_counts: + expressions: 1 + manifestations: 1 + annotations: 1 
+ segments: 50 + references: 0 + contributions: 1 + work_deleted: false + "400": + $ref: '#/components/responses/InvalidRequest' + "404": + description: Text not found + content: + application/json: + schema: + type: object + properties: + error: + type: string + examples: + not_found: + value: + error: "Text with ID 'T12345678' not found" + "500": + $ref: '#/components/responses/ServerError' /v2/texts/{texts_id}/group: get: diff --git a/functions/api/texts.py b/functions/api/texts.py index aad10206..0e3d66ee 100644 --- a/functions/api/texts.py +++ b/functions/api/texts.py @@ -1,6 +1,6 @@ import logging -from api.instances import _trigger_search_segmenter +from api.instances import _trigger_delete_search_segments, _trigger_search_segmenter from api.relation import _get_expression_relations from exceptions import DataNotFound, InvalidRequest from flask import Blueprint, Response, jsonify, request @@ -350,4 +350,75 @@ def update_text(expression_id: str) -> tuple[Response, int]: if update_data: db.update_expression(expression_id=expression_id, update_data=update_data) - return jsonify({"message": "Text updated successfully", "id": expression_id}), 200 \ No newline at end of file + return jsonify({"message": "Text updated successfully", "id": expression_id}), 200 + + +@texts_bp.route("/", methods=["DELETE"], strict_slashes=False) +def delete_text(text_id: str) -> tuple[Response, int]: + """ + Cascade-delete a text (Expression) and every entity it exclusively owns. + + Deletes: + - Every Manifestation linked via MANIFESTATION_OF (and everything those own: + all annotations + their segments / references / sections / durchen notes, + incipit titles, alignment counterparts on paired manifestations). + - The Expression's title Nomen + alt Nomens + LocalizedTexts. + - Contribution nodes (Person / AI / RoleType are shared and preserved). + - The parent Work iff no other Expression still references it. 
+ - The Expression node itself (which removes HAS_LANGUAGE, EXPRESSION_OF, + HAS_LICENSE, HAS_COPYRIGHT, TRANSLATION_OF / COMMENTARY_OF in either + direction by DETACH DELETE). + + Side effects: + - Removes the base-text blob from storage for every manifestation. Failures + are logged but do not fail the request — the storage entry may already be + missing for legitimate reasons. + - Notifies the external search-segmenter service to clean up search index + entries for every search-indexed segment that was deleted. + + Other Expressions that have TRANSLATION_OF / COMMENTARY_OF relationships pointing + to this Expression are NOT cascade-deleted; they survive but lose those links. + """ + logger.info("Deleting text with ID: %s", text_id) + + db = Neo4JDatabase() + result = db.delete_expression(expression_id=text_id) + + storage = Storage() + for manifestation_id in result["manifestation_ids"]: + try: + storage.delete_base_text(expression_id=text_id, manifestation_id=manifestation_id) + except Exception as exc: # pylint: disable=broad-exception-caught + # Storage cleanup is best-effort. Log full exception so any non-NotFound errors + # (auth, network, programming bugs) remain visible, but do not fail the request. + logger.warning( + "Failed to delete base text for expression %s / manifestation %s: %s — skipping", + text_id, + manifestation_id, + exc, + ) + + # Aggregate every search-indexed segment id across all deleted manifestations and + # notify the search-segmenter service in a single fire-and-forget call. 
+ all_segment_ids: list[str] = [] + for m_result in result["manifestation_results"]: + all_segment_ids.extend(m_result.get("segment_ids", [])) + if all_segment_ids: + _trigger_delete_search_segments(all_segment_ids) + + instances_response = [ + { + "instance_id": m_id, + "annotations": m_result["annotations"], + } + for m_id, m_result in zip(result["manifestation_ids"], result["manifestation_results"]) + ] + + return jsonify({ + "message": "Text deleted successfully", + "text_id": text_id, + "title": result["title"], + "instances": instances_response, + "deleted_counts": result["deleted_counts"], + "work_deleted": result["work_deleted"], + }), 200 \ No newline at end of file diff --git a/functions/main.py b/functions/main.py index c244b879..8e123cc1 100644 --- a/functions/main.py +++ b/functions/main.py @@ -115,7 +115,7 @@ def log_response(response): cors=options.CorsOptions( # cors_origins=["https://pecha-backend.web.app", "http://localhost:5002"], cors_origins=["*"], - cors_methods=["GET", "POST", "OPTIONS", "PUT"], + cors_methods=["GET", "POST", "OPTIONS", "PUT", "DELETE"], ), max_instances=1, timeout_sec=540, # Maximum timeout: 540 seconds (9 minutes) diff --git a/functions/neo4j_database.py b/functions/neo4j_database.py index 4db3a655..4ee02758 100644 --- a/functions/neo4j_database.py +++ b/functions/neo4j_database.py @@ -39,6 +39,72 @@ logger = logging.getLogger(__name__) +def _build_annotation_summary(annotation_info: list[dict]) -> list[dict]: + """ + Convert raw annotation_info rows from `manifestations.get_delete_info` into the API + response `annotations` format, grouped by annotation type. + + Rows have the shape: + { + "annotation_id": , # Annotation on this manifestation (m) + "annotation_type": , # AnnotationType.name + "partner_id": , # For alignments: the counterpart annotation id + # on the OTHER manifestation (either direction + # of ALIGNED_TO). None for non-alignment rows. 
+ } + + Output shape (matches functions/spec/delete_endpoint.yaml): + - For alignment: + {"alignment": {"aligned_from_id": [], + "aligned_to_id": []}} + - For all other types: + {: {"id": []}} + + Both source-side and target-side alignment annotations on m are reported under + `aligned_from_id`; only annotations that have a counterpart contribute to + `aligned_to_id` (so the two lists are NOT necessarily the same length). + + Output is **fully sorted** so the response is deterministic across calls — Neo4j + does not guarantee element order inside `collect(DISTINCT ...)`, so we cannot + rely on insertion order. Both the list of buckets (by key name) and every id list + inside a bucket are sorted lexicographically. + """ + bucket_ids: dict[str, set[str]] = {} + aligned_to_ids: set[str] = set() + + for row in annotation_info: + ann_type = row.get("annotation_type") + ann_id = row.get("annotation_id") + partner_id = row.get("partner_id") + + if ann_type is None or ann_id is None: + continue + + # Internal bucket key. Alignment annotations on this manifestation go into a + # dedicated bucket so they can be rendered with the {aligned_from_id, aligned_to_id} + # shape; everything else uses its annotation type as the bucket key directly. 
+ bucket_key = "_alignment" if ann_type == "alignment" else ann_type + bucket_ids.setdefault(bucket_key, set()).add(ann_id) + + if ann_type == "alignment" and partner_id is not None: + aligned_to_ids.add(partner_id) + + result: list[dict] = [] + for bucket_key in sorted(bucket_ids): + ids_sorted = sorted(bucket_ids[bucket_key]) + if bucket_key == "_alignment": + result.append({ + "alignment": { + "aligned_from_id": ids_sorted, + "aligned_to_id": sorted(aligned_to_ids), + } + }) + else: + result.append({bucket_key: {"id": ids_sorted}}) + + return result + + class Neo4JDatabase: def __init__(self, neo4j_uri: str = None, neo4j_auth: tuple = None) -> None: if neo4j_uri and neo4j_auth: @@ -1631,6 +1697,231 @@ def get_categories( ) return categories + # The order matters: each helper query removes leaf nodes (segments, references, + # sections, durchen notes) and then the annotations that own them. `cleanup_for_update` + # runs after these to drop the manifestation's incipit Nomen + alt Nomens + their + # LocalizedTexts, and finally `delete_node` removes the Manifestation itself. + # + # `delete_segmentation_and_pagination` runs before `delete_toc_annotations` so that + # segments shared between segmentation/pagination annotations and TOC sections are + # already deleted by the time the TOC query runs. The TOC query DOES `DETACH DELETE` + # `seg_in_section`, but on already-deleted nodes that becomes a safe no-op; segments + # that exist only under a TOC (PART_OF a Section without SEGMENTATION_OF) are deleted + # here. Reordering would still be correct but might double-process the same segments. 
# NOTE: the definitions below are members of ``class Neo4JDatabase``; they are
# shown at column 0 only because the surrounding text lost its indentation.

# Per-annotation-type delete queries, run in this order before the manifestation's
# incipit cleanup (`cleanup_for_update`) and the final `delete_node`.
_MANIFESTATION_CASCADE_DELETE_QUERY_KEYS = (
    "delete_segmentation_and_pagination",
    "delete_search_segmentation",
    "delete_bibliography_annotations",
    "delete_toc_annotations",
    "delete_durchen_annotations",
    "delete_alignment_annotations",
)

@staticmethod
def _format_manifestation_delete_result(result: dict) -> dict:
    """
    Replace the raw ``annotation_info`` rows with the API-shaped ``annotations`` list.

    Returns a new dict holding every key of ``result`` except ``annotation_info``,
    plus ``annotations`` built by ``_build_annotation_summary``. The input dict is
    left untouched so the same raw result can be formatted more than once.

    Raises KeyError if ``result`` has no ``annotation_info`` entry.
    """
    formatted = {key: value for key, value in result.items() if key != "annotation_info"}
    formatted["annotations"] = _build_annotation_summary(result["annotation_info"])
    return formatted

def _delete_manifestation_in_tx(self, tx, manifestation_id: str) -> dict:
    """
    Cascade-delete a Manifestation using the caller's active transaction.

    Snapshots what is about to be removed (`get_delete_info`), runs every query in
    `_MANIFESTATION_CASCADE_DELETE_QUERY_KEYS`, then `cleanup_for_update`, then
    `delete_node`.

    Returns the raw (unformatted) result: ``expression_id``, ``annotation_info``,
    ``segment_ids`` and ``deleted_counts``.

    Raises DataNotFound if the snapshot query yields no record.
    """
    record = tx.run(
        Queries.manifestations["get_delete_info"],
        manifestation_id=manifestation_id,
    ).single()

    if record is None:
        raise DataNotFound(f"Manifestation with ID '{manifestation_id}' not found")

    # Read everything from the snapshot BEFORE deleting: the counts and ids describe
    # nodes that no longer exist once the delete queries have run.
    expression_id = record["expression_id"]
    annotation_info = record["annotation_info"] or []
    segment_ids = [sid for sid in (record["segment_ids"] or []) if sid is not None]
    annotation_count = record["annotation_count"] or 0
    segment_count = record["segment_count"] or 0
    reference_count = record["reference_count"] or 0

    for key in self._MANIFESTATION_CASCADE_DELETE_QUERY_KEYS:
        tx.run(Queries.manifestations[key], manifestation_id=manifestation_id)

    tx.run(Queries.manifestations["cleanup_for_update"], manifestation_id=manifestation_id)
    tx.run(Queries.manifestations["delete_node"], manifestation_id=manifestation_id)

    return {
        "expression_id": expression_id,
        "annotation_info": annotation_info,
        "segment_ids": segment_ids,
        "deleted_counts": {
            "manifestations": 1,
            "annotations": annotation_count,
            "segments": segment_count,
            "references": reference_count,
        },
    }

def delete_manifestation(self, manifestation_id: str) -> dict:
    """
    Cascade-delete a manifestation and everything it exclusively owns.

    The whole cascade runs inside one write transaction
    (see `_delete_manifestation_in_tx` for the query order):
      1. Snapshot what is about to be deleted (`get_delete_info`).
      2. Run the per-annotation-type delete queries.
      3. Run `cleanup_for_update`.
      4. Delete the Manifestation node itself (`delete_node`).

    Returns a dict with:
      - expression_id: id of the parent Expression (used by the route layer to
        clean up storage), or None if the manifestation has no Expression.
      - annotations: API-formatted summary grouped by annotation type.
      - segment_ids: ids of search-indexed segments, suitable for passing to the
        search-cleanup service.
      - deleted_counts: {manifestations, annotations, segments, references}.

    Raises DataNotFound if no Manifestation node has the given id.
    """
    with self.get_session() as session:
        result = session.execute_write(
            lambda tx: self._delete_manifestation_in_tx(tx, manifestation_id)
        )

    return self._format_manifestation_delete_result(result)

def delete_expression(self, expression_id: str) -> dict:
    """
    Cascade-delete an Expression (text) and all entities it exclusively owns.

    The graph deletion happens in one Neo4j write transaction: either every
    Manifestation cascade and every Expression-level delete commits together, or
    none of those graph changes commit.

    Transaction contents, in execution order:
      1. Assert the Expression exists and list its Manifestation ids
         (`get_manifestation_ids_for_delete`).
      2. Cascade-delete each of those Manifestations.
      3. Snapshot expression metadata (title, contribution_count, work_id, and
         whether the Work would be orphaned) via `expressions.get_delete_info`.
      4. Delete the title Nomens and their LocalizedTexts (`delete_title_nomens`).
      5. Delete Contribution nodes (`delete_contributions`).
      6. DETACH DELETE the Expression node (`delete_node`).
      7. If the snapshot flagged the Work as orphaned, run `delete_orphan_work`.
         That query re-checks that no Expression still references the Work, so it
         may legitimately delete nothing.

    Returns a dict with:
      - title: primary title converted by `__convert_to_localized_text`.
      - manifestation_ids: ids of all manifestations that were cascade-deleted.
      - manifestation_results: per-manifestation formatted results, in the same
        order as `manifestation_ids`.
      - work_deleted: True iff `delete_orphan_work` actually removed a node.
      - deleted_counts: aggregated counts across all manifestations, plus this
        expression and its contributions.

    Raises DataNotFound if no Expression node has the given id.
    """

    def transaction_function(tx):
        # A raised exception rolls back the whole write transaction.
        record = tx.run(
            Queries.expressions["get_manifestation_ids_for_delete"],
            expression_id=expression_id,
        ).single()

        if record is None:
            raise DataNotFound(f"Text with ID '{expression_id}' not found")

        manifestation_ids: list[str] = record["manifestation_ids"] or []
        manifestation_results = [
            self._format_manifestation_delete_result(
                self._delete_manifestation_in_tx(tx, manifestation_id=m_id)
            )
            for m_id in manifestation_ids
        ]

        info_record = tx.run(
            Queries.expressions["get_delete_info"], expression_id=expression_id
        ).single()

        if info_record is None:
            # Expression disappeared inside this transaction before expression-level
            # cleanup. This should not happen, but keep the rollback behavior explicit.
            raise DataNotFound(f"Text with ID '{expression_id}' not found")

        title = info_record["title"] or []
        contribution_count = info_record["contribution_count"] or 0
        work_id = info_record["work_id"]
        work_is_orphan = bool(info_record["work_is_orphan"])

        tx.run(Queries.expressions["delete_title_nomens"], expression_id=expression_id)
        tx.run(Queries.expressions["delete_contributions"], expression_id=expression_id)
        tx.run(Queries.expressions["delete_node"], expression_id=expression_id)

        work_deleted = False
        if work_is_orphan and work_id is not None:
            # Runs after the Expression is gone so the query's own orphan guard sees
            # the final in-transaction graph state. The guard can still reject the
            # delete, so report what the query really did rather than what the
            # earlier snapshot predicted.
            summary = tx.run(
                Queries.expressions["delete_orphan_work"], work_id=work_id
            ).consume()
            work_deleted = summary.counters.nodes_deleted > 0

        deleted_counts = {"expressions": 1}
        for count_key in ("manifestations", "annotations", "segments", "references"):
            deleted_counts[count_key] = sum(
                r["deleted_counts"][count_key] for r in manifestation_results
            )
        deleted_counts["contributions"] = contribution_count

        return {
            "title": title,
            "manifestation_ids": manifestation_ids,
            "manifestation_results": manifestation_results,
            "work_deleted": work_deleted,
            "deleted_counts": deleted_counts,
        }

    with self.get_session() as session:
        result = session.execute_write(transaction_function)

    # Convert the raw title rows collected in the transaction into the localized-text
    # form returned to the route layer.
    title_localized = self.__convert_to_localized_text(result["title"])

    return {
        "title": title_localized,
        "manifestation_ids": result["manifestation_ids"],
        "manifestation_results": result["manifestation_results"],
        "work_deleted": result["work_deleted"],
        "deleted_counts": result["deleted_counts"],
    }
that asserts the Expression exists AND returns the list of its +// owned Manifestation ids in one round-trip. +// +// If the Expression does not exist, the MATCH fails and `.single()` is None, +// which the caller turns into a `DataNotFound`. If the Expression exists but +// has zero manifestations, the OPTIONAL MATCH yields a NULL `m` and we filter +// it out so the caller gets `[]`. +MATCH (e:Expression {id: $expression_id}) +OPTIONAL MATCH (m:Manifestation)-[:MANIFESTATION_OF]->(e) +RETURN [mid IN collect(DISTINCT m.id) WHERE mid IS NOT NULL] AS manifestation_ids +""", + "get_delete_info": """ +// Snapshot expression-level metadata that the cascade needs. Each dimension is +// aggregated with `WITH ... collect/count(...)` BEFORE the next OPTIONAL MATCH so +// that we don't Cartesian-product titles × contributions × sibling-expressions. +MATCH (e:Expression {id: $expression_id}) + +// 1. Collect ALL localizations of the primary title. +OPTIONAL MATCH (e)-[:HAS_TITLE]->(title_nomen:Nomen) + -[:HAS_LOCALIZATION]->(title_lt:LocalizedText) + -[:HAS_LANGUAGE]->(title_l:Language) +WITH e, + [t IN collect(DISTINCT {language: title_l.code, text: title_lt.text}) + WHERE t.language IS NOT NULL AND t.text IS NOT NULL] AS title + +// 2. Count contributions independently. +OPTIONAL MATCH (e)-[:HAS_CONTRIBUTION]->(contrib:Contribution) +WITH e, title, count(DISTINCT contrib) AS contribution_count + +// 3. Look up the parent Work (cardinality = 1, but OPTIONAL for safety). +OPTIONAL MATCH (e)-[:EXPRESSION_OF]->(w:Work) +WITH e, title, contribution_count, w + +// 4. Determine if Work would be orphaned: any other Expression linked to this Work? 
+OPTIONAL MATCH (other_e:Expression)-[:EXPRESSION_OF]->(w) +WHERE other_e.id <> e.id +WITH title, contribution_count, w, + count(DISTINCT other_e) AS other_expression_count +RETURN + title, + contribution_count, + w.id AS work_id, + (w IS NOT NULL AND other_expression_count = 0) AS work_is_orphan +""", + "delete_title_nomens": """ +// Delete the primary title Nomen, all alternative Nomens, and their LocalizedTexts. +// Languages are shared enums and are NEVER deleted, only the relationships to them. +MATCH (e:Expression {id: $expression_id})-[:HAS_TITLE]->(primary_nomen:Nomen) +OPTIONAL MATCH (primary_nomen)-[:HAS_LOCALIZATION]->(primary_lt:LocalizedText) +OPTIONAL MATCH (primary_nomen)<-[:ALTERNATIVE_OF]-(alt_nomen:Nomen) +OPTIONAL MATCH (alt_nomen)-[:HAS_LOCALIZATION]->(alt_lt:LocalizedText) +WITH collect(DISTINCT alt_lt) AS alt_lts, + collect(DISTINCT alt_nomen) AS alt_nomens, + collect(DISTINCT primary_lt) AS primary_lts, + collect(DISTINCT primary_nomen) AS primary_nomens +FOREACH (n IN alt_lts | DETACH DELETE n) +FOREACH (n IN alt_nomens | DETACH DELETE n) +FOREACH (n IN primary_lts | DETACH DELETE n) +FOREACH (n IN primary_nomens | DETACH DELETE n) +""", + "delete_contributions": """ +// Delete Contribution nodes only. Person, AI, and RoleType are shared and preserved. +MATCH (e:Expression {id: $expression_id})-[:HAS_CONTRIBUTION]->(contrib:Contribution) +DETACH DELETE contrib +""", + "delete_orphan_work": """ +// Delete a Work iff it no longer has any Expression linked to it via EXPRESSION_OF. +// The BELONGS_TO->Category relationship is removed by DETACH DELETE; the Category itself stays. 
+MATCH (w:Work {id: $work_id}) +WHERE NOT EXISTS { MATCH (:Expression)-[:EXPRESSION_OF]->(w) } +DETACH DELETE w +""", + "delete_node": """ +MATCH (e:Expression {id: $expression_id}) +DETACH DELETE e """, } @@ -791,7 +868,61 @@ def create_copyright_and_license(expression_label): FOREACH (s IN segs2 | DETACH DELETE s) FOREACH (a IN anns1 | DETACH DELETE a) FOREACH (a IN anns2 | DETACH DELETE a) - """ + """, + "get_delete_info": """ + // Snapshot every entity that will be deleted with the manifestation. + // + // Important: each information dimension is aggregated with `WITH ... collect/count(...)` + // BEFORE moving on to the next OPTIONAL MATCH. Without these intermediate reductions + // the chained OPTIONAL MATCHes would Cartesian-product (annotations × segments × refs + // × search_segs) and explode for large manifestations. Even though `count(DISTINCT)` + // / `collect(DISTINCT)` would still deduplicate, Neo4j has to materialize every + // intermediate row first, which is what blows memory and time. + MATCH (m:Manifestation {id: $manifestation_id}) + + // 1. Expression on the other side of MANIFESTATION_OF (used by caller for storage cleanup). + OPTIONAL MATCH (m)-[:MANIFESTATION_OF]->(e:Expression) + WITH m, e + + // 2. All annotations on m + their counterpart (for alignments) in either direction. + OPTIONAL MATCH (m)<-[:ANNOTATION_OF]-(ann:Annotation)-[:HAS_TYPE]->(at:AnnotationType) + OPTIONAL MATCH (ann)-[:ALIGNED_TO]-(partner_ann:Annotation) + WITH m, e, + collect(DISTINCT { + annotation_id: ann.id, + annotation_type: at.name, + partner_id: partner_ann.id + }) AS annotation_info, + count(DISTINCT ann) AS annotation_count + + // 3. All segments owned by m's annotations + their references (for totals). + // Note: TOC segments (linked via PART_OF) are NOT counted here unless they + // are also SEGMENTATION_OF some annotation. This is intentional and documented. 
+ OPTIONAL MATCH (m)<-[:ANNOTATION_OF]-(any_ann:Annotation) + <-[:SEGMENTATION_OF]-(any_seg:Segment) + OPTIONAL MATCH (any_seg)-[:HAS_REFERENCE]->(any_ref:Reference) + WITH m, e, annotation_info, annotation_count, + count(DISTINCT any_seg) AS segment_count, + count(DISTINCT any_ref) AS reference_count + + // 4. Only segments fed to the external search index (segmentation + search_segmentation). + OPTIONAL MATCH (m)<-[:ANNOTATION_OF]-(search_ann:Annotation) + -[:HAS_TYPE]->(search_at:AnnotationType) + WHERE search_at.name IN ['segmentation', 'search_segmentation'] + OPTIONAL MATCH (search_ann)<-[:SEGMENTATION_OF]-(search_seg:Segment) + + RETURN + e.id AS expression_id, + annotation_info, + [sid IN collect(DISTINCT search_seg.id) WHERE sid IS NOT NULL] AS segment_ids, + annotation_count, + segment_count, + reference_count + """, + "delete_node": """ + MATCH (m:Manifestation {id: $manifestation_id}) + DETACH DELETE m + """, } diff --git a/functions/neo4j_schema.yaml b/functions/neo4j_schema.yaml index 30b8840c..1e022bd9 100644 --- a/functions/neo4j_schema.yaml +++ b/functions/neo4j_schema.yaml @@ -287,9 +287,12 @@ nodes: unique: true allowed_values: - segmentation + - search_segmentation - alignment - pagination - bibliography + - table_of_contents + - durchen RoleType: enum: true diff --git a/functions/spec/delete_endpoint.yaml b/functions/spec/delete_endpoint.yaml new file mode 100644 index 00000000..cc9a7db3 --- /dev/null +++ b/functions/spec/delete_endpoint.yaml @@ -0,0 +1,406 @@ +openapi: 3.1.0 +info: + title: OpenPecha Delete Endpoints + version: '1.0.0' + description: | + Delete endpoints for Expression (Text) and Manifestation (Instance) resources. + These endpoints perform cascading deletes and return detailed information about the deleted entities. + +paths: + /v2/texts/{text_id}: + delete: + summary: Delete a text + description: | + Delete an expression and all its related entities including all manifestations. 
+ + **Cascading Deletes:** + - The Expression node itself + - All Manifestations linked via MANIFESTATION_OF relationship + - For each Manifestation: + - All Annotations linked via ANNOTATION_OF relationship + - All Segments linked via SEGMENTATION_OF relationship + - All References linked via HAS_REFERENCE relationship + - All alignment relationships + - All Contributions linked via HAS_CONTRIBUTION relationship + - The title Nomen linked via HAS_TITLE relationship (including its LocalizedText) + + **Relationships Removed:** + - EXPRESSION_OF (to Work) + - HAS_LANGUAGE (to Language) + - TRANSLATION_OF (to other Expression, if exists) + - COMMENTARY_OF (to other Expression, if exists) + + **Note:** This operation is irreversible. Other Expressions that have TRANSLATION_OF + or COMMENTARY_OF relationships pointing to this Expression will have those + relationships removed (the other Expressions themselves will not be deleted). + tags: + - Texts + operationId: deleteText + parameters: + - name: text_id + in: path + required: true + schema: + type: string + description: The unique ID of the text to delete + examples: + expression_id: + value: "T12345678" + summary: A typical expression ID + responses: + "200": + description: Text deleted successfully + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteExpressionResponse' + examples: + success: + summary: Successful deletion with text details + value: + message: "Text deleted successfully" + text_id: "T12345678" + title: "The Great Commentary on Madhyamika Philosophy" + instances: + - instance_id: "I12345678" + annotations: + - segmentation: + id: ["S001", "S002"] + - alignment: + aligned_from_id: ["AL001", "AL002"] + aligned_to_id: ["AL003", "AL004"] + - bibliography: + id: ["B001", "B002"] + - instance_id: "I12345679" + annotations: + - segmentation: + id: ["S003"] + - bibliography: + id: ["B003"] + deleted_counts: + expressions: 1 + manifestations: 2 + annotations: 6 + "400": + $ref: 
'#/components/responses/InvalidRequest' + "404": + description: Text not found + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + not_found: + summary: Text does not exist + value: + error: "Text with ID 'T12345678' not found" + "500": + $ref: '#/components/responses/ServerError' + + /v2/instances/{instance_id}: + delete: + summary: Delete an instance + description: | + Delete a manifestation and all its related entities and relationships. + + **Cascading Deletes:** + - The Manifestation node itself + - All Annotations linked via ANNOTATION_OF relationship + - All Segments linked via SEGMENTATION_OF relationship (from annotations) + - All References linked via HAS_REFERENCE relationship (from segments) + - All alignment relationships (ALIGNED_TO from segments) + + **Relationships Removed:** + - MANIFESTATION_OF (to Expression) + - HAS_TYPE (to ManifestationType) + - HAS_COPYRIGHT (to CopyrightStatus) + - HAS_INCIPIT_TITLE (to Nomen) + + **Note:** This operation is irreversible. The parent Expression will remain intact. 
+ tags: + - Instances + operationId: deleteInstance + parameters: + - name: instance_id + in: path + required: true + schema: + type: string + description: The unique ID of the instance to delete + examples: + manifestation_id: + value: "I12345678" + summary: A typical manifestation ID + responses: + "200": + description: Instance deleted successfully + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteManifestationResponse' + examples: + success: + summary: Successful deletion with instance details + value: + message: "Instance deleted successfully" + instance_id: "I12345678" + annotations: + - segmentation: + id: ["S001"] + - alignment: + aligned_from_id: ["AL001", "AL002"] + aligned_to_id: ["AL003", "AL004"] + - bibliography: + id: ["B001"] + deleted_counts: + manifestations: 1 + annotations: 3 + "400": + $ref: '#/components/responses/InvalidRequest' + "404": + description: Instance not found + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + not_found: + summary: Instance does not exist + value: + error: "Instance with ID 'I12345678' not found" + "500": + $ref: '#/components/responses/ServerError' + +components: + schemas: + InstanceWithAnnotations: + type: object + required: + - instance_id + - annotations + properties: + instance_id: + type: string + description: The unique ID of the instance + examples: + - "I12345678" + annotations: + type: array + description: List of annotations for this instance + items: + $ref: '#/components/schemas/Annotation' + + Annotation: + type: object + required: + - id + properties: + id: + type: string + description: The unique ID of the annotation + examples: + - "A001" + segmentation: + type: object + description: Segmentation information if applicable + properties: + id: + type: string + description: The unique ID of the segmentation + examples: + - "S001" + alignment: + type: object + description: Alignment information if applicable + properties: + 
id: + type: string + description: The unique ID of the alignment + examples: + - "AL001" + bibliography: + type: object + description: Bibliography information if applicable + properties: + id: + type: string + description: The unique ID of the bibliography entry + examples: + - "B001" + DeleteManifestationResponse: + type: object + required: + - message + - instance_id + - annotations + - deleted_counts + properties: + message: + type: string + description: Success message + examples: + - "Instance deleted successfully" + instance_id: + type: string + description: The ID of the deleted instance + examples: + - "I12345678" + annotations: + type: array + description: List of annotations that were deleted with this instance + items: + $ref: '#/components/schemas/Annotation' + deleted_counts: + type: object + description: Count of deleted entities by type + required: + - manifestations + - annotations + - segments + properties: + manifestations: + type: integer + minimum: 1 + description: Number of manifestations deleted (always 1) + examples: + - 1 + annotations: + type: integer + minimum: 0 + description: Number of annotations deleted + examples: + - 3 + segments: + type: integer + minimum: 0 + description: Number of segments deleted + examples: + - 150 + references: + type: integer + minimum: 0 + description: Number of references deleted + examples: + - 25 + + DeleteExpressionResponse: + type: object + required: + - message + - text_id + - title + - instances + - deleted_counts + properties: + message: + type: string + description: Success message + examples: + - "Text deleted successfully" + text_id: + type: string + description: The ID of the deleted text + examples: + - "T12345678" + title: + type: string + description: The title of the deleted text + examples: + - "The Great Commentary on Madhyamika Philosophy" + instances: + type: array + description: List of instances that were deleted with this text + items: + $ref: 
'#/components/schemas/InstanceWithAnnotations' + deleted_counts: + type: object + description: Count of deleted entities by type + required: + - expressions + - manifestations + - annotations + - segments + properties: + expressions: + type: integer + minimum: 1 + description: Number of expressions deleted (always 1) + examples: + - 1 + manifestations: + type: integer + minimum: 0 + description: Number of manifestations deleted + examples: + - 2 + annotations: + type: integer + minimum: 0 + description: Number of annotations deleted + examples: + - 6 + segments: + type: integer + minimum: 0 + description: Number of segments deleted + examples: + - 300 + references: + type: integer + minimum: 0 + description: Number of references deleted + examples: + - 50 + contributions: + type: integer + minimum: 0 + description: Number of contributions deleted + examples: + - 3 + + ErrorResponse: + type: object + required: + - error + properties: + error: + type: string + description: Error message describing what went wrong + examples: + - "Resource not found" + - "Invalid request" + + responses: + InvalidRequest: + description: There was an error with the request + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + invalid_id: + summary: Invalid ID format + value: + error: "Invalid ID format provided" + + NotFound: + description: Resource not found + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + resource_not_found: + value: + error: "Resource was not found" + + ServerError: + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + server_error: + value: + error: "An internal server error occurred" diff --git a/functions/spec/swagger-viewer.html b/functions/spec/swagger-viewer.html new file mode 100644 index 00000000..ad08c4e5 --- /dev/null +++ b/functions/spec/swagger-viewer.html @@ -0,0 +1,45 @@ + + + 
+ + + OpenPecha Delete Endpoints - Swagger UI + + + + +
+ + + + + \ No newline at end of file diff --git a/functions/tests/conftest.py b/functions/tests/conftest.py index adb3c6c1..8d3e4a06 100644 --- a/functions/tests/conftest.py +++ b/functions/tests/conftest.py @@ -187,8 +187,19 @@ def mock_search_segmenter(): These helpers are "fire-and-forget" and call external services; tests should never hit the network or spawn those background threads. + + Both `api.instances` and `api.texts` are patched. Although the helpers are defined + in `api.instances`, `api.texts` does `from api.instances import _trigger_...` at + import time, which copies the reference into the `api.texts` module namespace. Patching + only `api.instances` would NOT affect calls made through `api.texts` — they'd hit the + network. Patch both lookup sites to be safe. """ - with patch("api.instances._trigger_search_segmenter"), patch("api.instances._trigger_delete_search_segments"): + with ( + patch("api.instances._trigger_search_segmenter"), + patch("api.instances._trigger_delete_search_segments"), + patch("api.texts._trigger_search_segmenter"), + patch("api.texts._trigger_delete_search_segments"), + ): yield diff --git a/functions/tests/test_delete_manifestation.py b/functions/tests/test_delete_manifestation.py index d3b3803d..a3b35634 100644 --- a/functions/tests/test_delete_manifestation.py +++ b/functions/tests/test_delete_manifestation.py @@ -1,99 +1,280 @@ # pylint: disable=redefined-outer-name """ -Unit tests for DELETE /v2/instances/{manifestation_id} endpoint using mocks. +Unit tests for `DELETE /v2/instances/{instance_id}` and the helpers it relies on. + +These tests are mock-based — they do NOT hit a real Neo4j instance. They cover: + - Happy path through the Flask route (response shape, status code). + - 404 propagation when the underlying DB raises `DataNotFound`. + - Search-segmenter cleanup is triggered with the segment ids returned by the DB. + - Storage cleanup failures are swallowed and do not fail the request. 
+ - The DB-layer `delete_manifestation` raises `DataNotFound` when the + `get_delete_info` cypher returns no record. + - `_build_annotation_summary` correctly groups annotations by type and handles + both source-side and target-side alignment annotations. """ import logging from unittest.mock import MagicMock, patch + import pytest -from main import create_app -from neo4j_database import Neo4JDatabase from exceptions import DataNotFound -from neo4j_queries import Queries +from main import create_app +from neo4j_database import Neo4JDatabase, _build_annotation_summary logger = logging.getLogger(__name__) + +MOCK_DELETE_RESULT = { + "expression_id": "test-expression-id", + "annotations": [ + {"segmentation": {"id": ["A1001"]}}, + { + "alignment": { + "aligned_from_id": ["A1002", "A1003"], + "aligned_to_id": ["A2001", "A2002"], + } + }, + {"bibliography": {"id": ["A1004"]}}, + ], + "segment_ids": ["S001", "S002", "S003"], + "deleted_counts": { + "manifestations": 1, + "annotations": 4, + "segments": 152, + "references": 0, + }, +} + + @pytest.fixture def client(): - """Create Flask test client""" app = create_app() app.config["TESTING"] = True return app.test_client() -class TestManifestationDeletion: - """Unit tests for manifestation deletion""" +class TestDeleteInstanceRoute: + """Route-level tests for `DELETE /v2/instances/{instance_id}`.""" + + @patch("api.instances.Storage") + @patch("api.instances.Neo4JDatabase") + def test_returns_200_with_full_response_shape(self, mock_db_cls, mock_storage_cls, client): + mock_db_cls.return_value.delete_manifestation.return_value = MOCK_DELETE_RESULT + mock_storage_cls.return_value.delete_base_text.return_value = None + + response = client.delete("/v2/instances/I12345678") + + assert response.status_code == 200 + body = response.get_json() + assert body["message"] == "Instance deleted successfully" + assert body["instance_id"] == "I12345678" + assert body["annotations"] == MOCK_DELETE_RESULT["annotations"] + assert 
body["deleted_counts"] == MOCK_DELETE_RESULT["deleted_counts"] + mock_db_cls.return_value.delete_manifestation.assert_called_once_with( + manifestation_id="I12345678" + ) + + @patch("api.instances.Storage") @patch("api.instances.Neo4JDatabase") - def test_delete_manifestation_success(self, mock_db_cls, client): - """Test successful deletion of a manifestation via API""" - # Setup mock - mock_db_instance = mock_db_cls.return_value - mock_db_instance.delete_manifestation.return_value = None - - # Call DELETE endpoint - manifestation_id = "test-manifestation-id" - response = client.delete(f"/v2/instances/{manifestation_id}") - - # Verify response - assert response.status_code == 204 - - # Verify DB method was called - mock_db_instance.delete_manifestation.assert_called_once_with(manifestation_id=manifestation_id) + def test_storage_delete_called_with_expression_and_manifestation_id( + self, mock_db_cls, mock_storage_cls, client + ): + mock_db_cls.return_value.delete_manifestation.return_value = MOCK_DELETE_RESULT + response = client.delete("/v2/instances/I12345678") + + assert response.status_code == 200 + mock_storage_cls.return_value.delete_base_text.assert_called_once_with( + expression_id="test-expression-id", manifestation_id="I12345678" + ) + + @patch("api.instances.Storage") + @patch("api.instances.Neo4JDatabase") + def test_storage_skipped_when_expression_id_is_none( + self, mock_db_cls, mock_storage_cls, client + ): + # An orphan manifestation (no Expression) should not call storage at all. 
+ result = {**MOCK_DELETE_RESULT, "expression_id": None} + mock_db_cls.return_value.delete_manifestation.return_value = result + + response = client.delete("/v2/instances/I12345678") + + assert response.status_code == 200 + mock_storage_cls.return_value.delete_base_text.assert_not_called() + + @patch("api.instances.Storage") @patch("api.instances.Neo4JDatabase") - def test_delete_manifestation_not_found(self, mock_db_cls, client): - """Test deletion of non-existent manifestation via API""" - # Setup mock to raise DataNotFound - mock_db_instance = mock_db_cls.return_value - mock_db_instance.delete_manifestation.side_effect = DataNotFound("Manifestation not found") - - # Call DELETE endpoint - response = client.delete("/v2/instances/non-existent-id") - - # Verify response + def test_404_when_manifestation_not_found(self, mock_db_cls, mock_storage_cls, client): + mock_db_cls.return_value.delete_manifestation.side_effect = DataNotFound( + "Manifestation with ID 'missing-id' not found" + ) + + response = client.delete("/v2/instances/missing-id") + assert response.status_code == 404 - assert "error" in response.get_json() - - @patch("neo4j_database.GraphDatabase") - def test_db_delete_manifestation_query(self, mock_driver_cls): - """Test that delete_manifestation executes the correct Cypher query""" - # Setup mocks - mock_driver = MagicMock() - mock_driver_cls.driver.return_value = mock_driver - mock_session = MagicMock() - mock_driver.session.return_value = mock_session - # Mock context manager - mock_session.__enter__.return_value = mock_session - - # Mock the existence check - mock_session.execute_read.return_value = {"count": 1} # Simulate exists - - # Create DB instance - db = Neo4JDatabase(neo4j_uri="bolt://localhost:7687", neo4j_auth=("neo4j", "password")) - - # Call delete method - manifestation_id = "test-id" - db.delete_manifestation(manifestation_id) - - # Verify session.run was called with the delete query - 
mock_session.run.assert_called_with(Queries.manifestations["delete"], manifestation_id=manifestation_id) - - @patch("neo4j_database.GraphDatabase") - def test_db_delete_manifestation_not_found(self, mock_driver_cls): - """Test that delete_manifestation raises DataNotFound if manifestation doesn't exist""" - # Setup mocks - mock_driver = MagicMock() - mock_driver_cls.driver.return_value = mock_driver - mock_session = MagicMock() - mock_driver.session.return_value = mock_session - # Mock context manager - mock_session.__enter__.return_value = mock_session - - # Mock the existence check to return None (not found) - mock_session.execute_read.return_value = None - - # Create DB instance - db = Neo4JDatabase(neo4j_uri="bolt://localhost:7687", neo4j_auth=("neo4j", "password")) - - # Call delete method and expect exception - with pytest.raises(DataNotFound): - db.delete_manifestation("non-existent-id") + assert response.get_json() == {"error": "Manifestation with ID 'missing-id' not found"} + # Storage cleanup must not happen on 404. 
+ mock_storage_cls.return_value.delete_base_text.assert_not_called() + + @patch("api.instances.Storage") + @patch("api.instances.Neo4JDatabase") + def test_search_segmenter_called_with_returned_segment_ids( + self, mock_db_cls, mock_storage_cls, client + ): + mock_db_cls.return_value.delete_manifestation.return_value = MOCK_DELETE_RESULT + mock_storage_cls.return_value.delete_base_text.return_value = None + + with patch("api.instances._trigger_delete_search_segments") as mock_trigger: + response = client.delete("/v2/instances/I12345678") + + assert response.status_code == 200 + mock_trigger.assert_called_once_with(MOCK_DELETE_RESULT["segment_ids"]) + + @patch("api.instances.Storage") + @patch("api.instances.Neo4JDatabase") + def test_search_segmenter_not_called_when_no_segments( + self, mock_db_cls, mock_storage_cls, client + ): + result = {**MOCK_DELETE_RESULT, "segment_ids": []} + mock_db_cls.return_value.delete_manifestation.return_value = result + mock_storage_cls.return_value.delete_base_text.return_value = None + + with patch("api.instances._trigger_delete_search_segments") as mock_trigger: + response = client.delete("/v2/instances/I12345678") + + assert response.status_code == 200 + mock_trigger.assert_not_called() + + @patch("api.instances.Storage") + @patch("api.instances.Neo4JDatabase") + def test_storage_failure_does_not_fail_request( + self, mock_db_cls, mock_storage_cls, client + ): + mock_db_cls.return_value.delete_manifestation.return_value = MOCK_DELETE_RESULT + mock_storage_cls.return_value.delete_base_text.side_effect = Exception("blob not found") + + response = client.delete("/v2/instances/I12345678") + + assert response.status_code == 200 + assert response.get_json()["message"] == "Instance deleted successfully" + + +class TestDeleteManifestationDb: + """Tests for the `delete_manifestation` DB method (cypher-mocked).""" + + def test_raises_data_not_found_when_get_delete_info_returns_none(self): + """When the manifestation does not exist, 
`get_delete_info` returns no record + and `delete_manifestation` must raise DataNotFound.""" + with patch("neo4j_database.GraphDatabase") as mock_driver_cls: + mock_driver = MagicMock() + mock_driver_cls.driver.return_value = mock_driver + mock_session = MagicMock() + mock_driver.session.return_value.__enter__ = MagicMock(return_value=mock_session) + mock_driver.session.return_value.__exit__ = MagicMock(return_value=False) + + # The transaction function calls `tx.run(...).single()` which we mock to + # return None (no record), simulating a missing manifestation. + def execute_write(tx_func): + tx = MagicMock() + tx.run.return_value.single.return_value = None + return tx_func(tx) + + mock_session.execute_write.side_effect = execute_write + + db = Neo4JDatabase(neo4j_uri="bolt://x:7687", neo4j_auth=("neo4j", "p")) + with pytest.raises(DataNotFound, match="not found"): + db.delete_manifestation("missing-id") + + +class TestBuildAnnotationSummary: + """ + Unit tests for the `_build_annotation_summary` helper that converts raw + `get_delete_info` rows into the API `annotations` response shape. 
+ """ + + def test_empty_input_returns_empty_list(self): + assert _build_annotation_summary([]) == [] + + def test_skips_rows_with_missing_required_fields(self): + rows = [ + {"annotation_id": None, "annotation_type": "segmentation", "partner_id": None}, + {"annotation_id": "A1", "annotation_type": None, "partner_id": None}, + ] + assert _build_annotation_summary(rows) == [] + + def test_groups_non_alignment_annotations_by_type(self): + rows = [ + {"annotation_id": "A1", "annotation_type": "segmentation", "partner_id": None}, + {"annotation_id": "A2", "annotation_type": "bibliography", "partner_id": None}, + {"annotation_id": "A3", "annotation_type": "segmentation", "partner_id": None}, + ] + result = _build_annotation_summary(rows) + assert {"segmentation": {"id": ["A1", "A3"]}} in result + assert {"bibliography": {"id": ["A2"]}} in result + assert len(result) == 2 + + def test_alignment_source_side_groups_from_and_to(self): + """An alignment annotation on m with outgoing ALIGNED_TO is the source side.""" + rows = [ + {"annotation_id": "A_src", "annotation_type": "alignment", "partner_id": "A_tgt"}, + ] + result = _build_annotation_summary(rows) + assert result == [ + {"alignment": {"aligned_from_id": ["A_src"], "aligned_to_id": ["A_tgt"]}} + ] + + def test_alignment_target_side_still_appears_in_aligned_from(self): + """An alignment annotation on m whose row has no `partner_id` (for example a + target-side annotation whose counterpart the query did not resolve) must still + appear in `aligned_from_id` so it isn't silently dropped from the response. + + This is the bug fix vs the original implementation, which dropped these + annotations entirely. 
+ """ + rows = [ + {"annotation_id": "A_src", "annotation_type": "alignment", "partner_id": "A_tgt"}, + {"annotation_id": "A_target_side", "annotation_type": "alignment", "partner_id": None}, + ] + result = _build_annotation_summary(rows) + assert result == [ + { + "alignment": { + "aligned_from_id": ["A_src", "A_target_side"], + "aligned_to_id": ["A_tgt"], + } + } + ] + + def test_alignment_with_undirected_partner_match_dedupes(self): + """When the cypher matches ALIGNED_TO undirected, the same annotation row + can appear with both directions populated. Dedup should keep the response + clean.""" + rows = [ + {"annotation_id": "A1", "annotation_type": "alignment", "partner_id": "B1"}, + {"annotation_id": "A1", "annotation_type": "alignment", "partner_id": "B1"}, + {"annotation_id": "A1", "annotation_type": "alignment", "partner_id": "B2"}, + ] + result = _build_annotation_summary(rows) + assert result == [ + { + "alignment": { + "aligned_from_id": ["A1"], + "aligned_to_id": ["B1", "B2"], + } + } + ] + + def test_mixed_annotation_types_all_present(self): + rows = [ + {"annotation_id": "A1", "annotation_type": "segmentation", "partner_id": None}, + {"annotation_id": "A2", "annotation_type": "alignment", "partner_id": "B2"}, + {"annotation_id": "A3", "annotation_type": "bibliography", "partner_id": None}, + {"annotation_id": "A4", "annotation_type": "search_segmentation", "partner_id": None}, + ] + result = _build_annotation_summary(rows) + assert {"segmentation": {"id": ["A1"]}} in result + assert {"bibliography": {"id": ["A3"]}} in result + assert {"search_segmentation": {"id": ["A4"]}} in result + assert { + "alignment": {"aligned_from_id": ["A2"], "aligned_to_id": ["B2"]} + } in result + assert len(result) == 4 diff --git a/functions/tests/test_delete_text.py b/functions/tests/test_delete_text.py new file mode 100644 index 00000000..0b9f839f --- /dev/null +++ b/functions/tests/test_delete_text.py @@ -0,0 +1,385 @@ +# pylint: disable=redefined-outer-name +""" +Unit 
tests for `DELETE /v2/texts/{text_id}` and the underlying +`Neo4JDatabase.delete_expression` cascade. + +These tests are mock-based — they do NOT hit a real Neo4j instance. They cover: + - Happy path through the Flask route (response shape, status code, aggregated + counts, per-instance summaries, work_deleted flag, title pass-through). + - 404 propagation when the underlying DB raises `DataNotFound`. + - Storage cleanup runs once per cascaded manifestation, and storage failures + do not fail the request. + - The search-segmenter cleanup is called once with the union of every + manifestation's `segment_ids`. + - Texts with zero manifestations still produce a valid 200 response with + empty `instances` and zero counts. + - The DB-layer `delete_expression` raises `DataNotFound` when the expression + does not exist (without attempting any cascade). + - The DB-layer `delete_expression` performs graph deletes in a single write + transaction and skips `delete_orphan_work` when the Work is still referenced + by another Expression. 
+"""
+import logging
+from unittest.mock import MagicMock, patch
+
+import pytest
+from exceptions import DataNotFound
+from main import create_app
+from neo4j_database import Neo4JDatabase
+from neo4j_queries import Queries
+
+logger = logging.getLogger(__name__)
+
+
+def _make_manifestation_result(instance_id: str, segment_ids: list[str]) -> dict:
+    """Helper to construct a minimal `delete_manifestation` return dict for mocking."""
+    return {
+        "expression_id": "T12345678",
+        "annotations": [{"segmentation": {"id": [f"A_{instance_id}"]}}],
+        "segment_ids": segment_ids,
+        "deleted_counts": {
+            "manifestations": 1,
+            "annotations": 1,
+            "segments": len(segment_ids),
+            "references": 0,
+        },
+    }
+
+
+MOCK_TEXT_DELETE_RESULT = {
+    "title": {"en": "The Great Commentary", "bo": "འགྲེལ་པ་ཆེན་མོ།"},
+    "manifestation_ids": ["I10000001", "I10000002"],
+    "manifestation_results": [
+        _make_manifestation_result("I10000001", ["S1", "S2"]),
+        _make_manifestation_result("I10000002", ["S3"]),
+    ],
+    "work_deleted": True,
+    "deleted_counts": {
+        "expressions": 1,
+        "manifestations": 2,
+        "annotations": 2,
+        "segments": 3,
+        "references": 0,
+        "contributions": 3,
+    },
+}
+
+
+@pytest.fixture
+def client():
+    app = create_app()
+    app.config["TESTING"] = True
+    return app.test_client()
+
+
+class TestDeleteTextRoute:
+    """Route-level tests for `DELETE /v2/texts/{text_id}`."""
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_returns_200_with_full_response_shape(self, mock_db_cls, mock_storage_cls, client):
+        mock_db_cls.return_value.delete_expression.return_value = MOCK_TEXT_DELETE_RESULT
+
+        response = client.delete("/v2/texts/T12345678")
+
+        assert response.status_code == 200
+        body = response.get_json()
+        assert body["message"] == "Text deleted successfully"
+        assert body["text_id"] == "T12345678"
+        assert body["title"] == {"en": "The Great Commentary", "bo": "འགྲེལ་པ་ཆེན་མོ།"}
+        assert body["work_deleted"] is True
+        assert body["deleted_counts"] == MOCK_TEXT_DELETE_RESULT["deleted_counts"]
+
+        # Per-instance summaries are present in the same order as the cascade.
+        assert body["instances"] == [
+            {
+                "instance_id": "I10000001",
+                "annotations": [{"segmentation": {"id": ["A_I10000001"]}}],
+            },
+            {
+                "instance_id": "I10000002",
+                "annotations": [{"segmentation": {"id": ["A_I10000002"]}}],
+            },
+        ]
+
+        mock_db_cls.return_value.delete_expression.assert_called_once_with(
+            expression_id="T12345678"
+        )
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_storage_delete_called_once_per_manifestation(
+        self, mock_db_cls, mock_storage_cls, client
+    ):
+        mock_db_cls.return_value.delete_expression.return_value = MOCK_TEXT_DELETE_RESULT
+
+        response = client.delete("/v2/texts/T12345678")
+
+        assert response.status_code == 200
+        storage = mock_storage_cls.return_value
+        assert storage.delete_base_text.call_count == 2
+        storage.delete_base_text.assert_any_call(
+            expression_id="T12345678", manifestation_id="I10000001"
+        )
+        storage.delete_base_text.assert_any_call(
+            expression_id="T12345678", manifestation_id="I10000002"
+        )
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_storage_failures_do_not_fail_request(
+        self, mock_db_cls, mock_storage_cls, client
+    ):
+        mock_db_cls.return_value.delete_expression.return_value = MOCK_TEXT_DELETE_RESULT
+        mock_storage_cls.return_value.delete_base_text.side_effect = Exception("blob missing")
+
+        response = client.delete("/v2/texts/T12345678")
+
+        assert response.status_code == 200
+        # Still attempted both manifestations even though both raised.
+        assert mock_storage_cls.return_value.delete_base_text.call_count == 2
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_search_segmenter_called_once_with_union_of_segment_ids(
+        self, mock_db_cls, mock_storage_cls, client
+    ):
+        mock_db_cls.return_value.delete_expression.return_value = MOCK_TEXT_DELETE_RESULT
+
+        # Patch `_trigger_delete_search_segments` in the `api.texts` namespace, where the
+        # route looks it up at call time. It is defined in `api.instances`, but `api.texts`
+        # imports it at module load, so patching the defining module would miss this call.
+        with patch("api.texts._trigger_delete_search_segments") as mock_trigger:
+            response = client.delete("/v2/texts/T12345678")
+
+        assert response.status_code == 200
+        mock_trigger.assert_called_once_with(["S1", "S2", "S3"])
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_search_segmenter_not_called_when_no_segments(
+        self, mock_db_cls, mock_storage_cls, client
+    ):
+        result = {
+            **MOCK_TEXT_DELETE_RESULT,
+            "manifestation_ids": ["I_empty"],
+            "manifestation_results": [_make_manifestation_result("I_empty", [])],
+        }
+        mock_db_cls.return_value.delete_expression.return_value = result
+
+        with patch("api.texts._trigger_delete_search_segments") as mock_trigger:
+            response = client.delete("/v2/texts/T_empty")
+
+        assert response.status_code == 200
+        mock_trigger.assert_not_called()
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_text_with_no_manifestations_still_returns_200(
+        self, mock_db_cls, mock_storage_cls, client
+    ):
+        result = {
+            "title": {"en": "Orphan text"},
+            "manifestation_ids": [],
+            "manifestation_results": [],
+            "work_deleted": False,
+            "deleted_counts": {
+                "expressions": 1,
+                "manifestations": 0,
+                "annotations": 0,
+                "segments": 0,
+                "references": 0,
+                "contributions": 0,
+            },
+        }
+        mock_db_cls.return_value.delete_expression.return_value = result
+
+        response = client.delete("/v2/texts/T_no_manifestations")
+
+        assert response.status_code == 200
+        body = response.get_json()
+        assert body["instances"] == []
+        assert body["work_deleted"] is False
+        # No storage calls when there are no manifestations.
+        mock_storage_cls.return_value.delete_base_text.assert_not_called()
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_text_with_null_title_returns_null_title(
+        self, mock_db_cls, mock_storage_cls, client
+    ):
+        result = {
+            **MOCK_TEXT_DELETE_RESULT,
+            "title": None,
+            "manifestation_ids": [],
+            "manifestation_results": [],
+        }
+        mock_db_cls.return_value.delete_expression.return_value = result
+
+        response = client.delete("/v2/texts/T_no_title")
+
+        assert response.status_code == 200
+        assert response.get_json()["title"] is None
+
+    @patch("api.texts.Storage")
+    @patch("api.texts.Neo4JDatabase")
+    def test_404_when_text_not_found(self, mock_db_cls, mock_storage_cls, client):
+        mock_db_cls.return_value.delete_expression.side_effect = DataNotFound(
+            "Text with ID 'missing' not found"
+        )
+
+        response = client.delete("/v2/texts/missing")
+
+        assert response.status_code == 404
+        assert response.get_json() == {"error": "Text with ID 'missing' not found"}
+        mock_storage_cls.return_value.delete_base_text.assert_not_called()
+
+
+class TestDeleteExpressionDb:
+    """Tests for the `delete_expression` DB method (cypher-mocked)."""
+
+    @staticmethod
+    def _make_db_with_write_tx(single_returns: list) -> tuple:
+        """
+        Build a Neo4JDatabase whose session.execute_write runs the transaction
+        function against one mock tx. `tx.run(...).single()` iterates through
+        `single_returns` for the read portions inside that single write tx.
+        """
+        with patch("neo4j_database.GraphDatabase") as mock_driver_cls:
+            mock_driver = MagicMock()
+            mock_driver_cls.driver.return_value = mock_driver
+
+            session = MagicMock()
+            mock_driver.session.return_value.__enter__ = MagicMock(return_value=session)
+            mock_driver.session.return_value.__exit__ = MagicMock(return_value=False)
+
+            tx = MagicMock()
+            tx.run.return_value.single.side_effect = single_returns
+
+            def execute_write(tx_func):
+                return tx_func(tx)
+
+            session.execute_write.side_effect = execute_write
+
+            db = Neo4JDatabase(neo4j_uri="bolt://x:7687", neo4j_auth=("neo4j", "p"))
+            return db, session, tx
+
+    def test_raises_data_not_found_when_expression_missing(self):
+        # The single write tx first runs `get_manifestation_ids_for_delete`; None means
+        # the Expression does not exist and the transaction function raises.
+        db, session, _tx = self._make_db_with_write_tx(single_returns=[None])
+
+        with pytest.raises(DataNotFound, match="Text with ID 'missing' not found"):
+            db.delete_expression("missing")
+
+        session.execute_write.assert_called_once()
+        session.execute_read.assert_not_called()
+
+    def test_does_not_run_per_manifestation_cascade_when_missing(self):
+        """Confirm no manifestation cascade starts when the expression doesn't exist."""
+        db, _session, _tx = self._make_db_with_write_tx(single_returns=[None])
+
+        with patch.object(db, "_delete_manifestation_in_tx") as mock_delete_in_tx:
+            with pytest.raises(DataNotFound):
+                db.delete_expression("missing")
+            mock_delete_in_tx.assert_not_called()
+
+    def test_aggregates_counts_across_manifestations(self):
+        """When the expression has manifestations, counts should be summed and
+        the contribution_count from the expression info should be added."""
+        manifestation_ids_record = {"manifestation_ids": ["I1", "I2"]}
+        expression_info_record = {
+            "title": [{"language": "en", "text": "Hello"}],
+            "contribution_count": 4,
+            "work_id": "W1",
+            "work_is_orphan": False,
+        }
+
+        db, session, tx = self._make_db_with_write_tx(
+            single_returns=[manifestation_ids_record, expression_info_record]
+        )
+
+        raw_manifestation_results = {
+            "I1": {
+                "expression_id": "T1",
+                "annotation_info": [],
+                "segment_ids": ["s1"],
+                "deleted_counts": {
+                    "manifestations": 1,
+                    "annotations": 2,
+                    "segments": 10,
+                    "references": 1,
+                },
+            },
+            "I2": {
+                "expression_id": "T1",
+                "annotation_info": [],
+                "segment_ids": ["s2", "s3"],
+                "deleted_counts": {
+                    "manifestations": 1,
+                    "annotations": 3,
+                    "segments": 20,
+                    "references": 2,
+                },
+            },
+        }
+
+        def delete_manifestation_in_tx(tx_arg, manifestation_id):
+            assert tx_arg is tx
+            return raw_manifestation_results[manifestation_id]
+
+        with patch.object(db, "delete_manifestation") as mock_public_dm:
+            with patch.object(
+                db, "_delete_manifestation_in_tx", side_effect=delete_manifestation_in_tx
+            ) as mock_delete_in_tx:
+                result = db.delete_expression("T1")
+
+        session.execute_write.assert_called_once()
+        session.execute_read.assert_not_called()
+        mock_public_dm.assert_not_called()
+        assert mock_delete_in_tx.call_count == 2
+        assert mock_delete_in_tx.call_args_list[0].kwargs["manifestation_id"] == "I1"
+        assert mock_delete_in_tx.call_args_list[1].kwargs["manifestation_id"] == "I2"
+
+        assert result["manifestation_ids"] == ["I1", "I2"]
+        assert result["work_deleted"] is False
+        assert result["title"] == {"en": "Hello"}
+        assert result["deleted_counts"] == {
+            "expressions": 1,
+            "manifestations": 2,
+            "annotations": 5,
+            "segments": 30,
+            "references": 3,
+            "contributions": 4,
+        }
+
+    def test_text_with_no_manifestations_skips_per_manifestation_cascade(self):
+        # Expression exists but has zero manifestations → list is empty (not None).
+        manifestation_ids_record = {"manifestation_ids": []}
+        expression_info_record = {
+            "title": [],
+            "contribution_count": 0,
+            "work_id": "W_empty",
+            "work_is_orphan": True,
+        }
+
+        db, session, tx = self._make_db_with_write_tx(
+            single_returns=[manifestation_ids_record, expression_info_record]
+        )
+
+        with patch.object(db, "_delete_manifestation_in_tx") as mock_delete_in_tx:
+            result = db.delete_expression("T_empty")
+            mock_delete_in_tx.assert_not_called()
+
+        session.execute_write.assert_called_once()
+        session.execute_read.assert_not_called()
+        assert result["manifestation_ids"] == []
+        assert result["title"] is None  # empty title list collapses to None
+        assert result["work_deleted"] is True
+        assert result["deleted_counts"]["expressions"] == 1
+        assert result["deleted_counts"]["manifestations"] == 0
+
+        run_queries = [args[0] for args, _kwargs in tx.run.call_args_list]
+        assert run_queries.index(Queries.expressions["delete_node"]) < run_queries.index(
+            Queries.expressions["delete_orphan_work"]
+        )