Override drop-migrations with main and stashed work

rishikamtamneu · rishikamtamneu · commit 7287a495c4f1 · 2025-12-04T20:31:50.000-05:00
diff --git a/backend/app/routes/migration_routes.py b/backend/app/routes/migration_routes.py
@@ -1,9 +1,12 @@
+from collections import defaultdict
 from uuid import UUID
 
 from fastapi import APIRouter, Depends, HTTPException
+from supabase._async.client import AsyncClient
 
 from app.core.dependencies import get_current_admin
-from app.schemas.classification_schemas import Classification
+from app.core.supabase import get_async_supabase
+from app.schemas.classification_schemas import Classification, ExtractedFile
 from app.schemas.migration_schemas import Migration, MigrationCreate
 from app.schemas.relationship_schemas import Relationship
 from app.services.classification_service import (
@@ -18,7 +21,7 @@
     RelationshipService,
     get_relationship_service,
 )
-from app.utils.migrations import create_migrations
+from app.utils.migrations import _table_name_for_classification, create_migrations
 
 router = APIRouter(prefix="/migrations", tags=["Migrations"])
 
@@ -56,7 +59,6 @@ async def generate_migrations(
     Then insert the new migrations into the `migrations` table and return them.
     """
     try:
-        # 1) Load current state from DB
         classifications: list[
             Classification
         ] = await classification_service.get_classifications(tenant_id)
@@ -72,19 +74,15 @@ async def generate_migrations(
                 status_code=404, detail="No classifications found for tenant"
             )
 
-        # 2) Compute *new* migrations (pure function)
-        #    IMPORTANT: this should return list[MigrationCreate]
         new_migration_creates: list[MigrationCreate] = create_migrations(
             classifications=classifications,
             relationships=relationships,
             initial_migrations=existing_migrations,
         )
 
         if not new_migration_creates:
-            # Nothing new to add
             return []
 
-        # 3) Insert into DB and return the created migrations
         created: list[Migration] = []
         for m in new_migration_creates:
             new_id = await migration_service.create_migration(m)
@@ -122,6 +120,78 @@ async def execute_migrations(
         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
+@router.post("/load_data/{tenant_id}")
+async def load_data_for_tenant(
+    tenant_id: UUID,
+    classification_service: ClassificationService = Depends(get_classification_service),
+    supabase: AsyncClient = Depends(get_async_supabase),
+    admin=Depends(get_current_admin),
+) -> dict:
+    """
+    Replace a tenant's rows in the generated tables with its extracted files.
+
+    Extracted files that carry a classification are grouped by
+    ``classification_id``. For every group the table name comes from
+    ``_table_name_for_classification``; rows whose ``tenant_id`` matches are
+    deleted and one row per file is inserted. Files without a classification
+    are skipped.
+
+    Returns a dict with ``status``, ``tables_updated`` and ``message``. Any
+    exception is re-raised as an HTTP 500 carrying the error text.
+    """
+    tenant_key = str(tenant_id)
+    try:
+        extracted_files = await classification_service.get_extracted_files(
+            tenant_id
+        )
+
+        if not extracted_files:
+            return {
+                "status": "ok",
+                "tables_updated": [],
+                "message": "No extracted files found",
+            }
+
+        # Bucket the classified files; unclassified ones are left out.
+        grouped: dict[UUID, list[ExtractedFile]] = defaultdict(list)
+        for extracted in extracted_files:
+            if extracted.classification is not None:
+                key = extracted.classification.classification_id
+                grouped[key].append(extracted)
+
+        synced_tables: list[str] = []
+        for members in grouped.values():
+            # Members share a classification_id, so the first one's
+            # classification is used to derive the table name.
+            target = _table_name_for_classification(members[0].classification)
+
+            # Clear the tenant's existing rows before re-inserting.
+            purge = supabase.table(target).delete().eq("tenant_id", tenant_key)
+            await purge.execute()
+
+            # One row per file, keyed by the extracted file id.
+            payload = [
+                {
+                    "id": str(item.extracted_file_id),
+                    "tenant_id": tenant_key,
+                    "data": item.extracted_data,
+                }
+                for item in members
+            ]
+            if payload:
+                await supabase.table(target).insert(payload).execute()
+
+            synced_tables.append(target)
+
+        return {
+            "status": "ok",
+            "tables_updated": synced_tables,
+            "message": "Data synced from extracted_files into generated tables",
+        }
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
 @router.get("/connection-url/{tenant_id}")
 async def get_tenant_connection_url(
     tenant_id: UUID,
@@ -130,15 +200,6 @@ async def get_tenant_connection_url(
 ) -> dict:
     """
     Get a PostgreSQL connection URL for a specific tenant.
-
-    This URL is scoped to only show the tenant's generated tables.
-
-    Query params:
-        include_public: If true, also include public schema (for shared tables)
-
-    Example:
-        GET /migrations/connection-url/{tenant_id}
-        GET /migrations/connection-url/{tenant_id}?include_public=true
     """
     from app.utils.tenant_connection import get_schema_name, get_tenant_connection_url
 
diff --git a/backend/app/utils/migrations.py b/backend/app/utils/migrations.py
@@ -21,6 +21,31 @@ def _get_schema_name(tenant_id) -> str:
     return f"tenant_{str(tenant_id).replace('-', '_')}"
 
 
+def _get_created_tables(migrations: list[Migration]) -> set[str]:
+    """
+    Collect the name suffix of every ``create_table_*`` migration.
+
+    Only the leading ``create_table_`` prefix is stripped; the rest of the
+    migration name is returned unchanged.
+    """
+    prefix = "create_table_"
+    return {
+        m.name.removeprefix(prefix) for m in migrations if m.name.startswith(prefix)
+    }
+
+
+def _get_dropped_tables(migrations: list[Migration]) -> set[str]:
+    """
+    Collect the name suffix of every ``drop_table_*`` migration.
+
+    Only the leading ``drop_table_`` prefix is stripped from each name.
+    """
+    prefix = "drop_table_"
+    return {
+        m.name.removeprefix(prefix) for m in migrations if m.name.startswith(prefix)
+    }
+
+
 def create_migrations(
     classifications: list[Classification],
     relationships: list[Relationship],
@@ -30,16 +55,20 @@ def create_migrations(
     PURE FUNCTION.
 
     Given:
-      - classifications: what tables we conceptually want
+      - classifications: what tables we conceptually want NOW
       - relationships: how those tables relate (1-1, 1-many, many-many)
       - initial_migrations: migrations that already exist in DB
 
     Returns:
       - list[MigrationCreate] = new migrations to append on top
 
-    NOW WITH SCHEMA-PER-TENANT:
-      - First migration creates the tenant schema
-      - All tables are created within that schema
+    This function handles:
+      1. CREATE SCHEMA for the tenant
+      2. CREATE TABLE for new classifications
+      3. DROP TABLE for removed classifications
+      4. Relationship migrations
+
+    All SQL is schema-qualified for tenant isolation.
     """
     if not classifications:
         return []
@@ -52,11 +81,16 @@ def create_migrations(
 
     new_migrations: list[MigrationCreate] = []
 
-    # All classifications belong to the same tenant
-    tenant_id = classifications[0].tenant_id
-    schema_name = _get_schema_name(tenant_id)
+    # Get tenant info and schema name
+    tenant_id = classifications[0].tenant_id if classifications else None
+    if not tenant_id:
+        # If no classifications exist, try to get tenant_id from migrations
+        if initial_migrations:
+            tenant_id = initial_migrations[0].tenant_id
 
-    # ===== STEP 1: CREATE SCHEMA =====
+    schema_name = _get_schema_name(tenant_id) if tenant_id else "public"
+
+    # ===== STEP 0: CREATE SCHEMA =====
     schema_migration_name = f"create_schema_{schema_name}"
 
     if schema_migration_name not in existing_names:
@@ -71,7 +105,45 @@ def create_migrations(
         existing_names.add(schema_migration_name)
         next_seq += 1
 
+    # ===== STEP 1: Handle DROP migrations for removed classifications =====
+    # Get current state of tables from migrations
+    created_tables = _get_created_tables(initial_migrations)
+    dropped_tables = _get_dropped_tables(initial_migrations)
+    active_tables = created_tables - dropped_tables
+
+    # Build current classification table names
+    current_classification_tables = {
+        _table_name_for_classification(c) for c in classifications
+    }
+
+    # Tables that were created but no longer in classifications = should be dropped
+    tables_to_drop = active_tables - current_classification_tables
+
+    for table_name in sorted(tables_to_drop):
+        # Remove schema prefix if present (helper functions might include it)
+        clean_table_name = table_name.split('.')[-1] if '.' in table_name else table_name
+        mig_name = f"drop_table_{schema_name}_{clean_table_name}"
+
+        if mig_name in existing_names:
+            continue
+
+        # Schema-qualified DROP with CASCADE
+        sql = f"DROP TABLE IF EXISTS {schema_name}.{clean_table_name} CASCADE;"
+
+        if tenant_id:
+            new_migrations.append(
+                MigrationCreate(
+                    tenant_id=tenant_id,
+                    name=mig_name,
+                    sql=sql,
+                    sequence=next_seq,
+                )
+            )
+            existing_names.add(mig_name)
+            next_seq += 1
+
     # ===== STEP 2: CREATE TABLES (in tenant schema) =====
+
     for c in classifications:
         table_name = _table_name_for_classification(c)
         qualified_table_name = f"{schema_name}.{table_name}"
@@ -80,14 +152,15 @@ def create_migrations(
         if mig_name in existing_names:
             continue
 
+        # Schema-qualified CREATE
         sql = f"""
-        CREATE TABLE IF NOT EXISTS {qualified_table_name} (
-        id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-        tenant_id UUID NOT NULL,
-        data JSONB NOT NULL,
-        created_at TIMESTAMPTZ DEFAULT NOW()
+CREATE TABLE IF NOT EXISTS {qualified_table_name} (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    data JSONB NOT NULL,
+    created_at TIMESTAMPTZ DEFAULT NOW()
 );
-        """.strip()
+""".strip()
 
         new_migrations.append(
             MigrationCreate(
@@ -105,54 +178,65 @@ def create_migrations(
         from_table = _table_name_for_classification(rel.from_classification)
         to_table = _table_name_for_classification(rel.to_classification)
 
+        # Skip relationships where either table doesn't exist anymore
+        if (
+            from_table not in current_classification_tables
+            or to_table not in current_classification_tables
+        ):
+            continue
+        
         qualified_from = f"{schema_name}.{from_table}"
         qualified_to = f"{schema_name}.{to_table}"
 
         # Support both Enum and plain string for rel.type
-        rel_type = getattr(rel.type, "value", rel.type)
+        raw_type = getattr(rel.type, "value", rel.type)
+        rel_type_norm = str(raw_type).upper().replace("-", "_")
 
-        mig_name = f"rel_{rel_type.lower()}_{schema_name}_{from_table}_{to_table}"
+        mig_name = f"rel_{rel_type_norm.lower()}_{schema_name}_{from_table}_{to_table}"
 
         if mig_name in existing_names:
             continue
 
-        if rel_type == "ONE_TO_MANY":
+        if rel_type_norm == "ONE_TO_MANY":
+            # Schema-qualified ALTER TABLE for one-to-many
             sql = f"""
-        ALTER TABLE {qualified_from}
-        ADD COLUMN IF NOT EXISTS {to_table}_id UUID,
-        ADD CONSTRAINT fk_{schema_name}_{from_table}_{to_table}
-            FOREIGN KEY ({to_table}_id)
-            REFERENCES {qualified_to}(id);
-                    """.strip()
-
-        elif rel_type == "ONE_TO_ONE":
+ALTER TABLE {qualified_from}
+ADD COLUMN IF NOT EXISTS {to_table}_id UUID,
+ADD CONSTRAINT fk_{schema_name}_{from_table}_{to_table}
+    FOREIGN KEY ({to_table}_id)
+    REFERENCES {qualified_to}(id);
+""".strip()
+
+        elif rel_type_norm == "ONE_TO_ONE":
+            # Schema-qualified ALTER TABLE for one-to-one
             sql = f"""
-                ALTER TABLE {qualified_from}
-                ADD COLUMN IF NOT EXISTS {to_table}_id UUID UNIQUE,
-                ADD CONSTRAINT fk_{schema_name}_{from_table}_{to_table}
-                    FOREIGN KEY ({to_table}_id)
-                    REFERENCES {qualified_to}(id);
-                            """.strip()
-
-        elif rel_type == "MANY_TO_MANY":
+ALTER TABLE {qualified_from}
+ADD COLUMN IF NOT EXISTS {to_table}_id UUID UNIQUE,
+ADD CONSTRAINT fk_{schema_name}_{from_table}_{to_table}
+    FOREIGN KEY ({to_table}_id)
+    REFERENCES {qualified_to}(id);
+""".strip()
+
+        elif rel_type_norm == "MANY_TO_MANY":
+            # Schema-qualified CREATE TABLE for join table
             join_table = f"{from_table}_{to_table}_join"
             qualified_join = f"{schema_name}.{join_table}"
 
             sql = f"""
-            CREATE TABLE IF NOT EXISTS {qualified_join} (
-                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-                {from_table}_id UUID NOT NULL,
-                {to_table}_id UUID NOT NULL,
-                CONSTRAINT fk_{schema_name}_{join_table}_{from_table}
-                    FOREIGN KEY ({from_table}_id)
-                    REFERENCES {qualified_from}(id),
-                CONSTRAINT fk_{schema_name}_{join_table}_{to_table}
-                    FOREIGN KEY ({to_table}_id)
-                    REFERENCES {qualified_to}(id),
-                CONSTRAINT uniq_{schema_name}_{join_table}
-                    UNIQUE ({from_table}_id, {to_table}_id)
-            );
-            """.strip()
+CREATE TABLE IF NOT EXISTS {qualified_join} (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    {from_table}_id UUID NOT NULL,
+    {to_table}_id UUID NOT NULL,
+    CONSTRAINT fk_{schema_name}_{join_table}_{from_table}
+        FOREIGN KEY ({from_table}_id)
+        REFERENCES {qualified_from}(id),
+    CONSTRAINT fk_{schema_name}_{join_table}_{to_table}
+        FOREIGN KEY ({to_table}_id)
+        REFERENCES {qualified_to}(id),
+    CONSTRAINT uniq_{schema_name}_{join_table}
+        UNIQUE ({from_table}_id, {to_table}_id)
+);
+""".strip()
         else:
             sql = f"-- TODO: implement SQL for relationship {mig_name}"
 
@@ -167,4 +251,4 @@ def create_migrations(
         existing_names.add(mig_name)
         next_seq += 1
 
-    return new_migrations
+    return new_migrations
diff --git a/package-lock.json b/package-lock.json