diff --git a/backend/migrations/versions/0003_multi_user_isolation.py b/backend/migrations/versions/0003_multi_user_isolation.py new file mode 100644 index 0000000..0ef1dad --- /dev/null +++ b/backend/migrations/versions/0003_multi_user_isolation.py @@ -0,0 +1,132 @@ +"""Multi-user isolation: null-user cleanup, NOT NULL constraint, topic cleanup, +quota reconciliation, and ix_topics_user_id index. + +Revision ID: 0003 +Revises: 0002 +Create Date: 2026-05-23 + +Changes (in order — see upgrade() for numbered sections): + 1. Collect object_keys for documents WHERE user_id IS NULL (for MinIO cleanup) + 2. Delete document_topics rows for null-user documents (cascade safety) + 3. Delete documents WHERE user_id IS NULL (D-01, D-02) + 4. Remove corresponding MinIO objects (synchronous SDK, wrapped in try/except; + skipped if MINIO_ENDPOINT env var is not set — safe for SQLite test runs) + 5. Delete all topics rows (D-10 — admin seeds system topics fresh post-Phase 3) + 6. Alter documents.user_id to NOT NULL (requires batch_alter_table for SQLite compat) + 7. Create ix_topics_user_id index (DOC-04, Finding 6) + 8. Reconcile quotas.used_bytes from SUM(documents.size_bytes) per user (D-03) + +Note on MinIO step (T-03-01 mitigated): + Object keys are collected before any DB DELETE. MinIO deletions happen outside + the Alembic transaction — if MinIO is partially unreachable, leftover objects + are orphaned but harmless (no DB row references them). Migration is run only + after docker-compose health checks confirm MinIO is ready (T-03-02 accepted). + +Note on downgrade(): + WARNING: deleted null-user document rows and their MinIO objects are NOT + restored — Phase 3 cleanup is one-way per CONTEXT.md D-01. downgrade() only + reverses schema-level changes (NOT NULL → nullable, drop index). + +Note on SQLite test compatibility (batch_alter_table): + op.batch_alter_table is required for SQLite ALTER COLUMN support. It is a + transparent pass-through on PostgreSQL — no behavioral difference. + +No GRANT statements needed: migration 0003 creates no new tables (Finding 1). +""" +from __future__ import annotations + +import os + +import sqlalchemy as sa +from sqlalchemy import text +from sqlalchemy.dialects import postgresql +from alembic import op + +# revision identifiers, used by Alembic. +revision = "0003" +down_revision = "0002" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + bind = op.get_bind() + + # ── 1. Collect null-user document object_keys for MinIO cleanup ─────────── + result = bind.execute(text("SELECT id, object_key FROM documents WHERE user_id IS NULL")) + null_user_objects = [(row[0], row[1]) for row in result] + + # ── 2. Delete document_topics for null-user documents (cascade safety) ──── + op.execute( + text( + "DELETE FROM document_topics WHERE document_id IN " + "(SELECT id FROM documents WHERE user_id IS NULL)" + ) + ) + + # ── 3. Delete documents WHERE user_id IS NULL ───────────────────────────── + op.execute(text("DELETE FROM documents WHERE user_id IS NULL")) + + # ── 4. Remove MinIO objects for deleted null-user documents ─────────────── + # Skipped if MINIO_ENDPOINT is not set (SQLite test compatibility — T-03-02). + # Each remove_object is wrapped in try/except so a partial MinIO failure + # cannot leave the DB + MinIO in a worse state than orphaned objects (T-03-01). + if os.environ.get("MINIO_ENDPOINT"): + from minio import Minio # deferred import — only needed when MinIO is configured + + bucket = os.environ.get("MINIO_BUCKET", "docuvault") + client = Minio( + os.environ.get("MINIO_ENDPOINT", "minio:9000"), + access_key=os.environ.get("MINIO_ACCESS_KEY", ""), + secret_key=os.environ.get("MINIO_SECRET_KEY", ""), + secure=False, + ) + for _doc_id, object_key in null_user_objects: + try: + client.remove_object(bucket, object_key) + except Exception: + pass # object already gone or MinIO unreachable — orphan is harmless + + # ── 5. Delete all topics rows (D-10 — admin seeds system topics post-Phase 3) + op.execute(text("DELETE FROM topics")) + + # ── 6. Alter documents.user_id to NOT NULL ──────────────────────────────── + # batch_alter_table is required for SQLite ALTER COLUMN support (transparent + # pass-through on PostgreSQL — no behavioral difference on production DB). + with op.batch_alter_table("documents") as batch_op: + batch_op.alter_column( + "user_id", + existing_type=postgresql.UUID(as_uuid=True), + nullable=False, + ) + + # ── 7. Create ix_topics_user_id index (DOC-04, Finding 6) ───────────────── + op.create_index("ix_topics_user_id", "topics", ["user_id"]) + + # ── 8. Reconcile quotas.used_bytes from actual document data (D-03) ─────── + op.execute( + text( + "UPDATE quotas SET used_bytes = (" + " SELECT COALESCE(SUM(size_bytes), 0) FROM documents" + " WHERE documents.user_id = quotas.user_id" + ")" + ) + ) + + +def downgrade() -> None: + """Reverse schema-level changes only. + + WARNING: deleted null-user document rows and their MinIO objects are NOT + restored — Phase 3 cleanup is one-way per CONTEXT.md D-01. + """ + # ── 1. Drop ix_topics_user_id index ────────────────────────────────────── + op.drop_index("ix_topics_user_id", table_name="topics") + + # ── 2. Revert documents.user_id to nullable ─────────────────────────────── + with op.batch_alter_table("documents") as batch_op: + batch_op.alter_column( + "user_id", + existing_type=postgresql.UUID(as_uuid=True), + nullable=True, + )