"""Multi-user isolation: null-user cleanup, NOT NULL constraint, topic cleanup, quota reconciliation, and ix_topics_user_id index. Revision ID: 0003 Revises: 0002 Create Date: 2026-05-23 Changes (in order — see upgrade() for numbered sections): 1. Collect object_keys for documents WHERE user_id IS NULL (for MinIO cleanup) 2. Delete document_topics rows for null-user documents (cascade safety) 3. Delete documents WHERE user_id IS NULL (D-01, D-02) 4. Remove corresponding MinIO objects (synchronous SDK, wrapped in try/except; skipped if MINIO_ENDPOINT env var is not set — safe for SQLite test runs) 5. Delete all topics rows (D-10 — admin seeds system topics fresh post-Phase 3) 6. Alter documents.user_id to NOT NULL (requires batch_alter_table for SQLite compat) 7. Create ix_topics_user_id index (DOC-04, Finding 6) 8. Reconcile quotas.used_bytes from SUM(documents.size_bytes) per user (D-03) Note on MinIO step (T-03-01 mitigated): Object keys are collected before any DB DELETE. MinIO deletions happen outside the Alembic transaction — if MinIO is partially unreachable, leftover objects are orphaned but harmless (no DB row references them). Migration is run only after docker-compose health checks confirm MinIO is ready (T-03-02 accepted). Note on downgrade(): WARNING: deleted null-user document rows and their MinIO objects are NOT restored — Phase 3 cleanup is one-way per CONTEXT.md D-01. downgrade() only reverses schema-level changes (NOT NULL → nullable, drop index). Note on SQLite test compatibility (batch_alter_table): op.batch_alter_table is required for SQLite ALTER COLUMN support. It is a transparent pass-through on PostgreSQL — no behavioral difference. No GRANT statements needed: migration 0003 creates no new tables (Finding 1). """ from __future__ import annotations import os import sqlalchemy as sa from sqlalchemy import text from sqlalchemy.dialects import postgresql from alembic import op # revision identifiers, used by Alembic. 
revision = "0003"
down_revision = "0002"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply the multi-user isolation migration.

    Steps, in execution order:

    1. Collect ``(id, object_key)`` for every document whose ``user_id`` is NULL.
    2. Delete ``document_topics`` rows that reference those documents.
    3. Delete the null-user documents themselves.
    4. Best-effort removal of the matching MinIO objects. Skipped entirely when
       the ``MINIO_ENDPOINT`` environment variable is unset or empty.
    5. Delete all rows from ``topics``.
    6. Make ``documents.user_id`` NOT NULL.
    7. Create the ``ix_topics_user_id`` index.
    8. Recompute ``quotas.used_bytes`` from ``SUM(documents.size_bytes)``.

    A failure to remove an individual MinIO object never aborts the migration;
    it is logged as a warning so the orphaned object can be cleaned up later.
    """
    # Function-scope import, in the same spirit as the deferred ``minio`` import
    # below: the module's top-level import block stays untouched.
    import logging

    # Same logger name Alembic uses for its own "Running upgrade ..." messages,
    # so warnings end up wherever the migration output is already routed.
    log = logging.getLogger("alembic.runtime.migration")

    bind = op.get_bind()

    # ── 1. Collect null-user document object_keys for MinIO cleanup ───────────
    # Must happen before step 3, otherwise the keys are gone with the rows.
    result = bind.execute(text("SELECT id, object_key FROM documents WHERE user_id IS NULL"))
    null_user_objects = [(row[0], row[1]) for row in result]

    # ── 2. Delete document_topics for null-user documents (cascade safety) ────
    op.execute(
        text(
            "DELETE FROM document_topics WHERE document_id IN "
            "(SELECT id FROM documents WHERE user_id IS NULL)"
        )
    )

    # ── 3. Delete documents WHERE user_id IS NULL ─────────────────────────────
    op.execute(text("DELETE FROM documents WHERE user_id IS NULL"))

    # ── 4. Remove MinIO objects for deleted null-user documents ───────────────
    # Skipped if MINIO_ENDPOINT is not set (SQLite test compatibility — T-03-02).
    # Each remove_object is wrapped in try/except so a partial MinIO failure
    # cannot leave the DB + MinIO in a worse state than orphaned objects (T-03-01).
    endpoint = os.environ.get("MINIO_ENDPOINT")
    if endpoint:
        from minio import Minio  # deferred import — only needed when MinIO is configured

        bucket = os.environ.get("MINIO_BUCKET", "docuvault")
        client = Minio(
            endpoint,
            access_key=os.environ.get("MINIO_ACCESS_KEY", ""),
            secret_key=os.environ.get("MINIO_SECRET_KEY", ""),
            secure=False,
        )

        failed = 0
        for doc_id, object_key in null_user_objects:
            try:
                client.remove_object(bucket, object_key)
            except Exception as exc:
                # Deliberately broad and non-fatal: an orphaned object is
                # harmless, but it is reported instead of silently dropped.
                failed += 1
                log.warning(
                    "Could not remove MinIO object %r (document %s) from bucket %r: %s",
                    object_key,
                    doc_id,
                    bucket,
                    exc,
                )
        if failed:
            log.warning(
                "MinIO cleanup: %d of %d object(s) could not be removed and may be orphaned",
                failed,
                len(null_user_objects),
            )

    # ── 5. Delete all topics rows (D-10 — admin seeds system topics post-Phase 3)
    op.execute(text("DELETE FROM topics"))

    # ── 6. Alter documents.user_id to NOT NULL ────────────────────────────────
    # batch_alter_table is required for SQLite ALTER COLUMN support (transparent
    # pass-through on PostgreSQL — no behavioral difference on production DB).
    with op.batch_alter_table("documents") as batch_op:
        batch_op.alter_column(
            "user_id",
            existing_type=postgresql.UUID(as_uuid=True),
            nullable=False,
        )

    # ── 7. Create ix_topics_user_id index (DOC-04, Finding 6) ─────────────────
    op.create_index("ix_topics_user_id", "topics", ["user_id"])

    # ── 8. Reconcile quotas.used_bytes from actual document data (D-03) ───────
    # COALESCE makes users with no remaining documents end up at 0, not NULL.
    op.execute(
        text(
            "UPDATE quotas SET used_bytes = ("
            "  SELECT COALESCE(SUM(size_bytes), 0) FROM documents"
            "  WHERE documents.user_id = quotas.user_id"
            ")"
        )
    )


def downgrade() -> None:
    """Reverse schema-level changes only.

    Drops the ``ix_topics_user_id`` index and makes ``documents.user_id``
    nullable again. No data is touched.

    WARNING: deleted null-user document rows and their MinIO objects are NOT
    restored — Phase 3 cleanup is one-way per CONTEXT.md D-01.
    """
    # ── 1. Drop ix_topics_user_id index ──────────────────────────────────────
    op.drop_index("ix_topics_user_id", table_name="topics")

    # ── 2. Revert documents.user_id to nullable ───────────────────────────────
    with op.batch_alter_table("documents") as batch_op:
        batch_op.alter_column(
            "user_id",
            existing_type=postgresql.UUID(as_uuid=True),
            nullable=True,
        )