feat(phase-4): Alembic migration 0004 (pdf_open_mode, GIN FTS index, audit-logs bucket) + MinIOBackend.put_object_raw()

- Add users.pdf_open_mode column via batch_alter_table (server_default='in_app')
- Create GIN expression index ix_documents_fts on documents.extracted_text via raw SQL (Alembic #1390)
- Create audit-logs MinIO bucket gated on MINIO_ENDPOINT env var
- Add MinIOBackend.put_object_raw() for caller-supplied bucket+key uploads (audit CSV export)
This commit is contained in:
curo1305
2026-05-25 18:30:28 +02:00
parent e5423c7916
commit b6bab5a230
2 changed files with 107 additions and 0 deletions
@@ -0,0 +1,85 @@
"""Phase 4 schema additions: pdf_open_mode column, GIN FTS index, audit-logs bucket.
Revision ID: 0004
Revises: 0003
Create Date: 2026-05-25
Changes (in order — see upgrade() for numbered sections):
1. Add users.pdf_open_mode column (server_default='in_app')
2. Create GIN expression index ix_documents_fts on documents.extracted_text
3. Create audit-logs MinIO bucket (gated on MINIO_ENDPOINT env var)
Note on GIN index (T-04-02-01 mitigated):
Index is created via raw SQL op.execute() rather than Alembic Index() to prevent
Alembic's autogenerate from attempting to recreate the expression index on every
`alembic revision --autogenerate` run (Alembic issue #1390). A comment marks it
as manually managed.
Note on MinIO bucket creation (T-04-02-02 mitigated):
Bucket creation is gated on MINIO_ENDPOINT env var so SQLite test runs are
unaffected. Credentials come exclusively from env vars — none are hardcoded.
MinIO default bucket policy is private; no public-access policy is set.
Note on downgrade():
The GIN index and pdf_open_mode column are reversed. The audit-logs MinIO bucket
is NOT deleted — it may contain audit data.
"""
from __future__ import annotations
import os
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the migration it
# builds on (matches "Revises: 0003" in the module docstring).
revision = "0004"
down_revision = "0003"
# No branch label and no dependency on another revision are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Apply the Phase 4 schema additions.

    Steps, in order:
      1. Add the NOT NULL ``users.pdf_open_mode`` column
         (``server_default='in_app'``).
      2. Create the GIN full-text expression index ``ix_documents_fts`` on
         ``documents.extracted_text`` — PostgreSQL only.
      3. Create the ``audit-logs`` MinIO bucket, only when the
         ``MINIO_ENDPOINT`` environment variable is set and the bucket does
         not already exist.
    """
    # ── 1. Add users.pdf_open_mode column ────────────────────────────────────
    # batch_alter_table is required for SQLite ALTER TABLE support (transparent
    # pass-through on PostgreSQL — no behavioral difference on production DB).
    with op.batch_alter_table("users") as batch_op:
        batch_op.add_column(
            sa.Column(
                "pdf_open_mode",
                sa.String(),
                nullable=False,
                server_default="in_app",
            )
        )

    # ── 2. Create GIN expression index on documents.extracted_text ───────────
    # managed manually — do not autogenerate (Alembic issue #1390)
    # GIN indexes and to_tsvector() are PostgreSQL-specific; sending this
    # statement to SQLite is a syntax error, so every other dialect skips it.
    # The dialect is read from the migration context so the check also works
    # when no live connection is involved.
    if op.get_context().dialect.name == "postgresql":
        op.execute(
            "CREATE INDEX ix_documents_fts ON documents "
            "USING GIN (to_tsvector('english', coalesce(extracted_text, '')))"
        )

    # ── 3. Create audit-logs MinIO bucket ────────────────────────────────────
    # Skipped if MINIO_ENDPOINT is not set (SQLite test compatibility — T-04-02-02).
    # The endpoint is read once: inside the gate it is always non-empty, so no
    # fallback value is needed.
    endpoint = os.environ.get("MINIO_ENDPOINT")
    if endpoint:
        from minio import Minio  # deferred import — only needed when MinIO is configured

        client = Minio(
            endpoint,
            # Credentials come from the environment only; nothing is hardcoded.
            access_key=os.environ.get("MINIO_ACCESS_KEY", ""),
            secret_key=os.environ.get("MINIO_SECRET_KEY", ""),
            secure=False,
        )
        # Only create the bucket when it is missing so a re-run does not fail.
        if not client.bucket_exists("audit-logs"):
            client.make_bucket("audit-logs")
def downgrade() -> None:
    """Undo the schema changes from upgrade(), in reverse order.

    The full-text index is dropped first, then the ``users.pdf_open_mode``
    column. The audit-logs MinIO bucket is intentionally left in place.
    """
    # Step 1 — remove the GIN expression index. IF EXISTS makes the statement
    # a no-op when the index is not present.
    drop_fts_index = "DROP INDEX IF EXISTS ix_documents_fts"
    op.execute(drop_fts_index)

    # Step 2 — remove users.pdf_open_mode (batch mode, mirroring upgrade()).
    with op.batch_alter_table("users") as users_batch:
        users_batch.drop_column("pdf_open_mode")

    # MinIO bucket NOT reversed — bucket may contain audit data
+22
View File
@@ -85,6 +85,28 @@ class MinIOBackend(StorageBackend):
)
return object_key
async def put_object_raw(
    self,
    bucket: str,
    key: str,
    data: io.BytesIO,
    length: int,
    content_type: str,
) -> None:
    """Store a byte stream under a caller-chosen bucket and key.

    Intended for the audit-logs CSV export. In contrast to put_object(), the
    document key schema is not applied: ``key`` is used exactly as given, and
    the upload goes to ``bucket`` rather than the main documents bucket.

    Args:
        bucket: Name of the target bucket.
        key: Complete object key, used verbatim.
        data: Stream holding the bytes to upload.
        length: Number of bytes to upload, forwarded as ``length``.
        content_type: Content type forwarded to the client for the object.
    """
    upload_options = {"length": length, "content_type": content_type}
    uploader = self._client.put_object
    # The client call is handed to a worker thread via asyncio.to_thread so it
    # does not execute on the event-loop thread.
    await asyncio.to_thread(uploader, bucket, key, data, **upload_options)
async def get_object(self, object_key: str) -> bytes:
"""Fetch object bytes from MinIO by key."""