feat(phase-4): Alembic migration 0004 (pdf_open_mode, GIN FTS index, audit-logs bucket) + MinIOBackend.put_object_raw()
- Add users.pdf_open_mode column via batch_alter_table (server_default='in_app')
- Create GIN expression index ix_documents_fts on documents.extracted_text via raw SQL (Alembic #1390)
- Create audit-logs MinIO bucket gated on MINIO_ENDPOINT env var
- Add MinIOBackend.put_object_raw() for caller-supplied bucket+key uploads (audit CSV export)
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
"""Phase 4 schema additions: pdf_open_mode column, GIN FTS index, audit-logs bucket.
|
||||
|
||||
Revision ID: 0004
|
||||
Revises: 0003
|
||||
Create Date: 2026-05-25
|
||||
|
||||
Changes (in order — see upgrade() for numbered sections):
|
||||
1. Add users.pdf_open_mode column (server_default='in_app')
|
||||
2. Create GIN expression index ix_documents_fts on documents.extracted_text
|
||||
3. Create audit-logs MinIO bucket (gated on MINIO_ENDPOINT env var)
|
||||
|
||||
Note on GIN index (T-04-02-01 mitigated):
|
||||
Index is created via raw SQL op.execute() rather than Alembic Index() to prevent
|
||||
Alembic's autogenerate from attempting to recreate the expression index on every
|
||||
`alembic revision --autogenerate` run (Alembic issue #1390). A comment marks it
|
||||
as manually managed.
|
||||
|
||||
Note on MinIO bucket creation (T-04-02-02 mitigated):
|
||||
Bucket creation is gated on MINIO_ENDPOINT env var so SQLite test runs are
|
||||
unaffected. Credentials come exclusively from env vars — none are hardcoded.
|
||||
MinIO default bucket policy is private; no public-access policy is set.
|
||||
|
||||
Note on downgrade():
|
||||
The GIN index and pdf_open_mode column are reversed. The audit-logs MinIO bucket
|
||||
is NOT deleted — it may contain audit data.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0004"
|
||||
down_revision = "0003"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Apply the Phase 4 schema additions.

    Steps, in order:

    1. Add ``users.pdf_open_mode`` (NOT NULL, server default ``'in_app'``).
    2. Create the GIN full-text expression index ``ix_documents_fts`` on
       ``documents.extracted_text`` (PostgreSQL only; skipped on every other
       dialect).
    3. Create the ``audit-logs`` MinIO bucket when ``MINIO_ENDPOINT`` is set.
    """
    # ── 1. Add users.pdf_open_mode column ────────────────────────────────────
    # batch_alter_table is required for SQLite ALTER TABLE support (transparent
    # pass-through on PostgreSQL — no behavioral difference on production DB).
    with op.batch_alter_table("users") as batch_op:
        batch_op.add_column(
            sa.Column(
                "pdf_open_mode",
                sa.String(),
                nullable=False,
                server_default="in_app",
            )
        )

    # ── 2. Create GIN expression index on documents.extracted_text ───────────
    # managed manually — do not autogenerate (Alembic issue #1390)
    # GIN and to_tsvector() are PostgreSQL features; other dialects (e.g. the
    # SQLite database used for test runs) reject this statement, so it is only
    # issued when the migration targets PostgreSQL. The dialect is read from
    # the migration context so the check also works in offline (--sql) mode.
    if op.get_context().dialect.name == "postgresql":
        op.execute(
            "CREATE INDEX ix_documents_fts ON documents "
            "USING GIN (to_tsvector('english', coalesce(extracted_text, '')))"
        )

    # ── 3. Create audit-logs MinIO bucket ────────────────────────────────────
    # Skipped if MINIO_ENDPOINT is not set (SQLite test compatibility — T-04-02-02).
    endpoint = os.environ.get("MINIO_ENDPOINT")
    if endpoint:
        from minio import Minio  # deferred import — only needed when MinIO is configured

        # Credentials come from the environment only; nothing is hardcoded.
        # NOTE(review): secure=False forces plain HTTP to the endpoint —
        # confirm this matches the deployment before pointing at a TLS host.
        client = Minio(
            endpoint,
            access_key=os.environ.get("MINIO_ACCESS_KEY", ""),
            secret_key=os.environ.get("MINIO_SECRET_KEY", ""),
            secure=False,
        )
        # Only create the bucket when it is missing so re-running is harmless.
        if not client.bucket_exists("audit-logs"):
            client.make_bucket("audit-logs")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert the Phase 4 schema additions.

    Drops the ``ix_documents_fts`` index and then the ``users.pdf_open_mode``
    column. The ``audit-logs`` MinIO bucket is intentionally left in place.
    """
    # ── 1. Drop GIN expression index ─────────────────────────────────────────
    # IF EXISTS keeps the statement harmless when the index was never created.
    op.execute("DROP INDEX IF EXISTS ix_documents_fts")

    # ── 2. Drop users.pdf_open_mode column ───────────────────────────────────
    # Batch mode mirrors upgrade() so the column drop also works on SQLite.
    with op.batch_alter_table("users") as users_batch:
        users_batch.drop_column("pdf_open_mode")

    # MinIO bucket NOT reversed — bucket may contain audit data
|
||||
@@ -85,6 +85,28 @@ class MinIOBackend(StorageBackend):
|
||||
)
|
||||
return object_key
|
||||
|
||||
async def put_object_raw(
    self,
    bucket: str,
    key: str,
    data: io.BytesIO,
    length: int,
    content_type: str,
) -> None:
    """Upload a byte stream to a caller-chosen bucket and key.

    Used for the audit-logs CSV export. In contrast to put_object(), no
    document key schema is applied: ``bucket`` and ``key`` are passed to the
    client exactly as given, so the main documents bucket is not involved
    unless the caller names it.

    Args:
        bucket: Name of the target bucket.
        key: Complete object key within ``bucket``.
        data: Stream holding the payload to upload.
        length: Number of bytes to upload from ``data``.
        content_type: MIME type recorded for the stored object.
    """

    def _upload() -> None:
        # The blocking client call; executed on a worker thread below.
        self._client.put_object(
            bucket,
            key,
            data,
            length=length,
            content_type=content_type,
        )

    # Keep the event loop responsive while the synchronous client call runs.
    await asyncio.to_thread(_upload)
|
||||
|
||||
async def get_object(self, object_key: str) -> bytes:
|
||||
"""Fetch object bytes from MinIO by key."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user