feat(phase-4): Alembic migration 0004 (pdf_open_mode, GIN FTS index, audit-logs bucket) + MinIOBackend.put_object_raw()
- Add users.pdf_open_mode column via batch_alter_table (server_default='in_app')
- Create GIN expression index ix_documents_fts on documents.extracted_text via raw SQL (Alembic #1390)
- Create audit-logs MinIO bucket gated on MINIO_ENDPOINT env var
- Add MinIOBackend.put_object_raw() for caller-supplied bucket+key uploads (audit CSV export)
This commit is contained in:
@@ -0,0 +1,85 @@
|
|||||||
|
"""Phase 4 schema additions: pdf_open_mode column, GIN FTS index, audit-logs bucket.
|
||||||
|
|
||||||
|
Revision ID: 0004
|
||||||
|
Revises: 0003
|
||||||
|
Create Date: 2026-05-25
|
||||||
|
|
||||||
|
Changes (in order — see upgrade() for numbered sections):
|
||||||
|
1. Add users.pdf_open_mode column (server_default='in_app')
|
||||||
|
2. Create GIN expression index ix_documents_fts on documents.extracted_text
|
||||||
|
3. Create audit-logs MinIO bucket (gated on MINIO_ENDPOINT env var)
|
||||||
|
|
||||||
|
Note on GIN index (T-04-02-01 mitigated):
|
||||||
|
Index is created via raw SQL op.execute() rather than Alembic Index() to prevent
|
||||||
|
Alembic's autogenerate from attempting to recreate the expression index on every
|
||||||
|
`alembic revision --autogenerate` run (Alembic issue #1390). A comment marks it
|
||||||
|
as manually managed.
|
||||||
|
|
||||||
|
Note on MinIO bucket creation (T-04-02-02 mitigated):
|
||||||
|
Bucket creation is gated on MINIO_ENDPOINT env var so SQLite test runs are
|
||||||
|
unaffected. Credentials come exclusively from env vars — none are hardcoded.
|
||||||
|
MinIO default bucket policy is private; no public-access policy is set.
|
||||||
|
|
||||||
|
Note on downgrade():
|
||||||
|
The GIN index and pdf_open_mode column are reversed. The audit-logs MinIO bucket
|
||||||
|
is NOT deleted — it may contain audit data.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "0004"
|
||||||
|
down_revision = "0003"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Apply the Phase 4 schema additions.

    Steps, in order:

    1. Add the ``users.pdf_open_mode`` column (``server_default='in_app'``).
    2. Create the ``ix_documents_fts`` GIN expression index on
       ``documents.extracted_text`` — PostgreSQL only.
    3. Create the ``audit-logs`` MinIO bucket, only when the
       ``MINIO_ENDPOINT`` environment variable is set.
    """
    # ── 1. Add users.pdf_open_mode column ────────────────────────────────────
    # batch_alter_table is required for SQLite ALTER TABLE support (transparent
    # pass-through on PostgreSQL — no behavioral difference on production DB).
    with op.batch_alter_table("users") as batch_op:
        batch_op.add_column(
            sa.Column(
                "pdf_open_mode",
                sa.String(),
                nullable=False,
                server_default="in_app",
            )
        )

    # ── 2. Create GIN expression index on documents.extracted_text ───────────
    # managed manually — do not autogenerate (Alembic issue #1390)
    # GIN indexes and to_tsvector() are PostgreSQL features; on SQLite this
    # statement is a syntax error, so it is skipped for every other dialect.
    # The dialect is read from the migration context (rather than the bind) so
    # the check also works in offline/--sql mode where there is no connection.
    if op.get_context().dialect.name == "postgresql":
        op.execute(
            "CREATE INDEX ix_documents_fts ON documents "
            "USING GIN (to_tsvector('english', coalesce(extracted_text, '')))"
        )

    # ── 3. Create audit-logs MinIO bucket ────────────────────────────────────
    # Skipped if MINIO_ENDPOINT is not set (SQLite test compatibility — T-04-02-02).
    minio_endpoint = os.environ.get("MINIO_ENDPOINT")
    if not minio_endpoint:
        return

    from minio import Minio  # deferred import — only needed when MinIO is configured

    # Credentials come only from the environment; nothing is hardcoded here.
    client = Minio(
        minio_endpoint,
        access_key=os.environ.get("MINIO_ACCESS_KEY", ""),
        secret_key=os.environ.get("MINIO_SECRET_KEY", ""),
        secure=False,
    )
    # Existence check keeps the step safe to re-run against an existing bucket.
    if not client.bucket_exists("audit-logs"):
        client.make_bucket("audit-logs")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert the Phase 4 schema additions.

    Drops the ``ix_documents_fts`` index, then the ``users.pdf_open_mode``
    column. The ``audit-logs`` MinIO bucket is deliberately left in place
    because it may contain audit data.
    """
    # Step 1: remove the full-text index; IF EXISTS makes this a no-op when
    # the index was never created.
    op.execute("DROP INDEX IF EXISTS ix_documents_fts")

    # Step 2: remove the column added by upgrade().
    with op.batch_alter_table("users") as users_table:
        users_table.drop_column("pdf_open_mode")

    # MinIO bucket NOT reversed — bucket may contain audit data
|
||||||
@@ -85,6 +85,28 @@ class MinIOBackend(StorageBackend):
|
|||||||
)
|
)
|
||||||
return object_key
|
return object_key
|
||||||
|
|
||||||
|
async def put_object_raw(
    self,
    bucket: str,
    key: str,
    data: io.BytesIO,
    length: int,
    content_type: str,
) -> None:
    """Upload a byte stream to a caller-chosen bucket and key.

    Intended for the audit-logs CSV export. In contrast to put_object(), no
    document key schema is applied: ``key`` is used exactly as given, and the
    upload goes to ``bucket`` rather than the main documents bucket.

    Args:
        bucket: Name of the destination bucket.
        key: Complete object key inside ``bucket``.
        data: Stream holding the payload to upload.
        length: Payload size, forwarded to the client as ``length``.
        content_type: Value forwarded to the client as ``content_type``.
    """
    # Resolve the client's upload method once, then hand the call to a worker
    # thread via asyncio.to_thread so it is awaited rather than run inline.
    upload = self._client.put_object
    await asyncio.to_thread(
        upload,
        bucket,
        key,
        data,
        length=length,
        content_type=content_type,
    )
|
||||||
|
|
||||||
async def get_object(self, object_key: str) -> bytes:
|
async def get_object(self, object_key: str) -> bytes:
|
||||||
"""Fetch object bytes from MinIO by key."""
|
"""Fetch object bytes from MinIO by key."""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user