feat: migrate doc-service to use storage-service for file I/O (Phase 2)
- storage.py: replace aiofiles filesystem ops with httpx calls to
storage-service PUT/GET/DELETE /objects/documents/{key}
- Document model: rename file_path → storage_key (plain object key, no path prefix)
- Migration 0008: ALTER COLUMN + data migration strips /data/documents/ prefix
- documents.py: update upload, delete, download endpoints; _extract_pdf_text
now takes bytes (pdfplumber.open(BytesIO)) instead of a filesystem path
- file_watcher.py: store storage_key instead of file_path on ingestion
- doc-service config: add STORAGE_SERVICE_URL env var
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
"""rename file_path to storage_key and strip filesystem prefix from existing rows
|
||||
|
||||
Revision ID: 0008
|
||||
Revises: 0007
|
||||
Create Date: 2026-04-20
|
||||
|
||||
Renames the documents.file_path column to storage_key.
Existing rows have paths like '/data/documents/{user_id}/{doc_id}.pdf' or
'/data/documents/watch/{doc_id}.pdf'. For rows that start with it, the
migration strips the leading '/data/documents/' prefix so the value becomes a
plain storage key (e.g. '{user_id}/{doc_id}.pdf') that the storage-service
uses as the object key.
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# Revision identifiers, used by Alembic to order this migration in the chain.
revision: str = "0008"
down_revision: Union[str, None] = "0007"
# This revision is not on a labelled branch and has no cross-branch dependency.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Rename ``documents.file_path`` to ``storage_key`` and drop the path prefix.

    Two steps, in order:

    1. Rename the column (type and NOT NULL constraint are unchanged).
    2. For rows whose value starts with ``/data/documents/``, remove that
       leading prefix so the remainder is a plain object key.

    Rows that do not start with the prefix are left untouched.
    """
    # Batch mode keeps the rename working on backends with limited
    # ALTER TABLE support (e.g. SQLite), per the Alembic batch documentation.
    with op.batch_alter_table("documents") as batch_op:
        batch_op.alter_column(
            "file_path",
            new_column_name="storage_key",
            existing_type=sa.String(),
            existing_nullable=False,
        )

    # Strip only the *leading* '/data/documents/' prefix from pre-migration
    # rows. SUBSTR is used instead of REPLACE because REPLACE would also
    # remove any later occurrence of the same text inside the key.
    op.execute(
        sa.text(
            "UPDATE documents"
            " SET storage_key = SUBSTR(storage_key, LENGTH('/data/documents/') + 1)"
            " WHERE storage_key LIKE '/data/documents/%'"
        )
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Undo revision 0008: re-add the path prefix, then rename the column back.

    Every row whose ``storage_key`` does not already start with
    ``/data/documents/`` gets that prefix prepended, after which the column
    is renamed from ``storage_key`` to ``file_path``.
    """
    # Put the filesystem prefix back first, while the column is still called
    # storage_key, so pre-migration code can locate the files again.
    restore_prefix_stmt = sa.text(
        "UPDATE documents SET storage_key = '/data/documents/' || storage_key"
        " WHERE storage_key NOT LIKE '/data/documents/%'"
    )
    op.execute(restore_prefix_stmt)

    with op.batch_alter_table("documents") as documents_batch:
        # Same column type and nullability as before; only the name changes.
        rename_options = {
            "new_column_name": "file_path",
            "existing_type": sa.String(),
            "existing_nullable": False,
        }
        documents_batch.alter_column("storage_key", **rename_options)
|
||||
Reference in New Issue
Block a user