2f3efb9bf9
- storage.py: replace aiofiles filesystem ops with httpx calls to
storage-service PUT/GET/DELETE /objects/documents/{key}
- Document model: rename file_path → storage_key (plain object key, no path prefix)
- Migration 0008: ALTER COLUMN + data migration strips /data/documents/ prefix
- documents.py: update upload, delete, download endpoints; _extract_pdf_text
now takes bytes (pdfplumber.open(BytesIO)) instead of a filesystem path
- file_watcher.py: store storage_key instead of file_path on ingestion
- doc-service config: add STORAGE_SERVICE_URL env var
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
39 lines
1.9 KiB
Python
39 lines
1.9 KiB
Python
import uuid
|
|
from datetime import datetime
|
|
|
|
from sqlalchemy import DateTime, Integer, String, Text, func
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from app.database import Base
|
|
|
|
|
|
class Document(Base):
    """ORM mapping for one row of the ``documents`` table.

    Besides the file bookkeeping columns (owner, filename, storage key,
    size), a row carries the processing status, the text/metadata produced
    by processing, and the fields used by watch-directory ingestion.
    """

    __tablename__ = "documents"

    # --- Identity and ownership -------------------------------------------
    # Primary key is a string; the Python-side default is a fresh UUID4.
    id: Mapped[str] = mapped_column(
        String,
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    # Indexed owner reference (stored as a plain string column).
    user_id: Mapped[str] = mapped_column(
        String,
        nullable=False,
        index=True,
    )

    # --- Stored file --------------------------------------------------------
    filename: Mapped[str] = mapped_column(
        String,
        nullable=False,
    )
    # Key of the stored object (this column replaced the former file_path).
    storage_key: Mapped[str] = mapped_column(
        String,
        nullable=False,
    )
    file_size: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
    )

    # --- Processing state and results ---------------------------------------
    # New rows start out as "pending".
    status: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="pending",
    )
    title: Mapped[str | None] = mapped_column(
        String(500),
        nullable=True,
    )
    document_type: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
    )
    raw_text: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )
    # JSON string
    extracted_data: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )
    # JSON array string
    tags: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )
    error_message: Mapped[str | None] = mapped_column(
        String(500),
        nullable=True,
    )

    # --- Timestamps (timezone-aware) ----------------------------------------
    # Filled in by the database via a server-side default of now().
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    processed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # --- Watch-directory ingestion fields (migration 0003) -------------------
    # Origin of the document; defaults to "upload".
    source: Mapped[str] = mapped_column(
        String(16),
        nullable=False,
        default="upload",
    )
    watch_path: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
    )
    suggested_folder: Mapped[str | None] = mapped_column(
        String(128),
        nullable=True,
    )
    suggested_filename: Mapped[str | None] = mapped_column(
        String(500),
        nullable=True,
    )

    # --- Relationships --------------------------------------------------------
    # Assignments are owned by the document: the "all, delete-orphan" cascade
    # removes them when the document is deleted or when they are detached.
    category_assignments: Mapped[list["CategoryAssignment"]] = relationship(
        "CategoryAssignment",
        back_populates="document",
        cascade="all, delete-orphan",
    )
|