Files
Business-Management/features/doc-service/app/models/document.py
T
curo1305 2f3efb9bf9 feat: migrate doc-service to use storage-service for file I/O (Phase 2)
- storage.py: replace aiofiles filesystem ops with httpx calls to
  storage-service PUT/GET/DELETE /objects/documents/{key}
- Document model: rename file_path → storage_key (plain object key, no path prefix)
- Migration 0008: ALTER COLUMN + data migration strips /data/documents/ prefix
- documents.py: update upload, delete, download endpoints; _extract_pdf_text
  now takes bytes (pdfplumber.open(BytesIO)) instead of a filesystem path
- file_watcher.py: store storage_key instead of file_path on ingestion
- doc-service config: add STORAGE_SERVICE_URL env var

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 15:57:29 +02:00

39 lines
1.9 KiB
Python

import uuid
from datetime import datetime
from sqlalchemy import DateTime, Integer, String, Text, func
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class Document(Base):
    """ORM model for a row of the ``documents`` table.

    Holds the stored-object reference, processing state, extracted content
    and watch-directory ingestion metadata for one document, plus its
    ``CategoryAssignment`` children.
    """

    __tablename__ = "documents"

    # String primary key; defaults to a freshly generated UUID4 string.
    id: Mapped[str] = mapped_column(
        String, primary_key=True, default=lambda: str(uuid.uuid4())
    )
    # Indexed. Presumably the id of the owning user -- confirm against callers.
    user_id: Mapped[str] = mapped_column(String, nullable=False, index=True)
    filename: Mapped[str] = mapped_column(String, nullable=False)
    # Plain object key in the storage-service (no path prefix); this column
    # replaced the former ``file_path``.
    storage_key: Mapped[str] = mapped_column(String, nullable=False)
    # NOTE(review): unit is presumably bytes -- confirm.
    file_size: Mapped[int] = mapped_column(Integer, nullable=False)
    # New rows start as "pending"; the other status values are set elsewhere.
    status: Mapped[str] = mapped_column(String, nullable=False, default="pending")

    title: Mapped[str | None] = mapped_column(String(500), nullable=True)
    document_type: Mapped[str | None] = mapped_column(String, nullable=True)
    raw_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    # JSON string
    extracted_data: Mapped[str | None] = mapped_column(Text, nullable=True)
    # JSON array string
    tags: Mapped[str | None] = mapped_column(Text, nullable=True)
    error_message: Mapped[str | None] = mapped_column(String(500), nullable=True)

    # Filled in by the database (server-side ``now()``) when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # NULL by default; presumably set once processing finishes -- confirm.
    processed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )

    # Watch-directory ingestion fields (migration 0003)
    source: Mapped[str] = mapped_column(String(16), nullable=False, default="upload")
    watch_path: Mapped[str | None] = mapped_column(String, nullable=True)
    suggested_folder: Mapped[str | None] = mapped_column(String(128), nullable=True)
    suggested_filename: Mapped[str | None] = mapped_column(String(500), nullable=True)

    # "all, delete-orphan": assignments are deleted together with the document
    # and when they are removed from this collection.
    category_assignments: Mapped[list["CategoryAssignment"]] = relationship(
        "CategoryAssignment", back_populates="document", cascade="all, delete-orphan"
    )