Files
Business-Management/features/doc-service/alembic/versions/0001_create_doc_tables.py
T
curo1305 0d34867a69 Add PDF document service with AI extraction and per-app settings
- New `features/doc-service` FastAPI microservice: PDF upload, async
  text extraction (pdfplumber), AI classification via Anthropic/Ollama/
  LM Studio, per-user categories, file download
- Alembic migration isolated with `alembic_version_doc_service` table
- Main backend: httpx proxy routers for /api/documents/* and
  /api/documents/categories/*, admin settings API at /api/settings/*
- Runtime config in /config/doc_service_config.json (shared Docker
  volume); api_key masking on reads; atomic write with os.replace()
- Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage
  launcher hub, simplified Nav (removed Settings link), new routes
- docker-compose: doc-service service, doc_data + app_config volumes,
  removed internal:true from backend-net for outbound AI API calls
- Fix pre-commit hook: probe Docker socket path so git subprocess picks
  up Docker Desktop on macOS
- Fix security_check.py: use sys.executable for bandit so venv python
  is used instead of system python

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 05:28:11 +02:00

80 lines
2.5 KiB
Python

"""create document tables
Revision ID: 0001
Revises:
Create Date: 2026-04-14
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
revision: str = "0001"
# None here matches the empty "Revises:" field in the module docstring:
# this revision declares no parent revision.
down_revision: Union[str, None] = None
# No branch labels or cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the document tables, their ``user_id`` indexes, and the link table.

    Steps, in execution order:

    1. ``documents`` followed by the index ``ix_documents_user_id``.
    2. ``document_categories`` followed by the index
       ``ix_document_categories_user_id``.
    3. ``document_category_assignments``, whose two columns together form
       the primary key and each reference their parent table's ``id`` with
       ``ondelete="CASCADE"``.
    """

    def created_at_column():
        # Fresh Column per call (a Column object belongs to a single table):
        # timezone-aware, NOT NULL, server default given as the SQL text now().
        return sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        )

    def cascading_key(column_name, target):
        # String column that is part of the primary key and references
        # ``target`` with ON DELETE CASCADE.
        return sa.Column(
            column_name,
            sa.String(),
            sa.ForeignKey(target, ondelete="CASCADE"),
            primary_key=True,
        )

    # --- documents ---------------------------------------------------------
    document_columns = [
        sa.Column("id", sa.String(), primary_key=True),
        sa.Column("user_id", sa.String(), nullable=False),
        sa.Column("filename", sa.String(), nullable=False),
        sa.Column("file_path", sa.String(), nullable=False),
        sa.Column("file_size", sa.Integer(), nullable=False),
        sa.Column("status", sa.String(), nullable=False),
        sa.Column("document_type", sa.String(), nullable=True),
        sa.Column("raw_text", sa.Text(), nullable=True),
        sa.Column("extracted_data", sa.Text(), nullable=True),
        sa.Column("tags", sa.Text(), nullable=True),
        sa.Column("error_message", sa.String(500), nullable=True),
        created_at_column(),
        sa.Column("processed_at", sa.DateTime(timezone=True), nullable=True),
    ]
    op.create_table("documents", *document_columns)
    op.create_index("ix_documents_user_id", "documents", ["user_id"])

    # --- document_categories -----------------------------------------------
    category_columns = [
        sa.Column("id", sa.String(), primary_key=True),
        sa.Column("user_id", sa.String(), nullable=False),
        sa.Column("name", sa.String(128), nullable=False),
        created_at_column(),
    ]
    op.create_table("document_categories", *category_columns)
    op.create_index(
        "ix_document_categories_user_id",
        "document_categories",
        ["user_id"],
    )

    # --- document_category_assignments -------------------------------------
    # Created last because it references both tables above.
    assignment_columns = [
        cascading_key("document_id", "documents.id"),
        cascading_key("category_id", "document_categories.id"),
    ]
    op.create_table("document_category_assignments", *assignment_columns)
def downgrade() -> None:
    """Drop everything ``upgrade`` created, in reverse order.

    The assignment table goes first because it holds foreign keys to the
    other two tables; then, for each remaining table, its ``user_id`` index
    is dropped before the table itself.
    """
    op.drop_table("document_category_assignments")

    indexed_tables = (
        ("ix_document_categories_user_id", "document_categories"),
        ("ix_documents_user_id", "documents"),
    )
    for index_name, owning_table in indexed_tables:
        op.drop_index(index_name, owning_table)
        op.drop_table(owning_table)