Add PDF document service with AI extraction and per-app settings
- New `features/doc-service` FastAPI microservice: PDF upload, async text extraction (pdfplumber), AI classification via Anthropic/Ollama/ LM Studio, per-user categories, file download - Alembic migration isolated with `alembic_version_doc_service` table - Main backend: httpx proxy routers for /api/documents/* and /api/documents/categories/*, admin settings API at /api/settings/* - Runtime config in /config/doc_service_config.json (shared Docker volume); api_key masking on reads; atomic write with os.replace() - Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage launcher hub, simplified Nav (removed Settings link), new routes - docker-compose: doc-service service, doc_data + app_config volumes, removed internal:true from backend-net for outbound AI API calls - Fix pre-commit hook: probe Docker socket path so git subprocess picks up Docker Desktop on macOS - Fix security_check.py: use sys.executable for bandit so venv python is used instead of system python Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
"""create document tables
|
||||
|
||||
Revision ID: 0001
|
||||
Revises:
|
||||
Create Date: 2026-04-14
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
revision: str = "0001"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"documents",
|
||||
sa.Column("id", sa.String(), primary_key=True),
|
||||
sa.Column("user_id", sa.String(), nullable=False),
|
||||
sa.Column("filename", sa.String(), nullable=False),
|
||||
sa.Column("file_path", sa.String(), nullable=False),
|
||||
sa.Column("file_size", sa.Integer(), nullable=False),
|
||||
sa.Column("status", sa.String(), nullable=False),
|
||||
sa.Column("document_type", sa.String(), nullable=True),
|
||||
sa.Column("raw_text", sa.Text(), nullable=True),
|
||||
sa.Column("extracted_data", sa.Text(), nullable=True),
|
||||
sa.Column("tags", sa.Text(), nullable=True),
|
||||
sa.Column("error_message", sa.String(500), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("processed_at", sa.DateTime(timezone=True), nullable=True),
|
||||
)
|
||||
op.create_index("ix_documents_user_id", "documents", ["user_id"])
|
||||
|
||||
op.create_table(
|
||||
"document_categories",
|
||||
sa.Column("id", sa.String(), primary_key=True),
|
||||
sa.Column("user_id", sa.String(), nullable=False),
|
||||
sa.Column("name", sa.String(128), nullable=False),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
op.create_index("ix_document_categories_user_id", "document_categories", ["user_id"])
|
||||
|
||||
op.create_table(
|
||||
"document_category_assignments",
|
||||
sa.Column(
|
||||
"document_id",
|
||||
sa.String(),
|
||||
sa.ForeignKey("documents.id", ondelete="CASCADE"),
|
||||
primary_key=True,
|
||||
),
|
||||
sa.Column(
|
||||
"category_id",
|
||||
sa.String(),
|
||||
sa.ForeignKey("document_categories.id", ondelete="CASCADE"),
|
||||
primary_key=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_table("document_category_assignments")
|
||||
op.drop_index("ix_document_categories_user_id", "document_categories")
|
||||
op.drop_table("document_categories")
|
||||
op.drop_index("ix_documents_user_id", "documents")
|
||||
op.drop_table("documents")
|
||||
Reference in New Issue
Block a user