Add PDF document service with AI extraction and per-app settings
- New `features/doc-service` FastAPI microservice: PDF upload, async text extraction (pdfplumber), AI classification via Anthropic/Ollama/ LM Studio, per-user categories, file download - Alembic migration isolated with `alembic_version_doc_service` table - Main backend: httpx proxy routers for /api/documents/* and /api/documents/categories/*, admin settings API at /api/settings/* - Runtime config in /config/doc_service_config.json (shared Docker volume); api_key masking on reads; atomic write with os.replace() - Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage launcher hub, simplified Nav (removed Settings link), new routes - docker-compose: doc-service service, doc_data + app_config volumes, removed internal:true from backend-net for outbound AI API calls - Fix pre-commit hook: probe Docker socket path so git subprocess picks up Docker Desktop on macOS - Fix security_check.py: use sys.executable for bandit so venv python is used instead of system python Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
import asyncio
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
from app.core.config import settings
|
||||
from app.database import Base
|
||||
import app.models # noqa: F401 — registers Document, DocumentCategory, CategoryAssignment
|
||||
|
||||
config = context.config
|
||||
config.set_main_option("sqlalchemy.url", settings.DATABASE_URL)
|
||||
|
||||
if config.config_file_name:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
target_metadata = Base.metadata
|
||||
|
||||
# Separate version table — must not collide with the main backend's alembic_version table.
|
||||
VERSION_TABLE = "alembic_version_doc_service"
|
||||
|
||||
|
||||
def run_migrations_offline():
|
||||
context.configure(
|
||||
url=settings.DATABASE_URL,
|
||||
target_metadata=target_metadata,
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
version_table=VERSION_TABLE,
|
||||
)
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection):
|
||||
context.configure(
|
||||
connection=connection,
|
||||
target_metadata=target_metadata,
|
||||
version_table=VERSION_TABLE,
|
||||
)
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
async def run_migrations_online():
|
||||
engine = create_async_engine(settings.DATABASE_URL)
|
||||
async with engine.connect() as conn:
|
||||
await conn.run_sync(do_run_migrations)
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
asyncio.run(run_migrations_online())
|
||||
@@ -0,0 +1,25 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -0,0 +1,79 @@
|
||||
"""create document tables
|
||||
|
||||
Revision ID: 0001
|
||||
Revises:
|
||||
Create Date: 2026-04-14
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
revision: str = "0001"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"documents",
|
||||
sa.Column("id", sa.String(), primary_key=True),
|
||||
sa.Column("user_id", sa.String(), nullable=False),
|
||||
sa.Column("filename", sa.String(), nullable=False),
|
||||
sa.Column("file_path", sa.String(), nullable=False),
|
||||
sa.Column("file_size", sa.Integer(), nullable=False),
|
||||
sa.Column("status", sa.String(), nullable=False),
|
||||
sa.Column("document_type", sa.String(), nullable=True),
|
||||
sa.Column("raw_text", sa.Text(), nullable=True),
|
||||
sa.Column("extracted_data", sa.Text(), nullable=True),
|
||||
sa.Column("tags", sa.Text(), nullable=True),
|
||||
sa.Column("error_message", sa.String(500), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("processed_at", sa.DateTime(timezone=True), nullable=True),
|
||||
)
|
||||
op.create_index("ix_documents_user_id", "documents", ["user_id"])
|
||||
|
||||
op.create_table(
|
||||
"document_categories",
|
||||
sa.Column("id", sa.String(), primary_key=True),
|
||||
sa.Column("user_id", sa.String(), nullable=False),
|
||||
sa.Column("name", sa.String(128), nullable=False),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
op.create_index("ix_document_categories_user_id", "document_categories", ["user_id"])
|
||||
|
||||
op.create_table(
|
||||
"document_category_assignments",
|
||||
sa.Column(
|
||||
"document_id",
|
||||
sa.String(),
|
||||
sa.ForeignKey("documents.id", ondelete="CASCADE"),
|
||||
primary_key=True,
|
||||
),
|
||||
sa.Column(
|
||||
"category_id",
|
||||
sa.String(),
|
||||
sa.ForeignKey("document_categories.id", ondelete="CASCADE"),
|
||||
primary_key=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_table("document_category_assignments")
|
||||
op.drop_index("ix_document_categories_user_id", "document_categories")
|
||||
op.drop_table("document_categories")
|
||||
op.drop_index("ix_documents_user_id", "documents")
|
||||
op.drop_table("documents")
|
||||
Reference in New Issue
Block a user