Add generic plugin architecture and watch-directory feature

Introduces a manifest contract so feature containers self-describe their
settings (JSON Schema + access rules). The backend and frontend gain a generic
plugin proxy and a dynamic Extensions UI with zero feature-specific code.

Doc-service is the first plugin consumer: exposes /plugin/manifest and
/plugin/settings, adds a watchdog-based file watcher that auto-ingests
PDFs from a mounted directory, maps subfolders to categories, supports
AI-suggested folder/filename (user-confirmed), and enforces a no-remove
policy. Access is gated by is_superuser or doc-service-admin group.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-18 02:09:50 +02:00
parent 2d7207b62f
commit 00466a9801
29 changed files with 1373 additions and 52 deletions
@@ -5,6 +5,8 @@ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import or_
from app.database import AsyncSessionLocal, get_db
from app.deps import get_user_id
from app.models.category import DocumentCategory
@@ -15,6 +17,9 @@ from app.services.ai_client import classify_document
router = APIRouter()
# Sentinel user_id for watch-ingested categories — must match documents.py
_WATCH_USER_ID = "watch"
_SIMILARITY_THRESHOLD = 0.4
@@ -81,9 +86,10 @@ async def list_categories(
user_id: str = Depends(get_user_id),
db: AsyncSession = Depends(get_db),
) -> list[DocumentCategory]:
# Include watch-ingested categories so they appear in the sidebar/filter
result = await db.execute(
select(DocumentCategory)
.where(DocumentCategory.user_id == user_id)
.where(or_(DocumentCategory.user_id == user_id, DocumentCategory.user_id == _WATCH_USER_ID))
.order_by(DocumentCategory.name)
)
return result.scalars().all()
+106 -6
View File
@@ -26,13 +26,21 @@ router = APIRouter()
_DEFAULT_MAX_BYTES = 20 * 1024 * 1024
# Sentinel user_id used for watch-directory-ingested documents.
# These documents are visible to all authenticated users.
_WATCH_USER_ID = "watch"
# ── Helpers ───────────────────────────────────────────────────────────────────
async def _get_user_doc(doc_id: str, user_id: str, db: AsyncSession) -> Document:
"""Fetch a document owned by user_id OR a watch-ingested document (visible to all)."""
result = await db.execute(
select(Document)
.where(Document.id == doc_id, Document.user_id == user_id)
.where(
Document.id == doc_id,
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
)
.options(
selectinload(Document.category_assignments)
.selectinload(CategoryAssignment.category)
@@ -61,6 +69,10 @@ def _doc_with_categories(doc: Document) -> DocumentOut:
created_at=doc.created_at,
processed_at=doc.processed_at,
categories=cats,
source=doc.source,
watch_path=doc.watch_path,
suggested_folder=doc.suggested_folder,
suggested_filename=doc.suggested_filename,
)
@@ -183,7 +195,8 @@ async def list_documents(
sort_expr = sort_col.desc() if order == "desc" else sort_col.asc()
# Build filter conditions once and reuse for both count + items queries.
conditions = [Document.user_id == user_id]
# Watch-ingested documents (user_id = "watch") are visible to all users.
conditions = [or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID)]
if status:
conditions.append(Document.status == status)
if document_type:
@@ -247,7 +260,10 @@ async def get_document_status(
db: AsyncSession = Depends(get_db),
) -> Document:
result = await db.execute(
select(Document).where(Document.id == doc_id, Document.user_id == user_id)
select(Document).where(
Document.id == doc_id,
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
)
)
doc = result.scalar_one_or_none()
if doc is None:
@@ -347,7 +363,10 @@ async def download_file(
db: AsyncSession = Depends(get_db),
) -> StreamingResponse:
result = await db.execute(
select(Document).where(Document.id == doc_id, Document.user_id == user_id)
select(Document).where(
Document.id == doc_id,
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
)
)
doc = result.scalar_one_or_none()
if doc is None:
@@ -374,9 +393,12 @@ async def assign_category(
user_id: str = Depends(get_user_id),
db: AsyncSession = Depends(get_db),
) -> None:
# Verify both belong to this user
# Verify the document is accessible (own or watch-ingested)
doc_result = await db.execute(
select(Document).where(Document.id == doc_id, Document.user_id == user_id)
select(Document).where(
Document.id == doc_id,
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
)
)
if doc_result.scalar_one_or_none() is None:
raise HTTPException(status_code=404, detail="Document not found")
@@ -418,3 +440,81 @@ async def remove_category(
if assignment:
await db.delete(assignment)
await db.commit()
# ── AI suggestion confirmation ────────────────────────────────────────────────
# These endpoints allow users to confirm or reject AI suggestions on
# watch-ingested documents. No disk mutations — suggestions only update the DB.
@router.post("/{doc_id}/suggestions/folder/confirm", status_code=204)
async def confirm_folder_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Apply the pending AI folder suggestion as a category assignment.

    Finds (or creates) a category named after the suggestion under the watch
    sentinel user, assigns it to the document unless it is already assigned,
    and clears the pending suggestion.

    Raises:
        HTTPException: 400 when the document has no folder suggestion pending.
    """
    doc = await _get_user_doc(doc_id, user_id, db)
    if not doc.suggested_folder:
        raise HTTPException(status_code=400, detail="No folder suggestion pending")

    # Truncate once and use the SAME value for lookup and creation. Looking up
    # the untruncated name while storing the truncated one would never match
    # for long suggestions and would insert a duplicate category on every
    # confirm. (128 is presumably the name column length — confirm in the model.)
    folder_name = doc.suggested_folder[:128]

    # Find or create the suggested category under the watch sentinel user
    cat_result = await db.execute(
        select(DocumentCategory).where(
            DocumentCategory.user_id == _WATCH_USER_ID,
            DocumentCategory.name == folder_name,
        )
    )
    cat = cat_result.scalar_one_or_none()
    if cat is None:
        cat = DocumentCategory(user_id=_WATCH_USER_ID, name=folder_name)
        db.add(cat)
        await db.commit()
        # Reload so cat.id is populated for the assignment below.
        await db.refresh(cat)

    # Assign if not already assigned
    exists = await db.execute(
        select(CategoryAssignment).where(
            CategoryAssignment.document_id == doc_id,
            CategoryAssignment.category_id == cat.id,
        )
    )
    if exists.scalar_one_or_none() is None:
        db.add(CategoryAssignment(document_id=doc_id, category_id=cat.id))

    # The suggestion has been handled; clear it so it is no longer pending.
    doc.suggested_folder = None
    await db.commit()
@router.post("/{doc_id}/suggestions/folder/reject", status_code=204)
async def reject_folder_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Discard the pending AI folder suggestion without applying it."""
    # Lookup is delegated to _get_user_doc (own or watch-ingested documents).
    document = await _get_user_doc(doc_id, user_id, db)

    # Clearing the field is the whole operation; no category is touched.
    document.suggested_folder = None
    await db.commit()
@router.post("/{doc_id}/suggestions/filename/confirm", status_code=204)
async def confirm_filename_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Adopt the pending AI filename suggestion as the document title.

    Raises:
        HTTPException: 400 when the document has no filename suggestion pending.
    """
    document = await _get_user_doc(doc_id, user_id, db)

    proposed_title = document.suggested_filename
    if not proposed_title:
        raise HTTPException(status_code=400, detail="No filename suggestion pending")

    # Only the DB title changes; the suggestion is cleared in the same commit.
    document.title = proposed_title
    document.suggested_filename = None
    await db.commit()
@router.post("/{doc_id}/suggestions/filename/reject", status_code=204)
async def reject_filename_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Discard the pending AI filename suggestion; the title is left as is."""
    # Lookup is delegated to _get_user_doc (own or watch-ingested documents).
    document = await _get_user_doc(doc_id, user_id, db)

    document.suggested_filename = None
    await db.commit()
@@ -0,0 +1,97 @@
"""
Plugin manifest and settings endpoints for doc-service.
These are internal-only — they are called by the main backend's generic plugin
proxy, never directly by the browser. No authentication is applied here because
the backend enforces access control before forwarding the request.
Endpoints:
GET /plugin/manifest → static manifest with JSON Schema for settings
GET /plugin/settings → current storage config values
PATCH /plugin/settings → update storage config (partial update)
"""
from fastapi import APIRouter
from pydantic import BaseModel
from app.services.config_reader import get_storage_config, save_storage_config
router = APIRouter()
# Static plugin manifest served by GET /plugin/manifest.
# "access" lists the access rules (superuser flag and required groups);
# "settings_schema" is a JSON Schema object describing the settings fields.
_MANIFEST: dict = {
    "id": "doc-service",
    "name": "Document Service",
    "icon": "file-text",
    "version": "1.0",
    "access": {
        "allow_superuser": True,
        "required_groups": ["doc-service-admin"],
    },
    "settings_schema": {
        "type": "object",
        "title": "Storage & Watch",
        "properties": {
            "watch_enabled": {
                "type": "boolean",
                "title": "Enable file watching",
                "description": (
                    "Automatically ingest PDF files added to the mounted watch directory. "
                    "Requires a service restart to take effect after toggling."
                ),
            },
            # Marked readOnly in the schema; StorageSettingsUpdate has no
            # matching field, so it cannot be changed through PATCH either.
            "watch_path": {
                "type": "string",
                "title": "Watch path",
                "readOnly": True,
                "description": "Configured via Docker volume mount — edit docker-compose to change.",
            },
            "ai_folder_suggestion": {
                "type": "boolean",
                "title": "AI folder suggestion",
                "description": (
                    "AI suggests a category for each ingested document. "
                    "You must confirm the suggestion before it is applied."
                ),
            },
            "ai_folder_default": {
                "type": "string",
                "title": "Default import category",
                "description": "Category assigned automatically when AI folder suggestion is disabled.",
            },
            "ai_rename_suggestion": {
                "type": "boolean",
                "title": "AI rename suggestion",
                "description": (
                    "AI suggests a document title for each ingested file. "
                    "You must confirm before it is applied."
                ),
            },
        },
    },
}
# Request body for PATCH /plugin/settings. Every field is optional and
# defaults to None; update_settings drops None values before saving, so a
# client sends only the keys it wants to change.
class StorageSettingsUpdate(BaseModel):
    watch_enabled: bool | None = None
    ai_folder_suggestion: bool | None = None
    ai_folder_default: str | None = None
    ai_rename_suggestion: bool | None = None
    # watch_path is intentionally excluded — it cannot be changed via API
@router.get("/manifest")
async def get_manifest() -> dict:
    """Return the static plugin manifest (the module-level _MANIFEST dict)."""
    return _MANIFEST
@router.get("/settings")
async def get_settings() -> dict:
    """Return the current storage configuration from get_storage_config."""
    current_config = await get_storage_config()
    return current_config
@router.patch("/settings")
async def update_settings(body: StorageSettingsUpdate) -> dict:
    """Persist a partial settings update and return the resulting config.

    Only fields with a non-None value are forwarded to save_storage_config.
    ``ai_folder_default`` is normalised first: surrounding whitespace is
    removed, the value is capped at 128 characters, and an empty result falls
    back to "imports".
    """
    update = body.model_dump(exclude_none=True)
    if "ai_folder_default" in update:
        # Strip BEFORE truncating so leading padding does not eat into the
        # 128-character budget; rstrip afterwards in case the cut lands right
        # after an inner space, keeping the stored name free of edge whitespace.
        cleaned = update["ai_folder_default"].strip()[:128].rstrip()
        update["ai_folder_default"] = cleaned or "imports"
    await save_storage_config(update)
    return await get_storage_config()