00466a9801
Introduces a manifest contract so feature containers self-describe their settings (JSON Schema + access rules). Backend and frontend gain generic plugin proxy and dynamic Extensions UI with zero feature-specific code. Doc-service is the first plugin consumer: exposes /plugin/manifest and /plugin/settings, adds a watchdog-based file watcher that auto-ingests PDFs from a mounted directory, maps subfolders to categories, supports AI-suggested folder/filename (user-confirmed), and enforces a no-remove policy. Access is gated by is_superuser or doc-service-admin group. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
134 lines
4.2 KiB
Python
134 lines
4.2 KiB
Python
"""
|
|
Reads doc_service_config.json from the shared config volume.
The parsed config is cached for 30 seconds (TTL); environment variables
override values read from the file.

Env var overrides (all optional):
    DOC_MAX_PDF_MB — max upload size in megabytes (e.g. "50")
|
|
"""
|
|
import asyncio
import json
import logging
import os
import time
from copy import deepcopy
from pathlib import Path

from app.core.config import settings
|
|
|
|
# Fallback values for the "storage" block of the config; any key missing from
# the on-disk config is filled in from here by get_storage_config().
_DEFAULT_STORAGE_CONFIG: dict = {
    # Watcher settings (presumably the directory watcher that ingests PDFs —
    # the consumer of these keys is not in this module).
    "watch_enabled": False,
    "watch_path": "/data/watch",
    # AI suggestion toggles and the default folder name.
    "ai_folder_suggestion": False,
    "ai_folder_default": "imports",
    "ai_rename_suggestion": False,
}
|
|
|
|
# Default "system" prompt stored under system_prompts in the default config.
# Built from adjacent literals; the pieces concatenate into a single line.
_DEFAULT_SYSTEM_PROMPT = (
    "You are a financial document analysis assistant. "
    "Given the text extracted from a PDF document, return ONLY a JSON object "
    "with no markdown, no code fences, and no explanation."
)
|
|
|
|
# Default "user_template" prompt stored under system_prompts in the default
# config. The only placeholder is {text}; one requested JSON key is described
# per line, followed by a blank line and the document text.
_DEFAULT_USER_TEMPLATE = (
    'Analyze the following document text and return a JSON object with exactly these keys:\n'
    'title (a short, descriptive human-readable title for this document, e.g. "ACME Corp Invoice April 2026", "Office Supplies Receipt", "Q1 Flower Delivery Order"),\n'
    'document_type (one of: invoice, bill, receipt, order, expense, revenue, unknown),\n'
    'total_amount (string or null),\n'
    'currency (string or null),\n'
    'vendor_name (string or null),\n'
    'customer_name (string or null),\n'
    'billing_address (string or null),\n'
    'customer_address (string or null),\n'
    'invoice_number (string or null),\n'
    'invoice_date (string or null),\n'
    'due_date (string or null),\n'
    'tags (array of short keyword strings describing the document),\n'
    'line_items (array of objects, each with keys: description, amount),\n'
    'suggested_categories (array of 2 to 5 short category name strings a user might want to file this document under, e.g. "Utilities", "Travel", "Software Subscriptions", "Client Invoices").\n'
    '\n'
    'Document text:\n'
    '{text}'
)
|
|
|
|
# Complete default config, used when the config file does not exist.
# NOTE: "storage" holds a reference to _DEFAULT_STORAGE_CONFIG, not a copy;
# the readers in this module deepcopy this dict before handing it out.
_DEFAULT_CONFIG: dict = {
    "documents": {
        # 20 MiB upload limit, expressed in bytes.
        "max_pdf_bytes": 20 * 1024 * 1024,
    },
    "storage": _DEFAULT_STORAGE_CONFIG,
    "system_prompts": {
        "system": _DEFAULT_SYSTEM_PROMPT,
        "user_template": _DEFAULT_USER_TEMPLATE,
    },
}
|
|
|
|
_cache: dict | None = None
|
|
_cache_at: float = 0.0
|
|
_CACHE_TTL = 30.0
|
|
|
|
|
|
def _read_config_sync() -> dict:
    """Return the effective config: file contents (or defaults) plus env overrides.

    Performs blocking file I/O, so async callers run it through
    ``asyncio.to_thread``.

    Returns:
        A dict that is independent of ``_DEFAULT_CONFIG``
        (``_apply_env_overrides`` works on a deep copy of what was read).

    Raises:
        json.JSONDecodeError: if the config file exists but is not valid JSON.
    """
    # Delegate the file handling to the raw reader so the "file or defaults"
    # logic lives in exactly one place.
    return _apply_env_overrides(_read_config_sync_raw())
|
|
|
|
|
|
def _read_config_sync_raw() -> dict:
    """Read without env overrides — used when we need to write back to disk.

    Returns:
        The parsed JSON from ``settings.CONFIG_PATH``, or a deep copy of
        ``_DEFAULT_CONFIG`` when the file does not exist.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
        OSError: for I/O failures other than the file being absent.
    """
    path = Path(settings.CONFIG_PATH)
    try:
        # Open directly instead of checking exists() first: the file could
        # disappear between the check and the open. UTF-8 is stated
        # explicitly so parsing does not depend on the process locale.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No config written yet: hand out a private copy of the defaults.
        return deepcopy(_DEFAULT_CONFIG)
|
|
|
|
|
|
def _write_config_sync(config: dict) -> None:
    """Atomically write config JSON to disk.

    The JSON is written to a sibling ``.tmp`` file first and then moved over
    ``settings.CONFIG_PATH`` with ``os.replace``, so readers never see a
    partially written file.

    Args:
        config: JSON-serialisable config dict to persist.

    Raises:
        TypeError: if *config* contains values ``json.dump`` cannot serialise.
        OSError: if the directory or file cannot be written.
    """
    path = Path(settings.CONFIG_PATH)
    tmp = path.with_suffix(".tmp")
    tmp.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Explicit UTF-8 keeps the file readable regardless of the locale.
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a half-written temp file behind; the real config file
        # is untouched because the replace never happened.
        tmp.unlink(missing_ok=True)
        raise
|
|
|
|
|
|
def _apply_env_overrides(config: dict) -> dict:
    """Return a deep copy of *config* with environment overrides applied.

    Supported overrides:
        DOC_MAX_PDF_MB: positive integer number of megabytes; stored as
            bytes under ``documents.max_pdf_bytes``.

    An unset or empty variable leaves the config value alone. A value that is
    not a positive integer is ignored with a warning, so a typo in the
    environment cannot silently disable uploads or crash config loading.

    Args:
        config: Config dict as read from disk or defaults; it is not mutated.

    Returns:
        A new dict that always contains a ``"documents"`` key.
    """
    cfg = deepcopy(config)
    docs = cfg.setdefault("documents", {})
    if v := os.environ.get("DOC_MAX_PDF_MB"):
        try:
            megabytes = int(v)
        except ValueError:
            megabytes = None
        if megabytes is not None and megabytes > 0:
            docs["max_pdf_bytes"] = megabytes * 1024 * 1024
        else:
            logging.getLogger(__name__).warning(
                "Ignoring invalid DOC_MAX_PDF_MB=%r (expected a positive integer)",
                v,
            )
    return cfg
|
|
|
|
|
|
async def load_doc_config() -> dict:
    """Return the effective config, re-reading the file at most every ``_CACHE_TTL`` seconds.

    The blocking read runs in a worker thread so the event loop is not
    stalled by file I/O.

    Returns:
        A deep copy of the cached config, so callers may modify the result
        without corrupting what later callers receive from the cache.
    """
    global _cache, _cache_at
    now = time.monotonic()
    if _cache is None or (now - _cache_at) >= _CACHE_TTL:
        _cache = await asyncio.to_thread(_read_config_sync)
        # Stamp with the time taken before the read started, so the entry
        # never appears fresher than the data it holds.
        _cache_at = now
    return deepcopy(_cache)
|
|
|
|
|
|
async def get_storage_config() -> dict:
    """Return storage config block, filling in defaults for any missing keys.

    Returns:
        A new dict: a copy of ``_DEFAULT_STORAGE_CONFIG`` overlaid with the
        ``"storage"`` block of the loaded config.
    """
    config = await load_doc_config()
    result = deepcopy(_DEFAULT_STORAGE_CONFIG)
    stored = config.get("storage")
    # A hand-edited file may contain `"storage": null` (or another
    # non-object); treat that as "no overrides" rather than raising.
    if isinstance(stored, dict):
        result.update(stored)
    return result
|
|
|
|
|
|
async def save_storage_config(data: dict) -> None:
    """Merge data into the storage config block and persist to disk.

    The file is read without env overrides, so values that only come from the
    environment are never written back into the file.

    Args:
        data: Storage keys to set; keys not mentioned keep their stored value.
    """
    global _cache, _cache_at
    raw = await asyncio.to_thread(_read_config_sync_raw)
    storage = raw.get("storage")
    if not isinstance(storage, dict):
        # Missing, null, or otherwise malformed block: start from an empty one.
        storage = raw["storage"] = {}
    storage.update(data)
    await asyncio.to_thread(_write_config_sync, raw)
    # Invalidate cache so next read picks up the new values
    _cache = None
    _cache_at = 0.0
|