Add PDF document service with AI extraction and per-app settings
- New `features/doc-service` FastAPI microservice: PDF upload, async text extraction (pdfplumber), AI classification via Anthropic/Ollama/LM Studio, per-user categories, file download
- Alembic migration isolated with `alembic_version_doc_service` table
- Main backend: httpx proxy routers for /api/documents/* and /api/documents/categories/*, admin settings API at /api/settings/*
- Runtime config in /config/doc_service_config.json (shared Docker volume); api_key masking on reads; atomic write with os.replace()
- Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage launcher hub, simplified Nav (removed Settings link), new routes
- docker-compose: doc-service service, doc_data + app_config volumes, removed internal:true from backend-net for outbound AI API calls
- Fix pre-commit hook: probe Docker socket path so git subprocess picks up Docker Desktop on macOS
- Fix security_check.py: use sys.executable for bandit so venv python is used instead of system python

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Per-service runtime config helpers.

Config files live on the shared `app_config` Docker volume at /config/.
Each service has its own JSON file, e.g. /config/doc_service_config.json.

Atomic write pattern: write to a .tmp file in the same directory, then
os.replace() it over the target so doc-service never reads a partial file.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Directory holding the per-service JSON config files. The APP_CONFIG_DIR
# environment variable overrides the default location of /config.
_CONFIG_DIR = Path(os.getenv("APP_CONFIG_DIR", "/config"))
|
||||
|
||||
# ── Config schemas ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class AnthropicConfig(BaseModel):
    """Settings section for the Anthropic provider."""

    # Empty by default, i.e. no key has been configured yet.
    api_key: str = ""
    # Model name used unless the stored config overrides it.
    model: str = "claude-haiku-4-5-20251001"
|
||||
|
||||
|
||||
class OllamaConfig(BaseModel):
    """Settings section for the Ollama provider."""

    # NOTE(review): the host "192.168.1.x" looks like a placeholder rather
    # than a reachable address; presumably it must be overridden — confirm.
    base_url: str = "http://192.168.1.x:11434/v1"
    # Model name used unless the stored config overrides it.
    model: str = "llama3.2"
    # Defaults to the literal string "ollama" rather than an empty key.
    api_key: str = "ollama"
|
||||
|
||||
|
||||
class LMStudioConfig(BaseModel):
    """Settings section for the LM Studio provider."""

    # NOTE(review): the host "192.168.1.x" looks like a placeholder rather
    # than a reachable address; presumably it must be overridden — confirm.
    base_url: str = "http://192.168.1.x:1234/v1"
    # Model name used unless the stored config overrides it.
    model: str = "local-model"
    # Empty by default, i.e. no key has been configured yet.
    api_key: str = ""
|
||||
|
||||
|
||||
class AIConfig(BaseModel):
    """AI settings: the selected provider name plus one section per provider."""

    # Plain string with no validation here. Presumably one of the section
    # names below ("anthropic", "ollama", "lmstudio") — TODO confirm against
    # the code that consumes it.
    provider: str = "anthropic"
    # One settings section per provider, each defaulting to its own defaults.
    anthropic: AnthropicConfig = AnthropicConfig()
    ollama: OllamaConfig = OllamaConfig()
    lmstudio: LMStudioConfig = LMStudioConfig()
|
||||
|
||||
|
||||
class DocumentsConfig(BaseModel):
    """Document-handling settings."""

    # 20 MiB expressed in bytes. Presumably the upload size cap for PDFs —
    # TODO confirm where this limit is enforced.
    max_pdf_bytes: int = 20 * 1024**2
|
||||
|
||||
|
||||
class DocServiceConfig(BaseModel):
    """Top-level schema of the doc-service config: ``ai`` and ``documents``."""

    # AI provider selection and per-provider settings.
    ai: AIConfig = AIConfig()
    # Document-handling settings.
    documents: DocumentsConfig = DocumentsConfig()
|
||||
|
||||
|
||||
# ── Masking ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _mask_key(key: str) -> str:
    """Return a display-safe form of an API key.

    A key longer than 8 characters keeps its first 7 characters followed by
    the mask. An empty key or one of 8 characters or fewer is replaced by
    the mask alone.
    """
    placeholder = "••••"
    if key and len(key) > 8:
        return key[:7] + placeholder
    return placeholder
|
||||
|
||||
|
||||
def _mask_config(data: dict) -> dict:
    """Return a deep copy of *data* with provider ``api_key`` values masked.

    Only the ``anthropic``, ``ollama`` and ``lmstudio`` sections under the
    top-level ``ai`` key are inspected. The input dict is never modified.

    A missing section, or one that is not a dict (for example ``null`` in a
    hand-edited config file), is left untouched instead of raising
    ``TypeError``.
    """
    import copy

    masked = copy.deepcopy(data)
    ai = masked.get("ai")
    if not isinstance(ai, dict):
        # Nothing that could hold provider sections, so nothing to mask.
        return masked
    for provider in ("anthropic", "ollama", "lmstudio"):
        section = ai.get(provider)
        # Guard against non-dict sections: `"api_key" in None` would raise.
        if isinstance(section, dict) and "api_key" in section:
            section["api_key"] = _mask_key(section["api_key"])
    return masked
|
||||
|
||||
|
||||
# ── Load / Save ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _config_path(service: str) -> Path:
    """Return the config file path for *service* inside the config directory.

    The file name is ``<service>_config.json``.
    """
    filename = f"{service}_config.json"
    return _CONFIG_DIR.joinpath(filename)
|
||||
|
||||
|
||||
def load_service_config(service: str) -> dict:
    """Load the raw JSON config for *service*.

    Returns the parsed contents of the service's config file. When the file
    does not exist yet, returns the ``DocServiceConfig`` defaults for
    ``"doc_service"`` and an empty dict for any other service.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    path = _config_path(service)
    try:
        # Open directly rather than checking exists() first, so a file that
        # disappears between the check and the open still yields defaults.
        # JSON is read as UTF-8 explicitly instead of the locale encoding.
        f = path.open(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError keeps parity with Path.exists(), which reports
        # False when a parent path component is not a directory.
        if service == "doc_service":
            return DocServiceConfig().model_dump()
        return {}
    with f:
        return json.load(f)
|
||||
|
||||
|
||||
def save_service_config(service: str, data: dict) -> None:
    """Write *data* as indented JSON to the config file for *service*.

    The JSON goes to a sibling ``.tmp`` file first and is then moved over
    the real path in one replace step, so a reader does not see a
    half-written file. The parent directory is created if missing.
    """
    target = _config_path(service)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2)
    staging = target.with_suffix(".tmp")
    staging.write_text(payload)
    # Path.replace delegates to os.replace, overwriting the target.
    staging.replace(target)
|
||||
|
||||
|
||||
def load_doc_service_config() -> DocServiceConfig:
    """Load the doc-service config and validate it into a ``DocServiceConfig``."""
    return DocServiceConfig.model_validate(load_service_config("doc_service"))
|
||||
|
||||
|
||||
def save_doc_service_config(config: DocServiceConfig) -> None:
    """Dump *config* to a plain dict and save it as the doc-service config."""
    as_dict = config.model_dump()
    save_service_config("doc_service", as_dict)
|
||||
|
||||
|
||||
def load_doc_service_config_masked() -> dict:
    """Return the doc-service config as a plain dict with API keys masked.

    The loaded dict is masked as-is; it is not validated against
    ``DocServiceConfig`` first.
    """
    return _mask_config(load_service_config("doc_service"))
|
||||
|
||||
|
||||
def _merge_api_key(new_key: str, existing_key: str) -> str:
    """Pick the API key to keep.

    Returns *existing_key* when *new_key* is empty or contains the mask
    string; otherwise returns *new_key*.
    """
    keep_existing = not new_key or "••••" in new_key
    return existing_key if keep_existing else new_key
|
||||
Reference in New Issue
Block a user