Files
Business-Management/features/doc-service/app/services/config_reader.py
T
curo1305 4c35d7a2a4 feat: migrate app_config volume to storage-service config bucket (Phase 3)
All JSON config files (AI settings, doc settings, appearance, themes) now live
in the 'config' bucket of storage-service instead of a shared Docker volume.

- backend/core/config_storage.py: new async HTTP helpers for config bucket r/w
- backend/core/app_config.py: fully async rewrite; all load_*/save_*/seed_*
  functions use config_storage instead of filesystem
- backend/routers/settings.py: all asyncio.to_thread() wrappers removed; direct
  await calls throughout; update_theme reads via load_theme_by_id()
- backend/main.py: await seed_builtin_themes() directly (no to_thread)
- ai-service: remove CONFIG_PATH, add STORAGE_SERVICE_URL; config_reader now
  fetches from storage-service via httpx
- doc-service: config_reader rewritten to fetch/write via storage-service
- docker-compose: remove app_config volume; add storage-service depends_on for
  ai-service; remove DATA_DIR and CONFIG_PATH from doc-service

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 16:02:57 +02:00

132 lines
4.1 KiB
Python

"""
Reads and writes doc_service_config.json in the storage-service config bucket.
Reads are cached for 30 seconds (TTL) with env var overrides already applied.
Env var overrides (all optional):
DOC_MAX_PDF_MB — max upload size in megabytes (e.g. "50")
"""
import os
import time
from copy import deepcopy
import httpx
from app.core.config import settings
# Object key of this service's config document in the storage-service "config" bucket.
_CONFIG_KEY = "doc_service_config.json"

# Fallback values for the "storage" section of the config document; used both
# as part of the full default document and to fill in keys missing from a
# stored document.
_DEFAULT_STORAGE_CONFIG: dict = dict(
    watch_enabled=False,
    watch_path="/data/watch",
    ai_folder_suggestion=False,
    ai_folder_default="imports",
    ai_rename_suggestion=False,
)
# Default system prompt; stored under system_prompts["system"] in the default
# config document. The adjacent string literals are concatenated into a single
# string by the parser.
_DEFAULT_SYSTEM_PROMPT = (
"You are a financial document analysis assistant. "
"Given the text extracted from a PDF document, return ONLY a JSON object "
"with no markdown, no code fences, and no explanation."
)
# Default user prompt template; stored under system_prompts["user_template"].
# It enumerates the JSON keys the model is asked to return and ends with a
# literal "{text}" placeholder.
# NOTE(review): presumably the caller substitutes the extracted document text
# for "{text}" (e.g. via str.format) - confirm against the code that consumes
# this template before adding any other braces to it.
_DEFAULT_USER_TEMPLATE = (
'Analyze the following document text and return a JSON object with exactly these keys:\n'
'title (a short, descriptive human-readable title for this document, e.g. "ACME Corp Invoice April 2026", "Office Supplies Receipt", "Q1 Flower Delivery Order"),\n'
'document_type (one of: invoice, bill, receipt, order, expense, revenue, unknown),\n'
'total_amount (string or null),\n'
'currency (string or null),\n'
'vendor_name (string or null),\n'
'customer_name (string or null),\n'
'billing_address (string or null),\n'
'customer_address (string or null),\n'
'invoice_number (string or null),\n'
'invoice_date (string or null),\n'
'due_date (string or null),\n'
'tags (array of short keyword strings describing the document),\n'
'line_items (array of objects, each with keys: description, amount),\n'
'suggested_categories (array of 2 to 5 short category name strings a user might want to file this document under, e.g. "Utilities", "Travel", "Software Subscriptions", "Client Invoices").\n'
'\n'
'Document text:\n'
'{text}'
)
# Complete default config document, used when storage-service has no config
# object yet.
# NOTE: the "storage" entry is the very same object as _DEFAULT_STORAGE_CONFIG,
# so only deep copies of this document should ever be handed out or mutated.
_DEFAULT_CONFIG: dict = dict(
    documents=dict(max_pdf_bytes=20 * 1024 * 1024),
    storage=_DEFAULT_STORAGE_CONFIG,
    system_prompts=dict(
        system=_DEFAULT_SYSTEM_PROMPT,
        user_template=_DEFAULT_USER_TEMPLATE,
    ),
)

# In-process cache: how long an entry stays fresh (seconds), the last loaded
# config (None until the first load or after invalidation), and the
# time.monotonic() stamp taken when that entry was loaded.
_CACHE_TTL = 30.0
_cache: dict | None = None
_cache_at: float = 0.0
def _storage_url() -> str:
    """Return the storage-service URL of the doc-service config object.

    The base comes from ``settings.STORAGE_SERVICE_URL``. Trailing slashes on
    the base are stripped so that a value such as ``http://storage:8000/`` does
    not produce ``//objects/...``. If such a path misses the route,
    ``_fetch_config`` would read the resulting 404 as "no config stored yet"
    and silently fall back to the defaults.
    """
    # str() keeps this working if the setting is a URL object rather than a
    # plain string; the original f-string interpolation stringified it too.
    base = str(settings.STORAGE_SERVICE_URL).rstrip("/")
    return f"{base}/objects/config/{_CONFIG_KEY}"
async def _fetch_config() -> dict:
    """Fetch the doc-service config document from storage-service.

    Returns:
        The decoded JSON object, or a deep copy of ``_DEFAULT_CONFIG`` when
        storage-service answers 404 (no config object stored).

    Raises:
        httpx.HTTPStatusError: for any other unsuccessful response status.
        httpx.HTTPError: for transport-level failures such as timeouts.
        ValueError: if the body is not valid JSON, or decodes to something
            other than a JSON object.
    """
    async with httpx.AsyncClient(timeout=10.0) as client:
        resp = await client.get(_storage_url())
        if resp.status_code == 404:
            # Copy so callers may mutate the result without touching the
            # module-level defaults.
            return deepcopy(_DEFAULT_CONFIG)
        resp.raise_for_status()
        payload = resp.json()
    # Every caller treats the result as a dict (setdefault/get/update). Fail
    # here with a clear message instead of an AttributeError further down if
    # the stored document is e.g. `null` or a list.
    if not isinstance(payload, dict):
        raise ValueError(
            f"{_CONFIG_KEY} must contain a JSON object, "
            f"got {type(payload).__name__}"
        )
    return payload
async def _write_config(data: dict) -> None:
    """Serialize *data* as indented JSON and PUT it to the config object URL.

    The request uses a 10-second timeout. An unsuccessful response status is
    turned into an exception by ``raise_for_status``.
    """
    import json

    body = json.dumps(data, indent=2).encode()
    request_headers = {"Content-Type": "application/octet-stream"}
    async with httpx.AsyncClient(timeout=10.0) as http:
        response = await http.put(
            _storage_url(),
            content=body,
            headers=request_headers,
        )
        response.raise_for_status()
def _apply_env_overrides(config: dict) -> dict:
    """Return a deep copy of *config* with environment overrides applied.

    Supported override:
        DOC_MAX_PDF_MB: positive integer number of megabytes; stored as
            ``config["documents"]["max_pdf_bytes"]`` in bytes.

    The input is never mutated. The returned copy always has a "documents"
    key. An unset or empty variable leaves the configured value alone. A value
    that is not a positive integer is ignored with a logged warning, so a typo
    or a zero/negative limit cannot silently change or disable uploads.
    """
    # Function-scope import, like `import json` in _write_config, so this
    # block does not depend on a new module-level import.
    import logging

    cfg = deepcopy(config)
    docs = cfg.setdefault("documents", {})

    raw = os.environ.get("DOC_MAX_PDF_MB")
    if not raw:
        return cfg

    try:
        megabytes = int(raw)
    except ValueError:
        megabytes = None

    if megabytes is None or megabytes <= 0:
        logging.getLogger(__name__).warning(
            "Ignoring invalid DOC_MAX_PDF_MB=%r; expected a positive integer",
            raw,
        )
        return cfg

    docs["max_pdf_bytes"] = megabytes * 1024 * 1024
    return cfg
async def load_doc_config() -> dict:
    """Return the effective doc-service config, using the module-level cache.

    A cached entry younger than ``_CACHE_TTL`` seconds is returned as-is.
    Otherwise the config is fetched from storage-service, env overrides are
    applied, and the result replaces the cache entry.

    The returned dict is the cached object itself, not a copy, so callers
    must treat it as read-only.
    """
    global _cache, _cache_at

    started = time.monotonic()
    cache_is_fresh = _cache is not None and started - _cache_at < _CACHE_TTL
    if cache_is_fresh:
        return _cache

    effective = _apply_env_overrides(await _fetch_config())
    # The entry is stamped with the time taken before the fetch began.
    _cache, _cache_at = effective, started
    return effective
async def get_storage_config() -> dict:
    """Return the "storage" section of the config, backed by the defaults.

    Starts from a deep copy of ``_DEFAULT_STORAGE_CONFIG`` and overlays
    whatever the loaded config has under "storage". The result is a new dict,
    so callers may modify it freely.
    """
    loaded = await load_doc_config()
    stored_section = loaded.get("storage", {})
    merged = deepcopy(_DEFAULT_STORAGE_CONFIG)
    merged.update(stored_section)
    return merged
async def save_storage_config(data: dict) -> None:
    """Overlay *data* onto the stored "storage" section and persist the result.

    The current document is re-fetched from storage-service (bypassing the
    cache), its "storage" section is updated with *data*, and the whole
    document is written back. The cache is cleared only after the write
    succeeds, so the next load picks up the saved values.
    """
    global _cache, _cache_at

    document = await _fetch_config()
    storage_section = document.setdefault("storage", {})
    storage_section.update(data)
    await _write_config(document)

    # Invalidate so the next load_doc_config() call goes back to storage-service.
    _cache, _cache_at = None, 0.0