""" Reads doc_service_config.json from the shared config volume. 30-second TTL cache + env var overrides. Env var overrides (all optional): DOC_MAX_PDF_MB — max upload size in megabytes (e.g. "50") """ import asyncio import json import os import time from copy import deepcopy from pathlib import Path from app.core.config import settings _DEFAULT_SYSTEM_PROMPT = ( "You are a financial document analysis assistant. " "Given the text extracted from a PDF document, return ONLY a JSON object " "with no markdown, no code fences, and no explanation." ) _DEFAULT_USER_TEMPLATE = ( 'Analyze the following document text and return a JSON object with exactly these keys:\n' 'title (a short, descriptive human-readable title for this document, e.g. "ACME Corp Invoice April 2026", "Office Supplies Receipt", "Q1 Flower Delivery Order"),\n' 'document_type (one of: invoice, bill, receipt, order, expense, revenue, unknown),\n' 'total_amount (string or null),\n' 'currency (string or null),\n' 'vendor_name (string or null),\n' 'customer_name (string or null),\n' 'billing_address (string or null),\n' 'customer_address (string or null),\n' 'invoice_number (string or null),\n' 'invoice_date (string or null),\n' 'due_date (string or null),\n' 'tags (array of short keyword strings describing the document),\n' 'line_items (array of objects, each with keys: description, amount),\n' 'suggested_categories (array of 2 to 5 short category name strings a user might want to file this document under, e.g. "Utilities", "Travel", "Software Subscriptions", "Client Invoices").\n' '\n' 'Document text:\n' '{text}' ) _DEFAULT_CONFIG: dict = { "documents": {"max_pdf_bytes": 20 * 1024 * 1024}, "system_prompts": { "system": _DEFAULT_SYSTEM_PROMPT, "user_template": _DEFAULT_USER_TEMPLATE, }, } _cache: dict | None = None _cache_at: float = 0.0 _CACHE_TTL = 30.0 def _read_config_sync() -> dict: path = Path(settings.CONFIG_PATH) if not path.exists(): base = deepcopy(_DEFAULT_CONFIG) else: with open(path) as f: base = json.load(f) return _apply_env_overrides(base) def _apply_env_overrides(config: dict) -> dict: cfg = deepcopy(config) docs = cfg.setdefault("documents", {}) if v := os.environ.get("DOC_MAX_PDF_MB"): try: docs["max_pdf_bytes"] = int(v) * 1024 * 1024 except ValueError: pass return cfg async def load_doc_config() -> dict: global _cache, _cache_at now = time.monotonic() if _cache is not None and (now - _cache_at) < _CACHE_TTL: return _cache data = await asyncio.to_thread(_read_config_sync) _cache = data _cache_at = now return data