# Business-Management/features/doc-service/app/services/config_reader.py

"""
Reads doc_service_config.json from the shared config volume.
Caches the result for 30 seconds to avoid hitting the filesystem on every request.
Uses asyncio.to_thread so the synchronous file read doesn't block the event loop.

Env var overrides (take precedence over the JSON config file, never committed):
  AI_PROVIDER          — "lmstudio" | "ollama" | "anthropic"
  LMSTUDIO_BASE_URL    — e.g. http://host.docker.internal:1234/v1
  LMSTUDIO_API_KEY
  LMSTUDIO_MODEL
  OLLAMA_BASE_URL
  OLLAMA_MODEL
  OLLAMA_API_KEY
  ANTHROPIC_API_KEY
  ANTHROPIC_MODEL
"""
import asyncio
import json
import os
import time
from copy import deepcopy
from pathlib import Path

from app.core.config import settings

# Built-in configuration; _read_config_sync() starts from a copy of this
# when the file at settings.CONFIG_PATH does not exist.
_DEFAULT_CONFIG: dict = {
    "ai": {
        # LM Studio on the host machine (port 1234) is the default provider.
        # host.docker.internal resolves to the host from inside Docker; when
        # running outside Docker, use http://localhost:1234/v1 instead.
        "provider": "lmstudio",
        "anthropic": {
            "api_key": "",
            "model": "claude-haiku-4-5-20251001",
        },
        "ollama": {
            "base_url": "http://host.docker.internal:11434/v1",
            "model": "llama3.2",
            "api_key": "ollama",
        },
        "lmstudio": {
            "base_url": "http://host.docker.internal:1234/v1",
            "model": "local-model",
            "api_key": "lm-studio",
        },
    },
    "documents": {
        # 20 MiB
        "max_pdf_bytes": 20 * 1024 * 1024,
    },
}

_cache: dict | None = None
_cache_at: float = 0.0
_CACHE_TTL = 30.0


def _read_config_sync() -> dict:
    """
    Read the config file at settings.CONFIG_PATH and apply env var overrides.

    When the file does not exist, a copy of _DEFAULT_CONFIG is used as the
    base instead. This function blocks on file I/O; async callers should run
    it through asyncio.to_thread.

    Returns:
        The parsed (or default) config with environment overrides merged in.

    Raises:
        json.JSONDecodeError: the file exists but is not valid JSON.
        OSError: the file exists but cannot be opened or read.
    """
    path = Path(settings.CONFIG_PATH)
    try:
        # Open directly instead of checking exists() first: the file lives on
        # a shared volume and may disappear between a check and the open.
        # JSON is decoded as UTF-8 explicitly so the result does not depend
        # on the container's locale.
        with open(path, encoding="utf-8") as f:
            base = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Same paths Path.exists() reports as missing: no such file, or a
        # parent component that is not a directory.
        base = deepcopy(_DEFAULT_CONFIG)
    return _apply_env_overrides(base)


def _apply_env_overrides(config: dict) -> dict:
    """
    Return a deep copy of ``config`` with environment overrides applied.

    Any listed environment variable that is set to a non-empty value replaces
    the corresponding entry under ``config["ai"]``; unset or empty variables
    leave the existing value alone. The "ai" section and its "lmstudio",
    "ollama" and "anthropic" sub-sections are created (empty) when missing.
    The input dict itself is never modified.

    This lets the dev .env file pin the AI connection without writing to the
    shared volume (which would affect all users).
    """
    # (sub-section, ((environment variable, config key), ...)), listed in the
    # order the sections and keys are written.
    section_overrides = (
        (
            "lmstudio",
            (
                ("LMSTUDIO_BASE_URL", "base_url"),
                ("LMSTUDIO_API_KEY", "api_key"),
                ("LMSTUDIO_MODEL", "model"),
            ),
        ),
        (
            "ollama",
            (
                ("OLLAMA_BASE_URL", "base_url"),
                ("OLLAMA_MODEL", "model"),
                ("OLLAMA_API_KEY", "api_key"),
            ),
        ),
        (
            "anthropic",
            (
                ("ANTHROPIC_API_KEY", "api_key"),
                ("ANTHROPIC_MODEL", "model"),
            ),
        ),
    )

    merged = deepcopy(config)
    ai_section = merged.setdefault("ai", {})

    chosen_provider = os.environ.get("AI_PROVIDER")
    if chosen_provider:
        ai_section["provider"] = chosen_provider

    for section_name, env_to_key in section_overrides:
        section = ai_section.setdefault(section_name, {})
        for env_name, key in env_to_key:
            value = os.environ.get(env_name)
            if value:
                section[key] = value

    return merged


async def load_doc_config() -> dict:
    """
    Return the doc-service config, served from the module-level cache.

    While the cached value is younger than _CACHE_TTL seconds it is returned
    as-is. Otherwise _read_config_sync runs in a worker thread (so the file
    read does not block the event loop) and its result replaces the cache.
    An exception from the read propagates and leaves the cache untouched.

    The returned dict is the cached object itself, not a copy, so callers
    should treat it as read-only.
    """
    global _cache, _cache_at
    checked_at = time.monotonic()
    cache_is_fresh = _cache is not None and (checked_at - _cache_at) < _CACHE_TTL
    if not cache_is_fresh:
        fresh_config = await asyncio.to_thread(_read_config_sync)
        _cache = fresh_config
        # The timestamp is the one taken before the read started.
        _cache_at = checked_at
    return _cache