Add shared ai-service container as AI provider intermediary

All feature containers now POST messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. The doc-service AI providers are removed and replaced by an httpx-based ai_client.py. Backend settings are restructured under /api/settings/ai. The frontend gets a dedicated AIAdminSettingsPage and an AI Service card in AppsPage.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 12:30:45 +02:00
parent 52a2967f61
commit 88c1ea297e
47 changed files with 1354 additions and 497 deletions
@@ -6,6 +6,7 @@ class Settings(BaseSettings):
    DATABASE_URL: str = "postgresql+asyncpg://postgres:password@db:5432/destroying_sap"
    DATA_DIR: str = "/data/documents"
    CONFIG_PATH: str = "/config/doc_service_config.json"
+    AI_SERVICE_URL: str = "http://ai-service:8010"

    class Config:
        env_file = ".env"
@@ -17,7 +17,7 @@ from app.models.category import DocumentCategory
 from app.models.category_assignment import CategoryAssignment
 from app.models.document import Document
 from app.schemas.document import DocumentOut, DocumentStatusOut, DocumentTypeUpdate
-from app.services.ai import get_provider
+from app.services.ai_client import AIServiceError, classify_document
 from app.services.config_reader import load_doc_config
 from app.services.storage import delete_file, get_upload_path, save_upload

@@ -91,9 +91,7 @@ async def process_document(doc_id: str) -> None:

        try:
            text = await asyncio.to_thread(_extract_pdf_text, doc.file_path)
-            config = await load_doc_config()
-            provider = get_provider(config["ai"])
-            result = await provider.classify_document(text)
+            result = await classify_document(text)

            doc.raw_text = text[:500_000]  # cap stored text at 500k chars
            doc.extracted_data = json.dumps(result)
@@ -1,23 +0,0 @@
-from app.services.ai.base import AIProvider
-
-
-def get_provider(ai_config: dict) -> AIProvider:
-    """
-    Factory: return an AIProvider instance based on the 'provider' key in the AI config section.
-    ai_config is the 'ai' section of doc_service_config.json, loaded fresh per processing job.
-    """
-    provider_name = ai_config.get("provider", "anthropic")
-    provider_cfg = ai_config.get(provider_name, {})
-
-    match provider_name:
-        case "anthropic":
-            from app.services.ai.anthropic_provider import AnthropicProvider
-            return AnthropicProvider(provider_cfg)
-        case "ollama" | "lmstudio":
-            from app.services.ai.openai_compat import OpenAICompatProvider
-            return OpenAICompatProvider(provider_cfg)
-        case _:
-            raise ValueError(f"Unknown AI provider: {provider_name!r}")
-
-
-__all__ = ["AIProvider", "get_provider"]
@@ -1,31 +0,0 @@
-import json
-
-from anthropic import AsyncAnthropic
-
-from app.services.ai.base import AIProvider, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
-
-
-class AnthropicProvider(AIProvider):
-    def __init__(self, config: dict) -> None:
-        self._client = AsyncAnthropic(api_key=config["api_key"])
-        self._model = config.get("model", "claude-haiku-4-5-20251001")
-
-    async def classify_document(self, text: str) -> dict:
-        message = await self._client.messages.create(
-            model=self._model,
-            max_tokens=2048,
-            system=SYSTEM_PROMPT,
-            messages=[{
-                "role": "user",
-                "content": USER_PROMPT_TEMPLATE.format(text=text[:100_000]),
-            }],
-        )
-        raw = message.content[0].text.strip()
-        return _parse_json(raw)
-
-
-def _parse_json(raw: str) -> dict:
-    # Strip accidental markdown fences despite explicit instruction not to include them
-    if raw.startswith("```"):
-        raw = raw.split("\n", 1)[1].rsplit("```", 1)[0]
-    return json.loads(raw)
@@ -1,36 +0,0 @@
-"""
-OpenAI-compatible provider for Ollama and LM Studio.
-Both expose an OpenAI-compatible /v1/chat/completions endpoint.
-"""
-import json
-
-from openai import AsyncOpenAI
-
-from app.services.ai.base import AIProvider, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
-
-
-class OpenAICompatProvider(AIProvider):
-    def __init__(self, config: dict) -> None:
-        self._client = AsyncOpenAI(
-            base_url=config["base_url"],
-            api_key=config.get("api_key", "not-required"),
-        )
-        self._model = config["model"]
-
-    async def classify_document(self, text: str) -> dict:
-        response = await self._client.chat.completions.create(
-            model=self._model,
-            temperature=0,
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=text[:100_000])},
-            ],
-        )
-        raw = response.choices[0].message.content.strip()
-        return _parse_json(raw)
-
-
-def _parse_json(raw: str) -> dict:
-    if raw.startswith("```"):
-        raw = raw.split("\n", 1)[1].rsplit("```", 1)[0]
-    return json.loads(raw)
@@ -0,0 +1,49 @@
+"""HTTP client for the shared ai-service container."""
+import json
+
+import httpx
+
+from app.core.config import settings
+from app.services.prompts import SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
+
+# Single module-level client reused by every request made from this module;
+# constructed with a 120-second httpx timeout.
+_client = httpx.AsyncClient(timeout=120.0)
+
+
+class AIServiceError(Exception):
+    """Raised by classify_document when ai-service cannot be reached, answers with a non-200 status, or returns an unexpected response."""
+    pass
+
+
+async def classify_document(text: str) -> dict:
+    """
+    Send document text to ai-service for classification.
+
+    Only the first 50,000 characters of text are placed in the prompt.
+    Returns the parsed JSON result dict.
+    Raises AIServiceError if ai-service cannot be reached, answers with a
+    non-200 status, or returns a body that is not a JSON object whose
+    "content" field is a string holding a JSON object.
+    """
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=text[:50_000])},
+    ]
+
+    try:
+        response = await _client.post(
+            f"{settings.AI_SERVICE_URL}/chat",
+            json={
+                "messages": messages,
+                "max_tokens": 2048,
+                "temperature": 0,
+                "response_format": "json",
+            },
+        )
+    except httpx.RequestError as exc:
+        raise AIServiceError(f"Could not reach ai-service: {exc}") from exc
+
+    if response.status_code != 200:
+        raise AIServiceError(
+            f"ai-service returned {response.status_code}: {response.text[:200]}"
+        )
+
+    try:
+        content = response.json()["content"]
+        result = json.loads(content)
+    except (KeyError, TypeError, ValueError) as exc:
+        # KeyError: the response object has no "content" field.
+        # TypeError: the body is not a JSON object, or "content" is not a string.
+        # ValueError: includes json.JSONDecodeError from either parsing step.
+        raise AIServiceError(f"Unexpected ai-service response: {exc}") from exc
+
+    # Callers json.dumps() the result and expect a mapping; reject lists, strings, etc.
+    if not isinstance(result, dict):
+        raise AIServiceError(
+            "Unexpected ai-service response: expected a JSON object, "
+            f"got {type(result).__name__}"
+        )
+    return result
@@ -1,18 +1,9 @@
 """
 Reads doc_service_config.json from the shared config volume.
-Caches the result for 30 seconds to avoid hitting the filesystem on every request.
-Uses asyncio.to_thread so the synchronous file read doesn't block the event loop.
+30-second TTL cache + env var overrides.

-Env var overrides (take precedence over the JSON config file, never committed):
-  AI_PROVIDER          — "lmstudio" | "ollama" | "anthropic"
-  LMSTUDIO_BASE_URL    — e.g. http://host.docker.internal:1234/v1
-  LMSTUDIO_API_KEY
-  LMSTUDIO_MODEL
-  OLLAMA_BASE_URL
-  OLLAMA_MODEL
-  OLLAMA_API_KEY
-  ANTHROPIC_API_KEY
-  ANTHROPIC_MODEL
+Env var overrides (all optional):
+  DOC_MAX_PDF_MB   — max upload size in megabytes (e.g. "50")
 """
 import asyncio
 import json
@@ -24,15 +15,6 @@ from pathlib import Path
 from app.core.config import settings

 _DEFAULT_CONFIG: dict = {
-    "ai": {
-        # Default: LM Studio running on the host machine at port 1234.
-        # Inside Docker, host.docker.internal resolves to the host; for local
-        # dev outside Docker use http://localhost:1234/v1 instead.
-        "provider": "lmstudio",
-        "anthropic": {"api_key": "", "model": "claude-haiku-4-5-20251001"},
-        "ollama": {"base_url": "http://host.docker.internal:11434/v1", "model": "llama3.2", "api_key": "ollama"},
-        "lmstudio": {"base_url": "http://host.docker.internal:1234/v1", "model": "local-model", "api_key": "lm-studio"},
-    },
    "documents": {"max_pdf_bytes": 20 * 1024 * 1024},
 }

@@ -52,43 +34,13 @@ def _read_config_sync() -> dict:


 def _apply_env_overrides(config: dict) -> dict:
-    """
-    Merge environment variable overrides into the config dict.
-    Env vars win over whatever is stored in the JSON file.
-    This lets the dev .env file pin the AI connection without writing to the
-    shared volume (which would affect all users).
-    """
+    """
+    Return a deep copy of config with environment variable overrides applied.
+
+    DOC_MAX_PDF_MB (whole megabytes) replaces documents.max_pdf_bytes.
+    A value that is not an integer is ignored, leaving the existing setting
+    in place, and a warning is logged. The input dict is never mutated.
+    """
+    import logging  # function-scope import: the module's import block is not fully visible here
+
     cfg = deepcopy(config)
-    ai = cfg.setdefault("ai", {})
-
-    if provider := os.environ.get("AI_PROVIDER"):
-        ai["provider"] = provider
-
-    # LM Studio
-    lms = ai.setdefault("lmstudio", {})
-    if v := os.environ.get("LMSTUDIO_BASE_URL"):
-        lms["base_url"] = v
-    if v := os.environ.get("LMSTUDIO_API_KEY"):
-        lms["api_key"] = v
-    if v := os.environ.get("LMSTUDIO_MODEL"):
-        lms["model"] = v
-
-    # Ollama
-    oll = ai.setdefault("ollama", {})
-    if v := os.environ.get("OLLAMA_BASE_URL"):
-        oll["base_url"] = v
-    if v := os.environ.get("OLLAMA_MODEL"):
-        oll["model"] = v
-    if v := os.environ.get("OLLAMA_API_KEY"):
-        oll["api_key"] = v
-
-    # Anthropic
-    ant = ai.setdefault("anthropic", {})
-    if v := os.environ.get("ANTHROPIC_API_KEY"):
-        ant["api_key"] = v
-    if v := os.environ.get("ANTHROPIC_MODEL"):
-        ant["model"] = v
-
+    docs = cfg.setdefault("documents", {})
+    if v := os.environ.get("DOC_MAX_PDF_MB"):
+        try:
+            docs["max_pdf_bytes"] = int(v) * 1024 * 1024
+        except ValueError:
+            # Keep the configured limit, but make the bad override visible
+            # instead of silently discarding it.
+            logging.getLogger(__name__).warning(
+                "Ignoring invalid DOC_MAX_PDF_MB=%r (expected an integer)", v
+            )
     return cfg


@@ -1,5 +1,3 @@
-from abc import ABC, abstractmethod
-
 SYSTEM_PROMPT = (
    "You are a financial document analysis assistant. "
    "Given the text extracted from a PDF document, return ONLY a JSON object "
@@ -23,10 +21,3 @@ suggested_categories (array of 2 to 5 short category name strings a user might w

 Document text:
 {text}"""
-
-
-class AIProvider(ABC):
-    @abstractmethod
-    async def classify_document(self, text: str) -> dict:
-        """Return structured extraction dict from document text."""
-        ...