Add shared ai-service container as AI provider intermediary

All feature containers now POST messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. The doc-service AI providers have been removed and replaced by an httpx-based ai_client.py. Backend settings are restructured under /api/settings/ai. The frontend gets a dedicated AIAdminSettingsPage and an AI Service card in AppsPage.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 12:30:45 +02:00
parent 52a2967f61
commit 88c1ea297e
47 changed files with 1354 additions and 497 deletions
@@ -0,0 +1,70 @@
+import asyncio
+import re
+
+from fastapi import APIRouter, HTTPException
+
+from app.providers import get_provider
+from app.providers.anthropic_provider import ProviderConnectionError as AnthropicConnError
+from app.providers.anthropic_provider import ProviderTimeoutError as AnthropicTimeoutError
+from app.providers.openai_compat import ProviderConnectionError as OpenAIConnError
+from app.providers.openai_compat import ProviderTimeoutError as OpenAITimeoutError
+from app.schemas.chat import ChatRequest, ChatResponse
+from app.services.config_reader import load_ai_config
+
+router = APIRouter()  # collects this module's endpoints; the POST /chat handler below registers on it
+
+_FENCE_RE = re.compile(r"^```[a-z]*\n?(.*?)\n?```$", re.DOTALL)
+
+
+def _strip_fences(text: str) -> str:
+    m = _FENCE_RE.match(text.strip())
+    return m.group(1).strip() if m else text.strip()
+
+
+@router.post("/chat", response_model=ChatResponse)
+async def chat(request: ChatRequest) -> ChatResponse:
+    config = await load_ai_config()
+
+    provider_name = config.get("provider", "lmstudio")
+    if provider_name not in ("anthropic", "ollama", "lmstudio"):
+        raise HTTPException(status_code=503, detail=f"Unknown provider configured: {provider_name!r}")
+
+    try:
+        provider = get_provider(config)
+    except ValueError as exc:
+        raise HTTPException(status_code=503, detail=str(exc))
+
+    timeout = config.get("timeout_seconds", 60)
+    max_retries = config.get("max_retries", 2)
+    last_exc: Exception | None = None
+
+    for attempt in range(max_retries + 1):
+        try:
+            content, input_tokens, output_tokens = await asyncio.wait_for(
+                provider.chat(request.messages, request.max_tokens, request.temperature),
+                timeout=float(timeout),
+            )
+            break
+        except asyncio.TimeoutError as exc:
+            last_exc = exc
+            # Don't retry on timeout — the model is busy; fail fast
+            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
+        except (AnthropicConnError, OpenAIConnError) as exc:
+            last_exc = exc
+            if attempt < max_retries:
+                await asyncio.sleep(0.5 * (attempt + 1))
+                continue
+            raise HTTPException(status_code=502, detail=f"AI provider error: {exc}") from exc
+        except (AnthropicTimeoutError, OpenAITimeoutError) as exc:
+            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
+
+    if request.response_format == "json":
+        content = _strip_fences(content)
+
+    return ChatResponse(
+        content=content,
+        provider=provider.provider_name,
+        model=provider.model_name,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+    )