Add shared ai-service container as AI provider intermediary

All feature containers now POST messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. doc-service AI providers removed; replaced by httpx ai_client.py. Backend settings restructured to /api/settings/ai. Frontend gets dedicated AIAdminSettingsPage and AI Service card in AppsPage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 12:30:45 +02:00
parent 52a2967f61
commit 88c1ea297e
47 changed files with 1354 additions and 497 deletions
@@ -0,0 +1,20 @@
+from app.providers.base import AIProvider
+
+
+def get_provider(ai_config: dict) -> AIProvider:
+    """Return an AIProvider instance for the active provider in the config."""
+    provider_name = ai_config.get("provider", "lmstudio")
+    provider_cfg = ai_config.get(provider_name, {})
+
+    match provider_name:
+        case "anthropic":
+            from app.providers.anthropic_provider import AnthropicProvider
+            return AnthropicProvider(provider_cfg)
+        case "ollama" | "lmstudio":
+            from app.providers.openai_compat import OpenAICompatProvider
+            return OpenAICompatProvider(provider_cfg, provider_name=provider_name)
+        case _:
+            raise ValueError(f"Unknown AI provider: {provider_name!r}")
+
+
+# Names exported by ``from <this package> import *``.
+__all__ = ["AIProvider", "get_provider"]
@@ -0,0 +1,54 @@
+import asyncio
+
+import anthropic
+
+from app.providers.base import AIProvider
+from app.schemas.chat import ChatMessage
+
+
+class AnthropicProvider(AIProvider):
+    def __init__(self, config: dict) -> None:
+        self._client = anthropic.AsyncAnthropic(api_key=config.get("api_key", ""))
+        self.model_name = config.get("model", "claude-haiku-4-5-20251001")
+        self.provider_name = "anthropic"
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        max_tokens: int,
+        temperature: float,
+    ) -> tuple[str, int | None, int | None]:
+        # Anthropic uses a top-level `system=` param, not a role in the messages array
+        system_content = ""
+        user_messages = []
+        for msg in messages:
+            if msg.role == "system":
+                system_content += msg.content + "\n"
+            else:
+                user_messages.append({"role": msg.role, "content": msg.content})
+
+        try:
+            response = await self._client.messages.create(
+                model=self.model_name,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                system=system_content.strip() or anthropic.NOT_GIVEN,
+                messages=user_messages,
+            )
+        except anthropic.APIConnectionError as exc:
+            raise ProviderConnectionError(str(exc)) from exc
+        except anthropic.APITimeoutError as exc:
+            raise ProviderTimeoutError(str(exc)) from exc
+        except anthropic.APIStatusError as exc:
+            raise ProviderConnectionError(f"Anthropic API error {exc.status_code}: {exc.message}") from exc
+
+        content = response.content[0].text
+        return content, response.usage.input_tokens, response.usage.output_tokens
+
+
+# NOTE(review): a class with this same name is also defined in the
+# OpenAI-compatible provider module; the two are unrelated types, so an
+# ``except`` written against one will not catch the other — confirm intended.
+class ProviderConnectionError(Exception):
+    """Reports a connection failure or a non-success API status from the provider."""
+    pass
+
+
+# NOTE(review): a class with this same name is also defined in the
+# OpenAI-compatible provider module; the two are unrelated types, so an
+# ``except`` written against one will not catch the other — confirm intended.
+class ProviderTimeoutError(Exception):
+    """Exception type used to report that a request to the provider timed out."""
+    pass
@@ -0,0 +1,23 @@
+from abc import ABC, abstractmethod
+
+from app.schemas.chat import ChatMessage
+
+
+class AIProvider(ABC):
+    provider_name: str = "unknown"
+    model_name: str = "unknown"
+
+    @abstractmethod
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        max_tokens: int,
+        temperature: float,
+    ) -> tuple[str, int | None, int | None]:
+        """
+        Send messages to the provider and return (content, input_tokens, output_tokens).
+        Raises:
+            ProviderConnectionError: on network / auth failure
+            ProviderTimeoutError: on request timeout
+        """
+        ...
@@ -0,0 +1,52 @@
+"""OpenAI-compatible provider — handles both Ollama and LM Studio."""
+import asyncio
+
+import openai
+
+from app.providers.base import AIProvider
+from app.schemas.chat import ChatMessage
+
+
+class OpenAICompatProvider(AIProvider):
+    def __init__(self, config: dict, provider_name: str = "lmstudio") -> None:
+        self._client = openai.AsyncOpenAI(
+            base_url=config.get("base_url", "http://localhost:1234/v1"),
+            api_key=config.get("api_key") or "not-required",
+        )
+        self.model_name = config.get("model", "local-model")
+        self.provider_name = provider_name
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        max_tokens: int,
+        temperature: float,
+    ) -> tuple[str, int | None, int | None]:
+        raw_messages = [{"role": m.role, "content": m.content} for m in messages]
+        try:
+            response = await self._client.chat.completions.create(
+                model=self.model_name,
+                messages=raw_messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+            )
+        except openai.APIConnectionError as exc:
+            raise ProviderConnectionError(str(exc)) from exc
+        except openai.APITimeoutError as exc:
+            raise ProviderTimeoutError(str(exc)) from exc
+        except openai.APIStatusError as exc:
+            raise ProviderConnectionError(f"API error {exc.status_code}: {exc.message}") from exc
+
+        content = response.choices[0].message.content or ""
+        usage = response.usage
+        input_tokens = usage.prompt_tokens if usage else None
+        output_tokens = usage.completion_tokens if usage else None
+        return content, input_tokens, output_tokens
+
+
+# NOTE(review): a class with this same name is also defined in the Anthropic
+# provider module; the two are unrelated types, so an ``except`` written
+# against one will not catch the other — confirm intended.
+class ProviderConnectionError(Exception):
+    """Reports a connection failure or a non-success API status from the provider."""
+    pass
+
+
+# NOTE(review): a class with this same name is also defined in the Anthropic
+# provider module; the two are unrelated types, so an ``except`` written
+# against one will not catch the other — confirm intended.
+class ProviderTimeoutError(Exception):
+    """Exception type used to report that a request to the provider timed out."""
+    pass