Add shared ai-service container as AI provider intermediary
All feature containers now POST messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. doc-service AI providers removed; replaced by httpx ai_client.py. Backend settings restructured to /api/settings/ai. Frontend gets dedicated AIAdminSettingsPage and AI Service card in AppsPage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.providers import get_provider
|
||||
from app.providers.anthropic_provider import ProviderConnectionError as AnthropicConnError
|
||||
from app.providers.anthropic_provider import ProviderTimeoutError as AnthropicTimeoutError
|
||||
from app.providers.openai_compat import ProviderConnectionError as OpenAIConnError
|
||||
from app.providers.openai_compat import ProviderTimeoutError as OpenAITimeoutError
|
||||
from app.schemas.chat import ChatRequest, ChatResponse
|
||||
from app.services.config_reader import load_ai_config
|
||||
|
||||
# Router on which this module's endpoints (the POST /chat handler) are registered.
router = APIRouter()
|
||||
|
||||
_FENCE_RE = re.compile(r"^```[a-z]*\n?(.*?)\n?```$", re.DOTALL)
|
||||
|
||||
|
||||
def _strip_fences(text: str) -> str:
|
||||
m = _FENCE_RE.match(text.strip())
|
||||
return m.group(1).strip() if m else text.strip()
|
||||
|
||||
|
||||
@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Forward a chat request to the configured AI provider.

    The configuration is loaded on every call via ``load_ai_config`` and
    supplies the provider name (default ``"lmstudio"``), the per-attempt
    timeout ``timeout_seconds`` (default 60) and ``max_retries`` (default 2).
    Connection errors are retried with a linearly growing pause; timeouts are
    never retried.

    Args:
        request: Incoming chat payload; ``messages``, ``max_tokens``,
            ``temperature`` and ``response_format`` are read from it.

    Returns:
        A ``ChatResponse`` carrying the provider's content, the provider and
        model names, and the token counts returned by the provider.

    Raises:
        HTTPException: 503 if the configured provider is unknown or cannot be
            built, 504 if the provider times out, 502 if it is still
            unreachable after all attempts.
    """
    config = await load_ai_config()

    provider_name = config.get("provider", "lmstudio")
    if provider_name not in ("anthropic", "ollama", "lmstudio"):
        raise HTTPException(status_code=503, detail=f"Unknown provider configured: {provider_name!r}")

    try:
        provider = get_provider(config)
    except ValueError as exc:
        # Chain the cause so the original configuration error stays in the traceback.
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    timeout = float(config.get("timeout_seconds", 60))
    # Always make at least one attempt: a negative ``max_retries`` would
    # otherwise skip the loop entirely and leave the result names unbound.
    attempts = max(int(config.get("max_retries", 2)), 0) + 1

    for attempt in range(attempts):
        try:
            # provider.chat's result is unpacked as (content, input_tokens, output_tokens).
            content, input_tokens, output_tokens = await asyncio.wait_for(
                provider.chat(request.messages, request.max_tokens, request.temperature),
                timeout=timeout,
            )
            break
        except asyncio.TimeoutError as exc:
            # Don't retry on timeout — the model is busy; fail fast
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
        except (AnthropicConnError, OpenAIConnError) as exc:
            if attempt + 1 < attempts:
                # Linear back-off: 0.5 s, 1.0 s, 1.5 s, ...
                await asyncio.sleep(0.5 * (attempt + 1))
                continue
            raise HTTPException(status_code=502, detail=f"AI provider error: {exc}") from exc
        except (AnthropicTimeoutError, OpenAITimeoutError) as exc:
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc

    if request.response_format == "json":
        # Strip any Markdown code fence wrapped around the JSON payload.
        content = _strip_fences(content)

    return ChatResponse(
        content=content,
        provider=provider.provider_name,
        model=provider.model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )
|
||||
Reference in New Issue
Block a user