88c1ea297e
All feature containers now POST messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. doc-service AI providers removed; replaced by httpx ai_client.py. Backend settings restructured to /api/settings/ai. Frontend gets dedicated AIAdminSettingsPage and AI Service card in AppsPage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
71 lines
2.6 KiB
Python
71 lines
2.6 KiB
Python
import asyncio
|
|
import re
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
|
|
from app.providers import get_provider
|
|
from app.providers.anthropic_provider import ProviderConnectionError as AnthropicConnError
|
|
from app.providers.anthropic_provider import ProviderTimeoutError as AnthropicTimeoutError
|
|
from app.providers.openai_compat import ProviderConnectionError as OpenAIConnError
|
|
from app.providers.openai_compat import ProviderTimeoutError as OpenAITimeoutError
|
|
from app.schemas.chat import ChatRequest, ChatResponse
|
|
from app.services.config_reader import load_ai_config
|
|
|
|
# Router on which this module's endpoints (the POST /chat handler) are registered.
router = APIRouter()
|
|
|
|
# Matches a string that is, in its entirety, one Markdown code fence.
#
# The opening fence is recognised in two ways (tried in this order):
#   1. a language tag on its own line -- letters first, then word characters,
#      "+", "." or "-" (e.g. "json", "JSON", "json5"), followed by a newline;
#   2. the legacy form: an optional run of lowercase letters, optionally
#      followed by a newline (this also covers tag-less and single-line
#      fences such as ```{"a":1}```).
# Group 1 captures the fenced content.
_FENCE_RE = re.compile(
    r"^```(?:[A-Za-z][\w+.-]*[ \t]*\r?\n|[a-z]*\n?)(.*?)\n?```$",
    re.DOTALL,
)


def _strip_fences(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence.

    Leading and trailing whitespace is always removed. If the remaining text
    is a single fenced block (optionally carrying a language tag), only the
    fenced content is returned, itself stripped; otherwise the stripped text
    is returned unchanged.
    """
    stripped = text.strip()
    match = _FENCE_RE.match(stripped)
    if match is None:
        return stripped
    return match.group(1).strip()
|
|
|
|
|
|
@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Forward a chat request to the configured AI provider.

    The configuration is loaded on every call via ``load_ai_config()``. The
    provider is built with ``get_provider(config)`` and its ``chat`` coroutine
    is awaited under a timeout of ``timeout_seconds`` (default 60).
    Connection errors are retried up to ``max_retries`` times (default 2)
    with a linearly growing pause of 0.5 s, 1.0 s, ...; timeouts are never
    retried.

    When ``request.response_format`` is ``"json"``, a Markdown code fence
    wrapping the provider's answer is removed before returning.

    Raises:
        HTTPException: 503 if the configured provider name is unknown,
            ``get_provider`` rejects the configuration, or
            ``timeout_seconds`` / ``max_retries`` are not numeric;
            504 if the call times out; 502 if the provider is still
            unreachable after all retries.
    """
    config = await load_ai_config()

    provider_name = config.get("provider", "lmstudio")
    if provider_name not in ("anthropic", "ollama", "lmstudio"):
        raise HTTPException(status_code=503, detail=f"Unknown provider configured: {provider_name!r}")

    try:
        provider = get_provider(config)
    except ValueError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    # Validate the tunables once, up front. A negative retry count would
    # otherwise skip the loop entirely and leave `content` unbound, and a
    # null / non-numeric value would surface as an unhandled 500.
    try:
        timeout = float(config.get("timeout_seconds", 60))
        max_retries = max(0, int(config.get("max_retries", 2)))
    except (TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=503,
            detail=f"Invalid AI service configuration: {exc}",
        ) from exc

    # Every iteration either breaks (success), continues (retry) or raises,
    # so the names bound below are always defined after the loop.
    for attempt in range(max_retries + 1):
        try:
            content, input_tokens, output_tokens = await asyncio.wait_for(
                provider.chat(request.messages, request.max_tokens, request.temperature),
                timeout=timeout,
            )
            break
        except asyncio.TimeoutError as exc:
            # Don't retry on timeout — the model is busy; fail fast
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
        except (AnthropicConnError, OpenAIConnError) as exc:
            if attempt < max_retries:
                # Linear backoff before the next attempt.
                await asyncio.sleep(0.5 * (attempt + 1))
                continue
            raise HTTPException(status_code=502, detail=f"AI provider error: {exc}") from exc
        except (AnthropicTimeoutError, OpenAITimeoutError) as exc:
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc

    if request.response_format == "json":
        content = _strip_fences(content)

    return ChatResponse(
        content=content,
        provider=provider.provider_name,
        model=provider.model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )
|