"""Chat endpoint that forwards a request to the configured AI provider."""

import asyncio
import re

from fastapi import APIRouter, HTTPException

from app.providers import get_provider
from app.providers.anthropic_provider import ProviderConnectionError as AnthropicConnError
from app.providers.anthropic_provider import ProviderTimeoutError as AnthropicTimeoutError
from app.providers.openai_compat import ProviderConnectionError as OpenAIConnError
from app.providers.openai_compat import ProviderTimeoutError as OpenAITimeoutError
from app.schemas.chat import ChatRequest, ChatResponse
from app.services.config_reader import load_ai_config

router = APIRouter()

# Matches text that is wrapped, start to end, in a single ``` fence with an
# optional lowercase language tag; group 1 is the fenced payload.
_FENCE_RE = re.compile(r"^```[a-z]*\n?(.*?)\n?```$", re.DOTALL)


def _strip_fences(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence.

    The input is whitespace-stripped first. If the whole remaining text is a
    single fenced block, the stripped payload of that block is returned;
    otherwise the stripped text is returned unchanged.
    """
    m = _FENCE_RE.match(text.strip())
    return m.group(1).strip() if m else text.strip()


@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Send the chat request to the configured provider and return its reply.

    Connection errors are retried up to ``max_retries`` times with a linearly
    growing delay (0.5 s, 1.0 s, ...). Timeouts are never retried. When the
    request asks for ``response_format == "json"``, a Markdown code fence
    wrapping the reply is removed.

    Raises:
        HTTPException: 503 if the configured provider is unknown or cannot be
            constructed; 504 if the provider call times out; 502 if the
            provider connection still fails after all retries.
    """
    config = await load_ai_config()
    provider_name = config.get("provider", "lmstudio")

    if provider_name not in ("anthropic", "ollama", "lmstudio"):
        raise HTTPException(
            status_code=503,
            detail=f"Unknown provider configured: {provider_name!r}",
        )

    try:
        provider = get_provider(config)
    except ValueError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    # Convert once, before any provider coroutine is created, so a malformed
    # config value fails here rather than leaving an un-awaited coroutine.
    timeout = float(config.get("timeout_seconds", 60))
    # Clamp to zero so a negative setting still performs one attempt instead
    # of skipping the loop and leaving the result variables unbound.
    max_retries = max(0, int(config.get("max_retries", 2)))

    for attempt in range(max_retries + 1):
        try:
            content, input_tokens, output_tokens = await asyncio.wait_for(
                provider.chat(request.messages, request.max_tokens, request.temperature),
                timeout=timeout,
            )
            break
        except asyncio.TimeoutError as exc:
            # Don't retry on timeout — the model is busy; fail fast
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
        except (AnthropicConnError, OpenAIConnError) as exc:
            if attempt < max_retries:
                # Linear backoff before the next attempt.
                await asyncio.sleep(0.5 * (attempt + 1))
                continue
            raise HTTPException(status_code=502, detail=f"AI provider error: {exc}") from exc
        except (AnthropicTimeoutError, OpenAITimeoutError) as exc:
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc

    if request.response_format == "json":
        content = _strip_fences(content)

    return ChatResponse(
        content=content,
        provider=provider.provider_name,
        model=provider.model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )