# Business-Management/features/ai-service/app/routers/chat.py

import asyncio
import re

from fastapi import APIRouter, HTTPException

from app.providers import get_provider
from app.providers.anthropic_provider import ProviderConnectionError as AnthropicConnError
from app.providers.anthropic_provider import ProviderTimeoutError as AnthropicTimeoutError
from app.providers.openai_compat import ProviderConnectionError as OpenAIConnError
from app.providers.openai_compat import ProviderTimeoutError as OpenAITimeoutError
from app.schemas.chat import ChatRequest, ChatResponse
from app.services.config_reader import load_ai_config

# Router collecting this module's endpoints (the POST /chat handler below).
router = APIRouter()

# Matches a reply that consists entirely of one Markdown code fence.
# The opening fence may carry a language tag:
#   * a tag-like token (starts with a letter; letters, digits, ``_ + # . -``)
#     is dropped when it is terminated by a newline, so "JSON" or "json5" are
#     handled as well as "json";
#   * without a newline only a run of lowercase letters is treated as the tag,
#     so single-line fences such as ```{"a": 1}``` keep their content.
_FENCE_RE = re.compile(
    r"^```(?:[A-Za-z][\w+#.-]*[ \t]*\r?\n|[a-z]*\n?)(.*?)\n?```$",
    re.DOTALL,
)


def _strip_fences(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence.

    Leading and trailing whitespace is always removed. When the remaining
    text is wrapped in a single ``` fence (optionally with a language tag on
    the opening line), only the fenced content is returned, itself stripped.
    Text that is not fully fenced is returned stripped but otherwise unchanged.
    """
    stripped = text.strip()
    m = _FENCE_RE.match(stripped)
    return m.group(1).strip() if m else stripped


@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Forward a chat request to the configured AI provider.

    Loads the AI configuration, builds the provider and awaits its ``chat``
    coroutine under an overall timeout. Connection errors are retried with a
    linearly growing delay (0.5s, 1.0s, ...); timeouts are never retried.
    When the request asks for ``response_format == "json"``, a surrounding
    Markdown code fence is stripped from the returned content.

    Args:
        request: The chat request; its ``messages``, ``max_tokens`` and
            ``temperature`` are passed to the provider.

    Returns:
        A ``ChatResponse`` carrying the content, the provider and model
        names, and the input/output token counts reported by the provider.

    Raises:
        HTTPException: 503 if the configured provider name is unknown or
            ``get_provider`` rejects the configuration; 504 if the call times
            out; 502 if a connection error persists after all retries.
    """
    config = await load_ai_config()

    provider_name = config.get("provider", "lmstudio")
    if provider_name not in ("anthropic", "ollama", "lmstudio"):
        raise HTTPException(status_code=503, detail=f"Unknown provider configured: {provider_name!r}")

    try:
        provider = get_provider(config)
    except ValueError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    timeout = float(config.get("timeout_seconds", 60))
    # A negative retry count would make the loop below run zero times and
    # leave the result variables unbound; always make at least one attempt.
    max_retries = max(0, int(config.get("max_retries", 2)))

    for attempt in range(max_retries + 1):
        try:
            content, input_tokens, output_tokens = await asyncio.wait_for(
                provider.chat(request.messages, request.max_tokens, request.temperature),
                timeout=timeout,
            )
            break
        except asyncio.TimeoutError as exc:
            # Don't retry on timeout — the model is busy; fail fast
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
        except (AnthropicConnError, OpenAIConnError) as exc:
            if attempt >= max_retries:
                # Retries exhausted: surface the last connection error.
                raise HTTPException(status_code=502, detail=f"AI provider error: {exc}") from exc
            # Back off a little longer after each failed attempt.
            await asyncio.sleep(0.5 * (attempt + 1))
        except (AnthropicTimeoutError, OpenAITimeoutError) as exc:
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc

    if request.response_format == "json":
        content = _strip_fences(content)

    return ChatResponse(
        content=content,
        provider=provider.provider_name,
        model=provider.model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )