Add priority queue to ai-service and STATUS.md workflow
- Introduce async priority queue service in ai-service; all /chat calls now route through it
- Refactor chat router to separate execute_chat (core logic) from the HTTP handler
- Add /queue endpoints (status, pause, resume, cancel) for queue management
- Update ai-service config to use Pydantic v2 model_config style
- Add STATUS.md files for backend, ai-service, doc-service, and frontend
- Document STATUS.md workflow in CLAUDE.md
- Update doc-service documents router and schemas; frontend DocumentsPage and API client

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,10 @@
|
||||
"""
|
||||
POST /chat — synchronous chat endpoint.
|
||||
|
||||
All requests are submitted to the priority queue at NORMAL priority and the caller
|
||||
waits for the result. This keeps the contract identical to the original endpoint
|
||||
while ensuring all AI traffic flows through one ordered queue.
|
||||
"""
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
@@ -21,8 +28,11 @@ def _strip_fences(text: str) -> str:
|
||||
return m.group(1).strip() if m else text.strip()
|
||||
|
||||
|
||||
@router.post("/chat", response_model=ChatResponse)
|
||||
async def chat(request: ChatRequest) -> ChatResponse:
|
||||
async def execute_chat(request: ChatRequest) -> ChatResponse:
|
||||
"""
|
||||
Core provider call — invoked by the queue worker.
|
||||
Raises HTTPException on provider errors so the queue worker stores the message.
|
||||
"""
|
||||
config = await load_ai_config()
|
||||
|
||||
provider_name = config.get("provider", "lmstudio")
|
||||
@@ -36,7 +46,6 @@ async def chat(request: ChatRequest) -> ChatResponse:
|
||||
|
||||
timeout = config.get("timeout_seconds", 60)
|
||||
max_retries = config.get("max_retries", 2)
|
||||
last_exc: Exception | None = None
|
||||
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
@@ -46,11 +55,8 @@ async def chat(request: ChatRequest) -> ChatResponse:
|
||||
)
|
||||
break
|
||||
except asyncio.TimeoutError as exc:
|
||||
last_exc = exc
|
||||
# Don't retry on timeout — the model is busy; fail fast
|
||||
raise HTTPException(status_code=504, detail="AI provider timed out") from exc
|
||||
except (AnthropicConnError, OpenAIConnError) as exc:
|
||||
last_exc = exc
|
||||
if attempt < max_retries:
|
||||
await asyncio.sleep(0.5 * (attempt + 1))
|
||||
continue
|
||||
@@ -68,3 +74,28 @@ async def chat(request: ChatRequest) -> ChatResponse:
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """
    Submit at NORMAL priority and block until the queue processes the job.

    If the queue is paused or stopped, the call blocks until resumed (or times out).

    Raises:
        HTTPException: 504 when the wait exceeds the configured ``timeout_seconds``
            plus a 5 second buffer; 503 when the queued job's future was cancelled;
            502 for any other error delivered through the job's future. An
            HTTPException delivered through the future is re-raised unchanged.
        asyncio.CancelledError: when this handler task itself is cancelled; the
            cancellation is propagated rather than converted into an HTTP error.
    """
    from app.services.queue import Priority, queue_service  # deferred — avoids circular import

    job = await queue_service.enqueue(request, Priority.NORMAL)
    config = await load_ai_config()
    timeout = float(config.get("timeout_seconds", 60)) + 5.0  # +5s buffer over provider timeout

    try:
        # shield() stops wait_for's timeout from cancelling job.future directly;
        # on timeout the job is withdrawn explicitly through cancel_job() below.
        return await asyncio.wait_for(asyncio.shield(job.future), timeout=timeout)
    except asyncio.TimeoutError as exc:
        queue_service.cancel_job(job.id)
        raise HTTPException(
            status_code=504, detail="Timed out waiting for queue to process job"
        ) from exc
    except asyncio.CancelledError:
        # CancelledError reaches here in two cases: the job's future was cancelled,
        # or this handler task itself was cancelled. Only the first is a 503.
        # NOTE(review): assumes job.future is an asyncio.Future exposing
        # .cancelled() — confirm against app.services.queue.
        if job.future.cancelled():
            raise HTTPException(status_code=503, detail="Job was cancelled") from None
        # Swallowing a cancellation aimed at this task would break cooperative
        # cancellation, so let it propagate.
        raise
    except HTTPException:
        # Errors already expressed as HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc
|
||||
|
||||
Reference in New Issue
Block a user