Add priority queue to ai-service and STATUS.md workflow

- Introduce async priority queue service in ai-service; all /chat calls now route through it
- Refactor chat router to separate execute_chat (core logic) from the HTTP handler
- Add /queue endpoints (status, pause, resume, cancel) for queue management
- Update ai-service config to use Pydantic v2 model_config style
- Add STATUS.md files for backend, ai-service, doc-service, and frontend
- Document STATUS.md workflow in CLAUDE.md
- Update doc-service documents router and schemas; frontend DocumentsPage and API client

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-14 22:58:10 +02:00
parent d2495190a9
commit c4f0c7ad49
18 changed files with 1253 additions and 35 deletions
+37 -6
View File
@@ -1,3 +1,10 @@
"""
POST /chat — synchronous chat endpoint.
All requests are submitted to the priority queue at NORMAL priority and the caller
waits for the result. This keeps the contract identical to the original endpoint
while ensuring all AI traffic flows through one ordered queue.
"""
import asyncio
import re
@@ -21,8 +28,11 @@ def _strip_fences(text: str) -> str:
return m.group(1).strip() if m else text.strip()
@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
async def execute_chat(request: ChatRequest) -> ChatResponse:
"""
Core provider call — invoked by the queue worker.
Raises HTTPException on provider errors so the queue worker stores the message.
"""
config = await load_ai_config()
provider_name = config.get("provider", "lmstudio")
@@ -36,7 +46,6 @@ async def chat(request: ChatRequest) -> ChatResponse:
timeout = config.get("timeout_seconds", 60)
max_retries = config.get("max_retries", 2)
last_exc: Exception | None = None
for attempt in range(max_retries + 1):
try:
@@ -46,11 +55,8 @@ async def chat(request: ChatRequest) -> ChatResponse:
)
break
except asyncio.TimeoutError as exc:
last_exc = exc
# Don't retry on timeout — the model is busy; fail fast
raise HTTPException(status_code=504, detail="AI provider timed out") from exc
except (AnthropicConnError, OpenAIConnError) as exc:
last_exc = exc
if attempt < max_retries:
await asyncio.sleep(0.5 * (attempt + 1))
continue
@@ -68,3 +74,28 @@ async def chat(request: ChatRequest) -> ChatResponse:
input_tokens=input_tokens,
output_tokens=output_tokens,
)
@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """
    Submit the request at NORMAL priority and wait until the queue has processed it.

    The wait is bounded by the configured provider timeout ("timeout_seconds",
    default 60) plus a 5 second buffer. Time the job spends sitting in the queue
    (for example while the queue is paused) counts against that budget.

    Raises:
        HTTPException: 504 if the job does not finish within the wait budget,
            503 if the job's future was cancelled, 502 for any other failure.
            An HTTPException raised by the job itself is passed through unchanged.
        asyncio.CancelledError: if this handler task itself is cancelled; the
            queued job is cancelled first so it is not processed for nobody.
    """
    from app.services.queue import Priority, queue_service  # deferred — avoids circular import

    # Load the config before enqueueing so a config failure cannot leave an
    # orphaned job in the queue with no caller waiting on it.
    config = await load_ai_config()
    timeout = float(config.get("timeout_seconds", 60)) + 5.0  # +5s buffer over provider timeout

    job = await queue_service.enqueue(request, Priority.NORMAL)
    try:
        # shield() keeps wait_for's timeout from cancelling the job's future directly;
        # the job is cancelled explicitly through the queue service instead.
        return await asyncio.wait_for(asyncio.shield(job.future), timeout=timeout)
    except asyncio.TimeoutError as exc:
        queue_service.cancel_job(job.id)
        raise HTTPException(
            status_code=504, detail="Timed out waiting for queue to process job"
        ) from exc
    except asyncio.CancelledError:
        # Two different events surface here: the job's future was cancelled, or this
        # handler task was cancelled. Only the first one is an HTTP-level error.
        # NOTE(review): assumes job.future is an asyncio.Future/Task — confirm in queue service.
        if job.future.cancelled():
            raise HTTPException(status_code=503, detail="Job was cancelled") from None
        # Our own task was cancelled: drop the queued job and let the cancellation
        # propagate rather than masking it as an HTTP error.
        queue_service.cancel_job(job.id)
        raise
    except HTTPException:
        # Raised by the job itself (e.g. provider errors) — pass through unchanged.
        raise
    except Exception as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc