Add priority queue to ai-service and STATUS.md workflow

- Introduce async priority queue service in ai-service; all /chat calls now route through it
- Refactor chat router to separate execute_chat (core logic) from the HTTP handler
- Add /queue endpoints (status, pause, resume, cancel) for queue management
- Update ai-service config to use Pydantic v2 model_config style
- Add STATUS.md files for backend, ai-service, doc-service, and frontend
- Document STATUS.md workflow in CLAUDE.md
- Update doc-service documents router and schemas; frontend DocumentsPage and API client

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 22:58:10 +02:00
parent d2495190a9
commit c4f0c7ad49
18 changed files with 1253 additions and 35 deletions
@@ -1,3 +1,10 @@
+"""
+POST /chat — synchronous chat endpoint.
+
+All requests are submitted to the priority queue at NORMAL priority and the caller
+waits for the result. This keeps the contract identical to the original endpoint
+while ensuring all AI traffic flows through one ordered queue.
+"""
 import asyncio
 import re

@@ -21,8 +28,11 @@ def _strip_fences(text: str) -> str:
    return m.group(1).strip() if m else text.strip()


-@router.post("/chat", response_model=ChatResponse)
-async def chat(request: ChatRequest) -> ChatResponse:
+async def execute_chat(request: ChatRequest) -> ChatResponse:
+    """
+    Core provider call — invoked by the queue worker.
+    Raises HTTPException on provider errors so the queue worker stores the message.
+    """
    config = await load_ai_config()

    provider_name = config.get("provider", "lmstudio")
@@ -36,7 +46,6 @@ async def chat(request: ChatRequest) -> ChatResponse:

    timeout = config.get("timeout_seconds", 60)
    max_retries = config.get("max_retries", 2)
-    last_exc: Exception | None = None

    for attempt in range(max_retries + 1):
        try:
@@ -46,11 +55,8 @@ async def chat(request: ChatRequest) -> ChatResponse:
            )
            break
        except asyncio.TimeoutError as exc:
-            last_exc = exc
-            # Don't retry on timeout — the model is busy; fail fast
            raise HTTPException(status_code=504, detail="AI provider timed out") from exc
        except (AnthropicConnError, OpenAIConnError) as exc:
-            last_exc = exc
            if attempt < max_retries:
                await asyncio.sleep(0.5 * (attempt + 1))
                continue
@@ -68,3 +74,28 @@ async def chat(request: ChatRequest) -> ChatResponse:
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )
+
+
+@router.post("/chat", response_model=ChatResponse)
+async def chat(request: ChatRequest) -> ChatResponse:
+    """
+    Submit at NORMAL priority and block until the queue processes the job.
+    If the queue is paused or stopped, the call blocks until resumed (or times out).
+    """
+    from app.services.queue import Priority, queue_service  # deferred — avoids circular import
+
+    job = await queue_service.enqueue(request, Priority.NORMAL)  # NOTE(review): enqueued before the config load below; if load_ai_config() raises, the job is already queued
+    config = await load_ai_config()
+    timeout = float(config.get("timeout_seconds", 60)) + 5.0  # +5s buffer over provider timeout
+
+    try:
+        return await asyncio.wait_for(asyncio.shield(job.future), timeout=timeout)  # shield: a timeout cancels only this wait, not job.future itself
+    except asyncio.TimeoutError:
+        queue_service.cancel_job(job.id)  # return value is not checked
+        raise HTTPException(status_code=504, detail="Timed out waiting for queue to process job")
+    except asyncio.CancelledError:  # NOTE(review): also fires when this handler's own task is cancelled, not only when job.future is — confirm intended
+        raise HTTPException(status_code=503, detail="Job was cancelled")
+    except Exception as exc:  # whatever exception awaiting job.future surfaced
+        if isinstance(exc, HTTPException):  # HTTPException passes through with its original status
+            raise
+        raise HTTPException(status_code=502, detail=str(exc)) from exc  # everything else is reported as 502
@@ -0,0 +1,104 @@
+"""
+Queue management router.
+
+POST  /queue/jobs          — enqueue a job, return immediately with job metadata
+GET   /queue/jobs/{id}     — poll job status / result
+DELETE /queue/jobs/{id}    — cancel a pending job
+
+GET   /queue/status        — worker state + queue depth
+POST  /queue/pause         — finish current job, stop picking new ones
+POST  /queue/resume        — resume from pause
+POST  /queue/start         — start (or restart) the worker
+POST  /queue/stop          — stop worker immediately (pending jobs stay queued)
+"""
+from fastapi import APIRouter, HTTPException
+
+from app.schemas.queue import JobStatus, QueueRequest, QueueStatus
+from app.services.queue import PRIORITY_MAP, Job, Priority, queue_service
+
+router = APIRouter(prefix="/queue", tags=["queue"])
+
+
+# ── Job endpoints ─────────────────────────────────────────────────────────────
+
+@router.post("/jobs", response_model=JobStatus, status_code=202)
+async def enqueue_job(request: QueueRequest) -> JobStatus:
+    priority = PRIORITY_MAP[request.priority]  # plain subscript lookup; presumably QueueRequest validation restricts priority to mapped names — TODO confirm
+    job = await queue_service.enqueue(request, priority)
+    return _job_to_status(job)  # answered with 202 and job metadata; the result is read later via GET /queue/jobs/{id}
+
+
+@router.get("/jobs/{job_id}", response_model=JobStatus)
+async def get_job(job_id: str) -> JobStatus:
+    job = queue_service.get_job(job_id)
+    if not job:  # any falsy result from get_job() is reported as 404
+        raise HTTPException(status_code=404, detail="Job not found")
+    return _job_to_status(job)  # same status shape as returned by POST /queue/jobs
+
+
+@router.delete("/jobs/{job_id}", status_code=204)
+async def cancel_job(job_id: str) -> None:
+    if not queue_service.cancel_job(job_id):  # a falsy result is not broken down further: one 404 covers both cases named in the detail text
+        raise HTTPException(status_code=404, detail="Job not found or already started")
+
+
+# ── Worker control endpoints ──────────────────────────────────────────────────
+
+@router.get("/status", response_model=QueueStatus)
+async def get_status() -> QueueStatus:
+    cur = queue_service.current_job  # read once so the id lookup below uses the same object
+    return QueueStatus(
+        running=queue_service._running,  # NOTE(review): reads an underscore-prefixed attribute, unlike the other fields here
+        paused=queue_service.is_paused,
+        queue_size=queue_service.queue_size,
+        current_job_id=cur.id if cur else None,  # None when current_job is falsy
+    )
+
+
+@router.post("/pause", status_code=204)
+async def pause() -> None:
+    """Pause after the current job finishes."""
+    queue_service.pause()  # called without await; its return value, if any, is not used
+
+
+@router.post("/resume", status_code=204)
+async def resume() -> None:
+    """Resume from a paused state."""
+    queue_service.resume()  # called without await; its return value, if any, is not used
+
+
+@router.post("/start", status_code=204)
+async def start() -> None:
+    """Start (or restart) the worker task."""
+    queue_service.start()  # called without await; its return value, if any, is not used
+
+
+@router.post("/stop", status_code=204)
+async def stop() -> None:
+    """Stop the worker. Pending jobs remain in queue; POST /queue/start to resume."""
+    queue_service.stop()  # called without await; its return value, if any, is not used
+
+
+# ── Helper ────────────────────────────────────────────────────────────────────
+
+def _job_to_status(job: Job) -> JobStatus:
+    pos: int | None = None  # stays None for any job whose status is not "pending"
+    if job.status == "pending":
+        # Count pending jobs ordered before this one: tuples compare lexicographically, so a smaller priority value counts as ahead, then a smaller seq at equal priority
+        pos = sum(
+            1
+            for j in queue_service._jobs.values()  # NOTE(review): reads the service's underscore-prefixed job table; scans every tracked job
+            if j.status == "pending"
+            and (int(j.priority), j.seq) < (int(job.priority), job.seq)  # strict <, so the job never counts itself
+        )
+    return JobStatus(
+        id=job.id,
+        status=job.status,
+        priority=Priority(job.priority).name.lower(),  # Priority member name, lowercased
+        position=pos,  # number of pending jobs ahead, so 0 means next in line
+        created_at=job.created_at,
+        started_at=job.started_at,
+        finished_at=job.finished_at,
+        result=job.result,
+        error=job.error,
+    )