Add priority queue to ai-service and STATUS.md workflow
- Introduce async priority queue service in ai-service; all /chat calls now route through it
- Refactor chat router to separate execute_chat (core logic) from the HTTP handler
- Add /queue endpoints (status, pause, resume, cancel) for queue management
- Update ai-service config to use Pydantic v2 model_config style
- Add STATUS.md files for backend, ai-service, doc-service, and frontend
- Document STATUS.md workflow in CLAUDE.md
- Update doc-service documents router and schemas; frontend DocumentsPage and API client

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,10 @@
|
||||
"""
|
||||
POST /chat — synchronous chat endpoint.
|
||||
|
||||
All requests are submitted to the priority queue at NORMAL priority and the caller
|
||||
waits for the result. This keeps the contract identical to the original endpoint
|
||||
while ensuring all AI traffic flows through one ordered queue.
|
||||
"""
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
@@ -21,8 +28,11 @@ def _strip_fences(text: str) -> str:
|
||||
return m.group(1).strip() if m else text.strip()
|
||||
|
||||
|
||||
@router.post("/chat", response_model=ChatResponse)
|
||||
async def chat(request: ChatRequest) -> ChatResponse:
|
||||
async def execute_chat(request: ChatRequest) -> ChatResponse:
|
||||
"""
|
||||
Core provider call — invoked by the queue worker.
|
||||
Raises HTTPException on provider errors so the queue worker stores the message.
|
||||
"""
|
||||
config = await load_ai_config()
|
||||
|
||||
provider_name = config.get("provider", "lmstudio")
|
||||
@@ -36,7 +46,6 @@ async def chat(request: ChatRequest) -> ChatResponse:
|
||||
|
||||
timeout = config.get("timeout_seconds", 60)
|
||||
max_retries = config.get("max_retries", 2)
|
||||
last_exc: Exception | None = None
|
||||
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
@@ -46,11 +55,8 @@ async def chat(request: ChatRequest) -> ChatResponse:
|
||||
)
|
||||
break
|
||||
except asyncio.TimeoutError as exc:
|
||||
last_exc = exc
|
||||
# Don't retry on timeout — the model is busy; fail fast
|
||||
raise HTTPException(status_code=504, detail="AI provider timed out") from exc
|
||||
except (AnthropicConnError, OpenAIConnError) as exc:
|
||||
last_exc = exc
|
||||
if attempt < max_retries:
|
||||
await asyncio.sleep(0.5 * (attempt + 1))
|
||||
continue
|
||||
@@ -68,3 +74,28 @@ async def chat(request: ChatRequest) -> ChatResponse:
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """
    Submit the request at NORMAL priority and wait for the queue to process it.

    The wait is bounded by the configured ``timeout_seconds`` (default 60) plus
    a 5 second buffer. If the queue is paused or stopped, the call blocks until
    it is resumed or that deadline passes.

    Raises:
        HTTPException: 504 when the job does not complete before the deadline,
            503 when the job itself was cancelled, 502 for any other non-HTTP
            error surfaced by the job. An HTTPException coming out of the job
            is re-raised unchanged.
        asyncio.CancelledError: when this handler's own task is cancelled while
            waiting; the cancellation is propagated rather than masked.
    """
    from app.services.queue import Priority, queue_service  # deferred — avoids circular import

    job = await queue_service.enqueue(request, Priority.NORMAL)
    config = await load_ai_config()
    timeout = float(config.get("timeout_seconds", 60)) + 5.0  # +5s buffer over provider timeout

    try:
        # shield() stops wait_for's timeout from cancelling job.future directly;
        # the job is cancelled explicitly through the queue service below.
        return await asyncio.wait_for(asyncio.shield(job.future), timeout=timeout)
    except asyncio.TimeoutError as exc:
        queue_service.cancel_job(job.id)
        raise HTTPException(
            status_code=504, detail="Timed out waiting for queue to process job"
        ) from exc
    except asyncio.CancelledError:
        # Only report 503 when the job's future was cancelled. If the future is
        # not cancelled, the cancellation targets this handler task and must
        # propagate so asyncio can finish cancelling it.
        if not job.future.cancelled():
            raise
        raise HTTPException(status_code=503, detail="Job was cancelled")
    except HTTPException:
        # Provider errors already carry the right status code and detail.
        raise
    except Exception as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
Queue management router.
|
||||
|
||||
POST /queue/jobs — enqueue a job, return immediately with job metadata
|
||||
GET /queue/jobs/{id} — poll job status / result
|
||||
DELETE /queue/jobs/{id} — cancel a pending job
|
||||
|
||||
GET /queue/status — worker state + queue depth
|
||||
POST /queue/pause — finish current job, stop picking new ones
|
||||
POST /queue/resume — resume from pause
|
||||
POST /queue/start — start (or restart) the worker
|
||||
POST /queue/stop — stop worker immediately (pending jobs stay queued)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.schemas.queue import JobStatus, QueueRequest, QueueStatus
|
||||
from app.services.queue import PRIORITY_MAP, Job, Priority, queue_service
|
||||
|
||||
# Every route declared in this module is registered under the "/queue" path
# prefix and carries the "queue" tag.
router = APIRouter(prefix="/queue", tags=["queue"])
|
||||
|
||||
|
||||
# ── Job endpoints ─────────────────────────────────────────────────────────────
|
||||
|
||||
@router.post("/jobs", response_model=JobStatus, status_code=202)
async def enqueue_job(request: QueueRequest) -> JobStatus:
    """
    Enqueue a job and return its metadata without waiting for the result.

    The request's ``priority`` value is translated through ``PRIORITY_MAP``
    before the job is handed to the queue service.
    """
    queued = await queue_service.enqueue(request, PRIORITY_MAP[request.priority])
    return _job_to_status(queued)
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}", response_model=JobStatus)
async def get_job(job_id: str) -> JobStatus:
    """
    Return the current status of the job identified by ``job_id``.

    Raises:
        HTTPException: 404 when the queue service has no such job.
    """
    found = queue_service.get_job(job_id)
    if found:
        return _job_to_status(found)
    raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
|
||||
@router.delete("/jobs/{job_id}", status_code=204)
async def cancel_job(job_id: str) -> None:
    """
    Ask the queue service to cancel the job identified by ``job_id``.

    Raises:
        HTTPException: 404 when the queue service reports that nothing was
            cancelled (job unknown or already started, per the error detail).
    """
    was_cancelled = queue_service.cancel_job(job_id)
    if was_cancelled:
        return
    raise HTTPException(status_code=404, detail="Job not found or already started")
|
||||
|
||||
|
||||
# ── Worker control endpoints ──────────────────────────────────────────────────
|
||||
|
||||
@router.get("/status", response_model=QueueStatus)
async def get_status() -> QueueStatus:
    """
    Report the worker's running/paused flags, the queue depth, and the id of
    the job currently being processed (``None`` when there is no current job).
    """
    active = queue_service.current_job
    active_id = active.id if active else None
    return QueueStatus(
        running=queue_service._running,
        paused=queue_service.is_paused,
        queue_size=queue_service.queue_size,
        current_job_id=active_id,
    )
|
||||
|
||||
|
||||
@router.post("/pause", status_code=204)
async def pause() -> None:
    """Pause the queue: the current job finishes, no new jobs are picked up."""
    queue_service.pause()
|
||||
|
||||
|
||||
@router.post("/resume", status_code=204)
async def resume() -> None:
    """Leave the paused state so queued jobs are processed again."""
    queue_service.resume()
|
||||
|
||||
|
||||
@router.post("/start", status_code=204)
async def start() -> None:
    """Start the queue worker task, or restart it after a stop."""
    queue_service.start()
|
||||
|
||||
|
||||
@router.post("/stop", status_code=204)
async def stop() -> None:
    """Stop the worker; pending jobs stay queued until POST /queue/start."""
    queue_service.stop()
|
||||
|
||||
|
||||
# ── Helper ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _job_to_status(job: Job) -> JobStatus:
    """
    Convert an internal ``Job`` into the ``JobStatus`` response schema.

    For a pending job, ``position`` is the number of other pending jobs whose
    ``(priority, seq)`` key sorts strictly before this job's key: a lower
    numeric priority value, or an equal priority with a smaller ``seq``.
    For any other status ``position`` is ``None``.
    """
    position: int | None = None
    if job.status == "pending":
        own_key = (int(job.priority), job.seq)
        position = 0
        for other in queue_service._jobs.values():
            if other.status != "pending":
                continue
            # Lexicographic tuple comparison: priority first, then seq.
            if (int(other.priority), other.seq) < own_key:
                position += 1

    priority_label = Priority(job.priority).name.lower()
    return JobStatus(
        id=job.id,
        status=job.status,
        priority=priority_label,
        position=position,
        created_at=job.created_at,
        started_at=job.started_at,
        finished_at=job.finished_at,
        result=job.result,
        error=job.error,
    )
|
||||
Reference in New Issue
Block a user