Add priority queue to ai-service and STATUS.md workflow

- Introduce async priority queue service in ai-service; all /chat calls now route through it - Refactor chat router to separate execute_chat (core logic) from the HTTP handler - Add /queue endpoints (status, pause, resume, cancel) for queue management - Update ai-service config to use Pydantic v2 model_config style - Add STATUS.md files for backend, ai-service, doc-service, and frontend - Document STATUS.md workflow in CLAUDE.md - Update doc-service documents router and schemas; frontend DocumentsPage and API client Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 22:58:10 +02:00
parent d2495190a9
commit c4f0c7ad49
18 changed files with 1253 additions and 35 deletions
@@ -23,7 +23,7 @@ _DEFAULT_CONFIG: dict = {
    "max_retries": 2,
    "anthropic": {"api_key": "", "model": "claude-haiku-4-5-20251001"},
    "ollama": {"base_url": "http://host.docker.internal:11434/v1", "model": "llama3.2", "api_key": "ollama"},
-    "lmstudio": {"base_url": "http://host.docker.internal:1234/v1", "model": "local-model", "api_key": "lm-studio"},
+    "lmstudio": {"base_url": "http://host.docker.internal:1234/v1", "model": "gemma-4-e4b-it", "api_key": "lm-studio"},
 }

 _cache: dict | None = None