Add shared ai-service container as AI provider intermediary
All feature containers now POST messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. doc-service AI providers removed; replaced by httpx ai_client.py. Backend settings restructured to /api/settings/ai. Frontend gets dedicated AIAdminSettingsPage and AI Service card in AppsPage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,18 +1,9 @@
|
||||
"""
Reads doc_service_config.json from the shared config volume.

Caches the result for 30 seconds (TTL cache) to avoid hitting the filesystem
on every request.
Uses asyncio.to_thread so the synchronous file read doesn't block the event loop.

Env var overrides (all optional; they take precedence over the JSON config
file and are never committed):
  AI_PROVIDER        — "lmstudio" | "ollama" | "anthropic"
  LMSTUDIO_BASE_URL  — e.g. http://host.docker.internal:1234/v1
  LMSTUDIO_API_KEY
  LMSTUDIO_MODEL
  OLLAMA_BASE_URL
  OLLAMA_MODEL
  OLLAMA_API_KEY
  ANTHROPIC_API_KEY
  ANTHROPIC_MODEL
  DOC_MAX_PDF_MB     — max upload size in megabytes (e.g. "50")
"""
|
||||
import asyncio
|
||||
import json
|
||||
@@ -24,15 +15,6 @@ from pathlib import Path
|
||||
from app.core.config import settings
|
||||
|
||||
# Built-in default configuration values for this module.
_DEFAULT_CONFIG: dict = {
    "ai": {
        # Default: LM Studio running on the host machine at port 1234.
        # Inside Docker, host.docker.internal resolves to the host; for local
        # dev outside Docker use http://localhost:1234/v1 instead.
        "provider": "lmstudio",
        "anthropic": {"api_key": "", "model": "claude-haiku-4-5-20251001"},
        "ollama": {
            "base_url": "http://host.docker.internal:11434/v1",
            "model": "llama3.2",
            "api_key": "ollama",
        },
        "lmstudio": {
            "base_url": "http://host.docker.internal:1234/v1",
            "model": "local-model",
            "api_key": "lm-studio",
        },
    },
    # Upload size limit expressed in bytes: 20 * 1024 * 1024 = 20 MiB.
    "documents": {"max_pdf_bytes": 20 * 1024 * 1024},
}
|
||||
|
||||
@@ -52,43 +34,13 @@ def _read_config_sync() -> dict:
|
||||
|
||||
|
||||
def _apply_env_overrides(config: dict) -> dict:
    """
    Return a copy of *config* with environment variable overrides merged in.

    Env vars win over whatever is stored in the JSON file.
    This lets the dev .env file pin the AI connection without writing to the
    shared volume (which would affect all users).

    Only env vars set to a non-empty value are applied. The input dict is
    deep-copied and never mutated. Missing "ai", "ai.lmstudio", "ai.ollama",
    "ai.anthropic" and "documents" sections are created as empty dicts.

    DOC_MAX_PDF_MB is read as a whole number of megabytes and stored as
    bytes under documents.max_pdf_bytes. A value that is not an integer is
    ignored (a warning is logged) and the existing limit is kept.

    Args:
        config: Parsed configuration dict.

    Returns:
        A new dict with the overrides applied.
    """
    # Function-scope import: the module's top-level import block is not
    # visible from here, so the dependency is kept local to this function.
    import logging

    cfg = deepcopy(config)
    ai = cfg.setdefault("ai", {})

    if provider := os.environ.get("AI_PROVIDER"):
        ai["provider"] = provider

    # (provider section, config key, env var). Rows are grouped by section
    # and ordered so that sections and keys are inserted in a stable order.
    ai_overrides = (
        # LM Studio
        ("lmstudio", "base_url", "LMSTUDIO_BASE_URL"),
        ("lmstudio", "api_key", "LMSTUDIO_API_KEY"),
        ("lmstudio", "model", "LMSTUDIO_MODEL"),
        # Ollama
        ("ollama", "base_url", "OLLAMA_BASE_URL"),
        ("ollama", "model", "OLLAMA_MODEL"),
        ("ollama", "api_key", "OLLAMA_API_KEY"),
        # Anthropic
        ("anthropic", "api_key", "ANTHROPIC_API_KEY"),
        ("anthropic", "model", "ANTHROPIC_MODEL"),
    )
    for section, key, env_name in ai_overrides:
        # setdefault runs even when the env var is unset so that every
        # provider section exists in the returned config.
        section_cfg = ai.setdefault(section, {})
        if value := os.environ.get(env_name):
            section_cfg[key] = value

    docs = cfg.setdefault("documents", {})
    if raw_mb := os.environ.get("DOC_MAX_PDF_MB"):
        try:
            megabytes = int(raw_mb)
        except ValueError:
            # Best-effort: keep the configured limit, but leave a trace
            # instead of silently discarding the bad value.
            logging.getLogger(__name__).warning(
                "Ignoring DOC_MAX_PDF_MB=%r: expected an integer number of megabytes",
                raw_mb,
            )
        else:
            docs["max_pdf_bytes"] = megabytes * 1024 * 1024
    return cfg
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user