Add shared ai-service container as AI provider intermediary

All feature containers now POST messages to ai-service (port 8010) instead
of calling AI providers directly. ai-service routes to LM Studio, Ollama,
or Anthropic based on /config/ai_service_config.json. doc-service AI
providers removed; replaced by httpx ai_client.py. Backend settings
restructured to /api/settings/ai. Frontend gets dedicated AIAdminSettingsPage
and AI Service card in AppsPage.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-14 12:30:45 +02:00
parent 52a2967f61
commit 88c1ea297e
47 changed files with 1354 additions and 497 deletions
+1
View File
@@ -6,6 +6,7 @@ class Settings(BaseSettings):
DATABASE_URL: str = "postgresql+asyncpg://postgres:password@db:5432/destroying_sap"
DATA_DIR: str = "/data/documents"
CONFIG_PATH: str = "/config/doc_service_config.json"
AI_SERVICE_URL: str = "http://ai-service:8010"
class Config:
env_file = ".env"
@@ -17,7 +17,7 @@ from app.models.category import DocumentCategory
from app.models.category_assignment import CategoryAssignment
from app.models.document import Document
from app.schemas.document import DocumentOut, DocumentStatusOut, DocumentTypeUpdate
from app.services.ai import get_provider
from app.services.ai_client import AIServiceError, classify_document
from app.services.config_reader import load_doc_config
from app.services.storage import delete_file, get_upload_path, save_upload
@@ -91,9 +91,7 @@ async def process_document(doc_id: str) -> None:
try:
text = await asyncio.to_thread(_extract_pdf_text, doc.file_path)
config = await load_doc_config()
provider = get_provider(config["ai"])
result = await provider.classify_document(text)
result = await classify_document(text)
doc.raw_text = text[:500_000] # cap stored text at 500k chars
doc.extracted_data = json.dumps(result)
@@ -1,23 +0,0 @@
from app.services.ai.base import AIProvider
def get_provider(ai_config: dict) -> AIProvider:
    """
    Build the AIProvider selected by the 'provider' key of the AI config.

    ai_config is the 'ai' section of doc_service_config.json. The provider
    name defaults to "anthropic"; the sub-dict stored under that name (or an
    empty dict when absent) is handed to the provider's constructor.

    Provider modules are imported inside the matching branch, so only the
    selected backend's module is imported by this call.

    Raises ValueError when the provider name is not recognised.
    """
    name = ai_config.get("provider", "anthropic")
    options = ai_config.get(name, {})

    if name == "anthropic":
        from app.services.ai.anthropic_provider import AnthropicProvider

        return AnthropicProvider(options)

    # Ollama and LM Studio share the OpenAI-compatible implementation.
    if name in ("ollama", "lmstudio"):
        from app.services.ai.openai_compat import OpenAICompatProvider

        return OpenAICompatProvider(options)

    raise ValueError(f"Unknown AI provider: {name!r}")
__all__ = ["AIProvider", "get_provider"]
@@ -1,31 +0,0 @@
import json
from anthropic import AsyncAnthropic
from app.services.ai.base import AIProvider, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
class AnthropicProvider(AIProvider):
    """AIProvider implementation that calls the Anthropic Messages API."""

    def __init__(self, config: dict) -> None:
        """
        Create the async Anthropic client.

        config must contain "api_key" (KeyError otherwise); "model" is
        optional and falls back to "claude-haiku-4-5-20251001".
        """
        self._client = AsyncAnthropic(api_key=config["api_key"])
        self._model = config.get("model", "claude-haiku-4-5-20251001")

    async def classify_document(self, text: str) -> dict:
        """
        Send the document text to the model and return the parsed JSON reply.

        Only the first 100,000 characters of text are placed in the prompt.
        The text of the first content block of the reply is parsed with
        _parse_json, so a reply that is not valid JSON raises whatever
        json.loads raises.
        """
        message = await self._client.messages.create(
            model=self._model,
            max_tokens=2048,
            # Pin sampling to match the OpenAI-compatible provider, which
            # also sends temperature=0: the same document should produce the
            # same extraction regardless of which backend is configured.
            temperature=0,
            system=SYSTEM_PROMPT,
            messages=[{
                "role": "user",
                "content": USER_PROMPT_TEMPLATE.format(text=text[:100_000]),
            }],
        )
        raw = message.content[0].text.strip()
        return _parse_json(raw)
def _parse_json(raw: str) -> dict:
# Strip accidental markdown fences despite explicit instruction not to include them
if raw.startswith("```"):
raw = raw.split("\n", 1)[1].rsplit("```", 1)[0]
return json.loads(raw)
@@ -1,36 +0,0 @@
"""
OpenAI-compatible provider for Ollama and LM Studio.
Both expose an OpenAI-compatible /v1/chat/completions endpoint.
"""
import json
from openai import AsyncOpenAI
from app.services.ai.base import AIProvider, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
class OpenAICompatProvider(AIProvider):
    """AIProvider for servers exposing an OpenAI-compatible chat endpoint."""

    def __init__(self, config: dict) -> None:
        """
        Create the async OpenAI-compatible client.

        config must contain "base_url" and "model" (KeyError otherwise).
        "api_key" is optional and defaults to the placeholder
        "not-required".
        """
        self._client = AsyncOpenAI(
            base_url=config["base_url"],
            api_key=config.get("api_key", "not-required"),
        )
        self._model = config["model"]

    async def classify_document(self, text: str) -> dict:
        """
        Send the document text to the model and return the parsed JSON reply.

        Only the first 100,000 characters of text are placed in the prompt.
        The first choice's message content is parsed with _parse_json, so an
        empty or non-JSON reply raises whatever json.loads raises.
        """
        response = await self._client.chat.completions.create(
            model=self._model,
            temperature=0,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=text[:100_000])},
            ],
        )
        # message.content may be None (e.g. a refusal or an empty
        # completion). Treat that as an empty reply so the failure surfaces
        # as a JSON parse error like any other unusable output, rather than
        # as an AttributeError on None.
        raw = (response.choices[0].message.content or "").strip()
        return _parse_json(raw)
def _parse_json(raw: str) -> dict:
if raw.startswith("```"):
raw = raw.split("\n", 1)[1].rsplit("```", 1)[0]
return json.loads(raw)
@@ -0,0 +1,49 @@
"""HTTP client for the shared ai-service container."""
import json
import httpx
from app.core.config import settings
from app.services.prompts import SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
_client = httpx.AsyncClient(timeout=120.0)
class AIServiceError(Exception):
    """Raised when a request to ai-service fails or its reply is unusable."""
async def classify_document(text: str) -> dict:
    """
    Send document text to ai-service for classification.

    Only the first 50,000 characters of text are placed in the prompt. The
    request is POSTed to "<AI_SERVICE_URL>/chat"; the reply body is expected
    to be a JSON object whose "content" field is itself a JSON-encoded
    object, which is decoded and returned.

    Raises AIServiceError when the service cannot be reached, answers with a
    non-200 status, or returns a body that does not have the expected shape.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=text[:50_000])},
    ]
    payload = {
        "messages": messages,
        "max_tokens": 2048,
        "temperature": 0,
        "response_format": "json",
    }

    try:
        response = await _client.post(f"{settings.AI_SERVICE_URL}/chat", json=payload)
    except httpx.RequestError as exc:
        raise AIServiceError(f"Could not reach ai-service: {exc}") from exc

    if response.status_code != 200:
        raise AIServiceError(
            f"ai-service returned {response.status_code}: {response.text[:200]}"
        )

    try:
        content = response.json()["content"]
        result = json.loads(content)
    except (KeyError, TypeError, ValueError) as exc:
        # KeyError: no "content" field.
        # TypeError: body is not a JSON object, or "content" is not a string
        # (e.g. null). Previously this escaped as a raw TypeError.
        # ValueError: body or content is not valid JSON (JSONDecodeError is
        # a ValueError subclass).
        raise AIServiceError(f"Unexpected ai-service response: {exc}") from exc

    # Callers treat the result as a mapping, so a JSON array or scalar is as
    # unusable as malformed JSON.
    if not isinstance(result, dict):
        raise AIServiceError(
            "Unexpected ai-service response: expected a JSON object, "
            f"got {type(result).__name__}"
        )
    return result
@@ -1,18 +1,9 @@
"""
Reads doc_service_config.json from the shared config volume.
Caches the result for 30 seconds to avoid hitting the filesystem on every request.
Uses asyncio.to_thread so the synchronous file read doesn't block the event loop.
30-second TTL cache + env var overrides.
Env var overrides (take precedence over the JSON config file, never committed):
AI_PROVIDER — "lmstudio" | "ollama" | "anthropic"
LMSTUDIO_BASE_URL — e.g. http://host.docker.internal:1234/v1
LMSTUDIO_API_KEY
LMSTUDIO_MODEL
OLLAMA_BASE_URL
OLLAMA_MODEL
OLLAMA_API_KEY
ANTHROPIC_API_KEY
ANTHROPIC_MODEL
Env var overrides (all optional):
DOC_MAX_PDF_MB — max upload size in megabytes (e.g. "50")
"""
import asyncio
import json
@@ -24,15 +15,6 @@ from pathlib import Path
from app.core.config import settings
_DEFAULT_CONFIG: dict = {
"ai": {
# Default: LM Studio running on the host machine at port 1234.
# Inside Docker, host.docker.internal resolves to the host; for local
# dev outside Docker use http://localhost:1234/v1 instead.
"provider": "lmstudio",
"anthropic": {"api_key": "", "model": "claude-haiku-4-5-20251001"},
"ollama": {"base_url": "http://host.docker.internal:11434/v1", "model": "llama3.2", "api_key": "ollama"},
"lmstudio": {"base_url": "http://host.docker.internal:1234/v1", "model": "local-model", "api_key": "lm-studio"},
},
"documents": {"max_pdf_bytes": 20 * 1024 * 1024},
}
@@ -52,43 +34,13 @@ def _read_config_sync() -> dict:
def _apply_env_overrides(config: dict) -> dict:
"""
Merge environment variable overrides into the config dict.
Env vars win over whatever is stored in the JSON file.
This lets the dev .env file pin the AI connection without writing to the
shared volume (which would affect all users).
"""
cfg = deepcopy(config)
ai = cfg.setdefault("ai", {})
if provider := os.environ.get("AI_PROVIDER"):
ai["provider"] = provider
# LM Studio
lms = ai.setdefault("lmstudio", {})
if v := os.environ.get("LMSTUDIO_BASE_URL"):
lms["base_url"] = v
if v := os.environ.get("LMSTUDIO_API_KEY"):
lms["api_key"] = v
if v := os.environ.get("LMSTUDIO_MODEL"):
lms["model"] = v
# Ollama
oll = ai.setdefault("ollama", {})
if v := os.environ.get("OLLAMA_BASE_URL"):
oll["base_url"] = v
if v := os.environ.get("OLLAMA_MODEL"):
oll["model"] = v
if v := os.environ.get("OLLAMA_API_KEY"):
oll["api_key"] = v
# Anthropic
ant = ai.setdefault("anthropic", {})
if v := os.environ.get("ANTHROPIC_API_KEY"):
ant["api_key"] = v
if v := os.environ.get("ANTHROPIC_MODEL"):
ant["model"] = v
docs = cfg.setdefault("documents", {})
if v := os.environ.get("DOC_MAX_PDF_MB"):
try:
docs["max_pdf_bytes"] = int(v) * 1024 * 1024
except ValueError:
pass
return cfg
@@ -1,5 +1,3 @@
from abc import ABC, abstractmethod
SYSTEM_PROMPT = (
"You are a financial document analysis assistant. "
"Given the text extracted from a PDF document, return ONLY a JSON object "
@@ -23,10 +21,3 @@ suggested_categories (array of 2 to 5 short category name strings a user might w
Document text:
{text}"""
class AIProvider(ABC):
    """Abstract interface for a backend that classifies document text."""

    @abstractmethod
    async def classify_document(self, text: str) -> dict:
        """Return structured extraction dict from document text."""