Add shared ai-service container as AI provider intermediary
All feature containers now POST chat messages to ai-service (port 8010) instead of calling AI providers directly. ai-service routes each request to LM Studio, Ollama, or Anthropic based on /config/ai_service_config.json. The doc-service AI provider classes are removed and replaced by an httpx-based client, ai_client.py. Backend settings are restructured under /api/settings/ai. The frontend gets a dedicated AIAdminSettingsPage and an AI Service card in AppsPage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ class Settings(BaseSettings):
|
||||
DATABASE_URL: str = "postgresql+asyncpg://postgres:password@db:5432/destroying_sap"
|
||||
DATA_DIR: str = "/data/documents"
|
||||
CONFIG_PATH: str = "/config/doc_service_config.json"
|
||||
AI_SERVICE_URL: str = "http://ai-service:8010"
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
|
||||
@@ -17,7 +17,7 @@ from app.models.category import DocumentCategory
|
||||
from app.models.category_assignment import CategoryAssignment
|
||||
from app.models.document import Document
|
||||
from app.schemas.document import DocumentOut, DocumentStatusOut, DocumentTypeUpdate
|
||||
from app.services.ai import get_provider
|
||||
from app.services.ai_client import AIServiceError, classify_document
|
||||
from app.services.config_reader import load_doc_config
|
||||
from app.services.storage import delete_file, get_upload_path, save_upload
|
||||
|
||||
@@ -91,9 +91,7 @@ async def process_document(doc_id: str) -> None:
|
||||
|
||||
try:
|
||||
text = await asyncio.to_thread(_extract_pdf_text, doc.file_path)
|
||||
config = await load_doc_config()
|
||||
provider = get_provider(config["ai"])
|
||||
result = await provider.classify_document(text)
|
||||
result = await classify_document(text)
|
||||
|
||||
doc.raw_text = text[:500_000] # cap stored text at 500k chars
|
||||
doc.extracted_data = json.dumps(result)
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
from app.services.ai.base import AIProvider
|
||||
|
||||
|
||||
def get_provider(ai_config: dict) -> AIProvider:
    """
    Build the AIProvider selected by the 'provider' key of the AI config section.

    ai_config is the 'ai' section of doc_service_config.json, loaded fresh per processing job.
    When 'provider' is absent it defaults to "anthropic". The sub-dict stored under the
    provider's own name (or an empty dict when missing) is handed to the provider class.

    Raises:
        ValueError: if the provider name is not "anthropic", "ollama" or "lmstudio".
    """
    name = ai_config.get("provider", "anthropic")
    settings_for_provider = ai_config.get(name, {})

    # Each provider module is imported inside its own branch, so only the
    # selected provider's module gets loaded.
    if name == "anthropic":
        from app.services.ai.anthropic_provider import AnthropicProvider

        return AnthropicProvider(settings_for_provider)

    if name in ("ollama", "lmstudio"):
        from app.services.ai.openai_compat import OpenAICompatProvider

        return OpenAICompatProvider(settings_for_provider)

    raise ValueError(f"Unknown AI provider: {name!r}")
|
||||
|
||||
|
||||
__all__ = ["AIProvider", "get_provider"]
|
||||
@@ -1,31 +0,0 @@
|
||||
import json
|
||||
|
||||
from anthropic import AsyncAnthropic
|
||||
|
||||
from app.services.ai.base import AIProvider, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
|
||||
|
||||
|
||||
class AnthropicProvider(AIProvider):
    """AIProvider implementation that sends the classification prompt through AsyncAnthropic."""

    def __init__(self, config: dict) -> None:
        """
        Create the async client and pick the model.

        config must contain 'api_key'; 'model' is optional and falls back to
        "claude-haiku-4-5-20251001".
        """
        self._client = AsyncAnthropic(api_key=config["api_key"])
        self._model = config.get("model", "claude-haiku-4-5-20251001")

    async def classify_document(self, text: str) -> dict:
        """
        Classify the document text and return the parsed JSON reply.

        Only the first 100,000 characters of text are placed into the prompt.
        """
        prompt = USER_PROMPT_TEMPLATE.format(text=text[:100_000])
        reply = await self._client.messages.create(
            model=self._model,
            max_tokens=2048,
            system=SYSTEM_PROMPT,
            messages=[{"role": "user", "content": prompt}],
        )
        # The JSON payload is read from the first content block of the reply.
        first_block = reply.content[0]
        return _parse_json(first_block.text.strip())
|
||||
|
||||
|
||||
def _parse_json(raw: str) -> dict:
    """
    Parse the model reply as JSON, tolerating an accidental markdown code fence.

    The model is instructed not to emit fences, but when the reply starts with
    ``` the opening fence, an optional language tag and the closing fence are
    stripped before parsing. Single-line fenced replies (no newline after the
    opening fence) are handled as well.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON
            (including a reply that consists of nothing but a fence).
    """
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence, then everything from the last closing fence on.
        body = text[3:].rsplit("```", 1)[0]
        header, newline, remainder = body.partition("\n")
        starts = [i for i in (header.find("{"), header.find("[")) if i != -1]
        if starts:
            # JSON begins on the fence line itself (e.g. ```{"a": 1}```):
            # keep it, dropping only a leading language tag.
            body = body[min(starts):]
        elif newline:
            # The fence line holds at most a language tag: skip that line.
            body = remainder
        text = body
    return json.loads(text)
|
||||
@@ -1,36 +0,0 @@
|
||||
"""
|
||||
OpenAI-compatible provider for Ollama and LM Studio.
|
||||
Both expose an OpenAI-compatible /v1/chat/completions endpoint.
|
||||
"""
|
||||
import json
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from app.services.ai.base import AIProvider, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
|
||||
|
||||
|
||||
class OpenAICompatProvider(AIProvider):
    """AIProvider implementation for servers exposing an OpenAI-compatible chat completions API."""

    def __init__(self, config: dict) -> None:
        """
        Create the async client.

        config must contain 'base_url' and 'model'; 'api_key' is optional and
        falls back to the placeholder "not-required".
        """
        self._client = AsyncOpenAI(
            base_url=config["base_url"],
            api_key=config.get("api_key", "not-required"),
        )
        self._model = config["model"]

    async def classify_document(self, text: str) -> dict:
        """
        Classify the document text and return the parsed JSON reply.

        Only the first 100,000 characters of text are placed into the prompt.

        Raises:
            ValueError: if the server returns no choices or a message without
                content, or (as json.JSONDecodeError) if the content is not
                valid JSON.
        """
        response = await self._client.chat.completions.create(
            model=self._model,
            temperature=0,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=text[:100_000])},
            ],
        )
        # Guard the two shapes that would otherwise surface as an opaque
        # IndexError / AttributeError: an empty choices list and null content.
        if not response.choices:
            raise ValueError("AI provider returned no choices")
        content = response.choices[0].message.content
        if content is None:
            raise ValueError("AI provider returned a message without content")
        return _parse_json(content.strip())
|
||||
|
||||
|
||||
def _parse_json(raw: str) -> dict:
    """
    Parse the model reply as JSON, tolerating an accidental markdown code fence.

    When the reply starts with ``` the opening fence, an optional language tag
    and the closing fence are stripped before parsing. Single-line fenced
    replies (no newline after the opening fence) are handled as well.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON
            (including a reply that consists of nothing but a fence).
    """
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence, then everything from the last closing fence on.
        body = text[3:].rsplit("```", 1)[0]
        header, newline, remainder = body.partition("\n")
        starts = [i for i in (header.find("{"), header.find("[")) if i != -1]
        if starts:
            # JSON begins on the fence line itself (e.g. ```{"a": 1}```):
            # keep it, dropping only a leading language tag.
            body = body[min(starts):]
        elif newline:
            # The fence line holds at most a language tag: skip that line.
            body = remainder
        text = body
    return json.loads(text)
|
||||
@@ -0,0 +1,49 @@
|
||||
"""HTTP client for the shared ai-service container."""
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.config import settings
|
||||
from app.services.prompts import SYSTEM_PROMPT, USER_PROMPT_TEMPLATE
|
||||
|
||||
_client = httpx.AsyncClient(timeout=120.0)
|
||||
|
||||
|
||||
class AIServiceError(Exception):
    """Raised when ai-service cannot be reached or returns an unusable response."""
|
||||
|
||||
|
||||
async def classify_document(text: str) -> dict:
    """
    Send document text to ai-service for classification.

    Only the first 50,000 characters of text are placed into the prompt. The
    request is POSTed to ``{settings.AI_SERVICE_URL}/chat`` and the reply's
    "content" field is parsed as JSON.

    Returns:
        The parsed JSON result dict.

    Raises:
        AIServiceError: if ai-service cannot be reached, answers with a
            non-200 status, or returns a body that is not the expected shape
            (not JSON, no "content" key, non-string content, or content that
            does not decode to a JSON object).
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=text[:50_000])},
    ]

    try:
        response = await _client.post(
            f"{settings.AI_SERVICE_URL}/chat",
            json={
                "messages": messages,
                "max_tokens": 2048,
                "temperature": 0,
                "response_format": "json",
            },
        )
    except httpx.RequestError as exc:
        raise AIServiceError(f"Could not reach ai-service: {exc}") from exc

    if response.status_code != 200:
        raise AIServiceError(
            f"ai-service returned {response.status_code}: {response.text[:200]}"
        )

    try:
        content = response.json()["content"]
        result = json.loads(content)
    except (KeyError, TypeError, ValueError) as exc:
        # TypeError: the body is not a JSON object, or "content" is not a string.
        # ValueError: covers json.JSONDecodeError for the body or the content.
        raise AIServiceError(f"Unexpected ai-service response: {exc}") from exc

    if not isinstance(result, dict):
        raise AIServiceError(
            "Unexpected ai-service response: expected a JSON object, "
            f"got {type(result).__name__}"
        )
    return result
|
||||
@@ -1,18 +1,9 @@
|
||||
"""
|
||||
Reads doc_service_config.json from the shared config volume.
|
||||
Caches the result for 30 seconds to avoid hitting the filesystem on every request.
|
||||
Uses asyncio.to_thread so the synchronous file read doesn't block the event loop.
|
||||
30-second TTL cache + env var overrides.
|
||||
|
||||
Env var overrides (take precedence over the JSON config file, never committed):
|
||||
AI_PROVIDER — "lmstudio" | "ollama" | "anthropic"
|
||||
LMSTUDIO_BASE_URL — e.g. http://host.docker.internal:1234/v1
|
||||
LMSTUDIO_API_KEY
|
||||
LMSTUDIO_MODEL
|
||||
OLLAMA_BASE_URL
|
||||
OLLAMA_MODEL
|
||||
OLLAMA_API_KEY
|
||||
ANTHROPIC_API_KEY
|
||||
ANTHROPIC_MODEL
|
||||
Env var overrides (all optional):
|
||||
DOC_MAX_PDF_MB — max upload size in megabytes (e.g. "50")
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
@@ -24,15 +15,6 @@ from pathlib import Path
|
||||
from app.core.config import settings
|
||||
|
||||
_DEFAULT_CONFIG: dict = {
|
||||
"ai": {
|
||||
# Default: LM Studio running on the host machine at port 1234.
|
||||
# Inside Docker, host.docker.internal resolves to the host; for local
|
||||
# dev outside Docker use http://localhost:1234/v1 instead.
|
||||
"provider": "lmstudio",
|
||||
"anthropic": {"api_key": "", "model": "claude-haiku-4-5-20251001"},
|
||||
"ollama": {"base_url": "http://host.docker.internal:11434/v1", "model": "llama3.2", "api_key": "ollama"},
|
||||
"lmstudio": {"base_url": "http://host.docker.internal:1234/v1", "model": "local-model", "api_key": "lm-studio"},
|
||||
},
|
||||
"documents": {"max_pdf_bytes": 20 * 1024 * 1024},
|
||||
}
|
||||
|
||||
@@ -52,43 +34,13 @@ def _read_config_sync() -> dict:
|
||||
|
||||
|
||||
def _apply_env_overrides(config: dict) -> dict:
    """
    Merge environment variable overrides into a copy of the config dict.

    Env vars win over whatever is stored in the JSON file.
    This lets the dev .env file pin the AI connection without writing to the
    shared volume (which would affect all users).

    The input dict is not modified; a deep copy is returned. The 'ai' section
    and its 'lmstudio', 'ollama' and 'anthropic' sub-sections, as well as the
    'documents' section, are created when missing. Empty env vars are ignored.

    DOC_MAX_PDF_MB must be a positive integer number of megabytes; any other
    value is ignored with a logged warning instead of being silently dropped
    or producing a zero/negative size limit.
    """
    import logging  # local import: this block cannot rely on a file-level one

    cfg = deepcopy(config)
    ai = cfg.setdefault("ai", {})

    if provider := os.environ.get("AI_PROVIDER"):
        ai["provider"] = provider

    # (config section, key inside the section, environment variable)
    provider_overrides = (
        ("lmstudio", "base_url", "LMSTUDIO_BASE_URL"),
        ("lmstudio", "api_key", "LMSTUDIO_API_KEY"),
        ("lmstudio", "model", "LMSTUDIO_MODEL"),
        ("ollama", "base_url", "OLLAMA_BASE_URL"),
        ("ollama", "model", "OLLAMA_MODEL"),
        ("ollama", "api_key", "OLLAMA_API_KEY"),
        ("anthropic", "api_key", "ANTHROPIC_API_KEY"),
        ("anthropic", "model", "ANTHROPIC_MODEL"),
    )
    for section, key, env_name in provider_overrides:
        # The section is created even when no override is set.
        section_cfg = ai.setdefault(section, {})
        if value := os.environ.get(env_name):
            section_cfg[key] = value

    docs = cfg.setdefault("documents", {})
    if raw_mb := os.environ.get("DOC_MAX_PDF_MB"):
        try:
            megabytes = int(raw_mb)
        except ValueError:
            megabytes = None
        if megabytes is not None and megabytes > 0:
            docs["max_pdf_bytes"] = megabytes * 1024 * 1024
        else:
            logging.getLogger(__name__).warning(
                "Ignoring invalid DOC_MAX_PDF_MB=%r; expected a positive integer",
                raw_mb,
            )
    return cfg
|
||||
|
||||
|
||||
|
||||
-9
@@ -1,5 +1,3 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
SYSTEM_PROMPT = (
|
||||
"You are a financial document analysis assistant. "
|
||||
"Given the text extracted from a PDF document, return ONLY a JSON object "
|
||||
@@ -23,10 +21,3 @@ suggested_categories (array of 2 to 5 short category name strings a user might w
|
||||
|
||||
Document text:
|
||||
{text}"""
|
||||
|
||||
|
||||
class AIProvider(ABC):
    """Abstract interface that every document-classification backend implements."""

    @abstractmethod
    async def classify_document(self, text: str) -> dict:
        """Classify the given document text and return the structured extraction dict."""
        ...
|
||||
Reference in New Issue
Block a user