feat(03-04): retire flat-file settings; wire per-user AI config via DB lookup

- config.py: Remove SETTINGS_FILE, DEFAULT_SYSTEM_PROMPT, DEFAULT_SETTINGS
  constants; add system_prompt, default_ai_provider, default_ai_model to Settings
- services/classifier.py: Add _DEFAULT_SYSTEM_PROMPT module constant; classify_document
  and suggest_topics_for_document accept ai_provider/ai_model kwargs; no longer calls
  storage.load_settings() — uses app_settings defaults with DB-supplied overrides (D-14, D-15)
- services/storage.py: Delete load_settings, save_settings, mask_api_key, settings_masked;
  remove from __all__; remove import copy, json, DEFAULT_SETTINGS, SETTINGS_FILE (D-12)
- tasks/document_tasks.py: _run resolves user.ai_provider/ai_model via session.get(User,
  doc.user_id) and passes through to classifier; task signature unchanged (T-03-19)
- api/settings.py: Deleted — /api/settings endpoint removed (D-12)
- main.py: Remove settings_router import and include_router call
- tests/test_settings.py: Replace all tests with test_settings_endpoint_removed (404, green)
- tests/test_classifier.py: Implement test_per_user_provider, test_celery_task_uses_user_provider,
  test_default_provider_fallback; remove xfail markers (DOC-03, DOC-05)
This commit is contained in:
curo1305
2026-05-23 20:32:55 +02:00
parent aadc69fea0
commit 6849ebd1e6
8 changed files with 193 additions and 316 deletions
+44 -8
View File
@@ -9,6 +9,10 @@ wrapper and from API route handlers that already hold a session.
Updated in Plan 03-03: classify_document uses load_topics_for_user (D-17) to scope
topic lookup to the document owner's namespace, and creates AI-suggested topics in
the user's namespace via create_topic(user_id=doc.user_id) (D-11).
Updated in Plan 03-04: classify_document and suggest_topics_for_document now accept
ai_provider and ai_model kwargs. No longer calls storage.load_settings(). Provider
resolved via get_provider() using per-user settings from DB (D-14, D-15).
"""
from __future__ import annotations
@@ -16,30 +20,48 @@ import uuid as _uuid
from sqlalchemy.ext.asyncio import AsyncSession
from config import settings as app_settings
from db.models import Document
from services import storage
from ai import get_provider
MAX_AI_CHARS = 8_000
_DEFAULT_SYSTEM_PROMPT = """You are a document classification assistant. When given a document's text content and a list of existing topics, you must:
1. Assign the document to one or more relevant topics from the list.
2. If no existing topics fit well, suggest new topic names.
Return ONLY valid JSON in this exact format, with no additional text or explanation:
{"assigned_topics": ["topic1"], "new_topic_suggestions": ["new topic name"]}
If the document fits no topics and you have no suggestions, return: {"assigned_topics": [], "new_topic_suggestions": []}"""
async def classify_document(
session: AsyncSession,
doc_id: str,
topic_names: list[str] | None = None,
ai_provider: str | None = None,
ai_model: str | None = None,
) -> list[str]:
"""
Classify a document by its ID. Returns the list of assigned topic names.
If topic_names is provided, restrict classification to those topics.
Auto-creates any newly suggested topics in the document owner's namespace (D-11).
ai_provider and ai_model come from the document owner's User record (D-14).
Falls back to app_settings.default_ai_provider / default_ai_model when None (D-15).
"""
meta = await storage.get_metadata(session, doc_id)
if meta is None:
raise ValueError(f"Document {doc_id} not found")
settings = storage.load_settings()
system_prompt = settings.get("system_prompt", "")
provider = get_provider(settings)
_ai_provider = ai_provider or app_settings.default_ai_provider
_ai_model = ai_model or app_settings.default_ai_model
system_prompt = app_settings.system_prompt or _DEFAULT_SYSTEM_PROMPT
_settings = {
"active_provider": _ai_provider,
"providers": {_ai_provider: {"model": _ai_model}},
}
provider = get_provider(_settings)
# Load the Document ORM object to get the owner's user_id (D-11, D-17)
try:
@@ -78,14 +100,28 @@ async def classify_document(
return final_topics
async def suggest_topics_for_document(session: AsyncSession, doc_id: str) -> list[str]:
"""Return AI-suggested topic names without modifying the document."""
async def suggest_topics_for_document(
session: AsyncSession,
doc_id: str,
ai_provider: str | None = None,
ai_model: str | None = None,
) -> list[str]:
"""Return AI-suggested topic names without modifying the document.
ai_provider and ai_model come from the document owner's User record (D-14).
Falls back to app_settings.default_ai_provider / default_ai_model when None (D-15).
"""
meta = await storage.get_metadata(session, doc_id)
if meta is None:
raise ValueError(f"Document {doc_id} not found")
settings = storage.load_settings()
system_prompt = settings.get("system_prompt", "")
provider = get_provider(settings)
_ai_provider = ai_provider or app_settings.default_ai_provider
_ai_model = ai_model or app_settings.default_ai_model
system_prompt = app_settings.system_prompt or _DEFAULT_SYSTEM_PROMPT
_settings = {
"active_provider": _ai_provider,
"providers": {_ai_provider: {"model": _ai_model}},
}
provider = get_provider(_settings)
text = meta.get("extracted_text", "")
return await provider.suggest_topics(text[:MAX_AI_CHARS], system_prompt)