feat(03-04): retire flat-file settings; wire per-user AI config via DB lookup

- config.py: Remove SETTINGS_FILE, DEFAULT_SYSTEM_PROMPT, DEFAULT_SETTINGS
  constants; add system_prompt, default_ai_provider, default_ai_model to Settings
- services/classifier.py: Add _DEFAULT_SYSTEM_PROMPT module constant; classify_document
  and suggest_topics_for_document accept ai_provider/ai_model kwargs and no longer call
  storage.load_settings() — they use app_settings defaults with DB-supplied overrides (D-14, D-15)
- services/storage.py: Delete load_settings, save_settings, mask_api_key, settings_masked;
  remove from __all__; remove import copy, json, DEFAULT_SETTINGS, SETTINGS_FILE (D-12)
- tasks/document_tasks.py: _run resolves user.ai_provider/ai_model via session.get(User,
  doc.user_id) and passes them through to the classifier; task signature unchanged (T-03-19)
- api/settings.py: Deleted — /api/settings endpoint removed (D-12)
- main.py: Remove settings_router import and include_router call
- tests/test_settings.py: Replace all tests with test_settings_endpoint_removed (404, green)
- tests/test_classifier.py: Implement test_per_user_provider, test_celery_task_uses_user_provider,
  test_default_provider_fallback; remove xfail markers (DOC-03, DOC-05)
This commit is contained in:
curo1305
2026-05-23 20:32:55 +02:00
parent aadc69fea0
commit 6849ebd1e6
8 changed files with 193 additions and 316 deletions
+44 -8
View File
@@ -9,6 +9,10 @@ wrapper and from API route handlers that already hold a session.
Updated in Plan 03-03: classify_document uses load_topics_for_user (D-17) to scope
topic lookup to the document owner's namespace, and creates AI-suggested topics in
the user's namespace via create_topic(user_id=doc.user_id) (D-11).
Updated in Plan 03-04: classify_document and suggest_topics_for_document now accept
ai_provider and ai_model kwargs and no longer call storage.load_settings(). The provider
is resolved via get_provider() using per-user settings from the DB (D-14, D-15).
"""
from __future__ import annotations
@@ -16,30 +20,48 @@ import uuid as _uuid
from sqlalchemy.ext.asyncio import AsyncSession
from config import settings as app_settings
from db.models import Document
from services import storage
from ai import get_provider
MAX_AI_CHARS = 8_000
_DEFAULT_SYSTEM_PROMPT = """You are a document classification assistant. When given a document's text content and a list of existing topics, you must:
1. Assign the document to one or more relevant topics from the list.
2. If no existing topics fit well, suggest new topic names.
Return ONLY valid JSON in this exact format, with no additional text or explanation:
{"assigned_topics": ["topic1"], "new_topic_suggestions": ["new topic name"]}
If the document fits no topics and you have no suggestions, return: {"assigned_topics": [], "new_topic_suggestions": []}"""
async def classify_document(
session: AsyncSession,
doc_id: str,
topic_names: list[str] | None = None,
ai_provider: str | None = None,
ai_model: str | None = None,
) -> list[str]:
"""
Classify a document by its ID. Returns the list of assigned topic names.
If topic_names is provided, restrict classification to those topics.
Auto-creates any newly suggested topics in the document owner's namespace (D-11).
ai_provider and ai_model come from the document owner's User record (D-14).
Falls back to app_settings.default_ai_provider / default_ai_model when None (D-15).
"""
meta = await storage.get_metadata(session, doc_id)
if meta is None:
raise ValueError(f"Document {doc_id} not found")
settings = storage.load_settings()
system_prompt = settings.get("system_prompt", "")
provider = get_provider(settings)
_ai_provider = ai_provider or app_settings.default_ai_provider
_ai_model = ai_model or app_settings.default_ai_model
system_prompt = app_settings.system_prompt or _DEFAULT_SYSTEM_PROMPT
_settings = {
"active_provider": _ai_provider,
"providers": {_ai_provider: {"model": _ai_model}},
}
provider = get_provider(_settings)
# Load the Document ORM object to get the owner's user_id (D-11, D-17)
try:
@@ -78,14 +100,28 @@ async def classify_document(
return final_topics
async def suggest_topics_for_document(session: AsyncSession, doc_id: str) -> list[str]:
"""Return AI-suggested topic names without modifying the document."""
async def suggest_topics_for_document(
session: AsyncSession,
doc_id: str,
ai_provider: str | None = None,
ai_model: str | None = None,
) -> list[str]:
"""Return AI-suggested topic names without modifying the document.
ai_provider and ai_model come from the document owner's User record (D-14).
Falls back to app_settings.default_ai_provider / default_ai_model when None (D-15).
"""
meta = await storage.get_metadata(session, doc_id)
if meta is None:
raise ValueError(f"Document {doc_id} not found")
settings = storage.load_settings()
system_prompt = settings.get("system_prompt", "")
provider = get_provider(settings)
_ai_provider = ai_provider or app_settings.default_ai_provider
_ai_model = ai_model or app_settings.default_ai_model
system_prompt = app_settings.system_prompt or _DEFAULT_SYSTEM_PROMPT
_settings = {
"active_provider": _ai_provider,
"providers": {_ai_provider: {"model": _ai_model}},
}
provider = get_provider(_settings)
text = meta.get("extracted_text", "")
return await provider.suggest_topics(text[:MAX_AI_CHARS], system_prompt)
+2 -53
View File
@@ -9,11 +9,8 @@ Public function names are PRESERVED from the old flat-file implementation so
that api/documents.py and api/topics.py can be updated in Plan 05 with minimal
changes (async def + await + session parameter).
Settings functions (load_settings / save_settings) remain sync and flat-file
backed in Phase 1 because the users.ai_provider / users.ai_model schema columns
cannot be populated until Phase 2.
# Phase 2 will migrate this to DB-backed per-user settings (D-03 deferred to
# user-scoped column population).
Phase 3 D-12: load_settings / save_settings / mask_api_key / settings_masked removed.
All AI config comes from DB (users.ai_provider / users.ai_model set by admin).
D-05: Storage service layer switched to PostgreSQL + MinIO.
D-06: Object key schema: {user_id}/{document_id}/{uuid4()}{ext} — human filename in DB only.
@@ -21,8 +18,6 @@ D-03: documents.user_id is None (nullable) in Phase 1 — no auth system yet.
"""
from __future__ import annotations
import copy
import json
import sys
import uuid
from datetime import datetime, timezone
@@ -32,7 +27,6 @@ from sqlalchemy import select, delete, text, or_
from sqlalchemy import func as sql_func
from sqlalchemy.ext.asyncio import AsyncSession
from config import DEFAULT_SETTINGS, SETTINGS_FILE
from db.models import Document, DocumentTopic, Topic
from storage import get_storage_backend
@@ -427,47 +421,6 @@ async def topic_doc_counts(
return {name: count for name, count in q}
# ── Settings ──────────────────────────────────────────────────────────────────
# Phase 2 will move per-user settings to users.ai_provider / users.ai_model
# (D-03 deferred to user-scoped column population).
# For now these remain as flat-file JSON — single-writer, no filelock needed.
def load_settings() -> dict:
    """Load the application settings from the flat-file SETTINGS_FILE.

    Returns the parsed JSON content of SETTINGS_FILE. When the file is
    missing or does not contain valid JSON, a fresh deep copy of
    DEFAULT_SETTINGS is returned instead.

    # Phase 2 will move per-user settings to users.ai_provider / users.ai_model.
    """
    try:
        raw = SETTINGS_FILE.read_text()
        parsed = json.loads(raw)
    except (FileNotFoundError, json.JSONDecodeError):
        # Deep copy so the returned dict never aliases the shared defaults.
        parsed = copy.deepcopy(DEFAULT_SETTINGS)
    return parsed
def save_settings(settings: dict) -> None:
    """Persist *settings* to the flat-file SETTINGS_FILE as indented JSON.

    The file is overwritten with a single write_text call and no file
    lock is taken — the Phase 1 settings file is single-writer.

    # Phase 2 will move per-user settings to users.ai_provider / users.ai_model.
    """
    serialized = json.dumps(settings, indent=2)
    SETTINGS_FILE.write_text(serialized)
def mask_api_key(key: str) -> str:
    """Return a display-safe form of *key*.

    Keys longer than four characters become "****" followed by their
    last four characters. Falsy keys and keys of four characters or
    fewer are fully masked as "****".
    """
    mask = "****"
    if key and len(key) > 4:
        return mask + key[-4:]
    return mask
def settings_masked(settings: dict) -> dict:
    """Return a deep copy of *settings* with provider API keys masked.

    Only the "anthropic" and "openai" entries under "providers" are
    examined; a non-empty "api_key" there is replaced with the result of
    mask_api_key. The input dict itself is not modified.
    """
    masked = copy.deepcopy(settings)
    providers = masked.get("providers", {})
    for provider_name in ("anthropic", "openai"):
        provider_cfg = providers.get(provider_name, {})
        api_key = provider_cfg.get("api_key", "")
        if api_key:
            # provider_cfg is the entry inside the copy, so this edits the copy in place.
            provider_cfg["api_key"] = mask_api_key(api_key)
    return masked
# ── Public surface ─────────────────────────────────────────────────────────────
__all__ = [
@@ -485,8 +438,4 @@ __all__ = [
"update_topic",
"delete_topic",
"topic_doc_counts",
"load_settings",
"save_settings",
"mask_api_key",
"settings_masked",
]