feat(03-04): retire flat-file settings; wire per-user AI config via DB lookup
- config.py: Remove SETTINGS_FILE, DEFAULT_SYSTEM_PROMPT, DEFAULT_SETTINGS constants; add system_prompt, default_ai_provider, default_ai_model to Settings
- services/classifier.py: Add _DEFAULT_SYSTEM_PROMPT module constant; classify_document and suggest_topics_for_document accept ai_provider/ai_model kwargs; no longer calls storage.load_settings() — uses app_settings defaults with DB-supplied overrides (D-14, D-15)
- services/storage.py: Delete load_settings, save_settings, mask_api_key, settings_masked; remove from __all__; remove import copy, json, DEFAULT_SETTINGS, SETTINGS_FILE (D-12)
- tasks/document_tasks.py: _run resolves user.ai_provider/ai_model via session.get(User, doc.user_id) and passes through to classifier; task signature unchanged (T-03-19)
- api/settings.py: Deleted — /api/settings endpoint removed (D-12)
- main.py: Remove settings_router import and include_router call
- tests/test_settings.py: Replace all tests with test_settings_endpoint_removed (404, green)
- tests/test_classifier.py: Implement test_per_user_provider, test_celery_task_uses_user_provider, test_default_provider_fallback; remove xfail markers (DOC-03, DOC-05)
This commit is contained in:
@@ -9,6 +9,10 @@ wrapper and from API route handlers that already hold a session.
|
||||
Updated in Plan 03-03: classify_document uses load_topics_for_user (D-17) to scope
|
||||
topic lookup to the document owner's namespace, and creates AI-suggested topics in
|
||||
the user's namespace via create_topic(user_id=doc.user_id) (D-11).
|
||||
|
||||
Updated in Plan 03-04: classify_document and suggest_topics_for_document now accept
|
||||
ai_provider and ai_model kwargs. No longer calls storage.load_settings(). Provider
|
||||
resolved via get_provider() using per-user settings from DB (D-14, D-15).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -16,30 +20,48 @@ import uuid as _uuid
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from config import settings as app_settings
|
||||
from db.models import Document
|
||||
from services import storage
|
||||
from ai import get_provider
|
||||
|
||||
# Maximum number of characters of a document's extracted text that is passed
# to the AI provider; longer text is truncated to this length before the call.
MAX_AI_CHARS = 8_000
|
||||
|
||||
_DEFAULT_SYSTEM_PROMPT = """You are a document classification assistant. When given a document's text content and a list of existing topics, you must:
|
||||
1. Assign the document to one or more relevant topics from the list.
|
||||
2. If no existing topics fit well, suggest new topic names.
|
||||
Return ONLY valid JSON in this exact format, with no additional text or explanation:
|
||||
{"assigned_topics": ["topic1"], "new_topic_suggestions": ["new topic name"]}
|
||||
If the document fits no topics and you have no suggestions, return: {"assigned_topics": [], "new_topic_suggestions": []}"""
|
||||
|
||||
|
||||
async def classify_document(
|
||||
session: AsyncSession,
|
||||
doc_id: str,
|
||||
topic_names: list[str] | None = None,
|
||||
ai_provider: str | None = None,
|
||||
ai_model: str | None = None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Classify a document by its ID. Returns the list of assigned topic names.
|
||||
If topic_names is provided, restrict classification to those topics.
|
||||
Auto-creates any newly suggested topics in the document owner's namespace (D-11).
|
||||
|
||||
ai_provider and ai_model come from the document owner's User record (D-14).
|
||||
Falls back to app_settings.default_ai_provider / default_ai_model when None (D-15).
|
||||
"""
|
||||
meta = await storage.get_metadata(session, doc_id)
|
||||
if meta is None:
|
||||
raise ValueError(f"Document {doc_id} not found")
|
||||
|
||||
settings = storage.load_settings()
|
||||
system_prompt = settings.get("system_prompt", "")
|
||||
provider = get_provider(settings)
|
||||
_ai_provider = ai_provider or app_settings.default_ai_provider
|
||||
_ai_model = ai_model or app_settings.default_ai_model
|
||||
system_prompt = app_settings.system_prompt or _DEFAULT_SYSTEM_PROMPT
|
||||
_settings = {
|
||||
"active_provider": _ai_provider,
|
||||
"providers": {_ai_provider: {"model": _ai_model}},
|
||||
}
|
||||
provider = get_provider(_settings)
|
||||
|
||||
# Load the Document ORM object to get the owner's user_id (D-11, D-17)
|
||||
try:
|
||||
@@ -78,14 +100,28 @@ async def classify_document(
|
||||
return final_topics
|
||||
|
||||
|
||||
async def suggest_topics_for_document(
    session: AsyncSession,
    doc_id: str,
    ai_provider: str | None = None,
    ai_model: str | None = None,
) -> list[str]:
    """Return AI-suggested topic names without modifying the document.

    ai_provider and ai_model come from the document owner's User record (D-14).
    Falls back to app_settings.default_ai_provider / default_ai_model when None (D-15).

    Args:
        session: Async DB session used to fetch the document metadata.
        doc_id: ID of the document to suggest topics for.
        ai_provider: Provider name override; a falsy value selects the app default.
        ai_model: Model name override; a falsy value selects the app default.

    Returns:
        The topic names returned by the provider's suggest_topics call.

    Raises:
        ValueError: If no metadata is found for doc_id.
    """
    meta = await storage.get_metadata(session, doc_id)
    if meta is None:
        raise ValueError(f"Document {doc_id} not found")

    # Flat-file settings are retired: configuration is built only from the
    # caller-supplied overrides and the application-level defaults.
    _ai_provider = ai_provider or app_settings.default_ai_provider
    _ai_model = ai_model or app_settings.default_ai_model
    system_prompt = app_settings.system_prompt or _DEFAULT_SYSTEM_PROMPT

    # get_provider() is handed the same dict layout the old settings file used.
    _settings = {
        "active_provider": _ai_provider,
        "providers": {_ai_provider: {"model": _ai_model}},
    }
    provider = get_provider(_settings)

    # Treat a missing or None extracted_text as empty so slicing cannot fail.
    text = meta.get("extracted_text") or ""
    return await provider.suggest_topics(text[:MAX_AI_CHARS], system_prompt)
|
||||
|
||||
@@ -9,11 +9,8 @@ Public function names are PRESERVED from the old flat-file implementation so
|
||||
that api/documents.py and api/topics.py can be updated in Plan 05 with minimal
|
||||
changes (async def + await + session parameter).
|
||||
|
||||
Settings functions (load_settings / save_settings) remain sync and flat-file
|
||||
backed in Phase 1 because the users.ai_provider / users.ai_model schema columns
|
||||
cannot be populated until Phase 2.
|
||||
# Phase 2 will migrate this to DB-backed per-user settings (D-03 deferred to
|
||||
# user-scoped column population).
|
||||
Phase 3 D-12: load_settings / save_settings / mask_api_key / settings_masked removed.
|
||||
All AI config comes from DB (users.ai_provider / users.ai_model set by admin).
|
||||
|
||||
D-05: Storage service layer switched to PostgreSQL + MinIO.
|
||||
D-06: Object key schema: {user_id}/{document_id}/{uuid4()}{ext} — human filename in DB only.
|
||||
@@ -21,8 +18,6 @@ D-03: documents.user_id is None (nullable) in Phase 1 — no auth system yet.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
@@ -32,7 +27,6 @@ from sqlalchemy import select, delete, text, or_
|
||||
from sqlalchemy import func as sql_func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from config import DEFAULT_SETTINGS, SETTINGS_FILE
|
||||
from db.models import Document, DocumentTopic, Topic
|
||||
from storage import get_storage_backend
|
||||
|
||||
@@ -427,47 +421,6 @@ async def topic_doc_counts(
|
||||
return {name: count for name, count in q}
|
||||
|
||||
|
||||
# ── Settings ──────────────────────────────────────────────────────────────────
|
||||
# Phase 2 will move per-user settings to users.ai_provider / users.ai_model
|
||||
# (D-03 deferred to user-scoped column population).
|
||||
# For now these remain as flat-file JSON — single-writer, no filelock needed.
|
||||
|
||||
def load_settings() -> dict:
    """Load the application settings stored in SETTINGS_FILE.

    Returns the parsed JSON content of the file. When the file does not exist
    or does not contain valid JSON, a fresh deep copy of DEFAULT_SETTINGS is
    returned instead, so callers can mutate the result freely.
    """
    try:
        raw = SETTINGS_FILE.read_text()
        loaded = json.loads(raw)
    except (FileNotFoundError, json.JSONDecodeError):
        loaded = copy.deepcopy(DEFAULT_SETTINGS)
    return loaded
|
||||
|
||||
|
||||
def save_settings(settings: dict) -> None:
    """Persist *settings* to SETTINGS_FILE as JSON indented by two spaces.

    The file is written directly, without any file locking.
    """
    serialized = json.dumps(settings, indent=2)
    SETTINGS_FILE.write_text(serialized)
|
||||
|
||||
|
||||
def mask_api_key(key: str) -> str:
    """Return *key* with everything except its last four characters hidden.

    Empty values and keys of four characters or fewer are replaced entirely by
    "****", so a short key is never partially revealed.
    """
    placeholder = "****"
    reveals_tail = bool(key) and len(key) > 4
    return placeholder + key[-4:] if reveals_tail else placeholder
|
||||
|
||||
|
||||
def settings_masked(settings: dict) -> dict:
    """Return a deep copy of *settings* with every provider API key masked.

    The input dict is never modified. Every entry under "providers" that
    carries a non-empty "api_key" has it replaced by mask_api_key(key), not
    only a fixed list of provider names, so adding a provider cannot leak its
    key. A missing or None "providers" section and non-dict provider entries
    are left untouched instead of raising.

    Args:
        settings: Settings dict, optionally containing a "providers" mapping
            of provider name to that provider's configuration dict.

    Returns:
        A new settings dict that is safe to expose.
    """
    masked = copy.deepcopy(settings)
    providers = masked.get("providers")
    if not isinstance(providers, dict):
        return masked
    for provider_cfg in providers.values():
        if not isinstance(provider_cfg, dict):
            continue
        api_key = provider_cfg.get("api_key", "")
        if api_key:
            provider_cfg["api_key"] = mask_api_key(api_key)
    return masked
|
||||
|
||||
|
||||
# ── Public surface ─────────────────────────────────────────────────────────────
|
||||
|
||||
__all__ = [
|
||||
@@ -485,8 +438,4 @@ __all__ = [
|
||||
"update_topic",
|
||||
"delete_topic",
|
||||
"topic_doc_counts",
|
||||
"load_settings",
|
||||
"save_settings",
|
||||
"mask_api_key",
|
||||
"settings_masked",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user