chore: initial commit — existing single-user document scanner codebase
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
Classification orchestrator.
|
||||
Loads settings, selects AI provider, classifies document, auto-creates suggested topics.
|
||||
"""
|
||||
from services import storage
|
||||
from ai import get_provider
|
||||
|
||||
# Maximum number of characters of a document's extracted text that is passed
# to the AI provider; longer text is truncated to this length before the call.
MAX_AI_CHARS = 8_000
|
||||
|
||||
|
||||
def _dedupe_topic_names(names: list[str]) -> list[str]:
    """Strip each name, drop blanks, and remove case-insensitive duplicates.

    The first spelling encountered wins and input order is preserved, so the
    result is deterministic for a given input.
    """
    seen: set[str] = set()
    cleaned: list[str] = []
    for raw in names:
        name = raw.strip()
        key = name.lower()
        if name and key not in seen:
            seen.add(key)
            cleaned.append(name)
    return cleaned


async def classify_document(doc_id: str, topic_names: list[str] | None = None) -> list[str]:
    """Classify a document by its ID and persist the resulting topics.

    Args:
        doc_id: Identifier of the document whose metadata is looked up in
            storage.
        topic_names: Optional list of topic names to offer to the AI provider.
            When ``None``, every topic currently in the registry is offered.

    Returns:
        The topic names stored on the document: everything the provider
        assigned or suggested, stripped of surrounding whitespace, with blanks
        and case-insensitive duplicates removed, in the provider's order
        (assigned topics first, then suggestions).

    Raises:
        ValueError: If no metadata exists for ``doc_id``.
    """
    meta = storage.get_metadata(doc_id)
    if meta is None:
        raise ValueError(f"Document {doc_id} not found")

    settings = storage.load_settings()
    system_prompt = settings.get("system_prompt", "")
    provider = get_provider(settings)

    # Always read the registry: it is the source of truth for "does this topic
    # already exist", even when the caller restricts the candidate list.
    registry_names = [t["name"] for t in storage.load_topics()]
    if topic_names is None:
        topic_names = registry_names

    # `or ""` also covers a key that is present but holds None, which would
    # otherwise make the slice below raise TypeError.
    text = meta.get("extracted_text") or ""
    result = await provider.classify(text[:MAX_AI_CHARS], topic_names, system_prompt)

    # One cleaned list drives both topic creation and what is persisted, so the
    # registry and the document never disagree on spelling or whitespace.
    final_topics = _dedupe_topic_names([*result.topics, *result.suggested_new_topics])

    # Auto-create any topic not already in the registry (case-insensitive).
    known = {name.strip().lower() for name in registry_names}
    for name in final_topics:
        key = name.lower()
        if key not in known:
            storage.create_topic(name)
            known.add(key)

    storage.update_document_topics(doc_id, final_topics)
    return final_topics
|
||||
|
||||
|
||||
async def suggest_topics_for_document(doc_id: str) -> list[str]:
    """Return AI-suggested topic names without modifying the document.

    Looks up the document's metadata, builds the provider from the current
    settings, and asks it for topic suggestions based on the (truncated)
    extracted text. Nothing is written back to storage by this function.

    Args:
        doc_id: Identifier of the document whose metadata is looked up in
            storage.

    Returns:
        Whatever the provider's ``suggest_topics`` call returns (annotated as
        a list of topic names).

    Raises:
        ValueError: If no metadata exists for ``doc_id``.
    """
    meta = storage.get_metadata(doc_id)
    if meta is None:
        raise ValueError(f"Document {doc_id} not found")

    settings = storage.load_settings()
    system_prompt = settings.get("system_prompt", "")
    provider = get_provider(settings)

    # `or ""` also covers a key that is present but holds None, which would
    # otherwise make the slice below raise TypeError.
    text = meta.get("extracted_text") or ""
    return await provider.suggest_topics(text[:MAX_AI_CHARS], system_prompt)
|
||||
Reference in New Issue
Block a user