7a34807fa0
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
52 lines
1.7 KiB
Python
52 lines
1.7 KiB
Python
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Root directory for everything this module persists; the DATA_DIR
# environment variable overrides the "/app/data" default.
DATA_DIR = Path(os.getenv("DATA_DIR", "/app/data"))

# Sub-directories of the data root.
UPLOADS_DIR = DATA_DIR.joinpath("uploads")
METADATA_DIR = DATA_DIR.joinpath("metadata")

# JSON files kept directly inside the data root.
TOPICS_FILE = DATA_DIR.joinpath("topics.json")
SETTINGS_FILE = DATA_DIR.joinpath("settings.json")
|
|
|
|
# Default text for the "system_prompt" setting. It instructs the model to
# classify a document against a list of topics and to answer with a JSON
# object holding "assigned_topics" and "new_topic_suggestions".
# Each "\n" below marks a line break in the prompt; adjacent literals are
# concatenated by the parser, so the value is a single string.
DEFAULT_SYSTEM_PROMPT = (
    "You are a document classification assistant. "
    "When given a document's text content and a list of existing topics, you must:\n"
    "1. Assign the document to one or more relevant topics from the list.\n"
    "2. If no existing topics fit well, suggest new topic names.\n"
    "Return ONLY valid JSON in this exact format, "
    "with no additional text or explanation:\n"
    '{"assigned_topics": ["topic1"], "new_topic_suggestions": ["new topic name"]}\n'
    "If the document fits no topics and you have no suggestions, return: "
    '{"assigned_topics": [], "new_topic_suggestions": []}'
)
|
|
|
|
# Settings used to seed SETTINGS_FILE when that file does not exist yet.
# Key order is significant only in that it determines the order of the
# serialized JSON.
DEFAULT_SETTINGS = {
    "system_prompt": DEFAULT_SYSTEM_PROMPT,
    # Name of the entry under "providers" that is selected by default.
    "active_provider": "lmstudio",
    # Per-provider connection details, keyed by provider name.
    "providers": {
        "anthropic": {
            "api_key": "",
            "model": "claude-sonnet-4-6",
        },
        "openai": {
            "api_key": "",
            "model": "gpt-4o",
            # NOTE(review): None presumably means "use the client's default
            # endpoint" - confirm against the code that reads this value.
            "base_url": None,
        },
        "ollama": {
            "base_url": "http://host.docker.internal:11434",
            "model": "llama3.2",
        },
        "lmstudio": {
            "base_url": "http://host.docker.internal:1234",
            "model": "gemma-4-e4b-it",
        },
    },
}
|
|
|
|
|
|
def ensure_data_dirs():
    """Create the storage directories and seed the JSON files if missing.

    Both UPLOADS_DIR and METADATA_DIR are created (along with any missing
    parent directories). TOPICS_FILE and SETTINGS_FILE are then written
    with their initial content only when they do not already exist, so
    existing files are never overwritten.
    """
    for directory in (UPLOADS_DIR, METADATA_DIR):
        # exist_ok makes repeated calls harmless.
        directory.mkdir(parents=True, exist_ok=True)

    # (target file, initial content) pairs, processed in this order.
    seed_files = (
        (TOPICS_FILE, {"topics": []}),
        (SETTINGS_FILE, DEFAULT_SETTINGS),
    )
    for target, initial_content in seed_files:
        if target.exists():
            continue
        target.write_text(json.dumps(initial_content, indent=2))
|