"""Filesystem locations and default settings for the document classifier.

All paths are derived from ``DATA_DIR``. ``ensure_data_dirs`` creates the
directory layout and seeds the JSON files that do not exist yet.
"""

import json
import os
from pathlib import Path

# Root of all persisted data; taken from the DATA_DIR environment variable,
# falling back to /app/data when it is unset.
DATA_DIR = Path(os.environ.get("DATA_DIR", "/app/data"))
UPLOADS_DIR = DATA_DIR / "uploads"
METADATA_DIR = DATA_DIR / "metadata"
TOPICS_FILE = DATA_DIR / "topics.json"
SETTINGS_FILE = DATA_DIR / "settings.json"

# NOTE(review): the prompt is kept as a single line, exactly as it appears in
# the reviewed source. Confirm whether line breaks between the numbered steps
# and the JSON examples were intended.
DEFAULT_SYSTEM_PROMPT = (
    "You are a document classification assistant. "
    "When given a document's text content and a list of existing topics, "
    "you must: "
    "1. Assign the document to one or more relevant topics from the list. "
    "2. If no existing topics fit well, suggest new topic names. "
    "Return ONLY valid JSON in this exact format, with no additional text "
    "or explanation: "
    '{"assigned_topics": ["topic1"], '
    '"new_topic_suggestions": ["new topic name"]} '
    "If the document fits no topics and you have no suggestions, return: "
    '{"assigned_topics": [], "new_topic_suggestions": []}'
)

# Settings written to SETTINGS_FILE the first time the data directory is set up.
DEFAULT_SETTINGS = {
    "system_prompt": DEFAULT_SYSTEM_PROMPT,
    "active_provider": "lmstudio",
    "providers": {
        "anthropic": {
            "api_key": "",
            "model": "claude-sonnet-4-6",
        },
        "openai": {
            "api_key": "",
            "model": "gpt-4o",
            "base_url": None,
        },
        "ollama": {
            "base_url": "http://host.docker.internal:11434",
            "model": "llama3.2",
        },
        "lmstudio": {
            "base_url": "http://host.docker.internal:1234",
            "model": "gemma-4-e4b-it",
        },
    },
}


def _write_json_if_missing(path: Path, payload: object) -> None:
    """Write *payload* to *path* as indented JSON unless *path* already exists."""
    if path.exists():
        # Never overwrite a file that is already there.
        return
    # Explicit encoding so the result does not depend on the platform locale.
    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")


def ensure_data_dirs() -> None:
    """Create the data directories and seed the default JSON files.

    Creates ``UPLOADS_DIR`` and ``METADATA_DIR`` (including missing parents),
    then writes ``TOPICS_FILE`` with an empty topic list and ``SETTINGS_FILE``
    with ``DEFAULT_SETTINGS``. Files that already exist are left untouched,
    so the function can be called repeatedly.

    Raises:
        OSError: If a directory or file cannot be created.
    """
    for directory in (UPLOADS_DIR, METADATA_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    _write_json_if_missing(TOPICS_FILE, {"topics": []})
    _write_json_if_missing(SETTINGS_FILE, DEFAULT_SETTINGS)