"""Application configuration: Phase 1 settings and legacy flat-file constants."""

import json
import os
from pathlib import Path

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Phase 1 settings, populated from environment variables or a ``.env`` file.

    Every attribute carries a default that applies when no value is supplied.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # NOTE(review): several defaults below embed placeholder credentials
    # ("changeme_*", "CHANGEME"); presumably they are overridden through the
    # environment in real deployments — confirm.

    # Legacy flat-file data directory; stays until Plan 05 removes it.
    data_dir: str = "/app/data"

    # PostgreSQL
    database_url: str = (
        "postgresql+psycopg://docuvault_app:changeme_app@postgres:5432/docuvault"
    )
    database_migrate_url: str = (
        "postgresql+psycopg://docuvault_migrate:changeme_migrate@postgres:5432/docuvault"
    )

    # MinIO
    minio_endpoint: str = "minio:9000"
    minio_access_key: str = "docuvault_app"
    minio_secret_key: str = "changeme_minio_app"
    minio_bucket: str = "docuvault"

    # Redis / Celery
    redis_url: str = "redis://:changeme_redis@redis:6379/0"

    # Security: a Phase 2 setting, documented here already; Phase 1 code
    # paths do not read it.
    secret_key: str = "CHANGEME"


# Module-level settings instance, built once at import time.
settings = Settings()


# ------------------------------------------------------------------------------
# Legacy flat-file constants, retained for backward compatibility through
# Wave 4. services/storage.py, services/classifier.py, and api/settings.py
# consume them until Plan 05 rewrites those modules.
# DO NOT DELETE until Plan 05 completes the storage service cutover.
# ------------------------------------------------------------------------------
# Read straight from the process environment, independently of ``Settings``.
DATA_DIR = Path(os.environ.get("DATA_DIR", "/app/data"))
UPLOADS_DIR = DATA_DIR / "uploads"
METADATA_DIR = DATA_DIR / "metadata"
TOPICS_FILE = DATA_DIR / "topics.json"
SETTINGS_FILE = DATA_DIR / "settings.json"

# Default classifier prompt. Adjacent literals are concatenated by the parser,
# so the pieces below form one string with a single line break in the middle.
DEFAULT_SYSTEM_PROMPT = (
    "You are a document classification assistant. "
    "When given a document's text content and a list of existing topics, "
    "you must: "
    "1. Assign the document to one or more relevant topics from the list. "
    "2. If no existing topics fit well, suggest new topic names. \n"
    "Return ONLY valid JSON in this exact format, "
    "with no additional text or explanation: "
    '{"assigned_topics": ["topic1"], '
    '"new_topic_suggestions": ["new topic name"]} '
    "If the document fits no topics and you have no suggestions, return: "
    '{"assigned_topics": [], "new_topic_suggestions": []}'
)

# Initial contents written to SETTINGS_FILE by ensure_data_dirs() when that
# file does not exist yet.
DEFAULT_SETTINGS = {
    "system_prompt": DEFAULT_SYSTEM_PROMPT,
    "active_provider": "lmstudio",
    "providers": {
        "anthropic": {
            "api_key": "",
            "model": "claude-sonnet-4-6",
        },
        "openai": {
            "api_key": "",
            "model": "gpt-4o",
            "base_url": None,
        },
        "ollama": {
            "base_url": "http://host.docker.internal:11434",
            "model": "llama3.2",
        },
        "lmstudio": {
            "base_url": "http://host.docker.internal:1234",
            "model": "gemma-4-e4b-it",
        },
    },
}


def ensure_data_dirs():
    """Create the legacy data directories and seed any missing JSON files.

    The uploads and metadata directories are created (with parents) if absent.
    The topics and settings files are written with their initial contents only
    when they do not already exist; existing files are left untouched.
    """
    for directory in (UPLOADS_DIR, METADATA_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # (target file, initial JSON payload), processed in this order.
    seed_files = (
        (TOPICS_FILE, {"topics": []}),
        (SETTINGS_FILE, DEFAULT_SETTINGS),
    )
    for target, payload in seed_files:
        if target.exists():
            continue
        target.write_text(json.dumps(payload, indent=2))