chore: initial commit — existing single-user document scanner codebase
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,187 @@
|
||||
import json
|
||||
import uuid
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from filelock import FileLock
|
||||
from config import UPLOADS_DIR, METADATA_DIR, TOPICS_FILE, SETTINGS_FILE, DEFAULT_SETTINGS
|
||||
|
||||
|
||||
# ── File locks ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Shared lock objects used by the topic and settings load/save helpers below.
# Each lock file lives next to the file it guards, with ".lock" appended.
_topics_lock = FileLock(f"{TOPICS_FILE}.lock")
_settings_lock = FileLock(f"{SETTINGS_FILE}.lock")
|
||||
|
||||
|
||||
# ── Documents ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def save_upload(file_bytes: bytes, original_name: str, mime_type: str) -> dict:
    """Store uploaded bytes under a freshly generated document id.

    The file is written to ``UPLOADS_DIR`` as ``<uuid4><ext>``, where the
    extension is the lower-cased suffix of ``original_name``.

    Args:
        file_bytes: Raw content of the upload.
        original_name: Original file name; only its suffix is used.
        mime_type: Accepted but not used by this function.

    Returns:
        A dict with the new ``id``, the stored ``filename`` and the full
        ``path`` (as a string).
    """
    new_id = str(uuid.uuid4())
    extension = Path(original_name).suffix.lower()
    stored_name = new_id + extension
    target = UPLOADS_DIR / stored_name
    target.write_bytes(file_bytes)
    return {"id": new_id, "filename": stored_name, "path": str(target)}
|
||||
|
||||
|
||||
def save_metadata(meta: dict) -> None:
    """Write a document's metadata to ``METADATA_DIR/<id>.json``.

    The file is written while holding a per-document file lock
    (``<id>.json.lock`` next to the metadata file).

    Args:
        meta: Metadata mapping; must contain an ``"id"`` key and be
            JSON-serialisable.
    """
    target = METADATA_DIR / f"{meta['id']}.json"
    with FileLock(f"{target}.lock"):
        serialized = json.dumps(meta, indent=2, ensure_ascii=False)
        target.write_text(serialized)
|
||||
|
||||
|
||||
def get_metadata(doc_id: str) -> dict | None:
    """Load the stored metadata for one document.

    Args:
        doc_id: Identifier of the document; the metadata is read from
            ``METADATA_DIR/<doc_id>.json``.

    Returns:
        The decoded JSON content, or ``None`` when no metadata file exists.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = METADATA_DIR / f"{doc_id}.json"
    if not path.exists():
        return None
    try:
        raw = path.read_text()
    except FileNotFoundError:
        # The file was removed between the existence check and the read
        # (for example by delete_document); report the document as missing
        # instead of letting the error escape.
        return None
    return json.loads(raw)
|
||||
|
||||
|
||||
def list_metadata(topic: str | None = None) -> list[dict]:
    """Return all readable document metadata, most recently modified first.

    Args:
        topic: When given (and non-empty), only documents whose ``"topics"``
            list contains this value are returned.

    Returns:
        Decoded metadata ordered by the metadata file's modification time,
        newest first. Files that disappear, cannot be read or do not contain
        valid JSON are skipped.
    """
    # Capture each file's mtime up front and tolerate files that vanish
    # between the directory scan and the stat() call; an unguarded stat() in
    # the sort key would abort the whole listing.
    stamped: list[tuple[float, Path]] = []
    for path in METADATA_DIR.glob("*.json"):
        try:
            stamped.append((path.stat().st_mtime, path))
        except OSError:
            continue
    stamped.sort(key=lambda pair: pair[0], reverse=True)

    docs = []
    for _, path in stamped:
        try:
            meta = json.loads(path.read_text())
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            continue
        if topic and topic not in meta.get("topics", []):
            continue
        docs.append(meta)
    return docs
|
||||
|
||||
|
||||
def delete_document(doc_id: str) -> bool:
    """Delete a document's upload, its metadata file and the metadata lock file.

    Args:
        doc_id: Identifier of the document to remove.

    Returns:
        ``True`` if a metadata file existed for ``doc_id`` and the document
        was removed, ``False`` if there was no such document.

    Raises:
        json.JSONDecodeError: If the metadata file is not valid JSON.
    """
    meta_path = METADATA_DIR / f"{doc_id}.json"
    if not meta_path.exists():
        return False

    meta = json.loads(meta_path.read_text())
    filename = meta.get("filename", "")
    # Without a file name, ``UPLOADS_DIR / ""`` is the uploads directory
    # itself, and unlinking a directory raises. Only remove a regular file.
    if filename:
        upload_path = UPLOADS_DIR / filename
        if upload_path.is_file():
            upload_path.unlink(missing_ok=True)

    # missing_ok: the file may already be gone by the time we get here.
    meta_path.unlink(missing_ok=True)
    Path(str(meta_path) + ".lock").unlink(missing_ok=True)
    return True
|
||||
|
||||
|
||||
def update_document_topics(doc_id: str, topics: list[str]) -> dict | None:
    """Replace a document's topic list and stamp the classification time.

    Args:
        doc_id: Identifier of the document to update.
        topics: New topic names; stored as-is under ``"topics"``.

    Returns:
        The updated metadata after it has been saved, or ``None`` when
        ``get_metadata`` finds nothing for ``doc_id``.
    """
    record = get_metadata(doc_id)
    if record is not None:
        stamp = datetime.now(timezone.utc).isoformat()
        record["topics"] = topics
        record["classified_at"] = stamp  # UTC, ISO-8601
        save_metadata(record)
    return record
|
||||
|
||||
|
||||
def _read_metadata_file(path: Path) -> dict | None:
    """Return the decoded JSON in *path*, or ``None`` if it cannot be read."""
    try:
        return json.loads(path.read_text())
    except (OSError, ValueError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return None


def remove_topic_from_all_documents(topic_name: str) -> int:
    """Remove a topic name from all documents. Returns number of docs updated.

    Every ``*.json`` file in ``METADATA_DIR`` is inspected; files that cannot
    be read or parsed are skipped.

    Args:
        topic_name: Topic name to strip from each document's ``"topics"`` list.

    Returns:
        The number of metadata files that were rewritten.
    """
    count = 0
    for p in METADATA_DIR.glob("*.json"):
        # Cheap unlocked pre-check so untouched documents never need a lock.
        meta = _read_metadata_file(p)
        if meta is None or topic_name not in meta.get("topics", []):
            continue
        with FileLock(str(p) + ".lock"):
            # Re-read under the lock: the copy read above may be stale, and
            # writing it back would discard a change saved in the meantime.
            meta = _read_metadata_file(p)
            if meta is None or topic_name not in meta.get("topics", []):
                continue
            meta["topics"] = [t for t in meta["topics"] if t != topic_name]
            p.write_text(json.dumps(meta, indent=2, ensure_ascii=False))
            count += 1
    return count
|
||||
|
||||
|
||||
# ── Topics ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def load_topics() -> list[dict]:
    """Read the topic list from ``TOPICS_FILE``.

    The file is read while holding the module's topics lock.

    Returns:
        The value stored under the ``"topics"`` key, or an empty list when
        that key is absent.
    """
    with _topics_lock:
        raw = TOPICS_FILE.read_text()
    return json.loads(raw).get("topics", [])
|
||||
|
||||
|
||||
def save_topics(topics: list[dict]) -> None:
    """Overwrite ``TOPICS_FILE`` with the given topics.

    The file is written while holding the module's topics lock.

    Args:
        topics: Topic dicts to persist; stored as ``{"topics": [...]}``.
    """
    with _topics_lock:
        document = {"topics": topics}
        TOPICS_FILE.write_text(json.dumps(document, indent=2))
|
||||
|
||||
|
||||
def get_topic(topic_id: str) -> dict | None:
    """Look up a single topic by its id.

    Args:
        topic_id: Value to match against each topic's ``"id"``.

    Returns:
        The first matching topic dict, or ``None`` if no topic has that id.
    """
    for candidate in load_topics():
        if candidate["id"] == topic_id:
            return candidate
    return None
|
||||
|
||||
|
||||
def create_topic(name: str, description: str = "", color: str = "#6366f1") -> dict:
    """Create a topic, or return the existing topic with the same name.

    Names are compared case-insensitively. When a match exists it is returned
    unchanged and nothing is written.

    Args:
        name: Display name of the topic.
        description: Optional free-text description.
        color: Colour string stored with the topic.

    Returns:
        The newly created topic dict (with a generated 8-character ``id``),
        or the already existing topic of the same name.
    """
    wanted = name.lower()
    # Hold the topics lock across the whole load -> check -> save sequence.
    # load_topics()/save_topics() only lock their own file access, so without
    # this two concurrent callers could both pass the duplicate check and one
    # write would overwrite the other. Re-entering the same FileLock object
    # inside those helpers is safe because it is re-entrant.
    with _topics_lock:
        topics = load_topics()
        # Deduplicate by name (case-insensitive)
        existing = next((t for t in topics if t["name"].lower() == wanted), None)
        if existing is not None:
            return existing
        topic = {
            "id": str(uuid.uuid4())[:8],
            "name": name,
            "description": description,
            "color": color,
        }
        topics.append(topic)
        save_topics(topics)
    return topic
|
||||
|
||||
|
||||
def update_topic(topic_id: str, **kwargs) -> dict | None:
    """Update fields of an existing topic and persist the change.

    Args:
        topic_id: ``"id"`` of the topic to modify.
        **kwargs: Field names and new values; entries whose value is ``None``
            are ignored, so existing values are left as they are.

    Returns:
        The updated topic dict, or ``None`` if no topic has ``topic_id``.
    """
    changes = {k: v for k, v in kwargs.items() if v is not None}
    # Keep the topics lock for the whole read-modify-write so a concurrent
    # change to the topics file cannot be lost between load and save. The
    # nested acquisitions in load_topics()/save_topics() re-enter the same
    # FileLock object.
    with _topics_lock:
        topics = load_topics()
        for t in topics:
            if t["id"] == topic_id:
                t.update(changes)
                save_topics(topics)
                return t
    return None
|
||||
|
||||
|
||||
def delete_topic(topic_id: str) -> str | None:
    """Delete a topic and strip its name from every document.

    Args:
        topic_id: ``"id"`` of the topic to delete.

    Returns:
        The name of the deleted topic, or ``None`` if no topic has
        ``topic_id``.
    """
    # Keep the topics lock for the whole read-modify-write so a concurrent
    # change to the topics file cannot be lost between load and save. The
    # nested acquisitions in load_topics()/save_topics() re-enter the same
    # FileLock object.
    with _topics_lock:
        topics = load_topics()
        topic = next((t for t in topics if t["id"] == topic_id), None)
        if not topic:
            return None
        name = topic["name"]
        save_topics([t for t in topics if t["id"] != topic_id])
    # Done outside the topics lock: this only touches per-document metadata
    # files, which are guarded by their own locks.
    remove_topic_from_all_documents(name)
    return name
|
||||
|
||||
|
||||
def topic_doc_counts() -> dict[str, int]:
    """Count, per topic name, how many documents list that topic.

    Every ``*.json`` file in ``METADATA_DIR`` is read; files that cannot be
    read or parsed contribute nothing.

    Returns:
        Mapping of topic name to the number of occurrences across all
        documents' ``"topics"`` lists.
    """
    tally: dict[str, int] = {}
    for meta_file in METADATA_DIR.glob("*.json"):
        try:
            record = json.loads(meta_file.read_text())
        except Exception:
            # Best effort: an unreadable document is simply left out.
            continue
        for topic_name in record.get("topics", []):
            tally.setdefault(topic_name, 0)
            tally[topic_name] += 1
    return tally
|
||||
|
||||
|
||||
# ── Settings ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def load_settings() -> dict:
    """Read and decode ``SETTINGS_FILE``.

    The file is read while holding the module's settings lock.

    Returns:
        The decoded JSON content of the settings file.
    """
    with _settings_lock:
        raw = SETTINGS_FILE.read_text()
    return json.loads(raw)
|
||||
|
||||
|
||||
def save_settings(settings: dict) -> None:
    """Overwrite ``SETTINGS_FILE`` with the given settings.

    The file is written while holding the module's settings lock.

    Args:
        settings: JSON-serialisable settings mapping.
    """
    with _settings_lock:
        serialized = json.dumps(settings, indent=2)
        SETTINGS_FILE.write_text(serialized)
|
||||
|
||||
|
||||
def mask_api_key(key: str) -> str:
    """Return a display-safe form of an API key.

    Args:
        key: The key to mask; may be empty.

    Returns:
        ``"****"`` followed by the last four characters of ``key``. Keys that
        are empty or at most four characters long are masked entirely.
    """
    placeholder = "****"
    # Short keys reveal nothing: showing the "last four" would expose them.
    visible_tail = key[-4:] if key and len(key) > 4 else ""
    return placeholder + visible_tail
|
||||
|
||||
|
||||
def settings_masked(settings: dict) -> dict:
    """Return a deep copy of *settings* with provider API keys masked.

    Only the ``"anthropic"`` and ``"openai"`` entries under ``"providers"``
    are considered, and only non-empty ``"api_key"`` values are replaced
    (via ``mask_api_key``). The input mapping is not modified.

    Args:
        settings: Settings mapping, optionally containing a ``"providers"``
            mapping of provider name to provider settings.

    Returns:
        The masked deep copy.
    """
    import copy

    masked = copy.deepcopy(settings)
    providers = masked.get("providers", {})
    for provider_name in ("anthropic", "openai"):
        api_key = providers.get(provider_name, {}).get("api_key", "")
        if not api_key:
            continue
        providers[provider_name]["api_key"] = mask_api_key(api_key)
    return masked
|
||||
Reference in New Issue
Block a user