Add sidebar app sub-nav with categories, category filter, and re-analysis on category creation

- Sidebar: Apps accordion expands to Documents, which expands to list all user categories; clicking a category navigates to /apps/documents?category_id=<id>
- DocumentsPage: reads category_id from URL and applies filter; shows active category chip in FilterBar with dismiss; removed TagEditor (deferred)
- doc-service GET /documents: new category_id query param filters via subquery
- doc-service POST /documents/categories: detects similar category names and triggers background re-analysis of affected documents so the new category surfaces as a pending AI suggestion on relevant docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 16:57:35 +02:00
parent bc7a74062d
commit 7d0edbd5e7
8 changed files with 384 additions and 193 deletions
@@ -23,7 +23,7 @@ Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service`
 | Method | Path | Description |
 |--------|------|-------------|
 | `POST` | `/documents/upload` | Upload PDF; returns 202 with initial doc row |
-| `GET` | `/documents` | Paginated list with filters and sort |
+| `GET` | `/documents` | Paginated list with filters, sort, and optional `category_id` filter |
 | `GET` | `/documents/{id}` | Single document |
 | `GET` | `/documents/{id}/status` | Lightweight status poll |
 | `GET` | `/documents/{id}/download` | Stream file bytes |
@@ -32,7 +32,9 @@ Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service`
 | `PATCH` | `/documents/{id}/tags` | Replace tag list (dedup, preserve order) |
 | `PATCH` | `/documents/{id}/title` | Update editable title |
 | `GET` | `/documents/categories` | List all categories for the user |
-| `POST` | `/documents/categories` | Create a category |
+| `POST` | `/documents/categories` | Create a category; triggers re-analysis of documents in similar categories |
+| `PATCH` | `/documents/categories/{id}` | Rename a category |
+| `DELETE` | `/documents/categories/{id}` | Delete a category |
 | `POST` | `/documents/{id}/categories/{cat_id}` | Assign category to document |
 | `DELETE` | `/documents/{id}/categories/{cat_id}` | Remove category from document |

@@ -49,6 +51,7 @@ Query params:
 | `status` | — | filter by status string |
 | `document_type` | — | filter by document type |
 | `search` | — | case-insensitive ILIKE on `title`, `filename`, `tags`, `document_type` |
+| `category_id` | — | filter to documents assigned to this category UUID |

 Response: `{ items: [...], total: N, page: N, pages: N }`

@@ -1,14 +1,80 @@
-from fastapi import APIRouter, Depends, HTTPException
+import difflib
+import json
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.database import get_db
+from app.database import AsyncSessionLocal, get_db
 from app.deps import get_user_id
 from app.models.category import DocumentCategory
+from app.models.category_assignment import CategoryAssignment
+from app.models.document import Document
 from app.schemas.category import CategoryCreate, CategoryOut, CategoryUpdate
+from app.services.ai_client import classify_document

 router = APIRouter()

+# Minimum `_name_similarity` score at which an existing category counts as
+# similar to a newly created one (compared with `>=` in `create_category`).
+_SIMILARITY_THRESHOLD = 0.4
+
+
+def _name_similarity(a: str, b: str) -> float:
+    """Return a similarity score in [0, 1] between two category names.
+
+    Names are compared case-insensitively (via ``str.casefold`` so that, for
+    example, "Straße" and "STRASSE" match). If the names share at least one
+    whole word the score is a fixed 0.9; otherwise it is the
+    ``difflib.SequenceMatcher`` ratio of the two case-folded names.
+
+    Words are runs of alphanumeric characters, so punctuation and separators
+    ("Car/Home", "Taxes, 2023", "tax-returns") cannot hide a shared word.
+    """
+    a_folded = a.casefold()
+    b_folded = b.casefold()
+
+    def _words(text: str) -> set[str]:
+        # Turn every non-alphanumeric character into a space before splitting
+        # so that "car/home" yields {"car", "home"} rather than one token.
+        return set(
+            "".join(ch if ch.isalnum() else " " for ch in text).split()
+        )
+
+    # Word overlap is a strong signal.
+    if _words(a_folded) & _words(b_folded):
+        return 0.9
+    # Fallback: character sequence ratio.
+    return difflib.SequenceMatcher(None, a_folded, b_folded).ratio()
+
+
+async def _reanalyze_documents_for_new_category(
+    new_cat_name: str,
+    user_id: str,
+    similar_cat_ids: list[str],
+) -> None:
+    """
+    Background task: re-run AI classification on the user's documents that are
+    assigned to similar categories, then merge any new ``suggested_categories``
+    into each document's ``extracted_data`` JSON.
+    The suggestions surface as pending chips in the UI — the user still confirms.
+
+    Args:
+        new_cat_name: Name of the category that was just created. Used only
+            for log context; it is not passed to ``classify_document``.
+        user_id: Only documents owned by this user are considered.
+        similar_cat_ids: IDs of the existing categories judged similar to the
+            new one. Nothing is done when this is empty.
+
+    Per-document failures are logged and rolled back; they do not stop the
+    remaining documents from being processed.
+    """
+    import logging
+
+    log = logging.getLogger(__name__)
+
+    if not similar_cat_ids:
+        return
+
+    async with AsyncSessionLocal() as db:
+        # Fetch only the IDs up front. Each document is (re)loaded with an
+        # awaited `db.get` inside the loop, so no attribute access depends on
+        # ORM state that an earlier commit or rollback may have expired.
+        result = await db.execute(
+            select(Document.id)
+            .join(CategoryAssignment, CategoryAssignment.document_id == Document.id)
+            .where(CategoryAssignment.category_id.in_(similar_cat_ids))
+            .where(Document.user_id == user_id)
+            .where(Document.status == "done")
+            .distinct()
+        )
+        doc_ids = result.scalars().all()
+
+        for doc_id in doc_ids:
+            try:
+                doc = await db.get(Document, doc_id)
+                if doc is None or not doc.raw_text:
+                    continue
+
+                ai_result = await classify_document(doc.raw_text)
+                new_suggestions: list[str] = (
+                    ai_result.get("suggested_categories") or []
+                )
+
+                existing_data: dict = {}
+                if doc.extracted_data:
+                    try:
+                        parsed = json.loads(doc.extracted_data)
+                    except (TypeError, ValueError):
+                        # Unparseable payload: start from an empty object,
+                        # as before.
+                        parsed = {}
+                    if not isinstance(parsed, dict):
+                        # Valid JSON of an unexpected shape: leave it alone
+                        # rather than overwrite data we do not understand.
+                        log.warning(
+                            "Skipping document %s: extracted_data is not a "
+                            "JSON object",
+                            doc_id,
+                        )
+                        continue
+                    existing_data = parsed
+
+                existing_sugg: list[str] = (
+                    existing_data.get("suggested_categories") or []
+                )
+                # Merge: preserve existing, append new ones not already present
+                merged = list(dict.fromkeys([*existing_sugg, *new_suggestions]))
+                if merged == existing_sugg:
+                    # Nothing new to record for this document.
+                    continue
+
+                existing_data["suggested_categories"] = merged
+                doc.extracted_data = json.dumps(existing_data)
+                await db.commit()
+            except Exception:
+                # Don't let a single document failure abort the rest: record
+                # it, then roll back so the session is usable for the next
+                # document (a failed flush otherwise poisons every later
+                # commit on this session).
+                log.exception(
+                    "Re-analysis for new category %r failed on document %s",
+                    new_cat_name,
+                    doc_id,
+                )
+                await db.rollback()
+

@router.get("", response_model=list[CategoryOut])
 async def list_categories(
@@ -26,16 +92,40 @@ async def list_categories(
@router.post("", response_model=CategoryOut, status_code=201)
 async def create_category(
    body: CategoryCreate,
+    background_tasks: BackgroundTasks,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
 ) -> DocumentCategory:
+    """Create a category for the user and queue re-analysis of related documents.
+
+    The name is stripped of surrounding whitespace and truncated to 128
+    characters before it is stored. After the new row is committed, the
+    user's other categories are compared against the stripped (untruncated)
+    name with ``_name_similarity``; if any reach ``_SIMILARITY_THRESHOLD``, a
+    background task is queued to re-analyse the documents assigned to them.
+
+    Raises:
+        HTTPException: 422 if the name is empty after stripping.
+    """
    name = body.name.strip()
    if not name:
        raise HTTPException(status_code=422, detail="Category name cannot be empty")
+
    cat = DocumentCategory(user_id=user_id, name=name[:128])
    db.add(cat)
    await db.commit()
+    # Reload the row; `cat.id` is read below to exclude it from the comparison.
    await db.refresh(cat)
+
+    # Find existing categories with similar names
+    result = await db.execute(
+        select(DocumentCategory)
+        .where(DocumentCategory.user_id == user_id)
+        .where(DocumentCategory.id != cat.id)
+    )
+    all_cats = result.scalars().all()
+    # NOTE(review): `c.id` is passed through as-is, while the background task
+    # annotates these as `list[str]` — confirm the ID column type.
+    similar_ids = [
+        c.id
+        for c in all_cats
+        if _name_similarity(name, c.name) >= _SIMILARITY_THRESHOLD
+    ]
+
+    # Only queue the re-analysis when at least one similar category exists.
+    if similar_ids:
+        background_tasks.add_task(
+            _reanalyze_documents_for_new_category,
+            name,
+            user_id,
+            similar_ids,
+        )
+
    return cat


@@ -175,6 +175,7 @@ async def list_documents(
    status: str | None = Query(default=None),
    document_type: str | None = Query(default=None),
    search: str | None = Query(default=None),
+    category_id: str | None = Query(default=None),
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
 ) -> DocumentPage:
@@ -197,6 +198,11 @@ async def list_documents(
                Document.document_type.ilike(like),
            )
        )
+    if category_id:
+        subq = select(CategoryAssignment.document_id).where(
+            CategoryAssignment.category_id == category_id
+        )
+        conditions.append(Document.id.in_(subq))

    count_result = await db.execute(
        select(func.count(Document.id)).where(*conditions)