Add sidebar app sub-nav with categories, category filter, and re-analysis on category creation

- Sidebar: Apps accordion expands to Documents, which expands to list all
  user categories; clicking a category navigates to /apps/documents?category_id=<id>
- DocumentsPage: reads category_id from URL and applies filter; shows active
  category chip in FilterBar with dismiss; removed TagEditor (deferred)
- doc-service GET /documents: new category_id query param filters via subquery
- doc-service POST /documents/categories: detects similar category names and
  triggers background re-analysis of affected documents so the new category
  surfaces as a pending AI suggestion on relevant docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-17 16:57:35 +02:00
parent bc7a74062d
commit 7d0edbd5e7
8 changed files with 384 additions and 193 deletions
+5 -2
View File
@@ -23,7 +23,7 @@ Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service`
| Method | Path | Description |
|--------|------|-------------|
| `POST` | `/documents/upload` | Upload PDF; returns 202 with initial doc row |
| `GET` | `/documents` | Paginated list with filters and sort |
| `GET` | `/documents` | Paginated list with filters, sort, and optional `category_id` filter |
| `GET` | `/documents/{id}` | Single document |
| `GET` | `/documents/{id}/status` | Lightweight status poll |
| `GET` | `/documents/{id}/download` | Stream file bytes |
@@ -32,7 +32,9 @@ Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service`
| `PATCH` | `/documents/{id}/tags` | Replace tag list (dedup, preserve order) |
| `PATCH` | `/documents/{id}/title` | Update editable title |
| `GET` | `/documents/categories` | List all categories for the user |
| `POST` | `/documents/categories` | Create a category |
| `POST` | `/documents/categories` | Create a category; triggers re-analysis of documents in similar categories |
| `PATCH` | `/documents/categories/{id}` | Rename a category |
| `DELETE` | `/documents/categories/{id}` | Delete a category |
| `POST` | `/documents/{id}/categories/{cat_id}` | Assign category to document |
| `DELETE` | `/documents/{id}/categories/{cat_id}` | Remove category from document |
@@ -49,6 +51,7 @@ Query params:
| `status` | — | filter by status string |
| `document_type` | — | filter by document type |
| `search` | — | case-insensitive ILIKE on `title`, `filename`, `tags`, `document_type` |
| `category_id` | — | filter to documents assigned to this category UUID |
Response: `{ items: [...], total: N, page: N, pages: N }`
+92 -2
View File
@@ -1,14 +1,80 @@
from fastapi import APIRouter, Depends, HTTPException
import difflib
import json
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.database import AsyncSessionLocal, get_db
from app.deps import get_user_id
from app.models.category import DocumentCategory
from app.models.category_assignment import CategoryAssignment
from app.models.document import Document
from app.schemas.category import CategoryCreate, CategoryOut, CategoryUpdate
from app.services.ai_client import classify_document
# Router that the category endpoints in this module register on.
router = APIRouter()
# Minimum `_name_similarity` score (inclusive, compared with `>=`) at which an
# existing category counts as "similar" to a newly created one in
# `create_category`.
_SIMILARITY_THRESHOLD = 0.4
def _name_similarity(a: str, b: str) -> float:
    """Score how alike two category names are, as a float between 0 and 1.

    The comparison is case-insensitive. Names sharing at least one
    whitespace-separated word score a fixed 0.9; otherwise the score is the
    ``difflib.SequenceMatcher`` ratio of the two lowercased names.
    """
    left, right = a.lower(), b.lower()

    # No common word: fall back to character-sequence similarity.
    if set(left.split()).isdisjoint(right.split()):
        return difflib.SequenceMatcher(None, left, right).ratio()

    # Any shared word is treated as a strong signal.
    return 0.9
def _merge_suggested_categories(
    raw_extracted: str | None,
    new_suggestions: list[str] | None,
) -> str:
    """Return ``extracted_data`` JSON with *new_suggestions* merged in.

    Existing ``suggested_categories`` entries keep their order; new entries
    are appended only if not already present. A missing, unparseable or
    non-object payload is treated as an empty object, and a missing or
    non-list suggestion value is treated as an empty list.
    """
    data: dict = {}
    if raw_extracted:
        try:
            parsed = json.loads(raw_extracted)
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError; start from scratch.
            parsed = None
        if isinstance(parsed, dict):
            data = parsed

    existing = data.get("suggested_categories")
    if not isinstance(existing, list):
        existing = []

    # dict.fromkeys de-duplicates while preserving first-seen order.
    data["suggested_categories"] = list(
        dict.fromkeys([*existing, *(new_suggestions or [])])
    )
    return json.dumps(data)


async def _reanalyze_documents_for_new_category(
    new_cat_name: str,
    user_id: str,
    similar_cat_ids: list[str],
) -> None:
    """Background task: refresh AI category suggestions on related documents.

    Selects the user's documents with status ``"done"`` that are assigned to
    any of *similar_cat_ids*, re-runs ``classify_document`` on each one's
    ``raw_text`` and merges the returned ``suggested_categories`` into the
    document's ``extracted_data`` JSON. Each document is committed
    separately, so one failure does not affect the others.

    Args:
        new_cat_name: Name of the category that was just created. It is only
            used in log messages here.
            NOTE(review): it is not passed to ``classify_document``;
            presumably the classifier discovers the user's categories on its
            own - confirm.
        user_id: Owner whose documents are considered.
        similar_cat_ids: IDs of existing categories judged similar to the
            new one.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)

    if not similar_cat_ids:
        return

    async with AsyncSessionLocal() as db:
        result = await db.execute(
            select(Document)
            .join(CategoryAssignment, CategoryAssignment.document_id == Document.id)
            .where(CategoryAssignment.category_id.in_(similar_cat_ids))
            .where(Document.user_id == user_id)
            .where(Document.status == "done")
        )
        # Snapshot the column values up front. commit() (unless the session
        # factory sets expire_on_commit=False) and rollback() expire loaded
        # instances, and reading an expired attribute would need implicit
        # I/O, which the asyncio extension rejects outside an awaited call.
        pending = [
            (doc, doc.id, doc.raw_text, doc.extracted_data)
            for doc in result.scalars().unique()
            if doc.raw_text
        ]

        for doc, doc_id, raw_text, extracted in pending:
            try:
                ai_result = await classify_document(raw_text)
                doc.extracted_data = _merge_suggested_categories(
                    extracted,
                    ai_result.get("suggested_categories"),
                )
                await db.commit()
            except Exception:
                # Best effort per document: record the failure and reset the
                # session so the remaining documents can still be committed.
                log.exception(
                    "Re-analysis failed for document %s (new category %r)",
                    doc_id,
                    new_cat_name,
                )
                await db.rollback()
@router.get("", response_model=list[CategoryOut])
async def list_categories(
@@ -26,16 +92,40 @@ async def list_categories(
@router.post("", response_model=CategoryOut, status_code=201)
async def create_category(
    body: CategoryCreate,
    background_tasks: BackgroundTasks,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> DocumentCategory:
    """Create a category for the user and queue re-analysis of related docs.

    The submitted name is stripped of surrounding whitespace and rejected
    with HTTP 422 if nothing remains; it is stored truncated to 128
    characters. After the row is committed, every other category of the same
    user is compared by name, and if any scores at or above
    ``_SIMILARITY_THRESHOLD`` a background task is scheduled to re-analyse
    the documents in those categories.

    Returns:
        The newly created, refreshed ``DocumentCategory``.

    Raises:
        HTTPException: 422 when the name is empty after stripping.
    """
    name = body.name.strip()
    if not name:
        raise HTTPException(status_code=422, detail="Category name cannot be empty")

    cat = DocumentCategory(user_id=user_id, name=name[:128])
    db.add(cat)
    await db.commit()
    await db.refresh(cat)

    # Every other category owned by this user is a candidate for similarity.
    others_query = select(DocumentCategory).where(
        DocumentCategory.user_id == user_id,
        DocumentCategory.id != cat.id,
    )
    others = (await db.execute(others_query)).scalars().all()

    similar_ids = []
    for other in others:
        score = _name_similarity(name, other.name)
        if score >= _SIMILARITY_THRESHOLD:
            similar_ids.append(other.id)

    if not similar_ids:
        return cat

    # Runs after the response is sent; the request is not blocked on the AI.
    background_tasks.add_task(
        _reanalyze_documents_for_new_category,
        name,
        user_id,
        similar_ids,
    )
    return cat
@@ -175,6 +175,7 @@ async def list_documents(
status: str | None = Query(default=None),
document_type: str | None = Query(default=None),
search: str | None = Query(default=None),
category_id: str | None = Query(default=None),
user_id: str = Depends(get_user_id),
db: AsyncSession = Depends(get_db),
) -> DocumentPage:
@@ -197,6 +198,11 @@ async def list_documents(
Document.document_type.ilike(like),
)
)
if category_id:
subq = select(CategoryAssignment.document_id).where(
CategoryAssignment.category_id == category_id
)
conditions.append(Document.id.in_(subq))
count_result = await db.execute(
select(func.count(Document.id)).where(*conditions)