Add generic plugin architecture and watch-directory feature
Introduces a manifest contract so feature containers self-describe their settings (JSON Schema + access rules). Backend and frontend gain generic plugin proxy and dynamic Extensions UI with zero feature-specific code. Doc-service is the first plugin consumer: exposes /plugin/manifest and /plugin/settings, adds a watchdog-based file watcher that auto-ingests PDFs from a mounted directory, maps subfolders to categories, supports AI-suggested folder/filename (user-confirmed), and enforces a no-remove policy. Access is gated by is_superuser or doc-service-admin group. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,5 +22,8 @@ resume.txt
|
||||
# Test fixtures — drop PDFs here for local testing, never commit them
|
||||
features/doc-service/tests/pdfs/*.pdf
|
||||
|
||||
# Feature branch test stacks — never commit these
|
||||
docker-compose.feat-*.yml
|
||||
|
||||
# Don't sync .un files
|
||||
*.un~
|
||||
|
||||
@@ -78,12 +78,13 @@ docker compose up --build -d
|
||||
├── .githooks/pre-commit ← Runs scripts/security_check.py before every commit
|
||||
├── scripts/security_check.py ← Static analysis: secrets, weak crypto, SQLi, JWT
|
||||
├── changelog/YYYY-MM-DD_<slug>.md ← Per-date change logs
|
||||
├── dev-watch/ ← Dev bind-mount for file watcher testing (.gitkeep only)
|
||||
│
|
||||
├── backend/ ← FastAPI gateway (port 8000, internal)
|
||||
│ ├── app/
|
||||
│ │ ├── main.py ← App factory, router registration, lifespan (health loop)
|
||||
│ │ ├── database.py ← AsyncEngine, AsyncSessionLocal, Base
|
||||
│ │ ├── deps.py ← get_current_user, get_current_admin
|
||||
│ │ ├── deps.py ← get_current_user, get_current_admin, check_plugin_access
|
||||
│ │ ├── core/
|
||||
│ │ │ ├── config.py ← All settings via pydantic-settings (reads .env)
|
||||
│ │ │ ├── security.py ← JWT sign/verify (RS256), bcrypt hash/verify
|
||||
@@ -106,10 +107,11 @@ docker compose up --build -d
|
||||
│ │ │ ├── groups.py ← Group CRUD + member management (admin-only)
|
||||
│ │ │ ├── settings.py ← AI, doc limits, system prompts, appearance, themes (admin-only)
|
||||
│ │ │ ├── services.py ← GET /services (health status)
|
||||
│ │ │ ├── plugins.py ← Generic plugin proxy (GET/PATCH /api/plugins/*)
|
||||
│ │ │ ├── categories_proxy.py ← Transparent proxy → doc-service /categories/*
|
||||
│ │ │ └── documents_proxy.py ← Transparent proxy → doc-service /documents/*
|
||||
│ │ └── services/
|
||||
│ │ └── service_health.py ← Background 30s health-check loop
|
||||
│ │ └── service_health.py ← Background 30s health-check loop; caches /plugin/manifest per service
|
||||
│ ├── alembic/
|
||||
│ │ ├── env.py ← Async migration runner
|
||||
│ │ └── versions/ ← Migration chain (see Migrations section)
|
||||
@@ -133,7 +135,7 @@ docker compose up --build -d
|
||||
│ │
|
||||
│ └── doc-service/ ← PDF extraction microservice (port 8001, internal)
|
||||
│ ├── app/
|
||||
│ │ ├── main.py
|
||||
│ │ ├── main.py ← FastAPI, lifespan (file watcher start/stop)
|
||||
│ │ ├── database.py ← Same PostgreSQL instance as backend
|
||||
│ │ ├── deps.py ← get_user_id (reads x-user-id header)
|
||||
│ │ ├── models/
|
||||
@@ -144,13 +146,16 @@ docker compose up --build -d
|
||||
│ │ │ ├── document.py ← DocumentOut, DocumentPage, DocumentStatusOut, etc.
|
||||
│ │ │ └── category.py ← CategoryOut, CategoryCreate, CategoryUpdate
|
||||
│ │ ├── routers/
|
||||
│ │ │ ├── documents.py ← Full document CRUD + file serving + reprocess
|
||||
│ │ │ └── categories.py ← Category CRUD
|
||||
│ │ │ ├── documents.py ← Full document CRUD + file serving + reprocess + suggestion endpoints
|
||||
│ │ │ ├── categories.py ← Category CRUD (includes watch-owned categories)
|
||||
│ │ │ └── plugin.py ← GET /plugin/manifest, GET+PATCH /plugin/settings
|
||||
│ │ └── services/
|
||||
│ │ ├── storage.py ← File I/O
|
||||
│ │ ├── ai_client.py ← classify_document() → ai-service:8010/chat
|
||||
│ │ └── config_reader.py
|
||||
│ │ ├── config_reader.py ← Config load/save including storage/watch settings
|
||||
│ │ └── file_watcher.py ← watchdog-based PDF watcher + startup scan + ingestion
|
||||
│ ├── alembic/versions/ ← Doc-service migration chain
|
||||
│ │ └── 0003_add_watch_columns.py ← source, watch_path, suggested_folder, suggested_filename
|
||||
│ ├── Dockerfile
|
||||
│ └── STATUS.md
|
||||
│
|
||||
@@ -164,10 +169,12 @@ docker compose up --build -d
|
||||
│ │ └── useTheme.ts ← Theme toggle
|
||||
│ ├── components/
|
||||
│ │ ├── AppShell.tsx ← Layout: Sidebar + scrollable main
|
||||
│ │ ├── Sidebar.tsx ← Collapsible nav (icons ↔ icons+labels)
|
||||
│ │ ├── Sidebar.tsx ← Collapsible nav; "Extensions" section auto-populated from /api/plugins
|
||||
│ │ ├── ThemeToggle.tsx ← Light/dark mode toggle
|
||||
│ │ ├── PluginSchemaForm.tsx ← JSON Schema → React form (boolean/string/number/readOnly)
|
||||
│ │ └── ui/ ← shadcn/ui components (Button, Input, …)
|
||||
│ ├── pages/ ← One file per route (see Routes section)
|
||||
│ │ └── PluginSettingsPage.tsx ← Generic plugin settings page driven by manifest
|
||||
│ ├── lib/utils.ts ← cn() = clsx + tailwind-merge
|
||||
│ └── styles/theme.css ← CSS custom properties, Tailwind setup
|
||||
├── vite.config.ts ← /api/* proxied to backend:8000
|
||||
@@ -283,6 +290,10 @@ Unique constraint: `(group_id, user_id)`
|
||||
| `error_message` | String(500) | nullable | |
|
||||
| `created_at` | DateTime(tz) | server_default=now() | |
|
||||
| `processed_at` | DateTime(tz) | nullable | |
|
||||
| `source` | String(16) | default="upload" | "upload" or "watch" |
|
||||
| `watch_path` | String | nullable | original absolute path in watch directory |
|
||||
| `suggested_folder` | String(128) | nullable | AI-suggested category (pending user confirm) |
|
||||
| `suggested_filename` | String(500) | nullable | AI-suggested title/rename (pending user confirm) |
|
||||
|
||||
**`document_categories`**
|
||||
|
||||
@@ -318,6 +329,7 @@ Unique constraint: `(group_id, user_id)`
|
||||
|--------|------|
|
||||
| `0001` | `create_doc_tables` |
|
||||
| `0002` | `add_document_title` |
|
||||
| `0003` | `add_watch_columns` |
|
||||
|
||||
---
|
||||
|
||||
@@ -407,6 +419,10 @@ Unique constraint: `(group_id, user_id)`
|
||||
| GET | `/api/documents/{id}/file` | Download PDF (streaming) |
|
||||
| POST | `/api/documents/{id}/categories/{cat_id}` | Assign category |
|
||||
| DELETE | `/api/documents/{id}/categories/{cat_id}` | Remove category |
|
||||
| POST | `/api/documents/{id}/suggestions/folder/confirm` | Confirm AI folder suggestion |
|
||||
| POST | `/api/documents/{id}/suggestions/folder/reject` | Reject AI folder suggestion |
|
||||
| POST | `/api/documents/{id}/suggestions/filename/confirm` | Confirm AI filename suggestion |
|
||||
| POST | `/api/documents/{id}/suggestions/filename/reject` | Reject AI filename suggestion |
|
||||
|
||||
### Categories (`/api/documents/categories/*`) — authenticated, proxied to doc-service
|
||||
|
||||
@@ -417,6 +433,17 @@ Unique constraint: `(group_id, user_id)`
|
||||
| PATCH | `/api/documents/categories/{id}` | Rename |
|
||||
| DELETE | `/api/documents/categories/{id}` | Delete (204) |
|
||||
|
||||
### Plugins (`/api/plugins`) — authenticated, auth-per-plugin
|
||||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| GET | `/api/plugins` | List plugins accessible to current user |
|
||||
| GET | `/api/plugins/{id}/manifest` | Plugin manifest with settings JSON Schema (auth-gated) |
|
||||
| GET | `/api/plugins/{id}/settings` | Proxy to feature `/plugin/settings` (auth-gated) |
|
||||
| PATCH | `/api/plugins/{id}/settings` | Proxy to feature `/plugin/settings` (auth-gated) |
|
||||
|
||||
Auth: is_superuser (only when the manifest's `allow_superuser` is true, which is the default) OR member of a group listed in manifest `required_groups`. Returns 404 (not 403) to hide existence.
|
||||
|
||||
### AI-service (internal only — not exposed to browser)
|
||||
|
||||
| Method | Path | Description |
|
||||
@@ -442,6 +469,7 @@ Unique constraint: `(group_id, user_id)`
|
||||
| `/apps/ai/settings/admin` | `AIAdminSettingsPage` | AdminRoute |
|
||||
| `/profile` | `ProfilePage` | PrivateRoute |
|
||||
| `/settings` | `SettingsPage` | PrivateRoute |
|
||||
| `/settings/plugins/:id` | `PluginSettingsPage` | PrivateRoute (auth enforced per-plugin by backend) |
|
||||
| `/admin` | `AdminPage` (→ `/admin/users`) | AdminRoute |
|
||||
| `/admin/users` | `AdminUsersPage` | AdminRoute |
|
||||
| `/admin/groups` | `AdminGroupsPage` | AdminRoute |
|
||||
@@ -566,6 +594,9 @@ Adding a new API call:
|
||||
["categories"] // document categories
|
||||
["documents", params] // document list (params object for cache isolation)
|
||||
["document", id] // single document
|
||||
["plugins"] // accessible plugin list (filtered by user access)
|
||||
["plugin-manifest", id] // plugin manifest (cached)
|
||||
["plugin-settings", id] // plugin current settings
|
||||
```
|
||||
|
||||
**Mutation pattern**:
|
||||
@@ -708,7 +739,7 @@ Use `validation_alias` when the ORM field name differs from the JSON key (e.g.,
|
||||
| `db` | postgres:16-alpine | 5432 | 70:70 | `postgres_data` | backend-net |
|
||||
| `backend` | python:3.12-slim | 8000 | 1001:1001 | `app_config` | backend-net |
|
||||
| `ai-service` | python:3.12-slim | 8010 | 1001:1001 | `app_config` | backend-net |
|
||||
| `doc-service` | python:3.12-slim | 8001 | 1001:1001 | `doc_data`, `app_config` | backend-net |
|
||||
| `doc-service` | python:3.12-slim | 8001 | 1001:1001 | `doc_data`, `watch_data`, `app_config` | backend-net |
|
||||
| `frontend` | nginx-unprivileged:alpine | 8080 | 1001:1001 | — | backend-net, frontend-net |
|
||||
|
||||
### Volumes
|
||||
@@ -717,6 +748,7 @@ Use `validation_alias` when the ORM field name differs from the JSON key (e.g.,
|
||||
|--------|-----------|---------|
|
||||
| `postgres_data` | `/var/lib/postgresql/data` | PostgreSQL data |
|
||||
| `doc_data` | `/data/documents` | Uploaded PDF files |
|
||||
| `watch_data` | `/data/watch` | Watch directory (bind-mount NAS/Nextcloud via docker-compose.override.yml) |
|
||||
| `app_config` | `/config` | Per-service runtime config JSON files |
|
||||
|
||||
### Networks
|
||||
@@ -816,6 +848,100 @@ Always run `git push` immediately after every `git commit`.
|
||||
|
||||
---
|
||||
|
||||
### Feature branch & isolated test environment
|
||||
|
||||
Every non-trivial implementation (anything beyond a one-line fix or doc change) **must** follow this workflow:
|
||||
|
||||
#### 1 — Create a feature branch
|
||||
After the planning phase is approved, branch off `main`:
|
||||
```bash
|
||||
git checkout main && git pull
|
||||
git checkout -b feat/<slug> # e.g. feat/color-mode, feat/admin-appearance
|
||||
```
|
||||
|
||||
#### 2 — Spin up an isolated Docker stack for the feature
|
||||
A dedicated compose stack runs alongside the main dev stack so both can be tested independently.
|
||||
|
||||
**Find the next free port** (main dev stack owns 5173):
|
||||
```bash
|
||||
for port in $(seq 5174 5200); do
|
||||
lsof -iTCP:$port -sTCP:LISTEN -t &>/dev/null || { echo "$port"; break; }
|
||||
done
|
||||
```
|
||||
Use the first free port returned (call it `$PORT`).
|
||||
|
||||
**Create a per-feature override file** at `docker-compose.feat-<slug>.yml` (gitignored):
|
||||
```yaml
|
||||
# docker-compose.feat-<slug>.yml — feature test stack, never committed to main
|
||||
services:
|
||||
frontend:
|
||||
ports:
|
||||
- "$PORT:8080" # e.g. 5174:8080
|
||||
container_name: frontend-<slug>
|
||||
backend:
|
||||
container_name: backend-<slug>
|
||||
doc-service:
|
||||
container_name: doc-service-<slug>
|
||||
ai-service:
|
||||
container_name: ai-service-<slug>
|
||||
db:
|
||||
container_name: db-<slug>
|
||||
|
||||
networks:
|
||||
backend-net:
|
||||
name: backend-net-<slug>
|
||||
frontend-net:
|
||||
name: frontend-net-<slug>
|
||||
```
|
||||
|
||||
**Start the feature stack**:
|
||||
```bash
|
||||
docker compose -f docker-compose.yml \
|
||||
-f docker-compose.dev.yml \
|
||||
-f docker-compose.feat-<slug>.yml \
|
||||
--project-name <slug> up --build
|
||||
```
|
||||
|
||||
The feature frontend is now reachable at `http://localhost:$PORT`.
|
||||
The main dev stack continues running unaffected on `:5173`.
|
||||
|
||||
#### 3 — Develop on the feature branch
|
||||
All code changes happen on `feat/<slug>`. Commit and push normally:
|
||||
```bash
|
||||
git add <files>
|
||||
git commit -m "feat: <description>"
|
||||
git push -u origin feat/<slug>
|
||||
```
|
||||
|
||||
#### 4 — Confirm functionality
|
||||
Before merging, verify all of the following on `http://localhost:$PORT`:
|
||||
- [ ] Login and registration work end-to-end
|
||||
- [ ] The specific feature works as intended
|
||||
- [ ] No regressions visible in the UI
|
||||
- [ ] Backend logs show no unexpected errors: `docker compose -p <slug> logs backend`
|
||||
- [ ] Migrations (if any) applied cleanly: `docker compose -p <slug> exec backend alembic upgrade head`
|
||||
|
||||
#### 5 — Merge to main
|
||||
Once all checks pass:
|
||||
```bash
|
||||
git checkout main
|
||||
git merge --no-ff feat/<slug> -m "Merge feat/<slug>: <description>"
|
||||
git push
|
||||
git branch -d feat/<slug>
|
||||
git push origin --delete feat/<slug>
|
||||
```
|
||||
|
||||
#### 6 — Tear down the feature stack
|
||||
```bash
|
||||
docker compose -f docker-compose.yml \
|
||||
-f docker-compose.dev.yml \
|
||||
-f docker-compose.feat-<slug>.yml \
|
||||
--project-name <slug> down --volumes --remove-orphans
|
||||
rm docker-compose.feat-<slug>.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Infrastructure change protocol
|
||||
|
||||
After **any** change to Dockerfiles, `docker-compose*.yml`, `nginx.conf`, or setup scripts:
|
||||
|
||||
@@ -82,6 +82,21 @@ All `/api/documents/*` and `/api/documents/categories/*` requests are transparen
|
||||
- Strips hop-by-hop headers + `content-length`, `accept-encoding`, `content-type`
|
||||
- Returns `Response` (not `StreamingResponse`) to avoid content-length/chunked conflicts
|
||||
|
||||
### Plugin system (`/api/plugins`)
|
||||
|
||||
Generic extension/plugin infrastructure — **zero feature-specific code in backend**. Feature containers self-describe via `GET /plugin/manifest`.
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `GET` | `/api/plugins` | user | List plugins accessible to current user |
|
||||
| `GET` | `/api/plugins/{id}/manifest` | user | Cached manifest for a plugin (404 if not accessible) |
|
||||
| `GET` | `/api/plugins/{id}/settings` | user | Proxy to feature `GET /plugin/settings` |
|
||||
| `PATCH` | `/api/plugins/{id}/settings` | user | Proxy to feature `PATCH /plugin/settings` |
|
||||
|
||||
Access is controlled by the manifest: `allow_superuser` for admins; `required_groups` for group members. `check_plugin_access(plugin_id, user, db)` in `deps.py` enforces this.
|
||||
|
||||
During each health poll, `service_health.py` also fetches `GET /plugin/manifest` from healthy services and caches it. New feature containers that expose `/plugin/manifest` automatically appear in the Extensions sidebar — no backend code changes required.
|
||||
|
||||
### Database models
|
||||
|
||||
| Model | Table | Notes |
|
||||
@@ -137,6 +152,7 @@ Browser (port 5173 dev / 80 prod)
|
||||
## Future work
|
||||
|
||||
- [x] Groups system: `groups`, `group_memberships` tables; admin CRUD; add/remove members
|
||||
- [x] Generic plugin infrastructure: manifest contract, `/api/plugins` proxy router, `check_plugin_access`
|
||||
- [ ] App permissions registry: `group_app_permissions` table; AppsPage filtered by group grants
|
||||
- [ ] Doc sharing via group membership
|
||||
- [ ] App permissions registry: `user_app_permissions (user_id, app_key)`; AppsPage filtered by grants
|
||||
|
||||
@@ -43,3 +43,44 @@ async def get_current_admin(
|
||||
detail="Not found",
|
||||
)
|
||||
return current_user
|
||||
|
||||
|
||||
async def check_plugin_access(
    plugin_id: str,
    current_user: User,
    db: AsyncSession,
) -> bool:
    """
    Return True if the user may access the given plugin's settings.

    Access is granted when any of these conditions holds:
      1. The user is a superuser AND the manifest allows superuser access
         (``access.allow_superuser``; treated as True when the key is absent).
      2. The user is a member of at least one group named in
         ``access.required_groups``.

    The manifest is supplied by the feature container, so its shape is not
    trusted: a missing manifest, a non-dict manifest, a non-dict ``access``
    section, or a non-list ``required_groups`` all deny access instead of
    raising.

    Args:
        plugin_id: Registry id of the plugin/service whose manifest is consulted.
        current_user: The authenticated user being checked.
        db: Async session used for the group-membership lookup.

    Returns:
        True when access is granted, False otherwise. Never raises for an
        unknown plugin so callers can decide how to respond (e.g. with a 404).
    """
    # Imported locally, as in the original, rather than at module level.
    from app.models.group import Group, GroupMembership
    from app.services.service_health import get_cached_manifest

    manifest = get_cached_manifest(plugin_id)
    if not isinstance(manifest, dict):
        return False

    # An explicit null is treated the same as an absent section.
    access = manifest.get("access") or {}
    if not isinstance(access, dict):
        return False

    if current_user.is_superuser and access.get("allow_superuser", True):
        return True

    required = access.get("required_groups") or []
    if not isinstance(required, (list, tuple)):
        # Fail closed: a bare string would otherwise be iterated per character.
        return False
    group_names = [name for name in required if isinstance(name, str)]
    if not group_names:
        return False

    # One query for all candidate groups instead of one round-trip per group.
    # LIMIT 1 + first() only asks "does any matching membership exist?".
    result = await db.execute(
        select(GroupMembership)
        .join(Group, Group.id == GroupMembership.group_id)
        .where(
            Group.name.in_(group_names),
            GroupMembership.user_id == current_user.id,
        )
        .limit(1)
    )
    return result.scalars().first() is not None
|
||||
|
||||
+2
-1
@@ -6,7 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.app_config import seed_builtin_themes
|
||||
from app.core.config import settings
|
||||
from app.routers import admin, auth, categories_proxy, documents_proxy, groups, profile, services, users
|
||||
from app.routers import admin, auth, categories_proxy, documents_proxy, groups, plugins, profile, services, users
|
||||
from app.routers import settings as settings_router
|
||||
from app.services.service_health import check_all, health_check_loop, register_services
|
||||
|
||||
@@ -46,6 +46,7 @@ app.include_router(admin.router, prefix="/api/admin", tags=["admin"])
|
||||
app.include_router(groups.router, prefix="/api/admin/groups", tags=["admin"])
|
||||
app.include_router(settings_router.router, prefix="/api/settings", tags=["settings"])
|
||||
app.include_router(services.router, prefix="/api/services", tags=["services"])
|
||||
app.include_router(plugins.router, prefix="/api/plugins", tags=["plugins"])
|
||||
# categories_proxy MUST be registered before documents_proxy —
|
||||
# otherwise /api/documents/{path:path} swallows /api/documents/categories/*
|
||||
app.include_router(
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
"""
|
||||
Generic plugin proxy.
|
||||
|
||||
Feature containers advertise themselves via GET /plugin/manifest. The backend
|
||||
health-poller caches those manifests. This router exposes them to the browser
|
||||
through auth-gated endpoints so the frontend never needs to know about specific
|
||||
features.
|
||||
|
||||
Routes:
|
||||
GET /api/plugins → list accessible plugins for current user
|
||||
GET /api/plugins/{id}/manifest → cached manifest (404 if not accessible)
|
||||
GET /api/plugins/{id}/settings → proxy to feature /plugin/settings
|
||||
PATCH /api/plugins/{id}/settings → proxy to feature /plugin/settings
|
||||
"""
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.deps import check_plugin_access, get_current_user
|
||||
from app.models.user import User
|
||||
from app.services.service_health import _REGISTRY, get_cached_manifest, get_service_url
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
_HOP_BY_HOP = frozenset([
|
||||
"connection",
|
||||
"keep-alive",
|
||||
"proxy-authenticate",
|
||||
"proxy-authorization",
|
||||
"te",
|
||||
"trailers",
|
||||
"transfer-encoding",
|
||||
"upgrade",
|
||||
"host",
|
||||
"accept-encoding",
|
||||
])
|
||||
_STRIP_RESPONSE = frozenset([*_HOP_BY_HOP, "content-length", "content-type"])
|
||||
|
||||
|
||||
async def _proxy(plugin_id: str, method: str, path: str, body: bytes | None,
                 content_type: str | None = None) -> Response:
    """Forward a request to the feature container's plugin endpoint.

    Args:
        plugin_id: Registry id used to resolve the service's internal base URL.
        method: HTTP method to send upstream (e.g. "GET", "PATCH").
        path: Path on the feature container (e.g. "/plugin/settings").
        body: Raw request body to forward, or None for no body.
        content_type: Value for the upstream ``content-type`` header; omitted
            when falsy.

    Returns:
        A buffered ``Response`` mirroring the upstream status code and body.
        Headers listed in ``_STRIP_RESPONSE`` are removed; the media type is
        taken from the upstream ``content-type`` (default "application/json").

    Raises:
        HTTPException: 404 when ``plugin_id`` is not a registered service;
            502 when the upstream request fails at the transport level.
    """
    import logging  # local import: this module does not define a logger

    url = get_service_url(plugin_id)
    if url is None:
        raise HTTPException(status_code=404, detail="Not found")

    headers: dict[str, str] = {}
    if content_type:
        headers["content-type"] = content_type

    try:
        async with httpx.AsyncClient(base_url=url, timeout=30.0) as client:
            resp = await client.request(method, path, content=body, headers=headers)
    except httpx.RequestError as exc:
        # Keep transport details (internal hostnames, ports) in the server log
        # only; the browser gets a generic message.
        logging.getLogger(__name__).warning(
            "Plugin %s unreachable (%s %s): %s", plugin_id, method, path, exc
        )
        raise HTTPException(status_code=502, detail="Plugin service unreachable") from exc

    resp_headers = {k: v for k, v in resp.headers.items() if k.lower() not in _STRIP_RESPONSE}
    return Response(
        content=resp.content,
        status_code=resp.status_code,
        headers=resp_headers,
        media_type=resp.headers.get("content-type", "application/json"),
    )
|
||||
|
||||
|
||||
@router.get("")
async def list_plugins(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[dict]:
    """Return the list of plugins the current user may access.

    Iterates the service registry, skips services without a usable cached
    manifest, and keeps those for which ``check_plugin_access`` grants access.

    Manifests come from feature containers and may be incomplete, so missing
    fields fall back to defaults instead of failing the whole listing:
    ``id`` and ``name`` fall back to the registry id, ``icon`` to "package",
    and ``version`` to an empty string.

    Returns:
        One dict per accessible plugin with keys ``id``, ``name``, ``icon``
        and ``version``.
    """
    accessible: list[dict] = []
    for svc in _REGISTRY:
        manifest = get_cached_manifest(svc.id)
        # A non-dict payload cannot be a manifest; treat it like "no manifest".
        if not isinstance(manifest, dict):
            continue
        if not await check_plugin_access(svc.id, current_user, db):
            continue
        accessible.append({
            "id": manifest.get("id", svc.id),
            "name": manifest.get("name", svc.id),
            "icon": manifest.get("icon", "package"),
            "version": manifest.get("version", ""),
        })
    return accessible
|
||||
|
||||
|
||||
@router.get("/{plugin_id}/manifest")
async def get_plugin_manifest(
    plugin_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> dict:
    """Return the cached manifest for ``plugin_id``.

    Responds with 404 both when the user lacks access and when no manifest is
    cached, so the two cases look the same to the caller.
    """
    allowed = await check_plugin_access(plugin_id, current_user, db)
    # The cache is only consulted once access has been granted.
    manifest = get_cached_manifest(plugin_id) if allowed else None
    if manifest is None:
        raise HTTPException(status_code=404, detail="Not found")
    return manifest
|
||||
|
||||
|
||||
@router.get("/{plugin_id}/settings")
async def get_plugin_settings(
    plugin_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Response:
    """Proxy ``GET /plugin/settings`` to the feature container.

    Responds with 404 when the current user may not access the plugin.
    """
    allowed = await check_plugin_access(plugin_id, current_user, db)
    if allowed:
        return await _proxy(plugin_id, "GET", "/plugin/settings", None)
    raise HTTPException(status_code=404, detail="Not found")
|
||||
|
||||
|
||||
@router.patch("/{plugin_id}/settings")
async def update_plugin_settings(
    plugin_id: str,
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Response:
    """Proxy ``PATCH /plugin/settings`` to the feature container.

    The raw request body and its content type (default "application/json")
    are forwarded unchanged. Responds with 404 when the current user may not
    access the plugin; the body is not read in that case.
    """
    allowed = await check_plugin_access(plugin_id, current_user, db)
    if not allowed:
        raise HTTPException(status_code=404, detail="Not found")

    payload = await request.body()
    return await _proxy(
        plugin_id,
        "PATCH",
        "/plugin/settings",
        payload,
        request.headers.get("content-type", "application/json"),
    )
|
||||
@@ -2,8 +2,9 @@
|
||||
Background health-checker for registered feature services.
|
||||
|
||||
Polls each service's /health endpoint every POLL_INTERVAL seconds and stores
|
||||
the result in an in-memory dict. The REST layer reads from that dict — no DB,
|
||||
no blocking calls on the request path.
|
||||
the result in an in-memory dict. Also fetches /plugin/manifest when available
|
||||
and caches it so the plugin proxy can serve it without per-request network calls.
|
||||
The REST layer reads from that dict — no DB, no blocking calls on the request path.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
@@ -35,10 +36,13 @@ _REGISTRY: list[ServiceDefinition] = []
|
||||
# id → True/False/None (None = not yet checked)
|
||||
_health: dict[str, bool | None] = {}
|
||||
|
||||
# id → plugin manifest dict, or None if the service has no plugin manifest
|
||||
_manifests: dict[str, dict | None] = {}
|
||||
|
||||
|
||||
def register_services(doc_service_url: str, ai_service_url: str) -> None:
|
||||
"""Called once during app startup to populate the registry from config."""
|
||||
global _REGISTRY, _health
|
||||
global _REGISTRY, _health, _manifests
|
||||
|
||||
_REGISTRY = [
|
||||
ServiceDefinition(
|
||||
@@ -62,6 +66,7 @@ def register_services(doc_service_url: str, ai_service_url: str) -> None:
|
||||
]
|
||||
|
||||
_health = {svc.id: None for svc in _REGISTRY}
|
||||
_manifests = {svc.id: None for svc in _REGISTRY}
|
||||
logger.info("Service registry initialised with %d services", len(_REGISTRY))
|
||||
|
||||
|
||||
@@ -88,6 +93,25 @@ async def _check_service(svc: ServiceDefinition) -> None:
|
||||
else:
|
||||
logger.warning("Service %s is now UNHEALTHY", svc.id)
|
||||
|
||||
# Opportunistically fetch plugin manifest when the service is healthy
|
||||
if healthy:
|
||||
await _fetch_manifest(svc)
|
||||
|
||||
|
||||
async def _fetch_manifest(svc: ServiceDefinition) -> None:
    """Try to GET /plugin/manifest from the service and cache the result.

    The cache entry for ``svc.id`` is always overwritten: with the manifest
    when the service answers 200 with a JSON object, otherwise with None
    (non-200 status, non-object JSON, invalid JSON, or any request failure).

    A service without a plugin manifest is not an error, so failures are
    logged at debug level only and never propagate to the health loop.

    Args:
        svc: Registered service whose ``internal_url`` is queried.
    """
    url = f"{svc.internal_url}/plugin/manifest"
    manifest: dict | None = None
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(url)
        if resp.status_code == 200:
            payload = resp.json()
            if isinstance(payload, dict):
                manifest = payload
            else:
                # Readers of the cache call .get() on the manifest; anything
                # other than a JSON object would break them on every request.
                logger.warning(
                    "Service %s returned a non-object plugin manifest; ignoring",
                    svc.id,
                )
    except Exception:
        # Best-effort: the service may simply not expose a manifest.
        logger.debug("Could not fetch plugin manifest from %s", svc.id, exc_info=True)
    _manifests[svc.id] = manifest
|
||||
|
||||
|
||||
async def check_all() -> None:
|
||||
"""Run health checks for all registered services concurrently."""
|
||||
@@ -125,3 +149,16 @@ def get_all_statuses() -> list[dict]:
|
||||
}
|
||||
for svc in _REGISTRY
|
||||
]
|
||||
|
||||
|
||||
def get_cached_manifest(service_id: str) -> dict | None:
    """Look up the cached plugin manifest for ``service_id``.

    Returns None when the id is unknown or when no manifest is cached for it.
    """
    if service_id in _manifests:
        return _manifests[service_id]
    return None
|
||||
|
||||
|
||||
def get_service_url(service_id: str) -> str | None:
    """Resolve a registered service id to its internal URL.

    Returns the ``internal_url`` of the first registry entry whose id matches,
    or None when no registered service has that id.
    """
    return next(
        (entry.internal_url for entry in _REGISTRY if entry.id == service_id),
        None,
    )
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
# 2026-04-18 — Generic Plugin Architecture + Watch Directory Feature
|
||||
|
||||
**Timestamp:** 2026-04-18T00:00:00Z
|
||||
|
||||
## Summary
|
||||
|
||||
Implemented a generic plugin/extension infrastructure that allows feature containers to self-describe their settings via a manifest contract, with no feature-specific code required in the backend or frontend. Built the watch-directory feature entirely inside the doc-service container as the first plugin consumer.
|
||||
|
||||
## Files Added
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `backend/app/routers/plugins.py` | Generic plugin proxy: `GET /api/plugins`, `GET /api/plugins/{id}/manifest`, `GET/PATCH /api/plugins/{id}/settings` |
|
||||
| `frontend/src/components/PluginSchemaForm.tsx` | JSON Schema → React form renderer (boolean/string/number/readOnly) |
|
||||
| `frontend/src/pages/PluginSettingsPage.tsx` | Generic plugin settings page driven by manifest |
|
||||
| `features/doc-service/app/routers/plugin.py` | Doc-service plugin endpoints: `/plugin/manifest`, `/plugin/settings` |
|
||||
| `features/doc-service/app/services/file_watcher.py` | watchdog-based PDF watcher with startup scan, folder-to-category mapping, no-remove policy |
|
||||
| `features/doc-service/alembic/versions/0003_add_watch_columns.py` | Migration: source, watch_path, suggested_folder, suggested_filename |
|
||||
| `dev-watch/.gitkeep` | Dev bind-mount directory for local file watcher testing |
|
||||
|
||||
## Files Modified
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `backend/app/services/service_health.py` | Also fetches and caches `/plugin/manifest` from healthy services |
|
||||
| `backend/app/deps.py` | Added `check_plugin_access(plugin_id, user, db)` helper |
|
||||
| `backend/app/main.py` | Mounted `/api/plugins` router |
|
||||
| `frontend/src/api/client.ts` | Added plugin API functions and suggestion confirm/reject functions; extended `DocumentOut` with new fields |
|
||||
| `frontend/src/components/Sidebar.tsx` | Added dynamic "Extensions" section populated from `/api/plugins` |
|
||||
| `frontend/src/App.tsx` | Added `/settings/plugins/:id` route |
|
||||
| `features/doc-service/app/models/document.py` | Added 4 new columns: source, watch_path, suggested_folder, suggested_filename |
|
||||
| `features/doc-service/app/schemas/document.py` | Exposed 4 new fields in `DocumentOut` |
|
||||
| `features/doc-service/app/services/config_reader.py` | Added storage config defaults, `get_storage_config()`, `save_storage_config()` |
|
||||
| `features/doc-service/app/routers/documents.py` | Watch-user visibility (`OR user_id = "watch"`); 4 suggestion endpoints |
|
||||
| `features/doc-service/app/routers/categories.py` | Watch-owned categories included in list |
|
||||
| `features/doc-service/app/main.py` | Lifespan watcher start/stop; plugin router mounted |
|
||||
| `features/doc-service/pyproject.toml` | Added `watchdog>=4.0` |
|
||||
| `features/doc-service/Dockerfile` | Pre-create `/data/watch` |
|
||||
| `docker-compose.yml` | Added `watch_data` named volume; mounted to doc-service |
|
||||
| `docker-compose.dev.yml` | Dev bind-mount `./dev-watch:/data/watch` |
|
||||
| `CLAUDE.md` | Updated all affected sections (models, migrations, endpoints, routes, tree, query keys, volumes) |
|
||||
| `backend/STATUS.md` | Plugin system section added |
|
||||
| `features/doc-service/STATUS.md` | Watch feature, plugin endpoints, migration 0003, updated architecture diagram |
|
||||
| `frontend/STATUS.md` | Extensions sidebar, PluginSchemaForm, PluginSettingsPage, new API functions |
|
||||
@@ -0,0 +1 @@
|
||||
# Watch directory for development testing
|
||||
@@ -34,3 +34,4 @@ services:
|
||||
env_file: ./features/doc-service/.env
|
||||
volumes:
|
||||
- ./features/doc-service:/app
|
||||
- ./dev-watch:/data/watch # bind-mount local folder for easy testing
|
||||
|
||||
@@ -70,6 +70,7 @@ services:
|
||||
AI_SERVICE_URL: http://ai-service:8010
|
||||
volumes:
|
||||
- doc_data:/data/documents
|
||||
- watch_data:/data/watch
|
||||
- app_config:/config
|
||||
depends_on:
|
||||
db:
|
||||
@@ -98,6 +99,7 @@ services:
|
||||
volumes:
|
||||
postgres_data:
|
||||
doc_data: # PDF files persisted across restarts
|
||||
watch_data: # Watch directory — bind-mount your NAS/Nextcloud here via docker-compose.override.yml
|
||||
app_config: # Per-service runtime config JSON files
|
||||
|
||||
networks:
|
||||
|
||||
@@ -17,7 +17,7 @@ RUN groupadd --gid 1001 appuser && \
|
||||
|
||||
# Pre-create data and config dirs with correct ownership.
|
||||
# Named volumes mounted over these paths will inherit ownership on first creation.
|
||||
RUN mkdir -p /data/documents /config && chown -R appuser:appuser /data /config
|
||||
RUN mkdir -p /data/documents /data/watch /config && chown -R appuser:appuser /data /config
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
## What it is
|
||||
|
||||
PDF document management microservice. Handles upload, storage, async AI-powered extraction, tagging, categorisation, and retrieval of PDF documents on a per-user basis.
|
||||
PDF document management microservice. Handles upload, storage, async AI-powered extraction, tagging, categorisation, and retrieval of PDF documents on a per-user basis. Also supports automatic ingestion from a mounted watch directory (NAS, Nextcloud, Syncthing, etc.).
|
||||
|
||||
Port: `8001` (internal only, not exposed to host). All traffic arrives via the backend proxy (`backend/app/routers/documents_proxy.py`), which injects the authenticated `x-user-id` header.
|
||||
|
||||
Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service` Alembic version table. Storage: `/data/documents/` (Docker named volume `doc_data`).
|
||||
Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service` Alembic version table. Storage: `/data/documents/` (Docker named volume `doc_data`). Watch directory: `/data/watch` (named volume `watch_data` in prod; bind-mount in dev via `docker-compose.dev.yml`).
|
||||
|
||||
---
|
||||
|
||||
@@ -31,13 +31,25 @@ Database: shared PostgreSQL instance, isolated via `alembic_version_doc_service`
|
||||
| `PATCH` | `/documents/{id}/type` | Update document type |
|
||||
| `PATCH` | `/documents/{id}/tags` | Replace tag list (dedup, preserve order) |
|
||||
| `PATCH` | `/documents/{id}/title` | Update editable title |
|
||||
| `GET` | `/documents/categories` | List all categories for the user |
|
||||
| `GET` | `/documents/categories` | List all categories (user + watch) |
|
||||
| `POST` | `/documents/categories` | Create a category; triggers re-analysis of documents in similar categories |
|
||||
| `POST` | `/documents/{id}/reprocess` | Reset status to pending and re-run AI extraction; 409 if already pending/processing |
|
||||
| `PATCH` | `/documents/categories/{id}` | Rename a category |
|
||||
| `DELETE` | `/documents/categories/{id}` | Delete a category |
|
||||
| `POST` | `/documents/{id}/categories/{cat_id}` | Assign category to document |
|
||||
| `DELETE` | `/documents/{id}/categories/{cat_id}` | Remove category from document |
|
||||
| `POST` | `/documents/{id}/suggestions/folder/confirm` | Apply AI folder suggestion → create/find category + assign |
|
||||
| `POST` | `/documents/{id}/suggestions/folder/reject` | Clear AI folder suggestion |
|
||||
| `POST` | `/documents/{id}/suggestions/filename/confirm` | Apply AI filename suggestion → set title |
|
||||
| `POST` | `/documents/{id}/suggestions/filename/reject` | Clear AI filename suggestion |
|
||||
|
||||
### Plugin endpoints (internal — backend calls only)
|
||||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| `GET` | `/plugin/manifest` | Static manifest: metadata, JSON Schema for settings, access rules |
|
||||
| `GET` | `/plugin/settings` | Current watch/storage config values |
|
||||
| `PATCH` | `/plugin/settings` | Update watch/storage config (persisted to `/config/doc_service_config.json`) |
|
||||
|
||||
### Pagination & filtering (`GET /documents`)
|
||||
|
||||
@@ -59,21 +71,27 @@ Response: `{ items: [...], total: N, page: N, pages: N }`
|
||||
### Document schema
|
||||
|
||||
```
|
||||
id UUID
|
||||
user_id string (from x-user-id header)
|
||||
filename original filename
|
||||
title AI-suggested editable title (nullable)
|
||||
file_size bytes
|
||||
status pending | processing | done | failed
|
||||
document_type AI-classified type (nullable)
|
||||
extracted_data JSON string — all AI-extracted fields
|
||||
tags JSON array string — editable tags
|
||||
error_message set if status=failed
|
||||
created_at upload timestamp
|
||||
processed_at when extraction finished
|
||||
categories many-to-many via category_assignments
|
||||
id UUID
|
||||
user_id string (from x-user-id header; "watch" for watch-ingested docs)
|
||||
filename original filename
|
||||
title AI-suggested editable title (nullable)
|
||||
file_size bytes
|
||||
status pending | processing | done | failed
|
||||
document_type AI-classified type (nullable)
|
||||
extracted_data JSON string — all AI-extracted fields
|
||||
tags JSON array string — editable tags
|
||||
error_message set if status=failed
|
||||
created_at upload timestamp
|
||||
processed_at when extraction finished
|
||||
source "upload" (default) or "watch"
|
||||
watch_path original absolute path in watch directory (nullable)
|
||||
suggested_folder AI-suggested category name, pending user confirm (nullable)
|
||||
suggested_filename AI-suggested title/rename, pending user confirm (nullable)
|
||||
categories many-to-many via category_assignments
|
||||
```
|
||||
|
||||
Watch-ingested documents (`user_id = "watch"`) are visible to all authenticated users.
|
||||
|
||||
### AI extraction (via ai-service)
|
||||
|
||||
System prompt and user prompt template are loaded at runtime from `doc_service_config.json` (`system_prompts` key). Defaults are built into the service and used as fallback if the config key is absent. Changes made via the AI Settings UI take effect within 30 seconds (config cache TTL).
|
||||
@@ -93,12 +111,25 @@ Prompt sends the first 50 000 chars of extracted text. Expected JSON response in
|
||||
```
|
||||
Env override: `DOC_MAX_PDF_MB`
|
||||
|
||||
### Watch directory feature
|
||||
|
||||
Controlled via plugin settings (UI accessible to superusers and `doc-service-admin` group members):
|
||||
|
||||
- `watch_enabled` — toggle file watching (default: false)
|
||||
- `watch_path` — mount point (read-only, `/data/watch`; override via Docker volume)
|
||||
- `ai_folder_suggestion` — AI suggests a category for each ingested doc (user confirms)
|
||||
- `ai_folder_default` — default category when AI suggestion is disabled
|
||||
- `ai_rename_suggestion` — AI suggests a title for each ingested doc (user confirms)
|
||||
|
||||
During the startup scan, the watcher walks the watch directory and ingests any PDFs not already in the database (idempotency check by `watch_path`; a file whose existing record has status `failed` is re-ingested). Subfolders are automatically mapped to categories (e.g. `watch/invoices/bill.pdf` → category "invoices"). No-remove policy: deleting a file from the watch directory does not delete the document record.
|
||||
|
||||
### Database migrations
|
||||
|
||||
| Revision | Description |
|
||||
|----------|-------------|
|
||||
| 0001 | Initial schema (documents, categories, category_assignments) |
|
||||
| 0002 | Add `title` column to documents |
|
||||
| 0003 | Add `source`, `watch_path`, `suggested_folder`, `suggested_filename` columns |
|
||||
|
||||
Run automatically on container start via `alembic upgrade head`.
|
||||
|
||||
@@ -109,18 +140,26 @@ Run automatically on container start via `alembic upgrade head`.
|
||||
```
|
||||
backend (proxy) → doc-service:8001
|
||||
│
|
||||
documents.py router
|
||||
│
|
||||
┌────────┴────────┐
|
||||
upload list/get/patch
|
||||
│
|
||||
save_upload() pdfplumber extraction
|
||||
│ │
|
||||
Document(status=pending) ai_client.classify_document()
|
||||
│ │
|
||||
BackgroundTask ai-service:8010/chat
|
||||
│ │
|
||||
process_document() JSON result → update doc row
|
||||
┌────────────┼────────────────────┐
|
||||
documents.py categories.py plugin.py
|
||||
│ │ (internal only)
|
||||
┌────────┴────────┐
|
||||
upload list/get/patch/suggest
|
||||
│
|
||||
save_upload() pdfplumber extraction
|
||||
│ │
|
||||
Document(status=pending) ai_client.classify_document()
|
||||
│ │
|
||||
BackgroundTask ai-service:8010/chat
|
||||
│ │
|
||||
process_document() JSON result → update doc row
|
||||
|
||||
file_watcher.py (watchdog Observer, daemon thread)
|
||||
│
|
||||
├── _PdfEventHandler.on_created / on_moved
|
||||
│ └── asyncio.run_coroutine_threadsafe(ingest_file, loop)
|
||||
│
|
||||
└── _scan_existing() on startup (catches offline gaps)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -140,6 +179,8 @@ backend (proxy) → doc-service:8001
|
||||
## Future work
|
||||
|
||||
- [x] `POST /documents/{id}/reprocess` — re-run AI extraction
|
||||
- [x] Watch directory feature with file watcher, startup scan, folder-to-category mapping, AI suggestion toggles
|
||||
- [x] Plugin manifest endpoint (`/plugin/manifest`, `/plugin/settings`) for generic settings UI
|
||||
- [ ] Advanced filter: query `extracted_data` JSON fields (vendor, due_date, amount) — requires PostgreSQL `jsonb` column or indexed virtual columns
|
||||
- [ ] Bulk operations endpoint
|
||||
- [ ] Document sharing via groups (blocked on groups/permissions system in backend)
|
||||
@@ -147,3 +188,4 @@ backend (proxy) → doc-service:8001
|
||||
- [ ] Rate limiting on upload endpoint
|
||||
- [ ] Soft delete with restore
|
||||
- [ ] Category rename / delete with cascade handling
|
||||
- [ ] Frontend UI for suggestion badges (suggested_folder / suggested_filename confirm/reject buttons)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
"""add watch directory columns to documents
|
||||
|
||||
Revision ID: 0003
|
||||
Revises: 0002
|
||||
Create Date: 2026-04-18
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0003"
|
||||
down_revision: Union[str, None] = "0002"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the watch-directory ingestion columns to the ``documents`` table."""
    # Listed in creation order; downgrade() drops them in reverse.
    new_columns = (
        sa.Column("source", sa.String(16), nullable=False, server_default="upload"),
        sa.Column("watch_path", sa.String(), nullable=True),
        sa.Column("suggested_folder", sa.String(128), nullable=True),
        sa.Column("suggested_filename", sa.String(500), nullable=True),
    )
    for column in new_columns:
        op.add_column("documents", column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the watch-directory columns from ``documents`` in reverse creation order."""
    for column_name in (
        "suggested_filename",
        "suggested_folder",
        "watch_path",
        "source",
    ):
        op.drop_column("documents", column_name)
|
||||
@@ -1,15 +1,45 @@
|
||||
import asyncio
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.core.config import settings
|
||||
from app.routers import categories, documents
|
||||
from app.routers import plugin as plugin_router
|
||||
|
||||
app = FastAPI(title=settings.PROJECT_NAME)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: start the watch-directory watcher, stop it on shutdown.

    The watcher is started only when the storage config has ``watch_enabled``
    set. Any failure while starting it is logged as a warning and swallowed,
    so the HTTP API still comes up without file watching.
    """
    loop = asyncio.get_running_loop()
    watcher = None

    try:
        # Imports live inside the try so that an import failure in the watcher
        # stack is handled by the same except clause as a start-up failure.
        from app.services.config_reader import get_storage_config

        storage_config = await get_storage_config()
        if storage_config.get("watch_enabled"):
            from app.services.file_watcher import FileWatcherService

            watcher = FileWatcherService(loop)
            await watcher.start(storage_config["watch_path"], storage_config)
    except Exception as exc:
        logger.warning("[doc-service] File watcher could not start: %s", exc)

    try:
        yield
    finally:
        # Runs on normal shutdown and also when the lifespan context is left
        # through an exception or cancellation, so the watcher is always stopped.
        if watcher is not None:
            await watcher.stop()
|
||||
|
||||
|
||||
app = FastAPI(title=settings.PROJECT_NAME, lifespan=lifespan)
|
||||
|
||||
# No CORS — this service is only reachable from the main backend on backend-net.
|
||||
# All browser traffic goes through the main backend proxy.
|
||||
|
||||
app.include_router(documents.router, prefix="/documents", tags=["documents"])
|
||||
app.include_router(categories.router, prefix="/categories", tags=["categories"])
|
||||
app.include_router(plugin_router.router, prefix="/plugin", tags=["plugin"])
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
||||
@@ -27,6 +27,12 @@ class Document(Base):
|
||||
)
|
||||
processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Watch-directory ingestion fields (migration 0003)
|
||||
source: Mapped[str] = mapped_column(String(16), nullable=False, default="upload")
|
||||
watch_path: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
suggested_folder: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
suggested_filename: Mapped[str | None] = mapped_column(String(500), nullable=True)
|
||||
|
||||
category_assignments: Mapped[list["CategoryAssignment"]] = relationship(
|
||||
"CategoryAssignment", back_populates="document", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
@@ -5,6 +5,8 @@ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from sqlalchemy import or_
|
||||
|
||||
from app.database import AsyncSessionLocal, get_db
|
||||
from app.deps import get_user_id
|
||||
from app.models.category import DocumentCategory
|
||||
@@ -15,6 +17,9 @@ from app.services.ai_client import classify_document
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Sentinel user_id for watch-ingested categories — must match documents.py
|
||||
_WATCH_USER_ID = "watch"
|
||||
|
||||
_SIMILARITY_THRESHOLD = 0.4
|
||||
|
||||
|
||||
@@ -81,9 +86,10 @@ async def list_categories(
|
||||
user_id: str = Depends(get_user_id),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> list[DocumentCategory]:
|
||||
# Include watch-ingested categories so they appear in the sidebar/filter
|
||||
result = await db.execute(
|
||||
select(DocumentCategory)
|
||||
.where(DocumentCategory.user_id == user_id)
|
||||
.where(or_(DocumentCategory.user_id == user_id, DocumentCategory.user_id == _WATCH_USER_ID))
|
||||
.order_by(DocumentCategory.name)
|
||||
)
|
||||
return result.scalars().all()
|
||||
|
||||
@@ -26,13 +26,21 @@ router = APIRouter()
|
||||
|
||||
_DEFAULT_MAX_BYTES = 20 * 1024 * 1024
|
||||
|
||||
# Sentinel user_id used for watch-directory-ingested documents.
|
||||
# These documents are visible to all authenticated users.
|
||||
_WATCH_USER_ID = "watch"
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _get_user_doc(doc_id: str, user_id: str, db: AsyncSession) -> Document:
|
||||
"""Fetch a document owned by user_id OR a watch-ingested document (visible to all)."""
|
||||
result = await db.execute(
|
||||
select(Document)
|
||||
.where(Document.id == doc_id, Document.user_id == user_id)
|
||||
.where(
|
||||
Document.id == doc_id,
|
||||
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
|
||||
)
|
||||
.options(
|
||||
selectinload(Document.category_assignments)
|
||||
.selectinload(CategoryAssignment.category)
|
||||
@@ -61,6 +69,10 @@ def _doc_with_categories(doc: Document) -> DocumentOut:
|
||||
created_at=doc.created_at,
|
||||
processed_at=doc.processed_at,
|
||||
categories=cats,
|
||||
source=doc.source,
|
||||
watch_path=doc.watch_path,
|
||||
suggested_folder=doc.suggested_folder,
|
||||
suggested_filename=doc.suggested_filename,
|
||||
)
|
||||
|
||||
|
||||
@@ -183,7 +195,8 @@ async def list_documents(
|
||||
sort_expr = sort_col.desc() if order == "desc" else sort_col.asc()
|
||||
|
||||
# Build filter conditions once and reuse for both count + items queries.
|
||||
conditions = [Document.user_id == user_id]
|
||||
# Watch-ingested documents (user_id = "watch") are visible to all users.
|
||||
conditions = [or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID)]
|
||||
if status:
|
||||
conditions.append(Document.status == status)
|
||||
if document_type:
|
||||
@@ -247,7 +260,10 @@ async def get_document_status(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> Document:
|
||||
result = await db.execute(
|
||||
select(Document).where(Document.id == doc_id, Document.user_id == user_id)
|
||||
select(Document).where(
|
||||
Document.id == doc_id,
|
||||
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
|
||||
)
|
||||
)
|
||||
doc = result.scalar_one_or_none()
|
||||
if doc is None:
|
||||
@@ -347,7 +363,10 @@ async def download_file(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> StreamingResponse:
|
||||
result = await db.execute(
|
||||
select(Document).where(Document.id == doc_id, Document.user_id == user_id)
|
||||
select(Document).where(
|
||||
Document.id == doc_id,
|
||||
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
|
||||
)
|
||||
)
|
||||
doc = result.scalar_one_or_none()
|
||||
if doc is None:
|
||||
@@ -374,9 +393,12 @@ async def assign_category(
|
||||
user_id: str = Depends(get_user_id),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> None:
|
||||
# Verify both belong to this user
|
||||
# Verify the document is accessible (own or watch-ingested)
|
||||
doc_result = await db.execute(
|
||||
select(Document).where(Document.id == doc_id, Document.user_id == user_id)
|
||||
select(Document).where(
|
||||
Document.id == doc_id,
|
||||
or_(Document.user_id == user_id, Document.user_id == _WATCH_USER_ID),
|
||||
)
|
||||
)
|
||||
if doc_result.scalar_one_or_none() is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
@@ -418,3 +440,81 @@ async def remove_category(
|
||||
if assignment:
|
||||
await db.delete(assignment)
|
||||
await db.commit()
|
||||
|
||||
|
||||
# ── AI suggestion confirmation ────────────────────────────────────────────────
|
||||
# These endpoints allow users to confirm or reject AI suggestions on
|
||||
# watch-ingested documents. No disk mutations — suggestions only update the DB.
|
||||
|
||||
@router.post("/{doc_id}/suggestions/folder/confirm", status_code=204)
async def confirm_folder_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Apply the pending AI folder suggestion as a category assignment.

    Finds (or creates) a category named after the document's
    ``suggested_folder`` under the watch sentinel user, assigns it to the
    document unless it is already assigned, and clears the suggestion.
    All changes are committed together in a single transaction.

    Raises:
        HTTPException: 400 when the document has no folder suggestion pending.
    """
    doc = await _get_user_doc(doc_id, user_id, db)
    if not doc.suggested_folder:
        raise HTTPException(status_code=400, detail="No folder suggestion pending")

    # Normalise once so the lookup and the insert use exactly the same name.
    category_name = doc.suggested_folder[:128]

    # Find or create the suggested category under the watch sentinel user
    cat_result = await db.execute(
        select(DocumentCategory).where(
            DocumentCategory.user_id == _WATCH_USER_ID,
            DocumentCategory.name == category_name,
        )
    )
    cat = cat_result.scalar_one_or_none()
    if cat is None:
        cat = DocumentCategory(user_id=_WATCH_USER_ID, name=category_name)
        db.add(cat)
        # flush (not commit) makes cat.id available while keeping the category
        # creation, the assignment and the suggestion reset in one transaction.
        await db.flush()

    # Assign if not already assigned
    assignment_result = await db.execute(
        select(CategoryAssignment).where(
            CategoryAssignment.document_id == doc_id,
            CategoryAssignment.category_id == cat.id,
        )
    )
    if assignment_result.scalar_one_or_none() is None:
        db.add(CategoryAssignment(document_id=doc_id, category_id=cat.id))

    doc.suggested_folder = None
    await db.commit()
|
||||
|
||||
|
||||
@router.post("/{doc_id}/suggestions/folder/reject", status_code=204)
async def reject_folder_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Discard the document's pending AI folder suggestion without applying it."""
    document = await _get_user_doc(doc_id, user_id, db)
    document.suggested_folder = None
    await db.commit()
|
||||
|
||||
|
||||
@router.post("/{doc_id}/suggestions/filename/confirm", status_code=204)
async def confirm_filename_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Promote the pending AI filename suggestion to the document title.

    Raises:
        HTTPException: 400 when the document has no filename suggestion pending.
    """
    document = await _get_user_doc(doc_id, user_id, db)
    pending_title = document.suggested_filename
    if not pending_title:
        raise HTTPException(status_code=400, detail="No filename suggestion pending")

    # Only the title changes; the suggestion is cleared once it is applied.
    document.title = pending_title
    document.suggested_filename = None
    await db.commit()
|
||||
|
||||
|
||||
@router.post("/{doc_id}/suggestions/filename/reject", status_code=204)
async def reject_filename_suggestion(
    doc_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db),
) -> None:
    """Discard the document's pending AI filename suggestion without applying it."""
    document = await _get_user_doc(doc_id, user_id, db)
    document.suggested_filename = None
    await db.commit()
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
"""
|
||||
Plugin manifest and settings endpoints for doc-service.
|
||||
|
||||
These are internal-only — they are called by the main backend's generic plugin
|
||||
proxy, never directly by the browser. No authentication is applied here because
|
||||
the backend enforces access control before forwarding the request.
|
||||
|
||||
Endpoints:
|
||||
GET /plugin/manifest → static manifest with JSON Schema for settings
|
||||
GET /plugin/settings → current storage config values
|
||||
PATCH /plugin/settings → update storage config (partial update)
|
||||
"""
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.services.config_reader import get_storage_config, save_storage_config
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
_MANIFEST: dict = {
|
||||
"id": "doc-service",
|
||||
"name": "Document Service",
|
||||
"icon": "file-text",
|
||||
"version": "1.0",
|
||||
"access": {
|
||||
"allow_superuser": True,
|
||||
"required_groups": ["doc-service-admin"],
|
||||
},
|
||||
"settings_schema": {
|
||||
"type": "object",
|
||||
"title": "Storage & Watch",
|
||||
"properties": {
|
||||
"watch_enabled": {
|
||||
"type": "boolean",
|
||||
"title": "Enable file watching",
|
||||
"description": (
|
||||
"Automatically ingest PDF files added to the mounted watch directory. "
|
||||
"Requires a service restart to take effect after toggling."
|
||||
),
|
||||
},
|
||||
"watch_path": {
|
||||
"type": "string",
|
||||
"title": "Watch path",
|
||||
"readOnly": True,
|
||||
"description": "Configured via Docker volume mount — edit docker-compose to change.",
|
||||
},
|
||||
"ai_folder_suggestion": {
|
||||
"type": "boolean",
|
||||
"title": "AI folder suggestion",
|
||||
"description": (
|
||||
"AI suggests a category for each ingested document. "
|
||||
"You must confirm the suggestion before it is applied."
|
||||
),
|
||||
},
|
||||
"ai_folder_default": {
|
||||
"type": "string",
|
||||
"title": "Default import category",
|
||||
"description": "Category assigned automatically when AI folder suggestion is disabled.",
|
||||
},
|
||||
"ai_rename_suggestion": {
|
||||
"type": "boolean",
|
||||
"title": "AI rename suggestion",
|
||||
"description": (
|
||||
"AI suggests a document title for each ingested file. "
|
||||
"You must confirm before it is applied."
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class StorageSettingsUpdate(BaseModel):
    """Request body for ``PATCH /plugin/settings``.

    Every field is optional and defaults to ``None``.
    ``watch_path`` is deliberately not a field: it cannot be changed via the API.
    """

    # Toggle for the watch-directory feature.
    watch_enabled: bool | None = None
    # Whether AI proposes a category for ingested documents.
    ai_folder_suggestion: bool | None = None
    # Category name used when AI folder suggestion is disabled.
    ai_folder_default: str | None = None
    # Whether AI proposes a title for ingested documents.
    ai_rename_suggestion: bool | None = None
|
||||
|
||||
|
||||
@router.get("/manifest")
async def get_manifest() -> dict:
    """Serve the static plugin manifest (metadata, settings schema, access rules)."""
    manifest = _MANIFEST
    return manifest
|
||||
|
||||
|
||||
@router.get("/settings")
async def get_settings() -> dict:
    """Return the current storage/watch settings as produced by ``get_storage_config``."""
    current = await get_storage_config()
    return current
|
||||
|
||||
|
||||
@router.patch("/settings")
async def update_settings(body: StorageSettingsUpdate) -> dict:
    """Apply a partial update to the storage config and return the resulting values.

    Fields that are ``None`` in the request body are left untouched.
    ``ai_folder_default`` is trimmed, capped at 128 characters, and falls back
    to ``"imports"`` when nothing is left after trimming.
    """
    update = body.model_dump(exclude_none=True)
    if "ai_folder_default" in update:
        # Trim BEFORE truncating so surrounding whitespace cannot use up the
        # 128-character budget; rstrip again in case the cut lands on a space.
        cleaned = update["ai_folder_default"].strip()[:128].rstrip()
        update["ai_folder_default"] = cleaned or "imports"
    await save_storage_config(update)
    return await get_storage_config()
|
||||
@@ -23,6 +23,10 @@ class DocumentOut(BaseModel):
|
||||
created_at: datetime
|
||||
processed_at: datetime | None
|
||||
categories: list[CategoryOut] = []
|
||||
source: str = "upload"
|
||||
watch_path: str | None = None
|
||||
suggested_folder: str | None = None
|
||||
suggested_filename: str | None = None
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
@@ -14,6 +14,14 @@ from pathlib import Path
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
_DEFAULT_STORAGE_CONFIG: dict = {
|
||||
"watch_enabled": False,
|
||||
"watch_path": "/data/watch",
|
||||
"ai_folder_suggestion": False,
|
||||
"ai_folder_default": "imports",
|
||||
"ai_rename_suggestion": False,
|
||||
}
|
||||
|
||||
_DEFAULT_SYSTEM_PROMPT = (
|
||||
"You are a financial document analysis assistant. "
|
||||
"Given the text extracted from a PDF document, return ONLY a JSON object "
|
||||
@@ -43,6 +51,7 @@ _DEFAULT_USER_TEMPLATE = (
|
||||
|
||||
_DEFAULT_CONFIG: dict = {
|
||||
"documents": {"max_pdf_bytes": 20 * 1024 * 1024},
|
||||
"storage": _DEFAULT_STORAGE_CONFIG,
|
||||
"system_prompts": {
|
||||
"system": _DEFAULT_SYSTEM_PROMPT,
|
||||
"user_template": _DEFAULT_USER_TEMPLATE,
|
||||
@@ -64,6 +73,25 @@ def _read_config_sync() -> dict:
|
||||
return _apply_env_overrides(base)
|
||||
|
||||
|
||||
def _read_config_sync_raw() -> dict:
    """Read the config file without env overrides — used when writing back to disk.

    Returns:
        The parsed JSON content of ``settings.CONFIG_PATH``, or a deep copy of
        ``_DEFAULT_CONFIG`` when the file does not exist.
    """
    path = Path(settings.CONFIG_PATH)
    try:
        # Read as UTF-8 explicitly instead of relying on the platform locale.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # Handling the error (rather than checking exists() first) avoids a race
        # with the file disappearing between the check and the open.
        return deepcopy(_DEFAULT_CONFIG)
|
||||
|
||||
|
||||
def _write_config_sync(config: dict) -> None:
    """Atomically write config JSON to disk.

    The data is written to a sibling ``.tmp`` file which then replaces the real
    file via ``os.replace``. If writing or replacing fails, the temp file is
    removed and the original error is re-raised.
    """
    path = Path(settings.CONFIG_PATH)
    tmp = path.with_suffix(".tmp")
    tmp.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        os.replace(tmp, path)
    except Exception:
        # Do not leave a half-written temp file next to the real config.
        tmp.unlink(missing_ok=True)
        raise
|
||||
|
||||
|
||||
def _apply_env_overrides(config: dict) -> dict:
|
||||
cfg = deepcopy(config)
|
||||
docs = cfg.setdefault("documents", {})
|
||||
@@ -84,3 +112,22 @@ async def load_doc_config() -> dict:
|
||||
_cache = data
|
||||
_cache_at = now
|
||||
return data
|
||||
|
||||
|
||||
async def get_storage_config() -> dict:
    """Return the ``storage`` config section layered on top of the built-in defaults."""
    doc_config = await load_doc_config()
    stored_values = doc_config.get("storage", {})

    # Start from a private copy of the defaults so callers can mutate the result.
    merged = deepcopy(_DEFAULT_STORAGE_CONFIG)
    merged.update(stored_values)
    return merged
|
||||
|
||||
|
||||
async def save_storage_config(data: dict) -> None:
    """Merge ``data`` into the ``storage`` section of the on-disk config and save it."""
    global _cache, _cache_at

    # Work on the raw file content (no env overrides) so that only values that
    # are really stored in the file get written back.
    on_disk = await asyncio.to_thread(_read_config_sync_raw)
    storage_section = on_disk.setdefault("storage", {})
    storage_section.update(data)
    await asyncio.to_thread(_write_config_sync, on_disk)

    # Reset the cache so the next read picks up the new values.
    _cache, _cache_at = None, 0.0
|
||||
|
||||
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
File-system watcher for the watch directory.
|
||||
|
||||
Uses the watchdog library to monitor a configured directory for new PDF files.
|
||||
When a PDF is detected, it is automatically ingested into the document service
|
||||
(copied to /data/documents, a DB record is created, and the AI pipeline runs).
|
||||
|
||||
Key design decisions:
|
||||
- No-remove policy: on_deleted and on_modified events are intentionally ignored; on_moved only triggers ingestion of the destination file.
|
||||
The watcher never deletes, moves, or modifies files on the watched volume.
|
||||
- Watch documents use user_id="watch" as a sentinel so they are visible to
|
||||
all authenticated users in the document list.
|
||||
- Subfolder names map to categories: a file at invoices/bill.pdf is assigned
|
||||
to an "invoices" category (auto-created if needed).
|
||||
- Suggestions: if ai_folder_suggestion or ai_rename_suggestion are enabled,
|
||||
the relevant fields are set on the document after AI processing so users
|
||||
can confirm/reject from the UI.
|
||||
- Thread → async bridge: watchdog runs in a daemon thread; asyncio coroutines
|
||||
are dispatched from that thread via run_coroutine_threadsafe.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from watchdog.observers import Observer
|
||||
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.category import DocumentCategory
|
||||
from app.models.category_assignment import CategoryAssignment
|
||||
from app.models.document import Document
|
||||
from app.services.storage import save_upload
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Must match _WATCH_USER_ID in app/routers/documents.py
|
||||
WATCH_USER_ID = "watch"
|
||||
|
||||
|
||||
# ── Ingestion logic ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def ingest_file(path_str: str, watch_root: Path, config: dict) -> None:
    """
    Ingest a single PDF file from the watch directory.

    Idempotent: skips files that already have a non-failed document record.
    A record whose status is "failed" is reset to "pending" and processed again.

    Args:
        path_str: Path of the file to ingest; stored on the record as ``watch_path``
            and used for the idempotency lookup.
        watch_root: Root of the watch directory. When the file lives in a
            subfolder, the first path component below the root becomes the
            document's category.
        config: Storage config; ``ai_folder_suggestion`` / ``ai_rename_suggestion``
            decide whether suggestions are populated after processing.
    """
    from sqlalchemy import select

    path = Path(path_str)
    if not path.exists() or not path.is_file():
        return

    async with AsyncSessionLocal() as db:
        # Idempotency check — skip if already tracked (and not failed)
        existing_result = await db.execute(
            select(Document).where(Document.watch_path == path_str)
        )
        existing = existing_result.scalar_one_or_none()
        if existing is not None and existing.status != "failed":
            return

        # Determine category from the first subfolder component.
        # Truncate here, once, so the category lookup below and the insert use
        # the same name; otherwise a folder name longer than 128 characters
        # would never match the stored (truncated) category.
        try:
            rel = path.relative_to(watch_root)
            folder_name = rel.parts[0][:128] if len(rel.parts) > 1 else None
        except ValueError:
            # The file is not located under watch_root: no category.
            folder_name = None

        # Read file bytes in a worker thread so a large PDF does not block
        # the event loop.
        try:
            file_data = await asyncio.to_thread(path.read_bytes)
        except OSError as exc:
            logger.warning("[watcher] Cannot read %s: %s", path_str, exc)
            return

        # Store a copy through save_upload under the watch sentinel user,
        # reusing the id of a previously failed record if there is one.
        doc_id = existing.id if existing is not None else str(uuid.uuid4())
        dest = await save_upload(file_data, WATCH_USER_ID, doc_id)

        if existing is not None:
            # Re-ingest a previously failed document
            existing.file_path = str(dest)
            existing.file_size = len(file_data)
            existing.status = "pending"
            existing.error_message = None
            await db.commit()
        else:
            doc = Document(
                id=doc_id,
                user_id=WATCH_USER_ID,
                source="watch",
                watch_path=path_str,
                filename=path.name,
                file_path=str(dest),
                file_size=len(file_data),
                status="pending",
            )
            db.add(doc)
            await db.commit()

        # Auto-assign category from subfolder name
        if folder_name:
            cat_result = await db.execute(
                select(DocumentCategory).where(
                    DocumentCategory.user_id == WATCH_USER_ID,
                    DocumentCategory.name == folder_name,
                )
            )
            cat = cat_result.scalar_one_or_none()
            if cat is None:
                cat = DocumentCategory(user_id=WATCH_USER_ID, name=folder_name)
                db.add(cat)
                await db.commit()
                await db.refresh(cat)

            exists_assign = await db.execute(
                select(CategoryAssignment).where(
                    CategoryAssignment.document_id == doc_id,
                    CategoryAssignment.category_id == cat.id,
                )
            )
            if exists_assign.scalar_one_or_none() is None:
                db.add(CategoryAssignment(document_id=doc_id, category_id=cat.id))
                await db.commit()

    # Run AI pipeline (opens its own session internally).
    # Imported here rather than at module level, as in the original code.
    from app.routers.documents import process_document

    await process_document(doc_id)

    # Set AI suggestions if enabled
    if config.get("ai_folder_suggestion") or config.get("ai_rename_suggestion"):
        await _apply_suggestions(doc_id, config)
|
||||
|
||||
|
||||
async def _apply_suggestions(doc_id: str, config: dict) -> None:
    """Populate suggested_folder / suggested_filename after AI processing.

    Does nothing unless the document exists, has status "done" and carries
    ``extracted_data`` that parses to a JSON object.

    Args:
        doc_id: Id of the document to update.
        config: Storage config; ``ai_folder_suggestion`` enables the folder
            suggestion (first entry of ``suggested_categories``) and
            ``ai_rename_suggestion`` enables the filename suggestion (``title``).
    """
    from sqlalchemy import select

    async with AsyncSessionLocal() as db:
        result = await db.execute(select(Document).where(Document.id == doc_id))
        doc = result.scalar_one_or_none()
        if doc is None or doc.status != "done" or not doc.extracted_data:
            return

        try:
            extracted = json.loads(doc.extracted_data)
        except (TypeError, ValueError):
            # Not valid JSON (json.JSONDecodeError is a ValueError): no suggestions.
            return
        if not isinstance(extracted, dict):
            # The AI output is expected to be a JSON object; anything else
            # cannot be queried by key.
            return

        changed = False

        if config.get("ai_folder_suggestion"):
            suggestions = extracted.get("suggested_categories")
            if isinstance(suggestions, str):
                # Tolerate a bare string instead of a list; indexing it would
                # otherwise yield just its first character.
                suggestions = [suggestions]
            if isinstance(suggestions, (list, tuple)) and suggestions:
                first = suggestions[0]
                # Skip null/blank entries instead of storing "None" or "".
                folder = "" if first is None else str(first).strip()[:128]
                if folder:
                    doc.suggested_folder = folder
                    changed = True

        if config.get("ai_rename_suggestion"):
            title = extracted.get("title")
            if title:
                suggested_title = str(title).strip()[:500]
                if suggested_title:
                    doc.suggested_filename = suggested_title
                    changed = True

        if changed:
            await db.commit()
|
||||
|
||||
|
||||
# ── Watchdog event handler ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _PdfEventHandler(FileSystemEventHandler):
    """Watchdog handler that schedules PDF ingestion on the asyncio event loop.

    The ``on_*`` callbacks hand the ``ingest_file`` coroutine to the loop via
    ``asyncio.run_coroutine_threadsafe`` rather than awaiting it, because they
    are not coroutines themselves.

    Only ``on_created`` and ``on_moved`` are overridden; deletions and
    modifications are deliberately ignored (no-remove policy).
    """

    def __init__(
        self,
        watch_root: Path,
        loop: asyncio.AbstractEventLoop,
        config: dict,
    ) -> None:
        """Store the watch root, target event loop and watcher settings."""
        super().__init__()
        self._watch_root = watch_root
        self._loop = loop
        self._config = config

    def _dispatch_ingest(self, path_str: str) -> None:
        """Schedule ``ingest_file`` for *path_str* if it ends in ``.pdf`` (any case)."""
        if not path_str.lower().endswith(".pdf"):
            return
        if self._loop.is_closed():
            # Late event during shutdown: nothing can run the coroutine any
            # more, and creating it would only trigger a "never awaited" warning.
            return

        future = asyncio.run_coroutine_threadsafe(
            ingest_file(path_str, self._watch_root, self._config),
            self._loop,
        )
        # Nobody awaits the returned future, so without this callback any
        # exception raised by ingest_file would be lost silently.
        future.add_done_callback(
            lambda done, p=path_str: self._log_ingest_failure(p, done)
        )

    @staticmethod
    def _log_ingest_failure(path_str: str, future) -> None:
        """Log the exception, if any, carried by a finished ingest future."""
        if future.cancelled():
            return
        exc = future.exception()
        if exc is not None:
            logger.warning("[watcher] ingest failed for %s: %s", path_str, exc)

    def on_created(self, event):  # type: ignore[override]
        """Ingest newly created files (directories are ignored)."""
        if not event.is_directory:
            self._dispatch_ingest(event.src_path)

    def on_moved(self, event):  # type: ignore[override]
        """Ingest the destination of a move/rename (directories are ignored)."""
        # Handles atomic rename/move (e.g. Nextcloud or Syncthing completing a sync)
        if not event.is_directory:
            self._dispatch_ingest(event.dest_path)

    # on_deleted / on_modified: intentionally not overridden — no-remove policy
|
||||
|
||||
|
||||
# ── Service ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class FileWatcherService:
    """Manages the watchdog Observer lifecycle within the FastAPI lifespan.

    ``start`` schedules a recursive watch on the configured directory and
    launches a background scan that ingests PDFs already present; ``stop``
    cancels that scan and joins the observer thread.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop) -> None:
        """Remember the event loop on which ingest coroutines will be scheduled."""
        self._loop = loop
        self._observer: Observer | None = None
        self._watch_root: Path | None = None
        self._config: dict = {}
        # Strong reference to the startup-scan task. The event loop only keeps
        # weak references to tasks, so an unreferenced task may be garbage
        # collected before it finishes.
        self._scan_task: asyncio.Task | None = None

    async def start(self, watch_path: str, config: dict) -> None:
        """Begin watching *watch_path* and kick off the startup scan.

        If the path does not exist a warning is logged and nothing is started.
        Calling ``start`` while a watcher is already running stops the previous
        one first so its observer thread is not leaked.
        """
        if self._observer is not None:
            await self.stop()

        self._watch_root = Path(watch_path)
        self._config = config

        if not self._watch_root.exists():
            logger.warning(
                "[watcher] Watch path %s does not exist — file watching disabled",
                watch_path,
            )
            return

        handler = _PdfEventHandler(self._watch_root, self._loop, config)
        self._observer = Observer()
        self._observer.schedule(handler, watch_path, recursive=True)
        self._observer.start()
        logger.info("[watcher] started, watching %s", watch_path)

        # Run startup scan as a background task so startup is not blocked
        self._scan_task = asyncio.create_task(self._scan_existing())

    @staticmethod
    def _list_pdfs(root: Path) -> list[Path]:
        """Return all regular files under *root* whose name ends in ``.pdf`` (any case).

        Matching is case-insensitive so the startup scan accepts the same
        names as the event handler's ``.lower().endswith(".pdf")`` check.
        """
        return sorted(
            p
            for p in root.rglob("*")
            if p.name.lower().endswith(".pdf") and p.is_file()
        )

    async def _scan_existing(self) -> None:
        """Ingest any PDFs already present in the watch directory."""
        root = self._watch_root
        if root is None:
            return
        logger.info("[watcher] scanning existing files in %s", root)

        try:
            # The directory walk is blocking file-system I/O; keep it off the loop.
            pdf_paths = await asyncio.to_thread(self._list_pdfs, root)
        except OSError as exc:
            logger.warning("[watcher] could not list %s: %s", root, exc)
            return

        count = 0
        for pdf_path in pdf_paths:
            try:
                await ingest_file(str(pdf_path), root, self._config)
                count += 1
            except Exception as exc:
                # Best effort: one bad file must not abort the whole scan.
                logger.warning("[watcher] scan error for %s: %s", pdf_path, exc)
        logger.info("[watcher] startup scan complete — processed %d file(s)", count)

    async def stop(self) -> None:
        """Cancel a running startup scan and shut down the observer thread."""
        scan_task, self._scan_task = self._scan_task, None
        if scan_task is not None and not scan_task.done():
            scan_task.cancel()
            # gather(..., return_exceptions=True) absorbs the task's own
            # CancelledError so shutdown continues normally.
            await asyncio.gather(scan_task, return_exceptions=True)

        if self._observer is not None:
            self._observer.stop()
            # join() blocks until the observer thread exits; run it off-loop.
            await asyncio.to_thread(self._observer.join)
            self._observer = None
            logger.info("[watcher] stopped")
|
||||
@@ -17,6 +17,7 @@ dependencies = [
|
||||
"pdfplumber>=0.11",
|
||||
"aiofiles>=23.0",
|
||||
"python-multipart>=0.0.9",
|
||||
"watchdog>=4.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
+20
-2
@@ -23,6 +23,7 @@ All API calls go through `src/api/client.ts` (single Axios instance, JWT injecte
|
||||
| `/admin/groups` | `AdminGroupsPage` | Admin only |
|
||||
| `/profile` | `ProfilePage` | Required |
|
||||
| `/settings` | `SettingsPage` (placeholder) | Required |
|
||||
| `/settings/plugins/:id` | `PluginSettingsPage` | Required (per-plugin access control) |
|
||||
|
||||
`PrivateRoute` redirects to `/login` when no token. `AdminRoute` redirects to `/` when not admin.
|
||||
|
||||
@@ -60,6 +61,12 @@ Cards are rendered dynamically from `GET /api/services` (polled every 30 s via T
|
||||
- Sections auto-open when navigating to their route
|
||||
- In collapsed (icons-only) mode, clicking the Apps icon navigates to `/apps`
|
||||
|
||||
**Extensions** section (dynamic):
|
||||
- Populated from `GET /api/plugins` (polled via TanStack Query, `retry: false`)
|
||||
- Only shown when the user has access to at least one plugin
|
||||
- Each entry links to `/settings/plugins/:id`
|
||||
- No code changes needed to add future plugin-enabled feature containers
|
||||
|
||||
### Documents page (`/apps/documents`)
|
||||
|
||||
**Upload:** PDF file input, 202 response, error display.
|
||||
@@ -140,6 +147,10 @@ Key functions:
|
||||
| `removeCategory(docId, catId)` | Remove |
|
||||
| `updateDocumentTags(id, tags)` | `PATCH /documents/{id}/tags` |
|
||||
| `updateDocumentTitle(id, title)` | `PATCH /documents/{id}/title` |
|
||||
| `confirmFolderSuggestion(docId)` | `POST /documents/{id}/suggestions/folder/confirm` |
|
||||
| `rejectFolderSuggestion(docId)` | `POST /documents/{id}/suggestions/folder/reject` |
|
||||
| `confirmFilenameSuggestion(docId)` | `POST /documents/{id}/suggestions/filename/confirm` |
|
||||
| `rejectFilenameSuggestion(docId)` | `POST /documents/{id}/suggestions/filename/reject` |
|
||||
| `getAISettings()` | `GET /settings/ai` (masked) |
|
||||
| `updateAISettings(data)` | `PATCH /settings/ai` |
|
||||
| `testAIConnection()` | `POST /settings/ai/test` |
|
||||
@@ -152,6 +163,10 @@ Key functions:
|
||||
| `adminAddGroupMember(gId, uId)` | `POST /admin/groups/{gId}/members/{uId}` |
|
||||
| `adminRemoveGroupMember(gId, uId)` | `DELETE /admin/groups/{gId}/members/{uId}` |
|
||||
| `updateDocumentLimits(data)` | `PATCH /settings/documents/limits` |
|
||||
| `getPlugins()` | `GET /plugins` — list accessible plugins |
|
||||
| `getPluginManifest(id)` | `GET /plugins/{id}/manifest` |
|
||||
| `getPluginSettings(id)` | `GET /plugins/{id}/settings` |
|
||||
| `updatePluginSettings(id, data)` | `PATCH /plugins/{id}/settings` |
|
||||
|
||||
---
|
||||
|
||||
@@ -168,8 +183,10 @@ Key functions:
|
||||
| Component | Path | Description |
|
||||
|-----------|------|-------------|
|
||||
| `AppShell` | `src/components/AppShell.tsx` | Layout wrapper: Sidebar + scrollable main content |
|
||||
| `Sidebar` | `src/components/Sidebar.tsx` | Collapsible left nav (icons-only ↔ icons+labels) |
|
||||
| `Sidebar` | `src/components/Sidebar.tsx` | Collapsible left nav; includes dynamic "Extensions" section |
|
||||
| `ThemeToggle` | `src/components/ThemeToggle.tsx` | Sun/moon ghost icon button; persists to localStorage |
|
||||
| `PluginSchemaForm` | `src/components/PluginSchemaForm.tsx` | JSON Schema → React form (boolean/string/number/readOnly fields) |
|
||||
| `PluginSettingsPage` | `src/pages/PluginSettingsPage.tsx` | Generic plugin settings page (manifest-driven) |
|
||||
| `Button` | `src/components/ui/button.tsx` | shadcn/ui Button (default, ghost, outline, destructive) |
|
||||
| `Input` | `src/components/ui/input.tsx` | shadcn/ui Input |
|
||||
|
||||
@@ -188,10 +205,11 @@ Key functions:
|
||||
- [x] UI component library: shadcn/ui + Tailwind CSS — installed and wired up
|
||||
- [x] AppShell + Sidebar replacing inline Nav component
|
||||
- [x] Light/dark theme context with OS preference detection
|
||||
- [x] Generic plugin infrastructure: Extensions sidebar section, PluginSchemaForm, PluginSettingsPage
|
||||
- [ ] Suggestion badges in DocumentsPage for `suggested_folder` / `suggested_filename` (confirm/reject buttons)
|
||||
- [ ] Toast notification system (upload success, save feedback, errors)
|
||||
- [ ] Loading skeletons
|
||||
- [ ] `POST /queue/jobs` integration — show AI processing queue status / progress per document
|
||||
- [ ] Re-process document button (`POST /documents/{id}/reprocess` — needs backend endpoint first)
|
||||
- [ ] Advanced filter: extracted data fields (vendor, due date, amount) — needs backend support
|
||||
- [x] Groups admin UI — list, create, edit, delete, add/remove members
|
||||
- [ ] App permissions UI per group (blocked on backend group_app_permissions)
|
||||
|
||||
@@ -15,6 +15,7 @@ import DocumentsPage from "./pages/DocumentsPage";
|
||||
import DocumentAdminSettingsPage from "./pages/DocumentAdminSettingsPage";
|
||||
import AIAdminSettingsPage from "./pages/AIAdminSettingsPage";
|
||||
import SettingsPage from "./pages/SettingsPage";
|
||||
import PluginSettingsPage from "./pages/PluginSettingsPage";
|
||||
|
||||
function PrivateRoute({ children }: { children: React.ReactNode }) {
|
||||
const { token } = useAuth();
|
||||
@@ -55,6 +56,7 @@ export default function App() {
|
||||
/>
|
||||
<Route path="/profile" element={<PrivateRoute><ProfilePage /></PrivateRoute>} />
|
||||
<Route path="/settings" element={<PrivateRoute><SettingsPage /></PrivateRoute>} />
|
||||
<Route path="/settings/plugins/:id" element={<PrivateRoute><PluginSettingsPage /></PrivateRoute>} />
|
||||
<Route path="/admin" element={<AdminRoute><AdminPage /></AdminRoute>} />
|
||||
<Route path="/admin/users" element={<AdminRoute><AdminUsersPage /></AdminRoute>} />
|
||||
<Route path="/admin/groups" element={<AdminRoute><AdminGroupsPage /></AdminRoute>} />
|
||||
|
||||
@@ -107,6 +107,10 @@ export interface DocumentOut {
|
||||
created_at: string;
|
||||
processed_at: string | null;
|
||||
categories: CategoryOut[];
|
||||
source: string;
|
||||
watch_path: string | null;
|
||||
suggested_folder: string | null;
|
||||
suggested_filename: string | null;
|
||||
}
|
||||
|
||||
export interface DocumentPage {
|
||||
@@ -371,3 +375,59 @@ export const updateSystemPrompt = (
|
||||
api
|
||||
.patch<SystemPromptsData>(`/settings/system-prompts/${serviceId}`, data)
|
||||
.then((r) => r.data);
|
||||
|
||||
// --- Document suggestions (watch-ingested documents) ---
|
||||
/** Accept the AI-suggested folder for a watch-ingested document. */
export const confirmFolderSuggestion = (docId: string) => {
  const url = `/documents/${docId}/suggestions/folder/confirm`;
  return api.post(url);
};
|
||||
|
||||
/** Dismiss the AI-suggested folder for a watch-ingested document. */
export const rejectFolderSuggestion = (docId: string) => {
  const url = `/documents/${docId}/suggestions/folder/reject`;
  return api.post(url);
};
|
||||
|
||||
/** Accept the AI-suggested filename for a watch-ingested document. */
export const confirmFilenameSuggestion = (docId: string) => {
  const url = `/documents/${docId}/suggestions/filename/confirm`;
  return api.post(url);
};
|
||||
|
||||
/** Dismiss the AI-suggested filename for a watch-ingested document. */
export const rejectFilenameSuggestion = (docId: string) => {
  const url = `/documents/${docId}/suggestions/filename/reject`;
  return api.post(url);
};
|
||||
|
||||
// --- Plugins ---
|
||||
/** Summary of one plugin as returned in the `GET /plugins` list. */
export interface PluginOut {
  /** Plugin identifier; used in `/plugins/{id}/…` request paths. */
  id: string;
  /** Display name. */
  name: string;
  /** Icon value supplied by the plugin. */
  icon: string;
  /** Version string supplied by the plugin. */
  version: string;
}
|
||||
|
||||
/** One entry of a plugin settings schema's `properties` map. */
export interface PluginSchemaProperty {
  /** Schema type name of the field (e.g. `"boolean"`, `"string"`, `"number"`). */
  type: string;
  /** Label for the field. */
  title: string;
  /** Optional longer description of the field. */
  description?: boolean extends never ? never : string;
  /** Optional read-only flag for the field. */
  readOnly?: boolean;
}
|
||||
|
||||
/** Self-description of a plugin as returned by `GET /plugins/{id}/manifest`. */
export interface PluginManifest {
  /** Plugin identifier. */
  id: string;
  /** Display name. */
  name: string;
  /** Icon value supplied by the plugin. */
  icon: string;
  /** Version string supplied by the plugin. */
  version: string;
  /** Access rules declared by the plugin. */
  access: {
    allow_superuser: boolean;
    required_groups: Array<string>;
  };
  /** Schema describing the plugin's settings, keyed by setting name. */
  settings_schema: {
    type: string;
    title?: string;
    properties: Record<string, PluginSchemaProperty>;
  };
}
|
||||
|
||||
/** Fetch the plugin list from `GET /plugins` and unwrap the response body. */
export const getPlugins = async () => {
  const response = await api.get<PluginOut[]>("/plugins");
  return response.data;
};
|
||||
|
||||
/**
 * Fetch a plugin's manifest from `GET /plugins/{id}/manifest`.
 *
 * @param id - Plugin identifier. It is percent-encoded before being placed in
 *   the path, so characters such as `/`, `?` or `#` cannot change which
 *   endpoint is requested.
 * @returns The response body typed as a {@link PluginManifest}.
 */
export const getPluginManifest = (id: string) =>
  api
    .get<PluginManifest>(`/plugins/${encodeURIComponent(id)}/manifest`)
    .then((r) => r.data);
|
||||
|
||||
/**
 * Fetch a plugin's current settings from `GET /plugins/{id}/settings`.
 *
 * @param id - Plugin identifier. It is percent-encoded before being placed in
 *   the path, so characters such as `/`, `?` or `#` cannot change which
 *   endpoint is requested.
 * @returns The response body as a plain key/value record.
 */
export const getPluginSettings = (id: string) =>
  api
    .get<Record<string, unknown>>(`/plugins/${encodeURIComponent(id)}/settings`)
    .then((r) => r.data);
|
||||
|
||||
/**
 * Send changed settings to `PATCH /plugins/{id}/settings`.
 *
 * @param id - Plugin identifier. It is percent-encoded before being placed in
 *   the path, so characters such as `/`, `?` or `#` cannot change which
 *   endpoint is requested.
 * @param data - Settings values to send as the request body.
 * @returns The response body as a plain key/value record.
 */
export const updatePluginSettings = (id: string, data: Record<string, unknown>) =>
  api
    .patch<Record<string, unknown>>(`/plugins/${encodeURIComponent(id)}/settings`, data)
    .then((r) => r.data);
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { cn } from "@/lib/utils";
|
||||
import type { PluginSchemaProperty } from "@/api/client";
|
||||
|
||||
/** Shape of the settings schema rendered by the form in this file. */
interface PluginSchema {
  /** Schema type name of the root. */
  type: string;
  /** Optional title of the schema. */
  title?: string;
  /** Field definitions keyed by setting name; one form row is rendered per entry. */
  properties: Record<string, PluginSchemaProperty>;
}
|
||||
|
||||
/** Props accepted by `PluginSchemaForm`. */
interface PluginSchemaFormProps {
  /** Schema whose `properties` define the rendered fields. */
  schema: PluginSchema;
  /** Current values keyed by setting name; the form re-syncs when this object changes. */
  values: Record<string, unknown>;
  /** Called with the edited values when the save button is clicked. */
  onSave: (values: Record<string, unknown>) => void;
  /** Disables the save button and switches its label to "Saving…". */
  isPending?: boolean;
  /** Shows the failure message next to the save button. */
  isError?: boolean;
  /** Shows the success message (only while `isPending` is falsy). */
  isSuccess?: boolean;
}
|
||||
|
||||
/**
 * Switch-style button: renders a track with a sliding thumb and calls
 * `onChange` with the negated `checked` value on click.
 */
function Toggle({ checked, onChange }: { checked: boolean; onChange: (v: boolean) => void }) {
  const trackClass = cn(
    "relative inline-flex h-6 w-11 shrink-0 items-center rounded-full transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-primary",
    checked ? "bg-primary" : "bg-muted/60 border border-border"
  );
  const thumbClass = cn(
    "inline-block h-4 w-4 rounded-full bg-white shadow-sm transition-transform",
    checked ? "translate-x-6" : "translate-x-1"
  );
  const handleClick = () => onChange(!checked);

  return (
    <button
      type="button"
      role="switch"
      aria-checked={checked}
      onClick={handleClick}
      className={trackClass}
    >
      <span className={thumbClass} />
    </button>
  );
}
|
||||
|
||||
/**
 * Renders one form row per entry of `schema.properties` and reports the edited
 * values through `onSave`.
 *
 * Field rendering by schema type:
 * - `boolean` → toggle
 * - `string` → text input
 * - `number` / `integer` → numeric input
 * - any of the above marked `readOnly` → plain, non-editable text
 *
 * Other types render only their title and description.
 */
export default function PluginSchemaForm({
  schema,
  values,
  onSave,
  isPending,
  isError,
  isSuccess,
}: PluginSchemaFormProps) {
  const [form, setForm] = useState<Record<string, unknown>>(values);

  // Re-sync local edits whenever the parent hands in a new values object.
  useEffect(() => {
    setForm(values);
  }, [values]);

  const setField = (key: string, value: unknown) => {
    setForm((prev) => ({ ...prev, [key]: value }));
  };

  const displayValue = (key: string) => String(form[key] ?? "");

  return (
    <div className="space-y-6">
      {/* The schema arrives as unvalidated JSON, so tolerate a missing map. */}
      {Object.entries(schema.properties ?? {}).map(([key, prop]) => {
        const isBoolean = prop.type === "boolean";
        const isString = prop.type === "string";
        const isNumeric = prop.type === "number" || prop.type === "integer";
        const isReadOnly = Boolean(prop.readOnly);

        return (
          <div key={key} className="space-y-1.5">
            <div className="flex items-center justify-between gap-4">
              <div className="min-w-0">
                {/* Fall back to the key so a field without a title is still labelled. */}
                <p className="text-sm font-medium text-foreground">{prop.title ?? key}</p>
                {prop.description && (
                  <p className="text-xs text-muted mt-0.5">{prop.description}</p>
                )}
              </div>
              {isBoolean && !isReadOnly && (
                <Toggle
                  checked={Boolean(form[key])}
                  onChange={(v) => setField(key, v)}
                />
              )}
            </div>

            {/* Read-only values of every supported type are shown, not just strings. */}
            {isReadOnly && (isString || isNumeric || isBoolean) && (
              <p className="text-sm text-muted font-mono bg-muted/20 px-3 py-1.5 rounded-md border border-border">
                {displayValue(key)}
              </p>
            )}

            {isString && !isReadOnly && (
              <Input
                value={displayValue(key)}
                onChange={(e) => setField(key, e.target.value)}
                className="h-9"
              />
            )}

            {isNumeric && !isReadOnly && (
              <Input
                type="number"
                value={displayValue(key)}
                onChange={(e) => {
                  // An empty value means "cleared" or a partial entry such as "-".
                  // Coercing it with Number("") would store 0 and snap the input
                  // to "0", so the field could never be emptied.
                  const raw = e.target.value;
                  setField(key, raw === "" ? undefined : Number(raw));
                }}
                className="h-9"
              />
            )}
          </div>
        );
      })}

      <div className="flex items-center gap-3 pt-2">
        <Button onClick={() => onSave(form)} disabled={isPending} size="sm">
          {isPending ? "Saving…" : "Save changes"}
        </Button>
        {isError && (
          <span className="text-sm text-destructive">Failed to save. Please try again.</span>
        )}
        {isSuccess && !isPending && (
          <span className="text-sm text-green-600 dark:text-green-400">Saved successfully.</span>
        )}
      </div>
    </div>
  );
}
|
||||
@@ -16,11 +16,12 @@ import {
|
||||
Users,
|
||||
UsersRound,
|
||||
Palette,
|
||||
Puzzle,
|
||||
} from "lucide-react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import ThemeToggle from "@/components/ThemeToggle";
|
||||
import { useAuth } from "@/hooks/useAuth";
|
||||
import { getMe, listCategories } from "@/api/client";
|
||||
import { getMe, getPlugins, listCategories } from "@/api/client";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
export default function Sidebar() {
|
||||
@@ -56,6 +57,14 @@ export default function Sidebar() {
|
||||
enabled: appsOpen && docsOpen && !!user,
|
||||
});
|
||||
|
||||
const { data: plugins = [] } = useQuery({
|
||||
queryKey: ["plugins"],
|
||||
queryFn: getPlugins,
|
||||
enabled: !!user,
|
||||
// Empty array on 404/error — regular users simply see no plugins
|
||||
retry: false,
|
||||
});
|
||||
|
||||
const navItemClass = (isActive: boolean) =>
|
||||
cn(
|
||||
"flex items-center rounded-lg transition-colors",
|
||||
@@ -209,6 +218,40 @@ export default function Sidebar() {
|
||||
)}
|
||||
</NavLink>
|
||||
|
||||
{/* Extensions — visible only when the user has accessible plugins */}
|
||||
{plugins.length > 0 && (
|
||||
<div>
|
||||
{sidebarExpanded ? (
|
||||
<>
|
||||
<div className="px-3 py-1.5">
|
||||
<span className="text-xs font-semibold uppercase tracking-wider text-muted">
|
||||
Extensions
|
||||
</span>
|
||||
</div>
|
||||
<div className="space-y-0.5">
|
||||
{plugins.map((plugin) => (
|
||||
<NavLink
|
||||
key={plugin.id}
|
||||
to={`/settings/plugins/${plugin.id}`}
|
||||
className={({ isActive }) => subItemClass(isActive)}
|
||||
>
|
||||
<Puzzle className="h-4 w-4 shrink-0" />
|
||||
<span className="whitespace-nowrap truncate">{plugin.name}</span>
|
||||
</NavLink>
|
||||
))}
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<NavLink
|
||||
to={`/settings/plugins/${plugins[0].id}`}
|
||||
className={({ isActive }) => navItemClass(isActive)}
|
||||
>
|
||||
<Puzzle className="h-5 w-5 shrink-0" />
|
||||
</NavLink>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Admin — expandable */}
|
||||
{user?.is_admin && (
|
||||
<div>
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
import { useParams } from "react-router-dom";
|
||||
import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
|
||||
import { getPluginManifest, getPluginSettings, updatePluginSettings } from "@/api/client";
|
||||
import PluginSchemaForm from "@/components/PluginSchemaForm";
|
||||
|
||||
/**
 * Generic, manifest-driven settings page for the plugin named by the `:id`
 * route parameter.
 *
 * Loads the manifest first, then the current settings, and renders them with
 * `PluginSchemaForm`. Saving sends the form values via `updatePluginSettings`
 * and invalidates the settings query so fresh values are fetched.
 */
export default function PluginSettingsPage() {
  const { id } = useParams<{ id: string }>();
  // Both queries are disabled while `id` is missing, so the empty fallback is
  // never sent; it only replaces the non-null assertions.
  const pluginId = id ?? "";
  const queryClient = useQueryClient();

  const { data: manifest, isLoading: manifestLoading, isError: manifestError } = useQuery({
    queryKey: ["plugin-manifest", id],
    queryFn: () => getPluginManifest(pluginId),
    enabled: !!id,
    retry: false,
  });

  const {
    data: settings,
    isLoading: settingsLoading,
    isError: settingsError,
  } = useQuery({
    queryKey: ["plugin-settings", id],
    queryFn: () => getPluginSettings(pluginId),
    // Settings are only requested once the manifest has loaded.
    enabled: !!id && !!manifest,
  });

  const mutation = useMutation({
    mutationFn: (values: Record<string, unknown>) => updatePluginSettings(pluginId, values),
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ["plugin-settings", id] });
    },
  });

  if (manifestLoading || settingsLoading) {
    return <p className="text-sm text-muted p-6">Loading…</p>;
  }

  if (manifestError || !manifest) {
    return (
      <p className="text-sm text-destructive p-6">
        Plugin not found or you do not have access to its settings.
      </p>
    );
  }

  // Never render the form without real values: an empty form could be saved
  // and would send blank settings to the plugin.
  if (settingsError) {
    return (
      <p className="text-sm text-destructive p-6">
        Failed to load plugin settings. Please try again.
      </p>
    );
  }

  if (settings === undefined) {
    return <p className="text-sm text-muted p-6">Loading…</p>;
  }

  return (
    <div className="max-w-xl p-6 space-y-6">
      <div>
        <h1 className="text-xl font-semibold text-foreground">
          {manifest.settings_schema.title ?? manifest.name}
        </h1>
        <p className="text-sm text-muted mt-1">
          {manifest.name} · v{manifest.version}
        </p>
      </div>

      <PluginSchemaForm
        schema={manifest.settings_schema}
        values={settings}
        onSave={(values) => mutation.mutate(values)}
        isPending={mutation.isPending}
        isError={mutation.isError}
        isSuccess={mutation.isSuccess}
      />
    </div>
  );
}
|
||||
Reference in New Issue
Block a user