Add PDF document service with AI extraction and per-app settings

- New `features/doc-service` FastAPI microservice: PDF upload, async
  text extraction (pdfplumber), AI classification via Anthropic/Ollama/
  LM Studio, per-user categories, file download
- Alembic migration isolated with `alembic_version_doc_service` table
- Main backend: httpx proxy routers for /api/documents/* and
  /api/documents/categories/*, admin settings API at /api/settings/*
- Runtime config in /config/doc_service_config.json (shared Docker
  volume); api_key masking on reads; atomic write with os.replace()
- Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage
  launcher hub, simplified Nav (removed Settings link), new routes
- docker-compose: doc-service service, doc_data + app_config volumes,
  removed internal:true from backend-net for outbound AI API calls
- Fix pre-commit hook: probe Docker socket path so git subprocess picks
  up Docker Desktop on macOS
- Fix security_check.py: use sys.executable for bandit so venv python
  is used instead of system python

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-14 05:28:11 +02:00
parent d423bea134
commit 0d34867a69
52 changed files with 2500 additions and 28 deletions
+85 -1
View File
@@ -1,11 +1,95 @@
import { Link } from "react-router-dom";
import { useQuery } from "@tanstack/react-query";
import Nav from "../components/Nav";
import { getMe } from "../api/client";
/**
 * One tile on the Apps launcher page.
 */
interface AppCard {
  // --- identity / copy ---
  /** Stable identifier; used as the React key when the tiles are rendered. */
  slug: string;
  /** Heading shown on the tile. */
  name: string;
  /** Short blurb shown under the heading. */
  description: string;

  // --- navigation ---
  /** Route the "Open" link points at. */
  path: string;
  /** Route of the app's settings page, if it has one. */
  settingsPath?: string;

  // --- availability ---
  /** Whether the app can be opened yet; drives the badge and the links. */
  status: "coming_soon" | "available";
}
/** Launcher entry for the PDF documents app. */
const documentsApp: AppCard = {
  slug: "documents",
  name: "Documents",
  status: "available",
  description: "Upload PDF files, extract data, and organise them with categories.",
  path: "/apps/documents",
  settingsPath: "/apps/documents/settings/admin",
};

/** Every app shown on the launcher page, in display order. */
const APPS: AppCard[] = [documentsApp];
/**
 * Apps launcher page.
 *
 * Renders the shared Nav followed by one card per entry in APPS. Each card
 * shows the app name, an "Available" / "Coming soon" badge derived from
 * `status`, and the description. An "Open" link is rendered only for
 * available apps. A "Settings" link is rendered only when the current user
 * (loaded via `getMe`) has `is_admin` set, the app defines `settingsPath`,
 * and the app is available.
 *
 * NOTE(review): two consecutive opening wrapper `<div>` tags appear below but
 * only one matching closing tag follows, and the function's closing brace is
 * not visible in this view. This looks like a diff-rendering artifact (the
 * removed and the added wrapper line shown together) - confirm against the
 * real file before editing.
 */
export default function AppsPage() {
// Current user; `user` stays undefined until the query resolves, so the
// admin-only Settings link is hidden while loading.
const { data: user } = useQuery({ queryKey: ["me"], queryFn: getMe });
return (
<>
<Nav />
<div style={{ padding: 32 }}>
<div style={{ padding: 32, maxWidth: 900, margin: "0 auto" }}>
<h1>Apps</h1>
<div style={{ display: "flex", gap: 24, flexWrap: "wrap", marginTop: 24 }}>
{APPS.map((app) => (
<div
key={app.slug}
style={{
border: "1px solid #ddd",
borderRadius: 8,
padding: 24,
width: 280,
display: "flex",
flexDirection: "column",
gap: 12,
}}
>
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
<h2 style={{ margin: 0, fontSize: 18 }}>{app.name}</h2>
{app.status === "available" ? (
<span style={{ fontSize: 12, color: "#2a9d8f", fontWeight: 600 }}>Available</span>
) : (
<span style={{ fontSize: 12, color: "#aaa" }}>Coming soon</span>
)}
</div>
<p style={{ margin: 0, color: "#555", fontSize: 14 }}>{app.description}</p>
<div style={{ display: "flex", gap: 8, marginTop: "auto" }}>
{app.status === "available" && (
<Link
to={app.path}
style={{
padding: "6px 14px",
background: "#222",
color: "#fff",
borderRadius: 4,
textDecoration: "none",
fontSize: 14,
}}
>
Open
</Link>
)}
{user?.is_admin && app.settingsPath && app.status === "available" && (
<Link
to={app.settingsPath}
style={{
padding: "6px 14px",
border: "1px solid #ccc",
borderRadius: 4,
textDecoration: "none",
fontSize: 14,
color: "#333",
}}
title="Settings"
>
Settings
</Link>
)}
</div>
</div>
))}
</div>
</div>
</>
);
@@ -0,0 +1,298 @@
import { useEffect, useState } from "react";
import { useQuery, useMutation } from "@tanstack/react-query";
import Nav from "../components/Nav";
import {
getDocumentSettings,
updateDocumentAISettings,
testDocumentAIConnection,
updateDocumentLimits,
} from "../api/client";
/** AI backends selectable in the provider dropdown. */
type Provider =
  | "anthropic"
  | "ollama"
  | "lmstudio";
/**
 * Titled group on the settings page: an `<h2>` with `title`, followed by
 * whatever `children` are passed in.
 */
function Section(props: { title: string; children: React.ReactNode }) {
  const { title, children } = props;
  const headingStyle = { fontSize: 18, marginBottom: 16 };
  const wrapperStyle = { marginBottom: 36 };

  return (
    <section style={wrapperStyle}>
      <h2 style={headingStyle}>{title}</h2>
      {children}
    </section>
  );
}
/**
 * Labelled form row: caption text above a single form control.
 *
 * The control passed as `children` is nested inside the `<label>` element so
 * the two are associated implicitly. Previously the label was a sibling of
 * the control with no `htmlFor`, so screen readers announced the inputs
 * without a name and clicking the caption did not focus the control.
 *
 * @param label    Caption shown above the control.
 * @param children The form control (input/select) this row labels.
 */
function Field({
  label,
  children,
}: {
  label: string;
  children: React.ReactNode;
}) {
  return (
    <div style={{ marginBottom: 12 }}>
      <label style={{ display: "block" }}>
        {/* Caption keeps the original block layout and typography. */}
        <span style={{ display: "block", fontSize: 13, marginBottom: 4, color: "#555" }}>
          {label}
        </span>
        {children}
      </label>
    </div>
  );
}
/**
 * Shared inline style for the text inputs and the select on the settings
 * form. Individual controls may spread it and override single properties.
 */
const inputStyle: React.CSSProperties = {
  // box model
  boxSizing: "border-box",
  width: "100%",
  padding: "7px 10px",
  // border
  border: "1px solid #ccc",
  borderRadius: 4,
  // typography
  fontSize: 14,
};
/**
 * Admin settings page for the Documents app.
 *
 * Loads the current settings with `getDocumentSettings`, copies them into
 * local form state, and offers three actions:
 *  - save the AI provider configuration (`updateDocumentAISettings`),
 *  - test the AI connection (`testDocumentAIConnection`),
 *  - save the upload size limit (`updateDocumentLimits`).
 *
 * Compared with the first version this component also:
 *  - reports a failed "Test Connection" request instead of staying silent,
 *    and clears the previous result when a new test starts;
 *  - reports a failed limits save (the AI save already did);
 *  - refuses to save an upload limit outside the range the number input
 *    declares (an emptied input evaluates to 0).
 */
export default function DocumentAdminSettingsPage() {
  // Range accepted for the upload limit; shared by the input and the guard.
  const minPdfMb = 1;
  const maxAllowedPdfMb = 200;

  const { data: rawSettings, isLoading } = useQuery({
    queryKey: ["docSettings"],
    queryFn: getDocumentSettings,
  });

  const [provider, setProvider] = useState<Provider>("anthropic");
  const [anthropicKey, setAnthropicKey] = useState("");
  const [anthropicModel, setAnthropicModel] = useState("");
  const [ollamaUrl, setOllamaUrl] = useState("");
  const [ollamaModel, setOllamaModel] = useState("");
  const [ollamaKey, setOllamaKey] = useState("");
  const [lmstudioUrl, setLmstudioUrl] = useState("");
  const [lmstudioModel, setLmstudioModel] = useState("");
  const [lmstudioKey, setLmstudioKey] = useState("");
  const [maxPdfMb, setMaxPdfMb] = useState(20);
  const [testResult, setTestResult] = useState<{
    ok: boolean;
    response?: string;
    error?: string;
  } | null>(null);

  // Populate the form from the loaded settings. Only truthy values overwrite
  // the current state, so missing fields keep their defaults.
  // NOTE(review): the api_key fields are pre-filled with whatever the settings
  // endpoint returns and saveAI sends them back unchanged. If the server
  // returns masked keys, confirm the backend ignores the mask on write.
  useEffect(() => {
    if (!rawSettings) return;
    const s = rawSettings as Record<string, unknown>;
    const ai = s.ai as Record<string, unknown> | undefined;
    const docs = s.documents as Record<string, unknown> | undefined;

    if (ai?.provider) setProvider(ai.provider as Provider);

    const ant = ai?.anthropic as Record<string, string> | undefined;
    if (ant?.api_key) setAnthropicKey(ant.api_key);
    if (ant?.model) setAnthropicModel(ant.model);

    const oll = ai?.ollama as Record<string, string> | undefined;
    if (oll?.base_url) setOllamaUrl(oll.base_url);
    if (oll?.model) setOllamaModel(oll.model);
    if (oll?.api_key) setOllamaKey(oll.api_key);

    const lms = ai?.lmstudio as Record<string, string> | undefined;
    if (lms?.base_url) setLmstudioUrl(lms.base_url);
    if (lms?.model) setLmstudioModel(lms.model);
    if (lms?.api_key) setLmstudioKey(lms.api_key);

    if (typeof docs?.max_pdf_bytes === "number") {
      // Stored in bytes, edited in whole megabytes.
      setMaxPdfMb(Math.round(docs.max_pdf_bytes / (1024 * 1024)));
    }
  }, [rawSettings]);

  const aiMut = useMutation({
    mutationFn: updateDocumentAISettings,
  });

  const testMut = useMutation({
    mutationFn: testDocumentAIConnection,
    // Drop the previous outcome so a stale banner is never shown for a new attempt.
    onMutate: () => setTestResult(null),
    onSuccess: (data) => setTestResult(data),
    // A rejected request (network/HTTP error) is shown in the same banner as
    // a failed connection reported by the server.
    onError: (err: unknown) => {
      const detail = (err as { response?: { data?: { detail?: unknown } } } | null | undefined)
        ?.response?.data?.detail;
      let message = "Request failed";
      if (typeof detail === "string" && detail !== "") {
        message = detail;
      } else if (err instanceof Error && err.message !== "") {
        message = err.message;
      }
      setTestResult({ ok: false, error: message });
    },
  });

  const limitsMut = useMutation({
    mutationFn: (mb: number) => updateDocumentLimits(mb),
  });

  const saveAI = () => {
    aiMut.mutate({
      provider,
      anthropic_api_key: anthropicKey,
      anthropic_model: anthropicModel,
      ollama_base_url: ollamaUrl,
      ollama_model: ollamaModel,
      ollama_api_key: ollamaKey,
      lmstudio_base_url: lmstudioUrl,
      lmstudio_model: lmstudioModel,
      lmstudio_api_key: lmstudioKey,
    });
  };

  // The min/max attributes on the input are only hints outside a <form>
  // submit, so the range is enforced here before the value can be saved.
  const limitInRange =
    Number.isFinite(maxPdfMb) && maxPdfMb >= minPdfMb && maxPdfMb <= maxAllowedPdfMb;

  const saveLimits = () => {
    if (limitInRange) limitsMut.mutate(maxPdfMb);
  };

  if (isLoading) {
    return (
      <>
        <Nav />
        <div style={{ padding: 32 }}>Loading</div>
      </>
    );
  }

  return (
    <>
      <Nav />
      <div style={{ padding: 32, maxWidth: 600, margin: "0 auto" }}>
        <h1 style={{ fontSize: 24, marginBottom: 32 }}>Documents Settings</h1>

        <Section title="AI Provider">
          <Field label="Provider">
            <select
              value={provider}
              onChange={(e) => setProvider(e.target.value as Provider)}
              style={inputStyle}
            >
              <option value="anthropic">Anthropic (cloud)</option>
              <option value="ollama">Ollama (local)</option>
              <option value="lmstudio">LM Studio (local)</option>
            </select>
          </Field>

          {provider === "anthropic" && (
            <>
              <Field label="API Key">
                <input
                  type="password"
                  value={anthropicKey}
                  onChange={(e) => setAnthropicKey(e.target.value)}
                  placeholder="sk-ant-… (leave blank to keep current)"
                  style={inputStyle}
                />
              </Field>
              <Field label="Model">
                <input
                  value={anthropicModel}
                  onChange={(e) => setAnthropicModel(e.target.value)}
                  placeholder="claude-haiku-4-5-20251001"
                  style={inputStyle}
                />
              </Field>
            </>
          )}

          {provider === "ollama" && (
            <>
              <Field label="Base URL">
                <input
                  value={ollamaUrl}
                  onChange={(e) => setOllamaUrl(e.target.value)}
                  placeholder="http://192.168.1.x:11434/v1"
                  style={inputStyle}
                />
              </Field>
              <Field label="Model">
                <input
                  value={ollamaModel}
                  onChange={(e) => setOllamaModel(e.target.value)}
                  placeholder="llama3.2"
                  style={inputStyle}
                />
              </Field>
              <Field label="API Key (usually 'ollama')">
                <input
                  value={ollamaKey}
                  onChange={(e) => setOllamaKey(e.target.value)}
                  placeholder="ollama"
                  style={inputStyle}
                />
              </Field>
            </>
          )}

          {provider === "lmstudio" && (
            <>
              <Field label="Base URL">
                <input
                  value={lmstudioUrl}
                  onChange={(e) => setLmstudioUrl(e.target.value)}
                  placeholder="http://192.168.1.x:1234/v1"
                  style={inputStyle}
                />
              </Field>
              <Field label="Model">
                <input
                  value={lmstudioModel}
                  onChange={(e) => setLmstudioModel(e.target.value)}
                  placeholder="local-model"
                  style={inputStyle}
                />
              </Field>
              <Field label="API Key (can be empty)">
                <input
                  value={lmstudioKey}
                  onChange={(e) => setLmstudioKey(e.target.value)}
                  placeholder=""
                  style={inputStyle}
                />
              </Field>
            </>
          )}

          <div style={{ display: "flex", gap: 10, marginTop: 16 }}>
            <button
              onClick={saveAI}
              disabled={aiMut.isPending}
              style={{ padding: "8px 16px", cursor: "pointer", background: "#222", color: "#fff", borderRadius: 4, border: "none" }}
            >
              {aiMut.isPending ? "Saving…" : "Save"}
            </button>
            <button
              onClick={() => testMut.mutate()}
              disabled={testMut.isPending}
              style={{ padding: "8px 16px", cursor: "pointer", borderRadius: 4, border: "1px solid #ccc" }}
            >
              {testMut.isPending ? "Testing…" : "Test Connection"}
            </button>
          </div>

          {aiMut.isSuccess && (
            <p style={{ marginTop: 8, fontSize: 13, color: "#2a9d8f" }}>Settings saved.</p>
          )}
          {aiMut.isError && (
            <p style={{ marginTop: 8, fontSize: 13, color: "#c00" }}>Failed to save settings.</p>
          )}

          {testResult && (
            <div
              style={{
                marginTop: 10,
                padding: 10,
                borderRadius: 4,
                background: testResult.ok ? "#e8f5e9" : "#fdecea",
                fontSize: 13,
              }}
            >
              {testResult.ok ? (
                <>Connected. Response: <em>{testResult.response}</em></>
              ) : (
                <>Connection failed: {testResult.error}</>
              )}
            </div>
          )}
        </Section>

        <Section title="Upload Limits">
          <Field label="Max file size (MB)">
            <input
              type="number"
              min={minPdfMb}
              max={maxAllowedPdfMb}
              value={maxPdfMb}
              onChange={(e) => setMaxPdfMb(Number(e.target.value))}
              style={{ ...inputStyle, width: 120 }}
            />
          </Field>
          <button
            onClick={saveLimits}
            disabled={limitsMut.isPending || !limitInRange}
            style={{ padding: "8px 16px", cursor: "pointer", background: "#222", color: "#fff", borderRadius: 4, border: "none", marginTop: 8 }}
          >
            {limitsMut.isPending ? "Saving…" : "Save"}
          </button>
          {!limitInRange && (
            <p style={{ marginTop: 8, fontSize: 13, color: "#c00" }}>
              Enter a value between {minPdfMb} and {maxAllowedPdfMb} MB.
            </p>
          )}
          {limitsMut.isSuccess && (
            <p style={{ marginTop: 8, fontSize: 13, color: "#2a9d8f" }}>Limits saved.</p>
          )}
          {limitsMut.isError && (
            <p style={{ marginTop: 8, fontSize: 13, color: "#c00" }}>Failed to save limits.</p>
          )}
        </Section>
      </div>
    </>
  );
}
+370
View File
@@ -0,0 +1,370 @@
import { useRef, useState, useEffect } from "react";
import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
import Nav from "../components/Nav";
import {
listDocuments,
uploadDocument,
deleteDocument,
downloadDocument,
getDocumentStatus,
listCategories,
createCategory,
assignCategory,
removeCategory,
type DocumentOut,
type CategoryOut,
} from "../api/client";
/**
 * Small coloured pill showing a document's processing status. The status
 * string itself is the visible text; the background colour is looked up per
 * status.
 */
function StatusBadge({ status }: { status: DocumentOut["status"] }) {
  const palette: Record<DocumentOut["status"], string> = {
    failed: "#e63946",
    done: "#2a9d8f",
    processing: "#2196f3",
    pending: "#f4a261",
  };

  const pillStyle = {
    background: palette[status],
    color: "#fff",
    borderRadius: 4,
    padding: "2px 8px",
    fontWeight: 600,
    fontSize: 12,
  };

  return <span style={pillStyle}>{status}</span>;
}
/**
 * Parse the JSON string stored in a document's `extracted_data` field.
 * Returns null for empty input, malformed JSON, or any payload that is not a
 * plain object (primitives and arrays would otherwise be fed to
 * `Object.entries` and rendered as nonsense rows).
 */
function parseExtractedData(raw: string | null | undefined): Record<string, unknown> | null {
  if (!raw) return null;
  try {
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    // Malformed JSON is treated the same as "no extracted data".
  }
  return null;
}

/**
 * Parse the JSON string stored in a document's `tags` field into a list of
 * unique, renderable tag strings. Non-array payloads yield an empty list;
 * entries that are neither strings nor numbers are dropped (objects cannot
 * be rendered as React children), and duplicates are removed because each
 * tag doubles as its React key.
 */
function parseTags(raw: string | null | undefined): string[] {
  if (!raw) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) return [];
    const printable = parsed
      .filter((t: unknown) => typeof t === "string" || typeof t === "number")
      .map((t: unknown) => String(t));
    return Array.from(new Set(printable));
  } catch {
    // Malformed JSON is treated the same as "no tags".
    return [];
  }
}

/**
 * Turn one extracted value into display text. Empty values (null, undefined,
 * "", []) become an em dash; arrays and nested objects are pretty-printed as
 * JSON (a bare `String(obj)` would print "[object Object]"); everything else
 * is stringified.
 */
function formatExtractedValue(v: unknown): string {
  if (v === null || v === undefined || v === "") return "—";
  if (Array.isArray(v)) return v.length === 0 ? "—" : JSON.stringify(v, null, 2);
  if (typeof v === "object") return JSON.stringify(v, null, 2);
  return String(v);
}

/**
 * One expandable row in the document list.
 *
 * The header shows filename, status, document type, size, and Download /
 * Delete buttons; clicking it toggles a details panel with tags, extracted
 * data, any error message, and category assignment.
 *
 * While the document is pending or processing, its status is polled every
 * 3 s; once the polled status is done or failed the "documents" query is
 * invalidated so the list reloads.
 *
 * @param doc        The document to render.
 * @param categories All categories available for assignment.
 * @param onDelete   Called with the document id after the user confirms deletion.
 */
function DocumentRow({
  doc,
  categories,
  onDelete,
}: {
  doc: DocumentOut;
  categories: CategoryOut[];
  onDelete: (id: string) => void;
}) {
  const [expanded, setExpanded] = useState(false);
  const qc = useQueryClient();

  // Poll status while pending/processing
  const { data: liveStatus } = useQuery({
    queryKey: ["docStatus", doc.id],
    queryFn: () => getDocumentStatus(doc.id),
    // v5: refetchInterval receives the Query object; data lives in query.state.data
    refetchInterval: (query) => {
      const s = query.state.data?.status;
      return s === "pending" || s === "processing" ? 3000 : false;
    },
    enabled: doc.status === "pending" || doc.status === "processing",
  });

  // Reload the list once processing reaches a terminal state.
  useEffect(() => {
    if (liveStatus?.status === "done" || liveStatus?.status === "failed") {
      qc.invalidateQueries({ queryKey: ["documents"] });
    }
  }, [liveStatus?.status, qc]);

  const assignMut = useMutation({
    mutationFn: ({ catId }: { catId: string }) => assignCategory(doc.id, catId),
    onSuccess: () => qc.invalidateQueries({ queryKey: ["documents"] }),
  });

  const removeCatMut = useMutation({
    mutationFn: ({ catId }: { catId: string }) => removeCategory(doc.id, catId),
    onSuccess: () => qc.invalidateQueries({ queryKey: ["documents"] }),
  });

  // Categories not yet on this document are offered in the "+ add" dropdown.
  const assignedIds = new Set(doc.categories.map((c) => c.id));
  const unassigned = categories.filter((c) => !assignedIds.has(c.id));

  const extractedData = parseExtractedData(doc.extracted_data);
  const tags = parseTags(doc.tags);

  return (
    <div style={{ border: "1px solid #ddd", borderRadius: 6, marginBottom: 12 }}>
      <div
        style={{
          display: "flex",
          alignItems: "center",
          gap: 12,
          padding: "12px 16px",
          cursor: "pointer",
        }}
        onClick={() => setExpanded((e) => !e)}
      >
        <span style={{ flex: 1, fontWeight: 500 }}>{doc.filename}</span>
        <StatusBadge status={doc.status} />
        {doc.document_type && (
          <span style={{ fontSize: 12, color: "#555" }}>{doc.document_type}</span>
        )}
        <span style={{ fontSize: 12, color: "#999" }}>
          {(doc.file_size / 1024).toFixed(0)} KB
        </span>
        <button
          onClick={(e) => {
            // Keep the click from also toggling the details panel.
            e.stopPropagation();
            downloadDocument(doc.id, doc.filename);
          }}
          style={{ fontSize: 12, cursor: "pointer" }}
        >
          Download
        </button>
        <button
          onClick={(e) => {
            e.stopPropagation();
            if (confirm(`Delete "${doc.filename}"?`)) onDelete(doc.id);
          }}
          style={{ fontSize: 12, color: "#c00", cursor: "pointer" }}
        >
          Delete
        </button>
      </div>

      {expanded && (
        <div style={{ padding: "0 16px 16px", borderTop: "1px solid #eee" }}>
          {tags.length > 0 && (
            <div style={{ marginTop: 10 }}>
              <strong>Tags:</strong>{" "}
              {tags.map((t) => (
                <span
                  key={t}
                  style={{
                    fontSize: 12,
                    background: "#eee",
                    borderRadius: 3,
                    padding: "2px 6px",
                    marginRight: 4,
                  }}
                >
                  {t}
                </span>
              ))}
            </div>
          )}

          {extractedData && (
            <div style={{ marginTop: 10 }}>
              <strong>Extracted data:</strong>
              <table style={{ marginTop: 6, fontSize: 13, borderCollapse: "collapse" }}>
                <tbody>
                  {Object.entries(extractedData)
                    // Tags are rendered in their own section above.
                    .filter(([k]) => k !== "tags")
                    .map(([k, v]) => (
                      <tr key={k}>
                        <td style={{ paddingRight: 16, color: "#666", verticalAlign: "top" }}>{k}</td>
                        <td>{formatExtractedValue(v)}</td>
                      </tr>
                    ))}
                </tbody>
              </table>
            </div>
          )}

          {doc.error_message && (
            <div style={{ marginTop: 10, color: "#c00", fontSize: 13 }}>
              Error: {doc.error_message}
            </div>
          )}

          <div style={{ marginTop: 12 }}>
            <strong style={{ fontSize: 13 }}>Categories:</strong>{" "}
            {doc.categories.map((c) => (
              <span
                key={c.id}
                style={{
                  fontSize: 12,
                  background: "#dce8ff",
                  borderRadius: 3,
                  padding: "2px 6px",
                  marginRight: 4,
                }}
              >
                {c.name}{" "}
                <button
                  onClick={() => removeCatMut.mutate({ catId: c.id })}
                  aria-label={`Remove category ${c.name}`}
                  style={{ fontSize: 10, cursor: "pointer", color: "#555", background: "none", border: "none" }}
                >
                  x
                </button>
              </span>
            ))}
            {unassigned.length > 0 && (
              <select
                defaultValue=""
                onChange={(e) => {
                  if (e.target.value) assignMut.mutate({ catId: e.target.value });
                  // Snap back to the placeholder so the same option can be picked again.
                  e.target.value = "";
                }}
                style={{ fontSize: 12, marginLeft: 4 }}
              >
                <option value="">+ add category</option>
                {unassigned.map((c) => (
                  <option key={c.id} value={c.id}>{c.name}</option>
                ))}
              </select>
            )}
          </div>
        </div>
      )}
    </div>
  );
}
/**
 * Derive a user-facing message from a failed upload.
 *
 * Reads `response.data.detail` from the error. A non-empty string is used as
 * is. An array (the shape FastAPI uses for request-validation errors) is
 * reduced to its `msg` strings joined with "; ". Anything else falls back to
 * "Upload failed". The result is always a string, so it is safe to render.
 */
function uploadErrorMessage(err: unknown): string {
  const fallback = "Upload failed";
  const detail = (err as { response?: { data?: { detail?: unknown } } } | null | undefined)
    ?.response?.data?.detail;

  if (typeof detail === "string") return detail !== "" ? detail : fallback;

  if (Array.isArray(detail)) {
    const messages = detail
      .map((item: unknown) =>
        typeof item === "object" && item !== null ? (item as { msg?: unknown }).msg : item,
      )
      .filter((m: unknown): m is string => typeof m === "string" && m !== "");
    if (messages.length > 0) return messages.join("; ");
  }

  return fallback;
}

/**
 * Documents page: PDF upload, category management, and the document list.
 *
 * Uploads, deletions, and category creation go through mutations that
 * invalidate the matching query ("documents" or "categories") on success.
 * Upload failures are shown next to the upload button.
 */
export default function DocumentsPage() {
  const qc = useQueryClient();
  const fileRef = useRef<HTMLInputElement>(null);
  const [newCatName, setNewCatName] = useState("");
  const [uploadError, setUploadError] = useState<string | null>(null);

  const { data: documents = [], isLoading } = useQuery({
    queryKey: ["documents"],
    queryFn: listDocuments,
  });

  const { data: categories = [] } = useQuery({
    queryKey: ["categories"],
    queryFn: listCategories,
  });

  const uploadMut = useMutation({
    mutationFn: uploadDocument,
    // Clear the previous failure as soon as a new upload starts.
    onMutate: () => setUploadError(null),
    onSuccess: () => {
      setUploadError(null);
      qc.invalidateQueries({ queryKey: ["documents"] });
    },
    onError: (err: unknown) => {
      // `detail` is not guaranteed to be a string; normalise it before it
      // reaches state, since rendering an object would make React throw.
      setUploadError(uploadErrorMessage(err));
    },
  });

  const deleteMut = useMutation({
    mutationFn: deleteDocument,
    onSuccess: () => qc.invalidateQueries({ queryKey: ["documents"] }),
  });

  const createCatMut = useMutation({
    mutationFn: createCategory,
    onSuccess: () => {
      setNewCatName("");
      qc.invalidateQueries({ queryKey: ["categories"] });
    },
  });

  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0];
    if (file) uploadMut.mutate(file);
    // Reset so picking the same file again still fires a change event.
    e.target.value = "";
  };

  return (
    <>
      <Nav />
      <div style={{ padding: 32, maxWidth: 900, margin: "0 auto" }}>
        <h1>Documents</h1>

        {/* Upload */}
        <div style={{ marginBottom: 24 }}>
          <input
            ref={fileRef}
            type="file"
            accept="application/pdf"
            style={{ display: "none" }}
            onChange={handleFileChange}
          />
          <button
            onClick={() => fileRef.current?.click()}
            disabled={uploadMut.isPending}
            style={{ padding: "8px 16px", cursor: "pointer" }}
          >
            {uploadMut.isPending ? "Uploading…" : "Upload PDF"}
          </button>
          {uploadError && (
            <span style={{ marginLeft: 12, color: "#c00", fontSize: 13 }}>{uploadError}</span>
          )}
        </div>

        {/* Category management */}
        <details style={{ marginBottom: 24 }}>
          <summary style={{ cursor: "pointer", fontWeight: 500 }}>Manage categories</summary>
          <div style={{ marginTop: 10, display: "flex", gap: 8, flexWrap: "wrap" }}>
            {categories.map((c) => (
              <span
                key={c.id}
                style={{
                  fontSize: 13,
                  background: "#eee",
                  borderRadius: 4,
                  padding: "4px 10px",
                }}
              >
                {c.name}
              </span>
            ))}
          </div>
          <form
            style={{ marginTop: 10, display: "flex", gap: 8 }}
            onSubmit={(e) => {
              e.preventDefault();
              if (newCatName.trim()) createCatMut.mutate(newCatName.trim());
            }}
          >
            <input
              value={newCatName}
              onChange={(e) => setNewCatName(e.target.value)}
              placeholder="New category name"
              style={{ padding: "6px 10px", fontSize: 13 }}
            />
            <button type="submit" disabled={createCatMut.isPending} style={{ cursor: "pointer" }}>
              Add
            </button>
          </form>
        </details>

        {/* Document list */}
        {isLoading ? (
          <p>Loading</p>
        ) : documents.length === 0 ? (
          <p style={{ color: "#666" }}>No documents yet. Upload a PDF to get started.</p>
        ) : (
          documents.map((doc) => (
            <DocumentRow
              key={doc.id}
              doc={doc}
              categories={categories}
              onDelete={(id) => deleteMut.mutate(id)}
            />
          ))
        )}
      </div>
    </>
  );
}
-12
View File
@@ -1,12 +0,0 @@
import Nav from "../components/Nav";
/**
 * Placeholder settings page: the shared Nav plus a padded container holding
 * only a "Settings" heading.
 */
export default function SettingsPage() {
  const containerStyle = { padding: 32 };
  const heading = <h1>Settings</h1>;

  return (
    <>
      <Nav />
      <div style={containerStyle}>{heading}</div>
    </>
  );
}