Add service health checks and dynamic Apps page

Backend polls each registered service's /health endpoint every 30 s via a
background asyncio task. GET /api/services exposes the live status snapshot.
The Apps page now renders from this endpoint — showing "Unavailable" (dimmed,
non-clickable) when a service is registered but its container is unreachable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-17 17:31:36 +02:00
parent 1f8f866414
commit 3248607790
10 changed files with 278 additions and 80 deletions
+1
View File
@@ -15,6 +15,7 @@ class Settings(BaseSettings):
CORS_ORIGINS: list[str] = ["http://localhost:5173"]
DOC_SERVICE_URL: str = "http://doc-service:8001"
AI_SERVICE_URL: str = "http://ai-service:8010"
@field_validator("JWT_PRIVATE_KEY", "JWT_PUBLIC_KEY", mode="before")
+25 -2
View File
@@ -1,11 +1,33 @@
import asyncio
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.core.config import settings
from app.routers import admin, auth, categories_proxy, documents_proxy, profile, users
from app.routers import admin, auth, categories_proxy, documents_proxy, profile, services, users
from app.routers import settings as settings_router
from app.services.service_health import check_all, health_check_loop, register_services
app = FastAPI(title=settings.PROJECT_NAME, version="0.1.0")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the background service health checker for the app's lifetime.

    Startup: registers the feature services from settings, performs one
    health check, then starts the periodic polling task.
    Shutdown: cancels the polling task and waits for it to finish.
    """
    register_services(
        doc_service_url=settings.DOC_SERVICE_URL,
        ai_service_url=settings.AI_SERVICE_URL,
    )
    # Run an initial check immediately so the first API response is accurate
    await check_all()
    task = asyncio.create_task(health_check_loop())
    try:
        yield
    finally:
        # `finally` also runs when an exception is thrown into the generator
        # at `yield`, so the polling task is never left running after the
        # application has stopped.
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected outcome of the cancel() call above.
            pass
app = FastAPI(title=settings.PROJECT_NAME, version="0.1.0", lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
@@ -20,6 +42,7 @@ app.include_router(users.router, prefix="/api/users", tags=["users"])
app.include_router(profile.router, prefix="/api/profile", tags=["profile"])
app.include_router(admin.router, prefix="/api/admin", tags=["admin"])
app.include_router(settings_router.router, prefix="/api/settings", tags=["settings"])
app.include_router(services.router, prefix="/api/services", tags=["services"])
# categories_proxy MUST be registered before documents_proxy —
# otherwise /api/documents/{path:path} swallows /api/documents/categories/*
app.include_router(
+22
View File
@@ -0,0 +1,22 @@
"""
GET /api/services — returns health status for all registered feature services.
Available to any authenticated user so the frontend can drive app visibility.
"""
from fastapi import APIRouter, Depends
from app.deps import get_current_user
from app.models.user import User
from app.services.service_health import get_all_statuses
# Router for the service-status endpoint; the application mounts it under
# the /api/services prefix.
router = APIRouter()
@router.get("")
async def list_services(_: User = Depends(get_current_user)) -> list[dict]:
    """
    List every registered feature service with its latest health flag.

    The `get_current_user` dependency must resolve before the handler runs;
    the resolved user itself is not used.

    healthy=true  → the service answered 200 on its most recent /health poll
    healthy=false → the service was unreachable, timed out, or has not been
                    polled yet
    """
    snapshot = get_all_statuses()
    return snapshot
View File
+108
View File
@@ -0,0 +1,108 @@
"""
Background health-checker for registered feature services.
Polls each service's /health endpoint every POLL_INTERVAL seconds and stores
the result in an in-memory dict. The REST layer reads from that dict — no DB,
no blocking calls on the request path.
"""
import asyncio
import logging
from dataclasses import dataclass, field
import httpx
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Pause between two polling rounds of health_check_loop(), in seconds.
POLL_INTERVAL = 30 # seconds
@dataclass
class ServiceDefinition:
    """Static description of one feature service tracked by the health checker.

    The health URL is built by concatenating ``internal_url`` and
    ``health_path``; the remaining fields are passed through to the
    status snapshot returned to API clients.
    """

    id: str
    name: str
    description: str
    # Base URL used for polling, e.g. http://doc-service:8001
    internal_url: str
    # Path appended to internal_url to reach the health endpoint
    health_path: str = "/health"
    # Frontend route of the app; empty = no open button
    app_path: str = ""
    # Frontend admin-settings route
    settings_path: str = ""
# ── Registry ──────────────────────────────────────────────────────────────────
# Empty until register_services() runs at startup and rebinds it. To add a new
# service, extend the list built inside register_services(); each entry's
# internal_url comes from the URLs passed to that call.
_REGISTRY: list[ServiceDefinition] = []
# Latest poll result per service id → True/False/None (None = not yet checked)
_health: dict[str, bool | None] = {}
def register_services(doc_service_url: str, ai_service_url: str) -> None:
    """Build the service registry from the configured base URLs.

    Meant to be called once during app startup. Replaces the module-level
    registry with the known services and resets every health entry to
    ``None`` (not yet checked).
    """
    global _REGISTRY, _health

    documents = ServiceDefinition(
        id="doc-service",
        name="Documents",
        description="Upload PDF files, extract data, and organise them with categories.",
        internal_url=doc_service_url,
        health_path="/health",
        app_path="/apps/documents",
        settings_path="/apps/documents/settings/admin",
    )
    ai_provider = ServiceDefinition(
        id="ai-service",
        name="AI Service",
        description="Shared AI provider for all features. Configure model, credentials, and connection.",
        internal_url=ai_service_url,
        health_path="/health",
        app_path="",
        settings_path="/apps/ai/settings/admin",
    )

    _REGISTRY = [documents, ai_provider]
    # dict.fromkeys maps every id to None, i.e. "not yet checked".
    _health = dict.fromkeys(svc.id for svc in _REGISTRY)
# ── Health check logic ────────────────────────────────────────────────────────
async def _check_service(svc: ServiceDefinition) -> None:
    """Poll one service's health endpoint and record the result in ``_health``.

    The service counts as healthy only when the GET request completes with
    HTTP 200. Any other status code, a timeout (5 s), or any other error
    raised by the request marks it unhealthy. The outcome is logged only
    when it differs from the previously stored result, so a service that
    stays down does not produce a log line on every poll.
    """
    url = f"{svc.internal_url}{svc.health_path}"
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(url)
    except Exception as exc:
        # Best-effort probe: a failing service must never break the caller,
        # so every error is treated as "unhealthy" instead of being raised.
        healthy = False
        reason = repr(exc)
    else:
        healthy = resp.status_code == 200
        reason = f"HTTP {resp.status_code}"

    previous = _health.get(svc.id)
    _health[svc.id] = healthy
    if previous != healthy:
        if healthy:
            logger.info("Service %s is healthy (%s)", svc.id, url)
        else:
            logger.warning(
                "Service %s is unavailable (%s): %s", svc.id, url, reason
            )
async def check_all() -> None:
    """Probe every registered service concurrently and wait for all probes."""
    probes = (_check_service(svc) for svc in _REGISTRY)
    await asyncio.gather(*probes)
async def health_check_loop() -> None:
    """Poll all registered services every POLL_INTERVAL seconds, forever.

    The loop ends only when its task is cancelled. An unexpected error in a
    polling round is logged and the loop carries on with the next round,
    so the stored health results do not silently stop being refreshed.
    """
    while True:
        try:
            await check_all()
        except Exception:
            # asyncio.CancelledError derives from BaseException, so task
            # cancellation is not caught here and still stops the loop.
            logger.exception(
                "Health check round failed; retrying in %s s", POLL_INTERVAL
            )
        await asyncio.sleep(POLL_INTERVAL)
# ── Public read API ───────────────────────────────────────────────────────────
def get_all_statuses() -> list[dict]:
    """Build the health snapshot for every registered service.

    Returns one dict per service, in registry order, holding its display
    metadata plus a boolean ``healthy`` flag.
    """
    snapshot: list[dict] = []
    for svc in _REGISTRY:
        # A stored None (not yet checked) or a missing entry is reported as
        # unhealthy for the UI.
        last_result = _health.get(svc.id)
        snapshot.append(
            {
                "id": svc.id,
                "name": svc.name,
                "description": svc.description,
                "app_path": svc.app_path,
                "settings_path": svc.settings_path,
                "healthy": bool(last_result),
            }
        )
    return snapshot