diff --git a/backend/app/services/service_health.py b/backend/app/services/service_health.py index b5928e0..9d7763e 100644 --- a/backend/app/services/service_health.py +++ b/backend/app/services/service_health.py @@ -7,7 +7,7 @@ no blocking calls on the request path. """ import asyncio import logging -from dataclasses import dataclass, field +from dataclasses import dataclass import httpx @@ -62,6 +62,7 @@ def register_services(doc_service_url: str, ai_service_url: str) -> None: ] _health = {svc.id: None for svc in _REGISTRY} + logger.info("Service registry initialised with %d services", len(_REGISTRY)) # ── Health check logic ──────────────────────────────────────────────────────── @@ -69,12 +70,23 @@ def register_services(doc_service_url: str, ai_service_url: str) -> None: async def _check_service(svc: ServiceDefinition) -> None: url = f"{svc.internal_url}{svc.health_path}" + prev = _health.get(svc.id) try: async with httpx.AsyncClient(timeout=5.0) as client: resp = await client.get(url) - _health[svc.id] = resp.status_code == 200 - except Exception: - _health[svc.id] = False + healthy = resp.status_code == 200 + except Exception as exc: + logger.debug("Health check failed for %s: %s", svc.id, exc) + healthy = False + + _health[svc.id] = healthy + + # Log only on transitions so the logs stay quiet during normal operation + if prev != healthy: + if healthy: + logger.info("Service %s is now HEALTHY", svc.id) + else: + logger.warning("Service %s is now UNHEALTHY", svc.id) async def check_all() -> None: @@ -83,9 +95,16 @@ async def check_all() -> None: async def health_check_loop() -> None: - """Runs forever; polls every POLL_INTERVAL seconds.""" + """Runs forever; polls every POLL_INTERVAL seconds. + + Exceptions inside a single polling round are caught so the loop cannot + be killed by a transient error. + """ while True: - await check_all() + try: + await check_all() + except Exception: + logger.exception("Unexpected error during health check round; will retry") await asyncio.sleep(POLL_INTERVAL) diff --git a/frontend/src/pages/AppsPage.tsx b/frontend/src/pages/AppsPage.tsx index 8f73c23..ef367b7 100644 --- a/frontend/src/pages/AppsPage.tsx +++ b/frontend/src/pages/AppsPage.tsx @@ -32,6 +32,7 @@ export default function AppsPage() { queryKey: ["services"], queryFn: getServices, refetchInterval: 30_000, + refetchIntervalInBackground: true, }); return (