"""
WebDAVBackend — Generic WebDAV StorageBackend implementation.

Security design (D-17 / T-05-04-01 / T-05-04-02):
  validate_cloud_url() is called at two mandatory points:
  1. In __init__, BEFORE constructing the webdavclient3 Client — blocks SSRF at
     construction time so a private-IP URL never reaches the SDK.
  2. Before EVERY asyncio.to_thread() call — defends against DNS-rebinding attacks
     where a hostname passes the initial check but later resolves to an internal IP.

Path encoding (Pitfall 2 from RESEARCH.md):
  _make_path() percent-encodes each path component (user id, document id and
  extension) using urllib.parse.quote() so non-ASCII characters, spaces and path
  separators (which don't occur in practice for UUIDs, but are handled
  defensively) are properly escaped for Nextcloud/WebDAV.

Object key scheme:
  object_key = WebDAV path: "docuvault/{user_id}/{document_id}{extension}"
  This is the path relative to the WebDAV root, stored in the database as
  document.object_key for later get/delete operations.

WebDAV credentials dict shape:
  {"server_url": str, "username": str, "password": str}

Not implemented (D-14):
  presigned_get_url and generate_presigned_put_url raise NotImplementedError.
  Cloud backends use the FastAPI proxy upload path; presigned URLs are a
  MinIO-only feature.
"""
from __future__ import annotations

import asyncio
import io
import logging
import urllib.parse

from webdav3.client import Client
from webdav3.exceptions import RemoteResourceNotFound

from storage.base import StorageBackend
from storage.cloud_utils import validate_cloud_url

logger = logging.getLogger(__name__)


class WebDAVBackend(StorageBackend):
    """Generic WebDAV storage backend implementing all 7 StorageBackend abstract methods.

    All synchronous webdavclient3 calls are wrapped in asyncio.to_thread() to avoid
    blocking the FastAPI event loop (mirrors MinIOBackend pattern — RESEARCH.md Pattern 5).

    The SSRF guard (validate_cloud_url) is called in __init__ and before every
    asyncio.to_thread() call for defense-in-depth against DNS rebinding (D-17).
    """

    def __init__(self, server_url: str, username: str, password: str) -> None:
        """Construct a WebDAVBackend.

        Args:
            server_url: The WebDAV root URL (e.g. "https://dav.example.com/remote.php/dav/").
            username: HTTP Basic Auth username.
            password: HTTP Basic Auth password or app-specific password (D-07).

        Raises:
            ValueError: If server_url targets a private/internal address (SSRF guard, D-17).
        """
        # SSRF guard: validate before any SDK construction — T-05-04-01
        validate_cloud_url(server_url)
        # Kept so every later request can re-run the guard against the same URL.
        self._server_url = server_url

        options = {
            "webdav_hostname": server_url,
            "webdav_login": username,
            "webdav_password": password,
        }
        self._client = Client(options)

    def _make_path(self, user_id: str, document_id: str, extension: str) -> str:
        """Construct a WebDAV path for a document.

        Path schema: "docuvault/{user_id}/{document_id}{extension}"

        Every component is percent-encoded with ``safe=""`` (Pitfall 2 —
        Nextcloud/WebDAV compatibility with non-ASCII characters and spaces, even
        though UUIDs are alphanumeric). The extension is encoded as well so a "/"
        inside it cannot introduce extra path segments; ordinary extensions such
        as ".pdf" are returned unchanged because "." is never escaped.

        NOTE(review): webdavclient3 appears to percent-encode remote paths itself
        when building request URLs — confirm that already-encoded components are
        not double-encoded for inputs that contain reserved characters.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            extension: File extension including leading dot (e.g. ".pdf").

        Returns:
            WebDAV path string suitable for use as object_key.
        """
        encoded_uid = urllib.parse.quote(str(user_id), safe="")
        encoded_did = urllib.parse.quote(str(document_id), safe="")
        encoded_ext = urllib.parse.quote(str(extension), safe="")
        return f"docuvault/{encoded_uid}/{encoded_did}{encoded_ext}"

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Upload bytes to WebDAV and return the object_key (WebDAV path).

        Ensures the parent directory "docuvault/{user_id}/" exists before upload
        by calling client.mkdir() with recursive=True. webdavclient3 mkdir is a
        no-op if the directory already exists.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            file_bytes: Raw file content.
            extension: File extension with leading dot (e.g. ".pdf").
            content_type: MIME type (unused by WebDAV, kept for ABC compliance).

        Returns:
            object_key: The WebDAV path where the file was stored.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        object_key = self._make_path(user_id, document_id, extension)
        # Derive the parent from the key itself so both paths always agree.
        # Encoded components contain no "/", so the last separator is the
        # boundary between "docuvault/{user_id}" and the file name.
        parent_dir = object_key.rpartition("/")[0]

        # Re-validate before every outbound request (D-17 / T-05-04-02)
        validate_cloud_url(self._server_url)
        # Ensure parent directory exists (idempotent); second argument is recursive=True
        await asyncio.to_thread(self._client.mkdir, parent_dir, True)

        # The upload is a separate thread hop, so the guard runs again before it.
        validate_cloud_url(self._server_url)
        buf = io.BytesIO(file_bytes)
        await asyncio.to_thread(self._client.upload_to, buf, object_key)
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Download bytes from WebDAV by object_key (WebDAV path).

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File content as bytes.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        buf = io.BytesIO()
        await asyncio.to_thread(self._client.download_from, buf, object_key)
        return buf.getvalue()

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from WebDAV by object_key.

        Silently ignores missing files (no-op if object_key does not exist),
        consistent with the StorageBackend ABC contract ("No-op if the key does
        not exist"). Any other failure during delete is still not raised to the
        caller (best-effort delete), but it is logged with a traceback so that
        objects left behind on the server can be found.

        Args:
            object_key: WebDAV path returned by put_object.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        try:
            await asyncio.to_thread(self._client.clean, object_key)
        except RemoteResourceNotFound:
            # Missing or already-deleted file: nothing to do.
            return
        except Exception:
            # Deliberately not re-raised: callers rely on delete never failing.
            logger.warning(
                "WebDAV delete failed for %r; remote object may be orphaned",
                object_key,
                exc_info=True,
            )

    async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str:
        """Not supported for WebDAV backends.

        WebDAV does not have a concept of presigned URLs. Cloud document downloads
        use the FastAPI proxy endpoint (D-15 — same as all cloud backends).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("WebDAV backend does not support presigned GET URLs")

    async def generate_presigned_put_url(
        self, object_key: str, expires_minutes: int = 15
    ) -> str:
        """Not supported for WebDAV backends.

        Cloud backends use the FastAPI intermediary upload path (D-14).
        Presigned PUT URLs are a MinIO/S3-only feature.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "WebDAV backend does not support presigned PUT URLs — use the upload proxy endpoint"
        )

    async def stat_object(self, object_key: str) -> int:
        """Return the file size in bytes from the WebDAV server.

        Uses client.info() which performs a PROPFIND request and returns a dict
        including a "size" key with the content length.

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File size in bytes. Returns 0 if the server does not report a size
            (the "size" entry is absent, None or empty).

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        info = await asyncio.to_thread(self._client.info, object_key)
        size = info.get("size")
        # The key can be present with a None/empty value when the server omits
        # the content length; int() would raise on that, so fall back to 0.
        return int(size) if size else 0

    async def health_check(self) -> bool:
        """Return True if the WebDAV server root ("/") is reachable.

        Uses a lightweight client.check("/") PROPFIND call — no file read/write.
        Per D-08: connection health is validated before credentials are stored.

        Returns:
            True if the server is reachable and the WebDAV root exists, False otherwise.
        """
        try:
            # Re-validate before every outbound request (D-17)
            validate_cloud_url(self._server_url)
            result = await asyncio.to_thread(self._client.check, "/")
            return bool(result)
        except Exception:
            # Health probe boundary: any failure (SSRF rejection, network error,
            # auth error) is reported as "unhealthy" rather than raised.
            return False