""" WebDAVBackend — Generic WebDAV StorageBackend implementation. Security design (D-17 / T-05-04-01 / T-05-04-02): validate_cloud_url() is called at two mandatory points: 1. In __init__, BEFORE constructing the webdavclient3 Client — blocks SSRF at construction time so a private-IP URL never reaches the SDK. 2. Before EVERY asyncio.to_thread() call — defends against DNS-rebinding attacks where a hostname passes the initial check but later resolves to an internal IP. Path encoding (Pitfall 2 from RESEARCH.md): _make_path() percent-encodes each path segment using urllib.parse.quote() so non-ASCII characters and spaces in UUIDs (which don't occur in practice, but are handled defensively) are properly escaped for Nextcloud/WebDAV. Object key scheme: object_key = WebDAV path: "docuvault/{user_id}/{document_id}{extension}" This is the path relative to the WebDAV root, stored in the database as document.object_key for later get/delete operations. WebDAV credentials dict shape: {"server_url": str, "username": str, "password": str} Not implemented (D-14): presigned_get_url and generate_presigned_put_url raise NotImplementedError. Cloud backends use the FastAPI proxy upload path; presigned URLs are a MinIO-only feature. """ from __future__ import annotations import asyncio import io import urllib.parse from webdav3.client import Client from storage.base import StorageBackend from storage.cloud_utils import validate_cloud_url class WebDAVBackend(StorageBackend): """Generic WebDAV storage backend implementing all 7 StorageBackend abstract methods. All synchronous webdavclient3 calls are wrapped in asyncio.to_thread() to avoid blocking the FastAPI event loop (mirrors MinIOBackend pattern — RESEARCH.md Pattern 5). The SSRF guard (validate_cloud_url) is called in __init__ and before every asyncio.to_thread() call for defense-in-depth against DNS rebinding (D-17). """ def __init__(self, server_url: str, username: str, password: str) -> None: """Construct a WebDAVBackend. Args: server_url: The WebDAV root URL (e.g. "https://dav.example.com/remote.php/dav/"). username: HTTP Basic Auth username. password: HTTP Basic Auth password or app-specific password (D-07). Raises: ValueError: If server_url targets a private/internal address (SSRF guard, D-17). """ # SSRF guard: validate before any SDK construction — T-05-04-01 validate_cloud_url(server_url) self._server_url = server_url options = { "webdav_hostname": server_url, "webdav_login": username, "webdav_password": password, } self._client = Client(options) def _make_path(self, user_id: str, document_id: str, extension: str) -> str: """Construct a WebDAV path for a document. Path schema: "docuvault/{user_id}/{document_id}{extension}" Each segment is percent-encoded (Pitfall 2 — Nextcloud/WebDAV compatibility with non-ASCII characters and spaces, even though UUIDs are alphanumeric). Args: user_id: User UUID string. document_id: Document UUID string. extension: File extension including leading dot (e.g. ".pdf"). Returns: WebDAV path string suitable for use as object_key. """ encoded_uid = urllib.parse.quote(str(user_id), safe="") encoded_did = urllib.parse.quote(str(document_id), safe="") return f"docuvault/{encoded_uid}/{encoded_did}{extension}" async def put_object( self, user_id: str, document_id: str, file_bytes: bytes, extension: str, content_type: str, ) -> str: """Upload bytes to WebDAV and return the object_key (WebDAV path). Ensures the parent directory "docuvault/{user_id}/" exists before upload by calling client.mkdir() with recursive=True. webdavclient3 mkdir is a no-op if the directory already exists. Args: user_id: User UUID string. document_id: Document UUID string. file_bytes: Raw file content. extension: File extension with leading dot (e.g. ".pdf"). content_type: MIME type (unused by WebDAV, kept for ABC compliance). Returns: object_key: The WebDAV path where the file was stored. Raises: ValueError: If SSRF guard fires on re-validation (D-17). """ # Re-validate before every outbound request (D-17 / T-05-04-02) validate_cloud_url(self._server_url) object_key = self._make_path(user_id, document_id, extension) # Ensure parent directory exists (idempotent) parent_dir = f"docuvault/{urllib.parse.quote(str(user_id), safe='')}" await asyncio.to_thread(self._client.mkdir, parent_dir, True) buf = io.BytesIO(file_bytes) await asyncio.to_thread(self._client.upload_to, buf, object_key) return object_key async def get_object(self, object_key: str) -> bytes: """Download bytes from WebDAV by object_key (WebDAV path). Args: object_key: WebDAV path returned by put_object. Returns: File content as bytes. Raises: ValueError: If SSRF guard fires on re-validation (D-17). """ # Re-validate before every outbound request (D-17) validate_cloud_url(self._server_url) buf = io.BytesIO() await asyncio.to_thread(self._client.download_from, buf, object_key) return buf.getvalue() async def delete_object(self, object_key: str) -> None: """Delete an object from WebDAV by object_key. Silently ignores missing files (no-op if object_key does not exist). Any WebDAV exception during delete is swallowed — consistent with the StorageBackend ABC contract ("No-op if the key does not exist"). Args: object_key: WebDAV path returned by put_object. """ # Re-validate before every outbound request (D-17) validate_cloud_url(self._server_url) try: await asyncio.to_thread(self._client.clean, object_key) except Exception: # Covers FileNotFoundError, WebDavException, and any other exception # for missing or already-deleted files pass async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str: """Not supported for WebDAV backends. WebDAV does not have a concept of presigned URLs. Cloud document downloads use the FastAPI proxy endpoint (D-15 — same as all cloud backends). Raises: NotImplementedError: Always. """ raise NotImplementedError("WebDAV backend does not support presigned GET URLs") async def generate_presigned_put_url( self, object_key: str, expires_minutes: int = 15 ) -> str: """Not supported for WebDAV backends. Cloud backends use the FastAPI intermediary upload path (D-14). Presigned PUT URLs are a MinIO/S3-only feature. Raises: NotImplementedError: Always. """ raise NotImplementedError( "WebDAV backend does not support presigned PUT URLs — use the upload proxy endpoint" ) async def stat_object(self, object_key: str) -> int: """Return the file size in bytes from the WebDAV server. Uses client.info() which performs a PROPFIND request and returns a dict including a "size" key with the content length. Args: object_key: WebDAV path returned by put_object. Returns: File size in bytes. Returns 0 if the server does not report a size. Raises: ValueError: If SSRF guard fires on re-validation (D-17). """ # Re-validate before every outbound request (D-17) validate_cloud_url(self._server_url) info = await asyncio.to_thread(self._client.info, object_key) return int(info.get("size", 0)) async def health_check(self) -> bool: """Return True if the WebDAV server root ("/") is reachable. Uses a lightweight client.check("/") PROPFIND call — no file read/write. Per D-08: connection health is validated before credentials are stored. Returns: True if the server is reachable and the WebDAV root exists, False otherwise. """ try: # Re-validate before every outbound request (D-17) validate_cloud_url(self._server_url) result = await asyncio.to_thread(self._client.check, "/") return bool(result) except Exception: return False