feat(05-04): implement WebDAVBackend with SSRF guard and asyncio wrapping
- All 7 StorageBackend methods implemented as async coroutines
- validate_cloud_url() called in __init__ (SSRF guard at construct time) and before
every asyncio.to_thread() call (D-17 defense-in-depth / T-05-04-01, T-05-04-02)
- _make_path() builds "docuvault/{user_id}/{document_id}{ext}" with urllib.parse.quote
encoding on path segments (RESEARCH.md Pitfall 2)
- presigned_get_url and generate_presigned_put_url raise NotImplementedError (D-14)
- All webdavclient3 sync calls (upload_to, download_from, clean, info, check, mkdir)
wrapped in asyncio.to_thread() per MinIOBackend pattern
- delete_object silently ignores missing file exceptions (StorageBackend ABC contract)
This commit is contained in:
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
WebDAVBackend — Generic WebDAV StorageBackend implementation.
|
||||
|
||||
Security design (D-17 / T-05-04-01 / T-05-04-02):
|
||||
validate_cloud_url() is called at two mandatory points:
|
||||
1. In __init__, BEFORE constructing the webdavclient3 Client — blocks SSRF at
|
||||
construction time so a private-IP URL never reaches the SDK.
|
||||
2. Before EVERY asyncio.to_thread() call — defends against DNS-rebinding attacks
|
||||
where a hostname passes the initial check but later resolves to an internal IP.
|
||||
|
||||
Path encoding (Pitfall 2 from RESEARCH.md):
|
||||
_make_path() percent-encodes each path segment using urllib.parse.quote()
|
||||
so non-ASCII characters and spaces in UUIDs (which don't occur in practice,
|
||||
but are handled defensively) are properly escaped for Nextcloud/WebDAV.
|
||||
|
||||
Object key scheme:
|
||||
object_key = WebDAV path: "docuvault/{user_id}/{document_id}{extension}"
|
||||
This is the path relative to the WebDAV root, stored in the database as
|
||||
document.object_key for later get/delete operations.
|
||||
|
||||
WebDAV credentials dict shape:
|
||||
{"server_url": str, "username": str, "password": str}
|
||||
|
||||
Not implemented (D-14):
|
||||
presigned_get_url and generate_presigned_put_url raise NotImplementedError.
|
||||
Cloud backends use the FastAPI proxy upload path; presigned URLs are a MinIO-only feature.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
import io
import logging
import urllib.parse

from webdav3.client import Client
from webdav3.exceptions import RemoteResourceNotFound

from storage.base import StorageBackend
from storage.cloud_utils import validate_cloud_url
|
||||
|
||||
|
||||
class WebDAVBackend(StorageBackend):
    """Generic WebDAV storage backend implementing the StorageBackend methods.

    All synchronous webdavclient3 calls are wrapped in asyncio.to_thread() so
    they do not block the event loop (mirrors MinIOBackend pattern —
    RESEARCH.md Pattern 5).

    The SSRF guard (validate_cloud_url) is called in __init__ and again before
    every asyncio.to_thread() call for defense-in-depth (D-17).
    """

    # Shared module-scoped logger; used for best-effort failures that are
    # intentionally not raised to the caller.
    _log = logging.getLogger(__name__)

    def __init__(self, server_url: str, username: str, password: str) -> None:
        """Construct a WebDAVBackend.

        Args:
            server_url: The WebDAV root URL
                (e.g. "https://dav.example.com/remote.php/dav/").
            username: HTTP Basic Auth username.
            password: HTTP Basic Auth password or app-specific password (D-07).

        Raises:
            ValueError: If server_url targets a private/internal address
                (SSRF guard, D-17).
        """
        # SSRF guard: validate before any SDK construction — T-05-04-01
        validate_cloud_url(server_url)
        self._server_url = server_url

        options = {
            "webdav_hostname": server_url,
            "webdav_login": username,
            "webdav_password": password,
        }
        self._client = Client(options)

    def _make_path(self, user_id: str, document_id: str, extension: str) -> str:
        """Construct the WebDAV path (object_key) for a document.

        Path schema: "docuvault/{user_id}/{document_id}{extension}"

        Every caller-supplied component is percent-encoded with safe="" so that
        none of them can contribute a "/" and thereby add or escape a path
        segment. Typical inputs (UUIDs, ".pdf") contain only unreserved
        characters and pass through unchanged.

        NOTE(review): webdavclient3 appears to apply its own quoting to remote
        paths — confirm that non-UUID identifiers are not double-encoded on the
        wire. Keys stay self-consistent either way because the same string is
        used for put/get/delete.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            extension: File extension including leading dot (e.g. ".pdf").

        Returns:
            WebDAV path string suitable for use as object_key.
        """
        encoded_uid = urllib.parse.quote(str(user_id), safe="")
        encoded_did = urllib.parse.quote(str(document_id), safe="")
        # The extension belongs to the final segment, so it is encoded as well;
        # "." is never escaped by quote(), so ".pdf" stays ".pdf".
        encoded_ext = urllib.parse.quote(str(extension), safe="")
        return f"docuvault/{encoded_uid}/{encoded_did}{encoded_ext}"

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Upload bytes to WebDAV and return the object_key (WebDAV path).

        Ensures the parent directory "docuvault/{user_id}/" exists before the
        upload by calling client.mkdir() with the recursive flag set.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            file_bytes: Raw file content.
            extension: File extension with leading dot (e.g. ".pdf").
            content_type: MIME type (unused by WebDAV, kept for ABC compliance).

        Returns:
            object_key: The WebDAV path where the file was stored.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17 / T-05-04-02)
        validate_cloud_url(self._server_url)

        object_key = self._make_path(user_id, document_id, extension)
        # Derive the directory from the key itself so the two can never drift;
        # encoded components contain no "/", so this is "docuvault/{user_id}".
        parent_dir = object_key.rpartition("/")[0]

        # Second positional argument is the recursive flag (create parents too).
        await asyncio.to_thread(self._client.mkdir, parent_dir, True)

        buf = io.BytesIO(file_bytes)
        await asyncio.to_thread(self._client.upload_to, buf, object_key)
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Download bytes from WebDAV by object_key (WebDAV path).

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File content as bytes.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)

        buf = io.BytesIO()
        await asyncio.to_thread(self._client.download_from, buf, object_key)
        return buf.getvalue()

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from WebDAV by object_key (best effort).

        A missing object is a silent no-op, per the StorageBackend ABC contract
        ("No-op if the key does not exist"). Any other failure during the
        delete is still not raised to the caller, but it is logged so that
        auth, network, or server errors do not disappear without a trace.

        Args:
            object_key: WebDAV path returned by put_object.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)

        try:
            await asyncio.to_thread(self._client.clean, object_key)
        except RemoteResourceNotFound:
            # Already gone: exactly the no-op case the contract describes.
            return
        except Exception:
            # Deliberately not re-raised (delete is best-effort), but surfaced
            # in logs so orphaned remote files can be diagnosed.
            self._log.warning(
                "WebDAV delete failed for %r; ignoring", object_key, exc_info=True
            )

    async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str:
        """Not supported for WebDAV backends.

        WebDAV does not have a concept of presigned URLs. Cloud document
        downloads use the FastAPI proxy endpoint (D-15 — same as all cloud
        backends).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("WebDAV backend does not support presigned GET URLs")

    async def generate_presigned_put_url(
        self, object_key: str, expires_minutes: int = 15
    ) -> str:
        """Not supported for WebDAV backends.

        Cloud backends use the FastAPI intermediary upload path (D-14).
        Presigned PUT URLs are a MinIO/S3-only feature.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "WebDAV backend does not support presigned PUT URLs — use the upload proxy endpoint"
        )

    async def stat_object(self, object_key: str) -> int:
        """Return the file size in bytes as reported by the WebDAV server.

        Uses client.info() and reads the "size" entry of the returned mapping.

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File size in bytes. Returns 0 if the server does not report a size.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)

        info = await asyncio.to_thread(self._client.info, object_key)
        # The "size" key can be present with a None/empty value when the server
        # omits the content length, so a .get() default alone is not enough.
        return int(info.get("size") or 0)

    async def health_check(self) -> bool:
        """Return True if the WebDAV server root ("/") is reachable.

        Uses a lightweight client.check("/") call — no file read/write.
        Per D-08: connection health is validated before credentials are stored.

        Returns:
            True if the check succeeds; False if it fails for any reason,
            including the SSRF guard rejecting the configured URL.
        """
        try:
            # Re-validate before every outbound request (D-17)
            validate_cloud_url(self._server_url)
            result = await asyncio.to_thread(self._client.check, "/")
        except Exception:
            # A health probe must never raise; keep the reason for debugging.
            self._log.debug("WebDAV health check failed", exc_info=True)
            return False
        return bool(result)
|
||||
Reference in New Issue
Block a user