311dfa1513
- All 7 StorageBackend methods implemented as async coroutines
- validate_cloud_url() called in __init__ (SSRF at construct time) and before
every asyncio.to_thread() call (D-17 defense-in-depth / T-05-04-01, T-05-04-02)
- _make_path() builds "docuvault/{user_id}/{document_id}{ext}" with urllib.parse.quote
encoding on path segments (RESEARCH.md Pitfall 2)
- presigned_get_url and generate_presigned_put_url raise NotImplementedError (D-14)
- All webdavclient3 sync calls (upload_to, download_from, clean, info, check, mkdir)
wrapped in asyncio.to_thread() per MinIOBackend pattern
- delete_object silently ignores missing file exceptions (StorageBackend ABC contract)
228 lines
8.7 KiB
Python
228 lines
8.7 KiB
Python
"""
WebDAVBackend — Generic WebDAV StorageBackend implementation.

Security design (D-17 / T-05-04-01 / T-05-04-02):
    validate_cloud_url() is called at two mandatory points:
    1. In __init__, BEFORE constructing the webdavclient3 Client — blocks SSRF at
       construction time so a private-IP URL never reaches the SDK.
    2. Before EVERY asyncio.to_thread() call — defends against DNS-rebinding attacks
       where a hostname passes the initial check but later resolves to an internal IP.

Path encoding (Pitfall 2 from RESEARCH.md):
    _make_path() percent-encodes each path segment using urllib.parse.quote()
    so non-ASCII characters and spaces in UUIDs (which don't occur in practice,
    but are handled defensively) are properly escaped for Nextcloud/WebDAV.

Object key scheme:
    object_key = WebDAV path: "docuvault/{user_id}/{document_id}{extension}"
    This is the path relative to the WebDAV root, stored in the database as
    document.object_key for later get/delete operations.

WebDAV credentials dict shape:
    {"server_url": str, "username": str, "password": str}

Not implemented (D-14):
    presigned_get_url and generate_presigned_put_url raise NotImplementedError.
    Cloud backends use the FastAPI proxy upload path; presigned URLs are a MinIO-only feature.
"""
|
|
from __future__ import annotations

import asyncio
import io
import logging
import urllib.parse

from webdav3.client import Client

from storage.base import StorageBackend
from storage.cloud_utils import validate_cloud_url
|
|
|
|
|
|
class WebDAVBackend(StorageBackend):
    """Generic WebDAV storage backend implementing the StorageBackend methods.

    Every synchronous webdavclient3 call is dispatched through
    asyncio.to_thread() so it does not run on the event loop thread.

    validate_cloud_url() is invoked in __init__ and again immediately before
    each asyncio.to_thread() call (D-17). It raises to reject a URL; what
    exactly it rejects is defined in storage.cloud_utils, not here.
    """

    # Used only to record failures that delete_object deliberately swallows.
    _logger = logging.getLogger(__name__)

    def __init__(self, server_url: str, username: str, password: str) -> None:
        """Validate the server URL, then build the webdavclient3 Client.

        Args:
            server_url: The WebDAV root URL
                (e.g. "https://dav.example.com/remote.php/dav/").
            username: HTTP Basic Auth username.
            password: HTTP Basic Auth password or app-specific password (D-07).

        Raises:
            ValueError: If validate_cloud_url() rejects server_url
                (SSRF guard, D-17).
        """
        # SSRF guard: validate before any SDK construction — T-05-04-01
        validate_cloud_url(server_url)
        self._server_url = server_url

        options = {
            "webdav_hostname": server_url,
            "webdav_login": username,
            "webdav_password": password,
        }
        self._client = Client(options)

    def _user_dir(self, user_id: str) -> str:
        """Return the per-user directory "docuvault/{user_id}" (percent-encoded)."""
        return f"docuvault/{urllib.parse.quote(str(user_id), safe='')}"

    def _make_path(self, user_id: str, document_id: str, extension: str) -> str:
        """Construct the WebDAV path (object_key) for a document.

        Path schema: "docuvault/{user_id}/{document_id}{extension}"

        user_id and document_id are percent-encoded with safe="" so that no
        character in them (including "/") survives unescaped. The extension
        is appended verbatim and is NOT encoded.

        Args:
            user_id: User identifier (converted with str()).
            document_id: Document identifier (converted with str()).
            extension: File extension including leading dot (e.g. ".pdf").

        Returns:
            WebDAV path string suitable for use as object_key.
        """
        encoded_did = urllib.parse.quote(str(document_id), safe="")
        return f"{self._user_dir(user_id)}/{encoded_did}{extension}"

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Upload bytes to WebDAV and return the object_key (WebDAV path).

        The per-user directory is created first, then the payload is uploaded
        from an in-memory buffer.

        Args:
            user_id: User identifier.
            document_id: Document identifier.
            file_bytes: Raw file content.
            extension: File extension with leading dot (e.g. ".pdf").
            content_type: MIME type; not used by this method, accepted for
                interface compatibility.

        Returns:
            The WebDAV path where the file was stored.

        Raises:
            ValueError: If the SSRF guard fires on re-validation (D-17).
        """
        object_key = self._make_path(user_id, document_id, extension)
        parent_dir = self._user_dir(user_id)

        # Re-validate before every outbound request (D-17 / T-05-04-02)
        validate_cloud_url(self._server_url)
        # True is passed positionally as mkdir's second argument (intended as
        # recursive=True) — TODO confirm the installed webdavclient3 accepts it
        # and that mkdir tolerates an already-existing directory.
        await asyncio.to_thread(self._client.mkdir, parent_dir, True)

        # The guard is documented as running before EVERY to_thread() call, so
        # repeat it here: mkdir may have taken several network round trips.
        validate_cloud_url(self._server_url)
        buf = io.BytesIO(file_bytes)
        await asyncio.to_thread(self._client.upload_to, buf, object_key)
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Download an object into memory and return its content.

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File content as bytes.

        Raises:
            ValueError: If the SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        buf = io.BytesIO()
        await asyncio.to_thread(self._client.download_from, buf, object_key)
        return buf.getvalue()

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from WebDAV, best-effort.

        Any Exception raised by the client during the delete is swallowed so
        that deleting a missing key is a no-op (StorageBackend contract).
        The swallowed exception is logged at WARNING level so that real
        failures (authentication, connectivity) remain diagnosable.

        Args:
            object_key: WebDAV path returned by put_object.

        Raises:
            ValueError: If the SSRF guard fires on re-validation (D-17); the
                guard runs outside the try block and is not swallowed.
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        try:
            await asyncio.to_thread(self._client.clean, object_key)
        except Exception as exc:
            # Deliberately not re-raised: covers missing/already-deleted files
            # as well as any other client error.
            self._logger.warning(
                "WebDAV delete of %r failed and was ignored: %r", object_key, exc
            )

    async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str:
        """Not supported for WebDAV backends (D-15: downloads go via the proxy).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("WebDAV backend does not support presigned GET URLs")

    async def generate_presigned_put_url(
        self, object_key: str, expires_minutes: int = 15
    ) -> str:
        """Not supported for WebDAV backends (D-14: uploads go via the proxy).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "WebDAV backend does not support presigned PUT URLs — use the upload proxy endpoint"
        )

    async def stat_object(self, object_key: str) -> int:
        """Return the file size in bytes as reported by client.info().

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File size in bytes; 0 if the "size" entry is absent, None or empty.

        Raises:
            ValueError: If the SSRF guard fires on re-validation (D-17), or if
                "size" holds a non-numeric string.
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        info = await asyncio.to_thread(self._client.info, object_key)
        # `.get("size", 0)` only covers a missing key. The client may map
        # "size" to None when the server omits the content length (presumably
        # via findtext — verify against webdavclient3); `or 0` handles None
        # and "" as well, so int() never receives a non-number sentinel.
        return int(info.get("size") or 0)

    async def health_check(self) -> bool:
        """Return True if client.check("/") succeeds, False on any failure.

        A rejected URL (SSRF guard) and any client exception both yield False;
        this method does not raise for ordinary Exceptions (D-08).

        Returns:
            The truthiness of client.check("/"), or False if anything raised.
        """
        try:
            # Re-validate before every outbound request (D-17)
            validate_cloud_url(self._server_url)
            result = await asyncio.to_thread(self._client.check, "/")
            return bool(result)
        except Exception:
            return False
|