From d13801538d0d38a7d5286e530c3266719fba34be Mon Sep 17 00:00:00 2001 From: curo1305 Date: Thu, 28 May 2026 19:55:28 +0200 Subject: [PATCH] =?UTF-8?q?fix(05):=20revise=20Phase=205=20plans=20based?= =?UTF-8?q?=20on=20checker=20feedback=20=E2=80=94=20B1-B4,=20W1-W4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B1: Mark RESEARCH.md Open Questions as (RESOLVED) with decision text for all 3 B2: Backends now stateless — raise CloudConnectionError(reason=) only; API layer in cloud.py owns token refresh + DB update via _call_cloud_op helper B3: Add Task 3 to Plan 05 — cloud connection + object cleanup on account deletion (SEC-09) B4: Add frontend_url setting to Plan 01 Task 1; Plan 05 uses settings.frontend_url for OAuth callback redirects W1: ROADMAP.md Phase 5 now correctly labels Plans 03+04 as Wave 3 (not Wave 2) W2: Plan 06 invalid_grant test now asserts both 503 HTTP response AND DB REQUIRES_REAUTH W3: Plan 06 Task 2 split into unit tests (4, cloud_utils.py) and integration tests (11, HTTP) W4: Plan 07 adds Vitest tests for cloudConnections store (4 tests) and SettingsCloudTab mount test (2 tests) per CLAUDE.md testing protocol Co-Authored-By: Claude Sonnet 4.6 --- .planning/ROADMAP.md | 4 +- .../05-cloud-storage-backends/05-01-PLAN.md | 13 +- .../05-cloud-storage-backends/05-03-PLAN.md | 35 +++-- .../05-cloud-storage-backends/05-05-PLAN.md | 102 ++++++++++++++- .../05-cloud-storage-backends/05-06-PLAN.md | 120 +++++++++++++----- .../05-cloud-storage-backends/05-07-PLAN.md | 108 +++++++++++++++- .../05-cloud-storage-backends/05-RESEARCH.md | 5 +- 7 files changed, 328 insertions(+), 59 deletions(-) diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 45f15c9..4d84cb8 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -193,8 +193,10 @@ Before any phase is marked complete, all three gates must pass: **Wave 1** — Test scaffold + dependencies - [ ] 05-01-PLAN.md — Wave 0 xfail stubs, conftest cloud 
fixtures, requirements.txt packages, config.py settings -**Wave 2** — Shared utilities (parallel) +**Wave 2** — Shared utilities - [ ] 05-02-PLAN.md — cloud_utils.py (SSRF + HKDF), cloud_cache.py (TTLCache), storage factory extension + +**Wave 3** — Cloud backends (parallel, both blocked on Wave 2 / Plan 05-02) - [ ] 05-03-PLAN.md — GoogleDriveBackend + OneDriveBackend (all 7 StorageBackend methods) - [ ] 05-04-PLAN.md — NextcloudBackend + WebDAVBackend (all 7 StorageBackend methods) diff --git a/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md index 1cddfe0..c5e7ad5 100644 --- a/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md +++ b/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md @@ -24,7 +24,7 @@ must_haves: - "All 15 Phase 5 test stubs exist in test_cloud.py and xfail with strict=False" - "conftest.py has mock_google_drive_creds, mock_onedrive_creds, mock_webdav_client, cloud_connection_factory fixtures" - "requirements.txt includes all 6 new packages with correct version pins" - - "config.py has CLOUD_CREDS_KEY, GOOGLE_CLIENT_ID/SECRET, ONEDRIVE_CLIENT_ID/SECRET/TENANT_ID, BACKEND_URL settings" + - "config.py has CLOUD_CREDS_KEY, GOOGLE_CLIENT_ID/SECRET, ONEDRIVE_CLIENT_ID/SECRET/TENANT_ID, BACKEND_URL, FRONTEND_URL settings" - "pytest -v passes with zero failures after Wave 0 (stubs xfail, not fail)" artifacts: - path: "backend/tests/test_cloud.py" @@ -96,7 +96,7 @@ From backend/api/admin.py: - requirements.txt contains all 6 new packages with their exact version pins - - config.py Settings class has: cloud_creds_key (str, default "CHANGEME-32-bytes-padded!!"), google_client_id (str, default ""), google_client_secret (str, default ""), onedrive_client_id (str, default ""), onedrive_client_secret (str, default ""), onedrive_tenant_id (str, default "common"), backend_url (str, default "http://localhost:8000") + - config.py Settings class has: cloud_creds_key (str, default 
"CHANGEME-32-bytes-padded!!"), google_client_id (str, default ""), google_client_secret (str, default ""), onedrive_client_id (str, default ""), onedrive_client_secret (str, default ""), onedrive_tenant_id (str, default "common"), backend_url (str, default "http://localhost:8000"), frontend_url (str, default "http://localhost:5173") - All new settings have empty-string or safe defaults so the app boots without cloud credentials configured @@ -113,9 +113,10 @@ From backend/api/admin.py: - onedrive_client_secret: str = "" (ONEDRIVE_CLIENT_SECRET) - onedrive_tenant_id: str = "common" (ONEDRIVE_TENANT_ID — "common" works for personal + org accounts) - backend_url: str = "http://localhost:8000" (BACKEND_URL — used to construct OAuth callback URLs) + - frontend_url: str = "http://localhost:5173" (FRONTEND_URL — used to construct OAuth success/error redirect to Vue app; per B4 fix) .env.example should have the CLOUD_CREDS_KEY, GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET, - ONEDRIVE_CLIENT_ID, ONEDRIVE_CLIENT_SECRET, ONEDRIVE_TENANT_ID, BACKEND_URL entries + ONEDRIVE_CLIENT_ID, ONEDRIVE_CLIENT_SECRET, ONEDRIVE_TENANT_ID, BACKEND_URL, FRONTEND_URL entries (create .env.example if it doesn't exist, or append if it does). 
@@ -123,10 +124,10 @@ From backend/api/admin.py: - backend/requirements.txt contains lines matching: cryptography>=41.0.0, google-auth-oauthlib>=1.3.1, google-api-python-client>=2.196.0, msal>=1.36.0, webdavclient3>=3.14.7, cachetools>=5.3.0 - - backend/config.py contains `cloud_creds_key: str` and `google_client_id: str` and `backend_url: str` + - backend/config.py contains `cloud_creds_key: str` and `google_client_id: str` and `backend_url: str` and `frontend_url: str` - `python -c "from config import settings; print(settings.cloud_creds_key)"` prints without ImportError - requirements.txt has all 6 Phase 5 package lines; config.py imports and Settings loads without error; all 7 new cloud settings accessible via settings.{field_name} + requirements.txt has all 6 Phase 5 package lines; config.py imports and Settings loads without error; all 8 new cloud settings accessible via settings.{field_name} @@ -278,7 +279,7 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest - pytest tests/test_cloud.py exits 0; all 15 stubs show xfailed - pytest -v (full suite) exits 0 with zero failures - requirements.txt contains all 6 new package lines -- config.py Settings loads without error; cloud_creds_key, google_client_id, backend_url all accessible +- config.py Settings loads without error; cloud_creds_key, google_client_id, backend_url, frontend_url all accessible - conftest.py has 4 new fixtures: mock_google_drive_creds, mock_onedrive_creds, mock_webdav_client, cloud_connection_factory diff --git a/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md index af1a59f..793ef52 100644 --- a/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md +++ b/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md @@ -20,7 +20,7 @@ must_haves: - "OneDriveBackend implements all 7 StorageBackend abstract methods" - "generate_presigned_put_url and presigned_get_url raise NotImplementedError on both 
cloud backends (D-14)" - "All sync SDK calls wrapped in asyncio.to_thread() — event loop never blocked" - - "On-demand token refresh: 401/token-expiry error triggers transparent refresh; invalid_grant sets REQUIRES_REAUTH" + - "Backends are stateless: raise CloudConnectionError(reason="token_expired") on expiry or CloudConnectionError(reason="invalid_grant") on revocation — DB update belongs to API layer (D-05/D-06, B2 design)" - "Google OAuth Flow uses access_type='offline', prompt='consent' (Pitfall 1 prevention)" - "OneDrive uses resumable upload sessions (createUploadSession) for all files (Pitfall 6 prevention)" artifacts: @@ -44,7 +44,7 @@ must_haves: Implement GoogleDriveBackend and OneDriveBackend — the two OAuth-based cloud StorageBackend concrete classes. -Purpose: These backends handle Google Drive v3 and Microsoft Graph file operations. Both use async-wrapped sync SDKs, on-demand token refresh, and handle the invalid_grant → REQUIRES_REAUTH transition per D-05/D-06. +Purpose: These backends handle Google Drive v3 and Microsoft Graph file operations. Both use async-wrapped sync SDKs and raise CloudConnectionError(reason) for token expiry/revocation. The DB transition (REQUIRES_REAUTH) is handled by the API layer per B2 design — backends are stateless. Output: google_drive_backend.py and onedrive_backend.py, each implementing all 7 StorageBackend methods. 
@@ -91,11 +91,13 @@ Microsoft Graph: GET /me/drive/items/{item_id}/content — streams bytes Microsoft Graph: DELETE /me/drive/items/{item_id} OneDrive object_key = item_id from upload response - -Custom exception: CloudConnectionError (raised when invalid_grant detected) -On 401 / token-expiry: refresh token, update credentials_enc in conn, retry once -On invalid_grant: set conn.status = "REQUIRES_REAUTH", raise CloudConnectionError -Both backends need session + conn parameters for the refresh/update path (passed by the API layer caller) + +Custom exception: CloudConnectionError raised with reason attribute: + - reason="token_expired": API layer will refresh the token, update DB, and retry + - reason="invalid_grant": API layer will set conn.status="REQUIRES_REAUTH" in DB and raise HTTPException(503) +Backends are STATELESS — they raise CloudConnectionError but do NOT update DB or conn directly. +DB updates happen in the _call_cloud_op() helper in cloud.py (Plan 05), which has the session. +This keeps backends testable without DB fixtures. 
@@ -119,8 +121,10 @@ Both backends need session + conn parameters for the refresh/update path (passed - stat_object: calls service.files().get(fileId=key, fields="size") wrapped in asyncio.to_thread(); returns int(metadata.get("size", 0)) - health_check: tries files().list(pageSize=1) wrapped in asyncio.to_thread(); returns True/False - All sync googleapiclient calls wrapped in asyncio.to_thread() (Pitfall 7) - - On-demand token refresh: _is_token_expired(e) detects googleapiclient.errors.HttpError status 401; _refresh_google_creds(credentials) calls google.auth.transport.requests.Request() to refresh; returns updated credentials dict or None on invalid_grant - - CloudConnectionError exception class defined in this module for invalid_grant signaling + - CloudConnectionError exception class defined in this module; raised with a reason attribute (the backend only raises it — it performs no DB operations itself) + - On HttpError 401 (token expired): raise CloudConnectionError(reason="token_expired") — the API layer in cloud.py handles the actual refresh and DB update per D-05 (B2 design) + - On invalid_grant detection (e.g. google.auth.exceptions.RefreshError, or googleapiclient.errors.HttpError, whose message contains "invalid_grant"): raise CloudConnectionError(reason="invalid_grant") — the API layer sets REQUIRES_REAUTH per D-06 (B2 design) + - Backends have NO session parameter and perform NO DB writes — they are stateless signal-raisers only Create backend/storage/google_drive_backend.py with: @@ -136,7 +140,10 @@ Both backends need session + conn parameters for the refresh/update path (passed from google.auth.transport.requests import Request from storage.base import StorageBackend - class CloudConnectionError(Exception): pass + class CloudConnectionError(Exception): + def __init__(self, msg: str = "", *, reason: str = ""): + super().__init__(msg) + self.reason = reason # "token_expired" | "invalid_grant" class GoogleDriveBackend(StorageBackend): SCOPES = ["https://www.googleapis.com/auth/drive.file"] @@ -243,8 +250,10 @@ print('All 
7 methods are coroutines: OK') - generate_presigned_put_url: raises NotImplementedError - stat_object: GET /me/drive/items/{item_id}?$select=size; return int(response["size"]) - health_check: GET /me/drive?$select=id; return True/False - - _refresh_token(credentials: dict) -> dict | None: calls msal.ConfidentialClientApplication.acquire_token_by_refresh_token(); returns new credentials dict or None if result.get("error") == "invalid_grant" + - _refresh_token() -> dict | None: calls msal.ConfidentialClientApplication.acquire_token_by_refresh_token(); returns new credentials dict or None if result.get("error") == "invalid_grant" + - _ensure_valid_token(): on expired token calls _refresh_token(); if None raises CloudConnectionError(reason="invalid_grant"); if success updates self._credentials - All sync msal calls wrapped in asyncio.to_thread(); httpx calls are already async (use await httpx.AsyncClient) + - Backend is stateless: raises CloudConnectionError(reason="token_expired") or CloudConnectionError(reason="invalid_grant") — no DB writes (B2 design; DB updates handled by API layer _call_cloud_op helper in cloud.py) - CHUNK_SIZE = 10 * 1024 * 1024 (10 MB, above Graph's 4 MB limit) @@ -390,8 +399,8 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest -- GoogleDriveBackend: all 7 methods async; presigned methods raise NotImplementedError; CloudConnectionError defined -- OneDriveBackend: all 7 methods async; CHUNK_SIZE=10MB; presigned methods raise NotImplementedError; CloudConnectionError imported +- GoogleDriveBackend: all 7 methods async; presigned methods raise NotImplementedError; CloudConnectionError(reason=) defined; backend raises errors, does NO DB writes +- OneDriveBackend: all 7 methods async; CHUNK_SIZE=10MB; presigned methods raise NotImplementedError; CloudConnectionError imported; backend raises errors, does NO DB writes - pytest -v exits 0, 0 failures; test_cloud.py still all xfailed diff --git 
a/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md index a461fa0..e9081a0 100644 --- a/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md +++ b/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md @@ -9,6 +9,7 @@ depends_on: files_modified: - backend/api/cloud.py - backend/main.py + - backend/api/auth.py autonomous: true requirements: - CLOUD-01 @@ -17,11 +18,12 @@ requirements: - CLOUD-04 - CLOUD-05 - CLOUD-06 + - SEC-09 must_haves: truths: - "GET /api/cloud/oauth/initiate/{provider} redirects to provider OAuth URL; state token in Redis with 30-min TTL" - - "GET /api/cloud/oauth/callback/{provider} validates state, exchanges code, encrypts credentials, saves CloudConnection, redirects to /settings?cloud_connected={provider}" + - "GET /api/cloud/oauth/callback/{provider} validates state, exchanges code, encrypts credentials, saves CloudConnection, redirects to {settings.frontend_url}/settings?cloud_connected={provider}" - "POST /api/cloud/connections/webdav validates URL (SSRF), tests connection (PROPFIND), encrypts + saves credentials" - "GET /api/cloud/connections returns CloudConnectionOut list — no credentials_enc" - "DELETE /api/cloud/connections/{id} deletes credentials_enc row; subsequent use returns 503" @@ -30,6 +32,8 @@ must_haves: - "All endpoints use get_regular_user dep — admin blocked (403)" - "OAuth callback invalid state returns 400; invalid provider returns 400" - "write_audit_log called on connect, disconnect, and REQUIRES_REAUTH transitions" + - "_call_cloud_op(conn, user, session, op_fn) helper in cloud.py wraps all cloud ops: retries once on token_expired (refresh+DB update), sets REQUIRES_REAUTH+HTTPException(503) on invalid_grant" + - "Account deletion purges all CloudConnection rows and calls delete_object on cloud-stored documents (SEC-09)" artifacts: - path: "backend/api/cloud.py" provides: "All /api/cloud/* endpoints + /api/users/me/default-storage" @@ -101,6 
+105,7 @@ From backend/config.py (after Plan 01): settings.google_client_id, google_client_secret: str settings.onedrive_client_id, onedrive_client_secret, onedrive_tenant_id: str settings.backend_url: str (used in OAuth callback redirect_uri) + settings.frontend_url: str (used in OAuth callback success/error redirect to Vue — per B4 fix) From backend/storage/cloud_utils.py: def encrypt_credentials(master_key: bytes, user_id: str, credentials: dict) -> str @@ -160,6 +165,22 @@ From backend/services/cloud_cache.py: get_cloud_folders_cached(user_id, provider router = APIRouter(prefix="/api/cloud", tags=["cloud"]) users_router = APIRouter(prefix="/api/users", tags=["users"]) + _call_cloud_op helper (add as a module-level async function in cloud.py, per B2 design): + async def _call_cloud_op(conn: CloudConnection, user: User, session: AsyncSession, op_fn): + """Wraps a cloud operation with transparent token refresh (D-05) and invalid_grant handling (D-06). + + 1. Calls op_fn() — a zero-argument async callable that performs the cloud operation. + 2. On CloudConnectionError(reason="token_expired"): decrypt current creds, refresh via provider, + encrypt new creds, update conn.credentials_enc in DB, rebuild backend, retry op_fn() once. + 3. On CloudConnectionError(reason="invalid_grant"): set conn.status="REQUIRES_REAUTH", + await session.commit(), call write_audit_log(event_type="cloud.requires_reauth"), + raise HTTPException(503, "Cloud connection requires re-authentication. Please reconnect in Settings."). + 4. Propagates all other exceptions unchanged. + """ + All upload/download/list calls in cloud.py MUST go through _call_cloud_op. + op_fn is a zero-argument async callable (a nested async def — Python has no async lambda) that resolves the backend instance at call time instead of capturing a fixed one. + The backend instance is rebuilt after refresh using the new credentials dict, so the retry of op_fn() must pick up the rebuilt instance. 
+ Pydantic request models: class WebDAVConnectRequest(BaseModel): server_url: str; username: str; password: str; provider: str class DefaultStorageRequest(BaseModel): backend: str @@ -273,6 +294,84 @@ assert len(cloud_routes) >= 5, f'Expected 5+ cloud routes, got {len(cloud_routes Both cloud routers registered in main.py; all cloud routes visible in app.routes; full pytest suite passes + + Task 3: Cloud connection cleanup on account deletion (SEC-09) + backend/api/auth.py + + - backend/api/auth.py — find the DELETE /api/users/me endpoint (account self-deletion), verify it exists from Phase 2; if it does not exist, check backend/api/admin.py for DELETE /api/admin/users/{id} + - backend/db/models.py — CloudConnection (user_id, provider, status), Document (user_id, storage_backend, object_key) + - backend/storage/__init__.py — get_storage_backend_for_document signature + + + - When a user deletes their account (DELETE /api/users/me or admin DELETE /api/admin/users/{id}): + 1. Query all CloudConnection rows for the user + 2. For each connection, query all Document rows for that user where storage_backend == connection.provider + 3. For each such document, call get_storage_backend_for_document(doc, user, session) and await backend.delete_object(doc.object_key) — catch and log exceptions but do NOT abort the deletion + 4. Delete all CloudConnection rows for the user (credentials_enc purged) + - This runs BEFORE the user row is deleted (FK cascade would remove the connection rows anyway, but the cloud-stored objects must be deleted from the provider while the credentials still exist) + - Runs in the same DB transaction as user deletion — if user deletion succeeds, cloud cleanup has been attempted (best-effort: provider errors are logged, not fatal) + - No orphaned credentials_enc rows after account deletion (SEC-09) + + + Read backend/api/auth.py to locate the account deletion endpoint. Also check backend/api/admin.py for admin-initiated user deletion. 
+ + In the account deletion handler (DELETE /api/users/me), add a cloud cleanup block BEFORE the user row deletion: + + 1. Import at top of file (if not already present): + from db.models import CloudConnection, Document + from storage import get_storage_backend_for_document + from sqlalchemy import select + + 2. Cloud cleanup block (insert before the DELETE user statement): + cloud_conns_result = await session.execute( + select(CloudConnection).where(CloudConnection.user_id == current_user.id) + ) + cloud_conns = cloud_conns_result.scalars().all() + for conn in cloud_conns: + # Delete cloud objects for this provider + docs_result = await session.execute( + select(Document).where( + Document.user_id == current_user.id, + Document.storage_backend == conn.provider, + ) + ) + for doc in docs_result.scalars().all(): + try: + backend = await get_storage_backend_for_document(doc, current_user, session) + await backend.delete_object(doc.object_key) + except Exception: + logger.exception("Cloud object cleanup failed: %s", doc.object_key) # Log via the module logger, but do not abort user deletion on cloud error + await session.delete(conn) + await session.flush() # Flush connection deletes before user delete + + If DELETE /api/users/me does not exist in auth.py, check admin.py for the admin-delete endpoint and add the same cleanup block there, substituting the user being deleted for current_user. Document which file was modified in the summary. + + write_audit_log call: add event_type="cloud.credentials_purged" after the cleanup loop, + with metadata_={"providers": [c.provider for c in cloud_conns]}. 
+ + + cd /Users/nik/Documents/Progamming/document_scanner/backend && python -c " +import ast +import os +for fname in ['api/auth.py', 'api/admin.py']: + if os.path.exists(fname): + with open(fname) as f: + src = f.read() + if 'cloud_conns' in src or 'CloudConnection' in src: + print(f'OK: cloud cleanup found in {fname}') +" && python -m pytest -v --tb=short 2>&1 | tail -5 + + + - Either backend/api/auth.py or backend/api/admin.py contains cloud connection cleanup logic before user deletion + - CloudConnection rows are deleted for the user as part of account deletion + - delete_object called for each cloud-stored document before credentials are purged + - write_audit_log called with event_type="cloud.credentials_purged" + - pytest -v exits 0 with 0 failures + - No orphaned credentials_enc rows after account deletion (SEC-09) + + Cloud connection cleanup wired into account deletion; credentials_enc purged; SEC-09 satisfied + + @@ -308,6 +407,7 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest - main.py: both routers registered; all routes visible in app.routes - pytest -v exits 0, 0 failures - test_cloud.py stubs transition from xfail to green for test_credentials_enc_not_exposed, test_connection_status_display, test_disconnect_deletes_credentials, test_ssrf_validation, test_cross_user_idor, test_admin_cannot_see_credentials +- SEC-09: account deletion endpoint purges CloudConnection rows and cloud-stored document objects before deleting user row diff --git a/.planning/phases/05-cloud-storage-backends/05-06-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-06-PLAN.md index b426a8d..c95156e 100644 --- a/.planning/phases/05-cloud-storage-backends/05-06-PLAN.md +++ b/.planning/phases/05-cloud-storage-backends/05-06-PLAN.md @@ -161,58 +161,115 @@ print('documents.py parses without error: OK') - Task 2: Promote all 15 xfail stubs to real passing tests + Task 2: Promote unit test stubs to real tests (cloud_utils.py coverage) 
backend/tests/test_cloud.py - - backend/tests/test_cloud.py — current 15 xfail stubs + - backend/tests/test_cloud.py — current xfail stubs + - backend/storage/cloud_utils.py — validate_cloud_url, encrypt_credentials, decrypt_credentials + - backend/storage/__init__.py — get_storage_backend_for_document + - backend/storage/minio_backend.py — MinIOBackend class + + + - 4 unit tests promoted; they test cloud_utils.py and the factory — no DB, no HTTP client, no network (W3 split: unit tests only) + - test_credential_round_trip: pure unit test; calls encrypt_credentials + decrypt_credentials; asserts round-trip equals original; asserts ciphertext != plaintext string + - test_ssrf_validation: @pytest.mark.parametrize over [("http://localhost/dav",True),("http://127.0.0.1/dav",True),("http://169.254.169.254/dav",True),("http://10.0.0.1/dav",True),("http://192.168.1.1/dav",True),("https://nextcloud.example.com/dav",False)]; asserts ValueError raised for private IPs; no exception for valid public URL + - test_ssrf_link_local: calls validate_cloud_url("http://169.254.169.254/metadata"); asserts ValueError + - test_factory_returns_correct_backend: constructs a mock Document(storage_backend="minio") and mock User; patches get_storage_backend() to return a MagicMock of MinIOBackend; calls get_storage_backend_for_document with a mock AsyncSession; asserts result is the expected backend type + + + Promote the 4 unit-test stubs in test_cloud.py. These tests have no DB/HTTP dependencies: + + 1. test_credential_round_trip — no fixtures needed: + from storage.cloud_utils import encrypt_credentials, decrypt_credentials + master_key = b"test-master-key-32bytes-padded!!" + user_id = "550e8400-e29b-41d4-a716-446655440000" + creds = {"access_token": "ya29.xxx", "refresh_token": "1//xxx"} + enc = encrypt_credentials(master_key, user_id, creds) + assert isinstance(enc, str) and "access_token" not in enc + dec = decrypt_credentials(master_key, user_id, enc) + assert dec == creds + + 2. 
test_ssrf_validation — @pytest.mark.parametrize: + All private/loopback/link-local URLs raise ValueError; valid public URL passes. + Remove the xfail decorator; add parametrize decorator from behavior spec. + + 3. test_ssrf_link_local — simple unit test: + from storage.cloud_utils import validate_cloud_url + with pytest.raises(ValueError): validate_cloud_url("http://169.254.169.254/metadata") + + 4. test_factory_returns_correct_backend — mock-based unit test: + from unittest.mock import MagicMock, AsyncMock, patch + from storage import get_storage_backend_for_document + Mock a Document with storage_backend="minio", a User, and an AsyncSession. + Patch get_storage_backend() to return a MinIOBackend mock. + Run asyncio.run(get_storage_backend_for_document(mock_doc, mock_user, mock_session)). + Assert result is the patched MinIOBackend. + + Remove @pytest.mark.xfail(strict=False) from all 4 stubs once implemented. + Leave the other 11 stubs with xfail decorators (they are promoted in Task 3). 
+ + + cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest tests/test_cloud.py::test_credential_round_trip tests/test_cloud.py::test_ssrf_validation tests/test_cloud.py::test_ssrf_link_local tests/test_cloud.py::test_factory_returns_correct_backend -v 2>&1 | tail -10 + + + - test_credential_round_trip, test_ssrf_validation, test_ssrf_link_local, test_factory_returns_correct_backend all PASSED + - test_ssrf_validation is parametrized (multiple params visible in output) + - No xfail decorators on these 4 tests + - Other 11 tests still xfail (not broken by this task) + - `pytest tests/test_cloud.py -v` exits 0 + + 4 unit tests promoted to PASSED; cloud_utils.py coverage established; 11 integration stubs still xfailed + + + + Task 3: Promote integration test stubs to real passing tests (HTTP endpoint coverage) + backend/tests/test_cloud.py + + - backend/tests/test_cloud.py — current xfail stubs (11 remaining after Task 2) - backend/tests/conftest.py — all fixtures including cloud_connection_factory, mock_google_drive_creds, async_client, db_session - backend/api/cloud.py — endpoint paths and request/response shapes - backend/api/admin.py — CloudConnectionOut fields - - backend/storage/cloud_utils.py — validate_cloud_url, encrypt_credentials, decrypt_credentials - - .planning/phases/05-cloud-storage-backends/05-VALIDATION.md — test map with requirement → test correspondence - backend/db/models.py — CloudConnection, User, Document fields + - .planning/phases/05-cloud-storage-backends/05-VALIDATION.md — test map with requirement → test correspondence - - All 15 tests pass (no xfailed, no failed) after implementation - - test_credential_round_trip: pure unit test; calls encrypt_credentials + decrypt_credentials; asserts round-trip equals original; asserts ciphertext != plaintext + - 11 integration tests promoted; all use async_client, db_session, and/or monkeypatch (W3 split: integration tests only) - test_credentials_enc_not_exposed: creates 
CloudConnection via cloud_connection_factory; calls GET /api/cloud/connections with valid auth; asserts "credentials_enc" not in response JSON at any level - test_cloud_upload_no_presigned: creates CloudConnection; mocks cloud backend put_object; calls POST /api/documents/upload with target_backend="google_drive"; asserts no "upload_url" in response - test_connection_status_display: creates ACTIVE CloudConnection; calls GET /api/cloud/connections; asserts response item has status == "ACTIVE" - - test_invalid_grant_sets_requires_reauth: creates CloudConnection; monkey-patches get_storage_backend_for_document to raise CloudConnectionError; calls GET /api/documents/{id}/content; asserts 503 response; then separately tests that the DB connection has status == "REQUIRES_REAUTH" after the transition is triggered through the backend + - test_invalid_grant_sets_requires_reauth: creates CloudConnection with status="ACTIVE"; monkey-patches the cloud backend operation to raise CloudConnectionError(reason="invalid_grant"); calls GET /api/documents/{id}/content; asserts 503 response; then re-queries the CloudConnection from DB and asserts connection.status == "REQUIRES_REAUTH" — both HTTP response AND DB state verified (W2 fix) - test_disconnect_deletes_credentials: creates CloudConnection; calls DELETE /api/cloud/connections/{id}; asserts 204; queries DB to confirm row deleted - - test_factory_returns_correct_backend: calls get_storage_backend_for_document with mock Document(storage_backend="minio"); asserts isinstance result MinIOBackend - - test_ssrf_validation: parametrized over RFC-1918, loopback, link-local, valid URL inputs; asserts ValueError raised for private IPs; no exception for valid public URL - - test_ssrf_link_local: calls validate_cloud_url("http://169.254.169.254/metadata"); asserts ValueError - test_admin_cannot_see_credentials: creates admin user + CloudConnection; calls GET /api/cloud/connections with admin auth; asserts 403 response - test_cross_user_idor: 
creates two users + CloudConnections; calls DELETE /api/cloud/connections/{user2_connection_id} with user1 auth; asserts 404 - - test_connect_google_drive: calls GET /api/cloud/oauth/initiate/google_drive with valid auth; asserts 302 redirect containing "accounts.google.com" in location header; asserts Redis key "oauth_state:" exists + - test_connect_google_drive: calls GET /api/cloud/oauth/initiate/google_drive with valid auth; asserts 302 redirect containing "accounts.google.com" in location header - test_oauth_callback_valid_state: pre-seeds Redis with oauth_state key; mocks google_auth_oauthlib.flow.Flow.fetch_token; calls GET /api/cloud/oauth/callback/google_drive?code=test&state={seed_state}; asserts 302 redirect to /settings?cloud_connected=google_drive - test_oauth_callback_invalid_state: calls GET /api/cloud/oauth/callback/google_drive?code=x&state=invalid; asserts 400 - - test_webdav_connect_validates: mocks WebDAVBackend health_check to return False; calls POST /api/cloud/connections/webdav with localhost URL; asserts 422 (SSRF blocked before health check) - - For tests requiring auth: use helper to create User rows and generate access tokens (pattern from test_auth_api.py or test_documents.py). - For tests requiring Redis: use monkeypatch to mock app.state.redis.setex, get, delete. - For tests requiring cloud SDKs: monkeypatch/MagicMock the SDK calls — no real network calls in tests. + - test_webdav_connect_validates: calls POST /api/cloud/connections/webdav with localhost URL; asserts 422 (SSRF blocked — validate_cloud_url raises ValueError before health check) - Rewrite backend/tests/test_cloud.py, replacing each pytest.xfail("not implemented yet") stub body with a real test implementation. + Promote the 11 remaining xfail stubs in test_cloud.py to real integration tests. - Keep: all 15 test function names, all @pytest.mark.asyncio decorators, pytestmark = pytest.mark.asyncio. 
- Remove: @pytest.mark.xfail(strict=False) decorators from all stubs once each is implemented. - Add: proper fixture parameters to each test function (async_client, db_session, monkeypatch, etc.). + Keep: all 11 test function names, all @pytest.mark.asyncio decorators. + Remove: @pytest.mark.xfail(strict=False) from all 11 stubs. + Add: proper fixture parameters (async_client, db_session, monkeypatch). - Auth helper (add as a local conftest helper or module-level fixture): - async def _create_user_and_token(session, role="user") — creates User row, generates JWT access token - (Mirror pattern from existing test_auth_api.py or test_documents.py) + Auth helper (add as module-level async def or import from conftest): + async def _create_user_and_token(session, role="user") — creates User row, generates JWT access token. + Mirror pattern from existing test_auth_api.py or test_documents.py. - For test_credential_round_trip: no fixtures needed (pure unit test). - For test_ssrf_validation: parametrize with @pytest.mark.parametrize. - For tests needing cloud API: use async_client fixture. - For tests needing Redis: monkeypatch app.state.redis. + For Redis tests (test_connect_google_drive, test_oauth_callback_valid_state, test_oauth_callback_invalid_state): + monkeypatch app.state.redis.setex, app.state.redis.get, app.state.redis.delete. + test_oauth_callback_valid_state: pre-seed via monkeypatch return values; mock Flow.fetch_token. - Important: tests must pass under SQLite in-memory (non-INTEGRATION mode). Cloud SDK calls must be mocked (no real network calls). OAuth state tests mock Redis. + For test_invalid_grant_sets_requires_reauth (W2 requirement): + Create CloudConnection; monkeypatch get_storage_backend_for_document to return a backend whose read operation raises + CloudConnectionError(reason="invalid_grant"); call GET /api/documents/{id}/content; + assert 503; then await db_session.refresh(connection); assert connection.status == "REQUIRES_REAUTH". 
+ Note: the DB write of REQUIRES_REAUTH must actually be committed by _call_cloud_op — + test verifies the real DB state, not just the HTTP response. - When implementing test_invalid_grant_sets_requires_reauth: focus on the 503 response assertion (the backend routing returning 503 when CloudConnectionError is raised). The REQUIRES_REAUTH DB update happens inside the cloud backend during the operation — for unit testing, verify the 503 response is returned and trust the integration test to verify the DB state. + For SDK mocking: monkeypatch or patch the SDK calls at the module import level. + All tests must pass under SQLite in-memory (non-INTEGRATION mode). cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest tests/test_cloud.py -v 2>&1 @@ -220,8 +277,7 @@ print('documents.py parses without error: OK') - `pytest tests/test_cloud.py -v` exits 0 - Output shows all 15 tests PASSED (no xfailed, no FAILED, no ERROR) - - test_credential_round_trip: no xfail decorator; passes with round-trip assertion - - test_ssrf_validation: parametrized; all params pass + - test_invalid_grant_sets_requires_reauth: 503 HTTP response AND DB connection.status == "REQUIRES_REAUTH" (W2 + W3 combined) - test_credentials_enc_not_exposed: "credentials_enc" not present anywhere in response JSON - test_admin_cannot_see_credentials: 403 for admin role - test_cross_user_idor: 404 for cross-user connection access @@ -258,6 +314,8 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest - POST /api/documents/upload: target_backend routing works for cloud backends; MinIO flow unchanged - GET /api/documents/{id}/content: uses get_storage_backend_for_document; CloudConnectionError → 503 +- test_cloud.py Task 2 (unit): test_credential_round_trip, test_ssrf_validation, test_ssrf_link_local, test_factory_returns_correct_backend all PASSED +- test_cloud.py Task 3 (integration): all 11 integration stubs PASSED including REQUIRES_REAUTH DB assertion - 
test_cloud.py: all 15 tests PASSED; no xfailed - pytest -v (full suite): exits 0, 0 failures diff --git a/.planning/phases/05-cloud-storage-backends/05-07-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-07-PLAN.md index e63b21f..9d699dd 100644 --- a/.planning/phases/05-cloud-storage-backends/05-07-PLAN.md +++ b/.planning/phases/05-cloud-storage-backends/05-07-PLAN.md @@ -7,6 +7,7 @@ depends_on: - "05-06" files_modified: - frontend/src/stores/cloudConnections.js + - frontend/src/stores/__tests__/cloudConnections.test.js - frontend/src/api/client.js - frontend/src/views/SettingsView.vue - frontend/src/components/settings/SettingsPreferencesTab.vue @@ -31,10 +32,14 @@ must_haves: - "OAuth redirect success/error handled in onMounted via ?cloud_connected= and ?cloud_error= query params" - "Success toast auto-dismisses in 5 seconds; error banner persists until dismissed" - "cloudConnectionsStore: connections, loading, error state; fetchConnections, disconnect, disconnectAll actions" + - "Vitest unit tests for cloudConnections store (4 tests) and SettingsCloudTab mount test (2 tests) — per CLAUDE.md testing protocol (W4)" artifacts: - path: "frontend/src/stores/cloudConnections.js" provides: "Pinia store for cloud connections state" contains: "useCloudConnectionsStore" + - path: "frontend/src/stores/__tests__/cloudConnections.test.js" + provides: "Vitest unit tests for cloudConnections store (W4)" + contains: "fetchConnections" - path: "frontend/src/api/client.js" provides: "Cloud API client functions" contains: "listCloudConnections" @@ -117,8 +122,8 @@ Save button label: "Connect {providerLabel}" - Task 1: Create cloudConnections Pinia store and API client additions - frontend/src/stores/cloudConnections.js, frontend/src/api/client.js + Task 1: Create cloudConnections Pinia store, API client additions, and Vitest tests + frontend/src/stores/cloudConnections.js, frontend/src/stores/__tests__/cloudConnections.test.js, frontend/src/api/client.js - 
frontend/src/stores/folders.js — Pinia store structure (defineStore composition API pattern) - frontend/src/api/client.js — existing API function patterns, request() helper @@ -193,6 +198,55 @@ Save button label: "Connect {providerLabel}" body: JSON.stringify({ backend }), }) } + + Create frontend/src/stores/__tests__/cloudConnections.test.js (W4 — Vitest unit tests per CLAUDE.md): + Tests must mock api/client.js functions (no real HTTP calls). + + import { setActivePinia, createPinia } from 'pinia' + import { describe, it, expect, vi, beforeEach } from 'vitest' + import { useCloudConnectionsStore } from '../cloudConnections.js' + import * as api from '../../api/client.js' + + beforeEach(() => { setActivePinia(createPinia()) }) + + describe('useCloudConnectionsStore', () => { + it('fetchConnections sets connections from API response', async () => { + vi.spyOn(api, 'listCloudConnections').mockResolvedValue({ items: [{id:'1',provider:'google_drive',status:'ACTIVE'}] }) + const store = useCloudConnectionsStore() + await store.fetchConnections() + expect(store.connections).toHaveLength(1) + expect(store.connections[0].provider).toBe('google_drive') + expect(store.loading).toBe(false) + }) + + it('fetchConnections sets error on API failure', async () => { + vi.spyOn(api, 'listCloudConnections').mockRejectedValue(new Error('Network error')) + const store = useCloudConnectionsStore() + await store.fetchConnections() + expect(store.error).toBeTruthy() + expect(store.connections).toHaveLength(0) + }) + + it('disconnect removes connection from state after API call', async () => { + vi.spyOn(api, 'disconnectCloud').mockResolvedValue(undefined) + const store = useCloudConnectionsStore() + store.connections = [{ id: 'conn-1', provider: 'google_drive', status: 'ACTIVE' }] + await store.disconnect('conn-1') + expect(store.connections).toHaveLength(0) + expect(api.disconnectCloud).toHaveBeenCalledWith('conn-1') + }) + + it('disconnectAll clears all connections', async () => { + 
vi.spyOn(api, 'disconnectCloud').mockResolvedValue(undefined) + const store = useCloudConnectionsStore() + store.connections = [ + { id: 'a', provider: 'google_drive', status: 'ACTIVE' }, + { id: 'b', provider: 'onedrive', status: 'ACTIVE' }, + ] + await store.disconnectAll() + expect(store.connections).toHaveLength(0) + }) + }) cd /Users/nik/Documents/Progamming/document_scanner/frontend && node -e " @@ -207,15 +261,19 @@ const api = fs.readFileSync('src/api/client.js', 'utf8'); if (!api.includes(name)) throw new Error('Missing from api/client.js: ' + name); console.log('OK api: ' + name); }); -" +if (!fs.existsSync('src/stores/__tests__/cloudConnections.test.js')) throw new Error('Missing Vitest file'); +console.log('OK: Vitest test file exists'); +" && npm run test -- src/stores/__tests__/cloudConnections.test.js 2>&1 | tail -10 - frontend/src/stores/cloudConnections.js exists with useCloudConnectionsStore - Store exports: connections (ref), loading (ref), error (ref), fetchConnections, disconnect, disconnectAll - frontend/src/api/client.js contains listCloudConnections, disconnectCloud, connectWebDav, updateDefaultStorage + - frontend/src/stores/__tests__/cloudConnections.test.js exists with 4 Vitest tests (W4 — CLAUDE.md requirement) + - All 4 Vitest tests pass: fetchConnections, fetchConnections error path, disconnect, disconnectAll - No modifications to existing API functions (folders, auth, etc.) 
cloudConnections.js store created; 4 new API functions appended to client.js; existing API functions untouched + cloudConnections.js store created; 4 new API functions appended to client.js; 4 Vitest unit tests passing; existing API functions untouched @@ -225,6 +283,7 @@ const api = fs.readFileSync('src/api/client.js', 'utf8'); frontend/src/components/settings/SettingsPreferencesTab.vue, frontend/src/components/settings/SettingsAiTab.vue, frontend/src/components/settings/SettingsCloudTab.vue, + frontend/src/components/settings/__tests__/SettingsCloudTab.test.js, frontend/src/components/cloud/CloudCredentialModal.vue @@ -313,6 +372,40 @@ const api = fs.readFileSync('src/api/client.js', 'utf8'); Preserve: p-8 max-w-3xl mx-auto wrapper, h2 heading, description paragraph. Check existing components: look for ConfirmBlock in frontend/src/components/ui/ — if present, use it for disconnect confirmation dialogs. If not present, implement inline confirmation pattern. + + 6. Create frontend/src/components/settings/__tests__/SettingsCloudTab.test.js (W4 — CLAUDE.md requires tests for new components): + import { mount } from '@vue/test-utils' + import { createTestingPinia } from '@pinia/testing' + import { describe, it, expect, vi } from 'vitest' + import SettingsCloudTab from '../SettingsCloudTab.vue' + + describe('SettingsCloudTab', () => { + it('renders all 4 provider rows', () => { + const wrapper = mount(SettingsCloudTab, { + global: { + plugins: [createTestingPinia({ createSpy: vi.fn })], + }, + }) + expect(wrapper.text()).toContain('Google Drive') + expect(wrapper.text()).toContain('OneDrive') + expect(wrapper.text()).toContain('Nextcloud') + expect(wrapper.text()).toContain('WebDAV') + }) + + it('shows "Not connected" state when no connections active', () => { + const wrapper = mount(SettingsCloudTab, { + global: { + plugins: [createTestingPinia({ + createSpy: vi.fn, + initialState: { cloudConnections: { connections: [], loading: false, error: null } }, + })], + }, + })
+ // All providers have Connect buttons when no connections exist + const connectButtons = wrapper.findAll('button') + expect(connectButtons.length).toBeGreaterThan(0) + }) + }) cd /Users/nik/Documents/Progamming/document_scanner/frontend && node -e " @@ -322,6 +415,7 @@ const files = [ 'src/components/settings/SettingsPreferencesTab.vue', 'src/components/settings/SettingsAiTab.vue', 'src/components/settings/SettingsCloudTab.vue', + 'src/components/settings/__tests__/SettingsCloudTab.test.js', 'src/components/cloud/CloudCredentialModal.vue', ]; files.forEach(f => { @@ -338,7 +432,7 @@ const cloud = fs.readFileSync('src/components/settings/SettingsCloudTab.vue', 'u if (!cloud.includes('google_drive')) throw new Error('SettingsCloudTab missing google_drive provider'); if (!cloud.includes('CloudCredentialModal')) throw new Error('SettingsCloudTab missing CloudCredentialModal'); console.log('SettingsCloudTab providers and modal: OK'); -" && npm --prefix /Users/nik/Documents/Progamming/document_scanner/frontend run build 2>&1 | tail -5 +" && npm --prefix /Users/nik/Documents/Progamming/document_scanner/frontend run test -- src/stores/__tests__/cloudConnections.test.js src/components/settings/__tests__/SettingsCloudTab.test.js 2>&1 | tail -10 && npm --prefix /Users/nik/Documents/Progamming/document_scanner/frontend run build 2>&1 | tail -5 - All 5 new/modified files exist @@ -349,8 +443,9 @@ console.log('SettingsCloudTab providers and modal: OK'); - SettingsCloudTab.vue uses useCloudConnectionsStore - CloudCredentialModal.vue contains authMethod ref and auth method radio group - `npm run build` (Vite build) exits 0 without errors + - SettingsCloudTab.test.js exists with at least one mount test confirming all 4 providers render (W4 — CLAUDE.md requirement for new components) - 5 files created/modified; 3-tab SettingsView with OAuth handling; SettingsCloudTab with 4 providers; CloudCredentialModal; Vite build passes + 5 files created/modified; 3-tab SettingsView with OAuth 
handling; SettingsCloudTab with 4 providers; CloudCredentialModal; SettingsCloudTab mount test; Vite build passes @@ -384,6 +479,7 @@ cd /Users/nik/Documents/Progamming/document_scanner/frontend && npm run build 2> - SettingsView.vue: 3-tab layout; OAuth success/error handling; tab strip matches AdminView pattern - SettingsCloudTab.vue: all 4 providers; status badges; action buttons per status; REQUIRES_REAUTH banner; disconnect all - CloudCredentialModal.vue: server URL + username + auth method toggle + password; correct cancel/save labels +- Vitest: cloudConnections.test.js (4 tests passing) and SettingsCloudTab.test.js (2 tests passing) - Vite build exits 0 diff --git a/.planning/phases/05-cloud-storage-backends/05-RESEARCH.md b/.planning/phases/05-cloud-storage-backends/05-RESEARCH.md index 1b47276..c015d60 100644 --- a/.planning/phases/05-cloud-storage-backends/05-RESEARCH.md +++ b/.planning/phases/05-cloud-storage-backends/05-RESEARCH.md @@ -809,22 +809,25 @@ async def list_connections( --- -## Open Questions +## Open Questions (RESOLVED) 1. **Google Drive object key scheme for `stat_object`** - What we know: MinIO `stat_object` returns size in bytes from the storage layer. Google Drive returns file metadata including `size` from `files.get(fileId, fields='size')`. - What's unclear: Google Drive may not return `size` for Google Workspace files (Docs, Sheets, Slides) since they have no binary size. DocuVault uploads binary files, so this may not be an issue in practice. - Recommendation: Implement `stat_object` using `service.files().get(fileId=object_key, fields="size").execute()` and return `int(metadata["size"])`. Add a fallback of `0` for files without a size. + - **RESOLVED:** Use `service.files().get(fileId=object_key, fields="size").execute()` and return `int(metadata.get("size", 0))`. DocuVault only uploads binary files so the `0` fallback handles edge cases without breaking functionality. 2. 
**Nextcloud folder listing path convention** - What we know: Nextcloud WebDAV base path is typically `/remote.php/dav/files/{username}/`. - What's unclear: Whether the `webdavclient3` `Client` automatically handles the `/remote.php/dav/files/{username}/` prefix or whether it must be included in the `server_url`. - Recommendation: Store `server_url` as the full WebDAV root (e.g., `https://nc.example.com/remote.php/dav/files/alice/`) and use relative paths within it. Test with PROPFIND on the root to validate the connection (D-08). + - **RESOLVED:** `server_url` stores the full WebDAV root including the `/remote.php/dav/files/{username}/` prefix. All relative paths within WebDAVBackend and NextcloudBackend are appended to this base. Connection validation uses a PROPFIND on the root path per D-08. 3. **Microsoft Graph upload for files > 4 MB** - What we know: Simple upload (PUT `/me/drive/root:/{path}:/content`) is limited to 4 MB. Resumable sessions handle larger files. - What's unclear: The Phase 5 plan should specify whether to implement resumable sessions upfront or use a 4 MB size gate. - Recommendation: Implement resumable upload session (`createUploadSession`) for all files to avoid the hard limit. It handles both small and large files without a size check. + - **RESOLVED:** Implement `createUploadSession` for ALL file sizes (no size gate). `CHUNK_SIZE = 10 * 1024 * 1024` (10 MiB per fragment, a multiple of the 320 KiB fragment granularity Graph requires for upload sessions; the 4 MB cap applies only to simple upload) is used in all OneDrive uploads. Pitfall 6 is documented in the Common Pitfalls section. ---