feat(03-02): implement presigned upload flow, quota enforcement, cleanup task
- Replace POST /api/documents/upload with POST /api/documents/upload-url + /{id}/confirm
- upload-url: create pending Document row with user_id=None (Wave 2), return presigned PUT URL
- confirm: stat MinIO for authoritative size (T-03-05), atomic quota UPDATE (T-03-06, STORE-03)
- Confirm returns 413 with {used_bytes, limit_bytes, rejected_bytes} on quota exceeded (STORE-05)
- Wave 2 guard: skip quota UPDATE when doc.user_id is None (Plan 03-03 removes this)
- Add GET /api/auth/me/quota to api/auth.py (STORE-04)
- services/storage.py: remove save_upload (D-04); add GREATEST(0, used_bytes-delta) quota decrement to delete_document (STORE-06)
- tasks/document_tasks.py: add cleanup_abandoned_uploads Celery beat task (D-06)
- celery_app.py: add beat_schedule for cleanup-abandoned-uploads every 30 minutes
- tests/test_documents.py: replace legacy /upload tests with xfail; add real test logic for upload-url/confirm/get-quota
- tests/test_quota.py: implement real test logic with xfail for PostgreSQL-specific SQL
This commit is contained in:
@@ -92,3 +92,51 @@ async def _run(document_id: str) -> dict:
|
||||
"status": "classification_failed",
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
|
||||
@celery_app.task(name="tasks.document_tasks.cleanup_abandoned_uploads")
def cleanup_abandoned_uploads() -> dict:
    """Synchronous Celery entry point for purging abandoned uploads (D-06).

    Runs the ``_cleanup_abandoned`` coroutine to completion and hands back
    its summary dict. That coroutine deletes Document rows with
    status='pending' older than 1 hour, together with their storage objects.

    Intended to be enqueued periodically by Celery beat (the beat_schedule
    lives in celery_app.py). Per the upload design, quota is never reserved
    for pending rows, so nothing quota-related is touched here.

    Returns:
        The dict produced by ``_cleanup_abandoned``.
    """
    # This task is a plain (non-async) function, so the coroutine is driven
    # on a fresh event loop via asyncio.run.
    summary = asyncio.run(_cleanup_abandoned())
    return summary
|
||||
|
||||
|
||||
async def _cleanup_abandoned() -> dict:
    """Async body for cleanup_abandoned_uploads.

    Selects Document rows with status='pending' whose created_at is more
    than one hour in the past, removes each row's storage object
    (best-effort), deletes the rows, and commits once at the end.

    A storage deletion failure never aborts the run: it is logged with its
    traceback and the DB row is still deleted, so a pending row whose object
    was never uploaded does not get stuck forever.

    Returns:
        {"cleaned": N}, where N is the number of Document rows deleted.
    """
    import logging
    from datetime import datetime, timezone, timedelta
    from sqlalchemy import select

    from db.session import AsyncSessionLocal
    from db.models import Document
    from storage import get_storage_backend

    logger = logging.getLogger(__name__)

    # Timezone-aware cutoff: rows created before this instant are abandoned.
    cutoff = datetime.now(timezone.utc) - timedelta(hours=1)
    async with AsyncSessionLocal() as session:
        result = await session.execute(
            select(Document).where(
                Document.status == "pending",
                Document.created_at < cutoff,
            )
        )
        docs = result.scalars().all()
        backend = get_storage_backend()
        cleaned = 0
        for doc in docs:
            object_key = doc.object_key
            if object_key:
                try:
                    await backend.delete_object(object_key)
                except Exception:
                    # Best-effort: the object may never have been uploaded.
                    # Log rather than swallow silently, so genuine storage
                    # failures (which would orphan the object once the row
                    # is gone) remain visible to operators.
                    logger.warning(
                        "cleanup_abandoned_uploads: could not delete storage "
                        "object %r; deleting the pending row anyway",
                        object_key,
                        exc_info=True,
                    )
            await session.delete(doc)
            cleaned += 1
        # Single commit for the whole batch.
        await session.commit()
    return {"cleaned": cleaned}
|
||||
|
||||
Reference in New Issue
Block a user