feat(phase-4-05): document streaming proxy GET /api/documents/{id}/content (DOC-02)
- Add _parse_range() helper: validates Range header bounds, raises 416 on invalid
- Add stream_document_content endpoint with get_regular_user dep (admin → 403)
- Access check: owner OR Share.recipient_id; neither → 404
- Bytes fetched via get_object() only — presigned_get_url() never called
- Range requests return 206 + Content-Range header
- Add pdf_open_mode column to User ORM model (migration 0004 already applied)
- Use HTTP_416_RANGE_NOT_SATISFIABLE (non-deprecated constant)
This commit is contained in:
@@ -20,7 +20,8 @@ import uuid
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
||||||
|
from fastapi.responses import StreamingResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from sqlalchemy import select, text, func
|
from sqlalchemy import select, text, func
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
@@ -395,3 +396,97 @@ async def classify_document(
|
|||||||
raise HTTPException(500, f"Classification failed: {e}")
|
raise HTTPException(500, f"Classification failed: {e}")
|
||||||
|
|
||||||
return {"topics": topics}
|
return {"topics": topics}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Range header parsing helper ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _parse_range(range_header: str, file_size: int) -> tuple:
|
||||||
|
"""Parse a 'bytes=X-Y' Range header and return (start, end).
|
||||||
|
|
||||||
|
Returns (start, end) where both are inclusive byte offsets.
|
||||||
|
Raises HTTP 416 on any invalid or out-of-bounds range.
|
||||||
|
|
||||||
|
T-04-05-03: validates start <= end, start >= 0, end < file_size.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
h = range_header.replace("bytes=", "").split("-")
|
||||||
|
start = int(h[0]) if h[0] != "" else 0
|
||||||
|
end = int(h[1]) if h[1] != "" else file_size - 1
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
raise HTTPException(status.HTTP_416_RANGE_NOT_SATISFIABLE)
|
||||||
|
if start > end or start < 0 or end >= file_size:
|
||||||
|
raise HTTPException(status.HTTP_416_RANGE_NOT_SATISFIABLE)
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
|
||||||
|
# ── GET /api/documents/{doc_id}/content ──────────────────────────────────────
|
||||||
|
|
||||||
|
def _inline_content_disposition(filename: str) -> str:
    """Build an ``inline`` Content-Disposition value that is safe as a header.

    Plain printable-ASCII names (without quotes or backslashes) are emitted
    unchanged as ``inline; filename="<name>"``. Any other name gets a
    sanitised ASCII fallback plus an RFC 5987 ``filename*`` parameter carrying
    the UTF-8, percent-encoded original.
    """
    from urllib.parse import quote  # local import: only needed by this helper

    fallback = "".join(
        ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
        for ch in filename
    )
    if fallback == filename:
        return f'inline; filename="{filename}"'
    encoded = quote(filename, safe="")
    return f"inline; filename=\"{fallback}\"; filename*=UTF-8''{encoded}"


@router.get("/{doc_id}/content")
async def stream_document_content(
    doc_id: str,
    request: Request,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Stream document bytes directly from MinIO (DOC-02).

    T-04-05-01: uses get_regular_user — admin role → 403 (critical security invariant).
    T-04-05-02: bytes fetched via get_object() ONLY — presigned_get_url() never called.
    T-04-05-03: Range header validated via _parse_range(); invalid range → 416.
    T-04-05-04: access gated on ownership OR a Share row whose recipient is the caller.

    Args:
        doc_id: Document id from the path; must parse as a UUID.
        request: Incoming request, read only for its ``Range`` header.
        session: Database session used to load the document and share rows.
        current_user: Authenticated caller resolved by ``get_regular_user``.

    Returns 200 (or 206 for Range requests) with:
      Content-Type: doc.content_type
      Content-Disposition: inline; filename="<filename>"
      Accept-Ranges: bytes
      Content-Length: <size>

    Raises:
        HTTPException: 404 when the id is not a UUID, the document does not
            exist, or the caller is neither owner nor share recipient (the
            same response in all three cases, so existence is not leaked);
            416 from ``_parse_range`` for an invalid Range header.
    """
    try:
        uid = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Document not found")

    doc = await session.get(Document, uid)
    if doc is None:
        raise HTTPException(status_code=404, detail="Document not found")

    # Access control: owner OR share recipient (T-04-05-04)
    if doc.user_id != current_user.id:
        result = await session.execute(
            select(Share).where(
                Share.document_id == doc.id,
                Share.recipient_id == current_user.id,
            )
        )
        # first() rather than scalar_one_or_none(): duplicate share rows for
        # the same (document, recipient) must grant access, not raise
        # MultipleResultsFound and surface as a 500.
        # NOTE(review): no expiry/revocation filter is applied here — confirm
        # whether Share has such a column that should be checked.
        share = result.scalars().first()
        if share is None:
            raise HTTPException(status_code=404, detail="Document not found")

    # Fetch bytes directly from MinIO — NEVER via presigned URL (T-04-05-02)
    # NOTE(review): the whole object is held in memory before responding;
    # presumably get_object() returns bytes — confirm for very large files.
    file_bytes = await get_storage_backend().get_object(doc.object_key)
    file_size = len(file_bytes)

    headers = {
        # Fall back to a generic type so a missing content_type cannot crash
        # header encoding.
        "content-type": doc.content_type or "application/octet-stream",
        # The filename is user-controlled: sanitise/encode it so quotes,
        # control characters or non-Latin-1 text cannot corrupt the header.
        "content-disposition": _inline_content_disposition(
            doc.filename or "document"
        ),
        "accept-ranges": "bytes",
        "content-length": str(file_size),
    }

    range_header = request.headers.get("range")
    if range_header:
        start, end = _parse_range(range_header, file_size)
        chunk = file_bytes[start : end + 1]
        headers["content-range"] = f"bytes {start}-{end}/{file_size}"
        # Content-Length must describe the slice actually sent, not the file.
        headers["content-length"] = str(len(chunk))
        return StreamingResponse(
            iter([chunk]),
            status_code=206,
            headers=headers,
        )

    return StreamingResponse(
        iter([file_bytes]),
        status_code=200,
        headers=headers,
    )
|
||||||
|
|||||||
@@ -61,6 +61,9 @@ class User(Base):
|
|||||||
default_storage_backend: Mapped[str] = mapped_column(
|
default_storage_backend: Mapped[str] = mapped_column(
|
||||||
String, nullable=False, default="minio"
|
String, nullable=False, default="minio"
|
||||||
)
|
)
|
||||||
|
pdf_open_mode: Mapped[str] = mapped_column(
|
||||||
|
String, nullable=False, server_default="in_app"
|
||||||
|
)
|
||||||
created_at: Mapped[datetime] = mapped_column(
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
|
TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user