2f3efb9bf9
- storage.py: replace aiofiles filesystem ops with httpx calls to
storage-service PUT/GET/DELETE /objects/documents/{key}
- Document model: rename file_path → storage_key (plain object key, no path prefix)
- Migration 0008: ALTER COLUMN + data migration strips /data/documents/ prefix
- documents.py: update upload, delete, download endpoints; _extract_pdf_text
now takes bytes (pdfplumber.open(BytesIO)) instead of a filesystem path
- file_watcher.py: store storage_key instead of file_path on ingestion
- doc-service config: add STORAGE_SERVICE_URL env var
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
"""
|
|
Storage client for the storage-service HTTP API.
|
|
|
|
All persistent file I/O goes through storage-service:8020.
|
|
The bucket for all document PDFs is 'documents'.
|
|
Keys follow the pattern:
|
|
uploaded: {user_id}/{doc_id}.pdf
|
|
watch-ingested: watch/{doc_id}.pdf
|
|
"""
|
|
import logging
|
|
|
|
import httpx
|
|
|
|
from app.core.config import settings
|
|
|
|
# Module-level logger; delete_file uses it to report deletions that did not succeed.
logger = logging.getLogger(__name__)
|
|
|
|
# Bucket that holds every document PDF; it forms the /objects/<bucket>/ segment
# of each object URL built by _storage_url.
_BUCKET = "documents"
|
|
|
|
|
|
def _storage_url(key: str) -> str:
    """Return the storage-service URL for the object stored under *key*.

    The result has the form ``<STORAGE_SERVICE_URL>/objects/<bucket>/<key>``.

    Args:
        key: Plain object key, e.g. ``"<user_id>/<doc_id>.pdf"``. ``/`` is kept
            as the path separator; other reserved characters are
            percent-encoded.

    Returns:
        The absolute URL to use for requests on that object.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import quote

    # str() tolerates a non-str settings value (e.g. a URL object); dropping a
    # trailing "/" avoids producing ".../" + "/objects/...".
    base = str(settings.STORAGE_SERVICE_URL).rstrip("/")
    # Unescaped, a "?" or "#" inside the key would be read as the start of the
    # query string / fragment and truncate the object path.
    encoded_key = quote(str(key), safe="/")
    return f"{base}/objects/{_BUCKET}/{encoded_key}"
|
|
|
|
|
|
def build_storage_key(user_id: str, doc_id: str) -> str:
    """Build the object key for an uploaded document: ``<user_id>/<doc_id>.pdf``."""
    key_template = "{}/{}.pdf"
    return key_template.format(user_id, doc_id)
|
|
|
|
|
|
async def save_upload(file_data: bytes, user_id: str, doc_id: str) -> str:
    """Store *file_data* in storage-service and return the key it was written under.

    The key is derived from ``user_id`` and ``doc_id`` with
    ``build_storage_key``. The bytes are sent in a single PUT with a
    30-second timeout; an error response is surfaced by
    ``raise_for_status()``.
    """
    storage_key = build_storage_key(user_id, doc_id)
    target = _storage_url(storage_key)
    octet_stream = {"Content-Type": "application/octet-stream"}

    async with httpx.AsyncClient(timeout=30.0) as http:
        response = await http.put(target, content=file_data, headers=octet_stream)
        response.raise_for_status()

    return storage_key
|
|
|
|
|
|
async def download_file(storage_key: str) -> bytes:
    """Fetch the object stored under *storage_key* and return its raw bytes.

    Raises:
        FileNotFoundError: storage-service answered 404 for this key.
        httpx.HTTPStatusError: another error response, raised by
            ``raise_for_status()``.
    """
    object_url = _storage_url(storage_key)

    async with httpx.AsyncClient(timeout=60.0) as http:
        response = await http.get(object_url)

    # Translate "no such object" into the builtin exception before the
    # generic status check runs.
    missing = response.status_code == 404
    if missing:
        raise FileNotFoundError(f"Object not found: {storage_key}")

    response.raise_for_status()
    return response.content
|
|
|
|
|
|
async def delete_file(storage_key: str) -> None:
    """Best-effort removal of the object stored under *storage_key*.

    Failures are logged as warnings instead of being propagated: both an
    unexpected response status and any ``Exception`` raised while issuing the
    request are swallowed, so a failed deletion does not become an error for
    the caller.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as http:
            response = await http.delete(_storage_url(storage_key))

        status = response.status_code
        # 204 (No Content) and 404 (Not Found) are the only statuses that are
        # not reported.
        if status != 204 and status != 404:
            logger.warning("storage-service DELETE returned %s for key %s", status, storage_key)
    except Exception as err:
        logger.warning("Could not delete %s from storage-service: %s", storage_key, err)
|