chore: initial commit — existing single-user document scanner codebase
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Unit tests for AI provider JSON parsing robustness and classifier orchestration.
|
||||
Uses a mock provider — no real AI calls made.
|
||||
"""
|
||||
import json
|
||||
import pytest
|
||||
from ai.openai_provider import _parse_classification, _parse_suggestions, _strip_code_fences
|
||||
from ai.base import ClassificationResult
|
||||
|
||||
|
||||
def test_parse_clean_json():
    """Bare JSON with no wrapper yields the listed topics and no suggestions."""
    payload = '{"assigned_topics": ["finance", "invoices"], "new_topic_suggestions": []}'

    parsed = _parse_classification(payload)

    assert parsed.topics == ["finance", "invoices"]
    assert parsed.suggested_new_topics == []
|
||||
|
||||
|
||||
def test_parse_with_code_fence():
    """A response wrapped in a ```json fence is still parsed into both fields."""
    # Adjacent literals concatenate into the same single string the parser sees.
    fenced = (
        '```json\n'
        '{"assigned_topics": ["legal"], "new_topic_suggestions": ["contracts"]}\n'
        '```'
    )

    parsed = _parse_classification(fenced)

    assert parsed.topics == ["legal"]
    assert parsed.suggested_new_topics == ["contracts"]
|
||||
|
||||
|
||||
def test_parse_with_preamble():
    """Text before and after the JSON object does not prevent topic extraction."""
    chatty = (
        'Here is the classification:\n'
        '{"assigned_topics": ["hr"], "new_topic_suggestions": []}\n'
        'Done.'
    )

    parsed = _parse_classification(chatty)

    assert parsed.topics == ["hr"]
|
||||
|
||||
|
||||
def test_parse_malformed_returns_empty():
    """A reply containing no JSON at all produces empty topic and suggestion lists."""
    refusal = "I cannot classify this document."

    parsed = _parse_classification(refusal)

    assert parsed.topics == []
    assert parsed.suggested_new_topics == []
|
||||
|
||||
|
||||
def test_strip_code_fences():
    """Stripping a ```json fence leaves only the enclosed text."""
    fenced = "```json\n{}\n```"
    stripped = _strip_code_fences(fenced)
    assert stripped == "{}"
|
||||
|
||||
|
||||
def test_parse_suggestions_clean():
    """Every topic named in a bare JSON suggestion payload appears in the result."""
    payload = '{"suggested_topics": ["Human Resources", "Onboarding"]}'

    suggestions = _parse_suggestions(payload)

    # Membership only: the test does not pin ordering or exclude extra entries.
    for expected_name in ("Human Resources", "Onboarding"):
        assert expected_name in suggestions
|
||||
|
||||
|
||||
def test_parse_suggestions_with_fence():
    """A suggestion payload inside an unlabeled ``` fence parses to its topic list."""
    fenced = '```\n{"suggested_topics": ["Finance"]}\n```'

    suggestions = _parse_suggestions(fenced)

    assert suggestions == ["Finance"]
|
||||
|
||||
|
||||
def test_parse_suggestions_malformed():
    """A reply with no JSON in it yields an empty suggestion list."""
    not_json = "No suggestions available."

    suggestions = _parse_suggestions(not_json)

    assert suggestions == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_classifier_with_mock_provider(isolated_data_dir):
    """Exercise classify_document end to end with a stubbed AI provider.

    Seeds one unclassified document and two topics, replaces the provider
    factory used by ``services.classifier`` with a stub that returns a canned
    classification, and then checks the returned topics, the topic store and
    the document's stored metadata.

    NOTE(review): ``isolated_data_dir`` is a fixture defined outside this
    file; presumably it redirects storage to a throwaway location - confirm.
    """
    from unittest.mock import AsyncMock, patch
    from ai.base import ClassificationResult
    import services.storage as st

    # Seed a document that has not been classified yet.
    document_id = "test-doc-1"
    document_record = {
        "id": document_id,
        "original_name": "test.txt",
        "filename": "test-doc-1.txt",
        "mime_type": "text/plain",
        "size_bytes": 50,
        "extracted_text": "Invoice for services rendered in March 2026.",
        "topics": [],
        "created_at": "2026-01-01T00:00:00Z",
        "classified_at": None,
    }
    st.save_metadata(document_record)

    # Seed the topics that exist before classification runs.
    for topic_name in ("Finance", "Legal"):
        st.create_topic(topic_name)

    # Canned provider answer: one existing topic plus one brand-new suggestion.
    canned_answer = ClassificationResult(
        topics=["Finance"],
        suggested_new_topics=["Invoices"],
        reasoning="Document is about financial invoicing.",
    )
    stub_provider = AsyncMock()
    stub_provider.classify = AsyncMock(return_value=canned_answer)

    with patch("services.classifier.get_provider", return_value=stub_provider):
        # Imported while the patch is active, as in the original flow.
        from services.classifier import classify_document

        assigned = await classify_document(document_id)

    assert "Finance" in assigned
    assert "Invoices" in assigned

    # Verify the suggested topic was auto-created in the topic store.
    assert "Invoices" in (entry["name"] for entry in st.load_topics())

    # Verify the stored document metadata was updated.
    stored = st.get_metadata(document_id)
    assert "Finance" in stored["topics"]
|
||||
Reference in New Issue
Block a user