# Pyra/tests/integration/test_lmstudio.py

"""
Live integration tests against LM Studio at localhost:1234.
Skipped automatically if LM Studio is not running or no model is loaded.
"""
import httpx
import pytest

# Base URL shared by every test in this module: it is used both for the
# `/models` availability probe and as the `api_base` passed to litellm.
_LMSTUDIO_BASE_URL = "http://localhost:1234/v1"


def _get_loaded_model() -> str | None:
    """Probe LM Studio's ``/models`` endpoint for a usable model ID.

    Returns:
        The ``id`` of the first entry in the response's ``data`` list, or
        ``None`` when that list is empty or missing, or when anything at all
        goes wrong (connection failure, timeout, HTTP error status,
        unexpected payload shape).
    """
    # Deliberately best-effort: every failure mode collapses to "not
    # available" so that callers can skip instead of erroring out.
    try:
        reply = httpx.get(f"{_LMSTUDIO_BASE_URL}/models", timeout=2.0)
        reply.raise_for_status()
        entries = reply.json().get("data", [])
        if not entries:
            return None
        return entries[0]["id"]
    except Exception:
        return None


@pytest.fixture()
def lmstudio_model() -> str:
    """Provide the ID of a model served by LM Studio, or skip the test.

    The probe is repeated for every test that requests this fixture
    (default function scope), so a server that goes away mid-run results in
    skips rather than failures.
    """
    loaded = _get_loaded_model()
    if loaded is not None:
        return loaded
    # pytest.skip() raises, so nothing is returned on this path.
    pytest.skip("LM Studio not reachable or no model currently loaded")


def test_basic_completion(lmstudio_model):
    """Smoke-test a non-streaming completion: the reply must contain text.

    Only non-emptiness is checked; the reply is not required to match the
    word requested in the prompt.
    """
    import litellm
    litellm.suppress_debug_info = True

    request = {
        # The "openai/" prefix is how litellm is told to use its
        # OpenAI-compatible client against a custom api_base.
        "model": f"openai/{lmstudio_model}",
        "messages": [{"role": "user", "content": "Reply with exactly the word: PONG"}],
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 20,
        "stream": False,
    }
    reply = litellm.completion(**request)

    content = reply.choices[0].message.content
    assert content is not None and len(content) != 0


def test_streaming_completion(lmstudio_model):
    """Smoke-test a streaming completion.

    The stream must yield at least one chunk, and the concatenated delta
    text across all chunks must be non-empty.
    """
    import litellm
    litellm.suppress_debug_info = True

    request = {
        "model": f"openai/{lmstudio_model}",
        "messages": [{"role": "user", "content": "Count from 1 to 3."}],
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 50,
        "stream": True,
    }
    # Drain the stream fully before asserting anything about it.
    received = list(litellm.completion(**request))
    assert received

    pieces = []
    for chunk in received:
        # A chunk's delta may carry no content (None); count it as empty text.
        pieces.append(chunk.choices[0].delta.content or "")
    assert "".join(pieces)


def test_injection_scan_on_live_response(tmp_pyra_home, lmstudio_model):
    """Run the injection scanner over real model output and report findings.

    The test passes as long as ``scan_response`` does not raise. Any warnings
    it returns are printed for inspection but are not asserted on, because
    some models phrase things in ways that may trip the scanner.

    ``tmp_pyra_home`` is requested but not used directly; it is defined
    outside this file (presumably it isolates Pyra's home directory; confirm
    in conftest).
    """
    import litellm
    from pyra.security.injection import scan_response
    litellm.suppress_debug_info = True

    request = {
        "model": f"openai/{lmstudio_model}",
        "messages": [{"role": "user", "content": "Explain what a list is in Python."}],
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 200,
        "stream": False,
    }
    reply = litellm.completion(**request)

    findings = scan_response(reply.choices[0].message.content)
    # Surface whatever the scanner flagged without failing the test on it.
    for finding in findings:
        print(f"[warning] {finding.pattern_label}: {finding.matched_text!r}")


def test_pyra_chat_session_with_lmstudio(tmp_pyra_home, lmstudio_model):
    """End-to-end smoke test: config -> history -> litellm.

    Builds and saves a ``PyraConfig`` pointing at LM Studio, assembles the
    message list through ``ConversationHistory``, sends it with litellm, and
    checks that the reply contains non-whitespace text.

    NOTE(review): this body does not call the injection scanner or any vault
    API directly; whether ``save_config`` / ``ConversationHistory`` touch them
    internally cannot be told from this file.
    """
    from pyra.config.schema import PyraConfig, ProviderConfig
    from pyra.config.manager import save_config
    from pyra.chat.history import ConversationHistory
    import litellm

    litellm.suppress_debug_info = True

    provider = ProviderConfig(
        provider_id="lmstudio",
        model=lmstudio_model,
        base_url=_LMSTUDIO_BASE_URL,
    )
    cfg = PyraConfig(ai=provider)
    save_config(cfg)

    conversation = ConversationHistory(cfg)
    conversation.add_user("Say hello in one word.")
    messages = conversation.build_for_api()

    request = {
        "model": f"openai/{lmstudio_model}",
        "messages": messages,
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 30,
        "stream": False,
    }
    reply = litellm.completion(**request)

    content = reply.choices[0].message.content
    # Whitespace-only replies count as empty here.
    assert content is not None and content.strip() != ""