"""
Live integration test against LM Studio at localhost:1234.
Skipped automatically if LM Studio is not running or no model is loaded.
"""

import httpx
import pytest

_LMSTUDIO_BASE_URL = "http://localhost:1234/v1"


def _get_loaded_model() -> str | None:
    """Ask LM Studio which models are loaded and return the first ID.

    Returns:
        The ``id`` of the first entry under ``data`` in the ``/models``
        response, or ``None`` when the list is empty or anything at all
        goes wrong (connection error, bad status, unexpected payload).
    """
    try:
        reply = httpx.get(f"{_LMSTUDIO_BASE_URL}/models", timeout=2.0)
        reply.raise_for_status()
        loaded = reply.json().get("data", [])
        if not loaded:
            return None
        return loaded[0]["id"]
    except Exception:
        # Deliberately broad: any failure here just means "nothing to test
        # against", which the fixture below turns into a skip.
        return None


def _quiet_litellm():
    """Import litellm lazily, silence its debug banner, and return the module."""
    import litellm

    litellm.suppress_debug_info = True
    return litellm


def _endpoint_kwargs(model_id: str) -> dict:
    """Build the litellm keyword arguments that address the LM Studio server.

    The model is prefixed with ``openai/`` and the API key is the fixed
    placeholder string used throughout this file.
    """
    return {
        "model": f"openai/{model_id}",
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
    }


@pytest.fixture()
def lmstudio_model() -> str:
    """Provide the ID of the first loaded LM Studio model, or skip the test."""
    model_id = _get_loaded_model()
    if model_id is not None:
        return model_id
    pytest.skip("LM Studio not reachable or no model currently loaded")


def test_basic_completion(lmstudio_model):
    """A non-streaming completion must come back with non-empty content."""
    llm = _quiet_litellm()
    prompt = [{"role": "user", "content": "Reply with exactly the word: PONG"}]

    reply = llm.completion(
        messages=prompt,
        max_tokens=20,
        stream=False,
        **_endpoint_kwargs(lmstudio_model),
    )

    content = reply.choices[0].message.content
    assert content and len(content) > 0


def test_streaming_completion(lmstudio_model):
    """A streaming completion must yield chunks that join into non-empty text."""
    llm = _quiet_litellm()
    prompt = [{"role": "user", "content": "Count from 1 to 3."}]

    stream = llm.completion(
        messages=prompt,
        max_tokens=50,
        stream=True,
        **_endpoint_kwargs(lmstudio_model),
    )

    pieces = list(stream)
    assert len(pieces) > 0

    # A chunk's delta content may be None; treat that as an empty fragment.
    fragments = [piece.choices[0].delta.content or "" for piece in pieces]
    combined = "".join(fragments)
    assert len(combined) > 0


def test_injection_scan_on_live_response(tmp_pyra_home, lmstudio_model):
    """Run the injection scanner over real model output and report any hits.

    The test passes as long as ``scan_response`` completes; findings are
    printed rather than asserted on.
    """
    from pyra.security.injection import scan_response

    llm = _quiet_litellm()
    prompt = [{"role": "user", "content": "Explain what a list is in Python."}]

    reply = llm.completion(
        messages=prompt,
        max_tokens=200,
        stream=False,
        **_endpoint_kwargs(lmstudio_model),
    )

    content = reply.choices[0].message.content
    findings = scan_response(content)

    # Not asserting zero warnings — some models may have quirky phrasing —
    # but at least the scanner must not crash on real output
    for finding in findings:
        print(f"[warning] {finding.pattern_label}: {finding.matched_text!r}")


def test_pyra_chat_session_with_lmstudio(tmp_pyra_home, lmstudio_model):
    """Drive a chat turn through Pyra's config and history into litellm.

    Builds and saves a ``PyraConfig`` pointing at LM Studio, lets
    ``ConversationHistory`` assemble the API messages for one user turn, and
    checks that the completion returns non-blank text.
    """
    from pyra.config.schema import PyraConfig, ProviderConfig
    from pyra.config.manager import save_config
    from pyra.chat.history import ConversationHistory

    llm = _quiet_litellm()

    provider = ProviderConfig(
        provider_id="lmstudio",
        model=lmstudio_model,
        base_url=_LMSTUDIO_BASE_URL,
    )
    cfg = PyraConfig(ai=provider)
    save_config(cfg)

    history = ConversationHistory(cfg)
    history.add_user("Say hello in one word.")
    api_messages = history.build_for_api()

    reply = llm.completion(
        messages=api_messages,
        max_tokens=30,
        stream=False,
        **_endpoint_kwargs(lmstudio_model),
    )

    content = reply.choices[0].message.content
    assert content and len(content.strip()) > 0