5eb81404c2
Replace the static model text prompt with live API queries: - _fetch_local_models(): queries /v1/models (LM Studio, llama.cpp) or /api/tags (Ollama) and returns a questionary.select list - _fetch_lmstudio_available_models(): queries LM Studio's beta /api/v0/models to list downloaded-but-not-loaded models - _load_lmstudio_model(): tries /api/v0/models/load to load a model in-place; falls back to telling the user to load manually - Cloud providers keep the existing text-input behaviour Also replace hardcoded LMSTUDIO_MODEL in integration tests with a lmstudio_model fixture that queries the API at runtime and uses whichever model is currently loaded (skips if none). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
120 lines
3.6 KiB
Python
120 lines
3.6 KiB
Python
"""
|
|
Live integration test against LM Studio at localhost:1234.
|
|
Skipped automatically if LM Studio is not running or no model is loaded.
|
|
"""
|
|
import httpx
|
|
import pytest
|
|
|
|
# Base URL of the local LM Studio server. It is used both for the
# ``/models`` availability probe and as the ``api_base`` of every
# completion call made by the tests in this module.
_LMSTUDIO_BASE_URL = "http://localhost:1234/v1"
|
|
|
|
|
|
def _get_loaded_model() -> str | None:
    """Probe LM Studio and return the ID of the first model it lists.

    Sends a GET request to ``{_LMSTUDIO_BASE_URL}/models`` with a 2 second
    timeout and reads the ``data`` list from the JSON reply.

    Returns:
        The ``id`` of the first entry in ``data``, or ``None`` when the list
        is empty or missing, or when anything at all goes wrong (connection
        failure, HTTP error status, unexpected payload).
    """
    try:
        reply = httpx.get(f"{_LMSTUDIO_BASE_URL}/models", timeout=2.0)
        reply.raise_for_status()
        entries = reply.json().get("data", [])
        if not entries:
            return None
        return entries[0]["id"]
    except Exception:
        # Deliberately best-effort: any failure simply means "no model",
        # which lets the dependent tests be skipped instead of erroring.
        return None
|
|
|
|
|
|
@pytest.fixture()
def lmstudio_model() -> str:
    """Provide the ID of the first model LM Studio reports.

    The requesting test is skipped when ``_get_loaded_model()`` returns
    ``None``, i.e. the server could not be queried or listed no model.
    """
    loaded = _get_loaded_model()
    if loaded is not None:
        return loaded
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("LM Studio not reachable or no model currently loaded")
|
|
|
|
|
|
def test_basic_completion(lmstudio_model):
    """A non-streaming completion against LM Studio yields non-empty text."""
    import litellm

    litellm.suppress_debug_info = True

    request = {
        "model": f"openai/{lmstudio_model}",
        "messages": [
            {"role": "user", "content": "Reply with exactly the word: PONG"}
        ],
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 20,
        "stream": False,
    }
    response = litellm.completion(**request)

    reply = response.choices[0].message.content
    # Only checks that something came back (not None, not empty); the
    # exact wording of the reply is not asserted.
    assert reply
|
|
|
|
|
|
def test_streaming_completion(lmstudio_model):
    """A streaming completion yields at least one chunk and some text."""
    import litellm

    litellm.suppress_debug_info = True

    request = {
        "model": f"openai/{lmstudio_model}",
        "messages": [{"role": "user", "content": "Count from 1 to 3."}],
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 50,
        "stream": True,
    }
    # Drain the whole stream up front so the chunks can be inspected twice.
    received = list(litellm.completion(**request))
    assert received

    # A chunk's delta content may be None; treat that as an empty fragment.
    fragments = [chunk.choices[0].delta.content or "" for chunk in received]
    streamed_text = "".join(fragments)
    assert streamed_text
|
|
|
|
|
|
def test_injection_scan_on_live_response(tmp_pyra_home, lmstudio_model):
    """Run the injection scanner over real model output and make sure it completes.

    ``tmp_pyra_home`` is requested only for its side effects; it is defined
    outside this module and is not referenced in the body.
    """
    import litellm
    from pyra.security.injection import scan_response

    litellm.suppress_debug_info = True

    request = {
        "model": f"openai/{lmstudio_model}",
        "messages": [
            {"role": "user", "content": "Explain what a list is in Python."}
        ],
        "api_base": _LMSTUDIO_BASE_URL,
        "api_key": "lm-studio",
        "max_tokens": 200,
        "stream": False,
    }
    response = litellm.completion(**request)
    reply = response.choices[0].message.content

    # No assertion on the number of findings: some models phrase things in
    # ways that trip a pattern. The point is that scanning real output does
    # not raise; any findings are just printed for inspection.
    findings = scan_response(reply)
    for w in findings:
        print(f"[warning] {w.pattern_label}: {w.matched_text!r}")
|
|
|
|
|
|
def test_pyra_chat_session_with_lmstudio(tmp_pyra_home, lmstudio_model):
    """End-to-end: build and save a config, build history, call the model.

    ``tmp_pyra_home`` is requested only for its side effects; it is defined
    outside this module and is not referenced in the body.
    """
    from pyra.config.schema import PyraConfig, ProviderConfig
    from pyra.config.manager import save_config
    from pyra.chat.history import ConversationHistory
    import litellm

    litellm.suppress_debug_info = True

    # Point the pyra configuration at the local LM Studio server and persist it.
    provider = ProviderConfig(
        provider_id="lmstudio",
        model=lmstudio_model,
        base_url=_LMSTUDIO_BASE_URL,
    )
    cfg = PyraConfig(ai=provider)
    save_config(cfg)

    # Build the API message list from a single user turn.
    history = ConversationHistory(cfg)
    history.add_user("Say hello in one word.")
    payload = history.build_for_api()

    response = litellm.completion(
        model=f"openai/{lmstudio_model}",
        messages=payload,
        api_base=_LMSTUDIO_BASE_URL,
        api_key="lm-studio",
        max_tokens=30,
        stream=False,
    )

    reply = response.choices[0].message.content
    # The reply must contain something other than whitespace.
    assert reply and reply.strip()
|