From 0b0cd07330e8226bd81cb09b6d4c8eb3f6ef44de Mon Sep 17 00:00:00 2001 From: curo1305 Date: Tue, 19 May 2026 00:03:14 +0200 Subject: [PATCH] fix(chat): fall back to streaming when provider rejects function calling Local models (e.g. Gemma on LM Studio) return HTTP 400 when sent a tools-spec request. Catch litellm.BadRequestError in the tool-use loop, inform the user once that tools are disabled, and retry as a plain streaming call so the conversation continues normally. Co-Authored-By: Claude Sonnet 4.6 --- src/pyra/chat/session.py | 42 +++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/pyra/chat/session.py b/src/pyra/chat/session.py index aa24776..f378cab 100644 --- a/src/pyra/chat/session.py +++ b/src/pyra/chat/session.py @@ -282,25 +282,35 @@ def _call_ai( return render_streaming_response(stream) # Plugin tool-use loop (non-streaming for tool calls, renders final response) - for _iteration in range(10): - response = litellm.completion( + try: + for _iteration in range(10): + response = litellm.completion( + **base_kwargs, + messages=history.build_for_api(), + tools=tools_spec, + tool_choice="auto", + stream=False, + ) + message = response.choices[0].message + + if not message.tool_calls: + return render_text_response(message.content or "") + + history.add_tool_call_message(message) + results = executor.execute_tool_call_batch(message.tool_calls) + for r in results: + history.add_tool_result(r["tool_call_id"], r["result"]) + + return render_text_response("Error: tool-use loop exceeded maximum iterations.") + + except litellm.BadRequestError: + render_info("This model does not support function calling — tools disabled.") + stream = litellm.completion( **base_kwargs, messages=history.build_for_api(), - tools=tools_spec, - tool_choice="auto", - stream=False, + stream=True, ) - message = response.choices[0].message - - if not message.tool_calls: - return render_text_response(message.content or "") - - history.add_tool_call_message(message) - results = executor.execute_tool_call_batch(message.tool_calls) - for r in results: - history.add_tool_result(r["tool_call_id"], r["result"]) - - return render_text_response("Error: tool-use loop exceeded maximum iterations.") + return render_streaming_response(stream) def _show_help(plugin_slash: dict) -> None: