fix(chat): fall back to streaming when provider rejects function calling

Local models (e.g. Gemma on LM Studio) return HTTP 400 when sent a
tools-spec request. Catch litellm.BadRequestError in the tool-use loop,
inform the user once that tools are disabled, and retry as a plain
streaming call so the conversation continues normally.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-05-19 00:03:14 +02:00
parent f1213e28c8
commit 0b0cd07330
+26 -16
View File
@@ -282,25 +282,35 @@ def _call_ai(
return render_streaming_response(stream)
# Plugin tool-use loop (non-streaming for tool calls, renders final response)
for _iteration in range(10):
response = litellm.completion(
try:
for _iteration in range(10):
response = litellm.completion(
**base_kwargs,
messages=history.build_for_api(),
tools=tools_spec,
tool_choice="auto",
stream=False,
)
message = response.choices[0].message
if not message.tool_calls:
return render_text_response(message.content or "")
history.add_tool_call_message(message)
results = executor.execute_tool_call_batch(message.tool_calls)
for r in results:
history.add_tool_result(r["tool_call_id"], r["result"])
return render_text_response("Error: tool-use loop exceeded maximum iterations.")
except litellm.BadRequestError:
render_info("This model does not support function calling — tools disabled.")
stream = litellm.completion(
**base_kwargs,
messages=history.build_for_api(),
tools=tools_spec,
tool_choice="auto",
stream=False,
stream=True,
)
message = response.choices[0].message
if not message.tool_calls:
return render_text_response(message.content or "")
history.add_tool_call_message(message)
results = executor.execute_tool_call_batch(message.tool_calls)
for r in results:
history.add_tool_result(r["tool_call_id"], r["result"])
return render_text_response("Error: tool-use loop exceeded maximum iterations.")
return render_streaming_response(stream)
def _show_help(plugin_slash: dict) -> None: