fix(chat): fall back to streaming when provider rejects function calling

Local models (e.g. Gemma on LM Studio) return HTTP 400 when sent a request that includes a tools spec. Catch litellm.BadRequestError in the tool-use loop, inform the user once that tools are disabled, and retry as a plain streaming call so the conversation continues normally.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 00:03:14 +02:00
parent f1213e28c8
commit 0b0cd07330
1 changed file with 26 additions and 16 deletions
@@ -282,25 +282,35 @@ def _call_ai(
        return render_streaming_response(stream)
    # Plugin tool-use loop (non-streaming for tool calls, renders final response)
-    for _iteration in range(10):
+    try:
-        response = litellm.completion(
+        for _iteration in range(10):
            response = litellm.completion(
                **base_kwargs,
                messages=history.build_for_api(),
                tools=tools_spec,
                tool_choice="auto",
                stream=False,
            )
            message = response.choices[0].message
            if not message.tool_calls:
                return render_text_response(message.content or "")
            history.add_tool_call_message(message)
            results = executor.execute_tool_call_batch(message.tool_calls)
            for r in results:
                history.add_tool_result(r["tool_call_id"], r["result"])
        return render_text_response("Error: tool-use loop exceeded maximum iterations.")
    except litellm.BadRequestError:
        render_info("This model does not support function calling — tools disabled.")
        stream = litellm.completion(
            **base_kwargs,
            messages=history.build_for_api(),
-            tools=tools_spec,
+            stream=True,
            tool_choice="auto",
            stream=False,
        )
-        message = response.choices[0].message
+        return render_streaming_response(stream)
        if not message.tool_calls:
            return render_text_response(message.content or "")
        history.add_tool_call_message(message)
        results = executor.execute_tool_call_batch(message.tool_calls)
        for r in results:
            history.add_tool_result(r["tool_call_id"], r["result"])
    return render_text_response("Error: tool-use loop exceeded maximum iterations.")
 def _show_help(plugin_slash: dict) -> None: