zeroclaw_tool_call_parser/
lib.rs

1//! Tool call parsing for LLM responses.
2//!
3//! Extracts structured tool calls from free-text LLM output. Handles a dozen
4//! different formats: JSON, XML `<tool_call>` tags, GLM-style shortened syntax,
5//! MiniMax `<invoke>` blocks, Perl-style `[TOOL_CALL]` blocks, markdown fences,
6//! OpenAI native format, and more.
7//!
8//! This crate has no dependency on agent state, memory, model_providers, or channels.
9//! It is pure text transformation.
10
11use regex::Regex;
12use std::{collections::HashSet, sync::LazyLock};
13
14/// A single parsed tool call extracted from LLM output.
15#[derive(Debug, Clone)]
16pub struct ParsedToolCall {
17    pub name: String,
18    pub arguments: serde_json::Value,
19    pub tool_call_id: Option<String>,
20}
21
/// Kinds of internal tool-protocol envelope. Text classified as one of these
/// must not be surfaced as user-visible channel text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ToolProtocolEnvelopeKind {
    /// JSON carrying a `tool_calls` array with at least one call-shaped item.
    ToolCalls,
    /// Same shape as [`ToolProtocolEnvelopeKind::ToolCalls`], under the `toolcalls` key.
    ToolCallsAlias,
    /// JSON carrying a call-shaped `function_call` field.
    FunctionCall,
    /// JSON carrying a `tool_call_id` together with `content`, `result` or `output`.
    ToolResult,
    /// JSON whose `type` is `"function_call"`, with a name plus arguments or a `call_id`.
    ResponsesFunctionCall,
    /// Text that opens with a tool-call tag or fence and parses into at least one call.
    TaggedToolCall,
}
33
34fn parse_arguments_value(raw: Option<&serde_json::Value>) -> serde_json::Value {
35    let initial = match raw {
36        Some(serde_json::Value::String(s)) => serde_json::from_str::<serde_json::Value>(s)
37            .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new())),
38        Some(value) => value.clone(),
39        None => serde_json::Value::Object(serde_json::Map::new()),
40    };
41    unwrap_nested_json_strings(initial)
42}
43
44/// Recursively unwrap stringified JSON objects/arrays nested inside tool arguments.
45/// Why: Gemini (and some other model_providers) sometimes double-encode nested object/array
46/// parameters as JSON strings inside the outer arguments payload, which breaks tools
47/// that expect `Value::Object` / `Value::Array` at those positions.
48fn unwrap_nested_json_strings(value: serde_json::Value) -> serde_json::Value {
49    match value {
50        serde_json::Value::Object(map) => {
51            let mut out = serde_json::Map::with_capacity(map.len());
52            for (k, v) in map {
53                out.insert(k, unwrap_nested_json_strings(v));
54            }
55            serde_json::Value::Object(out)
56        }
57        serde_json::Value::Array(items) => {
58            serde_json::Value::Array(items.into_iter().map(unwrap_nested_json_strings).collect())
59        }
60        serde_json::Value::String(s) => {
61            let trimmed = s.trim_start();
62            if trimmed.starts_with('{') || trimmed.starts_with('[') {
63                match serde_json::from_str::<serde_json::Value>(&s) {
64                    Ok(parsed) => unwrap_nested_json_strings(parsed),
65                    Err(_) => serde_json::Value::String(s),
66                }
67            } else {
68                serde_json::Value::String(s)
69            }
70        }
71        other => other,
72    }
73}
74
75fn parse_tool_call_id(
76    root: &serde_json::Value,
77    function: Option<&serde_json::Value>,
78) -> Option<String> {
79    function
80        .and_then(|func| func.get("id"))
81        .or_else(|| root.get("id"))
82        .or_else(|| root.get("tool_call_id"))
83        .or_else(|| root.get("call_id"))
84        .and_then(serde_json::Value::as_str)
85        .map(str::trim)
86        .filter(|id| !id.is_empty())
87        .map(ToString::to_string)
88}
89
90pub fn canonicalize_json_for_tool_signature(value: &serde_json::Value) -> serde_json::Value {
91    match value {
92        serde_json::Value::Object(map) => {
93            let mut keys: Vec<String> = map.keys().cloned().collect();
94            keys.sort_unstable();
95            let mut ordered = serde_json::Map::new();
96            for key in keys {
97                if let Some(child) = map.get(&key) {
98                    ordered.insert(key, canonicalize_json_for_tool_signature(child));
99                }
100            }
101            serde_json::Value::Object(ordered)
102        }
103        serde_json::Value::Array(items) => serde_json::Value::Array(
104            items
105                .iter()
106                .map(canonicalize_json_for_tool_signature)
107                .collect(),
108        ),
109        _ => value.clone(),
110    }
111}
112
113fn parse_tool_call_value(value: &serde_json::Value) -> Option<ParsedToolCall> {
114    if let Some(function) = value.get("function") {
115        let tool_call_id = parse_tool_call_id(value, Some(function));
116        let raw_name = function
117            .get("name")
118            .and_then(|v| v.as_str())
119            .unwrap_or("")
120            .trim();
121        let name = map_tool_name_alias(raw_name).to_string();
122        if !name.is_empty() {
123            let arguments = parse_arguments_value(
124                function
125                    .get("arguments")
126                    .or_else(|| function.get("parameters")),
127            );
128            return Some(ParsedToolCall {
129                name,
130                arguments,
131                tool_call_id,
132            });
133        }
134    }
135
136    let tool_call_id = parse_tool_call_id(value, None);
137    let raw_name = value
138        .get("name")
139        .and_then(|v| v.as_str())
140        .unwrap_or("")
141        .trim();
142    let name = map_tool_name_alias(raw_name).to_string();
143
144    if name.is_empty() {
145        return None;
146    }
147
148    let arguments =
149        parse_arguments_value(value.get("arguments").or_else(|| value.get("parameters")));
150    Some(ParsedToolCall {
151        name,
152        arguments,
153        tool_call_id,
154    })
155}
156
157fn parse_tool_calls_from_json_value(value: &serde_json::Value) -> Vec<ParsedToolCall> {
158    let mut calls = Vec::new();
159
160    if let Some(tool_calls) = value.get("tool_calls").and_then(|v| v.as_array()) {
161        for call in tool_calls {
162            if let Some(parsed) = parse_tool_call_value(call) {
163                calls.push(parsed);
164            }
165        }
166
167        if !calls.is_empty() {
168            return calls;
169        }
170    }
171
172    if let Some(array) = value.as_array() {
173        for item in array {
174            if let Some(parsed) = parse_tool_call_value(item) {
175                calls.push(parsed);
176            }
177        }
178        return calls;
179    }
180
181    if let Some(parsed) = parse_tool_call_value(value) {
182        calls.push(parsed);
183    }
184
185    calls
186}
187
188fn has_non_empty_string(value: &serde_json::Value, key: &str) -> bool {
189    value
190        .get(key)
191        .and_then(serde_json::Value::as_str)
192        .is_some_and(|s| !s.trim().is_empty())
193}
194
195fn has_arguments_signal(value: &serde_json::Value) -> bool {
196    value.get("arguments").is_some() || value.get("parameters").is_some()
197}
198
199fn looks_like_tool_call_object(value: &serde_json::Value) -> bool {
200    if let Some(function) = value.get("function").and_then(serde_json::Value::as_object) {
201        let function = serde_json::Value::Object(function.clone());
202        return has_non_empty_string(&function, "name") && has_arguments_signal(&function);
203    }
204
205    has_non_empty_string(value, "name") && has_arguments_signal(value)
206}
207
208fn tool_call_array_has_protocol_shape(value: &serde_json::Value, key: &str) -> bool {
209    value
210        .get(key)
211        .and_then(serde_json::Value::as_array)
212        .is_some_and(|items| !items.is_empty() && items.iter().any(looks_like_tool_call_object))
213}
214
215fn has_tool_protocol_object_signal(value: &serde_json::Value) -> bool {
216    let Some(object) = value.as_object() else {
217        return false;
218    };
219
220    let has_args = has_arguments_signal(value);
221    let has_call_id = has_non_empty_string(value, "id")
222        || has_non_empty_string(value, "call_id")
223        || has_non_empty_string(value, "tool_call_id");
224
225    object
226        .get("function")
227        .and_then(serde_json::Value::as_object)
228        .is_some()
229        || (has_non_empty_string(value, "name") && has_args)
230        || (has_args && has_call_id)
231}
232
233fn tool_call_array_has_malformed_protocol_signal(value: &serde_json::Value, key: &str) -> bool {
234    value
235        .get(key)
236        .and_then(serde_json::Value::as_array)
237        .is_some_and(|items| !items.is_empty() && items.iter().any(has_tool_protocol_object_signal))
238}
239
240fn classify_tool_protocol_json_value(
241    value: &serde_json::Value,
242) -> Option<ToolProtocolEnvelopeKind> {
243    if value
244        .get("type")
245        .and_then(serde_json::Value::as_str)
246        .is_some_and(|ty| ty == "function_call")
247        && has_non_empty_string(value, "name")
248        && (has_arguments_signal(value) || has_non_empty_string(value, "call_id"))
249    {
250        return Some(ToolProtocolEnvelopeKind::ResponsesFunctionCall);
251    }
252
253    if tool_call_array_has_protocol_shape(value, "tool_calls") {
254        return Some(ToolProtocolEnvelopeKind::ToolCalls);
255    }
256
257    if tool_call_array_has_protocol_shape(value, "toolcalls") {
258        return Some(ToolProtocolEnvelopeKind::ToolCallsAlias);
259    }
260
261    if value
262        .get("function_call")
263        .is_some_and(looks_like_tool_call_object)
264    {
265        return Some(ToolProtocolEnvelopeKind::FunctionCall);
266    }
267
268    if has_non_empty_string(value, "tool_call_id")
269        && (value.get("content").is_some()
270            || value.get("result").is_some()
271            || value.get("output").is_some())
272    {
273        return Some(ToolProtocolEnvelopeKind::ToolResult);
274    }
275
276    None
277}
278
279fn json_value_mentions_known_tool(
280    value: &serde_json::Value,
281    known_tool_names: &HashSet<String>,
282) -> bool {
283    if known_tool_names.is_empty() {
284        return false;
285    }
286
287    let Some(object) = value.as_object() else {
288        return value.as_array().is_some_and(|items| {
289            items
290                .iter()
291                .any(|item| json_value_mentions_known_tool(item, known_tool_names))
292        });
293    };
294
295    let name_matches = |candidate: Option<&serde_json::Value>| {
296        candidate
297            .and_then(serde_json::Value::as_str)
298            .map(str::trim)
299            .filter(|name| !name.is_empty())
300            .is_some_and(|name| known_tool_names.contains(&name.to_ascii_lowercase()))
301    };
302
303    if name_matches(object.get("name")) {
304        return true;
305    }
306
307    if let Some(function) = object
308        .get("function")
309        .and_then(serde_json::Value::as_object)
310    {
311        let function = serde_json::Value::Object(function.clone());
312        if json_value_mentions_known_tool(&function, known_tool_names) {
313            return true;
314        }
315    }
316
317    if let Some(function_call) = object.get("function_call")
318        && json_value_mentions_known_tool(function_call, known_tool_names)
319    {
320        return true;
321    }
322
323    ["tool_calls", "toolcalls"].iter().any(|key| {
324        object
325            .get(*key)
326            .and_then(serde_json::Value::as_array)
327            .is_some_and(|items| {
328                items
329                    .iter()
330                    .any(|item| json_value_mentions_known_tool(item, known_tool_names))
331            })
332    })
333}
334
335pub fn tool_protocol_envelope_mentions_known_tool(
336    text: &str,
337    known_tool_names: &HashSet<String>,
338) -> bool {
339    if known_tool_names.is_empty() {
340        return false;
341    }
342
343    let trimmed = text.trim();
344    if trimmed.is_empty() {
345        return false;
346    }
347
348    if let Some(body) = json_fence_body(trimmed) {
349        return tool_protocol_envelope_mentions_known_tool(body, known_tool_names);
350    }
351
352    if starts_with_tool_protocol_tag_or_fence(trimmed) || contains_tool_protocol_tag_marker(trimmed)
353    {
354        let (_, calls) = parse_tool_calls(trimmed);
355        if calls
356            .iter()
357            .any(|call| known_tool_names.contains(&call.name.to_ascii_lowercase()))
358        {
359            return true;
360        }
361    }
362
363    serde_json::from_str::<serde_json::Value>(trimmed)
364        .is_ok_and(|value| json_value_mentions_known_tool(&value, known_tool_names))
365}
366
367fn has_malformed_tool_protocol_json_signal(value: &serde_json::Value) -> bool {
368    // Empty `tool_calls: []` is a valid strict-provider compatibility case;
369    // similar business JSON must also carry protocol-shaped fields before it
370    // is withheld from user-visible output.
371    tool_call_array_has_malformed_protocol_signal(value, "tool_calls")
372        || tool_call_array_has_malformed_protocol_signal(value, "toolcalls")
373        || value
374            .get("function_call")
375            .is_some_and(has_tool_protocol_object_signal)
376        || (value
377            .get("type")
378            .and_then(serde_json::Value::as_str)
379            .is_some_and(|ty| ty == "function_call")
380            && (has_non_empty_string(value, "name")
381                || has_non_empty_string(value, "call_id")
382                || has_arguments_signal(value)))
383        || (has_non_empty_string(value, "tool_call_id")
384            && (value.get("content").is_some()
385                || value.get("result").is_some()
386                || value.get("output").is_some()))
387}
388
389fn starts_with_tool_protocol_tag_or_fence(text: &str) -> bool {
390    let lower = text.trim_start().to_ascii_lowercase();
391    lower.starts_with("<tool_call")
392        || lower.starts_with("<toolcall")
393        || lower.starts_with("<tool-call")
394        || lower.starts_with("<invoke")
395        || lower.starts_with("<functioncall")
396        || lower.starts_with("<function_call")
397        || starts_with_tool_protocol_fence_lower(&lower)
398        || lower.starts_with("[tool_call]")
399}
400
401fn starts_with_tool_protocol_fence(text: &str) -> bool {
402    let lower = text.trim_start().to_ascii_lowercase();
403    starts_with_tool_protocol_fence_lower(&lower)
404}
405
406fn starts_with_tool_protocol_fence_lower(lower: &str) -> bool {
407    lower.starts_with("```tool_call")
408        || lower.starts_with("```toolcall")
409        || lower.starts_with("```tool-call")
410        || lower.starts_with("```invoke")
411        || starts_with_tool_name_fence_lower(lower)
412}
413
/// True when `lower` starts with ```` ```tool ```` immediately followed by a
/// whitespace character other than a line break (`\n` / `\r`), i.e. the
/// opener is followed by something on the same line.
fn starts_with_tool_name_fence_lower(lower: &str) -> bool {
    lower
        .strip_prefix("```tool")
        .and_then(|rest| rest.chars().next())
        .is_some_and(|next| next.is_whitespace() && !matches!(next, '\n' | '\r'))
}
420
/// True when the text contains, anywhere and ignoring ASCII case, one of the
/// tool-call tag openers, tool-call fence openers, or the `[tool_call]` marker.
fn contains_tool_protocol_tag_marker(text: &str) -> bool {
    const MARKERS: [&str; 12] = [
        "<tool_call",
        "<toolcall",
        "<tool-call",
        "<invoke",
        "<functioncall",
        "<function_call",
        "```tool_call",
        "```toolcall",
        "```tool-call",
        "```invoke",
        "```tool ",
        "[tool_call]",
    ];

    let lower = text.to_ascii_lowercase();
    MARKERS.iter().any(|marker| lower.contains(*marker))
}
436
437pub fn looks_like_tool_protocol_example(text: &str) -> bool {
438    let trimmed = text.trim();
439    if trimmed.is_empty() {
440        return false;
441    }
442
443    if let Some((body, visible_text)) = leading_json_fence_body_and_trailing_text(trimmed)
444        && classify_tool_protocol_envelope(body).is_some()
445        && has_example_context(visible_text)
446    {
447        return true;
448    }
449
450    if starts_with_tool_protocol_fence(trimmed) || contains_tool_protocol_tag_marker(trimmed) {
451        let (visible_text, calls) = parse_tool_calls(trimmed);
452        if !calls.is_empty() && has_example_context(&visible_text) {
453            return true;
454        }
455    }
456
457    false
458}
459
/// True when the text contains a word marking it as illustrative: English
/// "example" / "sample" (ASCII case-insensitive) or one of a few Chinese
/// equivalents.
fn has_example_context(text: &str) -> bool {
    // Common Chinese "for example" / "sample" markers are included. The list
    // is kept intentionally small to avoid accidentally exempting real
    // protocol leaks.
    const EXAMPLE_MARKERS: [&str; 9] = [
        "example",
        "sample",
        "示例",
        "例如",
        "比如",
        "举例",
        "例子",
        "比方说",
        "譬如",
    ];

    let lower = text.to_ascii_lowercase();
    EXAMPLE_MARKERS
        .iter()
        .any(|marker| lower.contains(*marker))
}
474
/// Splits text that opens with a ```` ```json ```` fence into the fence body
/// and whatever follows the first closing fence.
///
/// Returns `(body, trailing)`, both trimmed, only when the info string is
/// `json` (ASCII case-insensitive), a closing fence exists, and neither part
/// is empty. Returns `None` otherwise.
fn leading_json_fence_body_and_trailing_text(trimmed: &str) -> Option<(&str, &str)> {
    let after_open = trimmed.strip_prefix("```")?;
    let (info_line, remainder) = after_open.split_once('\n')?;

    let language = info_line.trim().trim_end_matches('\r');
    if !language.eq_ignore_ascii_case("json") {
        return None;
    }

    // The body ends at the FIRST closing fence; the rest is trailing text.
    let (raw_body, raw_trailing) = remainder.split_once("```")?;
    let body = raw_body.trim();
    let trailing = raw_trailing.trim();

    if body.is_empty() || trailing.is_empty() {
        None
    } else {
        Some((body, trailing))
    }
}
489
490pub fn contains_tool_protocol_tag_call(text: &str) -> bool {
491    if !contains_tool_protocol_tag_marker(text) || looks_like_tool_protocol_example(text) {
492        return false;
493    }
494
495    let (_, calls) = parse_tool_calls(text);
496    !calls.is_empty()
497}
498
499fn classify_tagged_tool_protocol_envelope(text: &str) -> Option<ToolProtocolEnvelopeKind> {
500    if !starts_with_tool_protocol_tag_or_fence(text) {
501        return None;
502    }
503    if looks_like_tool_protocol_example(text) {
504        return None;
505    }
506
507    let is_fence = starts_with_tool_protocol_fence(text);
508    let (visible_text, calls) = parse_tool_calls(text);
509    (!calls.is_empty() && (is_fence || visible_text.trim().is_empty()))
510        .then_some(ToolProtocolEnvelopeKind::TaggedToolCall)
511}
512
513fn looks_like_malformed_tagged_tool_protocol_envelope(text: &str) -> bool {
514    if !starts_with_tool_protocol_tag_or_fence(text) {
515        return false;
516    }
517    if looks_like_tool_protocol_example(text) {
518        return false;
519    }
520
521    let (visible_text, calls) = parse_tool_calls(text);
522    if !calls.is_empty() || !visible_text.trim().is_empty() {
523        return false;
524    }
525
526    let lower = text.to_ascii_lowercase();
527    lower.contains("arguments")
528        || lower.contains("parameters")
529        || lower.contains("function")
530        || lower.contains("name")
531        || lower.contains("call_id")
532        || lower.contains("tool_call_id")
533}
534
/// Substring-based protocol detection for JSON-looking text that could not be
/// parsed.
///
/// The text must start (after leading whitespace) with `{`, `[` or a
/// ```` ```json ```` fence. It then matches when it has either:
/// - a tool-result shape: `"tool_call_id"` plus `"content"`, `"result"` or
///   `"output"`; or
/// - a protocol container (`"tool_calls"`, `"toolcalls"`, `"function_call"`)
///   together with an arguments key and a call-id key.
fn has_malformed_tool_protocol_text_signal(text: &str) -> bool {
    let head = text.trim_start();
    let json_like = head.starts_with('{')
        || head.starts_with('[')
        || head.to_ascii_lowercase().starts_with("```json");
    if !json_like {
        return false;
    }

    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| text.contains(*needle));

    // Malformed text cannot be parsed into a Value, so keep the tool-result
    // signal close to the valid-envelope shape to avoid business JSON false positives.
    let has_tool_result_shape = text.contains("\"tool_call_id\"")
        && mentions_any(&["\"content\"", "\"result\"", "\"output\""]);
    if has_tool_result_shape {
        return true;
    }

    mentions_any(&["\"tool_calls\"", "\"toolcalls\"", "\"function_call\""])
        && mentions_any(&["\"arguments\"", "\"parameters\""])
        && mentions_any(&["\"call_id\"", "\"tool_call_id\""])
}
558
559fn malformed_text_mentions_known_tool(text: &str, known_tool_names: &HashSet<String>) -> bool {
560    if known_tool_names.is_empty() {
561        return false;
562    }
563
564    static JSON_NAME_FIELD_RE: LazyLock<Regex> =
565        LazyLock::new(|| Regex::new(r#""name"\s*:\s*"([^"]+)""#).unwrap());
566
567    JSON_NAME_FIELD_RE.captures_iter(text).any(|cap| {
568        cap.get(1)
569            .map(|name| name.as_str().trim().to_ascii_lowercase())
570            .is_some_and(|name| known_tool_names.contains(&name))
571    })
572}
573
574fn has_malformed_tool_protocol_text_signal_for_known_tools(
575    text: &str,
576    known_tool_names: &HashSet<String>,
577) -> bool {
578    if has_malformed_tool_protocol_text_signal(text) {
579        return true;
580    }
581
582    let trimmed = text.trim_start();
583    let lower = trimmed.to_ascii_lowercase();
584    let json_like =
585        trimmed.starts_with('{') || trimmed.starts_with('[') || lower.starts_with("```json");
586    if !json_like {
587        return false;
588    }
589
590    let has_protocol_container = text.contains("\"tool_calls\"")
591        || text.contains("\"toolcalls\"")
592        || text.contains("\"function_call\"");
593    let has_arguments = text.contains("\"arguments\"") || text.contains("\"parameters\"");
594
595    has_protocol_container
596        && has_arguments
597        && malformed_text_mentions_known_tool(text, known_tool_names)
598}
599
/// Returns the trimmed body of text that consists entirely of one
/// ```` ```json ```` fence.
///
/// The info string must be `json` (ASCII case-insensitive) and nothing but
/// whitespace may follow the LAST closing fence; otherwise `None` is returned.
/// The body itself may be empty.
fn json_fence_body(trimmed: &str) -> Option<&str> {
    let after_open = trimmed.strip_prefix("```")?;
    let (info_line, remainder) = after_open.split_once('\n')?;

    let language = info_line.trim().trim_end_matches('\r');
    if !language.eq_ignore_ascii_case("json") {
        return None;
    }

    let (body, after_close) = remainder.rsplit_once("```")?;
    if after_close.trim().is_empty() {
        Some(body.trim())
    } else {
        None
    }
}
615
616pub fn classify_tool_protocol_envelope(text: &str) -> Option<ToolProtocolEnvelopeKind> {
617    let trimmed = text.trim();
618    if trimmed.is_empty() {
619        return None;
620    }
621
622    if let Some(kind) = classify_tagged_tool_protocol_envelope(trimmed) {
623        return Some(kind);
624    }
625
626    if let Some(body) = json_fence_body(trimmed) {
627        return classify_tool_protocol_envelope(body);
628    }
629
630    let value = serde_json::from_str::<serde_json::Value>(trimmed).ok()?;
631    classify_tool_protocol_json_value(&value)
632}
633
634pub fn looks_like_tool_protocol_envelope(text: &str) -> bool {
635    let trimmed = text.trim();
636    if trimmed.is_empty() {
637        return false;
638    }
639
640    if classify_tool_protocol_envelope(trimmed).is_some() {
641        return true;
642    }
643
644    if let Some(body) = json_fence_body(trimmed) {
645        return looks_like_tool_protocol_envelope(body);
646    }
647
648    serde_json::from_str::<serde_json::Value>(trimmed)
649        .is_ok_and(|value| has_malformed_tool_protocol_json_signal(&value))
650}
651
652pub fn looks_like_malformed_tool_protocol_envelope(text: &str) -> bool {
653    let trimmed = text.trim();
654    if looks_like_tool_protocol_example(trimmed) {
655        return false;
656    }
657
658    if looks_like_malformed_tagged_tool_protocol_envelope(trimmed) {
659        return true;
660    }
661
662    let lower = trimmed.to_ascii_lowercase();
663    let json_like =
664        trimmed.starts_with('{') || trimmed.starts_with('[') || lower.starts_with("```json");
665    if trimmed.is_empty() || !json_like {
666        return false;
667    }
668
669    if let Some(body) = json_fence_body(trimmed) {
670        return looks_like_malformed_tool_protocol_envelope(body);
671    }
672
673    if serde_json::from_str::<serde_json::Value>(trimmed).is_ok() {
674        return false;
675    }
676
677    has_malformed_tool_protocol_text_signal(trimmed)
678}
679
680pub fn looks_like_malformed_tool_protocol_envelope_for_known_tools(
681    text: &str,
682    known_tool_names: &HashSet<String>,
683) -> bool {
684    let trimmed = text.trim();
685    if looks_like_tool_protocol_example(trimmed) {
686        return false;
687    }
688
689    if looks_like_malformed_tool_protocol_envelope(trimmed) {
690        return true;
691    }
692
693    let lower = trimmed.to_ascii_lowercase();
694    let json_like =
695        trimmed.starts_with('{') || trimmed.starts_with('[') || lower.starts_with("```json");
696    if trimmed.is_empty() || !json_like {
697        return false;
698    }
699
700    if let Some(body) = json_fence_body(trimmed) {
701        return looks_like_malformed_tool_protocol_envelope_for_known_tools(body, known_tool_names);
702    }
703
704    if serde_json::from_str::<serde_json::Value>(trimmed).is_ok() {
705        return false;
706    }
707
708    has_malformed_tool_protocol_text_signal_for_known_tools(trimmed, known_tool_names)
709}
710
/// True when `tag` (ASCII case-insensitive) is a wrapper or reasoning tag
/// rather than a tool name.
fn is_xml_meta_tag(tag: &str) -> bool {
    const META_TAGS: [&str; 9] = [
        "tool_call",
        "toolcall",
        "tool-call",
        "invoke",
        "thinking",
        "thought",
        "analysis",
        "reasoning",
        "reflection",
    ];

    META_TAGS
        .iter()
        .any(|meta| tag.eq_ignore_ascii_case(meta))
}
726
727/// Match opening XML tags: `<tag_name>`.  Does NOT use backreferences.
728static XML_OPEN_TAG_RE: LazyLock<Regex> =
729    LazyLock::new(|| Regex::new(r"<([a-zA-Z_][a-zA-Z0-9_-]*)>").unwrap());
730
731/// MiniMax XML invoke format:
732/// `<invoke name="shell"><parameter name="command">pwd</parameter></invoke>`
733static MINIMAX_INVOKE_RE: LazyLock<Regex> = LazyLock::new(|| {
734    Regex::new(r#"(?is)<invoke\b[^>]*\bname\s*=\s*(?:"([^"]+)"|'([^']+)')[^>]*>(.*?)</invoke>"#)
735        .unwrap()
736});
737
738static MINIMAX_PARAMETER_RE: LazyLock<Regex> = LazyLock::new(|| {
739    Regex::new(
740        r#"(?is)<parameter\b[^>]*\bname\s*=\s*(?:"([^"]+)"|'([^']+)')[^>]*>(.*?)</parameter>"#,
741    )
742    .unwrap()
743});
744
745/// Extracts all `<tag>…</tag>` pairs from `input`, returning `(tag_name, inner_content)`.
746/// Handles matching closing tags without regex backreferences.
747fn extract_xml_pairs(input: &str) -> Vec<(&str, &str)> {
748    let mut results = Vec::new();
749    let mut search_start = 0;
750    while let Some(open_cap) = XML_OPEN_TAG_RE.captures(&input[search_start..]) {
751        let full_open = open_cap.get(0).unwrap();
752        let tag_name = open_cap.get(1).unwrap().as_str();
753        let open_end = search_start + full_open.end();
754
755        let closing_tag = format!("</{tag_name}>");
756        if let Some(close_pos) = input[open_end..].find(&closing_tag) {
757            let inner = &input[open_end..open_end + close_pos];
758            results.push((tag_name, inner.trim()));
759            search_start = open_end + close_pos + closing_tag.len();
760        } else {
761            search_start = open_end;
762        }
763    }
764    results
765}
766
767/// Parse XML-style tool calls in `<tool_call>` bodies.
768/// Supports both nested argument tags and JSON argument payloads:
769/// - `<memory_recall><query>...</query></memory_recall>`
770/// - `<shell>{"command":"pwd"}</shell>`
771fn parse_xml_tool_calls(xml_content: &str) -> Option<Vec<ParsedToolCall>> {
772    let mut calls = Vec::new();
773    let trimmed = xml_content.trim();
774
775    if !trimmed.starts_with('<') || !trimmed.contains('>') {
776        return None;
777    }
778
779    for (tool_name_str, inner_content) in extract_xml_pairs(trimmed) {
780        let tool_name = tool_name_str.to_string();
781        if is_xml_meta_tag(&tool_name) {
782            continue;
783        }
784
785        if inner_content.is_empty() {
786            continue;
787        }
788
789        let mut args = serde_json::Map::new();
790
791        if let Some(first_json) = extract_json_values(inner_content).into_iter().next() {
792            match first_json {
793                serde_json::Value::Object(object_args) => {
794                    args = object_args;
795                }
796                other => {
797                    args.insert("value".to_string(), other);
798                }
799            }
800        } else {
801            for (key_str, value) in extract_xml_pairs(inner_content) {
802                let key = key_str.to_string();
803                if is_xml_meta_tag(&key) {
804                    continue;
805                }
806                if !value.is_empty() {
807                    args.insert(key, serde_json::Value::String(value.to_string()));
808                }
809            }
810
811            if args.is_empty() {
812                args.insert(
813                    "content".to_string(),
814                    serde_json::Value::String(inner_content.to_string()),
815                );
816            }
817        }
818
819        calls.push(ParsedToolCall {
820            name: tool_name,
821            arguments: serde_json::Value::Object(args),
822            tool_call_id: None,
823        });
824    }
825
826    if calls.is_empty() { None } else { Some(calls) }
827}
828
829/// Parse MiniMax-style XML tool calls with attributed invoke/parameter tags.
830fn parse_minimax_invoke_calls(response: &str) -> Option<(String, Vec<ParsedToolCall>)> {
831    let mut calls = Vec::new();
832    let mut text_parts = Vec::new();
833    let mut last_end = 0usize;
834
835    for cap in MINIMAX_INVOKE_RE.captures_iter(response) {
836        let Some(full_match) = cap.get(0) else {
837            continue;
838        };
839
840        let before = response[last_end..full_match.start()].trim();
841        if !before.is_empty() {
842            text_parts.push(before.to_string());
843        }
844
845        let name = cap
846            .get(1)
847            .or_else(|| cap.get(2))
848            .map(|m| m.as_str().trim())
849            .filter(|v| !v.is_empty());
850        let body = cap.get(3).map(|m| m.as_str()).unwrap_or("").trim();
851        last_end = full_match.end();
852
853        let Some(name) = name else {
854            continue;
855        };
856
857        let mut args = serde_json::Map::new();
858        for param_cap in MINIMAX_PARAMETER_RE.captures_iter(body) {
859            let key = param_cap
860                .get(1)
861                .or_else(|| param_cap.get(2))
862                .map(|m| m.as_str().trim())
863                .unwrap_or_default();
864            if key.is_empty() {
865                continue;
866            }
867            let value = param_cap
868                .get(3)
869                .map(|m| m.as_str().trim())
870                .unwrap_or_default();
871            if value.is_empty() {
872                continue;
873            }
874
875            let parsed = extract_json_values(value).into_iter().next();
876            args.insert(
877                key.to_string(),
878                parsed.unwrap_or_else(|| serde_json::Value::String(value.to_string())),
879            );
880        }
881
882        if args.is_empty() {
883            if let Some(first_json) = extract_json_values(body).into_iter().next() {
884                match first_json {
885                    serde_json::Value::Object(obj) => args = obj,
886                    other => {
887                        args.insert("value".to_string(), other);
888                    }
889                }
890            } else if !body.is_empty() {
891                args.insert(
892                    "content".to_string(),
893                    serde_json::Value::String(body.to_string()),
894                );
895            }
896        }
897
898        calls.push(ParsedToolCall {
899            name: name.to_string(),
900            arguments: serde_json::Value::Object(args),
901            tool_call_id: None,
902        });
903    }
904
905    if calls.is_empty() {
906        return None;
907    }
908
909    let after = response[last_end..].trim();
910    if !after.is_empty() {
911        text_parts.push(after.to_string());
912    }
913
914    let text = text_parts
915        .join("\n")
916        .replace("<minimax:tool_call>", "")
917        .replace("</minimax:tool_call>", "")
918        .replace("<minimax:toolcall>", "")
919        .replace("</minimax:toolcall>", "")
920        .trim()
921        .to_string();
922
923    Some((text, calls))
924}
925
/// Opening tags recognized as the start of an XML-style tool-call block.
///
/// `parse_tool_calls` searches for the earliest of these with
/// `find_first_tag` and then pairs the tag it found with its same-named
/// closing tag.
const TOOL_CALL_OPEN_TAGS: [&str; 6] = [
    "<tool_call>",
    "<toolcall>",
    "<tool-call>",
    "<invoke>",
    "<minimax:tool_call>",
    "<minimax:toolcall>",
];
934
/// Closing tags accepted when an opening tag's own closing tag is missing.
///
/// `parse_tool_calls` searches these with `find_first_tag` so that a block
/// whose open and close tags use different aliases (for example
/// `<tool_call>...</invoke>`) can still be parsed.
const TOOL_CALL_CLOSE_TAGS: [&str; 6] = [
    "</tool_call>",
    "</toolcall>",
    "</tool-call>",
    "</invoke>",
    "</minimax:tool_call>",
    "</minimax:toolcall>",
];
943
/// Locate whichever of `tags` occurs earliest in `haystack`.
///
/// Returns the byte offset of that occurrence together with the tag that
/// matched, or `None` when no tag occurs at all. If two tags match at the
/// same offset, the one listed first in `tags` is returned.
fn find_first_tag<'a>(haystack: &str, tags: &'a [&'a str]) -> Option<(usize, &'a str)> {
    let mut earliest: Option<(usize, &'a str)> = None;

    for &candidate in tags {
        let Some(position) = haystack.find(candidate) else {
            continue;
        };

        // Strict `<` keeps the first-listed tag when offsets tie.
        let is_earlier = earliest.map_or(true, |(best, _)| position < best);
        if is_earlier {
            earliest = Some((position, candidate));
        }
    }

    earliest
}
949
950fn extract_first_json_value_with_end(input: &str) -> Option<(serde_json::Value, usize)> {
951    let trimmed = input.trim_start();
952    let trim_offset = input.len().saturating_sub(trimmed.len());
953
954    for (byte_idx, ch) in trimmed.char_indices() {
955        if ch != '{' && ch != '[' {
956            continue;
957        }
958
959        let slice = &trimmed[byte_idx..];
960        let mut stream = serde_json::Deserializer::from_str(slice).into_iter::<serde_json::Value>();
961        if let Some(Ok(value)) = stream.next() {
962            let consumed = stream.byte_offset();
963            if consumed > 0 {
964                return Some((value, trim_offset + byte_idx + consumed));
965            }
966        }
967    }
968
969    None
970}
971
/// Drop any run of closing tags (`</...>`) from the front of `input`.
///
/// Leading whitespace is skipped before and between the tags. The returned
/// slice starts at the first content that is not a closing tag. If a `</` is
/// never terminated by `>`, the empty string is returned.
fn strip_leading_close_tags(mut input: &str) -> &str {
    input = input.trim_start();

    while input.starts_with("</") {
        match input.find('>') {
            Some(gt) => input = input[gt + 1..].trim_start(),
            None => return "",
        }
    }

    input
}
985
986/// Extract JSON values from a string.
987///
988/// # Security Warning
989///
990/// This function extracts ANY JSON objects/arrays from the input. It MUST only
991/// be used on content that is already trusted to be from the LLM, such as
992/// content inside `<invoke>` tags where the LLM has explicitly indicated intent
993/// to make a tool call. Do NOT use this on raw user input or content that
994/// could contain prompt injection payloads.
995fn extract_json_values(input: &str) -> Vec<serde_json::Value> {
996    let mut values = Vec::new();
997    let trimmed = input.trim();
998    if trimmed.is_empty() {
999        return values;
1000    }
1001
1002    if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) {
1003        values.push(value);
1004        return values;
1005    }
1006
1007    let char_positions: Vec<(usize, char)> = trimmed.char_indices().collect();
1008    let mut idx = 0;
1009    while idx < char_positions.len() {
1010        let (byte_idx, ch) = char_positions[idx];
1011        if ch == '{' || ch == '[' {
1012            let slice = &trimmed[byte_idx..];
1013            let mut stream =
1014                serde_json::Deserializer::from_str(slice).into_iter::<serde_json::Value>();
1015            if let Some(Ok(value)) = stream.next() {
1016                let consumed = stream.byte_offset();
1017                if consumed > 0 {
1018                    values.push(value);
1019                    let next_byte = byte_idx + consumed;
1020                    while idx < char_positions.len() && char_positions[idx].0 < next_byte {
1021                        idx += 1;
1022                    }
1023                    continue;
1024                }
1025            }
1026        }
1027        idx += 1;
1028    }
1029
1030    values
1031}
1032
/// Locate the end of the JSON object at the start of `input` by counting
/// balanced braces.
///
/// Leading whitespace is skipped; the first non-whitespace character must be
/// `{`, otherwise `None` is returned. Braces inside double-quoted strings are
/// ignored, and a backslash inside a string escapes the following character.
///
/// Returns the byte offset (relative to `input`) just past the matching `}`,
/// or `None` if the braces never balance.
fn find_json_end(input: &str) -> Option<usize> {
    let body = input.trim_start();
    if !body.starts_with('{') {
        return None;
    }
    let leading_ws = input.len() - body.len();

    let mut open_braces = 0;
    let mut inside_string = false;
    let mut skip_escaped = false;

    // Every character that matters here is ASCII, so scanning bytes is
    // equivalent to scanning chars: UTF-8 continuation bytes never match.
    for (pos, byte) in body.bytes().enumerate() {
        if skip_escaped {
            skip_escaped = false;
            continue;
        }

        if inside_string {
            match byte {
                b'\\' => skip_escaped = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                open_braces -= 1;
                if open_braces == 0 {
                    return Some(leading_ws + pos + 1);
                }
            }
            _ => {}
        }
    }

    None
}
1068
1069/// Parse XML attribute-style tool calls from response text.
1070/// This handles MiniMax and similar model_providers that output:
1071/// ```xml
1072/// <minimax:toolcall>
1073/// <invoke name="shell">
1074/// <parameter name="command">ls</parameter>
1075/// </invoke>
1076/// </minimax:toolcall>
1077/// ```
1078fn parse_xml_attribute_tool_calls(response: &str) -> Vec<ParsedToolCall> {
1079    let mut calls = Vec::new();
1080
1081    // Regex to find <invoke name="toolname">...</invoke> blocks
1082    static INVOKE_RE: LazyLock<Regex> = LazyLock::new(|| {
1083        Regex::new(r#"(?s)<invoke\s+name="([^"]+)"[^>]*>(.*?)</invoke>"#).unwrap()
1084    });
1085
1086    // Regex to find <parameter name="paramname">value</parameter>
1087    static PARAM_RE: LazyLock<Regex> = LazyLock::new(|| {
1088        Regex::new(r#"<parameter\s+name="([^"]+)"[^>]*>([^<]*)</parameter>"#).unwrap()
1089    });
1090
1091    for cap in INVOKE_RE.captures_iter(response) {
1092        let tool_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
1093        let inner = cap.get(2).map(|m| m.as_str()).unwrap_or("");
1094
1095        if tool_name.is_empty() {
1096            continue;
1097        }
1098
1099        let mut arguments = serde_json::Map::new();
1100
1101        for param_cap in PARAM_RE.captures_iter(inner) {
1102            let param_name = param_cap.get(1).map(|m| m.as_str()).unwrap_or("");
1103            let param_value = param_cap.get(2).map(|m| m.as_str()).unwrap_or("");
1104
1105            if !param_name.is_empty() {
1106                arguments.insert(
1107                    param_name.to_string(),
1108                    serde_json::Value::String(param_value.to_string()),
1109                );
1110            }
1111        }
1112
1113        if !arguments.is_empty() {
1114            calls.push(ParsedToolCall {
1115                name: map_tool_name_alias(tool_name).to_string(),
1116                arguments: serde_json::Value::Object(arguments),
1117                tool_call_id: None,
1118            });
1119        }
1120    }
1121
1122    calls
1123}
1124
1125/// Parse Perl/hash-ref style tool calls from response text.
1126/// This handles formats like:
1127/// ```text
1128/// TOOL_CALL
1129/// {tool => "shell", args => {
1130///   --command "ls -la"
1131///   --description "List current directory contents"
1132/// }}
1133/// /TOOL_CALL
1134/// ```
1135/// Also handles the square bracket variant emitted by models like MiniMax 2.7:
1136/// ```text
1137/// [TOOL_CALL]{tool => "shell", args => {--command "echo hello"}}[/TOOL_CALL]
1138/// ```
1139fn parse_perl_style_tool_calls(response: &str) -> Vec<ParsedToolCall> {
1140    let mut calls = Vec::new();
1141
1142    // Regex to find TOOL_CALL blocks - handle double closing braces }}
1143    // Matches both `TOOL_CALL { ... }} /TOOL_CALL` and `[TOOL_CALL]{ ... }}[/TOOL_CALL]`
1144    static PERL_RE: LazyLock<Regex> = LazyLock::new(|| {
1145        Regex::new(r"(?s)(?:\[TOOL_CALL\]|TOOL_CALL)\s*\{(.+?)\}\}\s*(?:\[/TOOL_CALL\]|/TOOL_CALL)")
1146            .unwrap()
1147    });
1148
1149    // Regex to find tool => "name" in the content
1150    static TOOL_NAME_RE: LazyLock<Regex> =
1151        LazyLock::new(|| Regex::new(r#"tool\s*=>\s*"([^"]+)""#).unwrap());
1152
1153    // Regex to find args => { ... } block.
1154    // The closing brace is optional: in the square bracket variant [TOOL_CALL]{...}}[/TOOL_CALL]
1155    // the outer regex may consume the inner closing brace, so the args content may run to end of string.
1156    static ARGS_BLOCK_RE: LazyLock<Regex> =
1157        LazyLock::new(|| Regex::new(r"(?s)args\s*=>\s*\{(.+?)(?:\}|$)").unwrap());
1158
1159    // Regex to find --key "value" pairs
1160    static ARGS_RE: LazyLock<Regex> =
1161        LazyLock::new(|| Regex::new(r#"--(\w+)\s+"([^"]+)""#).unwrap());
1162
1163    for cap in PERL_RE.captures_iter(response) {
1164        let content = cap.get(1).map(|m| m.as_str()).unwrap_or("");
1165
1166        // Extract tool name
1167        let tool_name = TOOL_NAME_RE
1168            .captures(content)
1169            .and_then(|c| c.get(1))
1170            .map(|m| m.as_str())
1171            .unwrap_or("");
1172
1173        if tool_name.is_empty() {
1174            continue;
1175        }
1176
1177        // Extract args block
1178        let args_block = ARGS_BLOCK_RE
1179            .captures(content)
1180            .and_then(|c| c.get(1))
1181            .map(|m| m.as_str())
1182            .unwrap_or("");
1183
1184        let mut arguments = serde_json::Map::new();
1185
1186        for arg_cap in ARGS_RE.captures_iter(args_block) {
1187            let key = arg_cap.get(1).map(|m| m.as_str()).unwrap_or("");
1188            let value = arg_cap.get(2).map(|m| m.as_str()).unwrap_or("");
1189
1190            if !key.is_empty() {
1191                arguments.insert(
1192                    key.to_string(),
1193                    serde_json::Value::String(value.to_string()),
1194                );
1195            }
1196        }
1197
1198        if !arguments.is_empty() {
1199            calls.push(ParsedToolCall {
1200                name: map_tool_name_alias(tool_name).to_string(),
1201                arguments: serde_json::Value::Object(arguments),
1202                tool_call_id: None,
1203            });
1204        }
1205    }
1206
1207    calls
1208}
1209
1210/// Parse FunctionCall-style tool calls from response text.
1211/// This handles formats like:
1212/// ```text
1213/// <FunctionCall>
1214/// file_read
1215/// <code>path>/Users/kylelampa/Documents/zeroclaw/README.md</code>
1216/// </FunctionCall>
1217/// ```
1218fn parse_function_call_tool_calls(response: &str) -> Vec<ParsedToolCall> {
1219    let mut calls = Vec::new();
1220
1221    // Regex to find <FunctionCall> blocks
1222    static FUNC_RE: LazyLock<Regex> = LazyLock::new(|| {
1223        Regex::new(r"(?s)<FunctionCall>\s*(\w+)\s*<code>([^<]+)</code>\s*</FunctionCall>").unwrap()
1224    });
1225
1226    for cap in FUNC_RE.captures_iter(response) {
1227        let tool_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
1228        let args_text = cap.get(2).map(|m| m.as_str()).unwrap_or("");
1229
1230        if tool_name.is_empty() {
1231            continue;
1232        }
1233
1234        // Parse key>value pairs (e.g., path>/Users/.../file.txt)
1235        let mut arguments = serde_json::Map::new();
1236        for line in args_text.lines() {
1237            let line = line.trim();
1238            if let Some(pos) = line.find('>') {
1239                let key = line[..pos].trim();
1240                let value = line[pos + 1..].trim();
1241                if !key.is_empty() && !value.is_empty() {
1242                    arguments.insert(
1243                        key.to_string(),
1244                        serde_json::Value::String(value.to_string()),
1245                    );
1246                }
1247            }
1248        }
1249
1250        if !arguments.is_empty() {
1251            calls.push(ParsedToolCall {
1252                name: map_tool_name_alias(tool_name).to_string(),
1253                arguments: serde_json::Value::Object(arguments),
1254                tool_call_id: None,
1255            });
1256        }
1257    }
1258
1259    calls
1260}
1261
/// Map tool name aliases from various LLM model_providers to ZeroClaw tool names.
///
/// Handles variations like "fileread" -> "file_read", "bash" -> "shell", etc.
/// Any dotted namespace prefix is removed first, so `default_api.bash` also
/// resolves to `shell`. Names with no known alias are returned unchanged
/// (minus the namespace prefix).
fn map_tool_name_alias(tool_name: &str) -> &str {
    // Canonical tool name paired with every spelling that resolves to it.
    const ALIAS_TABLE: &[(&str, &[&str])] = &[
        // Shell variations (including GLM aliases that map to shell)
        (
            "shell",
            &[
                "shell",
                "bash",
                "sh",
                "exec",
                "command",
                "cmd",
                "browser_open",
                "browser",
                "web_search",
            ],
        ),
        // Messaging variations
        ("message_send", &["send_message", "sendmessage"]),
        // File tool variations
        (
            "file_read",
            &["fileread", "file_read", "readfile", "read_file", "file"],
        ),
        (
            "file_write",
            &["filewrite", "file_write", "writefile", "write_file"],
        ),
        (
            "file_list",
            &["filelist", "file_list", "listfiles", "list_files"],
        ),
        // Memory variations
        (
            "memory_recall",
            &["memoryrecall", "memory_recall", "recall", "memrecall"],
        ),
        (
            "memory_store",
            &["memorystore", "memory_store", "store", "memstore"],
        ),
        (
            "memory_forget",
            &["memoryforget", "memory_forget", "forget", "memforget"],
        ),
        // HTTP variations
        (
            "http_request",
            &["http_request", "http", "fetch", "curl", "wget"],
        ),
    ];

    // Keep only the final dotted segment. Covers Gemini-emitted
    // `default_api.<name>` and `tools.<name>`, plus MCP-server-name prefixes
    // like `google_workspace.search_gmail_messages` that Gemini-via-OpenRouter
    // also emits when the tool originates from an MCP server. The registry is
    // indexed by bare tool name.
    let bare = match tool_name.rsplit_once('.') {
        Some((_, last_segment)) => last_segment,
        None => tool_name,
    };

    ALIAS_TABLE
        .iter()
        .find(|(_, aliases)| aliases.contains(&bare))
        .map_or(bare, |(canonical, _)| *canonical)
}
1295
/// Build a `curl -s '<url>'` shell command for an HTTP(S) URL.
///
/// Returns `None` when `url` does not start with `http://` or `https://`, or
/// when it contains any whitespace.
///
/// The URL is wrapped in single quotes. Each embedded single quote is
/// rewritten with the POSIX idiom `'\''` (close the quoted string, emit an
/// escaped quote, reopen the quoted string), so the URL always stays a single
/// shell word.
fn build_curl_command(url: &str) -> Option<String> {
    let has_http_scheme = url.starts_with("http://") || url.starts_with("https://");
    if !has_http_scheme || url.chars().any(char::is_whitespace) {
        return None;
    }

    // Raw string: exactly one backslash. Two backslashes would produce a
    // literal backslash followed by an empty quoted string, leaving the rest
    // of the URL outside the quotes.
    let escaped = url.replace('\'', r"'\''");
    Some(format!("curl -s '{escaped}'"))
}
1308
1309fn parse_glm_style_tool_calls(text: &str) -> Vec<(String, serde_json::Value, Option<String>)> {
1310    let mut calls = Vec::new();
1311
1312    for line in text.lines() {
1313        let line = line.trim();
1314        if line.is_empty() {
1315            continue;
1316        }
1317
1318        // Format: tool_name/param>value or tool_name/{json}
1319        if let Some(pos) = line.find('/') {
1320            let tool_part = &line[..pos];
1321            let rest = &line[pos + 1..];
1322
1323            if tool_part.chars().all(|c| c.is_alphanumeric() || c == '_') {
1324                let tool_name = map_tool_name_alias(tool_part);
1325
1326                if let Some(gt_pos) = rest.find('>') {
1327                    let param_name = rest[..gt_pos].trim();
1328                    let value = rest[gt_pos + 1..].trim();
1329
1330                    let arguments = match tool_name {
1331                        "shell" => {
1332                            if param_name == "url" {
1333                                let Some(command) = build_curl_command(value) else {
1334                                    continue;
1335                                };
1336                                serde_json::json!({ "command": command })
1337                            } else if value.starts_with("http://") || value.starts_with("https://")
1338                            {
1339                                if let Some(command) = build_curl_command(value) {
1340                                    serde_json::json!({ "command": command })
1341                                } else {
1342                                    serde_json::json!({ "command": value })
1343                                }
1344                            } else {
1345                                serde_json::json!({ "command": value })
1346                            }
1347                        }
1348                        "http_request" => {
1349                            serde_json::json!({"url": value, "method": "GET"})
1350                        }
1351                        _ => serde_json::json!({ param_name: value }),
1352                    };
1353
1354                    calls.push((tool_name.to_string(), arguments, Some(line.to_string())));
1355                    continue;
1356                }
1357
1358                if rest.starts_with('{')
1359                    && let Ok(json_args) = serde_json::from_str::<serde_json::Value>(rest)
1360                {
1361                    calls.push((tool_name.to_string(), json_args, Some(line.to_string())));
1362                }
1363            }
1364        }
1365    }
1366
1367    calls
1368}
1369
/// Return the canonical default parameter name for a tool.
///
/// A model may emit a shortened call such as `shell>uname -a`, with no
/// explicit `/param_name`. This function supplies the parameter that the
/// bare value should be assigned to for known ZeroClaw tools and their
/// aliases. Unrecognized tools fall back to `"input"`.
fn default_param_for_tool(tool: &str) -> &'static str {
    const COMMAND_TOOLS: &[&str] = &["shell", "bash", "sh", "exec", "command", "cmd"];
    // All file tools default to "path"
    const PATH_TOOLS: &[&str] = &[
        "file_read",
        "fileread",
        "readfile",
        "read_file",
        "file",
        "file_write",
        "filewrite",
        "writefile",
        "write_file",
        "file_edit",
        "fileedit",
        "editfile",
        "edit_file",
        "file_list",
        "filelist",
        "listfiles",
        "list_files",
    ];
    // Memory recall/forget and web search tools all default to "query"
    const QUERY_TOOLS: &[&str] = &[
        "memory_recall",
        "memoryrecall",
        "recall",
        "memrecall",
        "memory_forget",
        "memoryforget",
        "forget",
        "memforget",
        "web_search_tool",
        "web_search",
        "websearch",
        "search",
    ];
    const CONTENT_TOOLS: &[&str] = &["memory_store", "memorystore", "store", "memstore"];
    // HTTP and browser tools default to "url"
    const URL_TOOLS: &[&str] = &[
        "http_request",
        "http",
        "fetch",
        "curl",
        "wget",
        "browser_open",
        "browser",
    ];

    let groups: [(&[&str], &'static str); 5] = [
        (COMMAND_TOOLS, "command"),
        (PATH_TOOLS, "path"),
        (QUERY_TOOLS, "query"),
        (CONTENT_TOOLS, "content"),
        (URL_TOOLS, "url"),
    ];

    for (names, param) in groups {
        if names.contains(&tool) {
            return param;
        }
    }

    "input"
}
1392
/// Parse GLM-style shortened tool call bodies found inside `<tool_call>` tags.
///
/// Handles three sub-formats that GLM-4.7 emits:
///
/// 1. **Shortened**: `tool_name>value` — single value mapped via
///    [`default_param_for_tool`].
/// 2. **YAML-like multi-line**: `tool_name>\nkey: value\nkey: value` — each
///    subsequent `key: value` line becomes a parameter.
/// 3. **Attribute-style**: `tool_name key="value" [/]>` — XML-like attributes.
///
/// A function-call style, `tool_name(args)`, is also accepted: when the body
/// ends with `)` and has a non-empty prefix before its first `(`, the prefix
/// is the tool name and the text between the parentheses is interpreted with
/// the same attribute / YAML / single-value rules as above.
///
/// The tool name must consist only of alphanumerics and `_`; it is resolved
/// through `map_tool_name_alias`.
///
/// Returns `None` if the body does not match any of these formats, if the
/// tool name is empty or invalid, or if no value could be extracted.
fn parse_glm_shortened_body(body: &str) -> Option<ParsedToolCall> {
    let body = body.trim();
    if body.is_empty() {
        return None;
    }

    // Function-call style: `tool(args)`. Requires a trailing `)` and at least
    // one character before the first `(`.
    let function_style = body.find('(').and_then(|open| {
        if body.ends_with(')') && open > 0 {
            Some((body[..open].trim(), body[open + 1..body.len() - 1].trim()))
        } else {
            None
        }
    });

    // Split the body into (tool name, value text). Function-call style takes
    // precedence. Attribute-style (`tool_name key="value" />`) is checked
    // before the plain `>` split because `/>` contains `>` and would
    // otherwise misparse the tool name.
    let (tool_raw, value_part) = if let Some((tool, args)) = function_style {
        (tool, args)
    } else if body.contains("=\"") {
        // Attribute-style: split at first whitespace to get tool name
        let split_pos = body.find(|c: char| c.is_whitespace()).unwrap_or(body.len());
        let tool = body[..split_pos].trim();
        // Drop any trailing `/>`, `>` or `/` left over from the tag syntax.
        let attrs = body[split_pos..]
            .trim()
            .trim_end_matches("/>")
            .trim_end_matches('>')
            .trim_end_matches('/')
            .trim();
        (tool, attrs)
    } else if let Some(gt_pos) = body.find('>') {
        // GLM shortened: `tool_name>value`
        let tool = body[..gt_pos].trim();
        let value = body[gt_pos + 1..].trim();
        // Strip trailing self-close markers that some models emit
        let value = value.trim_end_matches("/>").trim_end_matches('/').trim();
        (tool, value)
    } else {
        return None;
    };

    // Validate tool name: must be alphanumeric + underscore only
    let tool_raw = tool_raw.trim_end_matches(|c: char| c.is_whitespace());
    if tool_raw.is_empty() || !tool_raw.chars().all(|c| c.is_alphanumeric() || c == '_') {
        return None;
    }

    let tool_name = map_tool_name_alias(tool_raw);

    // Try attribute-style: `key="value" key2="value2"`
    if value_part.contains("=\"") {
        let mut args = serde_json::Map::new();
        // Simple attribute parser: key="value" pairs
        let mut rest = value_part;
        while let Some(eq_pos) = rest.find("=\"") {
            // The key is the last whitespace-delimited token before `="`.
            let key_start = rest[..eq_pos]
                .rfind(|c: char| c.is_whitespace())
                .map(|p| p + 1)
                .unwrap_or(0);
            // Tolerate `,` / `;` separators around the key.
            let key = rest[key_start..eq_pos]
                .trim()
                .trim_matches(|c: char| c == ',' || c == ';');
            let after_quote = &rest[eq_pos + 2..];
            if let Some(end_quote) = after_quote.find('"') {
                let value = &after_quote[..end_quote];
                if !key.is_empty() {
                    args.insert(
                        key.to_string(),
                        serde_json::Value::String(value.to_string()),
                    );
                }
                rest = &after_quote[end_quote + 1..];
            } else {
                // Unterminated quoted value: stop scanning for more pairs.
                break;
            }
        }
        // If no pair was extracted, fall through to the formats below.
        if !args.is_empty() {
            return Some(ParsedToolCall {
                name: tool_name.to_string(),
                arguments: serde_json::Value::Object(args),
                tool_call_id: None,
            });
        }
    }

    // Try YAML-style multi-line: each line is `key: value`
    if value_part.contains('\n') {
        let mut args = serde_json::Map::new();
        for line in value_part.lines() {
            let line = line.trim();
            if line.is_empty() {
                continue;
            }
            // Lines without a `:` or with an empty key/value are ignored.
            if let Some(colon_pos) = line.find(':') {
                let key = line[..colon_pos].trim();
                let value = line[colon_pos + 1..].trim();
                if !key.is_empty() && !value.is_empty() {
                    // Normalize boolean-like values
                    let json_value = match value {
                        "true" | "yes" => serde_json::Value::Bool(true),
                        "false" | "no" => serde_json::Value::Bool(false),
                        _ => serde_json::Value::String(value.to_string()),
                    };
                    args.insert(key.to_string(), json_value);
                }
            }
        }
        // If no pair was extracted, fall through to the single-value format.
        if !args.is_empty() {
            return Some(ParsedToolCall {
                name: tool_name.to_string(),
                arguments: serde_json::Value::Object(args),
                tool_call_id: None,
            });
        }
    }

    // Single-value shortened: `tool>value`
    if !value_part.is_empty() {
        // The default parameter is looked up by the raw (un-aliased) name.
        let param = default_param_for_tool(tool_raw);
        let arguments = match tool_name {
            "shell" => {
                // A bare HTTP(S) URL is turned into a curl command when
                // `build_curl_command` accepts it; otherwise it is passed
                // through unchanged as the command.
                if value_part.starts_with("http://") || value_part.starts_with("https://") {
                    if let Some(cmd) = build_curl_command(value_part) {
                        serde_json::json!({ "command": cmd })
                    } else {
                        serde_json::json!({ "command": value_part })
                    }
                } else {
                    serde_json::json!({ "command": value_part })
                }
            }
            "http_request" => serde_json::json!({"url": value_part, "method": "GET"}),
            _ => serde_json::json!({ param: value_part }),
        };
        return Some(ParsedToolCall {
            name: tool_name.to_string(),
            arguments,
            tool_call_id: None,
        });
    }

    None
}
1547
1548// ── Tool-Call Parsing ─────────────────────────────────────────────────────
1549// LLM responses may contain tool calls in multiple formats depending on
1550// the model_provider. Parsing follows a priority chain:
1551//   1. OpenAI-style JSON with `tool_calls` array (native API)
1552//   2. XML tags: <tool_call>, <toolcall>, <tool-call>, <invoke>
1553//   3. Markdown code blocks with `tool_call` language
1554//   4. GLM-style line-based format (e.g. `shell/command>ls`)
1555// SECURITY: We never fall back to extracting arbitrary JSON from the
1556// response body, because that would enable prompt-injection attacks where
1557// malicious content in emails/files/web pages mimics a tool call.
1558
1559/// Parse tool calls from an LLM response that uses XML-style function calling.
1560///
1561/// Expected format (common with system-prompt-guided tool use):
1562/// ```text
1563/// <tool_call>
1564/// {"name": "shell", "arguments": {"command": "ls"}}
1565/// </tool_call>
1566/// ```
1567///
1568/// Also accepts common tag variants (`<toolcall>`, `<tool-call>`) for model
1569/// compatibility.
1570///
1571/// Also supports JSON with `tool_calls` array from OpenAI-format responses.
1572pub fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
1573    // Strip `<think>...</think>` blocks before parsing.  Qwen and other
1574    // reasoning models embed chain-of-thought inline in the response text;
1575    // these tags can interfere with `<tool_call>` extraction and must be
1576    // removed first.
1577    let cleaned = strip_think_tags(response);
1578    let response = cleaned.as_str();
1579
1580    let mut text_parts = Vec::new();
1581    let mut calls = Vec::new();
1582    let mut remaining = response;
1583
1584    // First, try to parse as OpenAI-style JSON response with tool_calls array
1585    // This handles model_providers like Minimax that return tool_calls in native JSON format
1586    if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(response.trim()) {
1587        calls = parse_tool_calls_from_json_value(&json_value);
1588        if !calls.is_empty() {
1589            // If we found tool_calls, extract any content field as text
1590            if let Some(content) = json_value.get("content").and_then(|v| v.as_str())
1591                && !content.trim().is_empty()
1592            {
1593                text_parts.push(content.trim().to_string());
1594            }
1595            return (text_parts.join("\n"), calls);
1596        }
1597    }
1598
1599    if let Some((minimax_text, minimax_calls)) = parse_minimax_invoke_calls(response)
1600        && !minimax_calls.is_empty()
1601    {
1602        return (minimax_text, minimax_calls);
1603    }
1604
1605    // Fall back to XML-style tool-call tag parsing.
1606    while let Some((start, open_tag)) = find_first_tag(remaining, &TOOL_CALL_OPEN_TAGS) {
1607        // Everything before the tag is text
1608        let before = &remaining[..start];
1609        if !before.trim().is_empty() {
1610            text_parts.push(before.trim().to_string());
1611        }
1612
1613        let Some(close_tag) = (match open_tag {
1614            "<tool_call>" => Some("</tool_call>"),
1615            "<toolcall>" => Some("</toolcall>"),
1616            "<tool-call>" => Some("</tool-call>"),
1617            "<invoke>" => Some("</invoke>"),
1618            "<minimax:tool_call>" => Some("</minimax:tool_call>"),
1619            "<minimax:toolcall>" => Some("</minimax:toolcall>"),
1620            _ => None,
1621        }) else {
1622            break;
1623        };
1624
1625        let after_open = &remaining[start + open_tag.len()..];
1626        if let Some(close_idx) = after_open.find(close_tag) {
1627            let inner = &after_open[..close_idx];
1628            let mut parsed_any = false;
1629
1630            // Try JSON format first
1631            let json_values = extract_json_values(inner);
1632            for value in json_values {
1633                let parsed_calls = parse_tool_calls_from_json_value(&value);
1634                if !parsed_calls.is_empty() {
1635                    parsed_any = true;
1636                    calls.extend(parsed_calls);
1637                }
1638            }
1639
1640            // If JSON parsing failed, try XML format (DeepSeek/GLM style)
1641            if !parsed_any && let Some(xml_calls) = parse_xml_tool_calls(inner) {
1642                calls.extend(xml_calls);
1643                parsed_any = true;
1644            }
1645
1646            if !parsed_any {
1647                // GLM-style shortened body: `shell>uname -a` or `shell\ncommand: date`
1648                if let Some(glm_call) = parse_glm_shortened_body(inner) {
1649                    calls.push(glm_call);
1650                    parsed_any = true;
1651                }
1652            }
1653
1654            if !parsed_any {
1655                ::zeroclaw_log::record!(
1656                    WARN,
1657                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
1658                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
1659                    "Malformed <tool_call>: expected tool-call object in tag body (JSON/XML/GLM)"
1660                );
1661            }
1662
1663            remaining = &after_open[close_idx + close_tag.len()..];
1664        } else {
1665            // Matching close tag not found — try cross-alias close tags first.
1666            // Models sometimes mix open/close tag aliases (e.g. <tool_call>...</invoke>).
1667            let mut resolved = false;
1668            if let Some((cross_idx, cross_tag)) = find_first_tag(after_open, &TOOL_CALL_CLOSE_TAGS)
1669            {
1670                let inner = &after_open[..cross_idx];
1671                let mut parsed_any = false;
1672
1673                // Try JSON
1674                let json_values = extract_json_values(inner);
1675                for value in json_values {
1676                    let parsed_calls = parse_tool_calls_from_json_value(&value);
1677                    if !parsed_calls.is_empty() {
1678                        parsed_any = true;
1679                        calls.extend(parsed_calls);
1680                    }
1681                }
1682
1683                // Try XML
1684                if !parsed_any && let Some(xml_calls) = parse_xml_tool_calls(inner) {
1685                    calls.extend(xml_calls);
1686                    parsed_any = true;
1687                }
1688
1689                // Try GLM shortened body
1690                if !parsed_any && let Some(glm_call) = parse_glm_shortened_body(inner) {
1691                    calls.push(glm_call);
1692                    parsed_any = true;
1693                }
1694
1695                if parsed_any {
1696                    remaining = &after_open[cross_idx + cross_tag.len()..];
1697                    resolved = true;
1698                }
1699            }
1700
1701            if resolved {
1702                continue;
1703            }
1704
1705            // No cross-alias close tag resolved — fall back to JSON recovery
1706            // from unclosed tags (brace-balancing).
1707            if let Some(json_end) = find_json_end(after_open)
1708                && let Ok(value) =
1709                    serde_json::from_str::<serde_json::Value>(&after_open[..json_end])
1710            {
1711                let parsed_calls = parse_tool_calls_from_json_value(&value);
1712                if !parsed_calls.is_empty() {
1713                    calls.extend(parsed_calls);
1714                    remaining = strip_leading_close_tags(&after_open[json_end..]);
1715                    continue;
1716                }
1717            }
1718
1719            if let Some((value, consumed_end)) = extract_first_json_value_with_end(after_open) {
1720                let parsed_calls = parse_tool_calls_from_json_value(&value);
1721                if !parsed_calls.is_empty() {
1722                    calls.extend(parsed_calls);
1723                    remaining = strip_leading_close_tags(&after_open[consumed_end..]);
1724                    continue;
1725                }
1726            }
1727
1728            // Last resort: try GLM shortened body on everything after the open tag.
1729            // The model may have emitted `<tool_call>shell>ls` with no close tag at all.
1730            let glm_input = after_open.trim();
1731            if let Some(glm_call) = parse_glm_shortened_body(glm_input) {
1732                calls.push(glm_call);
1733                remaining = "";
1734                continue;
1735            }
1736
1737            remaining = &remaining[start..];
1738            break;
1739        }
1740    }
1741
1742    // If XML tags found nothing, try markdown code blocks with tool_call language.
1743    // Models behind OpenRouter sometimes output ```tool_call ... ``` or hybrid
1744    // ```tool_call ... </tool_call> instead of structured API calls or XML tags.
1745    if calls.is_empty() {
1746        static MD_TOOL_CALL_RE: LazyLock<Regex> = LazyLock::new(|| {
1747            Regex::new(
1748                r"(?s)```(?:tool[_-]?call|invoke)\s*\n(.*?)(?:```|</tool[_-]?call>|</toolcall>|</invoke>|</minimax:toolcall>)",
1749            )
1750            .unwrap()
1751        });
1752        let mut md_text_parts: Vec<String> = Vec::new();
1753        let mut last_end = 0;
1754
1755        for cap in MD_TOOL_CALL_RE.captures_iter(response) {
1756            let full_match = cap.get(0).unwrap();
1757            let before = &response[last_end..full_match.start()];
1758            if !before.trim().is_empty() {
1759                md_text_parts.push(before.trim().to_string());
1760            }
1761            let inner = &cap[1];
1762            let json_values = extract_json_values(inner);
1763            for value in json_values {
1764                let parsed_calls = parse_tool_calls_from_json_value(&value);
1765                calls.extend(parsed_calls);
1766            }
1767            last_end = full_match.end();
1768        }
1769
1770        if !calls.is_empty() {
1771            let after = &response[last_end..];
1772            if !after.trim().is_empty() {
1773                md_text_parts.push(after.trim().to_string());
1774            }
1775            text_parts = md_text_parts;
1776            remaining = "";
1777        }
1778    }
1779
1780    // Try ```tool <name> format used by some model_providers (e.g., xAI grok)
1781    // Example: ```tool file_write\n{"path": "...", "content": "..."}\n```
1782    if calls.is_empty() {
1783        static MD_TOOL_NAME_RE: LazyLock<Regex> =
1784            LazyLock::new(|| Regex::new(r"(?s)```tool\s+(\w+)\s*\n(.*?)(?:```|$)").unwrap());
1785        let mut md_text_parts: Vec<String> = Vec::new();
1786        let mut last_end = 0;
1787
1788        for cap in MD_TOOL_NAME_RE.captures_iter(response) {
1789            let full_match = cap.get(0).unwrap();
1790            let before = &response[last_end..full_match.start()];
1791            if !before.trim().is_empty() {
1792                md_text_parts.push(before.trim().to_string());
1793            }
1794            let tool_name = &cap[1];
1795            let inner = &cap[2];
1796
1797            // Try to parse the inner content as JSON arguments
1798            let json_values = extract_json_values(inner);
1799            if json_values.is_empty() {
1800                // Log a warning if we found a tool block but couldn't parse arguments
1801                ::zeroclaw_log::record!(WARN, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_outcome(::zeroclaw_log::EventOutcome::Unknown).with_attrs(::serde_json::json!({"tool_name": tool_name, "inner": inner.chars().take(100).collect::<String>()})), "Found ```tool <name> block but could not parse JSON arguments");
1802            } else {
1803                for value in json_values {
1804                    let arguments = if value.is_object() {
1805                        value
1806                    } else {
1807                        serde_json::Value::Object(serde_json::Map::new())
1808                    };
1809                    calls.push(ParsedToolCall {
1810                        name: tool_name.to_string(),
1811                        arguments,
1812                        tool_call_id: None,
1813                    });
1814                }
1815            }
1816            last_end = full_match.end();
1817        }
1818
1819        if !calls.is_empty() {
1820            let after = &response[last_end..];
1821            if !after.trim().is_empty() {
1822                md_text_parts.push(after.trim().to_string());
1823            }
1824            text_parts = md_text_parts;
1825            remaining = "";
1826        }
1827    }
1828
1829    // XML attribute-style tool calls:
1830    // <minimax:toolcall>
1831    // <invoke name="shell">
1832    // <parameter name="command">ls</parameter>
1833    // </invoke>
1834    // </minimax:toolcall>
1835    if calls.is_empty() {
1836        let xml_calls = parse_xml_attribute_tool_calls(remaining);
1837        if !xml_calls.is_empty() {
1838            let mut cleaned_text = remaining.to_string();
1839            for call in xml_calls {
1840                calls.push(call);
1841                // Try to remove the XML from text
1842                if let Some(start) = cleaned_text.find("<minimax:toolcall>")
1843                    && let Some(end) = cleaned_text.find("</minimax:toolcall>")
1844                {
1845                    let end_pos = end + "</minimax:toolcall>".len();
1846                    if end_pos <= cleaned_text.len() {
1847                        cleaned_text =
1848                            format!("{}{}", &cleaned_text[..start], &cleaned_text[end_pos..]);
1849                    }
1850                }
1851            }
1852            if !cleaned_text.trim().is_empty() {
1853                text_parts.push(cleaned_text.trim().to_string());
1854            }
1855            remaining = "";
1856        }
1857    }
1858
1859    // Perl/hash-ref style tool calls:
1860    // TOOL_CALL
1861    // {tool => "shell", args => {
1862    //   --command "ls -la"
1863    //   --description "List current directory contents"
1864    // }}
1865    // /TOOL_CALL
1866    if calls.is_empty() {
1867        let perl_calls = parse_perl_style_tool_calls(remaining);
1868        if !perl_calls.is_empty() {
1869            let mut cleaned_text = remaining.to_string();
1870            for call in perl_calls {
1871                calls.push(call);
1872                // Try to remove the TOOL_CALL block from text
1873                while let Some(start) = cleaned_text.find("TOOL_CALL") {
1874                    if let Some(end) = cleaned_text.find("/TOOL_CALL") {
1875                        let end_pos = end + "/TOOL_CALL".len();
1876                        if end_pos <= cleaned_text.len() {
1877                            cleaned_text =
1878                                format!("{}{}", &cleaned_text[..start], &cleaned_text[end_pos..]);
1879                        }
1880                    } else {
1881                        break;
1882                    }
1883                }
1884            }
1885            if !cleaned_text.trim().is_empty() {
1886                text_parts.push(cleaned_text.trim().to_string());
1887            }
1888            remaining = "";
1889        }
1890    }
1891
1892    // <FunctionCall>
1893    // file_read
1894    // <code>path>/Users/...</code>
1895    // </FunctionCall>
1896    if calls.is_empty() {
1897        let func_calls = parse_function_call_tool_calls(remaining);
1898        if !func_calls.is_empty() {
1899            let mut cleaned_text = remaining.to_string();
1900            for call in func_calls {
1901                calls.push(call);
1902                // Try to remove the FunctionCall block from text
1903                while let Some(start) = cleaned_text.find("<FunctionCall>") {
1904                    if let Some(end) = cleaned_text.find("</FunctionCall>") {
1905                        let end_pos = end + "</FunctionCall>".len();
1906                        if end_pos <= cleaned_text.len() {
1907                            cleaned_text =
1908                                format!("{}{}", &cleaned_text[..start], &cleaned_text[end_pos..]);
1909                        }
1910                    } else {
1911                        break;
1912                    }
1913                }
1914            }
1915            if !cleaned_text.trim().is_empty() {
1916                text_parts.push(cleaned_text.trim().to_string());
1917            }
1918            remaining = "";
1919        }
1920    }
1921
1922    // GLM-style tool calls (browser_open/url>https://..., shell/command>ls, etc.)
1923    if calls.is_empty() {
1924        let glm_calls = parse_glm_style_tool_calls(remaining);
1925        if !glm_calls.is_empty() {
1926            let mut cleaned_text = remaining.to_string();
1927            for (name, args, raw) in &glm_calls {
1928                calls.push(ParsedToolCall {
1929                    name: name.clone(),
1930                    arguments: args.clone(),
1931                    tool_call_id: None,
1932                });
1933                if let Some(r) = raw {
1934                    cleaned_text = cleaned_text.replace(r, "");
1935                }
1936            }
1937            if !cleaned_text.trim().is_empty() {
1938                text_parts.push(cleaned_text.trim().to_string());
1939            }
1940            remaining = "";
1941        }
1942    }
1943
1944    // SECURITY: We do NOT fall back to extracting arbitrary JSON from the response
1945    // here. That would enable prompt injection attacks where malicious content
1946    // (e.g., in emails, files, or web pages) could include JSON that mimics a
1947    // tool call. Tool calls MUST be explicitly wrapped in either:
1948    // 1. OpenAI-style JSON with a "tool_calls" array
1949    // 2. ZeroClaw tool-call tags (<tool_call>, <toolcall>, <tool-call>)
1950    // 3. Markdown code blocks with tool_call/toolcall/tool-call language
1951    // 4. Explicit GLM line-based call formats (e.g. `shell/command>...`)
1952    // This ensures only the LLM's intentional tool calls are executed.
1953
1954    // Remaining text after last tool call
1955    if !remaining.trim().is_empty() {
1956        text_parts.push(remaining.trim().to_string());
1957    }
1958
1959    (text_parts.join("\n"), calls)
1960}
1961
/// Strip every `<think>...</think>` span from model output.
///
/// Reasoning models such as Qwen inline their chain-of-thought in the
/// response text using `<think>` tags; that text has to be removed before
/// tool-call tags are parsed or the output is displayed.
///
/// Text outside the tags is kept in its original order. When a `<think>` tag
/// is never closed, everything from that tag onward is discarded so partial
/// reasoning cannot leak. The result is trimmed of surrounding whitespace.
pub fn strip_think_tags(s: &str) -> String {
    const OPEN_TAG: &str = "<think>";
    const CLOSE_TAG: &str = "</think>";

    let mut visible = String::with_capacity(s.len());
    let mut tail = s;
    while let Some((before, after_open)) = tail.split_once(OPEN_TAG) {
        visible.push_str(before);
        // An unclosed tag leaves nothing further to scan: the rest is dropped.
        tail = after_open
            .split_once(CLOSE_TAG)
            .map_or("", |(_, after_close)| after_close);
    }
    visible.push_str(tail);
    visible.trim().to_string()
}
1985
1986/// Strip prompt-guided tool artifacts from visible output while preserving
1987/// raw model text in history for future turns.
1988pub fn strip_tool_result_blocks(text: &str) -> String {
1989    static TOOL_RESULT_RE: LazyLock<Regex> =
1990        LazyLock::new(|| Regex::new(r"(?s)<tool_result[^>]*>.*?</tool_result>").unwrap());
1991    static THINKING_RE: LazyLock<Regex> =
1992        LazyLock::new(|| Regex::new(r"(?s)<thinking>.*?</thinking>").unwrap());
1993    static THINK_RE: LazyLock<Regex> =
1994        LazyLock::new(|| Regex::new(r"(?s)<think>.*?</think>").unwrap());
1995    static TOOL_RESULTS_PREFIX_RE: LazyLock<Regex> =
1996        LazyLock::new(|| Regex::new(r"(?m)^\[Tool results\]\s*\n?").unwrap());
1997    static EXCESS_BLANK_LINES_RE: LazyLock<Regex> =
1998        LazyLock::new(|| Regex::new(r"\n{3,}").unwrap());
1999
2000    let result = TOOL_RESULT_RE.replace_all(text, "");
2001    let result = THINKING_RE.replace_all(&result, "");
2002    let result = THINK_RE.replace_all(&result, "");
2003    let result = TOOL_RESULTS_PREFIX_RE.replace_all(&result, "");
2004    let result = EXCESS_BLANK_LINES_RE.replace_all(result.trim(), "\n\n");
2005
2006    result.trim().to_string()
2007}
2008
2009pub fn detect_tool_call_parse_issue(
2010    response: &str,
2011    parsed_calls: &[ParsedToolCall],
2012) -> Option<String> {
2013    if !parsed_calls.is_empty() {
2014        return None;
2015    }
2016
2017    let trimmed = response.trim();
2018    if trimmed.is_empty() {
2019        return None;
2020    }
2021
2022    if looks_like_tool_protocol_envelope(trimmed) {
2023        return Some(
2024            "response resembled an internal tool protocol envelope but no valid tool call could be parsed"
2025                .into(),
2026        );
2027    }
2028
2029    if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) {
2030        return has_malformed_tool_protocol_json_signal(&value).then(|| {
2031            "response resembled an internal tool protocol envelope but no valid tool call could be parsed"
2032                .into()
2033        });
2034    }
2035
2036    if has_malformed_tool_protocol_text_signal(trimmed) {
2037        return Some(
2038            "response resembled an internal tool protocol envelope but no valid tool call could be parsed"
2039                .into(),
2040        );
2041    }
2042
2043    let contains_tool_payload_marker = trimmed.contains("<tool_call")
2044        || trimmed.contains("<toolcall")
2045        || trimmed.contains("<tool-call")
2046        || trimmed.contains("```tool_call")
2047        || trimmed.contains("```toolcall")
2048        || trimmed.contains("```tool-call")
2049        || trimmed.contains("```tool file_")
2050        || trimmed.contains("```tool shell")
2051        || trimmed.contains("```tool web_")
2052        || trimmed.contains("```tool memory_")
2053        || trimmed.contains("```tool ") // Generic ```tool <name> pattern
2054        || trimmed.contains("TOOL_CALL")
2055        || trimmed.contains("[TOOL_CALL]")
2056        || trimmed.contains("<FunctionCall>");
2057
2058    if contains_tool_payload_marker {
2059        if looks_like_tool_protocol_example(trimmed) {
2060            return None;
2061        }
2062        if contains_tool_protocol_tag_call(trimmed) {
2063            return Some(
2064                "response resembled a tool-call payload but no valid tool call could be parsed"
2065                    .into(),
2066            );
2067        }
2068
2069        let (visible_text, recovered_calls) = parse_tool_calls(trimmed);
2070        if !recovered_calls.is_empty() && !visible_text.trim().is_empty() {
2071            return None;
2072        }
2073        if !recovered_calls.is_empty() || visible_text.trim().is_empty() {
2074            return Some(
2075                "response resembled a tool-call payload but no valid tool call could be parsed"
2076                    .into(),
2077            );
2078        }
2079    }
2080
2081    if looks_like_malformed_tool_protocol_envelope(trimmed) {
2082        Some("response resembled a tool-call payload but no valid tool call could be parsed".into())
2083    } else {
2084        None
2085    }
2086}
2087
2088pub fn build_native_assistant_history_from_parsed_calls(
2089    text: &str,
2090    tool_calls: &[ParsedToolCall],
2091    reasoning_content: Option<&str>,
2092) -> Option<String> {
2093    // Strict provider validators (DeepSeek V4, NVIDIA NIM, ...) reject
2094    // assistant messages that carry `tool_calls: []`. When there are no
2095    // parsed calls, return None so the caller falls through to a plain
2096    // text assistant message. See #6298.
2097    if tool_calls.is_empty() {
2098        return None;
2099    }
2100
2101    let calls_json = tool_calls
2102        .iter()
2103        .map(|tc| {
2104            Some(serde_json::json!({
2105                "id": tc.tool_call_id.clone()?,
2106                "name": tc.name,
2107                "arguments": serde_json::to_string(&tc.arguments).unwrap_or_else(|_| "{}".to_string()),
2108            }))
2109        })
2110        .collect::<Option<Vec<_>>>()?;
2111
2112    let content = if text.trim().is_empty() {
2113        serde_json::Value::Null
2114    } else {
2115        serde_json::Value::String(text.trim().to_string())
2116    };
2117
2118    let mut obj = serde_json::json!({
2119        "content": content,
2120        "tool_calls": calls_json,
2121    });
2122
2123    if let Some(rc) = reasoning_content {
2124        obj.as_object_mut().unwrap().insert(
2125            "reasoning_content".to_string(),
2126            serde_json::Value::String(rc.to_string()),
2127        );
2128    }
2129
2130    Some(obj.to_string())
2131}
2132
2133#[cfg(test)]
2134mod tests {
2135    use super::*;
2136
2137    #[test]
2138    fn build_native_assistant_history_returns_none_for_empty_calls() {
2139        // Regression: strict providers (DeepSeek V4, NVIDIA NIM) reject
2140        // assistant messages carrying `tool_calls: []`. Empty input must
2141        // not produce a serialised assistant message with an empty array.
2142        // See #6298.
2143        let result = build_native_assistant_history_from_parsed_calls("answer text", &[], None);
2144        assert!(
2145            result.is_none(),
2146            "expected None for empty tool_calls slice, got {result:?}"
2147        );
2148    }
2149
2150    #[test]
2151    fn build_native_assistant_history_returns_none_for_empty_calls_with_reasoning() {
2152        // Even with reasoning_content set, an empty tool_calls slice must
2153        // collapse to None — the caller falls back to a plain assistant
2154        // message, and the reasoning round-trip happens through a separate
2155        // path that does not produce `tool_calls: []`.
2156        let result = build_native_assistant_history_from_parsed_calls(
2157            "answer text",
2158            &[],
2159            Some("deep thought"),
2160        );
2161        assert!(result.is_none());
2162    }
2163
2164    #[test]
2165    fn build_native_assistant_history_emits_tool_calls_when_non_empty() {
2166        // No-regression check: the normal path with a real parsed call
2167        // still produces a serialised assistant message and the
2168        // `tool_calls` field is a non-empty array.
2169        let calls = vec![ParsedToolCall {
2170            name: "shell".into(),
2171            arguments: serde_json::json!({"command": "pwd"}),
2172            tool_call_id: Some("call_1".into()),
2173        }];
2174        let result = build_native_assistant_history_from_parsed_calls("answer", &calls, None);
2175        let s = result.expect("Some(_) for non-empty tool_calls");
2176        let parsed: serde_json::Value = serde_json::from_str(&s).unwrap();
2177        assert_eq!(parsed["content"].as_str(), Some("answer"));
2178        let arr = parsed["tool_calls"].as_array().expect("tool_calls array");
2179        assert_eq!(arr.len(), 1);
2180        assert_eq!(arr[0]["name"].as_str(), Some("shell"));
2181    }
2182
2183    #[test]
2184    fn parse_arguments_value_unwraps_nested_object_string() {
2185        let raw = serde_json::json!({
2186            "service": "gmail",
2187            "params": "{\"maxResults\":3}"
2188        });
2189        let out = parse_arguments_value(Some(&raw));
2190        assert_eq!(out["service"], serde_json::json!("gmail"));
2191        assert_eq!(out["params"], serde_json::json!({"maxResults": 3}));
2192    }
2193
2194    #[test]
2195    fn parse_arguments_value_unwraps_nested_array_string() {
2196        let raw = serde_json::json!({ "items": "[1,2,3]" });
2197        let out = parse_arguments_value(Some(&raw));
2198        assert_eq!(out["items"], serde_json::json!([1, 2, 3]));
2199    }
2200
2201    #[test]
2202    fn parse_arguments_value_leaves_non_json_strings_alone() {
2203        let raw = serde_json::json!({
2204            "greeting": "hello",
2205            "answer": "42",
2206            "truthy": "true",
2207            "broken": "{not json"
2208        });
2209        let out = parse_arguments_value(Some(&raw));
2210        assert_eq!(out["greeting"], serde_json::json!("hello"));
2211        assert_eq!(out["answer"], serde_json::json!("42"));
2212        assert_eq!(out["truthy"], serde_json::json!("true"));
2213        assert_eq!(out["broken"], serde_json::json!("{not json"));
2214    }
2215
2216    #[test]
2217    fn parse_arguments_value_handles_double_encoding() {
2218        let inner = r#"{"params":"{\"maxResults\":3}"}"#;
2219        let raw = serde_json::Value::String(inner.to_string());
2220        let out = parse_arguments_value(Some(&raw));
2221        assert_eq!(out["params"], serde_json::json!({"maxResults": 3}));
2222    }
2223
2224    #[test]
2225    fn parse_tool_call_value_handles_gemini_double_encoded_params() {
2226        let inner = r#"{"service":"gmail","resource":"users","sub_resource":"messages","method":"list","params":"{\"maxResults\":3}"}"#;
2227        let call_json = serde_json::json!({
2228            "function": {
2229                "name": "google_workspace",
2230                "arguments": inner
2231            }
2232        });
2233        let parsed = parse_tool_call_value(&call_json).expect("expected a parsed call");
2234        assert_eq!(parsed.name, "google_workspace");
2235        assert_eq!(
2236            parsed.arguments["params"],
2237            serde_json::json!({"maxResults": 3})
2238        );
2239        assert_eq!(
2240            parsed.arguments["sub_resource"],
2241            serde_json::json!("messages")
2242        );
2243    }
2244
2245    #[test]
2246    fn parse_tool_calls_extracts_multiple_calls() {
2247        let response = r#"<tool_call>
2248{"name": "file_read", "arguments": {"path": "a.txt"}}
2249</tool_call>
2250<tool_call>
2251{"name": "file_read", "arguments": {"path": "b.txt"}}
2252</tool_call>"#;
2253
2254        let (_, calls) = parse_tool_calls(response);
2255        assert_eq!(calls.len(), 2);
2256        assert_eq!(calls[0].name, "file_read");
2257        assert_eq!(calls[1].name, "file_read");
2258    }
2259
2260    #[test]
2261    fn parse_tool_calls_returns_text_only_when_no_calls() {
2262        let response = "Just a normal response with no tools.";
2263        let (text, calls) = parse_tool_calls(response);
2264        assert_eq!(text, "Just a normal response with no tools.");
2265        assert!(calls.is_empty());
2266    }
2267
2268    #[test]
2269    fn parse_tool_calls_handles_malformed_json() {
2270        let response = r#"<tool_call>
2271not valid json
2272</tool_call>
2273Some text after."#;
2274
2275        let (text, calls) = parse_tool_calls(response);
2276        assert!(calls.is_empty());
2277        assert!(text.contains("Some text after."));
2278    }
2279
2280    #[test]
2281    fn parse_tool_calls_text_before_and_after() {
2282        let response = r#"Before text.
2283<tool_call>
2284{"name": "shell", "arguments": {"command": "echo hi"}}
2285</tool_call>
2286After text."#;
2287
2288        let (text, calls) = parse_tool_calls(response);
2289        assert!(text.contains("Before text."));
2290        assert!(text.contains("After text."));
2291        assert_eq!(calls.len(), 1);
2292    }
2293
2294    #[test]
2295    fn parse_tool_calls_handles_openai_format() {
2296        // OpenAI-style response with tool_calls array
2297        let response = r#"{"content": "Let me check that for you.", "tool_calls": [{"type": "function", "function": {"name": "shell", "arguments": "{\"command\": \"ls -la\"}"}}]}"#;
2298
2299        let (text, calls) = parse_tool_calls(response);
2300        assert_eq!(text, "Let me check that for you.");
2301        assert_eq!(calls.len(), 1);
2302        assert_eq!(calls[0].name, "shell");
2303        assert_eq!(
2304            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2305            "ls -la"
2306        );
2307    }
2308
2309    #[test]
2310    fn parse_tool_calls_handles_openai_format_multiple_calls() {
2311        let response = r#"{"tool_calls": [{"type": "function", "function": {"name": "file_read", "arguments": "{\"path\": \"a.txt\"}"}}, {"type": "function", "function": {"name": "file_read", "arguments": "{\"path\": \"b.txt\"}"}}]}"#;
2312
2313        let (_, calls) = parse_tool_calls(response);
2314        assert_eq!(calls.len(), 2);
2315        assert_eq!(calls[0].name, "file_read");
2316        assert_eq!(calls[1].name, "file_read");
2317    }
2318
2319    #[test]
2320    fn parse_tool_calls_openai_format_without_content() {
2321        // Some model_providers don't include content field with tool_calls
2322        let response = r#"{"tool_calls": [{"type": "function", "function": {"name": "memory_recall", "arguments": "{}"}}]}"#;
2323
2324        let (text, calls) = parse_tool_calls(response);
2325        assert!(text.is_empty()); // No content field
2326        assert_eq!(calls.len(), 1);
2327        assert_eq!(calls[0].name, "memory_recall");
2328    }
2329
2330    #[test]
2331    fn parse_tool_calls_preserves_openai_tool_call_ids() {
2332        let response = r#"{"tool_calls":[{"id":"call_42","function":{"name":"shell","arguments":"{\"command\":\"pwd\"}"}}]}"#;
2333        let (_, calls) = parse_tool_calls(response);
2334        assert_eq!(calls.len(), 1);
2335        assert_eq!(calls[0].tool_call_id.as_deref(), Some("call_42"));
2336    }
2337
2338    #[test]
2339    fn parse_tool_calls_handles_markdown_json_inside_tool_call_tag() {
2340        let response = r#"<tool_call>
2341```json
2342{"name": "file_write", "arguments": {"path": "test.py", "content": "print('ok')"}}
2343```
2344</tool_call>"#;
2345
2346        let (text, calls) = parse_tool_calls(response);
2347        assert!(text.is_empty());
2348        assert_eq!(calls.len(), 1);
2349        assert_eq!(calls[0].name, "file_write");
2350        assert_eq!(
2351            calls[0].arguments.get("path").unwrap().as_str().unwrap(),
2352            "test.py"
2353        );
2354    }
2355
2356    #[test]
2357    fn parse_tool_calls_handles_noisy_tool_call_tag_body() {
2358        let response = r#"<tool_call>
2359I will now call the tool with this payload:
2360{"name": "shell", "arguments": {"command": "pwd"}}
2361</tool_call>"#;
2362
2363        let (text, calls) = parse_tool_calls(response);
2364        assert!(text.is_empty());
2365        assert_eq!(calls.len(), 1);
2366        assert_eq!(calls[0].name, "shell");
2367        assert_eq!(
2368            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2369            "pwd"
2370        );
2371    }
2372
2373    #[test]
2374    fn parse_tool_calls_handles_tool_call_inline_attributes_with_send_message_alias() {
2375        let response = r#"<tool_call>send_message channel="user_channel" message="Hello! How can I assist you today?"</tool_call>"#;
2376
2377        let (text, calls) = parse_tool_calls(response);
2378        assert!(text.is_empty());
2379        assert_eq!(calls.len(), 1);
2380        assert_eq!(calls[0].name, "message_send");
2381        assert_eq!(
2382            calls[0].arguments.get("channel").unwrap().as_str().unwrap(),
2383            "user_channel"
2384        );
2385        assert_eq!(
2386            calls[0].arguments.get("message").unwrap().as_str().unwrap(),
2387            "Hello! How can I assist you today?"
2388        );
2389    }
2390
2391    #[test]
2392    fn parse_tool_calls_handles_tool_call_function_style_arguments() {
2393        let response = r#"<tool_call>message_send(channel="general", message="test")</tool_call>"#;
2394
2395        let (text, calls) = parse_tool_calls(response);
2396        assert!(text.is_empty());
2397        assert_eq!(calls.len(), 1);
2398        assert_eq!(calls[0].name, "message_send");
2399        assert_eq!(
2400            calls[0].arguments.get("channel").unwrap().as_str().unwrap(),
2401            "general"
2402        );
2403        assert_eq!(
2404            calls[0].arguments.get("message").unwrap().as_str().unwrap(),
2405            "test"
2406        );
2407    }
2408
2409    #[test]
2410    fn parse_tool_calls_handles_xml_nested_tool_payload() {
2411        let response = r#"<tool_call>
2412<memory_recall>
2413<query>project roadmap</query>
2414</memory_recall>
2415</tool_call>"#;
2416
2417        let (text, calls) = parse_tool_calls(response);
2418        assert!(text.is_empty());
2419        assert_eq!(calls.len(), 1);
2420        assert_eq!(calls[0].name, "memory_recall");
2421        assert_eq!(
2422            calls[0].arguments.get("query").unwrap().as_str().unwrap(),
2423            "project roadmap"
2424        );
2425    }
2426
2427    #[test]
2428    fn parse_tool_calls_ignores_xml_thinking_wrapper() {
2429        let response = r#"<tool_call>
2430<thinking>Need to inspect memory first</thinking>
2431<memory_recall>
2432<query>recent deploy notes</query>
2433</memory_recall>
2434</tool_call>"#;
2435
2436        let (text, calls) = parse_tool_calls(response);
2437        assert!(text.is_empty());
2438        assert_eq!(calls.len(), 1);
2439        assert_eq!(calls[0].name, "memory_recall");
2440        assert_eq!(
2441            calls[0].arguments.get("query").unwrap().as_str().unwrap(),
2442            "recent deploy notes"
2443        );
2444    }
2445
2446    #[test]
2447    fn parse_tool_calls_handles_xml_with_json_arguments() {
2448        let response = r#"<tool_call>
2449<shell>{"command":"pwd"}</shell>
2450</tool_call>"#;
2451
2452        let (text, calls) = parse_tool_calls(response);
2453        assert!(text.is_empty());
2454        assert_eq!(calls.len(), 1);
2455        assert_eq!(calls[0].name, "shell");
2456        assert_eq!(
2457            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2458            "pwd"
2459        );
2460    }
2461
2462    #[test]
2463    fn parse_tool_calls_handles_markdown_tool_call_fence() {
2464        let response = r#"I'll check that.
2465```tool_call
2466{"name": "shell", "arguments": {"command": "pwd"}}
2467```
2468Done."#;
2469
2470        let (text, calls) = parse_tool_calls(response);
2471        assert_eq!(calls.len(), 1);
2472        assert_eq!(calls[0].name, "shell");
2473        assert_eq!(
2474            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2475            "pwd"
2476        );
2477        assert!(text.contains("I'll check that."));
2478        assert!(text.contains("Done."));
2479        assert!(!text.contains("```tool_call"));
2480    }
2481
2482    #[test]
2483    fn parse_tool_calls_handles_markdown_tool_call_hybrid_close_tag() {
2484        let response = r#"Preface
2485```tool-call
2486{"name": "shell", "arguments": {"command": "date"}}
2487</tool_call>
2488Tail"#;
2489
2490        let (text, calls) = parse_tool_calls(response);
2491        assert_eq!(calls.len(), 1);
2492        assert_eq!(calls[0].name, "shell");
2493        assert_eq!(
2494            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2495            "date"
2496        );
2497        assert!(text.contains("Preface"));
2498        assert!(text.contains("Tail"));
2499        assert!(!text.contains("```tool-call"));
2500    }
2501
2502    #[test]
2503    fn parse_tool_calls_handles_markdown_invoke_fence() {
2504        let response = r#"Checking.
2505```invoke
2506{"name": "shell", "arguments": {"command": "date"}}
2507```
2508Done."#;
2509
2510        let (text, calls) = parse_tool_calls(response);
2511        assert_eq!(calls.len(), 1);
2512        assert_eq!(calls[0].name, "shell");
2513        assert_eq!(
2514            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2515            "date"
2516        );
2517        assert!(text.contains("Checking."));
2518        assert!(text.contains("Done."));
2519    }
2520
2521    #[test]
2522    fn parse_tool_calls_handles_tool_name_fence_format() {
2523        //: xAI grok models use ```tool <name> format
2524        let response = r#"I'll write a test file.
2525```tool file_write
2526{"path": "/home/user/test.txt", "content": "Hello world"}
2527```
2528Done."#;
2529
2530        let (text, calls) = parse_tool_calls(response);
2531        assert_eq!(calls.len(), 1);
2532        assert_eq!(calls[0].name, "file_write");
2533        assert_eq!(
2534            calls[0].arguments.get("path").unwrap().as_str().unwrap(),
2535            "/home/user/test.txt"
2536        );
2537        assert!(text.contains("I'll write a test file."));
2538        assert!(text.contains("Done."));
2539    }
2540
2541    #[test]
2542    fn parse_tool_calls_handles_tool_name_fence_shell() {
2543        //: Test shell command in ```tool shell format
2544        let response = r#"```tool shell
2545{"command": "ls -la"}
2546```"#;
2547
2548        let (_text, calls) = parse_tool_calls(response);
2549        assert_eq!(calls.len(), 1);
2550        assert_eq!(calls[0].name, "shell");
2551        assert_eq!(
2552            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2553            "ls -la"
2554        );
2555    }
2556
2557    #[test]
2558    fn parse_tool_calls_handles_multiple_tool_name_fences() {
2559        // Multiple tool calls in ```tool <name> format
2560        let response = r#"First, I'll write a file.
2561```tool file_write
2562{"path": "/tmp/a.txt", "content": "A"}
2563```
2564Then read it.
2565```tool file_read
2566{"path": "/tmp/a.txt"}
2567```
2568Done."#;
2569
2570        let (text, calls) = parse_tool_calls(response);
2571        assert_eq!(calls.len(), 2);
2572        assert_eq!(calls[0].name, "file_write");
2573        assert_eq!(calls[1].name, "file_read");
2574        assert!(text.contains("First, I'll write a file."));
2575        assert!(text.contains("Then read it."));
2576        assert!(text.contains("Done."));
2577    }
2578
2579    #[test]
2580    fn parse_tool_calls_handles_toolcall_tag_alias() {
2581        let response = r#"<toolcall>
2582{"name": "shell", "arguments": {"command": "date"}}
2583</toolcall>"#;
2584
2585        let (text, calls) = parse_tool_calls(response);
2586        assert!(text.is_empty());
2587        assert_eq!(calls.len(), 1);
2588        assert_eq!(calls[0].name, "shell");
2589        assert_eq!(
2590            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2591            "date"
2592        );
2593    }
2594
2595    #[test]
2596    fn parse_tool_calls_handles_tool_dash_call_tag_alias() {
2597        let response = r#"<tool-call>
2598{"name": "shell", "arguments": {"command": "whoami"}}
2599</tool-call>"#;
2600
2601        let (text, calls) = parse_tool_calls(response);
2602        assert!(text.is_empty());
2603        assert_eq!(calls.len(), 1);
2604        assert_eq!(calls[0].name, "shell");
2605        assert_eq!(
2606            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2607            "whoami"
2608        );
2609    }
2610
2611    #[test]
2612    fn parse_tool_calls_handles_invoke_tag_alias() {
2613        let response = r#"<invoke>
2614{"name": "shell", "arguments": {"command": "uptime"}}
2615</invoke>"#;
2616
2617        let (text, calls) = parse_tool_calls(response);
2618        assert!(text.is_empty());
2619        assert_eq!(calls.len(), 1);
2620        assert_eq!(calls[0].name, "shell");
2621        assert_eq!(
2622            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2623            "uptime"
2624        );
2625    }
2626
2627    #[test]
2628    fn parse_tool_calls_handles_minimax_invoke_parameter_format() {
2629        let response = r#"<minimax:tool_call>
2630<invoke name="shell">
2631<parameter name="command">sqlite3 /tmp/test.db ".tables"</parameter>
2632</invoke>
2633</minimax:tool_call>"#;
2634
2635        let (text, calls) = parse_tool_calls(response);
2636        assert!(text.is_empty());
2637        assert_eq!(calls.len(), 1);
2638        assert_eq!(calls[0].name, "shell");
2639        assert_eq!(
2640            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2641            r#"sqlite3 /tmp/test.db ".tables""#
2642        );
2643    }
2644
2645    #[test]
2646    fn parse_tool_calls_handles_minimax_invoke_with_surrounding_text() {
2647        let response = r#"Preface
2648<minimax:tool_call>
2649<invoke name='http_request'>
2650<parameter name='url'>https://example.com</parameter>
2651<parameter name='method'>GET</parameter>
2652</invoke>
2653</minimax:tool_call>
2654Tail"#;
2655
2656        let (text, calls) = parse_tool_calls(response);
2657        assert!(text.contains("Preface"));
2658        assert!(text.contains("Tail"));
2659        assert_eq!(calls.len(), 1);
2660        assert_eq!(calls[0].name, "http_request");
2661        assert_eq!(
2662            calls[0].arguments.get("url").unwrap().as_str().unwrap(),
2663            "https://example.com"
2664        );
2665        assert_eq!(
2666            calls[0].arguments.get("method").unwrap().as_str().unwrap(),
2667            "GET"
2668        );
2669    }
2670
2671    #[test]
2672    fn parse_tool_calls_handles_minimax_toolcall_alias_and_cross_close_tag() {
2673        let response = r#"<tool_call>
2674{"name":"shell","arguments":{"command":"date"}}
2675</minimax:toolcall>"#;
2676
2677        let (text, calls) = parse_tool_calls(response);
2678        assert!(text.is_empty());
2679        assert_eq!(calls.len(), 1);
2680        assert_eq!(calls[0].name, "shell");
2681        assert_eq!(
2682            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2683            "date"
2684        );
2685    }
2686
2687    #[test]
2688    fn parse_tool_calls_handles_perl_style_tool_call_blocks() {
2689        let response = r#"TOOL_CALL
2690{tool => "shell", args => { --command "uname -a" }}}
2691/TOOL_CALL"#;
2692
2693        let calls = parse_perl_style_tool_calls(response);
2694        assert_eq!(calls.len(), 1);
2695        assert_eq!(calls[0].name, "shell");
2696        assert_eq!(
2697            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2698            "uname -a"
2699        );
2700    }
2701
2702    #[test]
2703    fn parse_tool_calls_handles_square_bracket_tool_call_blocks() {
2704        let response =
2705            r#"[TOOL_CALL]{tool => "shell", args => {--command "echo hello"}}[/TOOL_CALL]"#;
2706
2707        let calls = parse_perl_style_tool_calls(response);
2708        assert_eq!(calls.len(), 1);
2709        assert_eq!(calls[0].name, "shell");
2710        assert_eq!(
2711            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2712            "echo hello"
2713        );
2714    }
2715
2716    #[test]
2717    fn parse_tool_calls_handles_square_bracket_multiline() {
2718        let response = r#"[TOOL_CALL]
2719{tool => "file_read", args => {
2720  --path "/tmp/test.txt"
2721  --description "Read test file"
2722}}
2723[/TOOL_CALL]"#;
2724
2725        let calls = parse_perl_style_tool_calls(response);
2726        assert_eq!(calls.len(), 1);
2727        assert_eq!(calls[0].name, "file_read");
2728        assert_eq!(
2729            calls[0].arguments.get("path").unwrap().as_str().unwrap(),
2730            "/tmp/test.txt"
2731        );
2732        assert_eq!(
2733            calls[0]
2734                .arguments
2735                .get("description")
2736                .unwrap()
2737                .as_str()
2738                .unwrap(),
2739            "Read test file"
2740        );
2741    }
2742
2743    #[test]
2744    fn parse_tool_calls_recovers_unclosed_tool_call_with_json() {
2745        let response = r#"I will call the tool now.
2746<tool_call>
2747{"name": "shell", "arguments": {"command": "uptime -p"}}"#;
2748
2749        let (text, calls) = parse_tool_calls(response);
2750        assert!(text.contains("I will call the tool now."));
2751        assert_eq!(calls.len(), 1);
2752        assert_eq!(calls[0].name, "shell");
2753        assert_eq!(
2754            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2755            "uptime -p"
2756        );
2757    }
2758
2759    #[test]
2760    fn parse_tool_calls_recovers_mismatched_close_tag() {
2761        let response = r#"<tool_call>
2762{"name": "shell", "arguments": {"command": "uptime"}}
2763</arg_value>"#;
2764
2765        let (text, calls) = parse_tool_calls(response);
2766        assert!(text.is_empty());
2767        assert_eq!(calls.len(), 1);
2768        assert_eq!(calls[0].name, "shell");
2769        assert_eq!(
2770            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2771            "uptime"
2772        );
2773    }
2774
2775    #[test]
2776    fn parse_tool_calls_recovers_cross_alias_closing_tags() {
2777        let response = r#"<toolcall>
2778{"name": "shell", "arguments": {"command": "date"}}
2779</tool_call>"#;
2780
2781        let (text, calls) = parse_tool_calls(response);
2782        assert!(text.is_empty());
2783        assert_eq!(calls.len(), 1);
2784        assert_eq!(calls[0].name, "shell");
2785    }
2786
2787    #[test]
2788    fn parse_tool_calls_rejects_raw_tool_json_without_tags() {
2789        // SECURITY: Raw JSON without explicit wrappers should NOT be parsed
2790        // This prevents prompt injection attacks where malicious content
2791        // could include JSON that mimics a tool call.
2792        let response = r#"Sure, creating the file now.
2793{"name": "file_write", "arguments": {"path": "hello.py", "content": "print('hello')"}}"#;
2794
2795        let (text, calls) = parse_tool_calls(response);
2796        assert!(text.contains("Sure, creating the file now."));
2797        assert_eq!(
2798            calls.len(),
2799            0,
2800            "Raw JSON without wrappers should not be parsed"
2801        );
2802    }
2803
2804    #[test]
2805    fn parse_tool_calls_handles_empty_tool_result() {
2806        // Recovery: Empty tool_result tag should be handled gracefully
2807        let response = r#"I'll run that command.
2808<tool_result name="shell">
2809
2810</tool_result>
2811Done."#;
2812        let (text, calls) = parse_tool_calls(response);
2813        assert!(text.contains("Done."));
2814        assert!(calls.is_empty());
2815    }
2816
2817    #[test]
2818    fn strip_tool_result_blocks_removes_single_block() {
2819        let input = r#"<tool_result name="memory_recall" status="ok">
2820{"matches":["hello"]}
2821</tool_result>
2822Here is my answer."#;
2823        assert_eq!(strip_tool_result_blocks(input), "Here is my answer.");
2824    }
2825
2826    #[test]
2827    fn strip_tool_result_blocks_removes_multiple_blocks() {
2828        let input = r#"<tool_result name="memory_recall" status="ok">
2829{"matches":[]}
2830</tool_result>
2831<tool_result name="shell" status="ok">
2832done
2833</tool_result>
2834Final answer."#;
2835        assert_eq!(strip_tool_result_blocks(input), "Final answer.");
2836    }
2837
2838    #[test]
2839    fn strip_tool_result_blocks_removes_prefix() {
2840        let input =
2841            "[Tool results]\n<tool_result name=\"shell\" status=\"ok\">\nok\n</tool_result>\nDone.";
2842        assert_eq!(strip_tool_result_blocks(input), "Done.");
2843    }
2844
2845    #[test]
2846    fn strip_tool_result_blocks_removes_thinking() {
2847        let input = "<thinking>\nLet me think...\n</thinking>\nHere is the answer.";
2848        assert_eq!(strip_tool_result_blocks(input), "Here is the answer.");
2849    }
2850
2851    #[test]
2852    fn strip_tool_result_blocks_removes_think_tags() {
2853        let input = "<think>\nLet me reason...\n</think>\nHere is the answer.";
2854        assert_eq!(strip_tool_result_blocks(input), "Here is the answer.");
2855    }
2856
2857    #[test]
2858    fn parse_tool_calls_strips_think_before_tool_call() {
2859        // Qwen regression: <think> tags before <tool_call> tags should be
2860        // stripped, allowing the tool call to be parsed correctly.
2861        let response = "<think>I need to list files to understand the project</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n</tool_call>";
2862        let (text, calls) = parse_tool_calls(response);
2863        assert_eq!(
2864            calls.len(),
2865            1,
2866            "should parse tool call after stripping think tags"
2867        );
2868        assert_eq!(calls[0].name, "shell");
2869        assert_eq!(
2870            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2871            "ls"
2872        );
2873        assert!(text.is_empty(), "think content should not appear as text");
2874    }
2875
2876    #[test]
2877    fn parse_tool_calls_strips_think_only_returns_empty() {
2878        // When response is only <think> tags with no tool calls, should
2879        // return empty text and no calls.
2880        let response = "<think>Just thinking, no action needed</think>";
2881        let (text, calls) = parse_tool_calls(response);
2882        assert!(calls.is_empty());
2883        assert!(text.is_empty());
2884    }
2885
2886    #[test]
2887    fn parse_tool_calls_handles_qwen_think_with_multiple_tool_calls() {
2888        let response = "<think>I need to check two things</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n</tool_call>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"pwd\"}}\n</tool_call>";
2889        let (_, calls) = parse_tool_calls(response);
2890        assert_eq!(calls.len(), 2);
2891        assert_eq!(
2892            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
2893            "date"
2894        );
2895        assert_eq!(
2896            calls[1].arguments.get("command").unwrap().as_str().unwrap(),
2897            "pwd"
2898        );
2899    }
2900
2901    #[test]
2902    fn strip_tool_result_blocks_preserves_clean_text() {
2903        let input = "Hello, this is a normal response.";
2904        assert_eq!(strip_tool_result_blocks(input), input);
2905    }
2906
2907    #[test]
2908    fn strip_tool_result_blocks_returns_empty_for_only_tags() {
2909        let input = "<tool_result name=\"memory_recall\" status=\"ok\">\n{}\n</tool_result>";
2910        assert_eq!(strip_tool_result_blocks(input), "");
2911    }
2912
2913    #[test]
2914    fn parse_arguments_value_handles_null() {
2915        // Recovery: null arguments are returned as-is (Value::Null)
2916        let value = serde_json::json!(null);
2917        let result = parse_arguments_value(Some(&value));
2918        assert!(result.is_null());
2919    }
2920
2921    #[test]
2922    fn parse_tool_calls_handles_empty_tool_calls_array() {
2923        // Recovery: Empty tool_calls array returns original response (no tool parsing)
2924        let response = r#"{"content": "Hello", "tool_calls": []}"#;
2925        let (text, calls) = parse_tool_calls(response);
2926        // When tool_calls is empty, the entire JSON is returned as text
2927        assert!(text.contains("Hello"));
2928        assert!(calls.is_empty());
2929    }
2930
2931    #[test]
2932    fn detect_tool_call_parse_issue_flags_malformed_payloads() {
2933        let response =
2934            "<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"pwd\"}</tool_call>";
2935        let issue = detect_tool_call_parse_issue(response, &[]);
2936        assert!(
2937            issue.is_some(),
2938            "malformed tool payload should be flagged for diagnostics"
2939        );
2940    }
2941
2942    #[test]
2943    fn detect_tool_call_parse_issue_ignores_normal_text() {
2944        let issue = detect_tool_call_parse_issue("Thanks, done.", &[]);
2945        assert!(issue.is_none());
2946    }
2947
2948    #[test]
2949    fn detect_tool_call_parse_issue_ignores_empty_tool_calls_array() {
2950        let issue = detect_tool_call_parse_issue(r#"{"content":"Hello","tool_calls":[]}"#, &[]);
2951        assert!(issue.is_none());
2952    }
2953
2954    #[test]
2955    fn detect_tool_call_parse_issue_ignores_json_fenced_business_tool_calls() {
2956        let response = r#"```json
2957{"tool_calls":[{"service":"billing","count":2}]}
2958```"#;
2959        let issue = detect_tool_call_parse_issue(response, &[]);
2960        assert!(issue.is_none());
2961    }
2962
2963    #[test]
2964    fn detect_tool_call_parse_issue_ignores_tool_call_fenced_example() {
2965        let response = r#"```tool_call
2966{"name":"shell","arguments":{"command":"pwd"}}
2967```
2968This is an example, not an invocation."#;
2969
2970        let issue = detect_tool_call_parse_issue(response, &[]);
2971
2972        assert!(issue.is_none());
2973    }
2974
2975    #[test]
2976    fn detect_tool_call_parse_issue_flags_standalone_tool_call_fence() {
2977        let response = r#"```tool_call
2978{"name":"shell","arguments":{"command":"pwd"}}
2979```"#;
2980
2981        let issue = detect_tool_call_parse_issue(response, &[]);
2982
2983        assert!(issue.is_some());
2984    }
2985
2986    #[test]
2987    fn detect_tool_call_parse_issue_ignores_tool_call_tag_example() {
2988        let response = r#"<tool_call>
2989{"name":"shell","arguments":{"command":"pwd"}}
2990</tool_call>
2991This is an example, not an invocation."#;
2992
2993        let issue = detect_tool_call_parse_issue(response, &[]);
2994
2995        assert!(issue.is_none());
2996    }
2997
2998    #[test]
2999    fn detect_tool_call_parse_issue_flags_tagged_tool_call_with_trailing_text() {
3000        let response = r#"<tool_call>
3001{"name":"shell","arguments":{"command":"pwd"}}
3002</tool_call>
3003Done."#;
3004
3005        let issue = detect_tool_call_parse_issue(response, &[]);
3006
3007        assert!(issue.is_some());
3008    }
3009
3010    #[test]
3011    fn detect_tool_call_parse_issue_flags_json_fenced_tool_protocol() {
3012        let response = r#"```json
3013{"tool_calls":[{"name":"shell","arguments":{"command":"pwd"}}]}
3014```"#;
3015        let issue = detect_tool_call_parse_issue(response, &[]);
3016        assert!(issue.is_some());
3017    }
3018
3019    #[test]
3020    fn detect_tool_call_parse_issue_flags_malformed_tool_result_envelope() {
3021        let response = r#"{"tool_call_id":"call_1","content":"raw tool output""#;
3022        let issue = detect_tool_call_parse_issue(response, &[]);
3023        assert!(issue.is_some());
3024    }
3025
3026    #[test]
3027    fn detect_tool_call_parse_issue_ignores_malformed_tool_call_id_only_json() {
3028        let response = r#"{"tool_call_id":"support-case-1""#;
3029        let issue = detect_tool_call_parse_issue(response, &[]);
3030        assert!(issue.is_none());
3031    }
3032
3033    #[test]
3034    fn detect_tool_call_parse_issue_flags_malformed_nonempty_tool_calls_array() {
3035        let issue = detect_tool_call_parse_issue(
3036            r#"{"content":null,"tool_calls":[{"call_id":"call_1","arguments":"{}"}]}"#,
3037            &[],
3038        );
3039        assert!(issue.is_some());
3040    }
3041
3042    #[test]
3043    fn detect_tool_call_parse_issue_ignores_malformed_business_tool_calls_without_call_id() {
3044        for response in [
3045            r#"{"tool_calls":[{"name":"support_case","arguments":{"id":"A1"}}"#,
3046            r#"{"toolcalls":[{"name":"support_case","arguments":{"id":"A1"}}"#,
3047        ] {
3048            let issue = detect_tool_call_parse_issue(response, &[]);
3049
3050            assert!(
3051                issue.is_none(),
3052                "business JSON without a tool call id must not be treated as internal protocol: {response}"
3053            );
3054            assert!(
3055                !looks_like_malformed_tool_protocol_envelope(response),
3056                "business JSON without a tool call id must not be classified as malformed protocol: {response}"
3057            );
3058        }
3059    }
3060
3061    #[test]
3062    fn looks_like_tool_protocol_envelope_flags_malformed_nonempty_tool_calls_array() {
3063        assert!(looks_like_tool_protocol_envelope(
3064            r#"{"content":null,"tool_calls":[{"call_id":"call_1","arguments":"{}"}]}"#
3065        ));
3066        assert!(!looks_like_tool_protocol_envelope(
3067            r#"{"content":"Hello","tool_calls":[]}"#
3068        ));
3069    }
3070
3071    #[test]
3072    fn classify_tool_protocol_envelope_flags_internal_json_variants() {
3073        assert_eq!(
3074            classify_tool_protocol_envelope(
3075                r#"{"content":null,"tool_calls":[{"id":"call_1","name":"shell","arguments":"{}"}]}"#
3076            ),
3077            Some(ToolProtocolEnvelopeKind::ToolCalls)
3078        );
3079        assert_eq!(
3080            classify_tool_protocol_envelope(
3081                r#"{"toolcalls":[{"name":"shell","arguments":{"command":"pwd"}}]}"#
3082            ),
3083            Some(ToolProtocolEnvelopeKind::ToolCallsAlias)
3084        );
3085        assert_eq!(
3086            classify_tool_protocol_envelope(r#"{"tool_calls":[{"name":"shell","arguments":{}}]}"#),
3087            Some(ToolProtocolEnvelopeKind::ToolCalls)
3088        );
3089        assert_eq!(
3090            classify_tool_protocol_envelope(r#"{"toolcalls":[{"name":"shell","arguments":{}}]}"#),
3091            Some(ToolProtocolEnvelopeKind::ToolCallsAlias)
3092        );
3093        assert_eq!(
3094            classify_tool_protocol_envelope(
3095                r#"{"function_call":{"name":"shell","arguments":"{\"command\":\"pwd\"}"}}"#
3096            ),
3097            Some(ToolProtocolEnvelopeKind::FunctionCall)
3098        );
3099        assert_eq!(
3100            classify_tool_protocol_envelope(
3101                r#"{"tool_call_id":"call_1","content":"command output"}"#
3102            ),
3103            Some(ToolProtocolEnvelopeKind::ToolResult)
3104        );
3105        assert_eq!(
3106            classify_tool_protocol_envelope(
3107                r#"{"type":"function_call","call_id":"call_1","name":"shell","arguments":"{}"}"#
3108            ),
3109            Some(ToolProtocolEnvelopeKind::ResponsesFunctionCall)
3110        );
3111        assert_eq!(
3112            classify_tool_protocol_envelope(
3113                r#"```json
3114{"tool_calls":[{"name":"shell","arguments":{"command":"pwd"}}]}
3115```"#
3116            ),
3117            Some(ToolProtocolEnvelopeKind::ToolCalls)
3118        );
3119    }
3120
3121    #[test]
3122    fn classify_tool_protocol_envelope_preserves_tool_call_examples() {
3123        let fenced_example = r#"```tool_call
3124{"name":"shell","arguments":{"command":"pwd"}}
3125```
3126This is an example, not an invocation."#;
3127        let embedded_fenced_example = r#"Here is an example:
3128```tool_call
3129{"name":"shell","arguments":{"command":"pwd"}}
3130```"#;
3131        let embedded_fenced_example_cn = r#"例如：
3132```tool_call
3133{"name":"shell","arguments":{"command":"pwd"}}
3134```"#;
3135        let tag_example = r#"<tool_call>
3136{"name":"shell","arguments":{"command":"pwd"}}
3137</tool_call>
3138This is an example, not an invocation."#;
3139        let tag_example_cn = r#"比如：
3140<tool_call>
3141{"name":"shell","arguments":{"command":"pwd"}}
3142</tool_call>"#;
3143
3144        assert_eq!(classify_tool_protocol_envelope(fenced_example), None);
3145        assert!(!looks_like_tool_protocol_envelope(fenced_example));
3146        assert_eq!(
3147            classify_tool_protocol_envelope(embedded_fenced_example),
3148            None
3149        );
3150        assert!(!looks_like_tool_protocol_envelope(embedded_fenced_example));
3151        assert!(looks_like_tool_protocol_example(embedded_fenced_example));
3152        assert_eq!(
3153            classify_tool_protocol_envelope(embedded_fenced_example_cn),
3154            None
3155        );
3156        assert!(!looks_like_tool_protocol_envelope(
3157            embedded_fenced_example_cn
3158        ));
3159        assert!(looks_like_tool_protocol_example(embedded_fenced_example_cn));
3160        assert_eq!(classify_tool_protocol_envelope(tag_example), None);
3161        assert!(!looks_like_tool_protocol_envelope(tag_example));
3162        assert_eq!(classify_tool_protocol_envelope(tag_example_cn), None);
3163        assert!(!looks_like_tool_protocol_envelope(tag_example_cn));
3164        assert!(looks_like_tool_protocol_example(tag_example_cn));
3165    }
3166
3167    #[test]
3168    fn contains_tool_protocol_tag_call_flags_embedded_tool_call_fences() {
3169        let embedded = r#"Let me call it:
3170```tool_call
3171{"name":"shell","arguments":{"command":"pwd"}}
3172```
3173Done."#;
3174
3175        assert!(contains_tool_protocol_tag_call(embedded));
3176    }
3177
3178    #[test]
3179    fn classify_tool_protocol_envelope_flags_standalone_tool_fences() {
3180        let tool_call_fence = r#"```tool_call
3181{"name":"shell","arguments":{"command":"pwd"}}
3182```"#;
3183        let invoke_fence = r#"```invoke
3184{"name":"shell","arguments":{"command":"pwd"}}
3185```"#;
3186        let tool_name_fence = r#"```tool shell
3187{"command":"pwd"}
3188```"#;
3189
3190        assert_eq!(
3191            classify_tool_protocol_envelope(tool_call_fence),
3192            Some(ToolProtocolEnvelopeKind::TaggedToolCall)
3193        );
3194        assert!(looks_like_tool_protocol_envelope(tool_call_fence));
3195        assert_eq!(
3196            classify_tool_protocol_envelope(invoke_fence),
3197            Some(ToolProtocolEnvelopeKind::TaggedToolCall)
3198        );
3199        assert!(looks_like_tool_protocol_envelope(invoke_fence));
3200        assert_eq!(
3201            classify_tool_protocol_envelope(tool_name_fence),
3202            Some(ToolProtocolEnvelopeKind::TaggedToolCall)
3203        );
3204        assert!(looks_like_tool_protocol_envelope(tool_name_fence));
3205    }
3206
3207    #[test]
3208    fn classify_tool_protocol_envelope_preserves_top_level_arrays_without_protocol_marker() {
3209        assert!(!looks_like_tool_protocol_envelope(
3210            r#"[{"service":"billing","count":2}]"#
3211        ));
3212
3213        assert!(!looks_like_tool_protocol_envelope(
3214            r#"[{"name":"shell","arguments":{}}]"#
3215        ));
3216    }
3217
3218    #[test]
3219    fn classify_tool_protocol_envelope_preserves_top_level_schema_array() {
3220        let schema = r#"[{"name":"planner","parameters":{"goal":"string"}}]"#;
3221
3222        assert_eq!(classify_tool_protocol_envelope(schema), None);
3223        assert!(!looks_like_tool_protocol_envelope(schema));
3224    }
3225
3226    #[test]
3227    fn classify_tool_protocol_envelope_preserves_plain_user_json() {
3228        let profile = r#"{"name":"profile","parameters":{"timezone":"UTC"}}"#;
3229        assert_eq!(classify_tool_protocol_envelope(profile), None);
3230        assert!(!looks_like_tool_protocol_envelope(profile));
3231    }
3232
3233    #[test]
3234    fn looks_like_tool_protocol_envelope_preserves_plain_json_with_similar_keys() {
3235        let config = r#"{"function_call":false,"description":"disable the feature"}"#;
3236        assert!(!looks_like_tool_protocol_envelope(config));
3237
3238        let audit_log = r#"{"tool_calls":[{"service":"billing","count":2}]}"#;
3239        assert!(!looks_like_tool_protocol_envelope(audit_log));
3240
3241        let queued_case =
3242            r#"{"tool_calls":[{"id":"case-1","status":"queued","service":"billing"}]}"#;
3243        assert!(!looks_like_tool_protocol_envelope(queued_case));
3244
3245        let named_record =
3246            r#"{"tool_calls":[{"name":"planner","status":"queued","service":"workflow"}]}"#;
3247        assert!(!looks_like_tool_protocol_envelope(named_record));
3248    }
3249
3250    #[test]
3251    fn parse_tool_calls_handles_whitespace_only_name() {
3252        // Recovery: Whitespace-only tool name should return None
3253        let value = serde_json::json!({"function": {"name": "   ", "arguments": {}}});
3254        let result = parse_tool_call_value(&value);
3255        assert!(result.is_none());
3256    }
3257
3258    #[test]
3259    fn parse_tool_calls_handles_empty_string_arguments() {
3260        // Recovery: Empty string arguments should be handled
3261        let value = serde_json::json!({"name": "test", "arguments": ""});
3262        let result = parse_tool_call_value(&value);
3263        assert!(result.is_some());
3264        assert_eq!(result.unwrap().name, "test");
3265    }
3266
3267    #[test]
3268    fn parse_arguments_value_handles_invalid_json_string() {
3269        // Recovery: Invalid JSON string should return empty object
3270        let value = serde_json::Value::String("not valid json".to_string());
3271        let result = parse_arguments_value(Some(&value));
3272        assert!(result.is_object());
3273        assert!(result.as_object().unwrap().is_empty());
3274    }
3275
3276    #[test]
3277    fn parse_arguments_value_handles_none() {
3278        // Recovery: None arguments should return empty object
3279        let result = parse_arguments_value(None);
3280        assert!(result.is_object());
3281        assert!(result.as_object().unwrap().is_empty());
3282    }
3283
3284    #[test]
3285    fn parse_tool_calls_from_json_value_handles_empty_array() {
3286        // Recovery: Empty tool_calls array should return empty vec
3287        let value = serde_json::json!({"tool_calls": []});
3288        let result = parse_tool_calls_from_json_value(&value);
3289        assert!(result.is_empty());
3290    }
3291
3292    #[test]
3293    fn parse_tool_calls_from_json_value_handles_missing_tool_calls() {
3294        // Recovery: Missing tool_calls field should fall through
3295        let value = serde_json::json!({"name": "test", "arguments": {}});
3296        let result = parse_tool_calls_from_json_value(&value);
3297        assert_eq!(result.len(), 1);
3298    }
3299
3300    #[test]
3301    fn parse_tool_calls_from_json_value_handles_top_level_array() {
3302        // Recovery: Top-level array of tool calls
3303        let value = serde_json::json!([
3304            {"name": "tool_a", "arguments": {}},
3305            {"name": "tool_b", "arguments": {}}
3306        ]);
3307        let result = parse_tool_calls_from_json_value(&value);
3308        assert_eq!(result.len(), 2);
3309    }
3310
3311    #[test]
3312    fn parse_glm_style_browser_open_url() {
3313        let response = "browser_open/url>https://example.com";
3314        let calls = parse_glm_style_tool_calls(response);
3315        assert_eq!(calls.len(), 1);
3316        assert_eq!(calls[0].0, "shell");
3317        assert!(calls[0].1["command"].as_str().unwrap().contains("curl"));
3318        assert!(
3319            calls[0].1["command"]
3320                .as_str()
3321                .unwrap()
3322                .contains("example.com")
3323        );
3324    }
3325
3326    #[test]
3327    fn parse_glm_style_shell_command() {
3328        let response = "shell/command>ls -la";
3329        let calls = parse_glm_style_tool_calls(response);
3330        assert_eq!(calls.len(), 1);
3331        assert_eq!(calls[0].0, "shell");
3332        assert_eq!(calls[0].1["command"], "ls -la");
3333    }
3334
3335    #[test]
3336    fn parse_glm_style_http_request() {
3337        let response = "http_request/url>https://api.example.com/data";
3338        let calls = parse_glm_style_tool_calls(response);
3339        assert_eq!(calls.len(), 1);
3340        assert_eq!(calls[0].0, "http_request");
3341        assert_eq!(calls[0].1["url"], "https://api.example.com/data");
3342        assert_eq!(calls[0].1["method"], "GET");
3343    }
3344
3345    #[test]
3346    fn parse_glm_style_ignores_plain_url() {
3347        // A bare URL should NOT be interpreted as a tool call — this was
3348        // causing false positives when LLMs included URLs in normal text.
3349        let response = "https://example.com/api";
3350        let calls = parse_glm_style_tool_calls(response);
3351        assert!(
3352            calls.is_empty(),
3353            "plain URL must not be parsed as tool call"
3354        );
3355    }
3356
3357    #[test]
3358    fn parse_glm_style_json_args() {
3359        let response = r#"shell/{"command": "echo hello"}"#;
3360        let calls = parse_glm_style_tool_calls(response);
3361        assert_eq!(calls.len(), 1);
3362        assert_eq!(calls[0].0, "shell");
3363        assert_eq!(calls[0].1["command"], "echo hello");
3364    }
3365
3366    #[test]
3367    fn parse_glm_style_multiple_calls() {
3368        let response = r#"shell/command>ls
3369browser_open/url>https://example.com"#;
3370        let calls = parse_glm_style_tool_calls(response);
3371        assert_eq!(calls.len(), 2);
3372    }
3373
3374    #[test]
3375    fn parse_glm_style_tool_call_integration() {
3376        // Integration test: GLM format should be parsed in parse_tool_calls
3377        let response = "Checking...\nbrowser_open/url>https://example.com\nDone";
3378        let (text, calls) = parse_tool_calls(response);
3379        assert_eq!(calls.len(), 1);
3380        assert_eq!(calls[0].name, "shell");
3381        assert!(text.contains("Checking"));
3382        assert!(text.contains("Done"));
3383    }
3384
3385    #[test]
3386    fn parse_glm_style_rejects_non_http_url_param() {
3387        let response = "browser_open/url>javascript:alert(1)";
3388        let calls = parse_glm_style_tool_calls(response);
3389        assert!(calls.is_empty());
3390    }
3391
3392    #[test]
3393    fn parse_tool_calls_handles_unclosed_tool_call_tag() {
3394        let response = "<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"pwd\"}}\nDone";
3395        let (text, calls) = parse_tool_calls(response);
3396        assert_eq!(calls.len(), 1);
3397        assert_eq!(calls[0].name, "shell");
3398        assert_eq!(calls[0].arguments["command"], "pwd");
3399        assert_eq!(text, "Done");
3400    }
3401
3402    #[test]
3403    fn parse_tool_calls_empty_input_returns_empty() {
3404        let (text, calls) = parse_tool_calls("");
3405        assert!(calls.is_empty(), "empty input should produce no tool calls");
3406        assert!(text.is_empty(), "empty input should produce no text");
3407    }
3408
3409    #[test]
3410    fn parse_tool_calls_whitespace_only_returns_empty_calls() {
3411        let (text, calls) = parse_tool_calls("   \n\t  ");
3412        assert!(calls.is_empty());
3413        assert!(text.is_empty() || text.trim().is_empty());
3414    }
3415
3416    #[test]
3417    fn parse_tool_calls_nested_xml_tags_handled() {
3418        // Double-wrapped tool call should still parse the inner call
3419        let response = r#"<tool_call><tool_call>{"name":"echo","arguments":{"msg":"hi"}}</tool_call></tool_call>"#;
3420        let (_text, calls) = parse_tool_calls(response);
3421        // Should find at least one tool call
3422        assert!(
3423            !calls.is_empty(),
3424            "nested XML tags should still yield at least one tool call"
3425        );
3426    }
3427
3428    #[test]
3429    fn parse_tool_calls_truncated_json_no_panic() {
3430        // Incomplete JSON inside tool_call tags
3431        let response = r#"<tool_call>{"name":"shell","arguments":{"command":"ls"</tool_call>"#;
3432        let (_text, _calls) = parse_tool_calls(response);
3433        // Should not panic — graceful handling of truncated JSON
3434    }
3435
3436    #[test]
3437    fn parse_tool_calls_empty_json_object_in_tag() {
3438        let response = "<tool_call>{}</tool_call>";
3439        let (_text, calls) = parse_tool_calls(response);
3440        // Empty JSON object has no name field — should not produce valid tool call
3441        assert!(
3442            calls.is_empty(),
3443            "empty JSON object should not produce a tool call"
3444        );
3445    }
3446
3447    #[test]
3448    fn parse_tool_calls_closing_tag_only_returns_text() {
3449        let response = "Some text </tool_call> more text";
3450        let (text, calls) = parse_tool_calls(response);
3451        assert!(
3452            calls.is_empty(),
3453            "closing tag only should not produce calls"
3454        );
3455        assert!(
3456            !text.is_empty(),
3457            "text around orphaned closing tag should be preserved"
3458        );
3459    }
3460
3461    #[test]
3462    fn parse_tool_calls_very_large_arguments_no_panic() {
3463        let large_arg = "x".repeat(100_000);
3464        let response = format!(
3465            r#"<tool_call>{{"name":"echo","arguments":{{"message":"{}"}}}}</tool_call>"#,
3466            large_arg
3467        );
3468        let (_text, calls) = parse_tool_calls(&response);
3469        assert_eq!(calls.len(), 1, "large arguments should still parse");
3470        assert_eq!(calls[0].name, "echo");
3471    }
3472
3473    #[test]
3474    fn parse_tool_calls_special_characters_in_arguments() {
3475        let response = r#"<tool_call>{"name":"echo","arguments":{"message":"hello \"world\" <>&'\n\t"}}</tool_call>"#;
3476        let (_text, calls) = parse_tool_calls(response);
3477        assert_eq!(calls.len(), 1);
3478        assert_eq!(calls[0].name, "echo");
3479    }
3480
3481    #[test]
3482    fn parse_tool_calls_text_with_embedded_json_not_extracted() {
3483        // Raw JSON without any tags should NOT be extracted as a tool call
3484        let response = r#"Here is some data: {"name":"echo","arguments":{"message":"hi"}} end."#;
3485        let (_text, calls) = parse_tool_calls(response);
3486        assert!(
3487            calls.is_empty(),
3488            "raw JSON in text without tags should not be extracted"
3489        );
3490    }
3491
3492    #[test]
3493    fn parse_tool_calls_multiple_formats_mixed() {
3494        // Mix of text and properly tagged tool call
3495        let response = r#"I'll help you with that.
3496
3497<tool_call>
3498{"name":"shell","arguments":{"command":"echo hello"}}
3499</tool_call>
3500
3501Let me check the result."#;
3502        let (text, calls) = parse_tool_calls(response);
3503        assert_eq!(
3504            calls.len(),
3505            1,
3506            "should extract one tool call from mixed content"
3507        );
3508        assert_eq!(calls[0].name, "shell");
3509        assert!(
3510            text.contains("help you"),
3511            "text before tool call should be preserved"
3512        );
3513    }
3514
3515    #[test]
3516    fn parse_tool_calls_cross_alias_close_tag_with_json() {
3517        // <tool_call> opened but closed with </invoke> — JSON body
3518        let input = r#"<tool_call>{"name": "shell", "arguments": {"command": "ls"}}</invoke>"#;
3519        let (text, calls) = parse_tool_calls(input);
3520        assert_eq!(calls.len(), 1);
3521        assert_eq!(calls[0].name, "shell");
3522        assert_eq!(calls[0].arguments["command"], "ls");
3523        assert!(text.is_empty());
3524    }
3525
3526    #[test]
3527    fn parse_tool_calls_cross_alias_close_tag_with_glm_shortened() {
3528        // <tool_call>shell>uname -a</invoke> — GLM shortened inside cross-alias tags
3529        let input = "<tool_call>shell>uname -a</invoke>";
3530        let (text, calls) = parse_tool_calls(input);
3531        assert_eq!(calls.len(), 1);
3532        assert_eq!(calls[0].name, "shell");
3533        assert_eq!(calls[0].arguments["command"], "uname -a");
3534        assert!(text.is_empty());
3535    }
3536
3537    #[test]
3538    fn parse_tool_calls_glm_shortened_body_in_matched_tags() {
3539        // <tool_call>shell>pwd</tool_call> — GLM shortened in matched tags
3540        let input = "<tool_call>shell>pwd</tool_call>";
3541        let (text, calls) = parse_tool_calls(input);
3542        assert_eq!(calls.len(), 1);
3543        assert_eq!(calls[0].name, "shell");
3544        assert_eq!(calls[0].arguments["command"], "pwd");
3545        assert!(text.is_empty());
3546    }
3547
3548    #[test]
3549    fn parse_tool_calls_glm_yaml_style_in_tags() {
3550        // <tool_call>shell>\ncommand: date\napproved: true</invoke>
3551        let input = "<tool_call>shell>\ncommand: date\napproved: true</invoke>";
3552        let (text, calls) = parse_tool_calls(input);
3553        assert_eq!(calls.len(), 1);
3554        assert_eq!(calls[0].name, "shell");
3555        assert_eq!(calls[0].arguments["command"], "date");
3556        assert_eq!(calls[0].arguments["approved"], true);
3557        assert!(text.is_empty());
3558    }
3559
3560    #[test]
3561    fn parse_tool_calls_attribute_style_in_tags() {
3562        // <tool_call>shell command="date" /></tool_call>
3563        let input = r#"<tool_call>shell command="date" /></tool_call>"#;
3564        let (text, calls) = parse_tool_calls(input);
3565        assert_eq!(calls.len(), 1);
3566        assert_eq!(calls[0].name, "shell");
3567        assert_eq!(calls[0].arguments["command"], "date");
3568        assert!(text.is_empty());
3569    }
3570
3571    #[test]
3572    fn parse_tool_calls_file_read_shortened_in_cross_alias() {
3573        // <tool_call>file_read path=".env" /></invoke>
3574        let input = r#"<tool_call>file_read path=".env" /></invoke>"#;
3575        let (text, calls) = parse_tool_calls(input);
3576        assert_eq!(calls.len(), 1);
3577        assert_eq!(calls[0].name, "file_read");
3578        assert_eq!(calls[0].arguments["path"], ".env");
3579        assert!(text.is_empty());
3580    }
3581
3582    #[test]
3583    fn parse_tool_calls_unclosed_glm_shortened_no_close_tag() {
3584        // <tool_call>shell>ls -la (no close tag at all)
3585        let input = "<tool_call>shell>ls -la";
3586        let (text, calls) = parse_tool_calls(input);
3587        assert_eq!(calls.len(), 1);
3588        assert_eq!(calls[0].name, "shell");
3589        assert_eq!(calls[0].arguments["command"], "ls -la");
3590        assert!(text.is_empty());
3591    }
3592
3593    #[test]
3594    fn parse_tool_calls_text_before_cross_alias() {
3595        // Text before and after cross-alias tool call
3596        let input = "Let me check that.\n<tool_call>shell>uname -a</invoke>\nDone.";
3597        let (text, calls) = parse_tool_calls(input);
3598        assert_eq!(calls.len(), 1);
3599        assert_eq!(calls[0].name, "shell");
3600        assert_eq!(calls[0].arguments["command"], "uname -a");
3601        assert!(text.contains("Let me check that."));
3602        assert!(text.contains("Done."));
3603    }
3604
3605    #[test]
3606    fn parse_glm_shortened_body_url_to_curl() {
3607        // URL values for shell should be wrapped in curl
3608        let call = parse_glm_shortened_body("shell>https://example.com/api").unwrap();
3609        assert_eq!(call.name, "shell");
3610        let cmd = call.arguments["command"].as_str().unwrap();
3611        assert!(cmd.contains("curl"));
3612        assert!(cmd.contains("example.com"));
3613    }
3614
3615    #[test]
3616    fn parse_glm_shortened_body_browser_open_maps_to_shell_command() {
3617        // browser_open aliases to shell, and shortened calls must still emit
3618        // shell's canonical "command" argument.
3619        let call = parse_glm_shortened_body("browser_open>https://example.com").unwrap();
3620        assert_eq!(call.name, "shell");
3621        let cmd = call.arguments["command"].as_str().unwrap();
3622        assert!(cmd.contains("curl"));
3623        assert!(cmd.contains("example.com"));
3624    }
3625
3626    #[test]
3627    fn parse_glm_shortened_body_memory_recall() {
3628        // memory_recall>some query — default param is "query"
3629        let call = parse_glm_shortened_body("memory_recall>recent meetings").unwrap();
3630        assert_eq!(call.name, "memory_recall");
3631        assert_eq!(call.arguments["query"], "recent meetings");
3632    }
3633
3634    #[test]
3635    fn parse_glm_shortened_body_function_style_alias_maps_to_message_send() {
3636        let call =
3637            parse_glm_shortened_body(r#"sendmessage(channel="alerts", message="hi")"#).unwrap();
3638        assert_eq!(call.name, "message_send");
3639        assert_eq!(call.arguments["channel"], "alerts");
3640        assert_eq!(call.arguments["message"], "hi");
3641    }
3642
3643    #[test]
3644    fn parse_glm_shortened_body_rejects_empty() {
3645        assert!(parse_glm_shortened_body("").is_none());
3646        assert!(parse_glm_shortened_body("   ").is_none());
3647    }
3648
3649    #[test]
3650    fn parse_glm_shortened_body_rejects_invalid_tool_name() {
3651        // Tool names with special characters should be rejected
3652        assert!(parse_glm_shortened_body("not-a-tool>value").is_none());
3653        assert!(parse_glm_shortened_body("tool name>value").is_none());
3654    }
3655
3656    #[test]
3657    fn build_native_assistant_history_from_parsed_calls_includes_reasoning_content() {
3658        let calls = vec![ParsedToolCall {
3659            name: "shell".into(),
3660            arguments: serde_json::json!({"command": "pwd"}),
3661            tool_call_id: Some("call_2".into()),
3662        }];
3663        let result = build_native_assistant_history_from_parsed_calls(
3664            "answer",
3665            &calls,
3666            Some("deep thought"),
3667        );
3668        assert!(result.is_some());
3669        let parsed: serde_json::Value = serde_json::from_str(result.as_deref().unwrap()).unwrap();
3670        assert_eq!(parsed["content"].as_str(), Some("answer"));
3671        assert_eq!(parsed["reasoning_content"].as_str(), Some("deep thought"));
3672        assert!(parsed["tool_calls"].is_array());
3673    }
3674
3675    #[test]
3676    fn build_native_assistant_history_from_parsed_calls_omits_reasoning_content_when_none() {
3677        let calls = vec![ParsedToolCall {
3678            name: "shell".into(),
3679            arguments: serde_json::json!({"command": "pwd"}),
3680            tool_call_id: Some("call_2".into()),
3681        }];
3682        let result = build_native_assistant_history_from_parsed_calls("answer", &calls, None);
3683        assert!(result.is_some());
3684        let parsed: serde_json::Value = serde_json::from_str(result.as_deref().unwrap()).unwrap();
3685        assert_eq!(parsed["content"].as_str(), Some("answer"));
3686        assert!(parsed.get("reasoning_content").is_none());
3687    }
3688
    // ═══════════════════════════════════════════════════════════════════════
    // Additional parser internals tests (moved from zeroclaw-runtime to keep
    // functions crate-private per Beta-tier API stability policy)
    // ═══════════════════════════════════════════════════════════════════════
3695
3696    #[test]
3697    fn parse_tool_call_value_handles_missing_name_field() {
3698        let value = serde_json::json!({"function": {"arguments": {}}});
3699        let result = parse_tool_call_value(&value);
3700        assert!(result.is_none());
3701    }
3702
3703    #[test]
3704    fn parse_tool_call_value_handles_top_level_name() {
3705        let value = serde_json::json!({"name": "test_tool", "arguments": {}});
3706        let result = parse_tool_call_value(&value);
3707        assert!(result.is_some());
3708        assert_eq!(result.unwrap().name, "test_tool");
3709    }
3710
3711    #[test]
3712    fn parse_tool_call_value_accepts_top_level_parameters_alias() {
3713        let value = serde_json::json!({
3714            "name": "schedule",
3715            "parameters": {"action": "create", "message": "test"}
3716        });
3717        let result = parse_tool_call_value(&value).expect("tool call should parse");
3718        assert_eq!(result.name, "schedule");
3719        assert_eq!(
3720            result.arguments.get("action").and_then(|v| v.as_str()),
3721            Some("create")
3722        );
3723    }
3724
3725    #[test]
3726    fn parse_tool_call_value_accepts_function_parameters_alias() {
3727        let value = serde_json::json!({
3728            "function": {
3729                "name": "shell",
3730                "parameters": {"command": "date"}
3731            }
3732        });
3733        let result = parse_tool_call_value(&value).expect("tool call should parse");
3734        assert_eq!(result.name, "shell");
3735        assert_eq!(
3736            result.arguments.get("command").and_then(|v| v.as_str()),
3737            Some("date")
3738        );
3739    }
3740
3741    #[test]
3742    fn parse_tool_call_value_preserves_tool_call_id_aliases() {
3743        let value = serde_json::json!({
3744            "call_id": "legacy_1",
3745            "function": {
3746                "name": "shell",
3747                "arguments": {"command": "date"}
3748            }
3749        });
3750        let result = parse_tool_call_value(&value).expect("tool call should parse");
3751        assert_eq!(result.tool_call_id.as_deref(), Some("legacy_1"));
3752    }
3753
3754    #[test]
3755    fn extract_json_values_handles_empty_string() {
3756        let result = extract_json_values("");
3757        assert!(result.is_empty());
3758    }
3759
3760    #[test]
3761    fn extract_json_values_handles_whitespace_only() {
3762        let result = extract_json_values(
3763            "   
3764	  ",
3765        );
3766        assert!(result.is_empty());
3767    }
3768
3769    #[test]
3770    fn extract_json_values_handles_multiple_objects() {
3771        let input = r#"{"a": 1}{"b": 2}{"c": 3}"#;
3772        let result = extract_json_values(input);
3773        assert_eq!(result.len(), 3);
3774    }
3775
3776    #[test]
3777    fn extract_json_values_handles_arrays() {
3778        let input = r#"[1, 2, 3]{"key": "value"}"#;
3779        let result = extract_json_values(input);
3780        assert_eq!(result.len(), 2);
3781    }
3782
3783    #[test]
3784    fn map_tool_name_alias_direct_coverage() {
3785        assert_eq!(map_tool_name_alias("bash"), "shell");
3786        assert_eq!(map_tool_name_alias("filelist"), "file_list");
3787        assert_eq!(map_tool_name_alias("memorystore"), "memory_store");
3788        assert_eq!(map_tool_name_alias("memoryforget"), "memory_forget");
3789        assert_eq!(map_tool_name_alias("http"), "http_request");
3790        assert_eq!(
3791            map_tool_name_alias("totally_unknown_tool"),
3792            "totally_unknown_tool"
3793        );
3794    }
3795
3796    #[test]
3797    fn map_tool_name_alias_strips_dotted_namespaces() {
3798        // Gemini-style static prefixes still work.
3799        assert_eq!(map_tool_name_alias("default_api.file_read"), "file_read");
3800        assert_eq!(map_tool_name_alias("tools.shell"), "shell");
3801
3802        // MCP-server-name prefixes (Gemini-via-OpenRouter also emits these
3803        // when the tool originates from an MCP server; the registry is
3804        // indexed by bare tool name, so we must strip them too).
3805        assert_eq!(
3806            map_tool_name_alias("google_workspace.search_gmail_messages"),
3807            "search_gmail_messages"
3808        );
3809
3810        // Only the final segment is kept even with multiple dots.
3811        assert_eq!(map_tool_name_alias("a.b.c.final"), "final");
3812
3813        // Stripped segment still runs through the alias table.
3814        assert_eq!(map_tool_name_alias("default_api.bash"), "shell");
3815
3816        // Names without any dot are unaffected.
3817        assert_eq!(map_tool_name_alias("file_read"), "file_read");
3818    }
3819
3820    #[test]
3821    fn default_param_for_tool_coverage() {
3822        assert_eq!(default_param_for_tool("shell"), "command");
3823        assert_eq!(default_param_for_tool("bash"), "command");
3824        assert_eq!(default_param_for_tool("file_read"), "path");
3825        assert_eq!(default_param_for_tool("memory_recall"), "query");
3826        assert_eq!(default_param_for_tool("memory_store"), "content");
3827        assert_eq!(default_param_for_tool("web_search_tool"), "query");
3828        assert_eq!(default_param_for_tool("web_search"), "query");
3829        assert_eq!(default_param_for_tool("search"), "query");
3830        assert_eq!(default_param_for_tool("http_request"), "url");
3831        assert_eq!(default_param_for_tool("browser_open"), "url");
3832        assert_eq!(default_param_for_tool("unknown_tool"), "input");
3833    }
3834}
zeroclaw_tool_call_parser/lib.rs

zeroclaw_tool_call_parser/
lib.rs