zeroclaw_runtime/agent/
history_pruner.rs

1use zeroclaw_api::model_provider::ChatMessage;
2
3pub use zeroclaw_config::scattered_types::HistoryPrunerConfig;
4
5// ---------------------------------------------------------------------------
6// Stats
7// ---------------------------------------------------------------------------
8
/// Summary of what one `prune_history` call did to a conversation history.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PruneStats {
    /// Number of messages in the history when pruning started.
    pub messages_before: usize,
    /// Number of messages left in the history when pruning finished.
    pub messages_after: usize,
    /// Number of tool-result messages folded into collapsed summaries.
    pub collapsed_pairs: usize,
    /// Number of messages removed by budget drops, summary merges, and the
    /// orphaned-tool sweep.
    pub dropped_messages: usize,
}
16
17// ---------------------------------------------------------------------------
18// Token estimation
19// ---------------------------------------------------------------------------
20
21fn estimate_tokens(messages: &[ChatMessage]) -> usize {
22    let raw: usize = messages
23        .iter()
24        .map(|m| m.content.len().div_ceil(4) + 4)
25        .sum();
26    // Apply 1.2x safety margin consistent with context_compressor to avoid
27    // underestimation that leads to context_length_exceeded errors.
28    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
29    {
30        (raw as f64 * 1.2) as usize
31    }
32}
33
34// ---------------------------------------------------------------------------
35// Protected-index helpers
36// ---------------------------------------------------------------------------
37
38fn protected_indices(messages: &[ChatMessage], keep_recent: usize) -> Vec<bool> {
39    let len = messages.len();
40    let mut protected = vec![false; len];
41    for (i, msg) in messages.iter().enumerate() {
42        if msg.role == "system" {
43            protected[i] = true;
44        }
45    }
46    let recent_start = len.saturating_sub(keep_recent);
47    for p in protected.iter_mut().skip(recent_start) {
48        *p = true;
49    }
50    protected
51}
52
53// ---------------------------------------------------------------------------
54// Orphaned tool-message sanitiser
55// ---------------------------------------------------------------------------
56
/// What one `remove_orphaned_tool_messages` call removed from a history.
///
/// The sweep itself does not log; the caller owns the agent/channel/session
/// context and is expected to report this outcome.
#[derive(Clone, Debug, Default)]
pub struct PrunedOrphans {
    /// Count of messages (tool results and assistant dispatches) removed
    /// across both passes.
    pub removed: usize,
    /// The `tool_call_id`s whose request/result pairing was lost.
    pub orphan_tool_call_ids: Vec<String>,
}
67
/// Recognises the synthetic placeholder the pruner writes when it collapses a
/// tool exchange: text that opens with `[Tool exchange:` and mentions
/// `results collapsed]` somewhere.
fn is_tool_exchange_summary(content: &str) -> bool {
    if !content.starts_with("[Tool exchange:") {
        return false;
    }
    content.contains("results collapsed]")
}
71
72fn assistant_tool_calls_have_immediate_results(
73    messages: &[ChatMessage],
74    assistant_idx: usize,
75    tool_call_ids: &[String],
76) -> bool {
77    if tool_call_ids.is_empty() {
78        return false;
79    }
80
81    tool_call_ids.iter().all(|expected| {
82        messages
83            .iter()
84            .skip(assistant_idx + 1)
85            .take_while(|msg| msg.role == "tool")
86            .filter_map(|msg| extract_tool_call_id(&msg.content))
87            .any(|actual| actual == *expected)
88    })
89}
90
91/// True when the assistant at `prev_idx` is itself an unresolved tool-call
92/// dispatch: it claims `tool_calls` but the rows between it and `next_idx`
93/// do not answer all of them. This is the genuinely poisoned shape where a
94/// second dispatch follows a first that never landed — distinct from a
95/// healthy `assistant(text preamble)` → `assistant(tool_calls)` turn, where
96/// the preamble has no tool_calls and is left untouched.
97fn assistant_is_unresolved_dispatch(
98    messages: &[ChatMessage],
99    prev_idx: usize,
100    next_idx: usize,
101) -> bool {
102    match extract_assistant_tool_call_ids(&messages[prev_idx].content) {
103        Some(ids) if !ids.is_empty() => {
104            let between = &messages[prev_idx + 1..next_idx];
105            !ids.iter().all(|id| {
106                between.iter().any(|m| {
107                    m.role == "tool" && extract_tool_call_id(&m.content).as_ref() == Some(id)
108                })
109            })
110        }
111        _ => false,
112    }
113}
114
115impl PrunedOrphans {
116    pub fn is_empty(&self) -> bool {
117        self.removed == 0
118    }
119}
120
/// Remove tool-call rows that can no longer be paired, editing `messages`
/// in place and returning a [`PrunedOrphans`] describing what was removed.
///
/// Two passes run in order:
///
/// 1. Remove an `assistant` dispatch (and the matching `tool` rows directly
///    after it) when the assistant immediately before it is either a
///    collapsed tool-exchange summary and this dispatch lacks immediate
///    results, or an unresolved dispatch itself.
/// 2. Remove `tool`-role messages whose `tool_call_id` has no matching
///    `tool_use` / `tool_calls` entry in a preceding assistant message.
///
/// After any history truncation (drain, remove, prune) the first surviving
/// message(s) may be `tool` results whose assistant request was trimmed away.
/// The Anthropic API (and others) reject these with a 400 error.
pub fn remove_orphaned_tool_messages(messages: &mut Vec<ChatMessage>) -> PrunedOrphans {
    let mut outcome = PrunedOrphans::default();
    // Pass 1: Remove a second `assistant(tool_calls)` (and its immediate
    // tool results) only when the *preceding* assistant is itself
    // problematic in a way that normalization would corrupt:
    //
    //   * a collapsed tool-exchange summary whose merge would orphan this
    //     dispatch's results (the GLM-history case, #7013), or
    //   * an unresolved tool-call dispatch — a first dispatch that never
    //     landed, immediately followed by this one (the poisoned
    //     double-dispatch case).
    //
    // A healthy turn shape `assistant(text preamble)` → `assistant(tool_calls)`
    // → `tool` must NOT be touched: the preamble has no tool_calls and is
    // neither a summary nor an unresolved dispatch, so it is left intact.
    // Nuking the dispatch there produces the "amnesia mid-tool-loop"
    // failure where the model sees the next turn with none of its work.
    let mut i = 0;
    while i < messages.len() {
        // Only assistant rows can be dispatches; everything else yields None.
        let assistant_tool_call_ids = if messages[i].role == "assistant" {
            extract_assistant_tool_call_ids(&messages[i].content)
        } else {
            None
        };
        // The `i > 0` test must stay ahead of every `i - 1` use below; the
        // chain short-circuits left to right.
        if let Some(doomed_ids) = assistant_tool_call_ids
            && i > 0
            && messages[i - 1].role == "assistant"
            && ((is_tool_exchange_summary(&messages[i - 1].content)
                && !assistant_tool_calls_have_immediate_results(messages, i, &doomed_ids))
                || assistant_is_unresolved_dispatch(messages, i - 1, i))
        {
            outcome
                .orphan_tool_call_ids
                .extend(doomed_ids.iter().cloned());
            messages.remove(i);
            outcome.removed += 1;
            // After the removal, index `i` holds whatever followed the
            // dispatch. Sweep the tool rows that belonged to it.
            while i < messages.len() && messages[i].role == "tool" {
                let dominated = match extract_tool_call_id(&messages[i].content) {
                    Some(id) => doomed_ids.iter().any(|d| d == &id),
                    // A tool row without a parseable id is removed with the
                    // dispatch it trails.
                    None => true,
                };
                if dominated {
                    messages.remove(i);
                    outcome.removed += 1;
                } else {
                    break;
                }
            }
            // `i` is deliberately not advanced: the row now at `i` is
            // re-examined against its new predecessor.
        } else {
            i += 1;
        }
    }

    // Pass 2: Remove remaining orphan tool messages whose tool_call_id
    // is not in the preceding assistant's structured tool_calls array.
    // A substring match on the assistant's *text* is NOT sufficient —
    // compaction summaries are instructed to preserve identifiers, so an
    // id can appear in prose without an actual tool_use block backing it.
    i = 0;
    while i < messages.len() {
        if messages[i].role != "tool" {
            i += 1;
            continue;
        }

        // Walk backwards over the contiguous run of assistant/tool rows and
        // take the nearest assistant; any other role ends the search.
        let assistant_idx = (0..i)
            .rev()
            .take_while(|&j| messages[j].role == "assistant" || messages[j].role == "tool")
            .find(|&j| messages[j].role == "assistant");

        let is_orphan = match assistant_idx {
            // No assistant reachable from this tool row.
            None => true,
            Some(idx) => match extract_assistant_tool_call_ids(&messages[idx].content) {
                // The assistant carries no structured tool_calls.
                None => true,
                Some(ids) => match extract_tool_call_id(&messages[i].content) {
                    Some(tool_call_id) => !ids.iter().any(|id| id == &tool_call_id),
                    // A tool row without a parseable id is kept when its
                    // assistant does carry tool_calls.
                    None => false,
                },
            },
        };

        if is_orphan {
            if let Some(id) = extract_tool_call_id(&messages[i].content) {
                outcome.orphan_tool_call_ids.push(id);
            }
            messages.remove(i);
            outcome.removed += 1;
        } else {
            i += 1;
        }
    }
    outcome
}
220
221/// Try to extract a `tool_call_id` from a tool-role message's JSON content.
222///
223/// Tool messages are stored as JSON like:
224/// `{"content": "...", "tool_call_id": "toolu_01Abc..."}`
225fn extract_tool_call_id(content: &str) -> Option<String> {
226    let value: serde_json::Value = serde_json::from_str(content).ok()?;
227    value
228        .get("tool_call_id")
229        .and_then(|v| v.as_str())
230        .map(|s| s.to_string())
231}
232
233/// Extract the list of structured tool-call IDs an assistant message
234/// is claiming to have invoked, if any. Returns `None` when the content
235/// does not parse as a JSON object with a `tool_calls` array — meaning the
236/// assistant has no native tool_use blocks backing any tool_results.
237fn extract_assistant_tool_call_ids(content: &str) -> Option<Vec<String>> {
238    let value: serde_json::Value = serde_json::from_str(content).ok()?;
239    let arr = value.get("tool_calls")?.as_array()?;
240    let ids: Vec<String> = arr
241        .iter()
242        .filter_map(|call| call.get("id").and_then(|v| v.as_str()).map(str::to_owned))
243        .collect();
244    if ids.is_empty() { None } else { Some(ids) }
245}
246
247// ---------------------------------------------------------------------------
248// Public entry point
249// ---------------------------------------------------------------------------
250
251pub fn prune_history(messages: &mut Vec<ChatMessage>, config: &HistoryPrunerConfig) -> PruneStats {
252    let messages_before = messages.len();
253    if !config.enabled || messages.is_empty() {
254        return PruneStats {
255            messages_before,
256            messages_after: messages_before,
257            collapsed_pairs: 0,
258            dropped_messages: 0,
259        };
260    }
261
262    let mut collapsed_pairs: usize = 0;
263
264    // Phase 1 – collapse assistant+tool groups atomically.
265    // An assistant message followed by one or more consecutive tool messages
266    // forms an atomic group (tool_use + tool_result pairing). Collapsing only
267    // part of the group would orphan tool_use blocks, causing API 400 errors
268    // from model_providers that enforce pairing (e.g., Anthropic).
269    //
270    // The group is collapsed only when *every* tool in it is unprotected —
271    // the same all-or-nothing rule Phase 2 uses. If `keep_recent` protects
272    // any tool in the group we skip the whole group. Partial collapse would
273    // leave a protected tool behind whose parent assistant has been
274    // rewritten to a summary with no "tool_calls" marker, which Phase 3's
275    // orphan sweep then evicts — silently violating `keep_recent`. See
276    // #5823.
277    if config.collapse_tool_results {
278        let mut i = 0;
279        while i < messages.len() {
280            let protected = protected_indices(messages, config.keep_recent);
281            if messages[i].role == "assistant" && !protected[i] {
282                // Count consecutive tool messages following this assistant
283                // and remember whether any of them is protected.
284                let mut tool_count = 0;
285                let mut any_tool_protected = false;
286                while i + 1 + tool_count < messages.len()
287                    && messages[i + 1 + tool_count].role == "tool"
288                {
289                    if protected[i + 1 + tool_count] {
290                        any_tool_protected = true;
291                    }
292                    tool_count += 1;
293                }
294                if tool_count > 0 && !any_tool_protected {
295                    let summary =
296                        format!("[Tool exchange: {tool_count} tool call(s) — results collapsed]");
297                    messages[i] = ChatMessage {
298                        role: "assistant".to_string(),
299                        content: summary,
300                    };
301                    for _ in 0..tool_count {
302                        messages.remove(i + 1);
303                    }
304                    collapsed_pairs += tool_count;
305                    continue;
306                }
307                if tool_count > 0 {
308                    // Protected tool inside the group → skip the whole
309                    // group intact so Phase 3's orphan sweep has no
310                    // pretext to remove those tools.
311                    i += 1 + tool_count;
312                    continue;
313                }
314            }
315            i += 1;
316        }
317    }
318
319    // Phase 2 – budget enforcement: drop messages to fit token budget.
320    // Tool groups (assistant + consecutive tool messages) are dropped
321    // atomically to preserve tool_use/tool_result pairing.
322    let mut dropped_messages: usize = 0;
323    while estimate_tokens(messages) > config.max_tokens {
324        let protected = protected_indices(messages, config.keep_recent);
325        let mut dropped_any = false;
326        let mut i = 0;
327        while i < messages.len() {
328            if protected[i] {
329                i += 1;
330                continue;
331            }
332            if messages[i].role == "assistant" {
333                // Count following tool messages — drop as atomic group,
334                // but skip if any tool in the group is protected.
335                let mut tool_count = 0;
336                let mut any_tool_protected = false;
337                while i + 1 + tool_count < messages.len()
338                    && messages[i + 1 + tool_count].role == "tool"
339                {
340                    if protected[i + 1 + tool_count] {
341                        any_tool_protected = true;
342                    }
343                    tool_count += 1;
344                }
345                if tool_count > 0 && !any_tool_protected {
346                    for _ in 0..=tool_count {
347                        messages.remove(i);
348                    }
349                    dropped_messages += 1 + tool_count;
350                    dropped_any = true;
351                    break;
352                } else if tool_count > 0 {
353                    // Group has protected tools — skip past it
354                    i += 1 + tool_count;
355                    continue;
356                }
357            }
358            // Non-tool-group message — safe to drop individually
359            messages.remove(i);
360            dropped_messages += 1;
361            dropped_any = true;
362            break;
363        }
364        if !dropped_any {
365            break;
366        }
367    }
368
369    // Phase 3 – merge consecutive synthetic tool-exchange summaries. GLM/Z.AI
370    // reject adjacent assistant messages, but these summaries are safe to
371    // combine because they are both pruner-generated placeholders.
372    let mut i = 0;
373    while i + 1 < messages.len() {
374        if messages[i].role == "assistant"
375            && messages[i + 1].role == "assistant"
376            && is_tool_exchange_summary(&messages[i].content)
377            && is_tool_exchange_summary(&messages[i + 1].content)
378        {
379            let next = messages.remove(i + 1);
380            messages[i].content = format!("{}\n\n{}", messages[i].content, next.content);
381            dropped_messages += 1;
382        } else {
383            i += 1;
384        }
385    }
386
387    // Phase 4 – remove orphaned tool messages left behind by phases 1-3.
388    dropped_messages += remove_orphaned_tool_messages(messages).removed;
389
390    // Phase 5 – separate any remaining adjacent assistant messages. These can
391    // happen when a protected assistant(tool_calls) group follows a collapsed
392    // summary. Insert a tiny user boundary rather than dropping protected data.
393    let mut i = 1;
394    while i < messages.len() {
395        if messages[i - 1].role == "assistant" && messages[i].role == "assistant" {
396            messages.insert(
397                i,
398                ChatMessage {
399                    role: "user".to_string(),
400                    content: "[context continues]".to_string(),
401                },
402            );
403            i += 2;
404        } else {
405            i += 1;
406        }
407    }
408
409    PruneStats {
410        messages_before,
411        messages_after: messages.len(),
412        collapsed_pairs,
413        dropped_messages,
414    }
415}
416
417#[cfg(test)]
418mod tests {
419    use super::*;
420
421    fn msg(role: &str, content: &str) -> ChatMessage {
422        ChatMessage {
423            role: role.to_string(),
424            content: content.to_string(),
425        }
426    }
427
428    #[test]
429    fn prune_disabled_is_noop() {
430        let mut messages = vec![
431            msg("system", "You are helpful."),
432            msg("user", "Hello"),
433            msg("assistant", "Hi there!"),
434        ];
435        let config = HistoryPrunerConfig {
436            enabled: false,
437            ..Default::default()
438        };
439        let stats = prune_history(&mut messages, &config);
440        assert_eq!(messages.len(), 3);
441        assert_eq!(messages[0].content, "You are helpful.");
442        assert_eq!(stats.messages_before, 3);
443        assert_eq!(stats.messages_after, 3);
444        assert_eq!(stats.collapsed_pairs, 0);
445    }
446
447    #[test]
448    fn prune_under_budget_no_change() {
449        let mut messages = vec![
450            msg("system", "You are helpful."),
451            msg("user", "Hello"),
452            msg("assistant", "Hi!"),
453        ];
454        let config = HistoryPrunerConfig {
455            enabled: true,
456            max_tokens: 8192,
457            keep_recent: 2,
458            collapse_tool_results: false,
459        };
460        let stats = prune_history(&mut messages, &config);
461        assert_eq!(messages.len(), 3);
462        assert_eq!(stats.collapsed_pairs, 0);
463        assert_eq!(stats.dropped_messages, 0);
464    }
465
466    #[test]
467    fn prune_collapses_tool_pairs() {
468        let tool_result = "a".repeat(160);
469        let mut messages = vec![
470            msg("system", "sys"),
471            msg("assistant", "calling tool X"),
472            msg("tool", &tool_result),
473            msg("user", "thanks"),
474            msg("assistant", "done"),
475        ];
476        let config = HistoryPrunerConfig {
477            enabled: true,
478            max_tokens: 100_000,
479            keep_recent: 2,
480            collapse_tool_results: true,
481        };
482        let stats = prune_history(&mut messages, &config);
483        assert_eq!(stats.collapsed_pairs, 1);
484        assert_eq!(messages.len(), 4);
485        assert_eq!(messages[1].role, "assistant");
486        assert!(messages[1].content.contains("1 tool call(s)"));
487    }
488
489    #[test]
490    fn prune_preserves_system_and_recent() {
491        let big = "x".repeat(40_000);
492        let mut messages = vec![
493            msg("system", "system prompt"),
494            msg("user", &big),
495            msg("assistant", "old reply"),
496            msg("user", "recent1"),
497            msg("assistant", "recent2"),
498        ];
499        let config = HistoryPrunerConfig {
500            enabled: true,
501            max_tokens: 100,
502            keep_recent: 2,
503            collapse_tool_results: false,
504        };
505        let stats = prune_history(&mut messages, &config);
506        assert!(messages.iter().any(|m| m.role == "system"));
507        assert!(messages.iter().any(|m| m.content == "recent1"));
508        assert!(messages.iter().any(|m| m.content == "recent2"));
509        assert!(stats.dropped_messages > 0);
510    }
511
512    #[test]
513    fn prune_drops_oldest_when_over_budget() {
514        let filler = "y".repeat(400);
515        let mut messages = vec![
516            msg("system", "sys"),
517            msg("user", &filler),
518            msg("assistant", &filler),
519            msg("user", "recent-user"),
520            msg("assistant", "recent-assistant"),
521        ];
522        let config = HistoryPrunerConfig {
523            enabled: true,
524            max_tokens: 150,
525            keep_recent: 2,
526            collapse_tool_results: false,
527        };
528        let stats = prune_history(&mut messages, &config);
529        assert!(stats.dropped_messages >= 1);
530        assert_eq!(messages[0].role, "system");
531        assert!(messages.iter().any(|m| m.content == "recent-user"));
532        assert!(messages.iter().any(|m| m.content == "recent-assistant"));
533    }
534
535    #[test]
536    fn prune_empty_messages() {
537        let mut messages: Vec<ChatMessage> = vec![];
538        let config = HistoryPrunerConfig {
539            enabled: true,
540            ..Default::default()
541        };
542        let stats = prune_history(&mut messages, &config);
543        assert_eq!(stats.messages_before, 0);
544        assert_eq!(stats.messages_after, 0);
545    }
546
547    #[test]
548    fn prune_collapses_multi_tool_group() {
549        let mut messages = vec![
550            msg("system", "sys"),
551            msg(
552                "assistant",
553                r#"{"content":null,"tool_calls":[{"id":"t1","name":"shell","arguments":"{}"},{"id":"t2","name":"web","arguments":"{}"}]}"#,
554            ),
555            msg("tool", r#"{"tool_call_id":"t1","content":"result1"}"#),
556            msg("tool", r#"{"tool_call_id":"t2","content":"result2"}"#),
557            msg("user", "thanks"),
558            msg("assistant", "done"),
559        ];
560        let config = HistoryPrunerConfig {
561            enabled: true,
562            max_tokens: 100_000,
563            keep_recent: 2,
564            collapse_tool_results: true,
565        };
566        let stats = prune_history(&mut messages, &config);
567        assert_eq!(stats.collapsed_pairs, 2);
568        // assistant(tool_calls) + 2 tool messages → 1 summary assistant
569        assert_eq!(messages.len(), 4); // sys, summary, user, assistant
570        assert!(messages[1].content.contains("2 tool call(s)"));
571        // No tool messages remain
572        assert!(!messages.iter().any(|m| m.role == "tool"));
573    }
574
575    #[test]
576    fn prune_drops_tool_group_atomically() {
577        let big = "x".repeat(2000);
578        let mut messages = vec![
579            msg("system", "sys"),
580            msg("assistant", &big),
581            msg("tool", &big),
582            msg("tool", &big),
583            msg("user", "recent"),
584            msg("assistant", "recent reply"),
585        ];
586        let config = HistoryPrunerConfig {
587            enabled: true,
588            max_tokens: 50, // very low — forces drops
589            keep_recent: 2,
590            collapse_tool_results: false, // skip collapse, go straight to drop
591        };
592        let stats = prune_history(&mut messages, &config);
593        assert!(stats.dropped_messages >= 3); // assistant + 2 tools dropped together
594        // No orphaned tool messages
595        for (i, m) in messages.iter().enumerate() {
596            if m.role == "tool" {
597                assert!(
598                    i > 0 && messages[i - 1].role == "assistant",
599                    "tool message at index {i} has no preceding assistant"
600                );
601            }
602        }
603    }
604
605    #[test]
606    fn prune_never_orphans_tool_use() {
607        // Simulate a conversation with multiple tool groups
608        let filler = "y".repeat(500);
609        let mut messages = vec![
610            msg("system", "sys"),
611            msg("user", "q1"),
612            msg("assistant", &filler), // tool group 1
613            msg("tool", &filler),
614            msg("user", "q2"),
615            msg("assistant", &filler), // tool group 2
616            msg("tool", &filler),
617            msg("tool", &filler),
618            msg("user", "recent"),
619            msg("assistant", "recent reply"),
620        ];
621        let config = HistoryPrunerConfig {
622            enabled: true,
623            max_tokens: 100,
624            keep_recent: 2,
625            collapse_tool_results: true,
626        };
627        prune_history(&mut messages, &config);
628        // Verify invariant: no tool message without a preceding assistant
629        for (i, m) in messages.iter().enumerate() {
630            if m.role == "tool" {
631                assert!(
632                    i > 0 && messages[i - 1].role == "assistant",
633                    "orphaned tool message at index {i}: {:?}",
634                    messages.iter().map(|m| &m.role).collect::<Vec<_>>()
635                );
636            }
637        }
638    }
639
640    #[test]
641    fn prune_protects_recent_tool_groups() {
642        let mut messages = vec![
643            msg("system", "sys"),
644            msg("user", "old"),
645            msg("assistant", "old reply"),
646            msg("user", "do something"),
647            msg(
648                "assistant",
649                r#"{"content":"checking","tool_calls":[{"id":"toolu_recent","name":"shell","arguments":"{}"}]}"#,
650            ),
651            msg(
652                "tool",
653                r#"{"tool_call_id":"toolu_recent","content":"tool result"}"#,
654            ),
655            msg("user", "recent"),
656        ];
657        let config = HistoryPrunerConfig {
658            enabled: true,
659            max_tokens: 100_000,
660            keep_recent: 3, // protects last 3: tool call, tool result, recent
661            collapse_tool_results: true,
662        };
663        let stats = prune_history(&mut messages, &config);
664        // Protected tool group should not be collapsed
665        assert!(messages.iter().any(|m| m.role == "tool"));
666        assert_eq!(stats.collapsed_pairs, 0);
667    }
668
669    #[test]
670    fn prune_under_realistic_token_pressure_preserves_tool_pairing() {
671        // Simulate 15 tool iterations with realistic content sizes
672        let mut messages = vec![msg("system", "You are helpful.")];
673        messages.push(msg("user", "Research this topic thoroughly"));
674
675        // 15 tool iterations — each adds assistant(tool_calls) + tool(result)
676        for i in 0..15 {
677            let tool_json = format!(
678                r#"{{"content":"iteration {i}","tool_calls":[{{"id":"t{i}","name":"web_search","arguments":"{{}}"}}]}}"#
679            );
680            messages.push(msg("assistant", &tool_json));
681            // Realistic tool result size (~2K chars each)
682            let result = format!(
683                r#"{{"tool_call_id":"t{i}","content":"{}"}}"#,
684                "x".repeat(2000)
685            );
686            messages.push(msg("tool", &result));
687        }
688        messages.push(msg("assistant", "Here's what I found..."));
689
690        // 33 messages total: system + user + 15*(assistant+tool) + final assistant
691        assert_eq!(messages.len(), 33);
692
693        let config = HistoryPrunerConfig {
694            enabled: true,
695            max_tokens: 2000, // Forces pruning of older iterations
696            keep_recent: 4,
697            collapse_tool_results: true,
698        };
699
700        prune_history(&mut messages, &config);
701
702        // Invariant: no orphaned tool messages after pruning
703        for (i, m) in messages.iter().enumerate() {
704            if m.role == "tool" {
705                assert!(
706                    i > 0 && messages[i - 1].role == "assistant",
707                    "orphaned tool at index {i}: roles = {:?}",
708                    messages.iter().map(|m| &m.role).collect::<Vec<_>>()
709                );
710            }
711        }
712    }
713
714    #[test]
715    fn prune_merges_consecutive_collapsed_assistant_messages() {
716        let mut messages = vec![
717            msg("system", "sys"),
718            msg(
719                "assistant",
720                r#"{"content":null,"tool_calls":[{"id":"t1","name":"shell","arguments":"{}"}]}"#,
721            ),
722            msg("tool", r#"{"tool_call_id":"t1","content":"first"}"#),
723            msg(
724                "assistant",
725                r#"{"content":null,"tool_calls":[{"id":"t2","name":"web","arguments":"{}"}]}"#,
726            ),
727            msg("tool", r#"{"tool_call_id":"t2","content":"second"}"#),
728            msg("user", "recent"),
729            msg("assistant", "done"),
730        ];
731
732        let config = HistoryPrunerConfig {
733            enabled: true,
734            max_tokens: 100_000,
735            keep_recent: 2,
736            collapse_tool_results: true,
737        };
738        let stats = prune_history(&mut messages, &config);
739
740        assert_eq!(stats.collapsed_pairs, 2);
741        assert_eq!(messages.len(), 4);
742        assert_eq!(messages[1].role, "assistant");
743        assert!(messages[1].content.contains("1 tool call(s)"));
744        assert_eq!(messages.iter().filter(|m| m.role == "assistant").count(), 2);
745        assert!(
746            messages
747                .windows(2)
748                .all(|pair| !(pair[0].role == "assistant" && pair[1].role == "assistant")),
749            "pruned roles should not contain adjacent assistants: {:?}",
750            messages.iter().map(|m| m.role.as_str()).collect::<Vec<_>>()
751        );
752    }
753
754    #[test]
755    fn prune_preserves_straddled_tool_group_after_collapsed_summary() {
756        let mut messages = vec![
757            msg("system", "sys"),
758            msg(
759                "assistant",
760                r#"{"content":null,"tool_calls":[{"id":"old","name":"shell","arguments":"{}"}]}"#,
761            ),
762            msg("tool", r#"{"tool_call_id":"old","content":"old result"}"#),
763            msg(
764                "assistant",
765                r#"{"content":null,"tool_calls":[{"id":"live","name":"shell","arguments":"{}"}]}"#,
766            ),
767            msg("tool", r#"{"tool_call_id":"live","content":"live result"}"#),
768            msg("user", "follow up"),
769        ];
770
771        let config = HistoryPrunerConfig {
772            enabled: true,
773            max_tokens: 100_000,
774            keep_recent: 3,
775            collapse_tool_results: true,
776        };
777        let stats = prune_history(&mut messages, &config);
778
779        assert_eq!(stats.collapsed_pairs, 1);
780        assert!(
781            messages
782                .iter()
783                .any(|m| m.role == "assistant" && m.content.contains("\"id\":\"live\"")),
784            "protected assistant tool call should survive: {messages:?}"
785        );
786        assert!(
787            messages
788                .iter()
789                .any(|m| m.role == "tool" && m.content.contains("\"tool_call_id\":\"live\"")),
790            "matching protected tool result should survive: {messages:?}"
791        );
792        assert!(
793            messages
794                .iter()
795                .any(|m| m.role == "user" && m.content == "[context continues]"),
796            "Phase 5 should separate collapsed summary from live assistant"
797        );
798        assert!(
799            messages
800                .windows(2)
801                .all(|pair| !(pair[0].role == "assistant" && pair[1].role == "assistant")),
802            "pruned roles should not contain adjacent assistants: {:?}",
803            messages.iter().map(|m| m.role.as_str()).collect::<Vec<_>>()
804        );
805    }
806
807    #[test]
808    fn prune_removes_dangling_tool_call_after_collapsed_summary() {
809        let mut messages = vec![
810            msg("system", "sys"),
811            msg(
812                "assistant",
813                "[Tool exchange: 1 tool call(s) — results collapsed]",
814            ),
815            msg(
816                "assistant",
817                r#"{"content":null,"tool_calls":[{"id":"dangling","name":"shell","arguments":"{}"}]}"#,
818            ),
819            msg("user", "follow up"),
820        ];
821
822        let config = HistoryPrunerConfig {
823            enabled: true,
824            max_tokens: 100_000,
825            keep_recent: 2,
826            collapse_tool_results: true,
827        };
828        let stats = prune_history(&mut messages, &config);
829
830        assert_eq!(stats.dropped_messages, 1);
831        assert!(
832            !messages
833                .iter()
834                .any(|m| m.content.contains("\"id\":\"dangling\"")),
835            "dangling assistant tool call should not survive: {messages:?}"
836        );
837        assert_eq!(
838            messages.iter().map(|m| m.role.as_str()).collect::<Vec<_>>(),
839            vec!["system", "assistant", "user"]
840        );
841    }
842
843    #[test]
844    fn prune_does_not_merge_json_tool_call_assistants_as_summaries() {
845        let mut messages = vec![
846            msg("system", "sys"),
847            msg(
848                "assistant",
849                r#"{"content":null,"tool_calls":[{"id":"live1","name":"shell","arguments":"{}"}]}"#,
850            ),
851            msg("tool", r#"{"tool_call_id":"live1","content":"first"}"#),
852            msg(
853                "assistant",
854                r#"{"content":null,"tool_calls":[{"id":"live2","name":"web","arguments":"{}"}]}"#,
855            ),
856            msg("tool", r#"{"tool_call_id":"live2","content":"second"}"#),
857        ];
858
859        let config = HistoryPrunerConfig {
860            enabled: true,
861            max_tokens: 100_000,
862            keep_recent: 4,
863            collapse_tool_results: true,
864        };
865        let stats = prune_history(&mut messages, &config);
866
867        assert_eq!(stats.collapsed_pairs, 0);
868        assert!(
869            messages
870                .iter()
871                .any(|m| m.content.contains("\"id\":\"live1\"")),
872            "first protected tool call should remain structured"
873        );
874        assert!(
875            messages
876                .iter()
877                .any(|m| m.content.contains("\"id\":\"live2\"")),
878            "second protected tool call should remain structured"
879        );
880    }
881
882    #[test]
883    fn prune_inserts_separator_when_tight_budget_leaves_protected_assistants() {
884        let mut messages = vec![
885            msg("system", "sys"),
886            msg("assistant", "protected assistant one"),
887            msg("assistant", "protected assistant two"),
888        ];
889
890        let config = HistoryPrunerConfig {
891            enabled: true,
892            max_tokens: 1,
893            keep_recent: 2,
894            collapse_tool_results: false,
895        };
896        let stats = prune_history(&mut messages, &config);
897
898        assert_eq!(stats.dropped_messages, 0);
899        assert_eq!(
900            messages.iter().map(|m| m.role.as_str()).collect::<Vec<_>>(),
901            vec!["system", "assistant", "user", "assistant"]
902        );
903        assert_eq!(messages[2].content, "[context continues]");
904    }
905
906    // -----------------------------------------------------------------------
907    // remove_orphaned_tool_messages tests
908    // -----------------------------------------------------------------------
909
910    #[test]
911    fn orphan_tool_at_start_is_removed() {
912        // Simulates the exact bug: session drain removes the assistant
913        // message but leaves its tool results at the start.
914        let mut messages = vec![
915            msg("system", "sys"),
916            msg(
917                "tool",
918                r#"{"content":"file listing","tool_call_id":"toolu_01HiJXWbhx"}"#,
919            ),
920            msg(
921                "tool",
922                r#"{"content":"another result","tool_call_id":"toolu_01AQP25qUz"}"#,
923            ),
924            msg("user", "thanks"),
925            msg("assistant", "done"),
926        ];
927        let pruned = remove_orphaned_tool_messages(&mut messages);
928        assert_eq!(pruned.removed, 2);
929        assert_eq!(messages.len(), 3);
930        assert_eq!(messages[0].role, "system");
931        assert_eq!(messages[1].role, "user");
932        assert_eq!(messages[2].role, "assistant");
933    }
934
935    #[test]
936    fn valid_tool_pair_preserved() {
937        // A properly paired assistant+tool sequence must survive.
938        let assistant_with_tools = r#"{"content":"checking","tool_calls":[{"id":"toolu_abc123","name":"shell","arguments":"{}"}]}"#;
939        let tool_result = r#"{"content":"ok","tool_call_id":"toolu_abc123"}"#;
940        let mut messages = vec![
941            msg("system", "sys"),
942            msg("user", "do it"),
943            msg("assistant", assistant_with_tools),
944            msg("tool", tool_result),
945            msg("assistant", "done"),
946        ];
947        let pruned = remove_orphaned_tool_messages(&mut messages);
948        assert_eq!(pruned.removed, 0);
949        assert_eq!(messages.len(), 5);
950    }
951
952    #[test]
953    fn multi_tool_call_batch_preserved() {
954        // An assistant with 3 tool_calls followed by 3 tool results.
955        let assistant_content = r#"{"content":"running","tool_calls":[{"id":"toolu_aaa","name":"shell","arguments":"{}"},{"id":"toolu_bbb","name":"shell","arguments":"{}"},{"id":"toolu_ccc","name":"shell","arguments":"{}"}]}"#;
956        let mut messages = vec![
957            msg("system", "sys"),
958            msg("user", "do all 3"),
959            msg("assistant", assistant_content),
960            msg("tool", r#"{"content":"r1","tool_call_id":"toolu_aaa"}"#),
961            msg("tool", r#"{"content":"r2","tool_call_id":"toolu_bbb"}"#),
962            msg("tool", r#"{"content":"r3","tool_call_id":"toolu_ccc"}"#),
963            msg("assistant", "all done"),
964        ];
965        let pruned = remove_orphaned_tool_messages(&mut messages);
966        assert_eq!(pruned.removed, 0);
967        assert_eq!(messages.len(), 7);
968    }
969
970    #[test]
971    fn mismatched_tool_id_is_removed() {
972        // Tool result references a tool_call_id not in the assistant message.
973        let assistant_content = r#"{"content":"running","tool_calls":[{"id":"toolu_aaa","name":"shell","arguments":"{}"}]}"#;
974        let mut messages = vec![
975            msg("system", "sys"),
976            msg("user", "go"),
977            msg("assistant", assistant_content),
978            msg("tool", r#"{"content":"ok","tool_call_id":"toolu_aaa"}"#),
979            msg("tool", r#"{"content":"stale","tool_call_id":"toolu_GONE"}"#),
980            msg("assistant", "done"),
981        ];
982        let pruned = remove_orphaned_tool_messages(&mut messages);
983        assert_eq!(pruned.removed, 1);
984        assert_eq!(messages.len(), 5);
985        // The valid tool result stays, the orphan is gone.
986        assert_eq!(messages[3].role, "tool");
987        assert!(messages[3].content.contains("toolu_aaa"));
988    }
989
990    #[test]
991    fn orphan_tool_in_middle_after_collapsed_pair() {
992        // Phase 1 collapsed an assistant+tool pair into a summary, but
993        // a subsequent tool message referenced the original tool_call_id.
994        let mut messages = vec![
995            msg("system", "sys"),
996            msg("assistant", "[Tool result: truncated...]"), // collapsed
997            msg(
998                "tool",
999                r#"{"content":"leftover","tool_call_id":"toolu_OLD"}"#,
1000            ),
1001            msg("user", "next"),
1002            msg("assistant", "ok"),
1003        ];
1004        let pruned = remove_orphaned_tool_messages(&mut messages);
1005        assert_eq!(pruned.removed, 1);
1006        assert_eq!(messages.len(), 4);
1007        assert_eq!(messages[1].role, "assistant");
1008        assert_eq!(messages[2].role, "user");
1009    }
1010
1011    #[test]
1012    fn preamble_then_tool_calls_is_kept_intact() {
1013        // Healthy shape: `[A: "let me check"] [A: tool_calls] [T: result]`.
1014        // The assistant first emits a brief preamble, then dispatches the
1015        // tool, then the tool returns. This is the normal flow of a real
1016        // tool-using turn — Pass 1 must NOT touch it.
1017        let tool_calls_assistant = r#"{"content":null,"tool_calls":[{"id":"toolu_LIVE","name":"shell","arguments":"{}"}]}"#;
1018        let mut messages = vec![
1019            msg("system", "sys"),
1020            msg("user", "do something"),
1021            msg("assistant", "Let me check."),
1022            msg("assistant", tool_calls_assistant),
1023            msg("tool", r#"{"content":"ok","tool_call_id":"toolu_LIVE"}"#),
1024            msg("assistant", "Here are the results."),
1025        ];
1026        let before = messages.len();
1027        let pruned = remove_orphaned_tool_messages(&mut messages);
1028        assert_eq!(
1029            pruned.removed, 0,
1030            "preamble + dispatch + result is a healthy turn, not orphan poisoning"
1031        );
1032        assert_eq!(messages.len(), before);
1033    }
1034
1035    #[test]
1036    fn back_to_back_unresolved_tool_calls_strips_later_dispatch() {
1037        // Genuinely poisoned shape: `[A: tool_calls A]` followed
1038        // immediately by `[A: tool_calls B]` with no tool result for A
1039        // sitting between them. The earlier dispatch is unresolved, so
1040        // the later assistant + its results are removed to restore a
1041        // well-formed turn.
1042        let first_dispatch = r#"{"content":null,"tool_calls":[{"id":"toolu_LOST","name":"shell","arguments":"{}"}]}"#;
1043        let second_dispatch = r#"{"content":null,"tool_calls":[{"id":"toolu_DEAD","name":"shell","arguments":"{}"}]}"#;
1044        let mut messages = vec![
1045            msg("system", "sys"),
1046            msg("user", "do something"),
1047            msg("assistant", first_dispatch),
1048            msg("assistant", second_dispatch),
1049            msg("tool", r#"{"content":"ok","tool_call_id":"toolu_DEAD"}"#),
1050            msg("assistant", "summary"),
1051        ];
1052        let pruned = remove_orphaned_tool_messages(&mut messages);
1053        assert_eq!(
1054            pruned.removed, 2,
1055            "second dispatch + its tool_result must be removed when prior dispatch is unresolved"
1056        );
1057        // What survives: sys, user, first_dispatch (now orphaned), summary.
1058        // Pass 2 then sweeps any remaining orphan tool messages — there
1059        // are none after Pass 1, but the orphaned first_dispatch itself
1060        // (assistant with tool_calls and no responses) stays, because
1061        // this function only removes *tool*-role orphans in Pass 2,
1062        // not stranded assistant dispatches.
1063        assert_eq!(messages.len(), 4);
1064        assert_eq!(messages[2].content, first_dispatch);
1065        assert_eq!(messages[3].content, "summary");
1066    }
1067
1068    #[test]
1069    fn tool_without_parseable_id_kept_if_assistant_has_tool_calls() {
1070        // Conservative: if we can't parse the tool_call_id, keep the
1071        // message as long as the preceding assistant has tool_calls.
1072        let assistant_content = r#"{"content":"running","tool_calls":[{"id":"toolu_x","name":"shell","arguments":"{}"}]}"#;
1073        let mut messages = vec![
1074            msg("system", "sys"),
1075            msg("user", "go"),
1076            msg("assistant", assistant_content),
1077            msg("tool", "plain text result without json"),
1078            msg("assistant", "done"),
1079        ];
1080        let pruned = remove_orphaned_tool_messages(&mut messages);
1081        assert_eq!(pruned.removed, 0);
1082        assert_eq!(messages.len(), 5);
1083    }
1084
1085    #[test]
1086    fn phase2_budget_respects_protected_tool_messages() {
1087        // Phase 2 should not drop tool messages that fall within the
1088        // keep_recent protection window, even when the assistant that
1089        // starts the group is outside the window.
1090        let tool_content = r#"{"tool_call_id":"toolu_recent","content":"result"}"#;
1091        let assistant_tool = r#"{"content":"calling","tool_calls":[{"id":"toolu_recent","name":"shell","arguments":"{}"}]}"#;
1092        let mut messages = vec![
1093            msg("system", "sys"),
1094            msg("user", "old question"),
1095            msg(
1096                "assistant",
1097                "old answer with lots of padding text to inflate token count significantly beyond budget",
1098            ),
1099            msg("user", "another old question"),
1100            msg("assistant", assistant_tool),  // outside keep_recent
1101            msg("tool", tool_content),         // inside keep_recent (3rd from end)
1102            msg("user", "recent question"),    // inside keep_recent (2nd from end)
1103            msg("assistant", "recent answer"), // inside keep_recent (1st from end)
1104        ];
1105        // Budget tight enough that Phase 2 fires, keep_recent=3 protects last 3
1106        let config = HistoryPrunerConfig {
1107            enabled: true,
1108            max_tokens: 50,
1109            keep_recent: 3,
1110            collapse_tool_results: true,
1111        };
1112        prune_history(&mut messages, &config);
1113        // The protected tool message must survive
1114        assert!(
1115            messages.iter().any(|m| m.content.contains("toolu_recent")),
1116            "Protected tool message was dropped by Phase 2 budget enforcement"
1117        );
1118    }
1119
1120    /// Regression test for issue #5813: a compaction summary preserves
1121    /// identifiers by design (UUIDs, tokens, tool_call_ids). That means the
1122    /// summary text may contain the tool_call_id of a tool_result whose
1123    /// tool_use was dropped. The orphan detector must not be fooled by a
1124    /// substring match on the summary — it must confirm the id appears in
1125    /// a structured tool_calls array.
1126    #[test]
1127    fn orphan_tool_not_fooled_by_id_in_summary_text() {
1128        let summary = "[CONTEXT SUMMARY \u{2014} 4 messages compressed]\n\
1129             Earlier turns invoked shell with tool_calls id toolu_01Orphan \
1130             and returned ok.";
1131        let mut messages = vec![
1132            msg("system", "sys"),
1133            msg("assistant", summary),
1134            msg(
1135                "tool",
1136                r#"{"tool_call_id":"toolu_01Orphan","content":"stale"}"#,
1137            ),
1138            msg("user", "new question"),
1139        ];
1140        let pruned = remove_orphaned_tool_messages(&mut messages);
1141        assert_eq!(
1142            pruned.removed, 1,
1143            "orphan must be removed even if its id is mentioned in summary text"
1144        );
1145        assert!(!messages.iter().any(|m| m.role == "tool"));
1146    }
1147
1148    /// Regression test for issue #5743: MiniMax rejects orphaned tool-role
1149    /// messages whose assistant (with `tool_calls`) was trimmed by the
1150    /// channel orchestrator's proactive history trimming.
1151    #[test]
1152    fn orphan_tool_from_trimmed_channel_history() {
1153        // Simulates the scenario: channel history was trimmed and the
1154        // assistant message containing tool_calls was dropped, leaving
1155        // orphaned tool results with MiniMax-style IDs.
1156        let tool_result =
1157            r#"{"content":"search results","tool_call_id":"chatcmpl-tool-92a12a15c14f3b36"}"#;
1158        let mut messages = vec![
1159            msg("system", "You are a helpful assistant"),
1160            msg("tool", tool_result),
1161            msg("assistant", "Here are the search results"),
1162            msg("user", "Thanks, now summarize them"),
1163        ];
1164        let pruned = remove_orphaned_tool_messages(&mut messages);
1165        assert_eq!(pruned.removed, 1, "orphaned tool message should be removed");
1166        assert_eq!(messages.len(), 3);
1167        assert_eq!(messages[0].role, "system");
1168        assert_eq!(messages[1].role, "assistant");
1169        assert_eq!(messages[2].role, "user");
1170    }
1171
1172    /// Regression for #5823:
1173    ///
1174    /// When `keep_recent` protects the *tail* of a multi-tool group but not
1175    /// the preceding assistant, Phase 1 used to collapse the unprotected
1176    /// tools and rewrite the assistant to a summary that no longer contained
1177    /// `"tool_calls"`. Phase 3's orphan sweep then classified the still-live
1178    /// protected tool as an orphan (because the new summary does not contain
1179    /// `"tool_calls"`) and removed it — silently violating `keep_recent`.
1180    ///
1181    /// After the fix Phase 1 treats the group as atomic: if any tool in it
1182    /// is protected, the entire group is left intact.
1183    #[test]
1184    fn prune_does_not_evict_protected_tool_when_group_straddles_keep_recent() {
1185        let mut messages = vec![
1186            msg("system", "sys"),
1187            msg("user", "query"),
1188            msg(
1189                "assistant",
1190                r#"{"content":null,"tool_calls":[
1191                    {"id":"t1","name":"shell","arguments":"{}"},
1192                    {"id":"t2","name":"web","arguments":"{}"}
1193                ]}"#,
1194            ),
1195            msg("tool", r#"{"tool_call_id":"t1","content":"first"}"#),
1196            msg(
1197                "tool",
1198                r#"{"tool_call_id":"t2","content":"PROTECTED second"}"#,
1199            ),
1200            msg("user", "follow up"),
1201            msg("assistant", "final"),
1202        ];
1203
1204        let config = HistoryPrunerConfig {
1205            enabled: true,
1206            // Budget is well above the estimated token cost so Phase 2 does
1207            // not drop anything; this test isolates the Phase 1 / Phase 3
1208            // interaction.
1209            max_tokens: 100_000,
1210            keep_recent: 3,
1211            collapse_tool_results: true,
1212        };
1213
1214        let stats = prune_history(&mut messages, &config);
1215
1216        assert_eq!(stats.messages_before, 7);
1217        assert!(
1218            messages
1219                .iter()
1220                .any(|m| m.content.contains("PROTECTED second")),
1221            "a tool message protected by keep_recent must survive; \
1222             got roles {:?}",
1223            messages.iter().map(|m| m.role.as_str()).collect::<Vec<_>>()
1224        );
1225    }
1226}
zeroclaw_runtime/agent/history_pruner.rs

zeroclaw_runtime/agent/
history_pruner.rs