Skip to main content

zeroclaw_runtime/agent/
history.rs

1use crate::agent::history_pruner::remove_orphaned_tool_messages;
2use anyhow::Result;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::path::Path;
6use std::sync::LazyLock;
7use zeroclaw_providers::ChatMessage;
8
/// Default auto-compaction trigger: compaction kicks in once the number of
/// non-system messages exceeds this threshold.
///
/// Prefer passing the config-driven value via `run_tool_call_loop`; this constant is only
/// used when callers omit the parameter.
pub const DEFAULT_MAX_HISTORY_MESSAGES: usize = 50;
13
14// Matches a local image path that a tool printed as bare text so it can be
15// promoted to an `[IMAGE:…]` marker. Three rooted forms are recognized:
16//   - POSIX absolute:      `/path/to/a.png`
17//   - Windows drive:       `C:\path\a.png` or `C:/path/a.png`
18//   - Windows UNC share:   `\\server\share\a.png`
19// Only rooted paths are promoted; `is_existing_local_image_path` further
20// requires the path to be absolute and to point at a real file, so on
21// non-Windows hosts the Windows forms match here but are filtered out there
22// (their `is_absolute()` is false), leaving behavior unchanged off-Windows.
23static LOCAL_IMAGE_PATH_RE: LazyLock<Regex> = LazyLock::new(|| {
24    Regex::new(
25        r#"(?:[A-Za-z]:[\\/]|\\\\[^\s<>'"`\]\)/\\]+[\\/]|/)[^\s<>'"`\]\)]+?\.(?i:png|jpe?g|webp|gif|bmp)"#,
26    )
27    .expect("valid image path regex")
28});
29
/// Returns the greatest byte offset `<= i` that sits on a UTF-8 char boundary
/// of `s`; any `i` at or past the end of `s` yields `s.len()`.
///
/// MSRV-compatible replacement for `str::floor_char_boundary` (stable in 1.91).
pub fn floor_char_boundary(s: &str, i: usize) -> usize {
    if i >= s.len() {
        return s.len();
    }
    // Offset 0 is always a boundary, so the backward scan cannot come up empty.
    (0..=i)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0)
}
42
/// Says which kept portion of a truncated string a cut point delimits, and
/// therefore in which direction it may be moved to escape a bisected
/// `[IMAGE:...]` marker.
#[derive(Clone, Copy)]
enum TruncationSide {
    /// The cut point ends the kept head; it moves backward, out of the marker.
    Head,
    /// The cut point starts the kept tail; it moves forward, out of the marker.
    Tail,
}

/// Moves `boundary` off an `[IMAGE:...]` marker it would otherwise cut in two.
///
/// A boundary counts as "inside" a marker when the nearest `[IMAGE:` that
/// starts strictly before it has its first `]` at or after the boundary. In
/// that case the result is the opener's offset for [`TruncationSide::Head`]
/// and the offset just past the `]` for [`TruncationSide::Tail`], so the whole
/// marker ends up in the discarded middle instead of surviving as a fragment.
/// In every other case (boundary at either end of `s`, no preceding opener,
/// opener never closed, or marker already closed) `boundary` is returned
/// untouched.
fn nudge_around_image_marker(s: &str, boundary: usize, side: TruncationSide) -> usize {
    const OPEN: &str = "[IMAGE:";
    if boundary == 0 || boundary >= s.len() {
        return boundary;
    }

    // Latest opener whose `[` lies strictly before the boundary. Looking at
    // opener start offsets over the whole string (instead of searching only
    // the prefix) also catches a boundary that cuts through the `[IMAGE:`
    // token itself.
    let opener = s
        .match_indices(OPEN)
        .map(|(offset, _)| offset)
        .take_while(|&offset| offset < boundary)
        .last();
    let Some(open_idx) = opener else {
        return boundary;
    };

    // The first `]` after the opener is taken as the end of the marker.
    match s[open_idx..].find(']').map(|rel| open_idx + rel) {
        Some(close_idx) if close_idx >= boundary => match side {
            TruncationSide::Head => open_idx,
            TruncationSide::Tail => (close_idx + 1).min(s.len()),
        },
        // Unterminated opener, or the marker closed before the boundary.
        _ => boundary,
    }
}
98
99/// Truncate a tool result to `max_chars`, keeping head (2/3) + tail (1/3)
100/// with a marker in the middle. Returns input unchanged if within limit or
101/// `max_chars == 0` (disabled).
102///
103/// Boundaries are nudged inward when they would split an `[IMAGE:...]`
104/// marker, so the multimodal regex never sees a half-marker in the
105/// surviving head/tail. This matches the canonicalization step that runs
106/// immediately before truncation in `run_tool_call_loop`.
107pub fn truncate_tool_result(output: &str, max_chars: usize) -> String {
108    if max_chars == 0 || output.len() <= max_chars {
109        return output.to_string();
110    }
111    let head_len = max_chars * 2 / 3;
112    let tail_len = max_chars.saturating_sub(head_len);
113    let head_end = floor_char_boundary(output, head_len);
114    // ceil_char_boundary: find smallest byte index >= i on a char boundary
115    let tail_start_raw = output.len().saturating_sub(tail_len);
116    let tail_start = if tail_start_raw >= output.len() {
117        output.len()
118    } else {
119        let mut pos = tail_start_raw;
120        while pos < output.len() && !output.is_char_boundary(pos) {
121            pos += 1;
122        }
123        pos
124    };
125
126    // Step boundaries away from any `[IMAGE:...]` marker they would bisect.
127    // `[IMAGE:` and `]` are pure ASCII, so the adjusted indices land on
128    // valid UTF-8 char boundaries.
129    let head_end = nudge_around_image_marker(output, head_end, TruncationSide::Head);
130    let tail_start = nudge_around_image_marker(output, tail_start, TruncationSide::Tail);
131
132    // Guard against overlap when max_chars is very small
133    if head_end >= tail_start {
134        return output[..floor_char_boundary(output, max_chars)].to_string();
135    }
136    let truncated_chars = tail_start - head_end;
137    format!(
138        "{}\n\n[... {} characters truncated ...]\n\n{}",
139        &output[..head_end],
140        truncated_chars,
141        &output[tail_start..]
142    )
143}
144
/// Reports whether `path` is an absolute path to an existing regular file
/// whose extension (compared ASCII case-insensitively) is one of the
/// recognized image types.
fn is_existing_local_image_path(path: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 6] = ["png", "jpg", "jpeg", "webp", "gif", "bmp"];

    let candidate = Path::new(path);
    // Cheap syntactic check first; the filesystem is only touched for
    // absolute paths.
    if !candidate.is_absolute() || !candidate.is_file() {
        return false;
    }
    match candidate.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => IMAGE_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
159
/// Gathers the whitespace-trimmed text between `[IMAGE:` and the next `]` for
/// every explicit marker found in `output`.
///
/// The promoter consults this set so that a bare path which already has an
/// explicit marker elsewhere in the same output is not wrapped into a second
/// marker, which would count (and inline) the same image twice. That allows a
/// tool to print both a durable, human-readable path line and an explicit
/// marker for one file (e.g. `image_info` keeps a `File: <path>` line so the
/// path survives in history after the marker is stripped from older turns).
///
/// Scanning stops at the first opener that has no closing `]`.
fn existing_marker_payloads(output: &str) -> std::collections::HashSet<&str> {
    const OPEN: &str = "[IMAGE:";
    let mut payloads = std::collections::HashSet::new();
    let mut rest = output;
    while let Some((_, after_open)) = rest.split_once(OPEN) {
        let Some((payload, after_close)) = after_open.split_once(']') else {
            break;
        };
        payloads.insert(payload.trim());
        rest = after_close;
    }
    payloads
}
182
183/// Rewrite real local image file paths in tool output into `[IMAGE:...]`
184/// markers so the multimodal pipeline can normalize them before the next
185/// provider call. This targets shell/skill outputs that print filesystem
186/// paths directly rather than returning explicit media markers.
187pub fn canonicalize_tool_result_media_markers(output: &str) -> String {
188    let existing_markers = existing_marker_payloads(output);
189    let mut rewritten = String::with_capacity(output.len());
190    let mut cursor = 0usize;
191    let mut changed = false;
192
193    for mat in LOCAL_IMAGE_PATH_RE.find_iter(output) {
194        let start = mat.start();
195        let end = mat.end();
196        let path = &output[start..end];
197
198        // Skip paths that are already part of an explicit media marker.
199        if output[..start].ends_with("[IMAGE:") {
200            continue;
201        }
202
203        // Skip a bare path that already appears inside an explicit marker
204        // elsewhere in the same output — promoting it would double-count the
205        // image (see `existing_marker_payloads`).
206        if existing_markers.contains(path) {
207            continue;
208        }
209
210        if !is_existing_local_image_path(path) {
211            continue;
212        }
213
214        rewritten.push_str(&output[cursor..start]);
215        rewritten.push_str("[IMAGE:");
216        rewritten.push_str(path);
217        rewritten.push(']');
218        cursor = end;
219        changed = true;
220    }
221
222    if !changed {
223        return output.to_string();
224    }
225
226    rewritten.push_str(&output[cursor..]);
227    rewritten
228}
229
230/// Truncate a tool message's content, preserving JSON structure when the
231/// message stores `tool_call_id` alongside `content` (native tool-call
232/// format). Without this, `truncate_tool_result` destroys the JSON envelope
233/// and downstream model_providers receive a `null` `call_id`.
234pub fn truncate_tool_message(msg_content: &str, max_chars: usize) -> String {
235    if max_chars == 0 || msg_content.len() <= max_chars {
236        return msg_content.to_string();
237    }
238    if let Ok(mut obj) =
239        serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(msg_content)
240        && obj.contains_key("tool_call_id")
241        && let Some(serde_json::Value::String(inner)) = obj.get("content")
242    {
243        let truncated = truncate_tool_result(inner, max_chars);
244        obj.insert("content".to_string(), serde_json::Value::String(truncated));
245        return serde_json::to_string(&obj).unwrap_or_else(|_| msg_content.to_string());
246    }
247    truncate_tool_result(msg_content, max_chars)
248}
249
250/// Aggressively trim old tool result messages in history to recover from
251/// context overflow. Keeps the last `protect_last_n` messages untouched.
252/// Returns total characters saved.
253pub fn fast_trim_tool_results(
254    history: &mut [zeroclaw_providers::ChatMessage],
255    protect_last_n: usize,
256) -> usize {
257    let trim_to = 2000;
258    let mut saved = 0;
259    let cutoff = history.len().saturating_sub(protect_last_n);
260    for msg in &mut history[..cutoff] {
261        if msg.role == "tool" && msg.content.len() > trim_to {
262            let original_len = msg.content.len();
263            msg.content = truncate_tool_message(&msg.content, trim_to);
264            saved += original_len - msg.content.len();
265        }
266    }
267    saved
268}
269
270/// Emergency: drop oldest non-system, non-recent messages from history.
271/// Tool groups (assistant + consecutive tool messages) are dropped
272/// atomically to preserve tool_use/tool_result pairing.
273/// Returns number of messages dropped.
274pub fn emergency_history_trim(
275    history: &mut Vec<zeroclaw_providers::ChatMessage>,
276    keep_recent: usize,
277) -> usize {
278    let mut dropped = 0;
279    let target_drop = history.len() / 3;
280    let mut i = 0;
281    while dropped < target_drop && i < history.len().saturating_sub(keep_recent) {
282        if history[i].role == "system" {
283            i += 1;
284        } else if history[i].role == "assistant" {
285            // Count following tool messages — drop as atomic group
286            let mut tool_count = 0;
287            while i + 1 + tool_count < history.len().saturating_sub(keep_recent)
288                && history[i + 1 + tool_count].role == "tool"
289            {
290                tool_count += 1;
291            }
292            for _ in 0..=tool_count {
293                history.remove(i);
294                dropped += 1;
295            }
296        } else {
297            history.remove(i);
298            dropped += 1;
299        }
300    }
301    dropped += remove_orphaned_tool_messages(history).removed;
302    dropped
303}
304
305/// Estimate token count for a message history using ~4 chars/token heuristic.
306/// Includes a small overhead per message for role/framing tokens.
307pub fn estimate_history_tokens(history: &[ChatMessage]) -> usize {
308    history
309        .iter()
310        .map(|m| {
311            // ~4 chars per token + ~4 framing tokens per message (role, delimiters)
312            m.content.len().div_ceil(4) + 4
313        })
314        .sum()
315}
316
317pub fn normalize_system_messages(history: &mut Vec<ChatMessage>) {
318    let mut saw_system = false;
319    let mut system_content = String::new();
320    let mut non_system = Vec::with_capacity(history.len());
321
322    for message in history.drain(..) {
323        if message.role == "system" {
324            saw_system = true;
325            if !message.content.is_empty() {
326                if !system_content.is_empty() {
327                    system_content.push_str("\n\n");
328                }
329                system_content.push_str(&message.content);
330            }
331        } else {
332            non_system.push(message);
333        }
334    }
335
336    if saw_system && !system_content.is_empty() {
337        history.push(ChatMessage::system(system_content));
338    }
339    history.extend(non_system);
340}
341
342pub fn append_or_merge_system_message(history: &mut Vec<ChatMessage>, content: impl Into<String>) {
343    let content = content.into();
344    if content.is_empty() {
345        normalize_system_messages(history);
346        return;
347    }
348
349    if let Some(system_message) = history.iter_mut().find(|message| message.role == "system") {
350        if !system_message.content.is_empty() {
351            system_message.content.push_str("\n\n");
352        }
353        system_message.content.push_str(&content);
354    } else {
355        history.insert(0, ChatMessage::system(content));
356    }
357    normalize_system_messages(history);
358}
359
360/// Trim conversation history to prevent unbounded growth.
361///
362/// Preserves: the system prompt (if any), the first user message (the framing
363/// anchor — losing it is what caused the silent-amnesia bug where models said
364/// "the first message I have is 'Continue'"), and the most recent
365/// `max_history` messages (minus one slot already taken by the anchor).
366///
367/// Drops from the middle. Emits a WARN with counts on every fire so silent
368/// amnesia is impossible to miss again.
369pub fn trim_history(history: &mut Vec<ChatMessage>, max_history: usize) {
370    let has_system = history.first().is_some_and(|m| m.role == "system");
371    let non_system_count = if has_system {
372        history.len() - 1
373    } else {
374        history.len()
375    };
376
377    if non_system_count <= max_history {
378        return;
379    }
380
381    let system_offset = usize::from(has_system);
382
383    // Find the first user message (the framing anchor). If `max_history` is
384    // too small to fit both the anchor and any recent context, fall back to
385    // the old tail-only behaviour rather than producing a degenerate window.
386    let anchor_idx = history
387        .iter()
388        .enumerate()
389        .skip(system_offset)
390        .find(|(_, m)| m.role == "user")
391        .map(|(i, _)| i);
392
393    let messages_before = history.len();
394
395    let dropped_range = match anchor_idx {
396        Some(anchor) if max_history >= 2 => {
397            // Reserve one slot for the anchor; keep `max_history - 1` most recent.
398            let tail_keep = max_history - 1;
399            let tail_start = history.len().saturating_sub(tail_keep);
400            // Middle range to drop: (anchor + 1) .. tail_start.
401            let drop_start = anchor + 1;
402            if tail_start <= drop_start {
403                // Anchor is already inside the tail window — nothing in the
404                // middle to drop. Fall through to plain head-drop below.
405                None
406            } else {
407                Some(drop_start..tail_start)
408            }
409        }
410        _ => None,
411    };
412
413    if let Some(range) = dropped_range {
414        history.drain(range);
415    } else {
416        // No anchor, or `max_history < 2`: original head-drop behaviour.
417        let to_remove = non_system_count - max_history;
418        history.drain(system_offset..system_offset + to_remove);
419    }
420
421    remove_orphaned_tool_messages(history);
422    normalize_system_messages(history);
423
424    let dropped = messages_before.saturating_sub(history.len());
425    if dropped > 0 {
426        ::zeroclaw_log::record!(
427            WARN,
428            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
429                .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
430                .with_attrs(::serde_json::json!({
431                    "messages_before": messages_before,
432                    "messages_after": history.len(),
433                    "dropped": dropped,
434                    "max_history": max_history,
435                    "kept_anchor": anchor_idx.is_some() && max_history >= 2,
436                })),
437            "trim_history fired: middle of conversation dropped. Raise \
438             [runtime_profiles.<name>] max_history_messages or enable \
439             compact_context to avoid silent context loss."
440        );
441    }
442}
443
/// Serialized form of an interactive session: the JSON document that
/// `save_interactive_session_history` writes and
/// `load_interactive_session_history` parses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InteractiveSessionState {
    /// Format version tag; `from_history` always writes `1`.
    pub version: u32,
    /// The persisted conversation messages, in order.
    pub history: Vec<ChatMessage>,
}

impl InteractiveSessionState {
    /// Builds a version-1 state holding a copy of `history`.
    fn from_history(history: &[ChatMessage]) -> Self {
        Self {
            version: 1,
            history: history.to_vec(),
        }
    }
}
458
459pub fn load_interactive_session_history(
460    path: &Path,
461    system_prompt: &str,
462) -> Result<Vec<ChatMessage>> {
463    if !path.exists() {
464        return Ok(vec![ChatMessage::system(system_prompt)]);
465    }
466
467    let raw = std::fs::read_to_string(path)?;
468    let mut state: InteractiveSessionState = serde_json::from_str(&raw)?;
469    if state.history.is_empty() {
470        state.history.push(ChatMessage::system(system_prompt));
471    } else if state.history.first().map(|msg| msg.role.as_str()) != Some("system") {
472        state.history.insert(0, ChatMessage::system(system_prompt));
473    }
474    normalize_system_messages(&mut state.history);
475    if state.history.first().map(|msg| msg.role.as_str()) != Some("system") {
476        state.history.insert(0, ChatMessage::system(system_prompt));
477    }
478
479    // Self-heal persisted sessions that were written with orphaned
480    // tool_result messages (e.g. a crash mid-compaction, or a trim that
481    // dropped the assistant tool_use block but left its tool_result).
482    // Without this the next API call fails with 400 "unexpected tool_use_id
483    // found in tool_result blocks" and the session stays bricked until the
484    // file is deleted.
485    remove_orphaned_tool_messages(&mut state.history);
486
487    Ok(state.history)
488}
489
490pub fn save_interactive_session_history(path: &Path, history: &[ChatMessage]) -> Result<()> {
491    if let Some(parent) = path.parent() {
492        std::fs::create_dir_all(parent)?;
493    }
494
495    let payload = serde_json::to_string_pretty(&InteractiveSessionState::from_history(history))?;
496    std::fs::write(path, payload)?;
497    Ok(())
498}
499
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates `name` inside `dir` containing a PNG signature and returns its
    /// path as a string. The marker tests need a file that really exists:
    /// the promoter ignores missing files, so a made-up path would let those
    /// tests pass without ever reaching the logic they are meant to cover.
    fn write_png_stub(dir: &std::path::Path, name: &str) -> String {
        let image = dir.join(name);
        std::fs::write(&image, [0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n']).unwrap();
        image.display().to_string()
    }

    #[test]
    fn canonicalize_tool_result_media_markers_wraps_existing_local_image_path() {
        let dir = tempfile::tempdir().unwrap();
        let image = write_png_stub(dir.path(), "generated.png");

        let input = format!("Image generated successfully.\nFile: {image}");
        let output = canonicalize_tool_result_media_markers(&input);

        assert!(output.contains("[IMAGE:"));
        assert!(output.contains(&format!("[IMAGE:{image}]")));
    }

    #[test]
    fn canonicalize_tool_result_media_markers_ignores_missing_paths() {
        let input = "File: /tmp/definitely-missing-zeroclaw-image.png";
        let output = canonicalize_tool_result_media_markers(input);
        assert_eq!(output, input);
    }

    #[test]
    fn canonicalize_tool_result_media_markers_preserves_existing_markers() {
        // The file exists, so only the "already inside a marker" handling can
        // keep the output unchanged.
        let dir = tempfile::tempdir().unwrap();
        let image = write_png_stub(dir.path(), "already-tagged.png");

        let input = format!("Already tagged [IMAGE:{image}]");
        let output = canonicalize_tool_result_media_markers(&input);
        assert_eq!(output, input);
    }

    #[test]
    fn canonicalize_tool_result_media_markers_dedups_path_already_in_marker() {
        // `image_info` emits a durable `File: <path>` line *and* an explicit
        // `[IMAGE:<path>]` marker for the same file (so the path survives in
        // history once the marker is stripped from older turns). The promoter
        // must not wrap the bare `File:` path into a second marker, which would
        // double-count the image. Order-independent: the bare path appears
        // before the marker here. The file exists on disk, so the bare path
        // would be promoted if the dedup check were missing.
        let dir = tempfile::tempdir().unwrap();
        let image = write_png_stub(dir.path(), "pic.png");

        let input = format!("File: {image}\nFormat: png\n[IMAGE:{image}]");
        let output = canonicalize_tool_result_media_markers(&input);
        assert_eq!(
            output, input,
            "bare path duplicating an existing marker must not be promoted"
        );
        assert_eq!(
            output.matches("[IMAGE:").count(),
            1,
            "exactly one image marker expected, got: {output}"
        );
    }

    /// Regression: when `truncate_tool_result`'s head boundary fell inside an
    /// `[IMAGE:...]` marker, the head ended up containing a half-marker like
    /// `[IMAGE:/very/long/pa` that the multimodal regex would silently fail
    /// to match. The boundary now rewinds to the marker opener so the broken
    /// half is dropped into the truncated middle. See PR #6183 review.
    #[test]
    fn truncate_tool_result_does_not_split_image_marker_at_head_boundary() {
        // 200-character file stem → the marker is far longer than the head
        // budget. With max_chars=80 the naive head_end (= 80 * 2 / 3 = 53)
        // falls inside the marker.
        let path = format!("/tmp/{}.png", "a".repeat(200));
        let marker = format!("[IMAGE:{path}]");
        let output = format!("prefix-text {marker} trailing-text padding-padding");

        let truncated = truncate_tool_result(&output, 80);

        assert!(
            truncated.contains("[... ") && truncated.contains("characters truncated ...]"),
            "expected truncation marker in output, got: {truncated}"
        );
        // No half-`[IMAGE:` marker should leak into the surviving content.
        let stripped = truncated.replace(&marker, "");
        assert!(
            !stripped.contains("[IMAGE:"),
            "half-`[IMAGE:` marker leaked into truncated output: {truncated}"
        );
    }

    /// Regression: tail boundary previously could land inside an
    /// `[IMAGE:...]` marker, leaving a stray closing `...png]` fragment in
    /// the surviving tail. The boundary now advances past the closing `]`.
    #[test]
    fn truncate_tool_result_does_not_split_image_marker_at_tail_boundary() {
        // Marker placed near the end so tail_start (~max_chars / 3 from the
        // end) lands inside it.
        let path = format!("/tmp/{}.png", "b".repeat(200));
        let marker = format!("[IMAGE:{path}]");
        let output = format!("{} preamble-content-line {marker} ending", "x".repeat(400));

        let truncated = truncate_tool_result(&output, 90);

        let stripped = truncated.replace(&marker, "");
        assert!(
            !stripped.contains("[IMAGE:") && !stripped.contains(".png]"),
            "half-`[IMAGE:` marker leaked into truncated output: {truncated}"
        );
    }

    /// When a complete `[IMAGE:...]` marker fits naturally inside the
    /// retained head, truncation must not damage it.
    #[test]
    fn truncate_tool_result_keeps_complete_marker_in_head() {
        let marker = "[IMAGE:/tmp/short.png]";
        let output = format!("{marker} {}", "y".repeat(500));

        let truncated = truncate_tool_result(&output, 200);

        assert!(
            truncated.starts_with(marker),
            "expected head to retain full marker, got: {truncated}"
        );
    }
}