zeroclaw_runtime/agent/
context_compressor.rs

1use std::fmt::Write;
2use std::time::Duration;
3
4use anyhow::Result;
5use std::sync::Arc;
6
7use zeroclaw_api::model_provider::{ChatMessage, ModelProvider};
8use zeroclaw_memory::traits::Memory;
9use zeroclaw_providers::multimodal;
10
11pub use zeroclaw_config::scattered_types::ContextCompressionConfig;
12
13// ---------------------------------------------------------------------------
14// Result
15// ---------------------------------------------------------------------------
16
/// Outcome of a compression attempt over a message history.
#[derive(Debug, Clone)]
pub struct CompressionResult {
    /// Whether `compress_if_needed` reported changing the history.
    pub compressed: bool,
    /// Estimated token count (via `estimate_tokens`) before any work was done.
    pub tokens_before: usize,
    /// Estimated token count (via `estimate_tokens`) when the attempt finished.
    pub tokens_after: usize,
    /// Number of summarization passes that actually replaced messages.
    pub passes_used: u32,
}
24
25// ---------------------------------------------------------------------------
26// Probe tiers for unknown model context windows
27// ---------------------------------------------------------------------------
28
/// Candidate context-window sizes, largest first, tried in turn when the
/// real window of a model is unknown.
const PROBE_TIERS: &[usize] = &[
    2_000_000, 1_000_000, 512_000, 200_000, 128_000, 64_000, 32_000,
];

/// Return the largest probe tier strictly below `current`.
///
/// When no tier is smaller than `current` the floor of 32 000 is returned,
/// so the result can be larger than `current` for very small inputs.
fn next_probe_tier(current: usize) -> usize {
    for &tier in PROBE_TIERS {
        if tier < current {
            return tier;
        }
    }
    32_000
}
40
41// ---------------------------------------------------------------------------
42// Error message parsing
43// ---------------------------------------------------------------------------
44
45/// Try to extract the actual context window limit from a model_provider error message.
46pub fn parse_context_limit_from_error(msg: &str) -> Option<usize> {
47    // Match patterns like "maximum context length is 128000" or "limit of 200000 tokens"
48    // or "context window of 131072" or "available context size (8448 tokens)"
49    let re_patterns: &[&str] = &[
50        // "maximum context length is 128000"
51        r"(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})",
52        // "context length is 128000" / "context window of 131072"
53        r"context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})",
54        // "128000 token context" / "128000 limit"
55        r"(\d{4,})\s*(?:tokens?\s*)?(?:context|limit)",
56        // "available context size (8448 tokens)"
57        r"available context size\s*\(\s*(\d{4,})",
58        // "> 128000 maximum context length" (Anthropic-style)
59        r">\s*(\d{4,})\s*(?:maximum|max)?\s*(?:context)?\s*(?:length|size|window|tokens?)",
60    ];
61    let lower = msg.to_lowercase();
62    for pattern in re_patterns {
63        if let Ok(re) = regex::Regex::new(pattern)
64            && let Some(caps) = re.captures(&lower)
65            && let Some(m) = caps.get(1)
66            && let Ok(limit) = m.as_str().parse::<usize>()
67            && (1024..=10_000_000).contains(&limit)
68        {
69            return Some(limit);
70        }
71    }
72    None
73}
74
75// ---------------------------------------------------------------------------
76// Token estimation
77// ---------------------------------------------------------------------------
78
79/// Estimate token count for a message history using ~4 chars/token heuristic
80/// with a 1.2x safety margin.
81pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
82    let raw: usize = messages
83        .iter()
84        .map(|m| m.content.len().div_ceil(4) + 4)
85        .sum();
86    // 1.2x safety margin to account for underestimation
87    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
88    {
89        (raw as f64 * 1.2) as usize
90    }
91}
92
93// ---------------------------------------------------------------------------
94// Summarizer prompt
95// ---------------------------------------------------------------------------
96
/// System prompt sent with every summarization request issued by
/// `ContextCompressor::compress_once`.
const SUMMARIZER_SYSTEM: &str = "\
You are a conversation compaction engine. Summarize the conversation segment below into concise context.

PRESERVE exactly:
- All identifiers (UUIDs, hashes, file paths, URLs, tokens, IPs)
- Actions taken (tool calls, file operations, commands run)
- Key information obtained (data, results, error messages)
- Decisions made and user preferences expressed
- Current task status and unresolved items
- Constraints and requirements mentioned

OMIT:
- Verbose tool output (keep only key results)
- Repeated greetings or filler
- Redundant information already stated

Output concise bullet points. Be thorough but brief.";
114
115// ---------------------------------------------------------------------------
116// ContextCompressor
117// ---------------------------------------------------------------------------
118
/// Shrinks a chat history by trimming old tool results and replacing the
/// unprotected middle of the conversation with an LLM-written summary.
pub struct ContextCompressor {
    /// Compression settings (thresholds, protected head/tail sizes, limits).
    config: ContextCompressionConfig,
    /// Context window size, in estimated tokens, that the compression
    /// threshold is derived from.
    context_window: usize,
    /// Optional memory store that summaries are written to before the
    /// summarized messages are dropped from the history.
    memory: Option<Arc<dyn Memory>>,
}
124
125impl ContextCompressor {
126    pub fn new(config: ContextCompressionConfig, context_window: usize) -> Self {
127        Self {
128            config,
129            context_window,
130            memory: None,
131        }
132    }
133
134    /// Attach a memory handle so compression summaries are persisted before
135    /// old messages are discarded. Without this, compressed facts are lost.
136    pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
137        self.memory = Some(memory);
138        self
139    }
140
    /// Update the context window size (e.g. after error-driven probing).
    ///
    /// The new value is used as-is; subsequent calls to `compress_if_needed`
    /// derive their threshold from it.
    pub fn set_context_window(&mut self, window: usize) {
        self.context_window = window;
    }
145
146    /// Fast-path: trim oversized tool results in non-protected messages.
147    /// Returns total characters saved. No LLM call needed.
148    fn fast_trim_tool_results(&self, history: &mut [ChatMessage]) -> usize {
149        let max = self.config.tool_result_retrim_chars;
150        if max == 0 {
151            return 0;
152        }
153        let mut saved = 0;
154        let protect_start = self.config.protect_first_n.min(history.len());
155        let protect_end = history.len().saturating_sub(self.config.protect_last_n);
156
157        if protect_start >= protect_end {
158            return 0;
159        }
160
161        for msg in &mut history[protect_start..protect_end] {
162            if msg.role != "tool" {
163                continue;
164            }
165            if msg.content.len() <= max {
166                continue;
167            }
168            // Skip exempt tools
169            if self
170                .config
171                .tool_result_trim_exempt
172                .iter()
173                .any(|t| msg.content.contains(t.as_str()))
174            {
175                continue;
176            }
177            // Skip base64 images
178            if msg.content.contains("data:image/") {
179                continue;
180            }
181            let original_len = msg.content.len();
182            msg.content = crate::agent::history::truncate_tool_message(&msg.content, max);
183            saved += original_len - msg.content.len();
184        }
185        saved
186    }
187
188    /// Main entry point. Compresses history in-place if over threshold.
189    ///
190    /// `temperature` is forwarded verbatim to the summarizer LLM call.
191    /// Pass `None` to let the provider decide (required for models that
192    /// reject `temperature`, e.g. claude-opus-4-7).
193    pub async fn compress_if_needed(
194        &self,
195        history: &mut Vec<ChatMessage>,
196        model_provider: &dyn ModelProvider,
197        model: &str,
198        temperature: Option<f64>,
199    ) -> Result<CompressionResult> {
200        if !self.config.enabled {
201            let tokens = estimate_tokens(history);
202            return Ok(CompressionResult {
203                compressed: false,
204                tokens_before: tokens,
205                tokens_after: tokens,
206                passes_used: 0,
207            });
208        }
209
210        let tokens_before = estimate_tokens(history);
211        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
212        let threshold = (self.context_window as f64 * self.config.threshold_ratio) as usize;
213
214        if tokens_before <= threshold {
215            return Ok(CompressionResult {
216                compressed: false,
217                tokens_before,
218                tokens_after: tokens_before,
219                passes_used: 0,
220            });
221        }
222
223        // Fast-trim pass — may resolve overflow without an LLM call
224        let chars_saved = self.fast_trim_tool_results(history);
225        if chars_saved > 0 {
226            ::zeroclaw_log::record!(
227                INFO,
228                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
229                    .with_attrs(::serde_json::json!({"chars_saved": chars_saved})),
230                "Fast-trim saved chars from old tool results"
231            );
232            let recheck = estimate_tokens(history);
233            if recheck <= threshold {
234                return Ok(CompressionResult {
235                    compressed: true,
236                    tokens_before,
237                    tokens_after: recheck,
238                    passes_used: 0,
239                });
240            }
241        }
242
243        let mut passes_used = 0;
244        for _ in 0..self.config.max_passes {
245            let did_compress = self
246                .compress_once(history, model_provider, model, temperature)
247                .await?;
248            if did_compress {
249                passes_used += 1;
250            }
251            if estimate_tokens(history) <= threshold || !did_compress {
252                break;
253            }
254        }
255
256        let tokens_after = estimate_tokens(history);
257        Ok(CompressionResult {
258            compressed: passes_used > 0,
259            tokens_before,
260            tokens_after,
261            passes_used,
262        })
263    }
264
265    /// Reactive compression triggered by a context_length_exceeded error.
266    /// Parses the actual limit from the error, steps down probe tiers, and re-compresses.
267    pub async fn compress_on_error(
268        &mut self,
269        history: &mut Vec<ChatMessage>,
270        model_provider: &dyn ModelProvider,
271        model: &str,
272        temperature: Option<f64>,
273        error_msg: &str,
274    ) -> Result<bool> {
275        // Try to extract actual limit from error message
276        if let Some(limit) = parse_context_limit_from_error(error_msg) {
277            self.context_window = limit;
278        } else {
279            // Step down to next probe tier
280            self.context_window = next_probe_tier(self.context_window);
281        }
282
283        ::zeroclaw_log::record!(
284            INFO,
285            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
286                .with_attrs(::serde_json::json!({"context_window": self.context_window})),
287            "Context limit adjusted, re-compressing"
288        );
289
290        let result = self
291            .compress_if_needed(history, model_provider, model, temperature)
292            .await?;
293        Ok(result.compressed)
294    }
295
296    /// Single compression pass: protect head/tail, summarize middle.
297    async fn compress_once(
298        &self,
299        history: &mut Vec<ChatMessage>,
300        model_provider: &dyn ModelProvider,
301        model: &str,
302        temperature: Option<f64>,
303    ) -> Result<bool> {
304        let n = history.len();
305        let protected_total = self.config.protect_first_n + self.config.protect_last_n;
306        if n <= protected_total {
307            return Ok(false);
308        }
309
310        let mut start = self.config.protect_first_n.min(n);
311        let mut end = n.saturating_sub(self.config.protect_last_n);
312
313        // Align boundaries to avoid orphaning tool_call/tool_result pairs
314        start = align_boundary_forward(history, start);
315        end = align_boundary_backward(history, end);
316
317        if start >= end {
318            return Ok(false);
319        }
320
321        let summary_model = self.config.summary_model.as_deref().unwrap_or(model);
322        let preserve_media_markers =
323            self.config.summary_model.is_none() && model_provider.supports_vision();
324
325        // Build transcript from the middle section
326        let middle = &history[start..end];
327        let transcript = build_summarizer_transcript(
328            middle,
329            self.config.source_max_chars,
330            preserve_media_markers,
331        );
332
333        if transcript.is_empty() {
334            return Ok(false);
335        }
336
337        let message_count = end - start;
338
339        let identifier_note = if self.config.identifier_policy == "strict" {
340            "\nIMPORTANT: Preserve all identifiers exactly as they appear."
341        } else {
342            ""
343        };
344
345        let user_prompt = format!(
346            "Summarize the following conversation history ({message_count} messages) for context preservation. \
347             Keep it concise (max 20 bullet points).{identifier_note}\n\n{transcript}"
348        );
349
350        // LLM summarization with safety timeout
351        let timeout = Duration::from_secs(self.config.timeout_secs);
352        let summary_raw = match tokio::time::timeout(
353            timeout,
354            model_provider.chat_with_system(
355                Some(SUMMARIZER_SYSTEM),
356                &user_prompt,
357                summary_model,
358                temperature,
359            ),
360        )
361        .await
362        {
363            Ok(Ok(s)) => s,
364            Ok(Err(e)) => {
365                ::zeroclaw_log::record!(
366                    WARN,
367                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
368                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
369                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
370                    "Summarization LLM call failed, using transcript truncation"
371                );
372                truncate_chars(&transcript, self.config.summary_max_chars)
373            }
374            Err(_) => {
375                ::zeroclaw_log::record!(
376                    WARN,
377                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
378                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
379                    &format!(
380                        "Summarization timed out after {}s, using transcript truncation",
381                        self.config.timeout_secs
382                    )
383                );
384                truncate_chars(&transcript, self.config.summary_max_chars)
385            }
386        };
387
388        let summary = truncate_chars(&summary_raw, self.config.summary_max_chars);
389
390        // Persist the compression summary to memory before discarding old messages.
391        // This ensures facts from compressed turns remain retrievable via memory recall.
392        if let Some(ref memory) = self.memory {
393            let facts_key = format!("compressed_context_{}", uuid::Uuid::new_v4());
394            if let Err(e) = memory
395                .store(
396                    &facts_key,
397                    &summary,
398                    zeroclaw_memory::traits::MemoryCategory::Daily,
399                    None,
400                )
401                .await
402            {
403                ::zeroclaw_log::record!(
404                    DEBUG,
405                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
406                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
407                    "Failed to save compression summary to memory"
408                );
409            } else {
410                ::zeroclaw_log::record!(
411                    DEBUG,
412                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
413                        .with_attrs(::serde_json::json!({"message_count": message_count})),
414                    "Saved compression summary to memory before discarding  messages"
415                );
416            }
417        }
418
419        // Splice: head + [SUMMARY] + tail
420        let summary_msg = build_summary_message(&history[start..end], &summary, message_count);
421        history.splice(start..end, std::iter::once(summary_msg));
422
423        // Repair orphaned tool pairs
424        repair_tool_pairs(history);
425
426        Ok(true)
427    }
428}
429
430// ---------------------------------------------------------------------------
431// Boundary alignment
432// ---------------------------------------------------------------------------
433
434/// Move boundary forward past any orphaned tool results at the start.
435fn align_boundary_forward(messages: &[ChatMessage], idx: usize) -> usize {
436    let mut i = idx;
437    while i < messages.len() && messages[i].role == "tool" {
438        i += 1;
439    }
440    i
441}
442
443/// Move the tail boundary backward past any orphan-creating split.
444///
445/// First step past any leading `tool` messages — their owning assistant
446/// is earlier and must travel with them into the protected tail.
447///
448/// Second, if we land on an assistant that owns `tool_calls`, back up
449/// past it as well. Otherwise that assistant gets summarized while its
450/// already-protected `tool_result` blocks remain in the tail, creating
451/// the 400 "unexpected tool_use_id in tool_result blocks" failure mode
452/// at the root of #5813.
453fn align_boundary_backward(messages: &[ChatMessage], idx: usize) -> usize {
454    let mut i = idx;
455    loop {
456        while i > 0 && messages[i].role == "tool" {
457            i -= 1;
458        }
459        if messages[i].role == "assistant"
460            && let Ok(v) = serde_json::from_str::<serde_json::Value>(&messages[i].content)
461            && v.get("tool_calls")
462                .and_then(|a| a.as_array())
463                .is_some_and(|a| !a.is_empty())
464        {
465            if i == 0 {
466                break;
467            }
468            i -= 1;
469            continue;
470        }
471        break;
472    }
473    i
474}
475
476// ---------------------------------------------------------------------------
477// Tool pair repair
478// ---------------------------------------------------------------------------
479
480/// Remove orphaned tool_results and add stubs for orphaned tool_calls.
481///
482/// After compression, some tool results may reference tool_calls that were
483/// summarized away, and vice versa. This function cleans up the history
484/// so every tool_result has a matching assistant message and every
485/// tool_call-bearing assistant message has results.
486fn repair_tool_pairs(messages: &mut Vec<ChatMessage>) {
487    // Heuristic: tool messages whose content references a call ID that no longer
488    // exists in any assistant message should be removed. Since ChatMessage is a
489    // simple role+content struct (no structured tool_call_id field), we use a
490    // simpler approach: remove any "tool" message that immediately follows the
491    // [CONTEXT SUMMARY] message (it's orphaned by definition).
492    let mut i = 0;
493    while i < messages.len() {
494        if messages[i].content.contains("[CONTEXT SUMMARY") {
495            // Remove any immediately following orphaned tool results
496            while i + 1 < messages.len() && messages[i + 1].role == "tool" {
497                messages.remove(i + 1);
498            }
499        }
500        i += 1;
501    }
502
503    // Also check for tool results at the very start (after system prompt) that
504    // are orphaned because their assistant message was compressed.
505    let start = if messages.first().is_some_and(|m| m.role == "system") {
506        1
507    } else {
508        0
509    };
510    while start < messages.len() && messages[start].role == "tool" {
511        messages.remove(start);
512    }
513}
514
515// ---------------------------------------------------------------------------
516// Helpers
517// ---------------------------------------------------------------------------
518
519fn build_full_transcript(messages: &[ChatMessage]) -> String {
520    let mut transcript = String::new();
521    for msg in messages {
522        let role = msg.role.to_uppercase();
523        let _ = writeln!(transcript, "{role}: {}", msg.content.trim());
524    }
525    transcript
526}
527
528fn build_summarizer_transcript(
529    messages: &[ChatMessage],
530    max_chars: usize,
531    preserve_media_markers: bool,
532) -> String {
533    let transcript = build_full_transcript(messages);
534    if preserve_media_markers {
535        // Vision-capable summarizer can read media markers; preserve them so
536        // visual content is reflected in the summary (per #6189 contract).
537        return truncate_owned_if_needed(transcript, max_chars);
538    }
539
540    // Non-vision summarizer cannot consume media markers. Strip ALL inbound
541    // attachment-kind markers (IMAGE, PHOTO, DOCUMENT, FILE, VIDEO, VOICE,
542    // AUDIO — case-insensitive) instead of just `[IMAGE:...]`, otherwise a
543    // local filesystem path can leak into the auxiliary `chat_with_system`
544    // payload and the upstream API rejects it as a malformed `image_url.url`.
545    truncate_owned_if_needed(multimodal::strip_media_markers(&transcript), max_chars)
546}
547
548fn truncate_owned_if_needed(s: String, max: usize) -> String {
549    if s.len() > max {
550        truncate_chars(&s, max)
551    } else {
552        s
553    }
554}
555
/// Cut `s` down to at most `max` bytes and append `"..."`.
///
/// `max` is a byte count. The cut is moved back to the nearest UTF-8
/// character boundary, so the kept prefix may be shorter than `max`; the
/// appended ellipsis is not counted against `max`. Strings that already
/// fit are returned as-is, without an ellipsis.
fn truncate_chars(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0);
    format!("{}...", &s[..cut])
}
569
570/// Construct the synthesized assistant message that replaces a compressed
571/// range. When the compressed range contains an assistant turn with
572/// `reasoning_content` (a thinking-mode response from providers like
573/// DeepSeek V4), embed the most recent such payload in the summary as a
574/// JSON-encoded `{content, reasoning_content}` body — matching the shape
575/// `build_native_assistant_history` already produces — so the next request
576/// to the provider passes its reasoning round-trip check. See #6269.
577fn build_summary_message(
578    compressed: &[ChatMessage],
579    summary: &str,
580    message_count: usize,
581) -> ChatMessage {
582    let summary_text = format!(
583        "[CONTEXT SUMMARY \u{2014} {message_count} earlier messages compressed]\n\n{summary}"
584    );
585
586    let last_reasoning = compressed
587        .iter()
588        .rev()
589        .filter(|m| m.role == "assistant")
590        .find_map(|m| {
591            serde_json::from_str::<serde_json::Value>(&m.content)
592                .ok()
593                .and_then(|v| {
594                    v.get("reasoning_content")
595                        .and_then(|rc| rc.as_str().map(ToString::to_string))
596                })
597        });
598
599    if let Some(rc) = last_reasoning {
600        let payload = serde_json::json!({
601            "content": summary_text,
602            "reasoning_content": rc,
603        });
604        ChatMessage::assistant(payload.to_string())
605    } else {
606        ChatMessage::assistant(summary_text)
607    }
608}
609
610// ---------------------------------------------------------------------------
611// Tests
612// ---------------------------------------------------------------------------
613
614#[cfg(test)]
615mod tests {
616    use super::*;
617    use async_trait::async_trait;
618    use parking_lot::Mutex;
619
620    fn msg(role: &str, content: &str) -> ChatMessage {
621        ChatMessage {
622            role: role.to_string(),
623            content: content.to_string(),
624        }
625    }
626
627    struct CaptureSummarizerModelProvider {
628        supports_vision: bool,
629        seen_messages: Mutex<Vec<String>>,
630    }
631
632    #[async_trait]
633    impl ModelProvider for CaptureSummarizerModelProvider {
634        async fn chat_with_system(
635            &self,
636            _system_prompt: Option<&str>,
637            message: &str,
638            _model: &str,
639            _temperature: Option<f64>,
640        ) -> Result<String> {
641            self.seen_messages.lock().push(message.to_string());
642            Ok("summary".to_string())
643        }
644
645        async fn chat(
646            &self,
647            _request: zeroclaw_api::model_provider::ChatRequest<'_>,
648            _model: &str,
649            _temperature: Option<f64>,
650        ) -> Result<zeroclaw_api::model_provider::ChatResponse> {
651            unreachable!("context compressor uses chat_with_system")
652        }
653
654        fn supports_vision(&self) -> bool {
655            self.supports_vision
656        }
657    }
658    impl ::zeroclaw_api::attribution::Attributable for CaptureSummarizerModelProvider {
659        fn role(&self) -> ::zeroclaw_api::attribution::Role {
660            ::zeroclaw_api::attribution::Role::Provider(
661                ::zeroclaw_api::attribution::ProviderKind::Model(
662                    ::zeroclaw_api::attribution::ModelProviderKind::Custom,
663                ),
664            )
665        }
666        fn alias(&self) -> &str {
667            "CaptureSummarizerModelProvider"
668        }
669    }
670
671    #[test]
672    fn test_estimate_tokens() {
673        let messages = vec![msg("user", "hello world")]; // 11 chars
674        let tokens = estimate_tokens(&messages);
675        // 11/4 ceil = 3, +4 framing = 7, *1.2 = 8.4 -> 8
676        assert!(tokens > 0);
677    }
678
679    #[test]
680    fn test_estimate_tokens_empty() {
681        assert_eq!(estimate_tokens(&[]), 0);
682    }
683
684    #[test]
685    fn test_parse_context_limit_anthropic() {
686        let msg = "prompt is too long: 150000 tokens > 128000 maximum context length";
687        assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
688    }
689
690    #[test]
691    fn test_parse_context_limit_openai() {
692        let msg = "This model's maximum context length is 128000 tokens. However, your messages resulted in 150000 tokens.";
693        assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
694    }
695
696    #[test]
697    fn test_parse_context_limit_llamacpp() {
698        let msg = "request (8968 tokens) exceeds the available context size (8448 tokens)";
699        assert_eq!(parse_context_limit_from_error(msg), Some(8448));
700    }
701
702    #[test]
703    fn test_parse_context_limit_none() {
704        assert_eq!(parse_context_limit_from_error("some random error"), None);
705    }
706
707    #[test]
708    fn test_parse_context_limit_rejects_small() {
709        let msg = "limit is 100 tokens";
710        assert_eq!(parse_context_limit_from_error(msg), None); // < 1024
711    }
712
713    #[test]
714    fn test_next_probe_tier() {
715        assert_eq!(next_probe_tier(2_000_001), 2_000_000);
716        assert_eq!(next_probe_tier(2_000_000), 1_000_000);
717        assert_eq!(next_probe_tier(200_000), 128_000);
718        assert_eq!(next_probe_tier(64_000), 32_000);
719        assert_eq!(next_probe_tier(32_000), 32_000); // floor
720        assert_eq!(next_probe_tier(10_000), 32_000); // below all tiers
721    }
722
723    #[test]
724    fn test_align_boundary_forward_skips_tool() {
725        let messages = vec![
726            msg("system", "sys"),
727            msg("user", "q"),
728            msg("tool", "result1"),
729            msg("tool", "result2"),
730            msg("user", "next"),
731        ];
732        // Starting at index 2 (tool), should skip to index 4
733        assert_eq!(align_boundary_forward(&messages, 2), 4);
734    }
735
736    #[test]
737    fn test_align_boundary_forward_noop() {
738        let messages = vec![
739            msg("system", "sys"),
740            msg("user", "q"),
741            msg("assistant", "a"),
742        ];
743        assert_eq!(align_boundary_forward(&messages, 1), 1);
744    }
745
746    #[test]
747    fn test_repair_tool_pairs_removes_orphaned() {
748        let mut messages = vec![
749            msg("system", "sys"),
750            msg(
751                "assistant",
752                "[CONTEXT SUMMARY — 5 earlier messages compressed]\nstuff",
753            ),
754            msg("tool", "orphaned result"),
755            msg("user", "next question"),
756        ];
757        repair_tool_pairs(&mut messages);
758        assert_eq!(messages.len(), 3);
759        assert_eq!(messages[2].role, "user");
760    }
761
762    #[test]
763    fn test_repair_tool_pairs_no_false_positives() {
764        let mut messages = vec![
765            msg("system", "sys"),
766            msg("user", "q"),
767            msg("assistant", "calling tool"),
768            msg("tool", "result"),
769            msg("user", "thanks"),
770        ];
771        repair_tool_pairs(&mut messages);
772        assert_eq!(messages.len(), 5); // no change
773    }
774
775    /// Regression test for the root-cause #5813 fix: when the tail
776    /// boundary lands on an assistant with `tool_calls`, the function
777    /// must back up past it so the assistant travels with its
778    /// `tool_result` blocks into the protected tail. Otherwise the
779    /// assistant gets summarized while its results survive, creating an
780    /// orphan and producing the 400 "unexpected tool_use_id" failure.
781    #[test]
782    fn test_align_boundary_backward_backs_up_past_tool_call_assistant() {
783        let messages = vec![
784            msg("system", "sys"),
785            msg("user", "q1"),
786            msg("assistant", "old reply 1"),
787            msg("user", "q2"),
788            msg(
789                "assistant",
790                r#"{"content":null,"tool_calls":[{"id":"toolu_X","name":"shell","arguments":"{}"}]}"#,
791            ),
792            msg("tool", r#"{"tool_call_id":"toolu_X","content":"result"}"#),
793            msg("user", "follow-up"),
794        ];
795        // Initial boundary lands on the assistant(tool_calls) at index 4.
796        // The function must back up past it so the pair stays in the tail.
797        let aligned = align_boundary_backward(&messages, 4);
798        assert!(
799            aligned < 4,
800            "boundary should retreat past assistant(tool_calls) at idx 4, got {aligned}"
801        );
802    }
803
804    #[test]
805    fn test_align_boundary_backward_noop_on_plain_assistant() {
806        let messages = vec![
807            msg("system", "sys"),
808            msg("user", "q"),
809            msg("assistant", "plain text reply"),
810            msg("user", "next"),
811        ];
812        // No tool_calls on the assistant — boundary should not retreat.
813        assert_eq!(align_boundary_backward(&messages, 2), 2);
814    }
815
816    #[test]
817    fn test_build_transcript() {
818        let messages = vec![msg("user", "hello"), msg("assistant", "hi there")];
819        let t = build_full_transcript(&messages);
820        assert!(t.contains("USER: hello"));
821        assert!(t.contains("ASSISTANT: hi there"));
822    }
823
824    #[test]
825    fn test_build_summarizer_transcript_strips_all_attachment_kinds_for_non_vision_provider() {
826        // The non-vision summarizer branch must strip every inbound
827        // attachment-kind alias the channel parsers can emit, not just
828        // `[IMAGE:]`. Mirrors `ATTACHMENT_KINDS` in
829        // `crates/zeroclaw-channels/src/util.rs`. Regression: a `[PHOTO:]`
830        // or `[DOCUMENT:]` marker still leaking through would surface a
831        // local filesystem path in the auxiliary `chat_with_system` payload
832        // and the upstream API would reject it.
833        let messages = vec![msg(
834            "user",
835            "Take a look at [IMAGE:/a.jpg] [PHOTO:/b.jpg] [DOCUMENT:/c.pdf] \
836             [FILE:/d.zip] [VIDEO:/e.mp4] [VOICE:/f.ogg] [AUDIO:/g.wav] please",
837        )];
838        let transcript = build_summarizer_transcript(&messages, 10_000, false);
839        for prefix in [
840            "[IMAGE:",
841            "[PHOTO:",
842            "[DOCUMENT:",
843            "[FILE:",
844            "[VIDEO:",
845            "[VOICE:",
846            "[AUDIO:",
847        ] {
848            assert!(
849                !transcript.contains(prefix),
850                "non-vision transcript should not contain raw {prefix} marker: {transcript}"
851            );
852        }
853        assert!(
854            transcript.contains("[media attachment]"),
855            "non-vision transcript should contain placeholder: {transcript}"
856        );
857        assert!(transcript.contains("Take a look at"));
858        assert!(transcript.contains("please"));
859    }
860
861    #[test]
862    fn test_build_summarizer_transcript_strips_media_markers_before_truncation() {
863        let long_path = format!(
864            "/private/tmp/zeroclaw/signal_inbound/{}",
865            "nested-directory/".repeat(12)
866        );
867        let messages = vec![msg(
868            "user",
869            &format!("Please summarize [IMAGE:{long_path}photo.png] after text"),
870        )];
871
872        let transcript = build_summarizer_transcript(&messages, 64, false);
873
874        assert!(
875            !transcript.contains("[IMAGE:"),
876            "non-vision transcript should not retain a split image marker: {transcript}"
877        );
878        assert!(
879            !transcript.contains("/private/tmp"),
880            "non-vision transcript should not leak local path fragments: {transcript}"
881        );
882        assert!(
883            transcript.contains("[media attachment]"),
884            "non-vision transcript should preserve an attachment placeholder: {transcript}"
885        );
886    }
887
888    #[test]
889    fn test_build_transcript_truncates() {
890        let messages = vec![msg("user", &"x".repeat(1000))];
891        let t = truncate_owned_if_needed(build_full_transcript(&messages), 100);
892        assert!(t.len() <= 103); // 100 + "..."
893    }
894
895    #[test]
896    fn test_build_summarizer_transcript_strips_image_markers_for_non_vision_provider() {
897        let messages = vec![msg(
898            "user",
899            "Describe this photo [IMAGE:/tmp/test.png]\nKeep the caption",
900        )];
901        let transcript = build_summarizer_transcript(&messages, 10_000, false);
902        assert!(!transcript.contains("[IMAGE:"));
903        assert!(transcript.contains("Describe this photo"));
904        assert!(transcript.contains("Keep the caption"));
905    }
906
907    #[test]
908    fn test_build_summarizer_transcript_keeps_image_markers_for_vision_provider() {
909        let messages = vec![msg("user", "Describe this photo [IMAGE:/tmp/test.png]")];
910        let transcript = build_summarizer_transcript(&messages, 10_000, true);
911        assert!(transcript.contains("[IMAGE:/tmp/test.png]"));
912    }
913
914    #[test]
915    fn test_truncate_chars() {
916        assert_eq!(truncate_chars("hello world", 5), "hello...");
917        assert_eq!(truncate_chars("hi", 10), "hi");
918    }
919
920    #[test]
921    fn test_config_defaults() {
922        let config = ContextCompressionConfig::default();
923        assert!(config.enabled);
924        assert!((config.threshold_ratio - 0.50).abs() < f64::EPSILON);
925        assert_eq!(config.protect_first_n, 3);
926        assert_eq!(config.protect_last_n, 4);
927        assert_eq!(config.max_passes, 3);
928        assert_eq!(config.summary_max_chars, 4_000);
929        assert_eq!(config.source_max_chars, 50_000);
930        assert_eq!(config.timeout_secs, 60);
931        assert!(config.summary_model.is_none());
932        assert_eq!(config.identifier_policy, "strict");
933    }
934
935    #[test]
936    fn test_config_serde_defaults() {
937        let json = "{}";
938        let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
939        assert!(config.enabled);
940        assert_eq!(config.protect_first_n, 3);
941        assert_eq!(config.max_passes, 3);
942    }
943
944    #[test]
945    fn test_config_serde_override() {
946        let json = r#"{"enabled": false, "protect_first_n": 5, "max_passes": 1}"#;
947        let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
948        assert!(!config.enabled);
949        assert_eq!(config.protect_first_n, 5);
950        assert_eq!(config.max_passes, 1);
951    }
952
953    #[tokio::test]
954    async fn compress_if_needed_strips_image_markers_before_non_vision_summarization() {
955        let config = ContextCompressionConfig {
956            protect_first_n: 1,
957            protect_last_n: 1,
958            threshold_ratio: 0.01,
959            ..Default::default()
960        };
961        let compressor = ContextCompressor::new(config, 64);
962        let model_provider = CaptureSummarizerModelProvider {
963            supports_vision: false,
964            seen_messages: Mutex::new(Vec::new()),
965        };
966        let mut history = vec![
967            msg("system", "sys"),
968            msg("user", "Earlier question [IMAGE:/tmp/example.png]"),
969            msg("assistant", "Earlier answer"),
970            msg("user", "Newest question"),
971        ];
972
973        let result = compressor
974            .compress_if_needed(&mut history, &model_provider, "model", None)
975            .await
976            .expect("compression should succeed");
977
978        assert!(result.compressed);
979        let seen = model_provider.seen_messages.lock();
980        let prompt = seen.last().expect("summarizer should be invoked");
981        assert!(!prompt.contains("[IMAGE:"));
982        assert!(!prompt.contains("/tmp/example.png"));
983    }
984
985    #[tokio::test]
986    async fn compress_if_needed_strips_image_markers_when_summary_model_overrides() {
987        let config = ContextCompressionConfig {
988            protect_first_n: 1,
989            protect_last_n: 1,
990            threshold_ratio: 0.01,
991            summary_model: Some("text-summary-model".to_string()),
992            ..Default::default()
993        };
994        let compressor = ContextCompressor::new(config, 64);
995        let model_provider = CaptureSummarizerModelProvider {
996            supports_vision: true,
997            seen_messages: Mutex::new(Vec::new()),
998        };
999        let mut history = vec![
1000            msg("system", "sys"),
1001            msg("user", "Earlier question [IMAGE:/tmp/summary-override.png]"),
1002            msg("assistant", "Earlier answer"),
1003            msg("user", "Newest question"),
1004        ];
1005
1006        let result = compressor
1007            .compress_if_needed(&mut history, &model_provider, "default-vision-model", None)
1008            .await
1009            .expect("compression should succeed");
1010
1011        assert!(result.compressed);
1012        let seen = model_provider.seen_messages.lock();
1013        let prompt = seen.last().expect("summarizer should be invoked");
1014        assert!(!prompt.contains("[IMAGE:"));
1015        assert!(!prompt.contains("/tmp/summary-override.png"));
1016    }
1017
1018    // ── fast_trim_tool_results tests ────────────────────────────────
1019
1020    #[test]
1021    fn test_fast_trim_protects_first_and_last_n() {
1022        let config = ContextCompressionConfig {
1023            protect_first_n: 2,
1024            protect_last_n: 2,
1025            tool_result_retrim_chars: 100,
1026            ..Default::default()
1027        };
1028        let compressor = ContextCompressor::new(config, 128_000);
1029        let big = "x".repeat(5_000);
1030        let mut history = vec![
1031            msg("system", "sys"),
1032            msg("tool", &big), // index 1 — protected (first 2)
1033            msg("user", "q"),
1034            msg("tool", &big),   // index 3 — trimmable
1035            msg("user", "next"), // index 4 — protected (last 2)
1036            msg("tool", &big),   // index 5 — protected (last 2)
1037        ];
1038        let saved = compressor.fast_trim_tool_results(&mut history);
1039        assert!(saved > 0);
1040        // Protected messages unchanged
1041        assert_eq!(history[1].content.len(), 5_000);
1042        assert_eq!(history[5].content.len(), 5_000);
1043        // Trimmable message was trimmed
1044        assert!(history[3].content.len() <= 200); // 100 + marker overhead
1045    }
1046
1047    #[test]
1048    fn test_fast_trim_skips_images() {
1049        let config = ContextCompressionConfig {
1050            protect_first_n: 0,
1051            protect_last_n: 0,
1052            tool_result_retrim_chars: 100,
1053            ..Default::default()
1054        };
1055        let compressor = ContextCompressor::new(config, 128_000);
1056        let img = format!("data:image/{}", "x".repeat(5_000));
1057        let mut history = vec![msg("tool", &img)];
1058        let saved = compressor.fast_trim_tool_results(&mut history);
1059        assert_eq!(saved, 0);
1060        assert!(history[0].content.len() > 5_000);
1061    }
1062
1063    #[test]
1064    fn test_fast_trim_skips_exempt_tools() {
1065        let config = ContextCompressionConfig {
1066            protect_first_n: 0,
1067            protect_last_n: 0,
1068            tool_result_retrim_chars: 100,
1069            tool_result_trim_exempt: vec!["KEEPME".to_string()],
1070            ..Default::default()
1071        };
1072        let compressor = ContextCompressor::new(config, 128_000);
1073        let content = format!("KEEPME {}", "x".repeat(5_000));
1074        let mut history = vec![msg("tool", &content)];
1075        let saved = compressor.fast_trim_tool_results(&mut history);
1076        assert_eq!(saved, 0);
1077    }
1078
1079    #[test]
1080    fn test_fast_trim_skips_small_results() {
1081        let config = ContextCompressionConfig {
1082            protect_first_n: 0,
1083            protect_last_n: 0,
1084            tool_result_retrim_chars: 2_000,
1085            ..Default::default()
1086        };
1087        let compressor = ContextCompressor::new(config, 128_000);
1088        let mut history = vec![msg("tool", "small result")];
1089        let saved = compressor.fast_trim_tool_results(&mut history);
1090        assert_eq!(saved, 0);
1091    }
1092
1093    #[test]
1094    fn test_fast_trim_skips_non_tool_messages() {
1095        let config = ContextCompressionConfig {
1096            protect_first_n: 0,
1097            protect_last_n: 0,
1098            tool_result_retrim_chars: 100,
1099            ..Default::default()
1100        };
1101        let compressor = ContextCompressor::new(config, 128_000);
1102        let big = "x".repeat(5_000);
1103        let mut history = vec![msg("user", &big), msg("assistant", &big)];
1104        let saved = compressor.fast_trim_tool_results(&mut history);
1105        assert_eq!(saved, 0);
1106    }
1107
1108    #[test]
1109    fn test_fast_trim_config_defaults() {
1110        let config = ContextCompressionConfig::default();
1111        assert_eq!(config.tool_result_retrim_chars, 2_000);
1112        assert!(config.tool_result_trim_exempt.is_empty());
1113    }
1114
1115    #[test]
1116    fn test_fast_trim_disabled_when_zero() {
1117        let config = ContextCompressionConfig {
1118            protect_first_n: 0,
1119            protect_last_n: 0,
1120            tool_result_retrim_chars: 0,
1121            ..Default::default()
1122        };
1123        let compressor = ContextCompressor::new(config, 128_000);
1124        let big = "x".repeat(5_000);
1125        let mut history = vec![msg("tool", &big)];
1126        let saved = compressor.fast_trim_tool_results(&mut history);
1127        assert_eq!(saved, 0);
1128    }
1129
1130    /// When the compressed range has no thinking-mode reasoning_content,
1131    /// the synthesized summary is plain text — same as before #6269.
1132    #[test]
1133    fn build_summary_message_uses_plain_text_when_no_reasoning() {
1134        let compressed = vec![
1135            msg("user", "what's the weather"),
1136            msg("assistant", "it's sunny"),
1137        ];
1138        let out = build_summary_message(&compressed, "weather chat", 2);
1139        assert_eq!(out.role, "assistant");
1140        assert!(out.content.starts_with("[CONTEXT SUMMARY"));
1141        assert!(out.content.contains("weather chat"));
1142        assert!(
1143            serde_json::from_str::<serde_json::Value>(&out.content).is_err(),
1144            "plain-text summary must not parse as JSON"
1145        );
1146    }
1147
1148    /// Regression test for #6269 — when an assistant message in the
1149    /// compressed range carries `reasoning_content` (thinking-mode replay
1150    /// payload), the synthesized summary preserves it via JSON-encoded
1151    /// content matching `build_native_assistant_history`'s shape.
1152    /// Without this, providers that require reasoning round-trip
1153    /// (DeepSeek V4 thinking) reject every post-compression request.
1154    #[test]
1155    fn build_summary_message_preserves_reasoning_content_when_present() {
1156        let assistant_with_reasoning = serde_json::json!({
1157            "content": "let me look",
1158            "reasoning_content": "user wants weather; need to check",
1159        })
1160        .to_string();
1161        let compressed = vec![
1162            msg("user", "what's the weather"),
1163            msg("assistant", &assistant_with_reasoning),
1164        ];
1165
1166        let out = build_summary_message(&compressed, "weather chat", 2);
1167        assert_eq!(out.role, "assistant");
1168        let parsed: serde_json::Value = serde_json::from_str(&out.content)
1169            .expect("summary must be JSON when reasoning_content is preserved");
1170        assert!(
1171            parsed["content"]
1172                .as_str()
1173                .is_some_and(|s| s.starts_with("[CONTEXT SUMMARY")),
1174            "summary text belongs in `content`",
1175        );
1176        assert_eq!(
1177            parsed["reasoning_content"].as_str(),
1178            Some("user wants weather; need to check"),
1179            "must carry reasoning_content from the most recent compressed assistant turn",
1180        );
1181    }
1182
1183    /// When multiple compressed assistant turns have reasoning_content,
1184    /// the most recent one survives — this matches DeepSeek's protocol
1185    /// expectation that the *immediately prior* assistant turn's
1186    /// reasoning is what gets replayed.
1187    #[test]
1188    fn build_summary_message_picks_last_reasoning_content() {
1189        let earlier = serde_json::json!({
1190            "content": "first answer",
1191            "reasoning_content": "EARLIER reasoning",
1192        })
1193        .to_string();
1194        let later = serde_json::json!({
1195            "content": "second answer",
1196            "reasoning_content": "LATER reasoning",
1197        })
1198        .to_string();
1199        let compressed = vec![
1200            msg("user", "q1"),
1201            msg("assistant", &earlier),
1202            msg("user", "q2"),
1203            msg("assistant", &later),
1204        ];
1205
1206        let out = build_summary_message(&compressed, "two-turn chat", 4);
1207        let parsed: serde_json::Value = serde_json::from_str(&out.content).unwrap();
1208        assert_eq!(
1209            parsed["reasoning_content"].as_str(),
1210            Some("LATER reasoning"),
1211            "must pick the most recent reasoning_content, not the earliest",
1212        );
1213    }
1214}
zeroclaw_runtime/agent/context_compressor.rs

zeroclaw_runtime/agent/
context_compressor.rs