1use std::fmt::Write;
2use std::time::Duration;
3
4use anyhow::Result;
5use std::sync::Arc;
6
7use zeroclaw_api::model_provider::{ChatMessage, ModelProvider};
8use zeroclaw_memory::traits::Memory;
9use zeroclaw_providers::multimodal;
10
11pub use zeroclaw_config::scattered_types::ContextCompressionConfig;
12
/// Outcome of one call to `ContextCompressor::compress_if_needed`.
#[derive(Debug, Clone)]
pub struct CompressionResult {
    /// `true` when fast-trimming alone brought the history under the threshold,
    /// or when at least one summarization pass replaced messages.
    pub compressed: bool,
    /// Estimated token count of the history before any work was done.
    pub tokens_before: usize,
    /// Estimated token count of the history when the call returned.
    pub tokens_after: usize,
    /// Number of summarization passes that actually replaced messages.
    pub passes_used: u32,
}
24
/// Candidate context-window sizes, ordered from largest to smallest.
const PROBE_TIERS: &[usize] = &[
    2_000_000, 1_000_000, 512_000, 200_000, 128_000, 64_000, 32_000,
];

/// Returns the largest tier in [`PROBE_TIERS`] that is strictly smaller than
/// `current`.
///
/// When no tier is smaller (that is, `current <= 32_000`) the result is
/// `32_000`, which can be larger than `current`.
fn next_probe_tier(current: usize) -> usize {
    // The table is sorted descending, so the first hit is the closest tier below.
    for &candidate in PROBE_TIERS {
        if candidate < current {
            return candidate;
        }
    }
    32_000
}
40
41pub fn parse_context_limit_from_error(msg: &str) -> Option<usize> {
47 let re_patterns: &[&str] = &[
50 r"(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})",
52 r"context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})",
54 r"(\d{4,})\s*(?:tokens?\s*)?(?:context|limit)",
56 r"available context size\s*\(\s*(\d{4,})",
58 r">\s*(\d{4,})\s*(?:maximum|max)?\s*(?:context)?\s*(?:length|size|window|tokens?)",
60 ];
61 let lower = msg.to_lowercase();
62 for pattern in re_patterns {
63 if let Ok(re) = regex::Regex::new(pattern)
64 && let Some(caps) = re.captures(&lower)
65 && let Some(m) = caps.get(1)
66 && let Ok(limit) = m.as_str().parse::<usize>()
67 && (1024..=10_000_000).contains(&limit)
68 {
69 return Some(limit);
70 }
71 }
72 None
73}
74
75pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
82 let raw: usize = messages
83 .iter()
84 .map(|m| m.content.len().div_ceil(4) + 4)
85 .sum();
86 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
88 {
89 (raw as f64 * 1.2) as usize
90 }
91}
92
/// System prompt passed to the model provider when summarizing a slice of
/// conversation history in `ContextCompressor::compress_once`.
const SUMMARIZER_SYSTEM: &str = "\
You are a conversation compaction engine. Summarize the conversation segment below into concise context.

PRESERVE exactly:
- All identifiers (UUIDs, hashes, file paths, URLs, tokens, IPs)
- Actions taken (tool calls, file operations, commands run)
- Key information obtained (data, results, error messages)
- Decisions made and user preferences expressed
- Current task status and unresolved items
- Constraints and requirements mentioned

OMIT:
- Verbose tool output (keep only key results)
- Repeated greetings or filler
- Redundant information already stated

Output concise bullet points. Be thorough but brief.";
114
/// Shrinks a chat history when its estimated token count exceeds a fraction of
/// the model's context window.
pub struct ContextCompressor {
    /// Tuning knobs (thresholds, protected ranges, summarizer settings).
    config: ContextCompressionConfig,
    /// Context window size, in tokens, used to compute the compression threshold.
    context_window: usize,
    /// Optional memory backend; when set, each summary is stored there before
    /// the summarized messages are replaced.
    memory: Option<Arc<dyn Memory>>,
}
124
125impl ContextCompressor {
126 pub fn new(config: ContextCompressionConfig, context_window: usize) -> Self {
127 Self {
128 config,
129 context_window,
130 memory: None,
131 }
132 }
133
134 pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
137 self.memory = Some(memory);
138 self
139 }
140
    /// Overrides the context window size (in tokens) used for the threshold.
    pub fn set_context_window(&mut self, window: usize) {
        self.context_window = window;
    }
145
146 fn fast_trim_tool_results(&self, history: &mut [ChatMessage]) -> usize {
149 let max = self.config.tool_result_retrim_chars;
150 if max == 0 {
151 return 0;
152 }
153 let mut saved = 0;
154 let protect_start = self.config.protect_first_n.min(history.len());
155 let protect_end = history.len().saturating_sub(self.config.protect_last_n);
156
157 if protect_start >= protect_end {
158 return 0;
159 }
160
161 for msg in &mut history[protect_start..protect_end] {
162 if msg.role != "tool" {
163 continue;
164 }
165 if msg.content.len() <= max {
166 continue;
167 }
168 if self
170 .config
171 .tool_result_trim_exempt
172 .iter()
173 .any(|t| msg.content.contains(t.as_str()))
174 {
175 continue;
176 }
177 if msg.content.contains("data:image/") {
179 continue;
180 }
181 let original_len = msg.content.len();
182 msg.content = crate::agent::history::truncate_tool_message(&msg.content, max);
183 saved += original_len - msg.content.len();
184 }
185 saved
186 }
187
188 pub async fn compress_if_needed(
194 &self,
195 history: &mut Vec<ChatMessage>,
196 model_provider: &dyn ModelProvider,
197 model: &str,
198 temperature: Option<f64>,
199 ) -> Result<CompressionResult> {
200 if !self.config.enabled {
201 let tokens = estimate_tokens(history);
202 return Ok(CompressionResult {
203 compressed: false,
204 tokens_before: tokens,
205 tokens_after: tokens,
206 passes_used: 0,
207 });
208 }
209
210 let tokens_before = estimate_tokens(history);
211 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
212 let threshold = (self.context_window as f64 * self.config.threshold_ratio) as usize;
213
214 if tokens_before <= threshold {
215 return Ok(CompressionResult {
216 compressed: false,
217 tokens_before,
218 tokens_after: tokens_before,
219 passes_used: 0,
220 });
221 }
222
223 let chars_saved = self.fast_trim_tool_results(history);
225 if chars_saved > 0 {
226 ::zeroclaw_log::record!(
227 INFO,
228 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
229 .with_attrs(::serde_json::json!({"chars_saved": chars_saved})),
230 "Fast-trim saved chars from old tool results"
231 );
232 let recheck = estimate_tokens(history);
233 if recheck <= threshold {
234 return Ok(CompressionResult {
235 compressed: true,
236 tokens_before,
237 tokens_after: recheck,
238 passes_used: 0,
239 });
240 }
241 }
242
243 let mut passes_used = 0;
244 for _ in 0..self.config.max_passes {
245 let did_compress = self
246 .compress_once(history, model_provider, model, temperature)
247 .await?;
248 if did_compress {
249 passes_used += 1;
250 }
251 if estimate_tokens(history) <= threshold || !did_compress {
252 break;
253 }
254 }
255
256 let tokens_after = estimate_tokens(history);
257 Ok(CompressionResult {
258 compressed: passes_used > 0,
259 tokens_before,
260 tokens_after,
261 passes_used,
262 })
263 }
264
265 pub async fn compress_on_error(
268 &mut self,
269 history: &mut Vec<ChatMessage>,
270 model_provider: &dyn ModelProvider,
271 model: &str,
272 temperature: Option<f64>,
273 error_msg: &str,
274 ) -> Result<bool> {
275 if let Some(limit) = parse_context_limit_from_error(error_msg) {
277 self.context_window = limit;
278 } else {
279 self.context_window = next_probe_tier(self.context_window);
281 }
282
283 ::zeroclaw_log::record!(
284 INFO,
285 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
286 .with_attrs(::serde_json::json!({"context_window": self.context_window})),
287 "Context limit adjusted, re-compressing"
288 );
289
290 let result = self
291 .compress_if_needed(history, model_provider, model, temperature)
292 .await?;
293 Ok(result.compressed)
294 }
295
296 async fn compress_once(
298 &self,
299 history: &mut Vec<ChatMessage>,
300 model_provider: &dyn ModelProvider,
301 model: &str,
302 temperature: Option<f64>,
303 ) -> Result<bool> {
304 let n = history.len();
305 let protected_total = self.config.protect_first_n + self.config.protect_last_n;
306 if n <= protected_total {
307 return Ok(false);
308 }
309
310 let mut start = self.config.protect_first_n.min(n);
311 let mut end = n.saturating_sub(self.config.protect_last_n);
312
313 start = align_boundary_forward(history, start);
315 end = align_boundary_backward(history, end);
316
317 if start >= end {
318 return Ok(false);
319 }
320
321 let summary_model = self.config.summary_model.as_deref().unwrap_or(model);
322 let preserve_media_markers =
323 self.config.summary_model.is_none() && model_provider.supports_vision();
324
325 let middle = &history[start..end];
327 let transcript = build_summarizer_transcript(
328 middle,
329 self.config.source_max_chars,
330 preserve_media_markers,
331 );
332
333 if transcript.is_empty() {
334 return Ok(false);
335 }
336
337 let message_count = end - start;
338
339 let identifier_note = if self.config.identifier_policy == "strict" {
340 "\nIMPORTANT: Preserve all identifiers exactly as they appear."
341 } else {
342 ""
343 };
344
345 let user_prompt = format!(
346 "Summarize the following conversation history ({message_count} messages) for context preservation. \
347 Keep it concise (max 20 bullet points).{identifier_note}\n\n{transcript}"
348 );
349
350 let timeout = Duration::from_secs(self.config.timeout_secs);
352 let summary_raw = match tokio::time::timeout(
353 timeout,
354 model_provider.chat_with_system(
355 Some(SUMMARIZER_SYSTEM),
356 &user_prompt,
357 summary_model,
358 temperature,
359 ),
360 )
361 .await
362 {
363 Ok(Ok(s)) => s,
364 Ok(Err(e)) => {
365 ::zeroclaw_log::record!(
366 WARN,
367 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
368 .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
369 .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
370 "Summarization LLM call failed, using transcript truncation"
371 );
372 truncate_chars(&transcript, self.config.summary_max_chars)
373 }
374 Err(_) => {
375 ::zeroclaw_log::record!(
376 WARN,
377 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
378 .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
379 &format!(
380 "Summarization timed out after {}s, using transcript truncation",
381 self.config.timeout_secs
382 )
383 );
384 truncate_chars(&transcript, self.config.summary_max_chars)
385 }
386 };
387
388 let summary = truncate_chars(&summary_raw, self.config.summary_max_chars);
389
390 if let Some(ref memory) = self.memory {
393 let facts_key = format!("compressed_context_{}", uuid::Uuid::new_v4());
394 if let Err(e) = memory
395 .store(
396 &facts_key,
397 &summary,
398 zeroclaw_memory::traits::MemoryCategory::Daily,
399 None,
400 )
401 .await
402 {
403 ::zeroclaw_log::record!(
404 DEBUG,
405 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
406 .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
407 "Failed to save compression summary to memory"
408 );
409 } else {
410 ::zeroclaw_log::record!(
411 DEBUG,
412 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
413 .with_attrs(::serde_json::json!({"message_count": message_count})),
414 "Saved compression summary to memory before discarding messages"
415 );
416 }
417 }
418
419 let summary_msg = build_summary_message(&history[start..end], &summary, message_count);
421 history.splice(start..end, std::iter::once(summary_msg));
422
423 repair_tool_pairs(history);
425
426 Ok(true)
427 }
428}
429
430fn align_boundary_forward(messages: &[ChatMessage], idx: usize) -> usize {
436 let mut i = idx;
437 while i < messages.len() && messages[i].role == "tool" {
438 i += 1;
439 }
440 i
441}
442
443fn align_boundary_backward(messages: &[ChatMessage], idx: usize) -> usize {
454 let mut i = idx;
455 loop {
456 while i > 0 && messages[i].role == "tool" {
457 i -= 1;
458 }
459 if messages[i].role == "assistant"
460 && let Ok(v) = serde_json::from_str::<serde_json::Value>(&messages[i].content)
461 && v.get("tool_calls")
462 .and_then(|a| a.as_array())
463 .is_some_and(|a| !a.is_empty())
464 {
465 if i == 0 {
466 break;
467 }
468 i -= 1;
469 continue;
470 }
471 break;
472 }
473 i
474}
475
476fn repair_tool_pairs(messages: &mut Vec<ChatMessage>) {
487 let mut i = 0;
493 while i < messages.len() {
494 if messages[i].content.contains("[CONTEXT SUMMARY") {
495 while i + 1 < messages.len() && messages[i + 1].role == "tool" {
497 messages.remove(i + 1);
498 }
499 }
500 i += 1;
501 }
502
503 let start = if messages.first().is_some_and(|m| m.role == "system") {
506 1
507 } else {
508 0
509 };
510 while start < messages.len() && messages[start].role == "tool" {
511 messages.remove(start);
512 }
513}
514
515fn build_full_transcript(messages: &[ChatMessage]) -> String {
520 let mut transcript = String::new();
521 for msg in messages {
522 let role = msg.role.to_uppercase();
523 let _ = writeln!(transcript, "{role}: {}", msg.content.trim());
524 }
525 transcript
526}
527
528fn build_summarizer_transcript(
529 messages: &[ChatMessage],
530 max_chars: usize,
531 preserve_media_markers: bool,
532) -> String {
533 let transcript = build_full_transcript(messages);
534 if preserve_media_markers {
535 return truncate_owned_if_needed(transcript, max_chars);
538 }
539
540 truncate_owned_if_needed(multimodal::strip_media_markers(&transcript), max_chars)
546}
547
548fn truncate_owned_if_needed(s: String, max: usize) -> String {
549 if s.len() > max {
550 truncate_chars(&s, max)
551 } else {
552 s
553 }
554}
555
/// Truncates `s` to at most `max` bytes (backing up to a UTF-8 character
/// boundary) and appends `...`.
///
/// Strings that already fit are returned unchanged, without the ellipsis. The
/// limit is measured in bytes, so the result can be up to `max + 3` bytes long.
fn truncate_chars(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Index 0 is always a boundary, so the search always succeeds.
    let cut = (0..=max)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0);
    format!("{}...", &s[..cut])
}
569
570fn build_summary_message(
578 compressed: &[ChatMessage],
579 summary: &str,
580 message_count: usize,
581) -> ChatMessage {
582 let summary_text = format!(
583 "[CONTEXT SUMMARY \u{2014} {message_count} earlier messages compressed]\n\n{summary}"
584 );
585
586 let last_reasoning = compressed
587 .iter()
588 .rev()
589 .filter(|m| m.role == "assistant")
590 .find_map(|m| {
591 serde_json::from_str::<serde_json::Value>(&m.content)
592 .ok()
593 .and_then(|v| {
594 v.get("reasoning_content")
595 .and_then(|rc| rc.as_str().map(ToString::to_string))
596 })
597 });
598
599 if let Some(rc) = last_reasoning {
600 let payload = serde_json::json!({
601 "content": summary_text,
602 "reasoning_content": rc,
603 });
604 ChatMessage::assistant(payload.to_string())
605 } else {
606 ChatMessage::assistant(summary_text)
607 }
608}
609
610#[cfg(test)]
615mod tests {
616 use super::*;
617 use async_trait::async_trait;
618 use parking_lot::Mutex;
619
620 fn msg(role: &str, content: &str) -> ChatMessage {
621 ChatMessage {
622 role: role.to_string(),
623 content: content.to_string(),
624 }
625 }
626
    /// Mock provider that records every prompt passed to `chat_with_system`.
    struct CaptureSummarizerModelProvider {
        /// Value returned from `supports_vision`.
        supports_vision: bool,
        /// Prompts received so far, in call order.
        seen_messages: Mutex<Vec<String>>,
    }
631
632 #[async_trait]
633 impl ModelProvider for CaptureSummarizerModelProvider {
634 async fn chat_with_system(
635 &self,
636 _system_prompt: Option<&str>,
637 message: &str,
638 _model: &str,
639 _temperature: Option<f64>,
640 ) -> Result<String> {
641 self.seen_messages.lock().push(message.to_string());
642 Ok("summary".to_string())
643 }
644
645 async fn chat(
646 &self,
647 _request: zeroclaw_api::model_provider::ChatRequest<'_>,
648 _model: &str,
649 _temperature: Option<f64>,
650 ) -> Result<zeroclaw_api::model_provider::ChatResponse> {
651 unreachable!("context compressor uses chat_with_system")
652 }
653
654 fn supports_vision(&self) -> bool {
655 self.supports_vision
656 }
657 }
658 impl ::zeroclaw_api::attribution::Attributable for CaptureSummarizerModelProvider {
659 fn role(&self) -> ::zeroclaw_api::attribution::Role {
660 ::zeroclaw_api::attribution::Role::Provider(
661 ::zeroclaw_api::attribution::ProviderKind::Model(
662 ::zeroclaw_api::attribution::ModelProviderKind::Custom,
663 ),
664 )
665 }
666 fn alias(&self) -> &str {
667 "CaptureSummarizerModelProvider"
668 }
669 }
670
671 #[test]
672 fn test_estimate_tokens() {
673 let messages = vec![msg("user", "hello world")]; let tokens = estimate_tokens(&messages);
675 assert!(tokens > 0);
677 }
678
679 #[test]
680 fn test_estimate_tokens_empty() {
681 assert_eq!(estimate_tokens(&[]), 0);
682 }
683
684 #[test]
685 fn test_parse_context_limit_anthropic() {
686 let msg = "prompt is too long: 150000 tokens > 128000 maximum context length";
687 assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
688 }
689
690 #[test]
691 fn test_parse_context_limit_openai() {
692 let msg = "This model's maximum context length is 128000 tokens. However, your messages resulted in 150000 tokens.";
693 assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
694 }
695
696 #[test]
697 fn test_parse_context_limit_llamacpp() {
698 let msg = "request (8968 tokens) exceeds the available context size (8448 tokens)";
699 assert_eq!(parse_context_limit_from_error(msg), Some(8448));
700 }
701
702 #[test]
703 fn test_parse_context_limit_none() {
704 assert_eq!(parse_context_limit_from_error("some random error"), None);
705 }
706
707 #[test]
708 fn test_parse_context_limit_rejects_small() {
709 let msg = "limit is 100 tokens";
710 assert_eq!(parse_context_limit_from_error(msg), None); }
712
713 #[test]
714 fn test_next_probe_tier() {
715 assert_eq!(next_probe_tier(2_000_001), 2_000_000);
716 assert_eq!(next_probe_tier(2_000_000), 1_000_000);
717 assert_eq!(next_probe_tier(200_000), 128_000);
718 assert_eq!(next_probe_tier(64_000), 32_000);
719 assert_eq!(next_probe_tier(32_000), 32_000); assert_eq!(next_probe_tier(10_000), 32_000); }
722
723 #[test]
724 fn test_align_boundary_forward_skips_tool() {
725 let messages = vec![
726 msg("system", "sys"),
727 msg("user", "q"),
728 msg("tool", "result1"),
729 msg("tool", "result2"),
730 msg("user", "next"),
731 ];
732 assert_eq!(align_boundary_forward(&messages, 2), 4);
734 }
735
736 #[test]
737 fn test_align_boundary_forward_noop() {
738 let messages = vec![
739 msg("system", "sys"),
740 msg("user", "q"),
741 msg("assistant", "a"),
742 ];
743 assert_eq!(align_boundary_forward(&messages, 1), 1);
744 }
745
746 #[test]
747 fn test_repair_tool_pairs_removes_orphaned() {
748 let mut messages = vec![
749 msg("system", "sys"),
750 msg(
751 "assistant",
752 "[CONTEXT SUMMARY — 5 earlier messages compressed]\nstuff",
753 ),
754 msg("tool", "orphaned result"),
755 msg("user", "next question"),
756 ];
757 repair_tool_pairs(&mut messages);
758 assert_eq!(messages.len(), 3);
759 assert_eq!(messages[2].role, "user");
760 }
761
762 #[test]
763 fn test_repair_tool_pairs_no_false_positives() {
764 let mut messages = vec![
765 msg("system", "sys"),
766 msg("user", "q"),
767 msg("assistant", "calling tool"),
768 msg("tool", "result"),
769 msg("user", "thanks"),
770 ];
771 repair_tool_pairs(&mut messages);
772 assert_eq!(messages.len(), 5); }
774
775 #[test]
782 fn test_align_boundary_backward_backs_up_past_tool_call_assistant() {
783 let messages = vec![
784 msg("system", "sys"),
785 msg("user", "q1"),
786 msg("assistant", "old reply 1"),
787 msg("user", "q2"),
788 msg(
789 "assistant",
790 r#"{"content":null,"tool_calls":[{"id":"toolu_X","name":"shell","arguments":"{}"}]}"#,
791 ),
792 msg("tool", r#"{"tool_call_id":"toolu_X","content":"result"}"#),
793 msg("user", "follow-up"),
794 ];
795 let aligned = align_boundary_backward(&messages, 4);
798 assert!(
799 aligned < 4,
800 "boundary should retreat past assistant(tool_calls) at idx 4, got {aligned}"
801 );
802 }
803
804 #[test]
805 fn test_align_boundary_backward_noop_on_plain_assistant() {
806 let messages = vec![
807 msg("system", "sys"),
808 msg("user", "q"),
809 msg("assistant", "plain text reply"),
810 msg("user", "next"),
811 ];
812 assert_eq!(align_boundary_backward(&messages, 2), 2);
814 }
815
816 #[test]
817 fn test_build_transcript() {
818 let messages = vec![msg("user", "hello"), msg("assistant", "hi there")];
819 let t = build_full_transcript(&messages);
820 assert!(t.contains("USER: hello"));
821 assert!(t.contains("ASSISTANT: hi there"));
822 }
823
824 #[test]
825 fn test_build_summarizer_transcript_strips_all_attachment_kinds_for_non_vision_provider() {
826 let messages = vec![msg(
834 "user",
835 "Take a look at [IMAGE:/a.jpg] [PHOTO:/b.jpg] [DOCUMENT:/c.pdf] \
836 [FILE:/d.zip] [VIDEO:/e.mp4] [VOICE:/f.ogg] [AUDIO:/g.wav] please",
837 )];
838 let transcript = build_summarizer_transcript(&messages, 10_000, false);
839 for prefix in [
840 "[IMAGE:",
841 "[PHOTO:",
842 "[DOCUMENT:",
843 "[FILE:",
844 "[VIDEO:",
845 "[VOICE:",
846 "[AUDIO:",
847 ] {
848 assert!(
849 !transcript.contains(prefix),
850 "non-vision transcript should not contain raw {prefix} marker: {transcript}"
851 );
852 }
853 assert!(
854 transcript.contains("[media attachment]"),
855 "non-vision transcript should contain placeholder: {transcript}"
856 );
857 assert!(transcript.contains("Take a look at"));
858 assert!(transcript.contains("please"));
859 }
860
861 #[test]
862 fn test_build_summarizer_transcript_strips_media_markers_before_truncation() {
863 let long_path = format!(
864 "/private/tmp/zeroclaw/signal_inbound/{}",
865 "nested-directory/".repeat(12)
866 );
867 let messages = vec![msg(
868 "user",
869 &format!("Please summarize [IMAGE:{long_path}photo.png] after text"),
870 )];
871
872 let transcript = build_summarizer_transcript(&messages, 64, false);
873
874 assert!(
875 !transcript.contains("[IMAGE:"),
876 "non-vision transcript should not retain a split image marker: {transcript}"
877 );
878 assert!(
879 !transcript.contains("/private/tmp"),
880 "non-vision transcript should not leak local path fragments: {transcript}"
881 );
882 assert!(
883 transcript.contains("[media attachment]"),
884 "non-vision transcript should preserve an attachment placeholder: {transcript}"
885 );
886 }
887
888 #[test]
889 fn test_build_transcript_truncates() {
890 let messages = vec![msg("user", &"x".repeat(1000))];
891 let t = truncate_owned_if_needed(build_full_transcript(&messages), 100);
892 assert!(t.len() <= 103); }
894
895 #[test]
896 fn test_build_summarizer_transcript_strips_image_markers_for_non_vision_provider() {
897 let messages = vec![msg(
898 "user",
899 "Describe this photo [IMAGE:/tmp/test.png]\nKeep the caption",
900 )];
901 let transcript = build_summarizer_transcript(&messages, 10_000, false);
902 assert!(!transcript.contains("[IMAGE:"));
903 assert!(transcript.contains("Describe this photo"));
904 assert!(transcript.contains("Keep the caption"));
905 }
906
907 #[test]
908 fn test_build_summarizer_transcript_keeps_image_markers_for_vision_provider() {
909 let messages = vec![msg("user", "Describe this photo [IMAGE:/tmp/test.png]")];
910 let transcript = build_summarizer_transcript(&messages, 10_000, true);
911 assert!(transcript.contains("[IMAGE:/tmp/test.png]"));
912 }
913
914 #[test]
915 fn test_truncate_chars() {
916 assert_eq!(truncate_chars("hello world", 5), "hello...");
917 assert_eq!(truncate_chars("hi", 10), "hi");
918 }
919
920 #[test]
921 fn test_config_defaults() {
922 let config = ContextCompressionConfig::default();
923 assert!(config.enabled);
924 assert!((config.threshold_ratio - 0.50).abs() < f64::EPSILON);
925 assert_eq!(config.protect_first_n, 3);
926 assert_eq!(config.protect_last_n, 4);
927 assert_eq!(config.max_passes, 3);
928 assert_eq!(config.summary_max_chars, 4_000);
929 assert_eq!(config.source_max_chars, 50_000);
930 assert_eq!(config.timeout_secs, 60);
931 assert!(config.summary_model.is_none());
932 assert_eq!(config.identifier_policy, "strict");
933 }
934
935 #[test]
936 fn test_config_serde_defaults() {
937 let json = "{}";
938 let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
939 assert!(config.enabled);
940 assert_eq!(config.protect_first_n, 3);
941 assert_eq!(config.max_passes, 3);
942 }
943
944 #[test]
945 fn test_config_serde_override() {
946 let json = r#"{"enabled": false, "protect_first_n": 5, "max_passes": 1}"#;
947 let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
948 assert!(!config.enabled);
949 assert_eq!(config.protect_first_n, 5);
950 assert_eq!(config.max_passes, 1);
951 }
952
953 #[tokio::test]
954 async fn compress_if_needed_strips_image_markers_before_non_vision_summarization() {
955 let config = ContextCompressionConfig {
956 protect_first_n: 1,
957 protect_last_n: 1,
958 threshold_ratio: 0.01,
959 ..Default::default()
960 };
961 let compressor = ContextCompressor::new(config, 64);
962 let model_provider = CaptureSummarizerModelProvider {
963 supports_vision: false,
964 seen_messages: Mutex::new(Vec::new()),
965 };
966 let mut history = vec![
967 msg("system", "sys"),
968 msg("user", "Earlier question [IMAGE:/tmp/example.png]"),
969 msg("assistant", "Earlier answer"),
970 msg("user", "Newest question"),
971 ];
972
973 let result = compressor
974 .compress_if_needed(&mut history, &model_provider, "model", None)
975 .await
976 .expect("compression should succeed");
977
978 assert!(result.compressed);
979 let seen = model_provider.seen_messages.lock();
980 let prompt = seen.last().expect("summarizer should be invoked");
981 assert!(!prompt.contains("[IMAGE:"));
982 assert!(!prompt.contains("/tmp/example.png"));
983 }
984
985 #[tokio::test]
986 async fn compress_if_needed_strips_image_markers_when_summary_model_overrides() {
987 let config = ContextCompressionConfig {
988 protect_first_n: 1,
989 protect_last_n: 1,
990 threshold_ratio: 0.01,
991 summary_model: Some("text-summary-model".to_string()),
992 ..Default::default()
993 };
994 let compressor = ContextCompressor::new(config, 64);
995 let model_provider = CaptureSummarizerModelProvider {
996 supports_vision: true,
997 seen_messages: Mutex::new(Vec::new()),
998 };
999 let mut history = vec![
1000 msg("system", "sys"),
1001 msg("user", "Earlier question [IMAGE:/tmp/summary-override.png]"),
1002 msg("assistant", "Earlier answer"),
1003 msg("user", "Newest question"),
1004 ];
1005
1006 let result = compressor
1007 .compress_if_needed(&mut history, &model_provider, "default-vision-model", None)
1008 .await
1009 .expect("compression should succeed");
1010
1011 assert!(result.compressed);
1012 let seen = model_provider.seen_messages.lock();
1013 let prompt = seen.last().expect("summarizer should be invoked");
1014 assert!(!prompt.contains("[IMAGE:"));
1015 assert!(!prompt.contains("/tmp/summary-override.png"));
1016 }
1017
1018 #[test]
1021 fn test_fast_trim_protects_first_and_last_n() {
1022 let config = ContextCompressionConfig {
1023 protect_first_n: 2,
1024 protect_last_n: 2,
1025 tool_result_retrim_chars: 100,
1026 ..Default::default()
1027 };
1028 let compressor = ContextCompressor::new(config, 128_000);
1029 let big = "x".repeat(5_000);
1030 let mut history = vec![
1031 msg("system", "sys"),
1032 msg("tool", &big), msg("user", "q"),
1034 msg("tool", &big), msg("user", "next"), msg("tool", &big), ];
1038 let saved = compressor.fast_trim_tool_results(&mut history);
1039 assert!(saved > 0);
1040 assert_eq!(history[1].content.len(), 5_000);
1042 assert_eq!(history[5].content.len(), 5_000);
1043 assert!(history[3].content.len() <= 200); }
1046
1047 #[test]
1048 fn test_fast_trim_skips_images() {
1049 let config = ContextCompressionConfig {
1050 protect_first_n: 0,
1051 protect_last_n: 0,
1052 tool_result_retrim_chars: 100,
1053 ..Default::default()
1054 };
1055 let compressor = ContextCompressor::new(config, 128_000);
1056 let img = format!("data:image/{}", "x".repeat(5_000));
1057 let mut history = vec![msg("tool", &img)];
1058 let saved = compressor.fast_trim_tool_results(&mut history);
1059 assert_eq!(saved, 0);
1060 assert!(history[0].content.len() > 5_000);
1061 }
1062
1063 #[test]
1064 fn test_fast_trim_skips_exempt_tools() {
1065 let config = ContextCompressionConfig {
1066 protect_first_n: 0,
1067 protect_last_n: 0,
1068 tool_result_retrim_chars: 100,
1069 tool_result_trim_exempt: vec!["KEEPME".to_string()],
1070 ..Default::default()
1071 };
1072 let compressor = ContextCompressor::new(config, 128_000);
1073 let content = format!("KEEPME {}", "x".repeat(5_000));
1074 let mut history = vec![msg("tool", &content)];
1075 let saved = compressor.fast_trim_tool_results(&mut history);
1076 assert_eq!(saved, 0);
1077 }
1078
1079 #[test]
1080 fn test_fast_trim_skips_small_results() {
1081 let config = ContextCompressionConfig {
1082 protect_first_n: 0,
1083 protect_last_n: 0,
1084 tool_result_retrim_chars: 2_000,
1085 ..Default::default()
1086 };
1087 let compressor = ContextCompressor::new(config, 128_000);
1088 let mut history = vec![msg("tool", "small result")];
1089 let saved = compressor.fast_trim_tool_results(&mut history);
1090 assert_eq!(saved, 0);
1091 }
1092
1093 #[test]
1094 fn test_fast_trim_skips_non_tool_messages() {
1095 let config = ContextCompressionConfig {
1096 protect_first_n: 0,
1097 protect_last_n: 0,
1098 tool_result_retrim_chars: 100,
1099 ..Default::default()
1100 };
1101 let compressor = ContextCompressor::new(config, 128_000);
1102 let big = "x".repeat(5_000);
1103 let mut history = vec![msg("user", &big), msg("assistant", &big)];
1104 let saved = compressor.fast_trim_tool_results(&mut history);
1105 assert_eq!(saved, 0);
1106 }
1107
1108 #[test]
1109 fn test_fast_trim_config_defaults() {
1110 let config = ContextCompressionConfig::default();
1111 assert_eq!(config.tool_result_retrim_chars, 2_000);
1112 assert!(config.tool_result_trim_exempt.is_empty());
1113 }
1114
1115 #[test]
1116 fn test_fast_trim_disabled_when_zero() {
1117 let config = ContextCompressionConfig {
1118 protect_first_n: 0,
1119 protect_last_n: 0,
1120 tool_result_retrim_chars: 0,
1121 ..Default::default()
1122 };
1123 let compressor = ContextCompressor::new(config, 128_000);
1124 let big = "x".repeat(5_000);
1125 let mut history = vec![msg("tool", &big)];
1126 let saved = compressor.fast_trim_tool_results(&mut history);
1127 assert_eq!(saved, 0);
1128 }
1129
1130 #[test]
1133 fn build_summary_message_uses_plain_text_when_no_reasoning() {
1134 let compressed = vec![
1135 msg("user", "what's the weather"),
1136 msg("assistant", "it's sunny"),
1137 ];
1138 let out = build_summary_message(&compressed, "weather chat", 2);
1139 assert_eq!(out.role, "assistant");
1140 assert!(out.content.starts_with("[CONTEXT SUMMARY"));
1141 assert!(out.content.contains("weather chat"));
1142 assert!(
1143 serde_json::from_str::<serde_json::Value>(&out.content).is_err(),
1144 "plain-text summary must not parse as JSON"
1145 );
1146 }
1147
1148 #[test]
1155 fn build_summary_message_preserves_reasoning_content_when_present() {
1156 let assistant_with_reasoning = serde_json::json!({
1157 "content": "let me look",
1158 "reasoning_content": "user wants weather; need to check",
1159 })
1160 .to_string();
1161 let compressed = vec![
1162 msg("user", "what's the weather"),
1163 msg("assistant", &assistant_with_reasoning),
1164 ];
1165
1166 let out = build_summary_message(&compressed, "weather chat", 2);
1167 assert_eq!(out.role, "assistant");
1168 let parsed: serde_json::Value = serde_json::from_str(&out.content)
1169 .expect("summary must be JSON when reasoning_content is preserved");
1170 assert!(
1171 parsed["content"]
1172 .as_str()
1173 .is_some_and(|s| s.starts_with("[CONTEXT SUMMARY")),
1174 "summary text belongs in `content`",
1175 );
1176 assert_eq!(
1177 parsed["reasoning_content"].as_str(),
1178 Some("user wants weather; need to check"),
1179 "must carry reasoning_content from the most recent compressed assistant turn",
1180 );
1181 }
1182
1183 #[test]
1188 fn build_summary_message_picks_last_reasoning_content() {
1189 let earlier = serde_json::json!({
1190 "content": "first answer",
1191 "reasoning_content": "EARLIER reasoning",
1192 })
1193 .to_string();
1194 let later = serde_json::json!({
1195 "content": "second answer",
1196 "reasoning_content": "LATER reasoning",
1197 })
1198 .to_string();
1199 let compressed = vec![
1200 msg("user", "q1"),
1201 msg("assistant", &earlier),
1202 msg("user", "q2"),
1203 msg("assistant", &later),
1204 ];
1205
1206 let out = build_summary_message(&compressed, "two-turn chat", 4);
1207 let parsed: serde_json::Value = serde_json::from_str(&out.content).unwrap();
1208 assert_eq!(
1209 parsed["reasoning_content"].as_str(),
1210 Some("LATER reasoning"),
1211 "must pick the most recent reasoning_content, not the earliest",
1212 );
1213 }
1214}