// zeroclaw_channels/util.rs

/// Shorten `s` to at most `max_chars` Unicode scalar values.
///
/// When `s` holds more than `max_chars` characters, the first `max_chars` are
/// kept, trailing whitespace on that prefix is trimmed, and `"..."` is
/// appended. Strings that already fit are returned unchanged (no ellipsis).
pub fn truncate_with_ellipsis(s: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` is where the cut
    // goes; if there is no such character the string already fits.
    let Some((cut, _)) = s.char_indices().nth(max_chars) else {
        return s.to_string();
    };

    let mut shortened = String::from(s[..cut].trim_end());
    shortened.push_str("...");
    shortened
}
11
/// Sentinel string `"__ZEROCLAW_BLOCK_KIT__"`.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// prepended to a message body to flag that it carries a Slack Block Kit
/// payload rather than plain text — confirm against the callers.
pub const BLOCK_KIT_PREFIX: &str = "__ZEROCLAW_BLOCK_KIT__";
13
14pub fn strip_tool_call_tags(message: &str) -> String {
15    const TOOL_CALL_OPEN_TAGS: [&str; 7] = [
16        "<function_calls>",
17        "<function_call>",
18        "<tool_call>",
19        "<toolcall>",
20        "<tool-call>",
21        "<tool>",
22        "<invoke>",
23    ];
24
25    fn find_first_tag<'a>(haystack: &str, tags: &'a [&'a str]) -> Option<(usize, &'a str)> {
26        tags.iter()
27            .filter_map(|tag| haystack.find(tag).map(|idx| (idx, *tag)))
28            .min_by_key(|(idx, _)| *idx)
29    }
30
31    fn matching_close_tag(open_tag: &str) -> Option<&'static str> {
32        match open_tag {
33            "<function_calls>" => Some("</function_calls>"),
34            "<function_call>" => Some("</function_call>"),
35            "<tool_call>" => Some("</tool_call>"),
36            "<toolcall>" => Some("</toolcall>"),
37            "<tool-call>" => Some("</tool-call>"),
38            "<tool>" => Some("</tool>"),
39            "<invoke>" => Some("</invoke>"),
40            _ => None,
41        }
42    }
43
44    fn extract_first_json_end(input: &str) -> Option<usize> {
45        let trimmed = input.trim_start();
46        let trim_offset = input.len().saturating_sub(trimmed.len());
47
48        for (byte_idx, ch) in trimmed.char_indices() {
49            if ch != '{' && ch != '[' {
50                continue;
51            }
52
53            let slice = &trimmed[byte_idx..];
54            let mut stream =
55                serde_json::Deserializer::from_str(slice).into_iter::<serde_json::Value>();
56            if let Some(Ok(_value)) = stream.next() {
57                let consumed = stream.byte_offset();
58                if consumed > 0 {
59                    return Some(trim_offset + byte_idx + consumed);
60                }
61            }
62        }
63
64        None
65    }
66
67    fn strip_leading_close_tags(mut input: &str) -> &str {
68        loop {
69            let trimmed = input.trim_start();
70            if !trimmed.starts_with("</") {
71                return trimmed;
72            }
73
74            let Some(close_end) = trimmed.find('>') else {
75                return "";
76            };
77            input = &trimmed[close_end + 1..];
78        }
79    }
80
81    // Does the tag structure run to the end of the message? A *real* truncated
82    // tool call is the model getting cut off, so the unterminated structure is
83    // the last thing in the message. If natural-language prose resumes after the
84    // tags, this is an inline *example* (the model is discussing tool calls), not
85    // a truncation — so we should keep it. Bias toward keeping: a little leaked
86    // XML beats eating the user's text.
87    fn tool_structure_runs_to_end(inner: &str) -> bool {
88        let mut rest = inner.trim_start();
89        // Consume a run of `<...>` tags (and whitespace between them).
90        while rest.starts_with('<') {
91            match rest.find('>') {
92                Some(gt) => rest = rest[gt + 1..].trim_start(),
93                // Cut off mid-tag (no closing '>') — a classic truncation.
94                None => return true,
95            }
96        }
97        let tail = rest.trim();
98        if tail.is_empty() {
99            // Tags ran cleanly to the end → truncation.
100            return true;
101        }
102        // Non-empty tail: prose ⇒ inline example (keep); otherwise it's a
103        // truncated tag/param value (drop).
104        !looks_like_prose(tail)
105    }
106
107    // Heuristic: does `text` read like resumed natural-language prose (as opposed
108    // to a cut-off parameter value)? True on an internal sentence boundary
109    // (". " / "! " / "? " + a letter) or a multi-word string that ends like a
110    // sentence. Deliberately lenient so ambiguous tails are kept, not dropped.
111    fn looks_like_prose(text: &str) -> bool {
112        let bytes = text.as_bytes();
113        for i in 0..bytes.len().saturating_sub(1) {
114            if matches!(bytes[i], b'.' | b'!' | b'?')
115                && matches!(bytes[i + 1], b' ' | b'\n' | b'\t')
116                && text[i + 1..]
117                    .trim_start()
118                    .chars()
119                    .next()
120                    .is_some_and(|c| c.is_alphabetic())
121            {
122                return true;
123            }
124        }
125        let trimmed = text.trim_end();
126        let ends_like_sentence = trimmed
127            .chars()
128            .last()
129            .is_some_and(|c| matches!(c, '.' | '!' | '?'))
130            && trimmed
131                .chars()
132                .rev()
133                .nth(1)
134                .is_some_and(|c| c.is_alphabetic());
135        ends_like_sentence && text.trim().contains(' ')
136    }
137
138    let mut kept_segments = Vec::new();
139    let mut remaining = message;
140
141    while let Some((start, open_tag)) = find_first_tag(remaining, &TOOL_CALL_OPEN_TAGS) {
142        let before = &remaining[..start];
143        if !before.is_empty() {
144            kept_segments.push(before.to_string());
145        }
146
147        let Some(close_tag) = matching_close_tag(open_tag) else {
148            break;
149        };
150        let after_open = &remaining[start + open_tag.len()..];
151
152        if let Some(close_idx) = after_open.find(close_tag) {
153            remaining = &after_open[close_idx + close_tag.len()..];
154            continue;
155        }
156
157        if let Some(consumed_end) = extract_first_json_end(after_open) {
158            remaining = strip_leading_close_tags(&after_open[consumed_end..]);
159            continue;
160        }
161
162        // Unterminated open tag with no parseable JSON body. Drop the broken
163        // tail ONLY when it looks like tool-call structure (`<invoke>` /
164        // `<parameter>` / `<tool*>` / `<function*>` / `{` / `[`) AND that
165        // structure runs to the end of the message — i.e. a real truncation
166        // where the model was cut off mid-call. If prose resumes after the
167        // structure, the model is showing an *example*, not making a call, so
168        // keep it verbatim (a little leaked XML beats eating the user's reply).
169        // Text that merely mentions a tag is likewise kept.
170        let inner = after_open.trim_start();
171        let inner_lower = inner.to_ascii_lowercase();
172        let looks_like_tool_structure = inner_lower.starts_with("<invoke")
173            || inner_lower.starts_with("<parameter")
174            || inner_lower.starts_with("<tool")
175            || inner_lower.starts_with("<function")
176            || inner.starts_with('{')
177            || inner.starts_with('[');
178        if looks_like_tool_structure && tool_structure_runs_to_end(inner) {
179            remaining = "";
180            break;
181        }
182
183        kept_segments.push(remaining[start..].to_string());
184        remaining = "";
185        break;
186    }
187
188    if !remaining.is_empty() {
189        kept_segments.push(remaining.to_string());
190    }
191
192    let mut result = kept_segments.concat();
193
194    // Clean up any resulting blank lines (but preserve paragraphs)
195    while result.contains("\n\n\n") {
196        result = result.replace("\n\n\n", "\n\n");
197    }
198
199    result.trim().to_string()
200}
201
/// Recognized attachment marker kinds (e.g. `[IMAGE:/path]`, `[DOCUMENT:url]`).
const ATTACHMENT_KINDS: &[&str] = &[
    "IMAGE", "PHOTO", "DOCUMENT", "FILE", "VIDEO", "AUDIO", "VOICE",
];

/// Parse `[KIND:target]` attachment markers out of a message.
///
/// Returns the cleaned text (markers removed, then trimmed) and a vec of
/// `(kind, target)` pairs in order of appearance. `kind` is matched
/// case-insensitively against [`ATTACHMENT_KINDS`] and returned upper-cased;
/// `target` is trimmed and must be non-empty. Bracketed text that is not a
/// valid marker (unknown kind, empty target, no `:`) is left in the output
/// unchanged.
///
/// A `[` that does not start a valid marker does not hide a marker that opens
/// later inside the same bracket span: `"[note: [IMAGE:/a.png]]"` yields the
/// `IMAGE` attachment and the cleaned text `"[note: ]"`.
pub fn parse_attachment_markers(message: &str) -> (String, Vec<(String, String)>) {
    // Interpret the text between `[` and `]` as `KIND:target`, if it is one.
    fn parse_marker(marker_text: &str) -> Option<(String, String)> {
        let (kind, target) = marker_text.split_once(':')?;
        let kind = kind.trim().to_ascii_uppercase();
        let target = target.trim();
        if target.is_empty() || !ATTACHMENT_KINDS.contains(&kind.as_str()) {
            return None;
        }
        Some((kind, target.to_string()))
    }

    let mut cleaned = String::with_capacity(message.len());
    let mut attachments = Vec::new();
    let mut cursor = 0usize;

    while let Some(rel_start) = message[cursor..].find('[') {
        let start = cursor + rel_start;
        cleaned.push_str(&message[cursor..start]);

        let Some(rel_end) = message[start..].find(']') else {
            // No closing bracket ahead: nothing further can be a marker.
            cleaned.push_str(&message[start..]);
            cursor = message.len();
            break;
        };
        let end = start + rel_end;

        match parse_marker(&message[start + 1..end]) {
            Some(attachment) => {
                attachments.push(attachment);
                cursor = end + 1;
            }
            None => {
                // Not a marker. Keep just this `[` and rescan from the next
                // byte, so a real marker opening before the same `]` is still
                // found instead of being swallowed as plain text.
                cleaned.push('[');
                cursor = start + 1;
            }
        }
    }

    cleaned.push_str(&message[cursor..]);

    (cleaned.trim().to_string(), attachments)
}
250
/// Generate a short 6-character lowercase alphanumeric approval token.
///
/// Each position is drawn independently from the full `[a-z0-9]` alphabet
/// (36 options per position, 36^6 ≈ 2.2B combinations) — not UUID hex (which
/// would give only 16^6 ≈ 16.7M and would materially weaken the WhatsApp
/// no-per-sender-check design described in the PR #6010 security note).
#[cfg(any(
    feature = "channel-discord",
    feature = "channel-signal",
    feature = "channel-slack",
    feature = "channel-whatsapp-cloud",
    feature = "whatsapp-web",
    test
))]
pub(crate) fn new_approval_token() -> String {
    use rand::RngExt;

    const TOKEN_LEN: usize = 6;
    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";

    let mut rng = rand::rng();
    let mut token = String::with_capacity(TOKEN_LEN);
    for _ in 0..TOKEN_LEN {
        let pick = rng.random_range(0..ALPHABET.len());
        // Every alphabet byte is ASCII, so the byte → char cast is lossless.
        token.push(ALPHABET[pick] as char);
    }
    token
}
273
274/// Parse an approval reply of the form `"TOKEN yes|no|always ..."`.
275///
276/// Returns `Some((token, response))` when the text begins with a 6-character
277/// alphanumeric token followed by a recognised action word. Returns `None`
278/// for any other input so normal messages are not intercepted.
279pub fn parse_approval_reply(
280    text: &str,
281) -> Option<(String, zeroclaw_api::channel::ChannelApprovalResponse)> {
282    use zeroclaw_api::channel::ChannelApprovalResponse;
283    let lower = text.trim().to_lowercase();
284    let mut parts = lower.splitn(2, ' ');
285    let token = parts.next()?.to_string();
286    if token.len() != 6 || !token.chars().all(|c| c.is_ascii_alphanumeric()) {
287        return None;
288    }
289    let action_word = parts.next()?.split_whitespace().next()?;
290    let response = match action_word {
291        "yes" | "y" | "approve" => ChannelApprovalResponse::Approve,
292        "no" | "n" | "deny" => ChannelApprovalResponse::Deny,
293        "always" => ChannelApprovalResponse::AlwaysApprove,
294        _ => return None,
295    };
296    Some((token, response))
297}
298
299/// Generate a conversation history key from a channel message.
300pub fn conversation_history_key(msg: &zeroclaw_api::channel::ChannelMessage) -> String {
301    match &msg.thread_ts {
302        Some(tid) => format!(
303            "{}_{}_{}_{}",
304            msg.channel, msg.reply_target, tid, msg.sender
305        ),
306        None => format!("{}_{}_{}", msg.channel, msg.reply_target, msg.sender),
307    }
308}
309
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_drops_truncated_function_calls_envelope_keeps_prose() {
        // Model cut off inside `<function_calls><invoke …><parameter …`:
        // the broken tail goes, the prose in front of it stays.
        let cut_off = "Here's the result:\n<function_calls>\n<invoke name=\"shell\">\n<parameter name=\"command\">sed -n '1,5p' file.rs";
        assert_eq!(strip_tool_call_tags(cut_off), "Here's the result:");

        // Nothing but a truncated envelope leaves an empty string.
        let envelope_only =
            "<function_calls>\n<invoke name=\"shell\">\n<parameter name=\"command\">date";
        assert_eq!(strip_tool_call_tags(envelope_only), "");

        // A properly closed envelope is removed from the middle of the text.
        let closed = "before <function_calls><invoke name=\"shell\"><parameter name=\"command\">date</parameter></invoke></function_calls> after";
        assert_eq!(strip_tool_call_tags(closed), "before  after");
    }

    #[test]
    fn strip_keeps_prose_that_merely_mentions_a_tag() {
        // The opener is never closed, but what follows is ordinary prose rather
        // than tool structure, so the whole sentence must come back untouched.
        let sentence =
            "The bug is that models emit <function_calls> and never close it, hanging the parser.";
        assert_eq!(strip_tool_call_tags(sentence), sentence);
    }

    #[test]
    fn strip_keeps_unterminated_example_followed_by_prose() {
        // Tool structure does follow the unterminated opener, yet prose resumes
        // afterwards: that is an inline example, not a truncation (a genuine
        // truncation would be the last thing in the message). Keep verbatim.
        let examples = [
            "The model emits <function_calls><invoke name=\"x\"> and then keeps going. This sentence matters.",
            "Emit <tool_call> {then describe the schema} in your docs.",
        ];
        for example in examples {
            assert_eq!(strip_tool_call_tags(example), example);
        }
    }

    #[test]
    fn strip_still_drops_genuine_truncation_to_end() {
        // Structure runs to the end with no prose after it: cut off mid-call.
        let cut_off = "Here's the result:\n<function_calls>\n<invoke name=\"shell\">\n<parameter name=\"command\">sed -n '1,5p' file.rs";
        assert_eq!(strip_tool_call_tags(cut_off), "Here's the result:");

        // Ending inside a tag (no closing '>') counts as truncation as well.
        let inside_tag = "Working on it <function_calls><invoke name=\"sh";
        assert_eq!(strip_tool_call_tags(inside_tag), "Working on it");
    }

    #[test]
    fn parse_attachment_markers_extracts_known_kinds() {
        let (text, found) =
            parse_attachment_markers("Here [IMAGE:/tmp/a.png] and [DOCUMENT:/tmp/b.pdf] done");
        // Markers are cut out in place, so the surrounding spaces remain.
        assert_eq!(text, "Here  and  done");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0], ("IMAGE".into(), "/tmp/a.png".into()));
        assert_eq!(found[1], ("DOCUMENT".into(), "/tmp/b.pdf".into()));
    }

    #[test]
    fn parse_attachment_markers_preserves_unknown_kinds() {
        let (text, found) = parse_attachment_markers("Check [UNKNOWN:foo] out");
        assert_eq!(text, "Check [UNKNOWN:foo] out");
        assert!(found.is_empty());
    }

    #[test]
    fn parse_attachment_markers_preserves_empty_target() {
        let (text, found) = parse_attachment_markers("See [IMAGE:] here");
        assert_eq!(text, "See [IMAGE:] here");
        assert!(found.is_empty());
    }

    #[test]
    fn parse_attachment_markers_no_markers() {
        let (text, found) = parse_attachment_markers("Hello world");
        assert_eq!(text, "Hello world");
        assert!(found.is_empty());
    }

    #[test]
    fn parse_attachment_markers_all_kinds() {
        // One marker for each of the seven recognised kinds.
        let every_kind = "[IMAGE:a] [PHOTO:b] [DOCUMENT:c] [FILE:d] [VIDEO:e] [AUDIO:f] [VOICE:g]";
        let (_, found) = parse_attachment_markers(every_kind);
        assert_eq!(found.len(), 7);
    }

    #[test]
    fn parse_attachment_markers_case_insensitive_kind() {
        // A lower-case kind is accepted and reported upper-cased.
        let (_, found) = parse_attachment_markers("[image:/tmp/a.png]");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].0, "IMAGE");
    }

    #[test]
    fn new_approval_token_is_6_char_alphanumeric() {
        let generated = super::new_approval_token();
        assert_eq!(generated.len(), 6);
        assert!(generated.chars().all(|c| c.is_ascii_alphanumeric()));
    }

    #[test]
    fn parse_approval_reply_accepts_canonical_forms() {
        use zeroclaw_api::channel::ChannelApprovalResponse;
        let cases = [
            ("abc123 yes", ChannelApprovalResponse::Approve),
            ("abc123 y", ChannelApprovalResponse::Approve),
            ("abc123 approve", ChannelApprovalResponse::Approve),
            ("ABC123 YES", ChannelApprovalResponse::Approve),
            (
                "abc123 yes please go ahead",
                ChannelApprovalResponse::Approve,
            ),
            ("abc123 no", ChannelApprovalResponse::Deny),
            ("abc123 n", ChannelApprovalResponse::Deny),
            ("abc123 deny", ChannelApprovalResponse::Deny),
            ("abc123 always", ChannelApprovalResponse::AlwaysApprove),
        ];
        for (input, expected) in cases {
            let Some((token, response)) = super::parse_approval_reply(input) else {
                panic!("expected Some for input {:?}", input);
            };
            // The returned token is the lower-cased first word of the input.
            let lowered = input.trim().to_lowercase();
            let first_word = lowered.split(' ').next().unwrap();
            assert_eq!(token, first_word);
            assert_eq!(response, expected, "input: {input:?}");
        }
    }

    #[test]
    fn parse_approval_reply_rejects_bad_input() {
        // Missing token, missing action, malformed token, unknown action, and
        // empty / whitespace-only action must all be rejected.
        let rejected = [
            "yes",
            "abc123",
            "abc 123 yes",
            "toolname yes",
            "abc123 maybe",
            "",
            "abc123 ",
        ];
        for input in rejected {
            assert!(
                super::parse_approval_reply(input).is_none(),
                "expected None for input {:?}",
                input
            );
        }
    }
}
// zeroclaw_channels/util.rs