Skip to main content

zeroclaw_channels/
imessage.rs

1use async_trait::async_trait;
2use directories::UserDirs;
3use rusqlite::{Connection, OpenFlags};
4use std::sync::Arc;
5use tokio::sync::mpsc;
6use zeroclaw_api::channel::{Channel, ChannelMessage, SendMessage};
7
/// Extract plain text from an iMessage `attributedBody` typedstream blob.
///
/// Modern macOS (Ventura+) stores message content in `attributedBody` as an
/// `NSMutableAttributedString` serialized via Apple's typedstream format,
/// rather than the plain `text` column.
///
/// This follows the well-documented marker-based approach used by LangChain,
/// steipete/imsg, and mac_apt (all MIT-licensed). See:
/// <https://chrissardegna.com/blog/reverse-engineering-apples-typedstream-format/>
///
/// Returns `None` when:
/// - the `[0x01, 0x2B]` start-of-text marker is absent,
/// - nothing follows the marker, or the length prefix is truncated or uses an
///   unsupported tag byte,
/// - the declared length runs past the end of the blob, or
/// - the payload is not valid UTF-8.
fn extract_text_from_attributed_body(blob: &[u8]) -> Option<String> {
    // Find the start-of-text marker: [0x01, 0x2B]
    // 0x2B is the C-string type tag in Apple's typedstream format.
    let marker_pos = blob.windows(2).position(|w| w == [0x01, 0x2B])?;
    let rest = blob.get(marker_pos + 2..)?;

    // Read variable-length prefix immediately after the marker.
    // The length determines text extent — we do NOT scan for an end marker,
    // because byte pairs like [0x86, 0x84] can appear inside valid UTF-8
    // (e.g. U+2184 LATIN SMALL LETTER REVERSED C encodes to E2 86 84).
    //
    //   0x00-0x7F => literal length (1 byte)
    //   0x81      => next 2 bytes are little-endian u16 length
    //   0x82      => next 4 bytes are little-endian u32 length
    //   0x80, 0x83+ are not observed in iMessage typedstreams; reject gracefully.
    //
    // An empty `rest` or a truncated prefix matches no arm and falls through
    // to the rejecting catch-all.
    let (length, text_start) = match rest {
        [0x81, lo, hi, ..] => (usize::from(u16::from_le_bytes([*lo, *hi])), 3_usize),
        [0x82, b0, b1, b2, b3, ..] => {
            // Checked conversion: never silently truncate an attacker-supplied length.
            let len = usize::try_from(u32::from_le_bytes([*b0, *b1, *b2, *b3])).ok()?;
            (len, 5)
        }
        [b, ..] if *b <= 0x7F => (usize::from(*b), 1),
        _ => return None,
    };

    // The length comes from untrusted data; a checked add keeps a huge value
    // from overflowing `usize` (panic in debug, wrap in release) on narrow targets.
    let text_end = text_start.checked_add(length)?;
    let text_bytes = rest.get(text_start..text_end)?;
    std::str::from_utf8(text_bytes).ok().map(str::to_owned)
}
52
53/// Resolve message content from the `text` column with `attributedBody` fallback.
54///
55/// Prefers the plain `text` column when present. Falls back to parsing the
56/// typedstream blob in `attributedBody` (modern macOS). Logs a warning when
57/// `attributedBody` exists but cannot be parsed.
58fn resolve_message_content(rowid: i64, text: Option<String>, body: Option<Vec<u8>>) -> String {
59    text.filter(|t| !t.trim().is_empty())
60        .or_else(|| {
61            let parsed = body.as_deref().and_then(extract_text_from_attributed_body);
62            if parsed.is_none() && body.as_ref().is_some_and(|b| !b.is_empty()) {
63                ::zeroclaw_log::record!(
64                    WARN,
65                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
66                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
67                        .with_attrs(::serde_json::json!({"rowid": rowid})),
68                    "failed to parse attributedBody"
69                );
70            }
71            parsed
72        })
73        .unwrap_or_default()
74}
75
/// iMessage channel using macOS `AppleScript` bridge.
/// Polls the Messages database for new messages and sends replies via `osascript`.
///
/// Clones share the same peer resolver because it is held behind an [`Arc`].
#[derive(Clone)]
pub struct IMessageChannel {
    /// The alias key under `[channels.imessage.<alias>]` this handle is
    /// bound to. Used to scope peer-group writes and resolver lookups.
    alias: String,
    /// Resolves inbound external peers from canonical state at message-time.
    /// No cache (see AGENTS.md "ABSOLUTE RULE — SINGLE SOURCE OF TRUTH").
    /// Invoked afresh on every allowlist check.
    peer_resolver: Arc<dyn Fn() -> Vec<String> + Send + Sync>,
    /// Seconds to sleep between polls of the Messages database.
    /// The constructor sets this to 3.
    poll_interval_secs: u64,
}
88
89impl IMessageChannel {
90    pub fn new(
91        alias: impl Into<String>,
92        peer_resolver: Arc<dyn Fn() -> Vec<String> + Send + Sync>,
93    ) -> Self {
94        Self {
95            alias: alias.into(),
96            peer_resolver,
97            poll_interval_secs: 3,
98        }
99    }
100
101    /// Return the alias under `[channels.imessage.<alias>]` that this
102    /// channel handle is bound to.
103    pub fn alias(&self) -> &str {
104        &self.alias
105    }
106
107    fn is_contact_allowed(&self, sender: &str) -> bool {
108        let peers = (self.peer_resolver)();
109        crate::allowlist::is_user_allowed(&peers, sender, crate::allowlist::Match::CaseInsensitive)
110    }
111}
112
/// Make a string safe to embed inside a double-quoted `AppleScript` literal.
///
/// Characters that could terminate the literal or break the script onto a new
/// line are rewritten; everything else passes through unchanged:
/// - backslash (`\`) becomes `\\`
/// - double quote (`"`) becomes `\"`
/// - line feed becomes the two characters `\n`
/// - carriage return becomes the two characters `\r`
fn escape_applescript(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
    }
    escaped
}
125
/// Validate that a target looks like a valid phone number or email address.
///
/// This is a defense-in-depth measure to reject obviously malicious targets
/// before they reach `AppleScript` interpolation. Leading and trailing
/// whitespace is ignored.
///
/// Valid patterns:
/// - Phone: starts with `+`, followed ONLY by ASCII digits and the separators
///   space, `-`, `(`, `)` and `.`, with 7 to 15 digits in total.
/// - Email: exactly one `@`; the local part is non-empty and made of
///   alphanumerics plus `._+-`; the domain is non-empty, contains a `.`, and
///   is made of alphanumerics plus `.-`.
///
/// Anything else, including the empty string, is rejected.
fn is_valid_imessage_target(target: &str) -> bool {
    let target = target.trim();

    // Phone number: +1234567890 or +1 234-567-8900
    if let Some(number) = target.strip_prefix('+') {
        // Every character must be a digit or a harmless separator. Counting
        // digits alone would accept quotes and other script metacharacters
        // as long as enough digits were present somewhere in the string.
        let well_formed = number
            .chars()
            .all(|c| c.is_ascii_digit() || matches!(c, ' ' | '-' | '(' | ')' | '.'));
        let digit_count = number.chars().filter(char::is_ascii_digit).count();
        // Must have at least 7 digits (shortest valid phone numbers) and at most 15.
        return well_formed && (7..=15).contains(&digit_count);
    }

    // Email: split at the first `@`; a second `@` ends up in the domain and is
    // rejected by the domain character check.
    let Some((local, domain)) = target.split_once('@') else {
        return false;
    };

    // Local part: non-empty, alphanumeric + common email chars
    let local_valid = !local.is_empty()
        && local
            .chars()
            .all(|c| c.is_alphanumeric() || "._+-".contains(c));

    // Domain: non-empty, contains a dot, alphanumeric + dots/hyphens
    let domain_valid = !domain.is_empty()
        && domain.contains('.')
        && domain
            .chars()
            .all(|c| c.is_alphanumeric() || ".-".contains(c));

    local_valid && domain_valid
}
170
171impl ::zeroclaw_api::attribution::Attributable for IMessageChannel {
172    fn role(&self) -> ::zeroclaw_api::attribution::Role {
173        ::zeroclaw_api::attribution::Role::Channel(
174            ::zeroclaw_api::attribution::ChannelKind::IMessage,
175        )
176    }
177    fn alias(&self) -> &str {
178        &self.alias
179    }
180}
181
182#[async_trait]
183impl Channel for IMessageChannel {
    /// Channel kind identifier; always the literal `"imessage"`, independent
    /// of the configured alias.
    fn name(&self) -> &str {
        "imessage"
    }
187
188    async fn send(&self, message: &SendMessage) -> anyhow::Result<()> {
189        // Defense-in-depth: validate target format before any interpolation
190        if !is_valid_imessage_target(&message.recipient) {
191            anyhow::bail!(
192                "Invalid iMessage target: must be a phone number (+1234567890) or email (user@example.com)"
193            );
194        }
195
196        // SECURITY: Escape both message AND target to prevent AppleScript injection
197        // See: CWE-78 (OS Command Injection)
198        let escaped_msg = escape_applescript(&message.content);
199        let escaped_target = escape_applescript(&message.recipient);
200
201        let script = format!(
202            r#"tell application "Messages"
203    set targetService to 1st account whose service type = iMessage
204    set targetBuddy to participant "{escaped_target}" of targetService
205    send "{escaped_msg}" to targetBuddy
206end tell"#
207        );
208
209        let output = tokio::process::Command::new("osascript")
210            .arg("-e")
211            .arg(&script)
212            .output()
213            .await?;
214
215        if !output.status.success() {
216            let stderr = String::from_utf8_lossy(&output.stderr);
217            anyhow::bail!("iMessage send failed: {stderr}");
218        }
219
220        Ok(())
221    }
222
    /// Poll the Messages SQLite database and forward new inbound messages to `tx`.
    ///
    /// The starting point is the highest inbound (`is_from_me = 0`) ROWID at
    /// the time of the call, so only rows inserted afterwards are forwarded.
    /// Each cycle sleeps `poll_interval_secs`, fetches up to 20 newer rows,
    /// drops senders that fail the allowlist check and rows with blank
    /// content, and sends the rest as [`ChannelMessage`]s.
    ///
    /// Returns `Ok(())` once `tx.send` fails. Query errors inside a cycle are
    /// logged and polling continues.
    ///
    /// # Errors
    ///
    /// Fails when the home directory cannot be determined, the database file
    /// does not exist, the database cannot be opened or initially queried, or
    /// a blocking worker task fails to join.
    async fn listen(&self, tx: mpsc::Sender<ChannelMessage>) -> anyhow::Result<()> {
        ::zeroclaw_log::record!(
            INFO,
            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
            "iMessage channel listening (AppleScript bridge)..."
        );

        // Query the Messages SQLite database for new messages
        // The database is at ~/Library/Messages/chat.db
        let db_path = UserDirs::new()
            .map(|u| u.home_dir().join("Library/Messages/chat.db"))
            .ok_or_else(|| {
                ::zeroclaw_log::record!(
                    ERROR,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
                        .with_outcome(::zeroclaw_log::EventOutcome::Failure),
                    "Cannot find home directory"
                );
                anyhow::Error::msg("Cannot find home directory")
            })?;

        if !db_path.exists() {
            anyhow::bail!(
                "Messages database not found at {}. Ensure Messages.app is set up and Full Disk Access is granted.",
                db_path.display()
            );
        }

        // Open a persistent read-only connection instead of creating
        // a new one on every poll cycle. The open runs on the blocking
        // pool because rusqlite calls are synchronous.
        let path = db_path.to_path_buf();
        let conn = tokio::task::spawn_blocking(move || -> anyhow::Result<Connection> {
            Ok(Connection::open_with_flags(
                &path,
                OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX,
            )?)
        })
        .await??;

        // Track the last ROWID we've seen (shuttle conn in and out).
        // The connection is moved into the blocking closure and handed back
        // in the return value so it can be reused by the next cycle.
        let (mut conn, initial_rowid) =
            tokio::task::spawn_blocking(move || -> anyhow::Result<(Connection, i64)> {
                let rowid = {
                    let mut stmt =
                        conn.prepare("SELECT MAX(ROWID) FROM message WHERE is_from_me = 0")?;
                    // MAX() over zero rows yields NULL, hence the Option.
                    let rowid: Option<i64> = stmt.query_row([], |row| row.get(0))?;
                    rowid.unwrap_or(0)
                };
                Ok((conn, rowid))
            })
            .await??;
        let mut last_rowid = initial_rowid;

        loop {
            tokio::time::sleep(tokio::time::Duration::from_secs(self.poll_interval_secs)).await;

            // Copy the cursor so the `move` closure does not capture `last_rowid` itself.
            let since = last_rowid;
            // The closure always returns the connection, even when the query
            // fails, so a transient query error does not lose the handle.
            let (returned_conn, poll_result) = tokio::task::spawn_blocking(
                move || -> (Connection, anyhow::Result<Vec<(i64, String, String)>>) {
                    let result = (|| -> anyhow::Result<Vec<(i64, String, String)>> {
                        let mut stmt = conn.prepare(
                            "SELECT m.ROWID, h.id, m.text, m.attributedBody \
                     FROM message m \
                     JOIN handle h ON m.handle_id = h.ROWID \
                     WHERE m.ROWID > ?1 \
                     AND m.is_from_me = 0 \
                     AND (m.text IS NOT NULL OR m.attributedBody IS NOT NULL) \
                     ORDER BY m.ROWID ASC \
                     LIMIT 20",
                        )?;
                        let rows = stmt.query_map([since], |row| {
                            let rowid = row.get::<_, i64>(0)?;
                            let sender = row.get::<_, String>(1)?;
                            let text: Option<String> = row.get(2)?;
                            let body: Option<Vec<u8>> = row.get(3)?;
                            Ok((rowid, sender, resolve_message_content(rowid, text, body)))
                        })?;
                        let results = rows.collect::<Result<Vec<_>, _>>()?;
                        Ok(results)
                    })();

                    (conn, result)
                },
            )
            .await
            // A join error means the worker did not return; the connection it
            // owned is gone, so listening stops with an error.
            .map_err(|e| {
                ::zeroclaw_log::record!(
                    ERROR,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
                    "iMessage poll worker join error"
                );
                anyhow::Error::msg(format!("iMessage poll worker join error: {e}"))
            })?;
            conn = returned_conn;

            match poll_result {
                Ok(messages) => {
                    for (rowid, sender, text) in messages {
                        // Advance the cursor before filtering so skipped rows
                        // are not fetched again on the next cycle.
                        if rowid > last_rowid {
                            last_rowid = rowid;
                        }

                        if !self.is_contact_allowed(&sender) {
                            continue;
                        }

                        if text.trim().is_empty() {
                            continue;
                        }

                        let msg = ChannelMessage {
                            id: rowid.to_string(),
                            sender: sender.clone(),
                            reply_target: sender.clone(),
                            content: text,
                            channel: "imessage".to_string(),
                            channel_alias: Some(self.alias.clone()),
                            // Wall-clock time of this poll, not a timestamp
                            // read from the database row.
                            timestamp: std::time::SystemTime::now()
                                .duration_since(std::time::UNIX_EPOCH)
                                .unwrap_or_default()
                                .as_secs(),
                            thread_ts: None,
                            interruption_scope_id: None,
                            attachments: vec![],
                            subject: None,
                        };

                        // A failed send ends listening without an error.
                        if tx.send(msg).await.is_err() {
                            return Ok(());
                        }
                    }
                }
                Err(e) => {
                    // Best effort: log and retry on the next cycle.
                    ::zeroclaw_log::record!(
                        WARN,
                        ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                            .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
                            .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
                        "iMessage poll error"
                    );
                }
            }
        }
    }
369
370    async fn health_check(&self) -> bool {
371        if !cfg!(target_os = "macos") {
372            return false;
373        }
374
375        let db_path = UserDirs::new()
376            .map(|u| u.home_dir().join("Library/Messages/chat.db"))
377            .unwrap_or_default();
378
379        db_path.exists()
380    }
381}
382
/// Return the highest ROWID among inbound (`is_from_me = 0`) rows of the
/// `message` table, or 0 when there are none.
///
/// Opens its own read-only connection on the blocking pool. The SQL is a
/// fixed statement with no interpolated input (CWE-89 prevention).
#[cfg(test)]
async fn get_max_rowid(db_path: &std::path::Path) -> anyhow::Result<i64> {
    let db_file = db_path.to_path_buf();
    let lookup = move || -> anyhow::Result<i64> {
        let flags = OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX;
        let db = Connection::open_with_flags(&db_file, flags)?;
        // MAX() over zero rows is NULL, which maps to `None`.
        let newest: Option<i64> = db.query_row(
            "SELECT MAX(ROWID) FROM message WHERE is_from_me = 0",
            [],
            |row| row.get(0),
        )?;
        Ok(newest.unwrap_or(0))
    };
    tokio::task::spawn_blocking(lookup).await?
}
400
/// Load up to 20 inbound messages whose ROWID is greater than `since_rowid`,
/// oldest first, as `(rowid, sender, content)` tuples.
///
/// Rows whose resolved content is blank are dropped. `since_rowid` is passed
/// as a bound parameter, never spliced into the SQL (CWE-89 prevention).
#[cfg(test)]
async fn fetch_new_messages(
    db_path: &std::path::Path,
    since_rowid: i64,
) -> anyhow::Result<Vec<(i64, String, String)>> {
    let db_file = db_path.to_path_buf();
    let query = move || -> anyhow::Result<Vec<(i64, String, String)>> {
        let flags = OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX;
        let db = Connection::open_with_flags(&db_file, flags)?;
        let mut stmt = db.prepare(
            "SELECT m.ROWID, h.id, m.text, m.attributedBody \
             FROM message m \
             JOIN handle h ON m.handle_id = h.ROWID \
             WHERE m.ROWID > ?1 \
             AND m.is_from_me = 0 \
             AND (m.text IS NOT NULL OR m.attributedBody IS NOT NULL) \
             ORDER BY m.ROWID ASC \
             LIMIT 20",
        )?;
        let mapped = stmt.query_map([since_rowid], |row| {
            let rowid: i64 = row.get(0)?;
            let sender: String = row.get(1)?;
            let text: Option<String> = row.get(2)?;
            let body: Option<Vec<u8>> = row.get(3)?;
            Ok((rowid, sender, resolve_message_content(rowid, text, body)))
        })?;

        // Stop at the first row error; keep only rows with visible content.
        let mut fresh = Vec::new();
        for entry in mapped {
            let message = entry?;
            if !message.2.trim().is_empty() {
                fresh.push(message);
            }
        }
        Ok(fresh)
    };
    tokio::task::spawn_blocking(query).await?
}
443
444#[cfg(test)]
445mod tests {
446    use super::*;
447
448    #[test]
449    fn creates_with_contacts() {
450        let ch = IMessageChannel::new(
451            "imessage_test_alias",
452            Arc::new(|| vec!["+1234567890".into()]),
453        );
454        assert_eq!(ch.poll_interval_secs, 3);
455        assert!(ch.is_contact_allowed("+1234567890"));
456    }
457
458    #[test]
459    fn creates_with_empty_contacts() {
460        let ch = IMessageChannel::new("imessage_test_alias", Arc::new(Vec::new));
461        assert!(!ch.is_contact_allowed("anyone"));
462    }
463
464    #[test]
465    fn wildcard_allows_anyone() {
466        let ch = IMessageChannel::new("imessage_test_alias", Arc::new(|| vec!["*".into()]));
467        assert!(ch.is_contact_allowed("+1234567890"));
468        assert!(ch.is_contact_allowed("random@icloud.com"));
469        assert!(ch.is_contact_allowed(""));
470    }
471
472    #[test]
473    fn specific_contact_allowed() {
474        let ch = IMessageChannel::new(
475            "imessage_test_alias",
476            Arc::new(|| vec!["+1234567890".into(), "user@icloud.com".into()]),
477        );
478        assert!(ch.is_contact_allowed("+1234567890"));
479        assert!(ch.is_contact_allowed("user@icloud.com"));
480    }
481
482    #[test]
483    fn unknown_contact_denied() {
484        let ch = IMessageChannel::new(
485            "imessage_test_alias",
486            Arc::new(|| vec!["+1234567890".into()]),
487        );
488        assert!(!ch.is_contact_allowed("+9999999999"));
489        assert!(!ch.is_contact_allowed("hacker@evil.com"));
490    }
491
492    #[test]
493    fn contact_case_insensitive() {
494        let ch = IMessageChannel::new(
495            "imessage_test_alias",
496            Arc::new(|| vec!["User@iCloud.com".into()]),
497        );
498        assert!(ch.is_contact_allowed("user@icloud.com"));
499        assert!(ch.is_contact_allowed("USER@ICLOUD.COM"));
500    }
501
502    #[test]
503    fn empty_allowlist_denies_all() {
504        let ch = IMessageChannel::new("imessage_test_alias", Arc::new(Vec::new));
505        assert!(!ch.is_contact_allowed("+1234567890"));
506        assert!(!ch.is_contact_allowed("anyone"));
507    }
508
509    #[test]
510    fn name_returns_imessage() {
511        let ch = IMessageChannel::new("imessage_test_alias", Arc::new(Vec::new));
512        assert_eq!(ch.name(), "imessage");
513    }
514
515    #[test]
516    fn wildcard_among_others_still_allows_all() {
517        let ch = IMessageChannel::new(
518            "imessage_test_alias",
519            Arc::new(|| vec!["+111".into(), "*".into(), "+222".into()]),
520        );
521        assert!(ch.is_contact_allowed("totally-unknown"));
522    }
523
524    #[test]
525    fn contact_with_spaces_exact_match() {
526        let ch = IMessageChannel::new(
527            "imessage_test_alias",
528            Arc::new(|| vec!["  spaced  ".into()]),
529        );
530        assert!(ch.is_contact_allowed("  spaced  "));
531        assert!(!ch.is_contact_allowed("spaced"));
532    }
533
534    // ══════════════════════════════════════════════════════════
535    // AppleScript Escaping Tests (CWE-78 Prevention)
536    // ══════════════════════════════════════════════════════════
537
538    #[test]
539    fn escape_applescript_double_quotes() {
540        assert_eq!(escape_applescript(r#"hello "world""#), r#"hello \"world\""#);
541    }
542
543    #[test]
544    fn escape_applescript_backslashes() {
545        assert_eq!(escape_applescript(r"path\to\file"), r"path\\to\\file");
546    }
547
548    #[test]
549    fn escape_applescript_mixed() {
550        assert_eq!(
551            escape_applescript(r#"say "hello\" world"#),
552            r#"say \"hello\\\" world"#
553        );
554    }
555
556    #[test]
557    fn escape_applescript_injection_attempt() {
558        // This is the exact attack vector from the security report
559        let malicious = r#"" & do shell script "id" & ""#;
560        let escaped = escape_applescript(malicious);
561        // After escaping, the quotes should be escaped and not break out
562        assert_eq!(escaped, r#"\" & do shell script \"id\" & \""#);
563        // Verify all quotes are now escaped (preceded by backslash)
564        // The escaped string should not have any unescaped quotes (quote not preceded by backslash)
565        let chars: Vec<char> = escaped.chars().collect();
566        for (i, &c) in chars.iter().enumerate() {
567            if c == '"' {
568                // Every quote must be preceded by a backslash
569                assert!(
570                    i > 0 && chars[i - 1] == '\\',
571                    "Found unescaped quote at position {i}"
572                );
573            }
574        }
575    }
576
577    #[test]
578    fn escape_applescript_empty_string() {
579        assert_eq!(escape_applescript(""), "");
580    }
581
582    #[test]
583    fn escape_applescript_no_special_chars() {
584        assert_eq!(escape_applescript("hello world"), "hello world");
585    }
586
587    #[test]
588    fn escape_applescript_unicode() {
589        assert_eq!(escape_applescript("hello 🦀 world"), "hello 🦀 world");
590    }
591
592    #[test]
593    fn escape_applescript_newlines_escaped() {
594        assert_eq!(escape_applescript("line1\nline2"), "line1\\nline2");
595        assert_eq!(escape_applescript("line1\rline2"), "line1\\rline2");
596        assert_eq!(escape_applescript("line1\r\nline2"), "line1\\r\\nline2");
597    }
598
599    // ══════════════════════════════════════════════════════════
600    // Target Validation Tests
601    // ══════════════════════════════════════════════════════════
602
603    #[test]
604    fn valid_phone_number_simple() {
605        assert!(is_valid_imessage_target("+1234567890"));
606    }
607
608    #[test]
609    fn valid_phone_number_with_country_code() {
610        assert!(is_valid_imessage_target("+14155551234"));
611    }
612
613    #[test]
614    fn valid_phone_number_with_spaces() {
615        assert!(is_valid_imessage_target("+1 415 555 1234"));
616    }
617
618    #[test]
619    fn valid_phone_number_with_dashes() {
620        assert!(is_valid_imessage_target("+1-415-555-1234"));
621    }
622
623    #[test]
624    fn valid_phone_number_international() {
625        assert!(is_valid_imessage_target("+447911123456")); // UK
626        assert!(is_valid_imessage_target("+81312345678")); // Japan
627    }
628
629    #[test]
630    fn valid_email_simple() {
631        assert!(is_valid_imessage_target("user@example.com"));
632    }
633
634    #[test]
635    fn valid_email_with_subdomain() {
636        assert!(is_valid_imessage_target("user@mail.example.com"));
637    }
638
639    #[test]
640    fn valid_email_with_plus() {
641        assert!(is_valid_imessage_target("user+tag@example.com"));
642    }
643
644    #[test]
645    fn valid_email_with_dots() {
646        assert!(is_valid_imessage_target("first.last@example.com"));
647    }
648
649    #[test]
650    fn valid_email_icloud() {
651        assert!(is_valid_imessage_target("user@icloud.com"));
652        assert!(is_valid_imessage_target("user@me.com"));
653    }
654
655    #[test]
656    fn invalid_target_empty() {
657        assert!(!is_valid_imessage_target(""));
658        assert!(!is_valid_imessage_target("   "));
659    }
660
661    #[test]
662    fn invalid_target_no_plus_prefix() {
663        // Phone numbers must start with +
664        assert!(!is_valid_imessage_target("1234567890"));
665    }
666
667    #[test]
668    fn invalid_target_too_short_phone() {
669        // Less than 7 digits
670        assert!(!is_valid_imessage_target("+123456"));
671    }
672
673    #[test]
674    fn invalid_target_too_long_phone() {
675        // More than 15 digits
676        assert!(!is_valid_imessage_target("+1234567890123456"));
677    }
678
679    #[test]
680    fn invalid_target_email_no_at() {
681        assert!(!is_valid_imessage_target("userexample.com"));
682    }
683
684    #[test]
685    fn invalid_target_email_no_domain() {
686        assert!(!is_valid_imessage_target("user@"));
687    }
688
689    #[test]
690    fn invalid_target_email_no_local() {
691        assert!(!is_valid_imessage_target("@example.com"));
692    }
693
694    #[test]
695    fn invalid_target_email_no_dot_in_domain() {
696        assert!(!is_valid_imessage_target("user@localhost"));
697    }
698
699    #[test]
700    fn invalid_target_injection_attempt() {
701        // The exact attack vector from the security report
702        assert!(!is_valid_imessage_target(r#"" & do shell script "id" & ""#));
703    }
704
705    #[test]
706    fn invalid_target_applescript_injection() {
707        // Various injection attempts
708        assert!(!is_valid_imessage_target(r#"test" & quit"#));
709        assert!(!is_valid_imessage_target(r"test\ndo shell script"));
710        assert!(!is_valid_imessage_target("test\"; malicious code; \""));
711    }
712
713    #[test]
714    fn invalid_target_special_chars() {
715        assert!(!is_valid_imessage_target("user<script>@example.com"));
716        assert!(!is_valid_imessage_target("user@example.com; rm -rf /"));
717    }
718
719    #[test]
720    fn invalid_target_null_byte() {
721        assert!(!is_valid_imessage_target("user\0@example.com"));
722    }
723
724    #[test]
725    fn invalid_target_newline() {
726        assert!(!is_valid_imessage_target("user\n@example.com"));
727    }
728
729    #[test]
730    fn target_with_leading_trailing_whitespace_trimmed() {
731        // Should trim and validate
732        assert!(is_valid_imessage_target("  +1234567890  "));
733        assert!(is_valid_imessage_target("  user@example.com  "));
734    }
735
736    // ══════════════════════════════════════════════════════════
737    // SQLite/rusqlite Database Tests (CWE-89 Prevention)
738    // ══════════════════════════════════════════════════════════
739
740    /// Helper to create a temporary test database with Messages schema
741    fn create_test_db() -> (tempfile::TempDir, std::path::PathBuf) {
742        let dir = tempfile::tempdir().unwrap();
743        let db_path = dir.path().join("chat.db");
744
745        let conn = Connection::open(&db_path).unwrap();
746
747        // Create minimal schema matching macOS Messages.app
748        conn.execute_batch(
749            "CREATE TABLE handle (
750                ROWID INTEGER PRIMARY KEY,
751                id TEXT NOT NULL
752            );
753            CREATE TABLE message (
754                ROWID INTEGER PRIMARY KEY,
755                handle_id INTEGER,
756                text TEXT,
757                attributedBody BLOB,
758                is_from_me INTEGER DEFAULT 0,
759                FOREIGN KEY (handle_id) REFERENCES handle(ROWID)
760            );",
761        )
762        .unwrap();
763
764        (dir, db_path)
765    }
766
767    #[tokio::test]
768    async fn get_max_rowid_empty_database() {
769        let (_dir, db_path) = create_test_db();
770        let result = get_max_rowid(&db_path).await;
771        assert!(result.is_ok());
772        // Empty table returns 0 (NULL coalesced)
773        assert_eq!(result.unwrap(), 0);
774    }
775
776    #[tokio::test]
777    async fn get_max_rowid_with_messages() {
778        let (_dir, db_path) = create_test_db();
779
780        // Insert test data
781        {
782            let conn = Connection::open(&db_path).unwrap();
783            conn.execute(
784                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
785                [],
786            )
787            .unwrap();
788            conn.execute(
789                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (100, 1, 'Hello', 0)",
790                []
791            ).unwrap();
792            conn.execute(
793                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (200, 1, 'World', 0)",
794                []
795            ).unwrap();
796            // This one is from_me=1, should be ignored
797            conn.execute(
798                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (300, 1, 'Sent', 1)",
799                []
800            ).unwrap();
801        }
802
803        let result = get_max_rowid(&db_path).await.unwrap();
804        // Should return 200, not 300 (ignores is_from_me=1)
805        assert_eq!(result, 200);
806    }
807
808    #[tokio::test]
809    async fn get_max_rowid_nonexistent_database() {
810        let path = std::path::Path::new("/nonexistent/path/chat.db");
811        let result = get_max_rowid(path).await;
812        assert!(result.is_err());
813    }
814
815    #[tokio::test]
816    async fn fetch_new_messages_empty_database() {
817        let (_dir, db_path) = create_test_db();
818        let result = fetch_new_messages(&db_path, 0).await;
819        assert!(result.is_ok());
820        assert!(result.unwrap().is_empty());
821    }
822
823    #[tokio::test]
824    async fn fetch_new_messages_returns_correct_data() {
825        let (_dir, db_path) = create_test_db();
826
827        // Insert test data
828        {
829            let conn = Connection::open(&db_path).unwrap();
830            conn.execute(
831                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
832                [],
833            )
834            .unwrap();
835            conn.execute(
836                "INSERT INTO handle (ROWID, id) VALUES (2, 'user@example.com')",
837                [],
838            )
839            .unwrap();
840            conn.execute(
841                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'First message', 0)",
842                []
843            ).unwrap();
844            conn.execute(
845                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (20, 2, 'Second message', 0)",
846                []
847            ).unwrap();
848        }
849
850        let result = fetch_new_messages(&db_path, 0).await.unwrap();
851        assert_eq!(result.len(), 2);
852        assert_eq!(
853            result[0],
854            (10, "+1234567890".to_string(), "First message".to_string())
855        );
856        assert_eq!(
857            result[1],
858            (
859                20,
860                "user@example.com".to_string(),
861                "Second message".to_string()
862            )
863        );
864    }
865
866    #[tokio::test]
867    async fn fetch_new_messages_filters_by_rowid() {
868        let (_dir, db_path) = create_test_db();
869
870        // Insert test data
871        {
872            let conn = Connection::open(&db_path).unwrap();
873            conn.execute(
874                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
875                [],
876            )
877            .unwrap();
878            conn.execute(
879                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Old message', 0)",
880                []
881            ).unwrap();
882            conn.execute(
883                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (20, 1, 'New message', 0)",
884                []
885            ).unwrap();
886        }
887
888        // Fetch only messages after ROWID 15
889        let result = fetch_new_messages(&db_path, 15).await.unwrap();
890        assert_eq!(result.len(), 1);
891        assert_eq!(result[0].0, 20);
892        assert_eq!(result[0].2, "New message");
893    }
894
895    #[tokio::test]
896    async fn fetch_new_messages_excludes_sent_messages() {
897        let (_dir, db_path) = create_test_db();
898
899        // Insert test data
900        {
901            let conn = Connection::open(&db_path).unwrap();
902            conn.execute(
903                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
904                [],
905            )
906            .unwrap();
907            conn.execute(
908                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Received', 0)",
909                []
910            ).unwrap();
911            conn.execute(
912                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (20, 1, 'Sent by me', 1)",
913                []
914            ).unwrap();
915        }
916
917        let result = fetch_new_messages(&db_path, 0).await.unwrap();
918        assert_eq!(result.len(), 1);
919        assert_eq!(result[0].2, "Received");
920    }
921
922    #[tokio::test]
923    async fn fetch_new_messages_excludes_null_text_and_null_body() {
924        let (_dir, db_path) = create_test_db();
925
926        // Insert test data: one with text, one with neither text nor attributedBody
927        {
928            let conn = Connection::open(&db_path).unwrap();
929            conn.execute(
930                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
931                [],
932            )
933            .unwrap();
934            conn.execute(
935                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Has text', 0)",
936                []
937            ).unwrap();
938            conn.execute(
939                "INSERT INTO message (ROWID, handle_id, text, attributedBody, is_from_me) VALUES (20, 1, NULL, NULL, 0)",
940                [],
941            )
942            .unwrap();
943        }
944
945        let result = fetch_new_messages(&db_path, 0).await.unwrap();
946        // Message with NULL text AND NULL attributedBody is excluded
947        assert_eq!(result.len(), 1);
948        assert_eq!(result[0].2, "Has text");
949    }
950
951    #[tokio::test]
952    async fn fetch_new_messages_respects_limit() {
953        let (_dir, db_path) = create_test_db();
954
955        // Insert 25 messages (limit is 20)
956        {
957            let conn = Connection::open(&db_path).unwrap();
958            conn.execute(
959                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
960                [],
961            )
962            .unwrap();
963            for i in 1..=25 {
964                conn.execute(
965                    &format!("INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES ({i}, 1, 'Message {i}', 0)"),
966                    []
967                ).unwrap();
968            }
969        }
970
971        let result = fetch_new_messages(&db_path, 0).await.unwrap();
972        assert_eq!(result.len(), 20); // Limited to 20
973        assert_eq!(result[0].0, 1); // First message
974        assert_eq!(result[19].0, 20); // 20th message
975    }
976
977    #[tokio::test]
978    async fn fetch_new_messages_ordered_by_rowid_asc() {
979        let (_dir, db_path) = create_test_db();
980
981        // Insert messages out of order
982        {
983            let conn = Connection::open(&db_path).unwrap();
984            conn.execute(
985                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
986                [],
987            )
988            .unwrap();
989            conn.execute(
990                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (30, 1, 'Third', 0)",
991                []
992            ).unwrap();
993            conn.execute(
994                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'First', 0)",
995                []
996            ).unwrap();
997            conn.execute(
998                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (20, 1, 'Second', 0)",
999                []
1000            ).unwrap();
1001        }
1002
1003        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1004        assert_eq!(result.len(), 3);
1005        assert_eq!(result[0].0, 10);
1006        assert_eq!(result[1].0, 20);
1007        assert_eq!(result[2].0, 30);
1008    }
1009
1010    #[tokio::test]
1011    async fn fetch_new_messages_nonexistent_database() {
1012        let path = std::path::Path::new("/nonexistent/path/chat.db");
1013        let result = fetch_new_messages(path, 0).await;
1014        assert!(result.is_err());
1015    }
1016
1017    #[tokio::test]
1018    async fn fetch_new_messages_handles_special_characters() {
1019        let (_dir, db_path) = create_test_db();
1020
1021        // Insert message with special characters (potential SQL injection patterns)
1022        {
1023            let conn = Connection::open(&db_path).unwrap();
1024            conn.execute(
1025                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1026                [],
1027            )
1028            .unwrap();
1029            conn.execute(
1030                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Hello \"world'' OR 1=1; DROP TABLE message;--', 0)",
1031                []
1032            ).unwrap();
1033        }
1034
1035        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1036        assert_eq!(result.len(), 1);
1037        // The special characters should be preserved, not interpreted as SQL
1038        assert!(result[0].2.contains("DROP TABLE"));
1039    }
1040
1041    #[tokio::test]
1042    async fn fetch_new_messages_handles_unicode() {
1043        let (_dir, db_path) = create_test_db();
1044
1045        {
1046            let conn = Connection::open(&db_path).unwrap();
1047            conn.execute(
1048                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1049                [],
1050            )
1051            .unwrap();
1052            conn.execute(
1053                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Hello 🦀 世界 مرحبا', 0)",
1054                []
1055            ).unwrap();
1056        }
1057
1058        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1059        assert_eq!(result.len(), 1);
1060        assert_eq!(result[0].2, "Hello 🦀 世界 مرحبا");
1061    }
1062
1063    #[tokio::test]
1064    async fn fetch_new_messages_filters_empty_text() {
1065        let (_dir, db_path) = create_test_db();
1066
1067        {
1068            let conn = Connection::open(&db_path).unwrap();
1069            conn.execute(
1070                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1071                [],
1072            )
1073            .unwrap();
1074            conn.execute(
1075                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, '', 0)",
1076                [],
1077            )
1078            .unwrap();
1079        }
1080
1081        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1082        // Empty-content messages are filtered out
1083        assert!(result.is_empty());
1084    }
1085
1086    #[tokio::test]
1087    async fn fetch_new_messages_negative_rowid_edge_case() {
1088        let (_dir, db_path) = create_test_db();
1089
1090        {
1091            let conn = Connection::open(&db_path).unwrap();
1092            conn.execute(
1093                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1094                [],
1095            )
1096            .unwrap();
1097            conn.execute(
1098                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Test', 0)",
1099                []
1100            ).unwrap();
1101        }
1102
1103        // Negative rowid should still work (fetch all messages with ROWID > -1)
1104        let result = fetch_new_messages(&db_path, -1).await.unwrap();
1105        assert_eq!(result.len(), 1);
1106    }
1107
1108    #[tokio::test]
1109    async fn fetch_new_messages_large_rowid_edge_case() {
1110        let (_dir, db_path) = create_test_db();
1111
1112        {
1113            let conn = Connection::open(&db_path).unwrap();
1114            conn.execute(
1115                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1116                [],
1117            )
1118            .unwrap();
1119            conn.execute(
1120                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Test', 0)",
1121                []
1122            ).unwrap();
1123        }
1124
1125        // Very large rowid should return empty (no messages after this)
1126        let result = fetch_new_messages(&db_path, i64::MAX - 1).await.unwrap();
1127        assert!(result.is_empty());
1128    }
1129
1130    // ══════════════════════════════════════════════════════════
1131    // attributedBody / typedstream parsing tests
1132    // ══════════════════════════════════════════════════════════
1133
1134    /// Build a minimal typedstream blob containing the given text.
1135    /// Format: [header] [class bytes] [0x01, 0x2B] [length-prefix] [utf8] [0x86, 0x84]
1136    fn make_attributed_body(text: &str) -> Vec<u8> {
1137        let text_bytes = text.as_bytes();
1138        let mut blob = Vec::new();
1139        // Fake streamtyped header (not parsed by our extractor)
1140        blob.extend_from_slice(b"\x04\x0bstreamtyped\x81\xe8\x03");
1141        // Class hierarchy bytes (skipped by marker scan)
1142        blob.extend_from_slice(b"\x84\x84NSMutableAttributedString\x00");
1143        // Start-of-text marker
1144        blob.push(0x01);
1145        blob.push(0x2B);
1146        // Length prefix (try_from panics on violation — correct for test helper)
1147        let len = text_bytes.len();
1148        if len <= 0x7F {
1149            blob.push(u8::try_from(len).unwrap());
1150        } else if len <= 0xFFFF {
1151            blob.push(0x81);
1152            blob.extend_from_slice(&u16::try_from(len).unwrap().to_le_bytes());
1153        } else {
1154            blob.push(0x82);
1155            blob.extend_from_slice(&u32::try_from(len).unwrap().to_le_bytes());
1156        }
1157        // Text content
1158        blob.extend_from_slice(text_bytes);
1159        // End-of-text marker
1160        blob.push(0x86);
1161        blob.push(0x84);
1162        // Trailing attribute bytes (ignored)
1163        blob.extend_from_slice(b"\x86\x86");
1164        blob
1165    }
1166
    // Real attributedBody blob from macOS chat.db, captured during testing.
    // Decodes to: "Testing with imsg installed"
    //
    // Used as a fixture for the extractor tests. The bytes `0x01, 0x2B` (the
    // start-of-text marker) are followed by `0x1B` (a one-byte length prefix
    // of 27) and then the 27 ASCII bytes of the message. The remaining bytes
    // are kept verbatim from the capture.
    const REAL_BLOB_TESTING: &[u8] = &[
        0x04, 0x0B, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6D, 0x74, 0x79, 0x70, 0x65, 0x64, 0x81, 0xE8,
        0x03, 0x84, 0x01, 0x40, 0x84, 0x84, 0x84, 0x12, 0x4E, 0x53, 0x41, 0x74, 0x74, 0x72, 0x69,
        0x62, 0x75, 0x74, 0x65, 0x64, 0x53, 0x74, 0x72, 0x69, 0x6E, 0x67, 0x00, 0x84, 0x84, 0x08,
        0x4E, 0x53, 0x4F, 0x62, 0x6A, 0x65, 0x63, 0x74, 0x00, 0x85, 0x92, 0x84, 0x84, 0x84, 0x08,
        0x4E, 0x53, 0x53, 0x74, 0x72, 0x69, 0x6E, 0x67, 0x01, 0x94, 0x84, 0x01, 0x2B, 0x1B, 0x54,
        0x65, 0x73, 0x74, 0x69, 0x6E, 0x67, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x69, 0x6D, 0x73,
        0x67, 0x20, 0x69, 0x6E, 0x73, 0x74, 0x61, 0x6C, 0x6C, 0x65, 0x64, 0x86, 0x84, 0x02, 0x69,
        0x49, 0x01, 0x1B, 0x92, 0x84, 0x84, 0x84, 0x0C, 0x4E, 0x53, 0x44, 0x69, 0x63, 0x74, 0x69,
        0x6F, 0x6E, 0x61, 0x72, 0x79, 0x00, 0x94, 0x84, 0x01, 0x69, 0x01, 0x92, 0x84, 0x96, 0x96,
        0x1D, 0x5F, 0x5F, 0x6B, 0x49, 0x4D, 0x4D, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x50, 0x61,
        0x72, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x4E, 0x61, 0x6D, 0x65,
        0x86, 0x92, 0x84, 0x84, 0x84, 0x08, 0x4E, 0x53, 0x4E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0x00,
        0x84, 0x84, 0x07, 0x4E, 0x53, 0x56, 0x61, 0x6C, 0x75, 0x65, 0x00, 0x94, 0x84, 0x01, 0x2A,
        0x84, 0x99, 0x99, 0x00, 0x86, 0x86, 0x86,
    ];
1185
    // Real attributedBody blob from unknownbreaker/MessageBridge (MIT).
    // Decodes to: "1"
    //
    // Used as a fixture for the extractor tests. The bytes `0x01, 0x2b` (the
    // start-of-text marker) are followed by `0x01` (a one-byte length prefix
    // of 1) and then `0x31`, the ASCII digit "1". The remaining bytes are kept
    // verbatim from the capture.
    const REAL_BLOB_ONE: &[u8] = &[
        0x04, 0x0b, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x74, 0x79, 0x70, 0x65, 0x64, 0x81, 0xe8,
        0x03, 0x84, 0x01, 0x40, 0x84, 0x84, 0x84, 0x12, 0x4e, 0x53, 0x41, 0x74, 0x74, 0x72, 0x69,
        0x62, 0x75, 0x74, 0x65, 0x64, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x00, 0x84, 0x84, 0x08,
        0x4e, 0x53, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x00, 0x85, 0x92, 0x84, 0x84, 0x84, 0x08,
        0x4e, 0x53, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x01, 0x94, 0x84, 0x01, 0x2b, 0x01, 0x31,
        0x86, 0x84, 0x02, 0x69, 0x49, 0x01, 0x01, 0x92, 0x84, 0x84, 0x84, 0x0c, 0x4e, 0x53, 0x44,
        0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x72, 0x79, 0x00, 0x94, 0x84, 0x01, 0x69, 0x01,
        0x92, 0x84, 0x96, 0x96, 0x1d, 0x5f, 0x5f, 0x6b, 0x49, 0x4d, 0x4d, 0x65, 0x73, 0x73, 0x61,
        0x67, 0x65, 0x50, 0x61, 0x72, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65,
        0x4e, 0x61, 0x6d, 0x65, 0x86, 0x92, 0x84, 0x84, 0x84, 0x08, 0x4e, 0x53, 0x4e, 0x75, 0x6d,
        0x62, 0x65, 0x72, 0x00, 0x84, 0x84, 0x07, 0x4e, 0x53, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x00,
        0x94, 0x84, 0x01, 0x2a, 0x84, 0x99, 0x99, 0x00, 0x86, 0x86, 0x86,
    ];
1202
1203    #[test]
1204    fn extract_real_blob_testing_with_imsg() {
1205        let result = extract_text_from_attributed_body(REAL_BLOB_TESTING);
1206        assert_eq!(result, Some("Testing with imsg installed".to_string()));
1207    }
1208
1209    #[test]
1210    fn extract_real_blob_single_char() {
1211        // From unknownbreaker/MessageBridge (MIT)
1212        let result = extract_text_from_attributed_body(REAL_BLOB_ONE);
1213        assert_eq!(result, Some("1".to_string()));
1214    }
1215
1216    #[test]
1217    fn extract_text_containing_end_marker_bytes() {
1218        // U+2184 LATIN SMALL LETTER REVERSED C encodes to E2 86 84 in UTF-8.
1219        // The old parser scanned for [0x86, 0x84] as end marker and would
1220        // truncate here. The length-based parser must handle this correctly.
1221        let text = "before\u{2184}after";
1222        let blob = make_attributed_body(text);
1223        let result = extract_text_from_attributed_body(&blob);
1224        assert_eq!(result, Some(text.to_string()));
1225    }
1226
1227    #[test]
1228    fn extract_zero_length_returns_empty_string() {
1229        // Marker found with length prefix = 0. Valid typedstream encoding
1230        // for an empty NSString — parser returns Some(""), which
1231        // resolve_message_content() will treat as empty and discard.
1232        let blob = b"\x01\x2B\x00";
1233        let result = extract_text_from_attributed_body(blob);
1234        assert_eq!(result, Some(String::new()));
1235    }
1236
1237    #[test]
1238    fn extract_no_markers_returns_none() {
1239        let blob = b"just some random bytes with no markers";
1240        let result = extract_text_from_attributed_body(blob);
1241        assert!(result.is_none());
1242    }
1243
1244    #[test]
1245    fn extract_invalid_utf8_returns_none() {
1246        let blob = b"\x01\x2B\x04\xFF\xFE\x80\x81";
1247        let result = extract_text_from_attributed_body(blob);
1248        assert!(result.is_none());
1249    }
1250
1251    #[test]
1252    fn extract_truncated_blob_returns_none() {
1253        // Length prefix says 27 bytes but blob is truncated
1254        let blob = b"\x01\x2B\x1B\x54\x65\x73\x74";
1255        let result = extract_text_from_attributed_body(blob);
1256        assert!(result.is_none());
1257    }
1258
1259    #[test]
1260    fn extract_long_text_two_byte_length() {
1261        // >127 bytes triggers 0x81 length prefix
1262        let long_text: String = "A".repeat(200);
1263        let blob = make_attributed_body(&long_text);
1264        let result = extract_text_from_attributed_body(&blob);
1265        assert_eq!(result, Some(long_text));
1266    }
1267
1268    #[test]
1269    fn extract_four_byte_length_prefix() {
1270        // Test the 0x82 branch: 4-byte little-endian u32 length prefix.
1271        // Construct directly — make_attributed_body only emits 0x82 for >64KB.
1272        let text = b"Hello";
1273        let mut blob = Vec::new();
1274        blob.extend_from_slice(b"\x01\x2B"); // start marker
1275        blob.push(0x82); // 4-byte length tag
1276        blob.extend_from_slice(&5_u32.to_le_bytes()); // length = 5
1277        blob.extend_from_slice(text);
1278        let result = extract_text_from_attributed_body(&blob);
1279        assert_eq!(result, Some("Hello".to_string()));
1280    }
1281
1282    #[test]
1283    fn extract_text_boundary_127_to_128() {
1284        // 127 is max single-byte length, 128 is min two-byte length
1285        for len in [127, 128] {
1286            let text: String = "X".repeat(len);
1287            let blob = make_attributed_body(&text);
1288            let result = extract_text_from_attributed_body(&blob);
1289            assert_eq!(result, Some(text), "failed at length {len}");
1290        }
1291    }
1292
1293    #[tokio::test]
1294    async fn fetch_new_messages_reads_attributed_body_fallback() {
1295        let (_dir, db_path) = create_test_db();
1296
1297        {
1298            let conn = Connection::open(&db_path).unwrap();
1299            conn.execute(
1300                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1301                [],
1302            )
1303            .unwrap();
1304            // Real blob from macOS chat.db — text=NULL, attributedBody present
1305            conn.execute(
1306                "INSERT INTO message (ROWID, handle_id, text, attributedBody, is_from_me) VALUES (10, 1, NULL, ?1, 0)",
1307                [REAL_BLOB_TESTING.to_vec()],
1308            ).unwrap();
1309        }
1310
1311        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1312        assert_eq!(result.len(), 1);
1313        assert_eq!(result[0].2, "Testing with imsg installed");
1314    }
1315
1316    #[tokio::test]
1317    async fn fetch_new_messages_empty_text_falls_back_to_attributed_body() {
1318        let (_dir, db_path) = create_test_db();
1319
1320        {
1321            let conn = Connection::open(&db_path).unwrap();
1322            conn.execute(
1323                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1324                [],
1325            )
1326            .unwrap();
1327            // text = '' (empty string, not NULL) with valid attributedBody
1328            conn.execute(
1329                "INSERT INTO message (ROWID, handle_id, text, attributedBody, is_from_me) VALUES (10, 1, '', ?1, 0)",
1330                [REAL_BLOB_ONE.to_vec()],
1331            ).unwrap();
1332        }
1333
1334        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1335        assert_eq!(result.len(), 1);
1336        assert_eq!(result[0].2, "1");
1337    }
1338
1339    #[tokio::test]
1340    async fn fetch_new_messages_prefers_text_over_attributed_body() {
1341        let (_dir, db_path) = create_test_db();
1342
1343        {
1344            let conn = Connection::open(&db_path).unwrap();
1345            conn.execute(
1346                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1347                [],
1348            )
1349            .unwrap();
1350            // Both text and attributedBody present — text column wins
1351            conn.execute(
1352                "INSERT INTO message (ROWID, handle_id, text, attributedBody, is_from_me) VALUES (10, 1, 'Plain text', ?1, 0)",
1353                [REAL_BLOB_ONE.to_vec()],
1354            ).unwrap();
1355        }
1356
1357        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1358        assert_eq!(result.len(), 1);
1359        assert_eq!(result[0].2, "Plain text");
1360    }
1361
1362    #[tokio::test]
1363    async fn fetch_new_messages_mixed_text_and_attributed_body() {
1364        let (_dir, db_path) = create_test_db();
1365
1366        {
1367            let conn = Connection::open(&db_path).unwrap();
1368            conn.execute(
1369                "INSERT INTO handle (ROWID, id) VALUES (1, '+1234567890')",
1370                [],
1371            )
1372            .unwrap();
1373            // Old-style message with text column
1374            conn.execute(
1375                "INSERT INTO message (ROWID, handle_id, text, is_from_me) VALUES (10, 1, 'Legacy message', 0)",
1376                []
1377            ).unwrap();
1378            // Modern message with only attributedBody (real blob)
1379            conn.execute(
1380                "INSERT INTO message (ROWID, handle_id, text, attributedBody, is_from_me) VALUES (20, 1, NULL, ?1, 0)",
1381                [REAL_BLOB_ONE.to_vec()],
1382            ).unwrap();
1383            // Message with neither (should be excluded)
1384            conn.execute(
1385                "INSERT INTO message (ROWID, handle_id, text, attributedBody, is_from_me) VALUES (30, 1, NULL, NULL, 0)",
1386                [],
1387            ).unwrap();
1388        }
1389
1390        let result = fetch_new_messages(&db_path, 0).await.unwrap();
1391        assert_eq!(result.len(), 2);
1392        assert_eq!(result[0].2, "Legacy message");
1393        assert_eq!(result[1].2, "1");
1394    }
1395}