zeroclaw_runtime/tools/
file_read.rs

1use crate::security::SecurityPolicy;
2use async_trait::async_trait;
3use serde_json::json;
4use std::sync::Arc;
5use zeroclaw_api::tool::{Tool, ToolResult};
6
/// Largest file, in bytes, that the tool will read (10 MiB); larger files are
/// rejected after their metadata is inspected.
const MAX_FILE_SIZE_BYTES: u64 = 10 << 20;
8
/// Read file contents with workspace sandboxing.
///
/// The tool holds the shared [`SecurityPolicy`], which supplies the workspace
/// directory used to resolve relative paths, decides whether a canonicalized
/// path may be read, and records an action when path resolution fails.
pub struct FileReadTool {
    /// Shared security policy consulted on every read.
    security: Arc<SecurityPolicy>,
}
13
14impl FileReadTool {
15    pub fn new(security: Arc<SecurityPolicy>) -> Self {
16        Self { security }
17    }
18
19    /// Resolve a caller-supplied path to an absolute candidate. Reject
20    /// only path-shape attacks (null byte, `..` traversal); the
21    /// allowlist gate is `SecurityPolicy::is_resolved_path_readable`
22    /// after canonicalize, which already unions `allowed_roots` and
23    /// `allowed_roots_read_only`.
24    fn resolve_candidate(&self, path: &str) -> anyhow::Result<std::path::PathBuf> {
25        if path.contains('\0') {
26            anyhow::bail!("Path not allowed: contains null byte");
27        }
28        if std::path::Path::new(path)
29            .components()
30            .any(|c| matches!(c, std::path::Component::ParentDir))
31        {
32            anyhow::bail!("Path not allowed by security policy: {path}");
33        }
34
35        let p = std::path::Path::new(path);
36        if p.is_absolute() {
37            return Ok(p.to_path_buf());
38        }
39
40        let workspace_dir = &self.security.workspace_dir;
41        if let Ok(workspace_rootless) = workspace_dir.strip_prefix("/")
42            && let Ok(stripped) = p.strip_prefix(workspace_rootless)
43        {
44            return Ok(if stripped.as_os_str().is_empty() {
45                workspace_dir.clone()
46            } else {
47                workspace_dir.join(stripped)
48            });
49        }
50
51        Ok(workspace_dir.join(p))
52    }
53}
54
55#[async_trait]
56impl Tool for FileReadTool {
57    fn name(&self) -> &str {
58        "file_read"
59    }
60
61    fn description(&self) -> &str {
62        "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion."
63    }
64
65    fn parameters_schema(&self) -> serde_json::Value {
66        json!({
67            "type": "object",
68            "properties": {
69                "path": {
70                    "type": "string",
71                    "description": "Path to the file. Relative paths resolve from workspace root; absolute paths must be within the workspace."
72                },
73                "offset": {
74                    "type": "integer",
75                    "description": "Starting line number (1-based, default: 1)"
76                },
77                "limit": {
78                    "type": "integer",
79                    "description": "Maximum number of lines to return (default: all)"
80                }
81            },
82            "required": ["path"]
83        })
84    }
85
86    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
87        let path = args.get("path").and_then(|v| v.as_str()).ok_or_else(|| {
88            ::zeroclaw_log::record!(
89                WARN,
90                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
91                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
92                    .with_attrs(::serde_json::json!({"param": "path"})),
93                "tool argument validation failed"
94            );
95
96            anyhow::Error::msg("Missing 'path' parameter")
97        })?;
98
99        // Cross-cutting rate limiting and path-allowlist checks live in the
100        // RateLimitedTool + PathGuardedTool wrappers at registration time
101        // (see zeroclaw-runtime::tools::mod).  Successful reads consume one
102        // budget slot via the outer RateLimitedTool.
103        //
104        // Read-tool exception: post-`PathGuardedTool` resolve/canonicalize
105        // failures (path-traversal that slipped through allowlist, missing
106        // file) also consume one budget slot, charged here, so that callers
107        // cannot probe path existence for free.  The outer wrapper only
108        // records on `success: true`, so calling `record_action()` on these
109        // failure paths charges exactly one slot per attempt — matching the
110        // pre-wrapper semantics where every attempted read cost one slot.
111
112        // Validate and build candidate path using workspace_dir directly.
113        let full_path = match self.resolve_candidate(path) {
114            Ok(p) => p,
115            Err(e) => {
116                let _ = self.security.record_action();
117                return Ok(ToolResult {
118                    success: false,
119                    output: String::new(),
120                    error: Some(e.to_string()),
121                });
122            }
123        };
124
125        // Canonicalize to resolve symlinks, then enforce workspace boundary.
126        let resolved_path = match tokio::fs::canonicalize(&full_path).await {
127            Ok(p) => p,
128            Err(e) => {
129                let _ = self.security.record_action();
130                return Ok(ToolResult {
131                    success: false,
132                    output: String::new(),
133                    error: Some(format!("Failed to resolve file path: {e}")),
134                });
135            }
136        };
137
138        // Read access: workspace + read-write allowlist + read-only allowlist
139        // + universal POSIX device files (/dev/null, etc.).
140        if !self.security.is_resolved_path_readable(&resolved_path) {
141            return Ok(ToolResult {
142                success: false,
143                output: String::new(),
144                error: Some(format!("Path escapes workspace directory: {path}")),
145            });
146        }
147
148        // Check file size AFTER canonicalization to prevent TOCTOU symlink bypass
149        match tokio::fs::metadata(&resolved_path).await {
150            Ok(meta) => {
151                if meta.len() > MAX_FILE_SIZE_BYTES {
152                    return Ok(ToolResult {
153                        success: false,
154                        output: String::new(),
155                        error: Some(format!(
156                            "File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
157                            meta.len()
158                        )),
159                    });
160                }
161            }
162            Err(e) => {
163                return Ok(ToolResult {
164                    success: false,
165                    output: String::new(),
166                    error: Some(format!("Failed to read file metadata: {e}")),
167                });
168            }
169        }
170
171        match tokio::fs::read_to_string(&resolved_path).await {
172            Ok(contents) => {
173                let lines: Vec<&str> = contents.lines().collect();
174                let total = lines.len();
175
176                if total == 0 {
177                    return Ok(ToolResult {
178                        success: true,
179                        output: String::new(),
180                        error: None,
181                    });
182                }
183
184                let offset = args
185                    .get("offset")
186                    .and_then(|v| v.as_u64())
187                    .map(|v| {
188                        usize::try_from(v.max(1))
189                            .unwrap_or(usize::MAX)
190                            .saturating_sub(1)
191                    })
192                    .unwrap_or(0);
193                let start = offset.min(total);
194
195                let end = match args.get("limit").and_then(|v| v.as_u64()) {
196                    Some(l) => {
197                        let limit = usize::try_from(l).unwrap_or(usize::MAX);
198                        (start.saturating_add(limit)).min(total)
199                    }
200                    None => total,
201                };
202
203                if start >= end {
204                    return Ok(ToolResult {
205                        success: true,
206                        output: format!("[No lines in range, file has {total} lines]"),
207                        error: None,
208                    });
209                }
210
211                let numbered: String = lines[start..end]
212                    .iter()
213                    .enumerate()
214                    .map(|(i, line)| format!("{}: {}", start + i + 1, line))
215                    .collect::<Vec<_>>()
216                    .join("\n");
217
218                let partial = start > 0 || end < total;
219                let summary = if partial {
220                    format!("\n[Lines {}-{} of {total}]", start + 1, end)
221                } else {
222                    format!("\n[{total} lines total]")
223                };
224
225                Ok(ToolResult {
226                    success: true,
227                    output: format!("{numbered}{summary}"),
228                    error: None,
229                })
230            }
231            Err(_) => {
232                // Not valid UTF-8 — read raw bytes and try to extract text
233                let bytes = tokio::fs::read(&resolved_path).await.map_err(|e| {
234                    ::zeroclaw_log::record!(
235                        WARN,
236                        ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
237                            .with_outcome(::zeroclaw_log::EventOutcome::Failure)
238                            .with_attrs(::serde_json::json!({
239                                "path": resolved_path.display().to_string(),
240                                "error": format!("{}", e),
241                            })),
242                        "file_read: raw byte fallback read failed"
243                    );
244                    anyhow::Error::msg(format!("Failed to read file: {e}"))
245                })?;
246
247                if let Some(text) = try_extract_pdf_text(&bytes) {
248                    return Ok(ToolResult {
249                        success: true,
250                        output: text,
251                        error: None,
252                    });
253                }
254
255                // Lossy fallback — replaces invalid bytes with U+FFFD
256                let lossy = String::from_utf8_lossy(&bytes).into_owned();
257                Ok(ToolResult {
258                    success: true,
259                    output: lossy,
260                    error: None,
261                })
262            }
263        }
264    }
265}
266
/// Extract text from `bytes` when they start with the `%PDF-` marker.
///
/// Returns `None` when the marker is absent, when `pdf_extract` fails, or
/// when the extracted text is empty after trimming whitespace.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|extracted| !extracted.trim().is_empty())
}
278
/// Stand-in used when the `rag-pdf` feature is disabled: never extracts text,
/// so callers always fall through to their non-PDF handling.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_: &[u8]) -> Option<String> {
    None
}
283
284#[cfg(test)]
285mod tests {
286    use super::*;
287    use crate::security::{AutonomyLevel, SecurityPolicy};
288
289    fn test_tool(workspace: std::path::PathBuf) -> FileReadTool {
290        let security = Arc::new(SecurityPolicy {
291            autonomy: AutonomyLevel::Supervised,
292            workspace_dir: workspace,
293            ..SecurityPolicy::default()
294        });
295        FileReadTool::new(security)
296    }
297
298    fn test_tool_with(
299        workspace: std::path::PathBuf,
300        autonomy: AutonomyLevel,
301        max_actions_per_hour: u32,
302    ) -> FileReadTool {
303        let security = Arc::new(SecurityPolicy {
304            autonomy,
305            workspace_dir: workspace,
306            max_actions_per_hour,
307            ..SecurityPolicy::default()
308        });
309        FileReadTool::new(security)
310    }
311
312    #[test]
313    fn file_read_name() {
314        let tool = test_tool(std::env::temp_dir());
315        assert_eq!(tool.name(), "file_read");
316    }
317
318    #[test]
319    fn file_read_schema_has_path() {
320        let tool = test_tool(std::env::temp_dir());
321        let schema = tool.parameters_schema();
322        assert!(schema["properties"]["path"].is_object());
323        assert!(schema["properties"]["offset"].is_object());
324        assert!(schema["properties"]["limit"].is_object());
325        assert!(
326            schema["required"]
327                .as_array()
328                .unwrap()
329                .contains(&json!("path"))
330        );
331        // offset and limit are optional
332        assert!(
333            !schema["required"]
334                .as_array()
335                .unwrap()
336                .contains(&json!("offset"))
337        );
338    }
339
340    #[tokio::test]
341    async fn file_read_existing_file() {
342        let dir = std::env::temp_dir().join("zeroclaw_test_file_read");
343        let _ = tokio::fs::remove_dir_all(&dir).await;
344        tokio::fs::create_dir_all(&dir).await.unwrap();
345        tokio::fs::write(dir.join("test.txt"), "hello world")
346            .await
347            .unwrap();
348
349        let tool = test_tool(dir.clone());
350        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
351        assert!(result.success);
352        assert!(result.output.contains("1: hello world"));
353        assert!(result.output.contains("[1 lines total]"));
354        assert!(result.error.is_none());
355
356        let _ = tokio::fs::remove_dir_all(&dir).await;
357    }
358
359    #[tokio::test]
360    async fn file_read_nonexistent_file() {
361        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_missing");
362        let _ = tokio::fs::remove_dir_all(&dir).await;
363        tokio::fs::create_dir_all(&dir).await.unwrap();
364
365        let tool = test_tool(dir.clone());
366        let result = tool.execute(json!({"path": "nope.txt"})).await.unwrap();
367        assert!(!result.success);
368        assert!(result.error.as_ref().unwrap().contains("Failed to resolve"));
369
370        let _ = tokio::fs::remove_dir_all(&dir).await;
371    }
372
373    #[tokio::test]
374    async fn file_read_blocks_path_traversal() {
375        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_traversal");
376        let _ = tokio::fs::remove_dir_all(&dir).await;
377        tokio::fs::create_dir_all(&dir).await.unwrap();
378
379        let tool = test_tool(dir.clone());
380        let result = tool
381            .execute(json!({"path": "../../../etc/passwd"}))
382            .await
383            .unwrap();
384        assert!(!result.success);
385        assert!(result.error.as_ref().unwrap().contains("not allowed"));
386
387        let _ = tokio::fs::remove_dir_all(&dir).await;
388    }
389
390    #[tokio::test]
391    async fn file_read_blocks_absolute_path() {
392        let tool = test_tool(std::env::temp_dir());
393
394        #[cfg(unix)]
395        let target = "/etc/passwd";
396        #[cfg(windows)]
397        let target = {
398            let sysroot = std::env::var("SystemRoot").unwrap_or_else(|_| r"C:\Windows".to_string());
399            std::path::PathBuf::from(sysroot).join(r"System32\drivers\etc\hosts")
400        };
401
402        let result = tool.execute(json!({"path": target})).await.unwrap();
403        assert!(!result.success);
404        assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
405    }
406
407    #[tokio::test]
408    async fn file_read_allows_readonly_mode() {
409        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_readonly");
410        let _ = tokio::fs::remove_dir_all(&dir).await;
411        tokio::fs::create_dir_all(&dir).await.unwrap();
412        tokio::fs::write(dir.join("test.txt"), "readonly ok")
413            .await
414            .unwrap();
415
416        let tool = test_tool_with(dir.clone(), AutonomyLevel::ReadOnly, 20);
417        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
418
419        assert!(result.success);
420        assert!(result.output.contains("1: readonly ok"));
421
422        let _ = tokio::fs::remove_dir_all(&dir).await;
423    }
424
425    #[tokio::test]
426    async fn file_read_missing_path_param() {
427        let tool = test_tool(std::env::temp_dir());
428        let result = tool.execute(json!({})).await;
429        assert!(result.is_err());
430    }
431
432    #[tokio::test]
433    async fn file_read_empty_file() {
434        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_empty");
435        let _ = tokio::fs::remove_dir_all(&dir).await;
436        tokio::fs::create_dir_all(&dir).await.unwrap();
437        tokio::fs::write(dir.join("empty.txt"), "").await.unwrap();
438
439        let tool = test_tool(dir.clone());
440        let result = tool.execute(json!({"path": "empty.txt"})).await.unwrap();
441        assert!(result.success);
442        assert_eq!(result.output, "");
443
444        let _ = tokio::fs::remove_dir_all(&dir).await;
445    }
446
447    #[tokio::test]
448    async fn file_read_nested_path() {
449        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_nested");
450        let _ = tokio::fs::remove_dir_all(&dir).await;
451        tokio::fs::create_dir_all(dir.join("sub/dir"))
452            .await
453            .unwrap();
454        tokio::fs::write(dir.join("sub/dir/deep.txt"), "deep content")
455            .await
456            .unwrap();
457
458        let tool = test_tool(dir.clone());
459        let result = tool
460            .execute(json!({"path": "sub/dir/deep.txt"}))
461            .await
462            .unwrap();
463        assert!(result.success);
464        assert!(result.output.contains("1: deep content"));
465
466        let _ = tokio::fs::remove_dir_all(&dir).await;
467    }
468
469    #[cfg(unix)]
470    #[tokio::test]
471    async fn file_read_blocks_symlink_escape() {
472        use std::os::unix::fs::symlink;
473
474        let root = std::env::temp_dir().join("zeroclaw_test_file_read_symlink_escape");
475        let workspace = root.join("workspace");
476        let outside = root.join("outside");
477
478        let _ = tokio::fs::remove_dir_all(&root).await;
479        tokio::fs::create_dir_all(&workspace).await.unwrap();
480        tokio::fs::create_dir_all(&outside).await.unwrap();
481
482        tokio::fs::write(outside.join("secret.txt"), "outside workspace")
483            .await
484            .unwrap();
485
486        symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();
487
488        let tool = test_tool(workspace.clone());
489        let result = tool.execute(json!({"path": "escape.txt"})).await.unwrap();
490
491        assert!(!result.success);
492        assert!(
493            result
494                .error
495                .as_deref()
496                .unwrap_or("")
497                .contains("escapes workspace")
498        );
499
500        let _ = tokio::fs::remove_dir_all(&root).await;
501    }
502
503    #[tokio::test]
504    async fn file_read_blocks_outside_workspace_regardless_of_policy() {
505        let root = std::env::temp_dir().join("zeroclaw_test_file_read_blocks_outside");
506        let workspace = root.join("workspace");
507        let outside = root.join("outside");
508        let outside_file = outside.join("notes.txt");
509
510        let _ = tokio::fs::remove_dir_all(&root).await;
511        tokio::fs::create_dir_all(&workspace).await.unwrap();
512        tokio::fs::create_dir_all(&outside).await.unwrap();
513        tokio::fs::write(&outside_file, "outside").await.unwrap();
514
515        let tool = test_tool(workspace.clone());
516
517        let result = tool
518            .execute(json!({"path": outside_file.to_string_lossy().to_string()}))
519            .await
520            .unwrap();
521
522        assert!(!result.success);
523        assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
524
525        let _ = tokio::fs::remove_dir_all(&root).await;
526    }
527
528    #[tokio::test]
529    async fn file_read_admits_absolute_path_under_read_only_root() {
530        let root =
531            std::env::temp_dir().join("zeroclaw_test_file_read_admits_absolute_path_under_ro_root");
532        let workspace = root.join("workspace");
533        let ro_root = root.join("shared");
534        let ro_file = ro_root.join("notes.txt");
535
536        let _ = tokio::fs::remove_dir_all(&root).await;
537        tokio::fs::create_dir_all(&workspace).await.unwrap();
538        tokio::fs::create_dir_all(&ro_root).await.unwrap();
539        tokio::fs::write(&ro_file, "cross-agent read")
540            .await
541            .unwrap();
542
543        let security = Arc::new(SecurityPolicy {
544            autonomy: AutonomyLevel::Supervised,
545            workspace_dir: workspace,
546            allowed_roots_read_only: vec![ro_root.clone()],
547            ..SecurityPolicy::default()
548        });
549        let tool = FileReadTool::new(security);
550
551        let result = tool
552            .execute(json!({"path": ro_file.to_string_lossy().to_string()}))
553            .await
554            .unwrap();
555
556        assert!(
557            result.success,
558            "absolute path under read-only root must read: {result:?}"
559        );
560        assert!(result.output.contains("cross-agent read"));
561
562        let _ = tokio::fs::remove_dir_all(&root).await;
563    }
564
565    #[tokio::test]
566    async fn file_read_with_offset_and_limit() {
567        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset");
568        let _ = tokio::fs::remove_dir_all(&dir).await;
569        tokio::fs::create_dir_all(&dir).await.unwrap();
570        tokio::fs::write(dir.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
571            .await
572            .unwrap();
573
574        let tool = test_tool(dir.clone());
575
576        // Read lines 2-3
577        let result = tool
578            .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
579            .await
580            .unwrap();
581        assert!(result.success);
582        assert!(result.output.contains("2: bbb"));
583        assert!(result.output.contains("3: ccc"));
584        assert!(!result.output.contains("1: aaa"));
585        assert!(!result.output.contains("4: ddd"));
586        assert!(result.output.contains("[Lines 2-3 of 5]"));
587
588        // Read from offset 4 to end
589        let result = tool
590            .execute(json!({"path": "lines.txt", "offset": 4}))
591            .await
592            .unwrap();
593        assert!(result.success);
594        assert!(result.output.contains("4: ddd"));
595        assert!(result.output.contains("5: eee"));
596        assert!(result.output.contains("[Lines 4-5 of 5]"));
597
598        // Limit only (first 2 lines)
599        let result = tool
600            .execute(json!({"path": "lines.txt", "limit": 2}))
601            .await
602            .unwrap();
603        assert!(result.success);
604        assert!(result.output.contains("1: aaa"));
605        assert!(result.output.contains("2: bbb"));
606        assert!(!result.output.contains("3: ccc"));
607        assert!(result.output.contains("[Lines 1-2 of 5]"));
608
609        // Full read (no offset/limit) shows all lines
610        let result = tool.execute(json!({"path": "lines.txt"})).await.unwrap();
611        assert!(result.success);
612        assert!(result.output.contains("1: aaa"));
613        assert!(result.output.contains("5: eee"));
614        assert!(result.output.contains("[5 lines total]"));
615
616        let _ = tokio::fs::remove_dir_all(&dir).await;
617    }
618
619    #[tokio::test]
620    async fn file_read_offset_beyond_end() {
621        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset_end");
622        let _ = tokio::fs::remove_dir_all(&dir).await;
623        tokio::fs::create_dir_all(&dir).await.unwrap();
624        tokio::fs::write(dir.join("short.txt"), "one\ntwo")
625            .await
626            .unwrap();
627
628        let tool = test_tool(dir.clone());
629        let result = tool
630            .execute(json!({"path": "short.txt", "offset": 100}))
631            .await
632            .unwrap();
633        assert!(result.success);
634        assert!(
635            result
636                .output
637                .contains("[No lines in range, file has 2 lines]")
638        );
639
640        let _ = tokio::fs::remove_dir_all(&dir).await;
641    }
642
643    #[tokio::test]
644    async fn file_read_rejects_oversized_file() {
645        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_large");
646        let _ = tokio::fs::remove_dir_all(&dir).await;
647        tokio::fs::create_dir_all(&dir).await.unwrap();
648
649        // Create a file just over 10 MB
650        let big = vec![b'x'; 10 * 1024 * 1024 + 1];
651        tokio::fs::write(dir.join("huge.bin"), &big).await.unwrap();
652
653        let tool = test_tool(dir.clone());
654        let result = tool.execute(json!({"path": "huge.bin"})).await.unwrap();
655        assert!(!result.success);
656        assert!(result.error.as_ref().unwrap().contains("File too large"));
657
658        let _ = tokio::fs::remove_dir_all(&dir).await;
659    }
660
661    /// PDF files should be readable via pdf-extract text extraction.
662    #[tokio::test]
663    async fn file_read_extracts_pdf_text() {
664        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_pdf");
665        let _ = tokio::fs::remove_dir_all(&dir).await;
666        tokio::fs::create_dir_all(&dir).await.unwrap();
667
668        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
669            .join("../../tests/fixtures/test_document.pdf");
670        tokio::fs::copy(&fixture, dir.join("report.pdf"))
671            .await
672            .expect("copy PDF fixture");
673
674        let tool = test_tool(dir.clone());
675        let result = tool.execute(json!({"path": "report.pdf"})).await.unwrap();
676
677        assert!(
678            result.success,
679            "PDF read must succeed, error: {:?}",
680            result.error
681        );
682        assert!(
683            result.output.contains("Hello"),
684            "extracted text must contain 'Hello', got: {}",
685            result.output
686        );
687
688        let _ = tokio::fs::remove_dir_all(&dir).await;
689    }
690
691    /// Non-UTF-8 binary files should be read with lossy conversion.
692    #[tokio::test]
693    async fn file_read_lossy_reads_binary_file() {
694        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_lossy");
695        let _ = tokio::fs::remove_dir_all(&dir).await;
696        tokio::fs::create_dir_all(&dir).await.unwrap();
697
698        // Write bytes that are not valid UTF-8 and not a PDF
699        let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
700        tokio::fs::write(dir.join("data.bin"), &binary_data)
701            .await
702            .unwrap();
703
704        let tool = test_tool(dir.clone());
705        let result = tool.execute(json!({"path": "data.bin"})).await.unwrap();
706
707        assert!(
708            result.success,
709            "lossy read must succeed, error: {:?}",
710            result.error
711        );
712        assert!(
713            result.output.contains('\u{FFFD}'),
714            "lossy output must contain replacement character, got: {:?}",
715            result.output
716        );
717        assert!(
718            result.output.contains("hi"),
719            "lossy output must preserve valid ASCII, got: {:?}",
720            result.output
721        );
722
723        let _ = tokio::fs::remove_dir_all(&dir).await;
724    }
725
726    // ── E2E: full agent pipeline with real FileReadTool + PDF extraction ──
727
728    mod e2e_helpers {
729        use crate::observability::{NoopObserver, Observer};
730        use std::sync::{Arc, Mutex};
731        use zeroclaw_config::schema::MemoryConfig;
732        use zeroclaw_memory::{self, Memory};
733        use zeroclaw_providers::{ChatMessage, ChatRequest, ChatResponse, ModelProvider};
734
735        pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
736
737        pub struct RecordingModelProvider {
738            responses: Mutex<Vec<ChatResponse>>,
739            pub requests: SharedRequests,
740        }
741
742        impl RecordingModelProvider {
743            pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
744                let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
745                let model_provider = Self {
746                    responses: Mutex::new(responses),
747                    requests: requests.clone(),
748                };
749                (model_provider, requests)
750            }
751        }
752
753        #[async_trait::async_trait]
754        impl ModelProvider for RecordingModelProvider {
755            async fn chat_with_system(
756                &self,
757                _system_prompt: Option<&str>,
758                _message: &str,
759                _model: &str,
760                _temperature: Option<f64>,
761            ) -> anyhow::Result<String> {
762                Ok("fallback".into())
763            }
764
765            async fn chat(
766                &self,
767                request: ChatRequest<'_>,
768                _model: &str,
769                _temperature: Option<f64>,
770            ) -> anyhow::Result<ChatResponse> {
771                self.requests
772                    .lock()
773                    .unwrap()
774                    .push(request.messages.to_vec());
775
776                let mut guard = self.responses.lock().unwrap();
777                if guard.is_empty() {
778                    return Ok(ChatResponse {
779                        text: Some("done".into()),
780                        tool_calls: vec![],
781                        usage: None,
782                        reasoning_content: None,
783                    });
784                }
785                Ok(guard.remove(0))
786            }
787        }
788        impl ::zeroclaw_api::attribution::Attributable for RecordingModelProvider {
789            fn role(&self) -> ::zeroclaw_api::attribution::Role {
790                ::zeroclaw_api::attribution::Role::Provider(
791                    ::zeroclaw_api::attribution::ProviderKind::Model(
792                        ::zeroclaw_api::attribution::ModelProviderKind::Custom,
793                    ),
794                )
795            }
796            fn alias(&self) -> &str {
797                "RecordingModelProvider"
798            }
799        }
800
801        pub fn make_memory() -> Arc<dyn Memory> {
802            let cfg = MemoryConfig {
803                backend: "none".into(),
804                ..MemoryConfig::default()
805            };
806            Arc::from(zeroclaw_memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
807        }
808
809        pub fn make_observer() -> Arc<dyn Observer> {
810            Arc::from(NoopObserver {})
811        }
812    }
813
814    /// End-to-end test: scripted model_provider calls `file_read` on a real PDF
815    /// fixture, the tool extracts text via pdf-extract, and the extracted
816    /// content reaches the model_provider in the tool result message.
817    #[tokio::test]
818    async fn e2e_agent_file_read_pdf_extraction() {
819        use crate::agent::agent::Agent;
820        use crate::agent::dispatcher::NativeToolDispatcher;
821        use e2e_helpers::*;
822        use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
823
824        // ── Set up workspace with PDF fixture ──
825        let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_pdf");
826        let _ = tokio::fs::remove_dir_all(&workspace).await;
827        tokio::fs::create_dir_all(&workspace).await.unwrap();
828
829        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
830            .join("../../tests/fixtures/test_document.pdf");
831        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
832            .await
833            .expect("copy PDF fixture");
834
835        // ── Build real FileReadTool ──
836        let security = Arc::new(SecurityPolicy {
837            autonomy: AutonomyLevel::Supervised,
838            workspace_dir: workspace.clone(),
839            ..SecurityPolicy::default()
840        });
841        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
842
843        // ── Script model_provider: call file_read → then answer ──
844        let (model_provider, recorded) = RecordingModelProvider::new(vec![
845            // Turn 1 response: model_provider asks to read the PDF
846            ChatResponse {
847                text: Some(String::new()),
848                tool_calls: vec![ToolCall {
849                    id: "tc1".into(),
850                    name: "file_read".into(),
851                    arguments: r#"{"path": "report.pdf"}"#.into(),
852                    extra_content: None,
853                }],
854                usage: None,
855                reasoning_content: None,
856            },
857            // Turn 1 continued: model_provider sees tool result and answers
858            ChatResponse {
859                text: Some("The PDF contains a greeting: Hello PDF".into()),
860                tool_calls: vec![],
861                usage: None,
862                reasoning_content: None,
863            },
864        ]);
865
866        let mut agent = Agent::builder()
867            .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
868            .tools(vec![file_read_tool])
869            .memory(make_memory())
870            .observer(make_observer())
871            .tool_dispatcher(Box::new(NativeToolDispatcher))
872            .workspace_dir(workspace.clone())
873            .build()
874            .unwrap();
875
876        // ── Execute ──
877        let response = agent
878            .turn("Read report.pdf and tell me what it says")
879            .await
880            .unwrap();
881
882        // ── Verify final response ──
883        assert!(
884            response.contains("Hello PDF"),
885            "agent response must contain PDF content, got: {response}",
886        );
887
888        // ── Verify model_provider received extracted PDF text in tool result ──
889        {
890            let all_requests = recorded.lock().unwrap();
891            assert!(
892                all_requests.len() >= 2,
893                "expected at least 2 model_provider requests (initial + after tool), got {}",
894                all_requests.len(),
895            );
896
897            let second_request = &all_requests[1];
898            let tool_result_msg = second_request
899                .iter()
900                .find(|m| m.role == "tool")
901                .expect("second request must contain a tool result message");
902
903            assert!(
904                tool_result_msg.content.contains("Hello"),
905                "tool result must contain extracted PDF text 'Hello', got: {}",
906                tool_result_msg.content,
907            );
908        }
909
910        let _ = tokio::fs::remove_dir_all(&workspace).await;
911    }
912
913    /// End-to-end test: agent calls `file_read` on a binary file, gets
914    /// lossy UTF-8 output with replacement characters in the tool result.
915    #[tokio::test]
916    async fn e2e_agent_file_read_lossy_binary() {
917        use crate::agent::agent::Agent;
918        use crate::agent::dispatcher::NativeToolDispatcher;
919        use e2e_helpers::*;
920        use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
921
922        // ── Set up workspace with binary file ──
923        let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_lossy");
924        let _ = tokio::fs::remove_dir_all(&workspace).await;
925        tokio::fs::create_dir_all(&workspace).await.unwrap();
926
927        let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
928        tokio::fs::write(workspace.join("data.bin"), &binary_data)
929            .await
930            .unwrap();
931
932        let security = Arc::new(SecurityPolicy {
933            autonomy: AutonomyLevel::Supervised,
934            workspace_dir: workspace.clone(),
935            ..SecurityPolicy::default()
936        });
937        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
938
939        let (model_provider, recorded) = RecordingModelProvider::new(vec![
940            ChatResponse {
941                text: Some(String::new()),
942                tool_calls: vec![ToolCall {
943                    id: "tc1".into(),
944                    name: "file_read".into(),
945                    arguments: r#"{"path": "data.bin"}"#.into(),
946                    extra_content: None,
947                }],
948                usage: None,
949                reasoning_content: None,
950            },
951            ChatResponse {
952                text: Some("The file appears to be binary data.".into()),
953                tool_calls: vec![],
954                usage: None,
955                reasoning_content: None,
956            },
957        ]);
958
959        let mut agent = Agent::builder()
960            .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
961            .tools(vec![file_read_tool])
962            .memory(make_memory())
963            .observer(make_observer())
964            .tool_dispatcher(Box::new(NativeToolDispatcher))
965            .workspace_dir(workspace.clone())
966            .build()
967            .unwrap();
968
969        let response = agent.turn("Read data.bin").await.unwrap();
970
971        assert!(
972            response.contains("binary"),
973            "agent response must mention binary, got: {response}",
974        );
975
976        // Verify tool result contains lossy output with replacement chars
977        {
978            let all_requests = recorded.lock().unwrap();
979            assert!(
980                all_requests.len() >= 2,
981                "expected at least 2 model_provider requests, got {}",
982                all_requests.len(),
983            );
984
985            let tool_result_msg = all_requests[1]
986                .iter()
987                .find(|m| m.role == "tool")
988                .expect("second request must contain a tool result message");
989
990            assert!(
991                tool_result_msg.content.contains("valid"),
992                "tool result must preserve valid ASCII from binary file, got: {}",
993                tool_result_msg.content,
994            );
995            assert!(
996                tool_result_msg.content.contains('\u{FFFD}'),
997                "tool result must contain replacement character for invalid bytes, got: {}",
998                tool_result_msg.content,
999            );
1000        }
1001
1002        let _ = tokio::fs::remove_dir_all(&workspace).await;
1003    }
1004
1005    /// Live e2e: real OpenAI Codex model_provider + real FileReadTool + PDF fixture.
1006    /// Verifies the model receives extracted PDF text and responds meaningfully.
1007    ///
1008    /// Requires valid OAuth credentials in `~/.zeroclaw/`.
1009    /// Run: `cargo test --lib -- tools::file_read::tests::e2e_live_file_read_pdf --ignored --nocapture`
1010    #[tokio::test]
1011    #[ignore = "requires valid OpenAI Codex OAuth credentials"]
1012    async fn e2e_live_file_read_pdf() {
1013        use crate::agent::agent::Agent;
1014        use crate::agent::dispatcher::XmlToolDispatcher;
1015        use e2e_helpers::*;
1016        use zeroclaw_providers::openai_codex::OpenAiCodexModelProvider;
1017        use zeroclaw_providers::{ModelProvider, ModelProviderRuntimeOptions};
1018
1019        // ── Set up workspace with PDF fixture ──
1020        let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_live_file_read_pdf");
1021        let _ = tokio::fs::remove_dir_all(&workspace).await;
1022        tokio::fs::create_dir_all(&workspace).await.unwrap();
1023
1024        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
1025            .join("../../tests/fixtures/test_document.pdf");
1026        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
1027            .await
1028            .expect("copy PDF fixture");
1029
1030        // ── Build real FileReadTool ──
1031        let security = Arc::new(SecurityPolicy {
1032            autonomy: AutonomyLevel::Supervised,
1033            workspace_dir: workspace.clone(),
1034            ..SecurityPolicy::default()
1035        });
1036        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1037
1038        // ── Real model_provider (OpenAI Codex uses XML tool dispatch) ──
1039        let model_provider =
1040            OpenAiCodexModelProvider::new("test", &ModelProviderRuntimeOptions::default(), None)
1041                .expect("model_provider should initialize");
1042
1043        let mut agent = Agent::builder()
1044            .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1045            .tools(vec![file_read_tool])
1046            .memory(make_memory())
1047            .observer(make_observer())
1048            .tool_dispatcher(Box::new(XmlToolDispatcher))
1049            .workspace_dir(workspace.clone())
1050            .model_name("gpt-5.3-codex".to_string())
1051            .build()
1052            .unwrap();
1053
1054        // ── Execute ──
1055        let response = agent
1056            .turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
1057            .await
1058            .unwrap();
1059
1060        eprintln!("=== Live e2e response ===\n{response}\n=========================");
1061
1062        // ── Verify model saw the actual PDF content ("Hello PDF") ──
1063        let lower = response.to_lowercase();
1064        assert!(
1065            lower.contains("hello"),
1066            "model response must reference extracted PDF text 'Hello PDF', got: {response}",
1067        );
1068
1069        let _ = tokio::fs::remove_dir_all(&workspace).await;
1070    }
1071
1072    #[tokio::test]
1073    async fn file_read_blocks_null_byte_in_path() {
1074        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_null_byte");
1075        let _ = tokio::fs::remove_dir_all(&dir).await;
1076        tokio::fs::create_dir_all(&dir).await.unwrap();
1077
1078        let tool = test_tool(dir.clone());
1079        let result = tool
1080            .execute(json!({"path": "test\0evil.txt"}))
1081            .await
1082            .unwrap();
1083        assert!(!result.success);
1084        assert!(result.error.as_ref().unwrap().contains("not allowed"));
1085
1086        let _ = tokio::fs::remove_dir_all(&dir).await;
1087    }
1088
1089    #[cfg(unix)]
1090    #[tokio::test]
1091    async fn file_read_allows_dev_null() {
1092        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_dev_null");
1093        let _ = tokio::fs::remove_dir_all(&dir).await;
1094        tokio::fs::create_dir_all(&dir).await.unwrap();
1095
1096        let tool = test_tool(dir.clone());
1097        let result = tool.execute(json!({"path": "/dev/null"})).await.unwrap();
1098
1099        assert!(
1100            result.success,
1101            "file_read of /dev/null must succeed, error: {:?}",
1102            result.error
1103        );
1104        assert_eq!(result.output, "", "/dev/null must read as empty");
1105
1106        let _ = tokio::fs::remove_dir_all(&dir).await;
1107    }
1108
1109    #[tokio::test]
1110    async fn file_read_allowed_root_with_workspace_only() {
1111        let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_root");
1112        let workspace = root.join("workspace");
1113        let allowed = root.join("allowed_dir");
1114
1115        let _ = tokio::fs::remove_dir_all(&root).await;
1116        tokio::fs::create_dir_all(&workspace).await.unwrap();
1117        tokio::fs::create_dir_all(&allowed).await.unwrap();
1118        tokio::fs::write(allowed.join("data.txt"), "allowed content")
1119            .await
1120            .unwrap();
1121
1122        let security = Arc::new(SecurityPolicy {
1123            autonomy: AutonomyLevel::Supervised,
1124            workspace_dir: workspace.clone(),
1125            workspace_only: true,
1126            allowed_roots: vec![allowed.clone()],
1127            ..SecurityPolicy::default()
1128        });
1129        let tool = FileReadTool::new(security);
1130
1131        // Absolute path under allowed_root should succeed
1132        let abs_path = allowed.join("data.txt").to_string_lossy().to_string();
1133        let result = tool.execute(json!({"path": &abs_path})).await.unwrap();
1134
1135        assert!(
1136            result.success,
1137            "file_read with allowed_root path should succeed, error: {:?}",
1138            result.error
1139        );
1140        assert!(result.output.contains("allowed content"));
1141
1142        // Path outside both workspace and allowed_roots should still fail
1143        let outside = root.join("outside");
1144        tokio::fs::create_dir_all(&outside).await.unwrap();
1145        tokio::fs::write(outside.join("secret.txt"), "secret")
1146            .await
1147            .unwrap();
1148        let outside_path = outside.join("secret.txt").to_string_lossy().to_string();
1149        let result = tool.execute(json!({"path": &outside_path})).await.unwrap();
1150        assert!(!result.success);
1151
1152        let _ = tokio::fs::remove_dir_all(&root).await;
1153    }
1154
1155    /// Anti-probing regression: a caller cannot probe file existence for free.
1156    /// Both `resolve_candidate` failures and `canonicalize` failures must
1157    /// consume one action-budget slot, so repeated probes hit the rate limit.
1158    #[tokio::test]
1159    async fn file_read_nonexistent_consumes_rate_limit_budget() {
1160        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_probe");
1161        let _ = tokio::fs::remove_dir_all(&dir).await;
1162        tokio::fs::create_dir_all(&dir).await.unwrap();
1163
1164        // Allow only 2 actions total.
1165        let tool = test_tool_with(dir.clone(), AutonomyLevel::Supervised, 2);
1166
1167        // Two failing reads each consume one slot via the inner-tool charge.
1168        let r1 = tool.execute(json!({"path": "nope1.txt"})).await.unwrap();
1169        assert!(!r1.success);
1170        assert!(
1171            r1.error
1172                .as_deref()
1173                .unwrap_or("")
1174                .contains("Failed to resolve")
1175        );
1176
1177        let r2 = tool.execute(json!({"path": "nope2.txt"})).await.unwrap();
1178        assert!(!r2.success);
1179        assert!(
1180            r2.error
1181                .as_deref()
1182                .unwrap_or("")
1183                .contains("Failed to resolve")
1184        );
1185
1186        // Third attempt: budget is now exhausted.  The inner tool still
1187        // charges, but `record_action()` returns false; the failure error
1188        // is unchanged from the caller's perspective (probing failed),
1189        // and the budget is observably full (a subsequent allowed read
1190        // would have to wait for the window to reset).
1191        let r3 = tool.execute(json!({"path": "nope3.txt"})).await.unwrap();
1192        assert!(!r3.success);
1193
1194        // Verify the budget is actually full by attempting a real read,
1195        // which must now report rate-limit exhaustion when wrapped, or at
1196        // minimum fail.  Here we use the inner-only tool, so we just
1197        // assert that record_action returns false (budget already at cap).
1198        // The inner tool's own retry would consume nothing more.
1199        assert!(!tool.security.record_action(), "budget must be exhausted");
1200
1201        let _ = tokio::fs::remove_dir_all(&dir).await;
1202    }
1203}
zeroclaw_runtime/tools/file_read.rs

zeroclaw_runtime/tools/
file_read.rs