Skip to main content

zeroclaw_runtime/tools/
file_read.rs

1use crate::security::SecurityPolicy;
2use async_trait::async_trait;
3use serde_json::json;
4use std::sync::Arc;
5use zeroclaw_api::tool::{Tool, ToolResult, with_ephemeral_workspace_warning};
6
7const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024;
8
9/// Read file contents with workspace sandboxing.
10pub struct FileReadTool {
11    security: Arc<SecurityPolicy>,
12    /// Whether the workspace is host-persistent. `false` on an ephemeral
13    /// runtime (Docker tmpfs / no volume mount), where reads can return stale
14    /// or empty data that does not reflect the host filesystem. When `false`,
15    /// successful text reads carry a loud ephemeral-workspace warning so the
16    /// agent doesn't trust the contents as host-backed. See issue #4627.
17    persistent_writes: bool,
18}
19
20impl FileReadTool {
21    pub fn new(security: Arc<SecurityPolicy>) -> Self {
22        Self {
23            security,
24            persistent_writes: true,
25        }
26    }
27
28    /// Construct with an explicit persistence flag derived from the active
29    /// runtime adapter's `has_filesystem_access()`. Mirrors
30    /// [`super::FileWriteTool::new_with_persistence`].
31    pub fn new_with_persistence(security: Arc<SecurityPolicy>, persistent_writes: bool) -> Self {
32        Self {
33            security,
34            persistent_writes,
35        }
36    }
37
38    /// Resolve a caller-supplied path to an absolute candidate. Reject
39    /// only path-shape attacks (null byte, `..` traversal); the
40    /// allowlist gate is `SecurityPolicy::is_resolved_path_readable`
41    /// after canonicalize, which already unions `allowed_roots` and
42    /// `allowed_roots_read_only`.
43    fn resolve_candidate(&self, path: &str) -> anyhow::Result<std::path::PathBuf> {
44        if path.contains('\0') {
45            anyhow::bail!("Path not allowed: contains null byte");
46        }
47        if std::path::Path::new(path)
48            .components()
49            .any(|c| matches!(c, std::path::Component::ParentDir))
50        {
51            anyhow::bail!("Path not allowed by security policy: {path}");
52        }
53
54        let p = std::path::Path::new(path);
55        if p.is_absolute() {
56            return Ok(p.to_path_buf());
57        }
58
59        let workspace_dir = &self.security.workspace_dir;
60        if let Ok(workspace_rootless) = workspace_dir.strip_prefix("/")
61            && let Ok(stripped) = p.strip_prefix(workspace_rootless)
62        {
63            return Ok(if stripped.as_os_str().is_empty() {
64                workspace_dir.clone()
65            } else {
66                workspace_dir.join(stripped)
67            });
68        }
69
70        Ok(workspace_dir.join(p))
71    }
72}
73
74#[async_trait]
75impl Tool for FileReadTool {
76    fn name(&self) -> &str {
77        "file_read"
78    }
79
80    fn description(&self) -> &str {
81        "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion. Set encoding=\"base64\" to return raw bytes base64-encoded (for binary files such as .xlsx/.docx); offset/limit are ignored in that mode."
82    }
83
84    fn parameters_schema(&self) -> serde_json::Value {
85        json!({
86            "type": "object",
87            "properties": {
88                "path": {
89                    "type": "string",
90                    "description": "Path to the file. Relative paths resolve from workspace root; absolute paths must be within the workspace."
91                },
92                "offset": {
93                    "type": "integer",
94                    "description": "Starting line number (1-based, default: 1). Ignored when encoding is 'base64'."
95                },
96                "limit": {
97                    "type": "integer",
98                    "description": "Maximum number of lines to return (default: all). Ignored when encoding is 'base64'."
99                },
100                "encoding": {
101                    "type": "string",
102                    "enum": ["utf8", "base64"],
103                    "description": "Output encoding (default: 'utf8'). Use 'base64' to read binary files as base64-encoded bytes."
104                }
105            },
106            "required": ["path"]
107        })
108    }
109
110    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
111        // Base64 reads return a verbatim payload the caller decodes, so they
112        // must NOT be annotated — a prepended banner would corrupt decoding.
113        // Text reads on an ephemeral runtime may return stale/empty data, so
114        // they carry the loud warning instead (issue #4627).
115        let is_base64 = args.get("encoding").and_then(|v| v.as_str()) == Some("base64");
116        let mut result = self.read_path(args).await?;
117        if !self.persistent_writes && result.success && !is_base64 {
118            result.output = with_ephemeral_workspace_warning(&result.output);
119        }
120        Ok(result)
121    }
122}
123
124impl FileReadTool {
125    /// Resolve, sandbox-check, and read the requested path. The ephemeral
126    /// workspace warning is applied by the `Tool::execute` wrapper above.
127    async fn read_path(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
128        let path = args.get("path").and_then(|v| v.as_str()).ok_or_else(|| {
129            ::zeroclaw_log::record!(
130                WARN,
131                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
132                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
133                    .with_attrs(::serde_json::json!({"param": "path"})),
134                "tool argument validation failed"
135            );
136
137            anyhow::Error::msg("Missing 'path' parameter")
138        })?;
139
140        // Cross-cutting rate limiting and path-allowlist checks live in the
141        // RateLimitedTool + PathGuardedTool wrappers at registration time
142        // (see zeroclaw-runtime::tools::mod).  Successful reads consume one
143        // budget slot via the outer RateLimitedTool.
144        //
145        // Read-tool exception: post-`PathGuardedTool` resolve/canonicalize
146        // failures (path-traversal that slipped through allowlist, missing
147        // file) also consume one budget slot, charged here, so that callers
148        // cannot probe path existence for free.  The outer wrapper only
149        // records on `success: true`, so calling `record_action()` on these
150        // failure paths charges exactly one slot per attempt — matching the
151        // pre-wrapper semantics where every attempted read cost one slot.
152
153        // Validate and build candidate path using workspace_dir directly.
154        let full_path = match self.resolve_candidate(path) {
155            Ok(p) => p,
156            Err(e) => {
157                let _ = self.security.record_action();
158                return Ok(ToolResult {
159                    success: false,
160                    output: String::new(),
161                    error: Some(e.to_string()),
162                });
163            }
164        };
165
166        // Canonicalize to resolve symlinks, then enforce workspace boundary.
167        let resolved_path = match tokio::fs::canonicalize(&full_path).await {
168            Ok(p) => p,
169            Err(e) => {
170                let _ = self.security.record_action();
171                return Ok(ToolResult {
172                    success: false,
173                    output: String::new(),
174                    error: Some(format!("Failed to resolve file path: {e}")),
175                });
176            }
177        };
178
179        // Read access: workspace + read-write allowlist + read-only allowlist
180        // + universal POSIX device files (/dev/null, etc.).
181        if !self.security.is_resolved_path_readable(&resolved_path) {
182            return Ok(ToolResult {
183                success: false,
184                output: String::new(),
185                error: Some(format!("Path escapes workspace directory: {path}")),
186            });
187        }
188
189        // Check file size AFTER canonicalization to prevent TOCTOU symlink bypass
190        match tokio::fs::metadata(&resolved_path).await {
191            Ok(meta) => {
192                if meta.len() > MAX_FILE_SIZE_BYTES {
193                    return Ok(ToolResult {
194                        success: false,
195                        output: String::new(),
196                        error: Some(format!(
197                            "File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
198                            meta.len()
199                        )),
200                    });
201                }
202            }
203            Err(e) => {
204                return Ok(ToolResult {
205                    success: false,
206                    output: String::new(),
207                    error: Some(format!("Failed to read file metadata: {e}")),
208                });
209            }
210        }
211
212        let encoding = args
213            .get("encoding")
214            .and_then(|v| v.as_str())
215            .unwrap_or("utf8");
216
217        if encoding == "base64" {
218            // Binary read: return raw bytes base64-encoded. Line numbering and
219            // offset/limit are text concepts and do not apply here.
220            let bytes = match tokio::fs::read(&resolved_path).await {
221                Ok(b) => b,
222                Err(e) => {
223                    return Ok(ToolResult {
224                        success: false,
225                        output: String::new(),
226                        error: Some(format!("Failed to read file: {e}")),
227                    });
228                }
229            };
230            use base64::Engine;
231            let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
232            return Ok(ToolResult {
233                success: true,
234                output: encoded,
235                error: None,
236            });
237        } else if encoding != "utf8" {
238            return Ok(ToolResult {
239                success: false,
240                output: String::new(),
241                error: Some(format!(
242                    "Unsupported encoding '{encoding}' (expected 'utf8' or 'base64')"
243                )),
244            });
245        }
246
247        match tokio::fs::read_to_string(&resolved_path).await {
248            Ok(contents) => {
249                let lines: Vec<&str> = contents.lines().collect();
250                let total = lines.len();
251
252                if total == 0 {
253                    return Ok(ToolResult {
254                        success: true,
255                        output: String::new(),
256                        error: None,
257                    });
258                }
259
260                let offset = args
261                    .get("offset")
262                    .and_then(|v| v.as_u64())
263                    .map(|v| {
264                        usize::try_from(v.max(1))
265                            .unwrap_or(usize::MAX)
266                            .saturating_sub(1)
267                    })
268                    .unwrap_or(0);
269                let start = offset.min(total);
270
271                let end = match args.get("limit").and_then(|v| v.as_u64()) {
272                    Some(l) => {
273                        let limit = usize::try_from(l).unwrap_or(usize::MAX);
274                        (start.saturating_add(limit)).min(total)
275                    }
276                    None => total,
277                };
278
279                if start >= end {
280                    return Ok(ToolResult {
281                        success: true,
282                        output: format!("[No lines in range, file has {total} lines]"),
283                        error: None,
284                    });
285                }
286
287                let numbered: String = lines[start..end]
288                    .iter()
289                    .enumerate()
290                    .map(|(i, line)| format!("{}: {}", start + i + 1, line))
291                    .collect::<Vec<_>>()
292                    .join("\n");
293
294                let partial = start > 0 || end < total;
295                let summary = if partial {
296                    format!("\n[Lines {}-{} of {total}]", start + 1, end)
297                } else {
298                    format!("\n[{total} lines total]")
299                };
300
301                Ok(ToolResult {
302                    success: true,
303                    output: format!("{numbered}{summary}"),
304                    error: None,
305                })
306            }
307            Err(_) => {
308                // Not valid UTF-8 — read raw bytes and try to extract text
309                let bytes = tokio::fs::read(&resolved_path).await.map_err(|e| {
310                    ::zeroclaw_log::record!(
311                        WARN,
312                        ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
313                            .with_outcome(::zeroclaw_log::EventOutcome::Failure)
314                            .with_attrs(::serde_json::json!({
315                                "path": resolved_path.display().to_string(),
316                                "error": format!("{}", e),
317                            })),
318                        "file_read: raw byte fallback read failed"
319                    );
320                    anyhow::Error::msg(format!("Failed to read file: {e}"))
321                })?;
322
323                if let Some(text) = try_extract_pdf_text(&bytes) {
324                    return Ok(ToolResult {
325                        success: true,
326                        output: text,
327                        error: None,
328                    });
329                }
330
331                // Lossy fallback — replaces invalid bytes with U+FFFD
332                let lossy = String::from_utf8_lossy(&bytes).into_owned();
333                Ok(ToolResult {
334                    success: true,
335                    output: lossy,
336                    error: None,
337                })
338            }
339        }
340    }
341}
342
/// Extract text from `bytes` when they look like a PDF.
///
/// Returns `None` when the data does not start with the `%PDF-` signature,
/// when extraction fails, or when the extracted text is blank.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|text| !text.trim().is_empty())
}
354
/// PDF extraction stand-in used when the `rag-pdf` feature is disabled:
/// never extracts anything, so callers fall through to their next strategy.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
    None
}
359
360#[cfg(test)]
361mod tests {
362    use super::*;
363    use crate::security::{AutonomyLevel, SecurityPolicy};
364
365    fn test_tool(workspace: std::path::PathBuf) -> FileReadTool {
366        let security = Arc::new(SecurityPolicy {
367            autonomy: AutonomyLevel::Supervised,
368            workspace_dir: workspace,
369            ..SecurityPolicy::default()
370        });
371        FileReadTool::new(security)
372    }
373
374    fn test_tool_with(
375        workspace: std::path::PathBuf,
376        autonomy: AutonomyLevel,
377        max_actions_per_hour: u32,
378    ) -> FileReadTool {
379        let security = Arc::new(SecurityPolicy {
380            autonomy,
381            workspace_dir: workspace,
382            max_actions_per_hour,
383            ..SecurityPolicy::default()
384        });
385        FileReadTool::new(security)
386    }
387
388    fn ephemeral_tool(workspace: std::path::PathBuf) -> FileReadTool {
389        let security = Arc::new(SecurityPolicy {
390            autonomy: AutonomyLevel::Supervised,
391            workspace_dir: workspace,
392            ..SecurityPolicy::default()
393        });
394        FileReadTool::new_with_persistence(security, false)
395    }
396
397    #[test]
398    fn file_read_name() {
399        let tool = test_tool(std::env::temp_dir());
400        assert_eq!(tool.name(), "file_read");
401    }
402
403    #[test]
404    fn file_read_schema_has_path() {
405        let tool = test_tool(std::env::temp_dir());
406        let schema = tool.parameters_schema();
407        assert!(schema["properties"]["path"].is_object());
408        assert!(schema["properties"]["offset"].is_object());
409        assert!(schema["properties"]["limit"].is_object());
410        assert!(
411            schema["required"]
412                .as_array()
413                .unwrap()
414                .contains(&json!("path"))
415        );
416        // offset and limit are optional
417        assert!(
418            !schema["required"]
419                .as_array()
420                .unwrap()
421                .contains(&json!("offset"))
422        );
423    }
424
425    #[tokio::test]
426    async fn file_read_existing_file() {
427        let dir = std::env::temp_dir().join("zeroclaw_test_file_read");
428        let _ = tokio::fs::remove_dir_all(&dir).await;
429        tokio::fs::create_dir_all(&dir).await.unwrap();
430        tokio::fs::write(dir.join("test.txt"), "hello world")
431            .await
432            .unwrap();
433
434        let tool = test_tool(dir.clone());
435        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
436        assert!(result.success);
437        assert!(result.output.contains("1: hello world"));
438        assert!(result.output.contains("[1 lines total]"));
439        assert!(result.error.is_none());
440
441        let _ = tokio::fs::remove_dir_all(&dir).await;
442    }
443
444    #[tokio::test]
445    async fn file_read_nonexistent_file() {
446        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_missing");
447        let _ = tokio::fs::remove_dir_all(&dir).await;
448        tokio::fs::create_dir_all(&dir).await.unwrap();
449
450        let tool = test_tool(dir.clone());
451        let result = tool.execute(json!({"path": "nope.txt"})).await.unwrap();
452        assert!(!result.success);
453        assert!(result.error.as_ref().unwrap().contains("Failed to resolve"));
454
455        let _ = tokio::fs::remove_dir_all(&dir).await;
456    }
457
458    #[tokio::test]
459    async fn file_read_blocks_path_traversal() {
460        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_traversal");
461        let _ = tokio::fs::remove_dir_all(&dir).await;
462        tokio::fs::create_dir_all(&dir).await.unwrap();
463
464        let tool = test_tool(dir.clone());
465        let result = tool
466            .execute(json!({"path": "../../../etc/passwd"}))
467            .await
468            .unwrap();
469        assert!(!result.success);
470        assert!(result.error.as_ref().unwrap().contains("not allowed"));
471
472        let _ = tokio::fs::remove_dir_all(&dir).await;
473    }
474
475    #[tokio::test]
476    async fn file_read_blocks_absolute_path() {
477        let tool = test_tool(std::env::temp_dir());
478
479        #[cfg(unix)]
480        let target = "/etc/passwd";
481        #[cfg(windows)]
482        let target = {
483            let sysroot = std::env::var("SystemRoot").unwrap_or_else(|_| r"C:\Windows".to_string());
484            std::path::PathBuf::from(sysroot).join(r"System32\drivers\etc\hosts")
485        };
486
487        let result = tool.execute(json!({"path": target})).await.unwrap();
488        assert!(!result.success);
489        assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
490    }
491
492    #[tokio::test]
493    async fn file_read_allows_readonly_mode() {
494        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_readonly");
495        let _ = tokio::fs::remove_dir_all(&dir).await;
496        tokio::fs::create_dir_all(&dir).await.unwrap();
497        tokio::fs::write(dir.join("test.txt"), "readonly ok")
498            .await
499            .unwrap();
500
501        let tool = test_tool_with(dir.clone(), AutonomyLevel::ReadOnly, 20);
502        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
503
504        assert!(result.success);
505        assert!(result.output.contains("1: readonly ok"));
506
507        let _ = tokio::fs::remove_dir_all(&dir).await;
508    }
509
510    #[tokio::test]
511    async fn file_read_missing_path_param() {
512        let tool = test_tool(std::env::temp_dir());
513        let result = tool.execute(json!({})).await;
514        assert!(result.is_err());
515    }
516
517    #[test]
518    fn file_read_schema_has_encoding() {
519        let tool = test_tool(std::env::temp_dir());
520        let schema = tool.parameters_schema();
521        assert!(schema["properties"]["encoding"].is_object());
522    }
523
524    #[tokio::test]
525    async fn file_read_base64_returns_encoded_bytes() {
526        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_base64");
527        let _ = tokio::fs::remove_dir_all(&dir).await;
528        tokio::fs::create_dir_all(&dir).await.unwrap();
529
530        // Non-UTF-8 bytes — proves we return raw bytes, not lossy text.
531        let raw: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'P', b'K', 0x03, 0x04];
532        tokio::fs::write(dir.join("data.bin"), &raw).await.unwrap();
533
534        let tool = test_tool(dir.clone());
535        let result = tool
536            .execute(json!({"path": "data.bin", "encoding": "base64"}))
537            .await
538            .unwrap();
539        assert!(result.success, "error: {:?}", result.error);
540
541        use base64::Engine;
542        let decoded = base64::engine::general_purpose::STANDARD
543            .decode(result.output.trim())
544            .expect("output must be valid base64");
545        assert_eq!(decoded, raw, "base64 read must round-trip exact bytes");
546
547        let _ = tokio::fs::remove_dir_all(&dir).await;
548    }
549
550    // ── Ephemeral-workspace warning (issue #4627) ────────────────
551
552    /// On an ephemeral runtime a successful text read may reflect stale/empty
553    /// data; the output carries a loud warning while preserving the contents.
554    #[tokio::test]
555    async fn file_read_warns_on_ephemeral_workspace() {
556        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_ephemeral");
557        let _ = tokio::fs::remove_dir_all(&dir).await;
558        tokio::fs::create_dir_all(&dir).await.unwrap();
559        tokio::fs::write(dir.join("notes.txt"), "host content?")
560            .await
561            .unwrap();
562
563        let tool = ephemeral_tool(dir.clone());
564        let result = tool.execute(json!({"path": "notes.txt"})).await.unwrap();
565        assert!(result.success);
566        assert!(
567            result.output.contains("EPHEMERAL WORKSPACE"),
568            "ephemeral warning must be present, got: {}",
569            result.output
570        );
571        assert!(result.output.contains("mount_workspace"));
572        assert!(
573            result.output.contains("host content?"),
574            "original read content must be preserved, got: {}",
575            result.output
576        );
577
578        let _ = tokio::fs::remove_dir_all(&dir).await;
579    }
580
581    /// base64 reads return a verbatim payload the caller decodes; prepending a
582    /// banner would corrupt decoding, so base64 reads must stay un-annotated.
583    #[tokio::test]
584    async fn file_read_base64_not_warned_on_ephemeral_workspace() {
585        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_ephemeral_b64");
586        let _ = tokio::fs::remove_dir_all(&dir).await;
587        tokio::fs::create_dir_all(&dir).await.unwrap();
588        let raw: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'P', b'K'];
589        tokio::fs::write(dir.join("data.bin"), &raw).await.unwrap();
590
591        let tool = ephemeral_tool(dir.clone());
592        let result = tool
593            .execute(json!({"path": "data.bin", "encoding": "base64"}))
594            .await
595            .unwrap();
596        assert!(result.success, "error: {:?}", result.error);
597        assert!(
598            !result.output.contains("EPHEMERAL WORKSPACE"),
599            "base64 payload must not be annotated, got: {}",
600            result.output
601        );
602        use base64::Engine;
603        let decoded = base64::engine::general_purpose::STANDARD
604            .decode(result.output.trim())
605            .expect("base64 output must still decode");
606        assert_eq!(decoded, raw, "base64 read must round-trip exact bytes");
607
608        let _ = tokio::fs::remove_dir_all(&dir).await;
609    }
610
611    /// A failed read returns no file data — not data loss — so no banner is
612    /// attached to either field.
613    #[tokio::test]
614    async fn file_read_failure_not_warned_on_ephemeral_workspace() {
615        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_ephemeral_fail");
616        let _ = tokio::fs::remove_dir_all(&dir).await;
617        tokio::fs::create_dir_all(&dir).await.unwrap();
618
619        let tool = ephemeral_tool(dir.clone());
620        let result = tool.execute(json!({"path": "missing.txt"})).await.unwrap();
621        assert!(!result.success);
622        assert!(!result.output.contains("EPHEMERAL WORKSPACE"));
623        assert!(
624            !result
625                .error
626                .as_deref()
627                .unwrap_or("")
628                .contains("EPHEMERAL WORKSPACE")
629        );
630
631        let _ = tokio::fs::remove_dir_all(&dir).await;
632    }
633
634    /// On a persistent runtime (the default) no warning is attached.
635    #[tokio::test]
636    async fn file_read_no_warning_when_persistent() {
637        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_persistent");
638        let _ = tokio::fs::remove_dir_all(&dir).await;
639        tokio::fs::create_dir_all(&dir).await.unwrap();
640        tokio::fs::write(dir.join("notes.txt"), "ok").await.unwrap();
641
642        let tool = test_tool(dir.clone());
643        let result = tool.execute(json!({"path": "notes.txt"})).await.unwrap();
644        assert!(result.success);
645        assert!(
646            !result.output.contains("EPHEMERAL WORKSPACE"),
647            "no ephemeral warning expected on a persistent runtime, got: {}",
648            result.output
649        );
650
651        let _ = tokio::fs::remove_dir_all(&dir).await;
652    }
653
654    #[tokio::test]
655    async fn file_read_unsupported_encoding_errors() {
656        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_bad_encoding");
657        let _ = tokio::fs::remove_dir_all(&dir).await;
658        tokio::fs::create_dir_all(&dir).await.unwrap();
659        tokio::fs::write(dir.join("f.txt"), "hi").await.unwrap();
660
661        let tool = test_tool(dir.clone());
662        let result = tool
663            .execute(json!({"path": "f.txt", "encoding": "hex"}))
664            .await
665            .unwrap();
666        assert!(!result.success);
667        assert!(
668            result
669                .error
670                .as_deref()
671                .unwrap_or("")
672                .contains("Unsupported encoding")
673        );
674
675        let _ = tokio::fs::remove_dir_all(&dir).await;
676    }
677
678    #[tokio::test]
679    async fn file_read_empty_file() {
680        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_empty");
681        let _ = tokio::fs::remove_dir_all(&dir).await;
682        tokio::fs::create_dir_all(&dir).await.unwrap();
683        tokio::fs::write(dir.join("empty.txt"), "").await.unwrap();
684
685        let tool = test_tool(dir.clone());
686        let result = tool.execute(json!({"path": "empty.txt"})).await.unwrap();
687        assert!(result.success);
688        assert_eq!(result.output, "");
689
690        let _ = tokio::fs::remove_dir_all(&dir).await;
691    }
692
693    #[tokio::test]
694    async fn file_read_nested_path() {
695        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_nested");
696        let _ = tokio::fs::remove_dir_all(&dir).await;
697        tokio::fs::create_dir_all(dir.join("sub/dir"))
698            .await
699            .unwrap();
700        tokio::fs::write(dir.join("sub/dir/deep.txt"), "deep content")
701            .await
702            .unwrap();
703
704        let tool = test_tool(dir.clone());
705        let result = tool
706            .execute(json!({"path": "sub/dir/deep.txt"}))
707            .await
708            .unwrap();
709        assert!(result.success);
710        assert!(result.output.contains("1: deep content"));
711
712        let _ = tokio::fs::remove_dir_all(&dir).await;
713    }
714
715    #[cfg(unix)]
716    #[tokio::test]
717    async fn file_read_blocks_symlink_escape() {
718        use std::os::unix::fs::symlink;
719
720        let root = std::env::temp_dir().join("zeroclaw_test_file_read_symlink_escape");
721        let workspace = root.join("workspace");
722        let outside = root.join("outside");
723
724        let _ = tokio::fs::remove_dir_all(&root).await;
725        tokio::fs::create_dir_all(&workspace).await.unwrap();
726        tokio::fs::create_dir_all(&outside).await.unwrap();
727
728        tokio::fs::write(outside.join("secret.txt"), "outside workspace")
729            .await
730            .unwrap();
731
732        symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();
733
734        let tool = test_tool(workspace.clone());
735        let result = tool.execute(json!({"path": "escape.txt"})).await.unwrap();
736
737        assert!(!result.success);
738        assert!(
739            result
740                .error
741                .as_deref()
742                .unwrap_or("")
743                .contains("escapes workspace")
744        );
745
746        let _ = tokio::fs::remove_dir_all(&root).await;
747    }
748
749    #[tokio::test]
750    async fn file_read_blocks_outside_workspace_regardless_of_policy() {
751        let root = std::env::temp_dir().join("zeroclaw_test_file_read_blocks_outside");
752        let workspace = root.join("workspace");
753        let outside = root.join("outside");
754        let outside_file = outside.join("notes.txt");
755
756        let _ = tokio::fs::remove_dir_all(&root).await;
757        tokio::fs::create_dir_all(&workspace).await.unwrap();
758        tokio::fs::create_dir_all(&outside).await.unwrap();
759        tokio::fs::write(&outside_file, "outside").await.unwrap();
760
761        let tool = test_tool(workspace.clone());
762
763        let result = tool
764            .execute(json!({"path": outside_file.to_string_lossy().to_string()}))
765            .await
766            .unwrap();
767
768        assert!(!result.success);
769        assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
770
771        let _ = tokio::fs::remove_dir_all(&root).await;
772    }
773
774    #[tokio::test]
775    async fn file_read_admits_absolute_path_under_read_only_root() {
776        let root =
777            std::env::temp_dir().join("zeroclaw_test_file_read_admits_absolute_path_under_ro_root");
778        let workspace = root.join("workspace");
779        let ro_root = root.join("shared");
780        let ro_file = ro_root.join("notes.txt");
781
782        let _ = tokio::fs::remove_dir_all(&root).await;
783        tokio::fs::create_dir_all(&workspace).await.unwrap();
784        tokio::fs::create_dir_all(&ro_root).await.unwrap();
785        tokio::fs::write(&ro_file, "cross-agent read")
786            .await
787            .unwrap();
788
789        let security = Arc::new(SecurityPolicy {
790            autonomy: AutonomyLevel::Supervised,
791            workspace_dir: workspace,
792            allowed_roots_read_only: vec![ro_root.clone()],
793            ..SecurityPolicy::default()
794        });
795        let tool = FileReadTool::new(security);
796
797        let result = tool
798            .execute(json!({"path": ro_file.to_string_lossy().to_string()}))
799            .await
800            .unwrap();
801
802        assert!(
803            result.success,
804            "absolute path under read-only root must read: {result:?}"
805        );
806        assert!(result.output.contains("cross-agent read"));
807
808        let _ = tokio::fs::remove_dir_all(&root).await;
809    }
810
811    #[tokio::test]
812    async fn file_read_with_offset_and_limit() {
813        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset");
814        let _ = tokio::fs::remove_dir_all(&dir).await;
815        tokio::fs::create_dir_all(&dir).await.unwrap();
816        tokio::fs::write(dir.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
817            .await
818            .unwrap();
819
820        let tool = test_tool(dir.clone());
821
822        // Read lines 2-3
823        let result = tool
824            .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
825            .await
826            .unwrap();
827        assert!(result.success);
828        assert!(result.output.contains("2: bbb"));
829        assert!(result.output.contains("3: ccc"));
830        assert!(!result.output.contains("1: aaa"));
831        assert!(!result.output.contains("4: ddd"));
832        assert!(result.output.contains("[Lines 2-3 of 5]"));
833
834        // Read from offset 4 to end
835        let result = tool
836            .execute(json!({"path": "lines.txt", "offset": 4}))
837            .await
838            .unwrap();
839        assert!(result.success);
840        assert!(result.output.contains("4: ddd"));
841        assert!(result.output.contains("5: eee"));
842        assert!(result.output.contains("[Lines 4-5 of 5]"));
843
844        // Limit only (first 2 lines)
845        let result = tool
846            .execute(json!({"path": "lines.txt", "limit": 2}))
847            .await
848            .unwrap();
849        assert!(result.success);
850        assert!(result.output.contains("1: aaa"));
851        assert!(result.output.contains("2: bbb"));
852        assert!(!result.output.contains("3: ccc"));
853        assert!(result.output.contains("[Lines 1-2 of 5]"));
854
855        // Full read (no offset/limit) shows all lines
856        let result = tool.execute(json!({"path": "lines.txt"})).await.unwrap();
857        assert!(result.success);
858        assert!(result.output.contains("1: aaa"));
859        assert!(result.output.contains("5: eee"));
860        assert!(result.output.contains("[5 lines total]"));
861
862        let _ = tokio::fs::remove_dir_all(&dir).await;
863    }
864
865    #[tokio::test]
866    async fn file_read_offset_beyond_end() {
867        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset_end");
868        let _ = tokio::fs::remove_dir_all(&dir).await;
869        tokio::fs::create_dir_all(&dir).await.unwrap();
870        tokio::fs::write(dir.join("short.txt"), "one\ntwo")
871            .await
872            .unwrap();
873
874        let tool = test_tool(dir.clone());
875        let result = tool
876            .execute(json!({"path": "short.txt", "offset": 100}))
877            .await
878            .unwrap();
879        assert!(result.success);
880        assert!(
881            result
882                .output
883                .contains("[No lines in range, file has 2 lines]")
884        );
885
886        let _ = tokio::fs::remove_dir_all(&dir).await;
887    }
888
889    #[tokio::test]
890    async fn file_read_rejects_oversized_file() {
891        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_large");
892        let _ = tokio::fs::remove_dir_all(&dir).await;
893        tokio::fs::create_dir_all(&dir).await.unwrap();
894
895        // Create a file just over 10 MB
896        let big = vec![b'x'; 10 * 1024 * 1024 + 1];
897        tokio::fs::write(dir.join("huge.bin"), &big).await.unwrap();
898
899        let tool = test_tool(dir.clone());
900        let result = tool.execute(json!({"path": "huge.bin"})).await.unwrap();
901        assert!(!result.success);
902        assert!(result.error.as_ref().unwrap().contains("File too large"));
903
904        let _ = tokio::fs::remove_dir_all(&dir).await;
905    }
906
907    /// PDF files should be readable via pdf-extract text extraction.
908    #[tokio::test]
909    async fn file_read_extracts_pdf_text() {
910        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_pdf");
911        let _ = tokio::fs::remove_dir_all(&dir).await;
912        tokio::fs::create_dir_all(&dir).await.unwrap();
913
914        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
915            .join("../../tests/fixtures/test_document.pdf");
916        tokio::fs::copy(&fixture, dir.join("report.pdf"))
917            .await
918            .expect("copy PDF fixture");
919
920        let tool = test_tool(dir.clone());
921        let result = tool.execute(json!({"path": "report.pdf"})).await.unwrap();
922
923        assert!(
924            result.success,
925            "PDF read must succeed, error: {:?}",
926            result.error
927        );
928        assert!(
929            result.output.contains("Hello"),
930            "extracted text must contain 'Hello', got: {}",
931            result.output
932        );
933
934        let _ = tokio::fs::remove_dir_all(&dir).await;
935    }
936
937    /// Non-UTF-8 binary files should be read with lossy conversion.
938    #[tokio::test]
939    async fn file_read_lossy_reads_binary_file() {
940        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_lossy");
941        let _ = tokio::fs::remove_dir_all(&dir).await;
942        tokio::fs::create_dir_all(&dir).await.unwrap();
943
944        // Write bytes that are not valid UTF-8 and not a PDF
945        let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
946        tokio::fs::write(dir.join("data.bin"), &binary_data)
947            .await
948            .unwrap();
949
950        let tool = test_tool(dir.clone());
951        let result = tool.execute(json!({"path": "data.bin"})).await.unwrap();
952
953        assert!(
954            result.success,
955            "lossy read must succeed, error: {:?}",
956            result.error
957        );
958        assert!(
959            result.output.contains('\u{FFFD}'),
960            "lossy output must contain replacement character, got: {:?}",
961            result.output
962        );
963        assert!(
964            result.output.contains("hi"),
965            "lossy output must preserve valid ASCII, got: {:?}",
966            result.output
967        );
968
969        let _ = tokio::fs::remove_dir_all(&dir).await;
970    }
971
972    // ── E2E: full agent pipeline with real FileReadTool + PDF extraction ──
973
974    mod e2e_helpers {
975        use crate::observability::{NoopObserver, Observer};
976        use std::sync::{Arc, Mutex};
977        use zeroclaw_config::schema::MemoryConfig;
978        use zeroclaw_memory::{self, Memory};
979        use zeroclaw_providers::{ChatMessage, ChatRequest, ChatResponse, ModelProvider};
980
981        pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
982
983        pub struct RecordingModelProvider {
984            responses: Mutex<Vec<ChatResponse>>,
985            pub requests: SharedRequests,
986        }
987
988        impl RecordingModelProvider {
989            pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
990                let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
991                let model_provider = Self {
992                    responses: Mutex::new(responses),
993                    requests: requests.clone(),
994                };
995                (model_provider, requests)
996            }
997        }
998
999        #[async_trait::async_trait]
1000        impl ModelProvider for RecordingModelProvider {
1001            async fn chat_with_system(
1002                &self,
1003                _system_prompt: Option<&str>,
1004                _message: &str,
1005                _model: &str,
1006                _temperature: Option<f64>,
1007            ) -> anyhow::Result<String> {
1008                Ok("fallback".into())
1009            }
1010
1011            async fn chat(
1012                &self,
1013                request: ChatRequest<'_>,
1014                _model: &str,
1015                _temperature: Option<f64>,
1016            ) -> anyhow::Result<ChatResponse> {
1017                self.requests
1018                    .lock()
1019                    .unwrap()
1020                    .push(request.messages.to_vec());
1021
1022                let mut guard = self.responses.lock().unwrap();
1023                if guard.is_empty() {
1024                    return Ok(ChatResponse {
1025                        text: Some("done".into()),
1026                        tool_calls: vec![],
1027                        usage: None,
1028                        reasoning_content: None,
1029                    });
1030                }
1031                Ok(guard.remove(0))
1032            }
1033        }
1034        impl ::zeroclaw_api::attribution::Attributable for RecordingModelProvider {
1035            fn role(&self) -> ::zeroclaw_api::attribution::Role {
1036                ::zeroclaw_api::attribution::Role::Provider(
1037                    ::zeroclaw_api::attribution::ProviderKind::Model(
1038                        ::zeroclaw_api::attribution::ModelProviderKind::Custom,
1039                    ),
1040                )
1041            }
1042            fn alias(&self) -> &str {
1043                "RecordingModelProvider"
1044            }
1045        }
1046
1047        pub fn make_memory() -> Arc<dyn Memory> {
1048            let cfg = MemoryConfig {
1049                backend: "none".into(),
1050                ..MemoryConfig::default()
1051            };
1052            Arc::from(zeroclaw_memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
1053        }
1054
1055        pub fn make_observer() -> Arc<dyn Observer> {
1056            Arc::from(NoopObserver {})
1057        }
1058    }
1059
1060    /// End-to-end test: scripted model_provider calls `file_read` on a real PDF
1061    /// fixture, the tool extracts text via pdf-extract, and the extracted
1062    /// content reaches the model_provider in the tool result message.
1063    #[tokio::test]
1064    async fn e2e_agent_file_read_pdf_extraction() {
1065        use crate::agent::agent::Agent;
1066        use crate::agent::dispatcher::NativeToolDispatcher;
1067        use e2e_helpers::*;
1068        use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
1069
1070        // ── Set up workspace with PDF fixture ──
1071        let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_pdf");
1072        let _ = tokio::fs::remove_dir_all(&workspace).await;
1073        tokio::fs::create_dir_all(&workspace).await.unwrap();
1074
1075        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
1076            .join("../../tests/fixtures/test_document.pdf");
1077        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
1078            .await
1079            .expect("copy PDF fixture");
1080
1081        // ── Build real FileReadTool ──
1082        let security = Arc::new(SecurityPolicy {
1083            autonomy: AutonomyLevel::Supervised,
1084            workspace_dir: workspace.clone(),
1085            ..SecurityPolicy::default()
1086        });
1087        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1088
1089        // ── Script model_provider: call file_read → then answer ──
1090        let (model_provider, recorded) = RecordingModelProvider::new(vec![
1091            // Turn 1 response: model_provider asks to read the PDF
1092            ChatResponse {
1093                text: Some(String::new()),
1094                tool_calls: vec![ToolCall {
1095                    id: "tc1".into(),
1096                    name: "file_read".into(),
1097                    arguments: r#"{"path": "report.pdf"}"#.into(),
1098                    extra_content: None,
1099                }],
1100                usage: None,
1101                reasoning_content: None,
1102            },
1103            // Turn 1 continued: model_provider sees tool result and answers
1104            ChatResponse {
1105                text: Some("The PDF contains a greeting: Hello PDF".into()),
1106                tool_calls: vec![],
1107                usage: None,
1108                reasoning_content: None,
1109            },
1110        ]);
1111
1112        let mut agent = Agent::builder()
1113            .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1114            .tools(vec![file_read_tool])
1115            .memory(make_memory())
1116            .observer(make_observer())
1117            .tool_dispatcher(Box::new(NativeToolDispatcher))
1118            .workspace_dir(workspace.clone())
1119            .build()
1120            .unwrap();
1121
1122        // ── Execute ──
1123        let response = agent
1124            .turn("Read report.pdf and tell me what it says")
1125            .await
1126            .unwrap();
1127
1128        // ── Verify final response ──
1129        assert!(
1130            response.contains("Hello PDF"),
1131            "agent response must contain PDF content, got: {response}",
1132        );
1133
1134        // ── Verify model_provider received extracted PDF text in tool result ──
1135        {
1136            let all_requests = recorded.lock().unwrap();
1137            assert!(
1138                all_requests.len() >= 2,
1139                "expected at least 2 model_provider requests (initial + after tool), got {}",
1140                all_requests.len(),
1141            );
1142
1143            let second_request = &all_requests[1];
1144            let tool_result_msg = second_request
1145                .iter()
1146                .find(|m| m.role == "tool")
1147                .expect("second request must contain a tool result message");
1148
1149            assert!(
1150                tool_result_msg.content.contains("Hello"),
1151                "tool result must contain extracted PDF text 'Hello', got: {}",
1152                tool_result_msg.content,
1153            );
1154        }
1155
1156        let _ = tokio::fs::remove_dir_all(&workspace).await;
1157    }
1158
1159    /// End-to-end test: agent calls `file_read` on a binary file, gets
1160    /// lossy UTF-8 output with replacement characters in the tool result.
1161    #[tokio::test]
1162    async fn e2e_agent_file_read_lossy_binary() {
1163        use crate::agent::agent::Agent;
1164        use crate::agent::dispatcher::NativeToolDispatcher;
1165        use e2e_helpers::*;
1166        use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
1167
1168        // ── Set up workspace with binary file ──
1169        let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_lossy");
1170        let _ = tokio::fs::remove_dir_all(&workspace).await;
1171        tokio::fs::create_dir_all(&workspace).await.unwrap();
1172
1173        let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
1174        tokio::fs::write(workspace.join("data.bin"), &binary_data)
1175            .await
1176            .unwrap();
1177
1178        let security = Arc::new(SecurityPolicy {
1179            autonomy: AutonomyLevel::Supervised,
1180            workspace_dir: workspace.clone(),
1181            ..SecurityPolicy::default()
1182        });
1183        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1184
1185        let (model_provider, recorded) = RecordingModelProvider::new(vec![
1186            ChatResponse {
1187                text: Some(String::new()),
1188                tool_calls: vec![ToolCall {
1189                    id: "tc1".into(),
1190                    name: "file_read".into(),
1191                    arguments: r#"{"path": "data.bin"}"#.into(),
1192                    extra_content: None,
1193                }],
1194                usage: None,
1195                reasoning_content: None,
1196            },
1197            ChatResponse {
1198                text: Some("The file appears to be binary data.".into()),
1199                tool_calls: vec![],
1200                usage: None,
1201                reasoning_content: None,
1202            },
1203        ]);
1204
1205        let mut agent = Agent::builder()
1206            .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1207            .tools(vec![file_read_tool])
1208            .memory(make_memory())
1209            .observer(make_observer())
1210            .tool_dispatcher(Box::new(NativeToolDispatcher))
1211            .workspace_dir(workspace.clone())
1212            .build()
1213            .unwrap();
1214
1215        let response = agent.turn("Read data.bin").await.unwrap();
1216
1217        assert!(
1218            response.contains("binary"),
1219            "agent response must mention binary, got: {response}",
1220        );
1221
1222        // Verify tool result contains lossy output with replacement chars
1223        {
1224            let all_requests = recorded.lock().unwrap();
1225            assert!(
1226                all_requests.len() >= 2,
1227                "expected at least 2 model_provider requests, got {}",
1228                all_requests.len(),
1229            );
1230
1231            let tool_result_msg = all_requests[1]
1232                .iter()
1233                .find(|m| m.role == "tool")
1234                .expect("second request must contain a tool result message");
1235
1236            assert!(
1237                tool_result_msg.content.contains("valid"),
1238                "tool result must preserve valid ASCII from binary file, got: {}",
1239                tool_result_msg.content,
1240            );
1241            assert!(
1242                tool_result_msg.content.contains('\u{FFFD}'),
1243                "tool result must contain replacement character for invalid bytes, got: {}",
1244                tool_result_msg.content,
1245            );
1246        }
1247
1248        let _ = tokio::fs::remove_dir_all(&workspace).await;
1249    }
1250
1251    /// Live e2e: real OpenAI Codex model_provider + real FileReadTool + PDF fixture.
1252    /// Verifies the model receives extracted PDF text and responds meaningfully.
1253    ///
1254    /// Requires valid OAuth credentials in `~/.zeroclaw/`.
1255    /// Run: `cargo test --lib -- tools::file_read::tests::e2e_live_file_read_pdf --ignored --nocapture`
1256    #[tokio::test]
1257    #[ignore = "requires valid OpenAI Codex OAuth credentials"]
1258    async fn e2e_live_file_read_pdf() {
1259        use crate::agent::agent::Agent;
1260        use crate::agent::dispatcher::XmlToolDispatcher;
1261        use e2e_helpers::*;
1262        use zeroclaw_providers::openai_codex::OpenAiCodexModelProvider;
1263        use zeroclaw_providers::{ModelProvider, ModelProviderRuntimeOptions};
1264
1265        // ── Set up workspace with PDF fixture ──
1266        let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_live_file_read_pdf");
1267        let _ = tokio::fs::remove_dir_all(&workspace).await;
1268        tokio::fs::create_dir_all(&workspace).await.unwrap();
1269
1270        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
1271            .join("../../tests/fixtures/test_document.pdf");
1272        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
1273            .await
1274            .expect("copy PDF fixture");
1275
1276        // ── Build real FileReadTool ──
1277        let security = Arc::new(SecurityPolicy {
1278            autonomy: AutonomyLevel::Supervised,
1279            workspace_dir: workspace.clone(),
1280            ..SecurityPolicy::default()
1281        });
1282        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1283
1284        // ── Real model_provider (OpenAI Codex uses XML tool dispatch) ──
1285        let model_provider =
1286            OpenAiCodexModelProvider::new("test", &ModelProviderRuntimeOptions::default(), None)
1287                .expect("model_provider should initialize");
1288
1289        let mut agent = Agent::builder()
1290            .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1291            .tools(vec![file_read_tool])
1292            .memory(make_memory())
1293            .observer(make_observer())
1294            .tool_dispatcher(Box::new(XmlToolDispatcher))
1295            .workspace_dir(workspace.clone())
1296            .model_name("gpt-5.3-codex".to_string())
1297            .build()
1298            .unwrap();
1299
1300        // ── Execute ──
1301        let response = agent
1302            .turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
1303            .await
1304            .unwrap();
1305
1306        eprintln!("=== Live e2e response ===\n{response}\n=========================");
1307
1308        // ── Verify model saw the actual PDF content ("Hello PDF") ──
1309        let lower = response.to_lowercase();
1310        assert!(
1311            lower.contains("hello"),
1312            "model response must reference extracted PDF text 'Hello PDF', got: {response}",
1313        );
1314
1315        let _ = tokio::fs::remove_dir_all(&workspace).await;
1316    }
1317
1318    #[tokio::test]
1319    async fn file_read_blocks_null_byte_in_path() {
1320        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_null_byte");
1321        let _ = tokio::fs::remove_dir_all(&dir).await;
1322        tokio::fs::create_dir_all(&dir).await.unwrap();
1323
1324        let tool = test_tool(dir.clone());
1325        let result = tool
1326            .execute(json!({"path": "test\0evil.txt"}))
1327            .await
1328            .unwrap();
1329        assert!(!result.success);
1330        assert!(result.error.as_ref().unwrap().contains("not allowed"));
1331
1332        let _ = tokio::fs::remove_dir_all(&dir).await;
1333    }
1334
1335    #[cfg(unix)]
1336    #[tokio::test]
1337    async fn file_read_allows_dev_null() {
1338        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_dev_null");
1339        let _ = tokio::fs::remove_dir_all(&dir).await;
1340        tokio::fs::create_dir_all(&dir).await.unwrap();
1341
1342        let tool = test_tool(dir.clone());
1343        let result = tool.execute(json!({"path": "/dev/null"})).await.unwrap();
1344
1345        assert!(
1346            result.success,
1347            "file_read of /dev/null must succeed, error: {:?}",
1348            result.error
1349        );
1350        assert_eq!(result.output, "", "/dev/null must read as empty");
1351
1352        let _ = tokio::fs::remove_dir_all(&dir).await;
1353    }
1354
1355    #[tokio::test]
1356    async fn file_read_allowed_root_with_workspace_only() {
1357        let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_root");
1358        let workspace = root.join("workspace");
1359        let allowed = root.join("allowed_dir");
1360
1361        let _ = tokio::fs::remove_dir_all(&root).await;
1362        tokio::fs::create_dir_all(&workspace).await.unwrap();
1363        tokio::fs::create_dir_all(&allowed).await.unwrap();
1364        tokio::fs::write(allowed.join("data.txt"), "allowed content")
1365            .await
1366            .unwrap();
1367
1368        let security = Arc::new(SecurityPolicy {
1369            autonomy: AutonomyLevel::Supervised,
1370            workspace_dir: workspace.clone(),
1371            workspace_only: true,
1372            allowed_roots: vec![allowed.clone()],
1373            ..SecurityPolicy::default()
1374        });
1375        let tool = FileReadTool::new(security);
1376
1377        // Absolute path under allowed_root should succeed
1378        let abs_path = allowed.join("data.txt").to_string_lossy().to_string();
1379        let result = tool.execute(json!({"path": &abs_path})).await.unwrap();
1380
1381        assert!(
1382            result.success,
1383            "file_read with allowed_root path should succeed, error: {:?}",
1384            result.error
1385        );
1386        assert!(result.output.contains("allowed content"));
1387
1388        // Path outside both workspace and allowed_roots should still fail
1389        let outside = root.join("outside");
1390        tokio::fs::create_dir_all(&outside).await.unwrap();
1391        tokio::fs::write(outside.join("secret.txt"), "secret")
1392            .await
1393            .unwrap();
1394        let outside_path = outside.join("secret.txt").to_string_lossy().to_string();
1395        let result = tool.execute(json!({"path": &outside_path})).await.unwrap();
1396        assert!(!result.success);
1397
1398        let _ = tokio::fs::remove_dir_all(&root).await;
1399    }
1400
1401    /// Anti-probing regression: a caller cannot probe file existence for free.
1402    /// Both `resolve_candidate` failures and `canonicalize` failures must
1403    /// consume one action-budget slot, so repeated probes hit the rate limit.
1404    #[tokio::test]
1405    async fn file_read_nonexistent_consumes_rate_limit_budget() {
1406        let dir = std::env::temp_dir().join("zeroclaw_test_file_read_probe");
1407        let _ = tokio::fs::remove_dir_all(&dir).await;
1408        tokio::fs::create_dir_all(&dir).await.unwrap();
1409
1410        // Allow only 2 actions total.
1411        let tool = test_tool_with(dir.clone(), AutonomyLevel::Supervised, 2);
1412
1413        // Two failing reads each consume one slot via the inner-tool charge.
1414        let r1 = tool.execute(json!({"path": "nope1.txt"})).await.unwrap();
1415        assert!(!r1.success);
1416        assert!(
1417            r1.error
1418                .as_deref()
1419                .unwrap_or("")
1420                .contains("Failed to resolve")
1421        );
1422
1423        let r2 = tool.execute(json!({"path": "nope2.txt"})).await.unwrap();
1424        assert!(!r2.success);
1425        assert!(
1426            r2.error
1427                .as_deref()
1428                .unwrap_or("")
1429                .contains("Failed to resolve")
1430        );
1431
1432        // Third attempt: budget is now exhausted.  The inner tool still
1433        // charges, but `record_action()` returns false; the failure error
1434        // is unchanged from the caller's perspective (probing failed),
1435        // and the budget is observably full (a subsequent allowed read
1436        // would have to wait for the window to reset).
1437        let r3 = tool.execute(json!({"path": "nope3.txt"})).await.unwrap();
1438        assert!(!r3.success);
1439
1440        // Verify the budget is actually full by attempting a real read,
1441        // which must now report rate-limit exhaustion when wrapped, or at
1442        // minimum fail.  Here we use the inner-only tool, so we just
1443        // assert that record_action returns false (budget already at cap).
1444        // The inner tool's own retry would consume nothing more.
1445        assert!(!tool.security.record_action(), "budget must be exhausted");
1446
1447        let _ = tokio::fs::remove_dir_all(&dir).await;
1448    }
1449}