1use crate::security::SecurityPolicy;
2use async_trait::async_trait;
3use serde_json::json;
4use std::sync::Arc;
5use zeroclaw_api::tool::{Tool, ToolResult, with_ephemeral_workspace_warning};
6
/// Largest file, in bytes, that the tool will read (10 MiB). Files whose
/// metadata reports a greater length are rejected before any content is read.
const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024;
8
9pub struct FileReadTool {
11 security: Arc<SecurityPolicy>,
12 persistent_writes: bool,
18}
19
20impl FileReadTool {
21 pub fn new(security: Arc<SecurityPolicy>) -> Self {
22 Self {
23 security,
24 persistent_writes: true,
25 }
26 }
27
28 pub fn new_with_persistence(security: Arc<SecurityPolicy>, persistent_writes: bool) -> Self {
32 Self {
33 security,
34 persistent_writes,
35 }
36 }
37
38 fn resolve_candidate(&self, path: &str) -> anyhow::Result<std::path::PathBuf> {
44 if path.contains('\0') {
45 anyhow::bail!("Path not allowed: contains null byte");
46 }
47 if std::path::Path::new(path)
48 .components()
49 .any(|c| matches!(c, std::path::Component::ParentDir))
50 {
51 anyhow::bail!("Path not allowed by security policy: {path}");
52 }
53
54 let p = std::path::Path::new(path);
55 if p.is_absolute() {
56 return Ok(p.to_path_buf());
57 }
58
59 let workspace_dir = &self.security.workspace_dir;
60 if let Ok(workspace_rootless) = workspace_dir.strip_prefix("/")
61 && let Ok(stripped) = p.strip_prefix(workspace_rootless)
62 {
63 return Ok(if stripped.as_os_str().is_empty() {
64 workspace_dir.clone()
65 } else {
66 workspace_dir.join(stripped)
67 });
68 }
69
70 Ok(workspace_dir.join(p))
71 }
72}
73
74#[async_trait]
75impl Tool for FileReadTool {
76 fn name(&self) -> &str {
77 "file_read"
78 }
79
80 fn description(&self) -> &str {
81 "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion. Set encoding=\"base64\" to return raw bytes base64-encoded (for binary files such as .xlsx/.docx); offset/limit are ignored in that mode."
82 }
83
84 fn parameters_schema(&self) -> serde_json::Value {
85 json!({
86 "type": "object",
87 "properties": {
88 "path": {
89 "type": "string",
90 "description": "Path to the file. Relative paths resolve from workspace root; absolute paths must be within the workspace."
91 },
92 "offset": {
93 "type": "integer",
94 "description": "Starting line number (1-based, default: 1). Ignored when encoding is 'base64'."
95 },
96 "limit": {
97 "type": "integer",
98 "description": "Maximum number of lines to return (default: all). Ignored when encoding is 'base64'."
99 },
100 "encoding": {
101 "type": "string",
102 "enum": ["utf8", "base64"],
103 "description": "Output encoding (default: 'utf8'). Use 'base64' to read binary files as base64-encoded bytes."
104 }
105 },
106 "required": ["path"]
107 })
108 }
109
110 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
111 let is_base64 = args.get("encoding").and_then(|v| v.as_str()) == Some("base64");
116 let mut result = self.read_path(args).await?;
117 if !self.persistent_writes && result.success && !is_base64 {
118 result.output = with_ephemeral_workspace_warning(&result.output);
119 }
120 Ok(result)
121 }
122}
123
124impl FileReadTool {
125 async fn read_path(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
128 let path = args.get("path").and_then(|v| v.as_str()).ok_or_else(|| {
129 ::zeroclaw_log::record!(
130 WARN,
131 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
132 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
133 .with_attrs(::serde_json::json!({"param": "path"})),
134 "tool argument validation failed"
135 );
136
137 anyhow::Error::msg("Missing 'path' parameter")
138 })?;
139
140 let full_path = match self.resolve_candidate(path) {
155 Ok(p) => p,
156 Err(e) => {
157 let _ = self.security.record_action();
158 return Ok(ToolResult {
159 success: false,
160 output: String::new(),
161 error: Some(e.to_string()),
162 });
163 }
164 };
165
166 let resolved_path = match tokio::fs::canonicalize(&full_path).await {
168 Ok(p) => p,
169 Err(e) => {
170 let _ = self.security.record_action();
171 return Ok(ToolResult {
172 success: false,
173 output: String::new(),
174 error: Some(format!("Failed to resolve file path: {e}")),
175 });
176 }
177 };
178
179 if !self.security.is_resolved_path_readable(&resolved_path) {
182 return Ok(ToolResult {
183 success: false,
184 output: String::new(),
185 error: Some(format!("Path escapes workspace directory: {path}")),
186 });
187 }
188
189 match tokio::fs::metadata(&resolved_path).await {
191 Ok(meta) => {
192 if meta.len() > MAX_FILE_SIZE_BYTES {
193 return Ok(ToolResult {
194 success: false,
195 output: String::new(),
196 error: Some(format!(
197 "File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
198 meta.len()
199 )),
200 });
201 }
202 }
203 Err(e) => {
204 return Ok(ToolResult {
205 success: false,
206 output: String::new(),
207 error: Some(format!("Failed to read file metadata: {e}")),
208 });
209 }
210 }
211
212 let encoding = args
213 .get("encoding")
214 .and_then(|v| v.as_str())
215 .unwrap_or("utf8");
216
217 if encoding == "base64" {
218 let bytes = match tokio::fs::read(&resolved_path).await {
221 Ok(b) => b,
222 Err(e) => {
223 return Ok(ToolResult {
224 success: false,
225 output: String::new(),
226 error: Some(format!("Failed to read file: {e}")),
227 });
228 }
229 };
230 use base64::Engine;
231 let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
232 return Ok(ToolResult {
233 success: true,
234 output: encoded,
235 error: None,
236 });
237 } else if encoding != "utf8" {
238 return Ok(ToolResult {
239 success: false,
240 output: String::new(),
241 error: Some(format!(
242 "Unsupported encoding '{encoding}' (expected 'utf8' or 'base64')"
243 )),
244 });
245 }
246
247 match tokio::fs::read_to_string(&resolved_path).await {
248 Ok(contents) => {
249 let lines: Vec<&str> = contents.lines().collect();
250 let total = lines.len();
251
252 if total == 0 {
253 return Ok(ToolResult {
254 success: true,
255 output: String::new(),
256 error: None,
257 });
258 }
259
260 let offset = args
261 .get("offset")
262 .and_then(|v| v.as_u64())
263 .map(|v| {
264 usize::try_from(v.max(1))
265 .unwrap_or(usize::MAX)
266 .saturating_sub(1)
267 })
268 .unwrap_or(0);
269 let start = offset.min(total);
270
271 let end = match args.get("limit").and_then(|v| v.as_u64()) {
272 Some(l) => {
273 let limit = usize::try_from(l).unwrap_or(usize::MAX);
274 (start.saturating_add(limit)).min(total)
275 }
276 None => total,
277 };
278
279 if start >= end {
280 return Ok(ToolResult {
281 success: true,
282 output: format!("[No lines in range, file has {total} lines]"),
283 error: None,
284 });
285 }
286
287 let numbered: String = lines[start..end]
288 .iter()
289 .enumerate()
290 .map(|(i, line)| format!("{}: {}", start + i + 1, line))
291 .collect::<Vec<_>>()
292 .join("\n");
293
294 let partial = start > 0 || end < total;
295 let summary = if partial {
296 format!("\n[Lines {}-{} of {total}]", start + 1, end)
297 } else {
298 format!("\n[{total} lines total]")
299 };
300
301 Ok(ToolResult {
302 success: true,
303 output: format!("{numbered}{summary}"),
304 error: None,
305 })
306 }
307 Err(_) => {
308 let bytes = tokio::fs::read(&resolved_path).await.map_err(|e| {
310 ::zeroclaw_log::record!(
311 WARN,
312 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
313 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
314 .with_attrs(::serde_json::json!({
315 "path": resolved_path.display().to_string(),
316 "error": format!("{}", e),
317 })),
318 "file_read: raw byte fallback read failed"
319 );
320 anyhow::Error::msg(format!("Failed to read file: {e}"))
321 })?;
322
323 if let Some(text) = try_extract_pdf_text(&bytes) {
324 return Ok(ToolResult {
325 success: true,
326 output: text,
327 error: None,
328 });
329 }
330
331 let lossy = String::from_utf8_lossy(&bytes).into_owned();
333 Ok(ToolResult {
334 success: true,
335 output: lossy,
336 error: None,
337 })
338 }
339 }
340 }
341}
342
/// Extracts text from `bytes` when they start with the `%PDF-` signature.
///
/// Returns `None` when the signature is absent, when extraction fails, or
/// when the extracted text is empty after trimming.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|extracted| !extracted.trim().is_empty())
}
354
/// Stand-in used when the `rag-pdf` feature is disabled: never extracts text.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
    Option::None
}
359
360#[cfg(test)]
361mod tests {
362 use super::*;
363 use crate::security::{AutonomyLevel, SecurityPolicy};
364
365 fn test_tool(workspace: std::path::PathBuf) -> FileReadTool {
366 let security = Arc::new(SecurityPolicy {
367 autonomy: AutonomyLevel::Supervised,
368 workspace_dir: workspace,
369 ..SecurityPolicy::default()
370 });
371 FileReadTool::new(security)
372 }
373
374 fn test_tool_with(
375 workspace: std::path::PathBuf,
376 autonomy: AutonomyLevel,
377 max_actions_per_hour: u32,
378 ) -> FileReadTool {
379 let security = Arc::new(SecurityPolicy {
380 autonomy,
381 workspace_dir: workspace,
382 max_actions_per_hour,
383 ..SecurityPolicy::default()
384 });
385 FileReadTool::new(security)
386 }
387
388 fn ephemeral_tool(workspace: std::path::PathBuf) -> FileReadTool {
389 let security = Arc::new(SecurityPolicy {
390 autonomy: AutonomyLevel::Supervised,
391 workspace_dir: workspace,
392 ..SecurityPolicy::default()
393 });
394 FileReadTool::new_with_persistence(security, false)
395 }
396
/// The tool reports the name `file_read`.
#[test]
fn file_read_name() {
    let reader = test_tool(std::env::temp_dir());
    let reported = reader.name();
    assert_eq!(reported, "file_read");
}
402
/// The schema declares `path`, `offset` and `limit`, and only `path` of those
/// is required.
#[test]
fn file_read_schema_has_path() {
    let schema = test_tool(std::env::temp_dir()).parameters_schema();

    for property in ["path", "offset", "limit"] {
        assert!(schema["properties"][property].is_object());
    }

    let required = schema["required"].as_array().unwrap();
    assert!(required.contains(&json!("path")));
    assert!(!required.contains(&json!("offset")));
}
424
425 #[tokio::test]
426 async fn file_read_existing_file() {
427 let dir = std::env::temp_dir().join("zeroclaw_test_file_read");
428 let _ = tokio::fs::remove_dir_all(&dir).await;
429 tokio::fs::create_dir_all(&dir).await.unwrap();
430 tokio::fs::write(dir.join("test.txt"), "hello world")
431 .await
432 .unwrap();
433
434 let tool = test_tool(dir.clone());
435 let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
436 assert!(result.success);
437 assert!(result.output.contains("1: hello world"));
438 assert!(result.output.contains("[1 lines total]"));
439 assert!(result.error.is_none());
440
441 let _ = tokio::fs::remove_dir_all(&dir).await;
442 }
443
444 #[tokio::test]
445 async fn file_read_nonexistent_file() {
446 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_missing");
447 let _ = tokio::fs::remove_dir_all(&dir).await;
448 tokio::fs::create_dir_all(&dir).await.unwrap();
449
450 let tool = test_tool(dir.clone());
451 let result = tool.execute(json!({"path": "nope.txt"})).await.unwrap();
452 assert!(!result.success);
453 assert!(result.error.as_ref().unwrap().contains("Failed to resolve"));
454
455 let _ = tokio::fs::remove_dir_all(&dir).await;
456 }
457
458 #[tokio::test]
459 async fn file_read_blocks_path_traversal() {
460 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_traversal");
461 let _ = tokio::fs::remove_dir_all(&dir).await;
462 tokio::fs::create_dir_all(&dir).await.unwrap();
463
464 let tool = test_tool(dir.clone());
465 let result = tool
466 .execute(json!({"path": "../../../etc/passwd"}))
467 .await
468 .unwrap();
469 assert!(!result.success);
470 assert!(result.error.as_ref().unwrap().contains("not allowed"));
471
472 let _ = tokio::fs::remove_dir_all(&dir).await;
473 }
474
475 #[tokio::test]
476 async fn file_read_blocks_absolute_path() {
477 let tool = test_tool(std::env::temp_dir());
478
479 #[cfg(unix)]
480 let target = "/etc/passwd";
481 #[cfg(windows)]
482 let target = {
483 let sysroot = std::env::var("SystemRoot").unwrap_or_else(|_| r"C:\Windows".to_string());
484 std::path::PathBuf::from(sysroot).join(r"System32\drivers\etc\hosts")
485 };
486
487 let result = tool.execute(json!({"path": target})).await.unwrap();
488 assert!(!result.success);
489 assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
490 }
491
492 #[tokio::test]
493 async fn file_read_allows_readonly_mode() {
494 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_readonly");
495 let _ = tokio::fs::remove_dir_all(&dir).await;
496 tokio::fs::create_dir_all(&dir).await.unwrap();
497 tokio::fs::write(dir.join("test.txt"), "readonly ok")
498 .await
499 .unwrap();
500
501 let tool = test_tool_with(dir.clone(), AutonomyLevel::ReadOnly, 20);
502 let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
503
504 assert!(result.success);
505 assert!(result.output.contains("1: readonly ok"));
506
507 let _ = tokio::fs::remove_dir_all(&dir).await;
508 }
509
510 #[tokio::test]
511 async fn file_read_missing_path_param() {
512 let tool = test_tool(std::env::temp_dir());
513 let result = tool.execute(json!({})).await;
514 assert!(result.is_err());
515 }
516
/// The schema declares the `encoding` property.
#[test]
fn file_read_schema_has_encoding() {
    let schema = test_tool(std::env::temp_dir()).parameters_schema();
    let encoding = &schema["properties"]["encoding"];
    assert!(encoding.is_object());
}
523
524 #[tokio::test]
525 async fn file_read_base64_returns_encoded_bytes() {
526 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_base64");
527 let _ = tokio::fs::remove_dir_all(&dir).await;
528 tokio::fs::create_dir_all(&dir).await.unwrap();
529
530 let raw: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'P', b'K', 0x03, 0x04];
532 tokio::fs::write(dir.join("data.bin"), &raw).await.unwrap();
533
534 let tool = test_tool(dir.clone());
535 let result = tool
536 .execute(json!({"path": "data.bin", "encoding": "base64"}))
537 .await
538 .unwrap();
539 assert!(result.success, "error: {:?}", result.error);
540
541 use base64::Engine;
542 let decoded = base64::engine::general_purpose::STANDARD
543 .decode(result.output.trim())
544 .expect("output must be valid base64");
545 assert_eq!(decoded, raw, "base64 read must round-trip exact bytes");
546
547 let _ = tokio::fs::remove_dir_all(&dir).await;
548 }
549
550 #[tokio::test]
555 async fn file_read_warns_on_ephemeral_workspace() {
556 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_ephemeral");
557 let _ = tokio::fs::remove_dir_all(&dir).await;
558 tokio::fs::create_dir_all(&dir).await.unwrap();
559 tokio::fs::write(dir.join("notes.txt"), "host content?")
560 .await
561 .unwrap();
562
563 let tool = ephemeral_tool(dir.clone());
564 let result = tool.execute(json!({"path": "notes.txt"})).await.unwrap();
565 assert!(result.success);
566 assert!(
567 result.output.contains("EPHEMERAL WORKSPACE"),
568 "ephemeral warning must be present, got: {}",
569 result.output
570 );
571 assert!(result.output.contains("mount_workspace"));
572 assert!(
573 result.output.contains("host content?"),
574 "original read content must be preserved, got: {}",
575 result.output
576 );
577
578 let _ = tokio::fs::remove_dir_all(&dir).await;
579 }
580
581 #[tokio::test]
584 async fn file_read_base64_not_warned_on_ephemeral_workspace() {
585 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_ephemeral_b64");
586 let _ = tokio::fs::remove_dir_all(&dir).await;
587 tokio::fs::create_dir_all(&dir).await.unwrap();
588 let raw: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'P', b'K'];
589 tokio::fs::write(dir.join("data.bin"), &raw).await.unwrap();
590
591 let tool = ephemeral_tool(dir.clone());
592 let result = tool
593 .execute(json!({"path": "data.bin", "encoding": "base64"}))
594 .await
595 .unwrap();
596 assert!(result.success, "error: {:?}", result.error);
597 assert!(
598 !result.output.contains("EPHEMERAL WORKSPACE"),
599 "base64 payload must not be annotated, got: {}",
600 result.output
601 );
602 use base64::Engine;
603 let decoded = base64::engine::general_purpose::STANDARD
604 .decode(result.output.trim())
605 .expect("base64 output must still decode");
606 assert_eq!(decoded, raw, "base64 read must round-trip exact bytes");
607
608 let _ = tokio::fs::remove_dir_all(&dir).await;
609 }
610
611 #[tokio::test]
614 async fn file_read_failure_not_warned_on_ephemeral_workspace() {
615 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_ephemeral_fail");
616 let _ = tokio::fs::remove_dir_all(&dir).await;
617 tokio::fs::create_dir_all(&dir).await.unwrap();
618
619 let tool = ephemeral_tool(dir.clone());
620 let result = tool.execute(json!({"path": "missing.txt"})).await.unwrap();
621 assert!(!result.success);
622 assert!(!result.output.contains("EPHEMERAL WORKSPACE"));
623 assert!(
624 !result
625 .error
626 .as_deref()
627 .unwrap_or("")
628 .contains("EPHEMERAL WORKSPACE")
629 );
630
631 let _ = tokio::fs::remove_dir_all(&dir).await;
632 }
633
634 #[tokio::test]
636 async fn file_read_no_warning_when_persistent() {
637 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_persistent");
638 let _ = tokio::fs::remove_dir_all(&dir).await;
639 tokio::fs::create_dir_all(&dir).await.unwrap();
640 tokio::fs::write(dir.join("notes.txt"), "ok").await.unwrap();
641
642 let tool = test_tool(dir.clone());
643 let result = tool.execute(json!({"path": "notes.txt"})).await.unwrap();
644 assert!(result.success);
645 assert!(
646 !result.output.contains("EPHEMERAL WORKSPACE"),
647 "no ephemeral warning expected on a persistent runtime, got: {}",
648 result.output
649 );
650
651 let _ = tokio::fs::remove_dir_all(&dir).await;
652 }
653
654 #[tokio::test]
655 async fn file_read_unsupported_encoding_errors() {
656 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_bad_encoding");
657 let _ = tokio::fs::remove_dir_all(&dir).await;
658 tokio::fs::create_dir_all(&dir).await.unwrap();
659 tokio::fs::write(dir.join("f.txt"), "hi").await.unwrap();
660
661 let tool = test_tool(dir.clone());
662 let result = tool
663 .execute(json!({"path": "f.txt", "encoding": "hex"}))
664 .await
665 .unwrap();
666 assert!(!result.success);
667 assert!(
668 result
669 .error
670 .as_deref()
671 .unwrap_or("")
672 .contains("Unsupported encoding")
673 );
674
675 let _ = tokio::fs::remove_dir_all(&dir).await;
676 }
677
678 #[tokio::test]
679 async fn file_read_empty_file() {
680 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_empty");
681 let _ = tokio::fs::remove_dir_all(&dir).await;
682 tokio::fs::create_dir_all(&dir).await.unwrap();
683 tokio::fs::write(dir.join("empty.txt"), "").await.unwrap();
684
685 let tool = test_tool(dir.clone());
686 let result = tool.execute(json!({"path": "empty.txt"})).await.unwrap();
687 assert!(result.success);
688 assert_eq!(result.output, "");
689
690 let _ = tokio::fs::remove_dir_all(&dir).await;
691 }
692
693 #[tokio::test]
694 async fn file_read_nested_path() {
695 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_nested");
696 let _ = tokio::fs::remove_dir_all(&dir).await;
697 tokio::fs::create_dir_all(dir.join("sub/dir"))
698 .await
699 .unwrap();
700 tokio::fs::write(dir.join("sub/dir/deep.txt"), "deep content")
701 .await
702 .unwrap();
703
704 let tool = test_tool(dir.clone());
705 let result = tool
706 .execute(json!({"path": "sub/dir/deep.txt"}))
707 .await
708 .unwrap();
709 assert!(result.success);
710 assert!(result.output.contains("1: deep content"));
711
712 let _ = tokio::fs::remove_dir_all(&dir).await;
713 }
714
715 #[cfg(unix)]
716 #[tokio::test]
717 async fn file_read_blocks_symlink_escape() {
718 use std::os::unix::fs::symlink;
719
720 let root = std::env::temp_dir().join("zeroclaw_test_file_read_symlink_escape");
721 let workspace = root.join("workspace");
722 let outside = root.join("outside");
723
724 let _ = tokio::fs::remove_dir_all(&root).await;
725 tokio::fs::create_dir_all(&workspace).await.unwrap();
726 tokio::fs::create_dir_all(&outside).await.unwrap();
727
728 tokio::fs::write(outside.join("secret.txt"), "outside workspace")
729 .await
730 .unwrap();
731
732 symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();
733
734 let tool = test_tool(workspace.clone());
735 let result = tool.execute(json!({"path": "escape.txt"})).await.unwrap();
736
737 assert!(!result.success);
738 assert!(
739 result
740 .error
741 .as_deref()
742 .unwrap_or("")
743 .contains("escapes workspace")
744 );
745
746 let _ = tokio::fs::remove_dir_all(&root).await;
747 }
748
749 #[tokio::test]
750 async fn file_read_blocks_outside_workspace_regardless_of_policy() {
751 let root = std::env::temp_dir().join("zeroclaw_test_file_read_blocks_outside");
752 let workspace = root.join("workspace");
753 let outside = root.join("outside");
754 let outside_file = outside.join("notes.txt");
755
756 let _ = tokio::fs::remove_dir_all(&root).await;
757 tokio::fs::create_dir_all(&workspace).await.unwrap();
758 tokio::fs::create_dir_all(&outside).await.unwrap();
759 tokio::fs::write(&outside_file, "outside").await.unwrap();
760
761 let tool = test_tool(workspace.clone());
762
763 let result = tool
764 .execute(json!({"path": outside_file.to_string_lossy().to_string()}))
765 .await
766 .unwrap();
767
768 assert!(!result.success);
769 assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
770
771 let _ = tokio::fs::remove_dir_all(&root).await;
772 }
773
774 #[tokio::test]
775 async fn file_read_admits_absolute_path_under_read_only_root() {
776 let root =
777 std::env::temp_dir().join("zeroclaw_test_file_read_admits_absolute_path_under_ro_root");
778 let workspace = root.join("workspace");
779 let ro_root = root.join("shared");
780 let ro_file = ro_root.join("notes.txt");
781
782 let _ = tokio::fs::remove_dir_all(&root).await;
783 tokio::fs::create_dir_all(&workspace).await.unwrap();
784 tokio::fs::create_dir_all(&ro_root).await.unwrap();
785 tokio::fs::write(&ro_file, "cross-agent read")
786 .await
787 .unwrap();
788
789 let security = Arc::new(SecurityPolicy {
790 autonomy: AutonomyLevel::Supervised,
791 workspace_dir: workspace,
792 allowed_roots_read_only: vec![ro_root.clone()],
793 ..SecurityPolicy::default()
794 });
795 let tool = FileReadTool::new(security);
796
797 let result = tool
798 .execute(json!({"path": ro_file.to_string_lossy().to_string()}))
799 .await
800 .unwrap();
801
802 assert!(
803 result.success,
804 "absolute path under read-only root must read: {result:?}"
805 );
806 assert!(result.output.contains("cross-agent read"));
807
808 let _ = tokio::fs::remove_dir_all(&root).await;
809 }
810
811 #[tokio::test]
812 async fn file_read_with_offset_and_limit() {
813 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset");
814 let _ = tokio::fs::remove_dir_all(&dir).await;
815 tokio::fs::create_dir_all(&dir).await.unwrap();
816 tokio::fs::write(dir.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
817 .await
818 .unwrap();
819
820 let tool = test_tool(dir.clone());
821
822 let result = tool
824 .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
825 .await
826 .unwrap();
827 assert!(result.success);
828 assert!(result.output.contains("2: bbb"));
829 assert!(result.output.contains("3: ccc"));
830 assert!(!result.output.contains("1: aaa"));
831 assert!(!result.output.contains("4: ddd"));
832 assert!(result.output.contains("[Lines 2-3 of 5]"));
833
834 let result = tool
836 .execute(json!({"path": "lines.txt", "offset": 4}))
837 .await
838 .unwrap();
839 assert!(result.success);
840 assert!(result.output.contains("4: ddd"));
841 assert!(result.output.contains("5: eee"));
842 assert!(result.output.contains("[Lines 4-5 of 5]"));
843
844 let result = tool
846 .execute(json!({"path": "lines.txt", "limit": 2}))
847 .await
848 .unwrap();
849 assert!(result.success);
850 assert!(result.output.contains("1: aaa"));
851 assert!(result.output.contains("2: bbb"));
852 assert!(!result.output.contains("3: ccc"));
853 assert!(result.output.contains("[Lines 1-2 of 5]"));
854
855 let result = tool.execute(json!({"path": "lines.txt"})).await.unwrap();
857 assert!(result.success);
858 assert!(result.output.contains("1: aaa"));
859 assert!(result.output.contains("5: eee"));
860 assert!(result.output.contains("[5 lines total]"));
861
862 let _ = tokio::fs::remove_dir_all(&dir).await;
863 }
864
865 #[tokio::test]
866 async fn file_read_offset_beyond_end() {
867 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset_end");
868 let _ = tokio::fs::remove_dir_all(&dir).await;
869 tokio::fs::create_dir_all(&dir).await.unwrap();
870 tokio::fs::write(dir.join("short.txt"), "one\ntwo")
871 .await
872 .unwrap();
873
874 let tool = test_tool(dir.clone());
875 let result = tool
876 .execute(json!({"path": "short.txt", "offset": 100}))
877 .await
878 .unwrap();
879 assert!(result.success);
880 assert!(
881 result
882 .output
883 .contains("[No lines in range, file has 2 lines]")
884 );
885
886 let _ = tokio::fs::remove_dir_all(&dir).await;
887 }
888
889 #[tokio::test]
890 async fn file_read_rejects_oversized_file() {
891 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_large");
892 let _ = tokio::fs::remove_dir_all(&dir).await;
893 tokio::fs::create_dir_all(&dir).await.unwrap();
894
895 let big = vec![b'x'; 10 * 1024 * 1024 + 1];
897 tokio::fs::write(dir.join("huge.bin"), &big).await.unwrap();
898
899 let tool = test_tool(dir.clone());
900 let result = tool.execute(json!({"path": "huge.bin"})).await.unwrap();
901 assert!(!result.success);
902 assert!(result.error.as_ref().unwrap().contains("File too large"));
903
904 let _ = tokio::fs::remove_dir_all(&dir).await;
905 }
906
907 #[tokio::test]
909 async fn file_read_extracts_pdf_text() {
910 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_pdf");
911 let _ = tokio::fs::remove_dir_all(&dir).await;
912 tokio::fs::create_dir_all(&dir).await.unwrap();
913
914 let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
915 .join("../../tests/fixtures/test_document.pdf");
916 tokio::fs::copy(&fixture, dir.join("report.pdf"))
917 .await
918 .expect("copy PDF fixture");
919
920 let tool = test_tool(dir.clone());
921 let result = tool.execute(json!({"path": "report.pdf"})).await.unwrap();
922
923 assert!(
924 result.success,
925 "PDF read must succeed, error: {:?}",
926 result.error
927 );
928 assert!(
929 result.output.contains("Hello"),
930 "extracted text must contain 'Hello', got: {}",
931 result.output
932 );
933
934 let _ = tokio::fs::remove_dir_all(&dir).await;
935 }
936
937 #[tokio::test]
939 async fn file_read_lossy_reads_binary_file() {
940 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_lossy");
941 let _ = tokio::fs::remove_dir_all(&dir).await;
942 tokio::fs::create_dir_all(&dir).await.unwrap();
943
944 let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
946 tokio::fs::write(dir.join("data.bin"), &binary_data)
947 .await
948 .unwrap();
949
950 let tool = test_tool(dir.clone());
951 let result = tool.execute(json!({"path": "data.bin"})).await.unwrap();
952
953 assert!(
954 result.success,
955 "lossy read must succeed, error: {:?}",
956 result.error
957 );
958 assert!(
959 result.output.contains('\u{FFFD}'),
960 "lossy output must contain replacement character, got: {:?}",
961 result.output
962 );
963 assert!(
964 result.output.contains("hi"),
965 "lossy output must preserve valid ASCII, got: {:?}",
966 result.output
967 );
968
969 let _ = tokio::fs::remove_dir_all(&dir).await;
970 }
971
972 mod e2e_helpers {
975 use crate::observability::{NoopObserver, Observer};
976 use std::sync::{Arc, Mutex};
977 use zeroclaw_config::schema::MemoryConfig;
978 use zeroclaw_memory::{self, Memory};
979 use zeroclaw_providers::{ChatMessage, ChatRequest, ChatResponse, ModelProvider};
980
981 pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
982
983 pub struct RecordingModelProvider {
984 responses: Mutex<Vec<ChatResponse>>,
985 pub requests: SharedRequests,
986 }
987
988 impl RecordingModelProvider {
989 pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
990 let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
991 let model_provider = Self {
992 responses: Mutex::new(responses),
993 requests: requests.clone(),
994 };
995 (model_provider, requests)
996 }
997 }
998
999 #[async_trait::async_trait]
1000 impl ModelProvider for RecordingModelProvider {
1001 async fn chat_with_system(
1002 &self,
1003 _system_prompt: Option<&str>,
1004 _message: &str,
1005 _model: &str,
1006 _temperature: Option<f64>,
1007 ) -> anyhow::Result<String> {
1008 Ok("fallback".into())
1009 }
1010
1011 async fn chat(
1012 &self,
1013 request: ChatRequest<'_>,
1014 _model: &str,
1015 _temperature: Option<f64>,
1016 ) -> anyhow::Result<ChatResponse> {
1017 self.requests
1018 .lock()
1019 .unwrap()
1020 .push(request.messages.to_vec());
1021
1022 let mut guard = self.responses.lock().unwrap();
1023 if guard.is_empty() {
1024 return Ok(ChatResponse {
1025 text: Some("done".into()),
1026 tool_calls: vec![],
1027 usage: None,
1028 reasoning_content: None,
1029 });
1030 }
1031 Ok(guard.remove(0))
1032 }
1033 }
1034 impl ::zeroclaw_api::attribution::Attributable for RecordingModelProvider {
1035 fn role(&self) -> ::zeroclaw_api::attribution::Role {
1036 ::zeroclaw_api::attribution::Role::Provider(
1037 ::zeroclaw_api::attribution::ProviderKind::Model(
1038 ::zeroclaw_api::attribution::ModelProviderKind::Custom,
1039 ),
1040 )
1041 }
1042 fn alias(&self) -> &str {
1043 "RecordingModelProvider"
1044 }
1045 }
1046
1047 pub fn make_memory() -> Arc<dyn Memory> {
1048 let cfg = MemoryConfig {
1049 backend: "none".into(),
1050 ..MemoryConfig::default()
1051 };
1052 Arc::from(zeroclaw_memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
1053 }
1054
1055 pub fn make_observer() -> Arc<dyn Observer> {
1056 Arc::from(NoopObserver {})
1057 }
1058 }
1059
1060 #[tokio::test]
1064 async fn e2e_agent_file_read_pdf_extraction() {
1065 use crate::agent::agent::Agent;
1066 use crate::agent::dispatcher::NativeToolDispatcher;
1067 use e2e_helpers::*;
1068 use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
1069
1070 let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_pdf");
1072 let _ = tokio::fs::remove_dir_all(&workspace).await;
1073 tokio::fs::create_dir_all(&workspace).await.unwrap();
1074
1075 let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
1076 .join("../../tests/fixtures/test_document.pdf");
1077 tokio::fs::copy(&fixture, workspace.join("report.pdf"))
1078 .await
1079 .expect("copy PDF fixture");
1080
1081 let security = Arc::new(SecurityPolicy {
1083 autonomy: AutonomyLevel::Supervised,
1084 workspace_dir: workspace.clone(),
1085 ..SecurityPolicy::default()
1086 });
1087 let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1088
1089 let (model_provider, recorded) = RecordingModelProvider::new(vec![
1091 ChatResponse {
1093 text: Some(String::new()),
1094 tool_calls: vec![ToolCall {
1095 id: "tc1".into(),
1096 name: "file_read".into(),
1097 arguments: r#"{"path": "report.pdf"}"#.into(),
1098 extra_content: None,
1099 }],
1100 usage: None,
1101 reasoning_content: None,
1102 },
1103 ChatResponse {
1105 text: Some("The PDF contains a greeting: Hello PDF".into()),
1106 tool_calls: vec![],
1107 usage: None,
1108 reasoning_content: None,
1109 },
1110 ]);
1111
1112 let mut agent = Agent::builder()
1113 .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1114 .tools(vec![file_read_tool])
1115 .memory(make_memory())
1116 .observer(make_observer())
1117 .tool_dispatcher(Box::new(NativeToolDispatcher))
1118 .workspace_dir(workspace.clone())
1119 .build()
1120 .unwrap();
1121
1122 let response = agent
1124 .turn("Read report.pdf and tell me what it says")
1125 .await
1126 .unwrap();
1127
1128 assert!(
1130 response.contains("Hello PDF"),
1131 "agent response must contain PDF content, got: {response}",
1132 );
1133
1134 {
1136 let all_requests = recorded.lock().unwrap();
1137 assert!(
1138 all_requests.len() >= 2,
1139 "expected at least 2 model_provider requests (initial + after tool), got {}",
1140 all_requests.len(),
1141 );
1142
1143 let second_request = &all_requests[1];
1144 let tool_result_msg = second_request
1145 .iter()
1146 .find(|m| m.role == "tool")
1147 .expect("second request must contain a tool result message");
1148
1149 assert!(
1150 tool_result_msg.content.contains("Hello"),
1151 "tool result must contain extracted PDF text 'Hello', got: {}",
1152 tool_result_msg.content,
1153 );
1154 }
1155
1156 let _ = tokio::fs::remove_dir_all(&workspace).await;
1157 }
1158
1159 #[tokio::test]
1162 async fn e2e_agent_file_read_lossy_binary() {
1163 use crate::agent::agent::Agent;
1164 use crate::agent::dispatcher::NativeToolDispatcher;
1165 use e2e_helpers::*;
1166 use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
1167
1168 let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_lossy");
1170 let _ = tokio::fs::remove_dir_all(&workspace).await;
1171 tokio::fs::create_dir_all(&workspace).await.unwrap();
1172
1173 let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
1174 tokio::fs::write(workspace.join("data.bin"), &binary_data)
1175 .await
1176 .unwrap();
1177
1178 let security = Arc::new(SecurityPolicy {
1179 autonomy: AutonomyLevel::Supervised,
1180 workspace_dir: workspace.clone(),
1181 ..SecurityPolicy::default()
1182 });
1183 let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1184
1185 let (model_provider, recorded) = RecordingModelProvider::new(vec![
1186 ChatResponse {
1187 text: Some(String::new()),
1188 tool_calls: vec![ToolCall {
1189 id: "tc1".into(),
1190 name: "file_read".into(),
1191 arguments: r#"{"path": "data.bin"}"#.into(),
1192 extra_content: None,
1193 }],
1194 usage: None,
1195 reasoning_content: None,
1196 },
1197 ChatResponse {
1198 text: Some("The file appears to be binary data.".into()),
1199 tool_calls: vec![],
1200 usage: None,
1201 reasoning_content: None,
1202 },
1203 ]);
1204
1205 let mut agent = Agent::builder()
1206 .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1207 .tools(vec![file_read_tool])
1208 .memory(make_memory())
1209 .observer(make_observer())
1210 .tool_dispatcher(Box::new(NativeToolDispatcher))
1211 .workspace_dir(workspace.clone())
1212 .build()
1213 .unwrap();
1214
1215 let response = agent.turn("Read data.bin").await.unwrap();
1216
1217 assert!(
1218 response.contains("binary"),
1219 "agent response must mention binary, got: {response}",
1220 );
1221
1222 {
1224 let all_requests = recorded.lock().unwrap();
1225 assert!(
1226 all_requests.len() >= 2,
1227 "expected at least 2 model_provider requests, got {}",
1228 all_requests.len(),
1229 );
1230
1231 let tool_result_msg = all_requests[1]
1232 .iter()
1233 .find(|m| m.role == "tool")
1234 .expect("second request must contain a tool result message");
1235
1236 assert!(
1237 tool_result_msg.content.contains("valid"),
1238 "tool result must preserve valid ASCII from binary file, got: {}",
1239 tool_result_msg.content,
1240 );
1241 assert!(
1242 tool_result_msg.content.contains('\u{FFFD}'),
1243 "tool result must contain replacement character for invalid bytes, got: {}",
1244 tool_result_msg.content,
1245 );
1246 }
1247
1248 let _ = tokio::fs::remove_dir_all(&workspace).await;
1249 }
1250
1251 #[tokio::test]
1257 #[ignore = "requires valid OpenAI Codex OAuth credentials"]
1258 async fn e2e_live_file_read_pdf() {
1259 use crate::agent::agent::Agent;
1260 use crate::agent::dispatcher::XmlToolDispatcher;
1261 use e2e_helpers::*;
1262 use zeroclaw_providers::openai_codex::OpenAiCodexModelProvider;
1263 use zeroclaw_providers::{ModelProvider, ModelProviderRuntimeOptions};
1264
1265 let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_live_file_read_pdf");
1267 let _ = tokio::fs::remove_dir_all(&workspace).await;
1268 tokio::fs::create_dir_all(&workspace).await.unwrap();
1269
1270 let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
1271 .join("../../tests/fixtures/test_document.pdf");
1272 tokio::fs::copy(&fixture, workspace.join("report.pdf"))
1273 .await
1274 .expect("copy PDF fixture");
1275
1276 let security = Arc::new(SecurityPolicy {
1278 autonomy: AutonomyLevel::Supervised,
1279 workspace_dir: workspace.clone(),
1280 ..SecurityPolicy::default()
1281 });
1282 let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1283
1284 let model_provider =
1286 OpenAiCodexModelProvider::new("test", &ModelProviderRuntimeOptions::default(), None)
1287 .expect("model_provider should initialize");
1288
1289 let mut agent = Agent::builder()
1290 .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1291 .tools(vec![file_read_tool])
1292 .memory(make_memory())
1293 .observer(make_observer())
1294 .tool_dispatcher(Box::new(XmlToolDispatcher))
1295 .workspace_dir(workspace.clone())
1296 .model_name("gpt-5.3-codex".to_string())
1297 .build()
1298 .unwrap();
1299
1300 let response = agent
1302 .turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
1303 .await
1304 .unwrap();
1305
1306 eprintln!("=== Live e2e response ===\n{response}\n=========================");
1307
1308 let lower = response.to_lowercase();
1310 assert!(
1311 lower.contains("hello"),
1312 "model response must reference extracted PDF text 'Hello PDF', got: {response}",
1313 );
1314
1315 let _ = tokio::fs::remove_dir_all(&workspace).await;
1316 }
1317
1318 #[tokio::test]
1319 async fn file_read_blocks_null_byte_in_path() {
1320 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_null_byte");
1321 let _ = tokio::fs::remove_dir_all(&dir).await;
1322 tokio::fs::create_dir_all(&dir).await.unwrap();
1323
1324 let tool = test_tool(dir.clone());
1325 let result = tool
1326 .execute(json!({"path": "test\0evil.txt"}))
1327 .await
1328 .unwrap();
1329 assert!(!result.success);
1330 assert!(result.error.as_ref().unwrap().contains("not allowed"));
1331
1332 let _ = tokio::fs::remove_dir_all(&dir).await;
1333 }
1334
1335 #[cfg(unix)]
1336 #[tokio::test]
1337 async fn file_read_allows_dev_null() {
1338 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_dev_null");
1339 let _ = tokio::fs::remove_dir_all(&dir).await;
1340 tokio::fs::create_dir_all(&dir).await.unwrap();
1341
1342 let tool = test_tool(dir.clone());
1343 let result = tool.execute(json!({"path": "/dev/null"})).await.unwrap();
1344
1345 assert!(
1346 result.success,
1347 "file_read of /dev/null must succeed, error: {:?}",
1348 result.error
1349 );
1350 assert_eq!(result.output, "", "/dev/null must read as empty");
1351
1352 let _ = tokio::fs::remove_dir_all(&dir).await;
1353 }
1354
1355 #[tokio::test]
1356 async fn file_read_allowed_root_with_workspace_only() {
1357 let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_root");
1358 let workspace = root.join("workspace");
1359 let allowed = root.join("allowed_dir");
1360
1361 let _ = tokio::fs::remove_dir_all(&root).await;
1362 tokio::fs::create_dir_all(&workspace).await.unwrap();
1363 tokio::fs::create_dir_all(&allowed).await.unwrap();
1364 tokio::fs::write(allowed.join("data.txt"), "allowed content")
1365 .await
1366 .unwrap();
1367
1368 let security = Arc::new(SecurityPolicy {
1369 autonomy: AutonomyLevel::Supervised,
1370 workspace_dir: workspace.clone(),
1371 workspace_only: true,
1372 allowed_roots: vec![allowed.clone()],
1373 ..SecurityPolicy::default()
1374 });
1375 let tool = FileReadTool::new(security);
1376
1377 let abs_path = allowed.join("data.txt").to_string_lossy().to_string();
1379 let result = tool.execute(json!({"path": &abs_path})).await.unwrap();
1380
1381 assert!(
1382 result.success,
1383 "file_read with allowed_root path should succeed, error: {:?}",
1384 result.error
1385 );
1386 assert!(result.output.contains("allowed content"));
1387
1388 let outside = root.join("outside");
1390 tokio::fs::create_dir_all(&outside).await.unwrap();
1391 tokio::fs::write(outside.join("secret.txt"), "secret")
1392 .await
1393 .unwrap();
1394 let outside_path = outside.join("secret.txt").to_string_lossy().to_string();
1395 let result = tool.execute(json!({"path": &outside_path})).await.unwrap();
1396 assert!(!result.success);
1397
1398 let _ = tokio::fs::remove_dir_all(&root).await;
1399 }
1400
1401 #[tokio::test]
1405 async fn file_read_nonexistent_consumes_rate_limit_budget() {
1406 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_probe");
1407 let _ = tokio::fs::remove_dir_all(&dir).await;
1408 tokio::fs::create_dir_all(&dir).await.unwrap();
1409
1410 let tool = test_tool_with(dir.clone(), AutonomyLevel::Supervised, 2);
1412
1413 let r1 = tool.execute(json!({"path": "nope1.txt"})).await.unwrap();
1415 assert!(!r1.success);
1416 assert!(
1417 r1.error
1418 .as_deref()
1419 .unwrap_or("")
1420 .contains("Failed to resolve")
1421 );
1422
1423 let r2 = tool.execute(json!({"path": "nope2.txt"})).await.unwrap();
1424 assert!(!r2.success);
1425 assert!(
1426 r2.error
1427 .as_deref()
1428 .unwrap_or("")
1429 .contains("Failed to resolve")
1430 );
1431
1432 let r3 = tool.execute(json!({"path": "nope3.txt"})).await.unwrap();
1438 assert!(!r3.success);
1439
1440 assert!(!tool.security.record_action(), "budget must be exhausted");
1446
1447 let _ = tokio::fs::remove_dir_all(&dir).await;
1448 }
1449}