1use crate::security::SecurityPolicy;
2use async_trait::async_trait;
3use serde_json::json;
4use std::sync::Arc;
5use zeroclaw_api::tool::{Tool, ToolResult};
6
/// Upper bound, in bytes, on the size of a file this tool will load (10 MiB).
/// `execute` compares the file's metadata length against this value and
/// refuses files that exceed it.
const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024;
8
9pub struct FileReadTool {
11 security: Arc<SecurityPolicy>,
12}
13
14impl FileReadTool {
15 pub fn new(security: Arc<SecurityPolicy>) -> Self {
16 Self { security }
17 }
18
19 fn resolve_candidate(&self, path: &str) -> anyhow::Result<std::path::PathBuf> {
25 if path.contains('\0') {
26 anyhow::bail!("Path not allowed: contains null byte");
27 }
28 if std::path::Path::new(path)
29 .components()
30 .any(|c| matches!(c, std::path::Component::ParentDir))
31 {
32 anyhow::bail!("Path not allowed by security policy: {path}");
33 }
34
35 let p = std::path::Path::new(path);
36 if p.is_absolute() {
37 return Ok(p.to_path_buf());
38 }
39
40 let workspace_dir = &self.security.workspace_dir;
41 if let Ok(workspace_rootless) = workspace_dir.strip_prefix("/")
42 && let Ok(stripped) = p.strip_prefix(workspace_rootless)
43 {
44 return Ok(if stripped.as_os_str().is_empty() {
45 workspace_dir.clone()
46 } else {
47 workspace_dir.join(stripped)
48 });
49 }
50
51 Ok(workspace_dir.join(p))
52 }
53}
54
55#[async_trait]
56impl Tool for FileReadTool {
57 fn name(&self) -> &str {
58 "file_read"
59 }
60
61 fn description(&self) -> &str {
62 "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion."
63 }
64
65 fn parameters_schema(&self) -> serde_json::Value {
66 json!({
67 "type": "object",
68 "properties": {
69 "path": {
70 "type": "string",
71 "description": "Path to the file. Relative paths resolve from workspace root; absolute paths must be within the workspace."
72 },
73 "offset": {
74 "type": "integer",
75 "description": "Starting line number (1-based, default: 1)"
76 },
77 "limit": {
78 "type": "integer",
79 "description": "Maximum number of lines to return (default: all)"
80 }
81 },
82 "required": ["path"]
83 })
84 }
85
86 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
87 let path = args.get("path").and_then(|v| v.as_str()).ok_or_else(|| {
88 ::zeroclaw_log::record!(
89 WARN,
90 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
91 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
92 .with_attrs(::serde_json::json!({"param": "path"})),
93 "tool argument validation failed"
94 );
95
96 anyhow::Error::msg("Missing 'path' parameter")
97 })?;
98
99 let full_path = match self.resolve_candidate(path) {
114 Ok(p) => p,
115 Err(e) => {
116 let _ = self.security.record_action();
117 return Ok(ToolResult {
118 success: false,
119 output: String::new(),
120 error: Some(e.to_string()),
121 });
122 }
123 };
124
125 let resolved_path = match tokio::fs::canonicalize(&full_path).await {
127 Ok(p) => p,
128 Err(e) => {
129 let _ = self.security.record_action();
130 return Ok(ToolResult {
131 success: false,
132 output: String::new(),
133 error: Some(format!("Failed to resolve file path: {e}")),
134 });
135 }
136 };
137
138 if !self.security.is_resolved_path_readable(&resolved_path) {
141 return Ok(ToolResult {
142 success: false,
143 output: String::new(),
144 error: Some(format!("Path escapes workspace directory: {path}")),
145 });
146 }
147
148 match tokio::fs::metadata(&resolved_path).await {
150 Ok(meta) => {
151 if meta.len() > MAX_FILE_SIZE_BYTES {
152 return Ok(ToolResult {
153 success: false,
154 output: String::new(),
155 error: Some(format!(
156 "File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
157 meta.len()
158 )),
159 });
160 }
161 }
162 Err(e) => {
163 return Ok(ToolResult {
164 success: false,
165 output: String::new(),
166 error: Some(format!("Failed to read file metadata: {e}")),
167 });
168 }
169 }
170
171 match tokio::fs::read_to_string(&resolved_path).await {
172 Ok(contents) => {
173 let lines: Vec<&str> = contents.lines().collect();
174 let total = lines.len();
175
176 if total == 0 {
177 return Ok(ToolResult {
178 success: true,
179 output: String::new(),
180 error: None,
181 });
182 }
183
184 let offset = args
185 .get("offset")
186 .and_then(|v| v.as_u64())
187 .map(|v| {
188 usize::try_from(v.max(1))
189 .unwrap_or(usize::MAX)
190 .saturating_sub(1)
191 })
192 .unwrap_or(0);
193 let start = offset.min(total);
194
195 let end = match args.get("limit").and_then(|v| v.as_u64()) {
196 Some(l) => {
197 let limit = usize::try_from(l).unwrap_or(usize::MAX);
198 (start.saturating_add(limit)).min(total)
199 }
200 None => total,
201 };
202
203 if start >= end {
204 return Ok(ToolResult {
205 success: true,
206 output: format!("[No lines in range, file has {total} lines]"),
207 error: None,
208 });
209 }
210
211 let numbered: String = lines[start..end]
212 .iter()
213 .enumerate()
214 .map(|(i, line)| format!("{}: {}", start + i + 1, line))
215 .collect::<Vec<_>>()
216 .join("\n");
217
218 let partial = start > 0 || end < total;
219 let summary = if partial {
220 format!("\n[Lines {}-{} of {total}]", start + 1, end)
221 } else {
222 format!("\n[{total} lines total]")
223 };
224
225 Ok(ToolResult {
226 success: true,
227 output: format!("{numbered}{summary}"),
228 error: None,
229 })
230 }
231 Err(_) => {
232 let bytes = tokio::fs::read(&resolved_path).await.map_err(|e| {
234 ::zeroclaw_log::record!(
235 WARN,
236 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
237 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
238 .with_attrs(::serde_json::json!({
239 "path": resolved_path.display().to_string(),
240 "error": format!("{}", e),
241 })),
242 "file_read: raw byte fallback read failed"
243 );
244 anyhow::Error::msg(format!("Failed to read file: {e}"))
245 })?;
246
247 if let Some(text) = try_extract_pdf_text(&bytes) {
248 return Ok(ToolResult {
249 success: true,
250 output: text,
251 error: None,
252 });
253 }
254
255 let lossy = String::from_utf8_lossy(&bytes).into_owned();
257 Ok(ToolResult {
258 success: true,
259 output: lossy,
260 error: None,
261 })
262 }
263 }
264 }
265}
266
/// Attempts to extract text from an in-memory PDF.
///
/// Returns `None` when `bytes` does not begin with the `%PDF-` signature,
/// when extraction fails, or when the extracted text is empty after trimming
/// whitespace.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|text| !text.trim().is_empty())
}

/// Stand-in used when the `rag-pdf` feature is disabled: never extracts
/// anything, so the caller falls through to its non-PDF handling.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
    None
}
283
284#[cfg(test)]
285mod tests {
286 use super::*;
287 use crate::security::{AutonomyLevel, SecurityPolicy};
288
289 fn test_tool(workspace: std::path::PathBuf) -> FileReadTool {
290 let security = Arc::new(SecurityPolicy {
291 autonomy: AutonomyLevel::Supervised,
292 workspace_dir: workspace,
293 ..SecurityPolicy::default()
294 });
295 FileReadTool::new(security)
296 }
297
298 fn test_tool_with(
299 workspace: std::path::PathBuf,
300 autonomy: AutonomyLevel,
301 max_actions_per_hour: u32,
302 ) -> FileReadTool {
303 let security = Arc::new(SecurityPolicy {
304 autonomy,
305 workspace_dir: workspace,
306 max_actions_per_hour,
307 ..SecurityPolicy::default()
308 });
309 FileReadTool::new(security)
310 }
311
312 #[test]
313 fn file_read_name() {
314 let tool = test_tool(std::env::temp_dir());
315 assert_eq!(tool.name(), "file_read");
316 }
317
318 #[test]
319 fn file_read_schema_has_path() {
320 let tool = test_tool(std::env::temp_dir());
321 let schema = tool.parameters_schema();
322 assert!(schema["properties"]["path"].is_object());
323 assert!(schema["properties"]["offset"].is_object());
324 assert!(schema["properties"]["limit"].is_object());
325 assert!(
326 schema["required"]
327 .as_array()
328 .unwrap()
329 .contains(&json!("path"))
330 );
331 assert!(
333 !schema["required"]
334 .as_array()
335 .unwrap()
336 .contains(&json!("offset"))
337 );
338 }
339
340 #[tokio::test]
341 async fn file_read_existing_file() {
342 let dir = std::env::temp_dir().join("zeroclaw_test_file_read");
343 let _ = tokio::fs::remove_dir_all(&dir).await;
344 tokio::fs::create_dir_all(&dir).await.unwrap();
345 tokio::fs::write(dir.join("test.txt"), "hello world")
346 .await
347 .unwrap();
348
349 let tool = test_tool(dir.clone());
350 let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
351 assert!(result.success);
352 assert!(result.output.contains("1: hello world"));
353 assert!(result.output.contains("[1 lines total]"));
354 assert!(result.error.is_none());
355
356 let _ = tokio::fs::remove_dir_all(&dir).await;
357 }
358
359 #[tokio::test]
360 async fn file_read_nonexistent_file() {
361 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_missing");
362 let _ = tokio::fs::remove_dir_all(&dir).await;
363 tokio::fs::create_dir_all(&dir).await.unwrap();
364
365 let tool = test_tool(dir.clone());
366 let result = tool.execute(json!({"path": "nope.txt"})).await.unwrap();
367 assert!(!result.success);
368 assert!(result.error.as_ref().unwrap().contains("Failed to resolve"));
369
370 let _ = tokio::fs::remove_dir_all(&dir).await;
371 }
372
373 #[tokio::test]
374 async fn file_read_blocks_path_traversal() {
375 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_traversal");
376 let _ = tokio::fs::remove_dir_all(&dir).await;
377 tokio::fs::create_dir_all(&dir).await.unwrap();
378
379 let tool = test_tool(dir.clone());
380 let result = tool
381 .execute(json!({"path": "../../../etc/passwd"}))
382 .await
383 .unwrap();
384 assert!(!result.success);
385 assert!(result.error.as_ref().unwrap().contains("not allowed"));
386
387 let _ = tokio::fs::remove_dir_all(&dir).await;
388 }
389
390 #[tokio::test]
391 async fn file_read_blocks_absolute_path() {
392 let tool = test_tool(std::env::temp_dir());
393
394 #[cfg(unix)]
395 let target = "/etc/passwd";
396 #[cfg(windows)]
397 let target = {
398 let sysroot = std::env::var("SystemRoot").unwrap_or_else(|_| r"C:\Windows".to_string());
399 std::path::PathBuf::from(sysroot).join(r"System32\drivers\etc\hosts")
400 };
401
402 let result = tool.execute(json!({"path": target})).await.unwrap();
403 assert!(!result.success);
404 assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
405 }
406
407 #[tokio::test]
408 async fn file_read_allows_readonly_mode() {
409 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_readonly");
410 let _ = tokio::fs::remove_dir_all(&dir).await;
411 tokio::fs::create_dir_all(&dir).await.unwrap();
412 tokio::fs::write(dir.join("test.txt"), "readonly ok")
413 .await
414 .unwrap();
415
416 let tool = test_tool_with(dir.clone(), AutonomyLevel::ReadOnly, 20);
417 let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
418
419 assert!(result.success);
420 assert!(result.output.contains("1: readonly ok"));
421
422 let _ = tokio::fs::remove_dir_all(&dir).await;
423 }
424
425 #[tokio::test]
426 async fn file_read_missing_path_param() {
427 let tool = test_tool(std::env::temp_dir());
428 let result = tool.execute(json!({})).await;
429 assert!(result.is_err());
430 }
431
432 #[tokio::test]
433 async fn file_read_empty_file() {
434 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_empty");
435 let _ = tokio::fs::remove_dir_all(&dir).await;
436 tokio::fs::create_dir_all(&dir).await.unwrap();
437 tokio::fs::write(dir.join("empty.txt"), "").await.unwrap();
438
439 let tool = test_tool(dir.clone());
440 let result = tool.execute(json!({"path": "empty.txt"})).await.unwrap();
441 assert!(result.success);
442 assert_eq!(result.output, "");
443
444 let _ = tokio::fs::remove_dir_all(&dir).await;
445 }
446
447 #[tokio::test]
448 async fn file_read_nested_path() {
449 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_nested");
450 let _ = tokio::fs::remove_dir_all(&dir).await;
451 tokio::fs::create_dir_all(dir.join("sub/dir"))
452 .await
453 .unwrap();
454 tokio::fs::write(dir.join("sub/dir/deep.txt"), "deep content")
455 .await
456 .unwrap();
457
458 let tool = test_tool(dir.clone());
459 let result = tool
460 .execute(json!({"path": "sub/dir/deep.txt"}))
461 .await
462 .unwrap();
463 assert!(result.success);
464 assert!(result.output.contains("1: deep content"));
465
466 let _ = tokio::fs::remove_dir_all(&dir).await;
467 }
468
469 #[cfg(unix)]
470 #[tokio::test]
471 async fn file_read_blocks_symlink_escape() {
472 use std::os::unix::fs::symlink;
473
474 let root = std::env::temp_dir().join("zeroclaw_test_file_read_symlink_escape");
475 let workspace = root.join("workspace");
476 let outside = root.join("outside");
477
478 let _ = tokio::fs::remove_dir_all(&root).await;
479 tokio::fs::create_dir_all(&workspace).await.unwrap();
480 tokio::fs::create_dir_all(&outside).await.unwrap();
481
482 tokio::fs::write(outside.join("secret.txt"), "outside workspace")
483 .await
484 .unwrap();
485
486 symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();
487
488 let tool = test_tool(workspace.clone());
489 let result = tool.execute(json!({"path": "escape.txt"})).await.unwrap();
490
491 assert!(!result.success);
492 assert!(
493 result
494 .error
495 .as_deref()
496 .unwrap_or("")
497 .contains("escapes workspace")
498 );
499
500 let _ = tokio::fs::remove_dir_all(&root).await;
501 }
502
503 #[tokio::test]
504 async fn file_read_blocks_outside_workspace_regardless_of_policy() {
505 let root = std::env::temp_dir().join("zeroclaw_test_file_read_blocks_outside");
506 let workspace = root.join("workspace");
507 let outside = root.join("outside");
508 let outside_file = outside.join("notes.txt");
509
510 let _ = tokio::fs::remove_dir_all(&root).await;
511 tokio::fs::create_dir_all(&workspace).await.unwrap();
512 tokio::fs::create_dir_all(&outside).await.unwrap();
513 tokio::fs::write(&outside_file, "outside").await.unwrap();
514
515 let tool = test_tool(workspace.clone());
516
517 let result = tool
518 .execute(json!({"path": outside_file.to_string_lossy().to_string()}))
519 .await
520 .unwrap();
521
522 assert!(!result.success);
523 assert!(result.error.as_ref().unwrap().contains("escapes workspace"));
524
525 let _ = tokio::fs::remove_dir_all(&root).await;
526 }
527
528 #[tokio::test]
529 async fn file_read_admits_absolute_path_under_read_only_root() {
530 let root =
531 std::env::temp_dir().join("zeroclaw_test_file_read_admits_absolute_path_under_ro_root");
532 let workspace = root.join("workspace");
533 let ro_root = root.join("shared");
534 let ro_file = ro_root.join("notes.txt");
535
536 let _ = tokio::fs::remove_dir_all(&root).await;
537 tokio::fs::create_dir_all(&workspace).await.unwrap();
538 tokio::fs::create_dir_all(&ro_root).await.unwrap();
539 tokio::fs::write(&ro_file, "cross-agent read")
540 .await
541 .unwrap();
542
543 let security = Arc::new(SecurityPolicy {
544 autonomy: AutonomyLevel::Supervised,
545 workspace_dir: workspace,
546 allowed_roots_read_only: vec![ro_root.clone()],
547 ..SecurityPolicy::default()
548 });
549 let tool = FileReadTool::new(security);
550
551 let result = tool
552 .execute(json!({"path": ro_file.to_string_lossy().to_string()}))
553 .await
554 .unwrap();
555
556 assert!(
557 result.success,
558 "absolute path under read-only root must read: {result:?}"
559 );
560 assert!(result.output.contains("cross-agent read"));
561
562 let _ = tokio::fs::remove_dir_all(&root).await;
563 }
564
565 #[tokio::test]
566 async fn file_read_with_offset_and_limit() {
567 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset");
568 let _ = tokio::fs::remove_dir_all(&dir).await;
569 tokio::fs::create_dir_all(&dir).await.unwrap();
570 tokio::fs::write(dir.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
571 .await
572 .unwrap();
573
574 let tool = test_tool(dir.clone());
575
576 let result = tool
578 .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
579 .await
580 .unwrap();
581 assert!(result.success);
582 assert!(result.output.contains("2: bbb"));
583 assert!(result.output.contains("3: ccc"));
584 assert!(!result.output.contains("1: aaa"));
585 assert!(!result.output.contains("4: ddd"));
586 assert!(result.output.contains("[Lines 2-3 of 5]"));
587
588 let result = tool
590 .execute(json!({"path": "lines.txt", "offset": 4}))
591 .await
592 .unwrap();
593 assert!(result.success);
594 assert!(result.output.contains("4: ddd"));
595 assert!(result.output.contains("5: eee"));
596 assert!(result.output.contains("[Lines 4-5 of 5]"));
597
598 let result = tool
600 .execute(json!({"path": "lines.txt", "limit": 2}))
601 .await
602 .unwrap();
603 assert!(result.success);
604 assert!(result.output.contains("1: aaa"));
605 assert!(result.output.contains("2: bbb"));
606 assert!(!result.output.contains("3: ccc"));
607 assert!(result.output.contains("[Lines 1-2 of 5]"));
608
609 let result = tool.execute(json!({"path": "lines.txt"})).await.unwrap();
611 assert!(result.success);
612 assert!(result.output.contains("1: aaa"));
613 assert!(result.output.contains("5: eee"));
614 assert!(result.output.contains("[5 lines total]"));
615
616 let _ = tokio::fs::remove_dir_all(&dir).await;
617 }
618
619 #[tokio::test]
620 async fn file_read_offset_beyond_end() {
621 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_offset_end");
622 let _ = tokio::fs::remove_dir_all(&dir).await;
623 tokio::fs::create_dir_all(&dir).await.unwrap();
624 tokio::fs::write(dir.join("short.txt"), "one\ntwo")
625 .await
626 .unwrap();
627
628 let tool = test_tool(dir.clone());
629 let result = tool
630 .execute(json!({"path": "short.txt", "offset": 100}))
631 .await
632 .unwrap();
633 assert!(result.success);
634 assert!(
635 result
636 .output
637 .contains("[No lines in range, file has 2 lines]")
638 );
639
640 let _ = tokio::fs::remove_dir_all(&dir).await;
641 }
642
643 #[tokio::test]
644 async fn file_read_rejects_oversized_file() {
645 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_large");
646 let _ = tokio::fs::remove_dir_all(&dir).await;
647 tokio::fs::create_dir_all(&dir).await.unwrap();
648
649 let big = vec![b'x'; 10 * 1024 * 1024 + 1];
651 tokio::fs::write(dir.join("huge.bin"), &big).await.unwrap();
652
653 let tool = test_tool(dir.clone());
654 let result = tool.execute(json!({"path": "huge.bin"})).await.unwrap();
655 assert!(!result.success);
656 assert!(result.error.as_ref().unwrap().contains("File too large"));
657
658 let _ = tokio::fs::remove_dir_all(&dir).await;
659 }
660
661 #[tokio::test]
663 async fn file_read_extracts_pdf_text() {
664 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_pdf");
665 let _ = tokio::fs::remove_dir_all(&dir).await;
666 tokio::fs::create_dir_all(&dir).await.unwrap();
667
668 let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
669 .join("../../tests/fixtures/test_document.pdf");
670 tokio::fs::copy(&fixture, dir.join("report.pdf"))
671 .await
672 .expect("copy PDF fixture");
673
674 let tool = test_tool(dir.clone());
675 let result = tool.execute(json!({"path": "report.pdf"})).await.unwrap();
676
677 assert!(
678 result.success,
679 "PDF read must succeed, error: {:?}",
680 result.error
681 );
682 assert!(
683 result.output.contains("Hello"),
684 "extracted text must contain 'Hello', got: {}",
685 result.output
686 );
687
688 let _ = tokio::fs::remove_dir_all(&dir).await;
689 }
690
691 #[tokio::test]
693 async fn file_read_lossy_reads_binary_file() {
694 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_lossy");
695 let _ = tokio::fs::remove_dir_all(&dir).await;
696 tokio::fs::create_dir_all(&dir).await.unwrap();
697
698 let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
700 tokio::fs::write(dir.join("data.bin"), &binary_data)
701 .await
702 .unwrap();
703
704 let tool = test_tool(dir.clone());
705 let result = tool.execute(json!({"path": "data.bin"})).await.unwrap();
706
707 assert!(
708 result.success,
709 "lossy read must succeed, error: {:?}",
710 result.error
711 );
712 assert!(
713 result.output.contains('\u{FFFD}'),
714 "lossy output must contain replacement character, got: {:?}",
715 result.output
716 );
717 assert!(
718 result.output.contains("hi"),
719 "lossy output must preserve valid ASCII, got: {:?}",
720 result.output
721 );
722
723 let _ = tokio::fs::remove_dir_all(&dir).await;
724 }
725
726 mod e2e_helpers {
729 use crate::observability::{NoopObserver, Observer};
730 use std::sync::{Arc, Mutex};
731 use zeroclaw_config::schema::MemoryConfig;
732 use zeroclaw_memory::{self, Memory};
733 use zeroclaw_providers::{ChatMessage, ChatRequest, ChatResponse, ModelProvider};
734
735 pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
736
737 pub struct RecordingModelProvider {
738 responses: Mutex<Vec<ChatResponse>>,
739 pub requests: SharedRequests,
740 }
741
742 impl RecordingModelProvider {
743 pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
744 let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
745 let model_provider = Self {
746 responses: Mutex::new(responses),
747 requests: requests.clone(),
748 };
749 (model_provider, requests)
750 }
751 }
752
753 #[async_trait::async_trait]
754 impl ModelProvider for RecordingModelProvider {
755 async fn chat_with_system(
756 &self,
757 _system_prompt: Option<&str>,
758 _message: &str,
759 _model: &str,
760 _temperature: Option<f64>,
761 ) -> anyhow::Result<String> {
762 Ok("fallback".into())
763 }
764
765 async fn chat(
766 &self,
767 request: ChatRequest<'_>,
768 _model: &str,
769 _temperature: Option<f64>,
770 ) -> anyhow::Result<ChatResponse> {
771 self.requests
772 .lock()
773 .unwrap()
774 .push(request.messages.to_vec());
775
776 let mut guard = self.responses.lock().unwrap();
777 if guard.is_empty() {
778 return Ok(ChatResponse {
779 text: Some("done".into()),
780 tool_calls: vec![],
781 usage: None,
782 reasoning_content: None,
783 });
784 }
785 Ok(guard.remove(0))
786 }
787 }
788 impl ::zeroclaw_api::attribution::Attributable for RecordingModelProvider {
789 fn role(&self) -> ::zeroclaw_api::attribution::Role {
790 ::zeroclaw_api::attribution::Role::Provider(
791 ::zeroclaw_api::attribution::ProviderKind::Model(
792 ::zeroclaw_api::attribution::ModelProviderKind::Custom,
793 ),
794 )
795 }
796 fn alias(&self) -> &str {
797 "RecordingModelProvider"
798 }
799 }
800
801 pub fn make_memory() -> Arc<dyn Memory> {
802 let cfg = MemoryConfig {
803 backend: "none".into(),
804 ..MemoryConfig::default()
805 };
806 Arc::from(zeroclaw_memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
807 }
808
809 pub fn make_observer() -> Arc<dyn Observer> {
810 Arc::from(NoopObserver {})
811 }
812 }
813
814 #[tokio::test]
818 async fn e2e_agent_file_read_pdf_extraction() {
819 use crate::agent::agent::Agent;
820 use crate::agent::dispatcher::NativeToolDispatcher;
821 use e2e_helpers::*;
822 use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
823
824 let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_pdf");
826 let _ = tokio::fs::remove_dir_all(&workspace).await;
827 tokio::fs::create_dir_all(&workspace).await.unwrap();
828
829 let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
830 .join("../../tests/fixtures/test_document.pdf");
831 tokio::fs::copy(&fixture, workspace.join("report.pdf"))
832 .await
833 .expect("copy PDF fixture");
834
835 let security = Arc::new(SecurityPolicy {
837 autonomy: AutonomyLevel::Supervised,
838 workspace_dir: workspace.clone(),
839 ..SecurityPolicy::default()
840 });
841 let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
842
843 let (model_provider, recorded) = RecordingModelProvider::new(vec![
845 ChatResponse {
847 text: Some(String::new()),
848 tool_calls: vec![ToolCall {
849 id: "tc1".into(),
850 name: "file_read".into(),
851 arguments: r#"{"path": "report.pdf"}"#.into(),
852 extra_content: None,
853 }],
854 usage: None,
855 reasoning_content: None,
856 },
857 ChatResponse {
859 text: Some("The PDF contains a greeting: Hello PDF".into()),
860 tool_calls: vec![],
861 usage: None,
862 reasoning_content: None,
863 },
864 ]);
865
866 let mut agent = Agent::builder()
867 .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
868 .tools(vec![file_read_tool])
869 .memory(make_memory())
870 .observer(make_observer())
871 .tool_dispatcher(Box::new(NativeToolDispatcher))
872 .workspace_dir(workspace.clone())
873 .build()
874 .unwrap();
875
876 let response = agent
878 .turn("Read report.pdf and tell me what it says")
879 .await
880 .unwrap();
881
882 assert!(
884 response.contains("Hello PDF"),
885 "agent response must contain PDF content, got: {response}",
886 );
887
888 {
890 let all_requests = recorded.lock().unwrap();
891 assert!(
892 all_requests.len() >= 2,
893 "expected at least 2 model_provider requests (initial + after tool), got {}",
894 all_requests.len(),
895 );
896
897 let second_request = &all_requests[1];
898 let tool_result_msg = second_request
899 .iter()
900 .find(|m| m.role == "tool")
901 .expect("second request must contain a tool result message");
902
903 assert!(
904 tool_result_msg.content.contains("Hello"),
905 "tool result must contain extracted PDF text 'Hello', got: {}",
906 tool_result_msg.content,
907 );
908 }
909
910 let _ = tokio::fs::remove_dir_all(&workspace).await;
911 }
912
913 #[tokio::test]
916 async fn e2e_agent_file_read_lossy_binary() {
917 use crate::agent::agent::Agent;
918 use crate::agent::dispatcher::NativeToolDispatcher;
919 use e2e_helpers::*;
920 use zeroclaw_providers::{ChatResponse, ModelProvider, ToolCall};
921
922 let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_file_read_lossy");
924 let _ = tokio::fs::remove_dir_all(&workspace).await;
925 tokio::fs::create_dir_all(&workspace).await.unwrap();
926
927 let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
928 tokio::fs::write(workspace.join("data.bin"), &binary_data)
929 .await
930 .unwrap();
931
932 let security = Arc::new(SecurityPolicy {
933 autonomy: AutonomyLevel::Supervised,
934 workspace_dir: workspace.clone(),
935 ..SecurityPolicy::default()
936 });
937 let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
938
939 let (model_provider, recorded) = RecordingModelProvider::new(vec![
940 ChatResponse {
941 text: Some(String::new()),
942 tool_calls: vec![ToolCall {
943 id: "tc1".into(),
944 name: "file_read".into(),
945 arguments: r#"{"path": "data.bin"}"#.into(),
946 extra_content: None,
947 }],
948 usage: None,
949 reasoning_content: None,
950 },
951 ChatResponse {
952 text: Some("The file appears to be binary data.".into()),
953 tool_calls: vec![],
954 usage: None,
955 reasoning_content: None,
956 },
957 ]);
958
959 let mut agent = Agent::builder()
960 .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
961 .tools(vec![file_read_tool])
962 .memory(make_memory())
963 .observer(make_observer())
964 .tool_dispatcher(Box::new(NativeToolDispatcher))
965 .workspace_dir(workspace.clone())
966 .build()
967 .unwrap();
968
969 let response = agent.turn("Read data.bin").await.unwrap();
970
971 assert!(
972 response.contains("binary"),
973 "agent response must mention binary, got: {response}",
974 );
975
976 {
978 let all_requests = recorded.lock().unwrap();
979 assert!(
980 all_requests.len() >= 2,
981 "expected at least 2 model_provider requests, got {}",
982 all_requests.len(),
983 );
984
985 let tool_result_msg = all_requests[1]
986 .iter()
987 .find(|m| m.role == "tool")
988 .expect("second request must contain a tool result message");
989
990 assert!(
991 tool_result_msg.content.contains("valid"),
992 "tool result must preserve valid ASCII from binary file, got: {}",
993 tool_result_msg.content,
994 );
995 assert!(
996 tool_result_msg.content.contains('\u{FFFD}'),
997 "tool result must contain replacement character for invalid bytes, got: {}",
998 tool_result_msg.content,
999 );
1000 }
1001
1002 let _ = tokio::fs::remove_dir_all(&workspace).await;
1003 }
1004
1005 #[tokio::test]
1011 #[ignore = "requires valid OpenAI Codex OAuth credentials"]
1012 async fn e2e_live_file_read_pdf() {
1013 use crate::agent::agent::Agent;
1014 use crate::agent::dispatcher::XmlToolDispatcher;
1015 use e2e_helpers::*;
1016 use zeroclaw_providers::openai_codex::OpenAiCodexModelProvider;
1017 use zeroclaw_providers::{ModelProvider, ModelProviderRuntimeOptions};
1018
1019 let workspace = std::env::temp_dir().join("zeroclaw_test_e2e_live_file_read_pdf");
1021 let _ = tokio::fs::remove_dir_all(&workspace).await;
1022 tokio::fs::create_dir_all(&workspace).await.unwrap();
1023
1024 let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
1025 .join("../../tests/fixtures/test_document.pdf");
1026 tokio::fs::copy(&fixture, workspace.join("report.pdf"))
1027 .await
1028 .expect("copy PDF fixture");
1029
1030 let security = Arc::new(SecurityPolicy {
1032 autonomy: AutonomyLevel::Supervised,
1033 workspace_dir: workspace.clone(),
1034 ..SecurityPolicy::default()
1035 });
1036 let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
1037
1038 let model_provider =
1040 OpenAiCodexModelProvider::new("test", &ModelProviderRuntimeOptions::default(), None)
1041 .expect("model_provider should initialize");
1042
1043 let mut agent = Agent::builder()
1044 .model_provider(Box::new(model_provider) as Box<dyn ModelProvider>)
1045 .tools(vec![file_read_tool])
1046 .memory(make_memory())
1047 .observer(make_observer())
1048 .tool_dispatcher(Box::new(XmlToolDispatcher))
1049 .workspace_dir(workspace.clone())
1050 .model_name("gpt-5.3-codex".to_string())
1051 .build()
1052 .unwrap();
1053
1054 let response = agent
1056 .turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
1057 .await
1058 .unwrap();
1059
1060 eprintln!("=== Live e2e response ===\n{response}\n=========================");
1061
1062 let lower = response.to_lowercase();
1064 assert!(
1065 lower.contains("hello"),
1066 "model response must reference extracted PDF text 'Hello PDF', got: {response}",
1067 );
1068
1069 let _ = tokio::fs::remove_dir_all(&workspace).await;
1070 }
1071
1072 #[tokio::test]
1073 async fn file_read_blocks_null_byte_in_path() {
1074 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_null_byte");
1075 let _ = tokio::fs::remove_dir_all(&dir).await;
1076 tokio::fs::create_dir_all(&dir).await.unwrap();
1077
1078 let tool = test_tool(dir.clone());
1079 let result = tool
1080 .execute(json!({"path": "test\0evil.txt"}))
1081 .await
1082 .unwrap();
1083 assert!(!result.success);
1084 assert!(result.error.as_ref().unwrap().contains("not allowed"));
1085
1086 let _ = tokio::fs::remove_dir_all(&dir).await;
1087 }
1088
1089 #[cfg(unix)]
1090 #[tokio::test]
1091 async fn file_read_allows_dev_null() {
1092 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_dev_null");
1093 let _ = tokio::fs::remove_dir_all(&dir).await;
1094 tokio::fs::create_dir_all(&dir).await.unwrap();
1095
1096 let tool = test_tool(dir.clone());
1097 let result = tool.execute(json!({"path": "/dev/null"})).await.unwrap();
1098
1099 assert!(
1100 result.success,
1101 "file_read of /dev/null must succeed, error: {:?}",
1102 result.error
1103 );
1104 assert_eq!(result.output, "", "/dev/null must read as empty");
1105
1106 let _ = tokio::fs::remove_dir_all(&dir).await;
1107 }
1108
1109 #[tokio::test]
1110 async fn file_read_allowed_root_with_workspace_only() {
1111 let root = std::env::temp_dir().join("zeroclaw_test_file_read_allowed_root");
1112 let workspace = root.join("workspace");
1113 let allowed = root.join("allowed_dir");
1114
1115 let _ = tokio::fs::remove_dir_all(&root).await;
1116 tokio::fs::create_dir_all(&workspace).await.unwrap();
1117 tokio::fs::create_dir_all(&allowed).await.unwrap();
1118 tokio::fs::write(allowed.join("data.txt"), "allowed content")
1119 .await
1120 .unwrap();
1121
1122 let security = Arc::new(SecurityPolicy {
1123 autonomy: AutonomyLevel::Supervised,
1124 workspace_dir: workspace.clone(),
1125 workspace_only: true,
1126 allowed_roots: vec![allowed.clone()],
1127 ..SecurityPolicy::default()
1128 });
1129 let tool = FileReadTool::new(security);
1130
1131 let abs_path = allowed.join("data.txt").to_string_lossy().to_string();
1133 let result = tool.execute(json!({"path": &abs_path})).await.unwrap();
1134
1135 assert!(
1136 result.success,
1137 "file_read with allowed_root path should succeed, error: {:?}",
1138 result.error
1139 );
1140 assert!(result.output.contains("allowed content"));
1141
1142 let outside = root.join("outside");
1144 tokio::fs::create_dir_all(&outside).await.unwrap();
1145 tokio::fs::write(outside.join("secret.txt"), "secret")
1146 .await
1147 .unwrap();
1148 let outside_path = outside.join("secret.txt").to_string_lossy().to_string();
1149 let result = tool.execute(json!({"path": &outside_path})).await.unwrap();
1150 assert!(!result.success);
1151
1152 let _ = tokio::fs::remove_dir_all(&root).await;
1153 }
1154
1155 #[tokio::test]
1159 async fn file_read_nonexistent_consumes_rate_limit_budget() {
1160 let dir = std::env::temp_dir().join("zeroclaw_test_file_read_probe");
1161 let _ = tokio::fs::remove_dir_all(&dir).await;
1162 tokio::fs::create_dir_all(&dir).await.unwrap();
1163
1164 let tool = test_tool_with(dir.clone(), AutonomyLevel::Supervised, 2);
1166
1167 let r1 = tool.execute(json!({"path": "nope1.txt"})).await.unwrap();
1169 assert!(!r1.success);
1170 assert!(
1171 r1.error
1172 .as_deref()
1173 .unwrap_or("")
1174 .contains("Failed to resolve")
1175 );
1176
1177 let r2 = tool.execute(json!({"path": "nope2.txt"})).await.unwrap();
1178 assert!(!r2.success);
1179 assert!(
1180 r2.error
1181 .as_deref()
1182 .unwrap_or("")
1183 .contains("Failed to resolve")
1184 );
1185
1186 let r3 = tool.execute(json!({"path": "nope3.txt"})).await.unwrap();
1192 assert!(!r3.success);
1193
1194 assert!(!tool.security.record_action(), "budget must be exhausted");
1200
1201 let _ = tokio::fs::remove_dir_all(&dir).await;
1202 }
1203}