1use async_trait::async_trait;
2use futures_util::StreamExt;
3use serde_json::json;
4use std::path::Path;
5use std::sync::Arc;
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7use tokio::io::AsyncWriteExt;
8use zeroclaw_api::tool::{Tool, ToolResult};
9use zeroclaw_config::policy::SecurityPolicy;
10use zeroclaw_config::schema::FileDownloadConfig;
11
/// Upper bound, in bytes, on how much of a non-success response body is echoed
/// back in the tool output (4 KiB); longer bodies are truncated.
const RESPONSE_BODY_LIMIT_BYTES: usize = 4_096;
13
14pub struct FileDownloadTool {
15 security: Arc<SecurityPolicy>,
16 config: FileDownloadConfig,
17}
18
19impl FileDownloadTool {
20 pub fn new(security: Arc<SecurityPolicy>, config: FileDownloadConfig) -> Self {
21 Self { security, config }
22 }
23
24 async fn stream_to_temp(
29 response: reqwest::Response,
30 temp_path: &Path,
31 max_bytes: u64,
32 ) -> Result<u64, String> {
33 let mut file = tokio::fs::File::create(temp_path)
34 .await
35 .map_err(|e| format!("Failed to create temporary download file: {e}"))?;
36
37 let mut stream = response.bytes_stream();
38 let mut written: u64 = 0;
39 while let Some(chunk) = stream.next().await {
40 let chunk = chunk.map_err(|e| format!("Failed while reading response body: {e}"))?;
41 written = written.saturating_add(chunk.len() as u64);
42 if written > max_bytes {
43 return Err(format!(
44 "Download too large: exceeded limit of {max_bytes} bytes"
45 ));
46 }
47 file.write_all(&chunk)
48 .await
49 .map_err(|e| format!("Failed while writing downloaded bytes: {e}"))?;
50 }
51
52 file.flush()
53 .await
54 .map_err(|e| format!("Failed to flush downloaded file: {e}"))?;
55 Ok(written)
56 }
57}
58
59#[async_trait]
60impl Tool for FileDownloadTool {
61 fn name(&self) -> &str {
62 "file_download"
63 }
64
65 fn description(&self) -> &str {
66 "Download a file from the configured remote endpoint and write it to the \
67 agent's workspace. Supply the identifier of the document to fetch and a \
68 workspace-relative destination path; the endpoint URL is fixed by host \
69 config and is never model-controlled. Bytes are streamed straight to \
70 disk and are not loaded into model context. Returns the HTTP status, \
71 the number of bytes written, and the destination path."
72 }
73
74 fn parameters_schema(&self) -> serde_json::Value {
75 json!({
76 "type": "object",
77 "properties": {
78 "document_id": {
79 "type": "string",
80 "description": "Identifier of the document to fetch from the configured endpoint."
81 },
82 "dest_path": {
83 "type": "string",
84 "description": "Workspace-relative path to write the file to. The parent directory must already exist."
85 }
86 },
87 "required": ["document_id", "dest_path"]
88 })
89 }
90
91 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
92 let Some(url) = self
93 .config
94 .url
95 .as_deref()
96 .map(str::trim)
97 .filter(|s| !s.is_empty())
98 else {
99 return Ok(ToolResult {
100 success: false,
101 output: String::new(),
102 error: Some(
103 "file_download is disabled: [file_download].url is not configured".into(),
104 ),
105 });
106 };
107
108 if !self.security.can_act() {
109 return Ok(ToolResult {
110 success: false,
111 output: String::new(),
112 error: Some("Action blocked: autonomy is read-only".into()),
113 });
114 }
115
116 if self.security.is_rate_limited() {
117 return Ok(ToolResult {
118 success: false,
119 output: String::new(),
120 error: Some("Rate limit exceeded: too many actions in the last hour".into()),
121 });
122 }
123
124 let document_id = args
125 .get("document_id")
126 .and_then(|v| v.as_str())
127 .map(str::trim)
128 .filter(|s| !s.is_empty())
129 .ok_or_else(|| {
130 ::zeroclaw_log::record!(
131 WARN,
132 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
133 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
134 .with_attrs(::serde_json::json!({"param": "document_id"})),
135 "file_download: missing document_id parameter"
136 );
137 anyhow::Error::msg("Missing 'document_id' parameter")
138 })?;
139
140 let dest_path = args
141 .get("dest_path")
142 .and_then(|v| v.as_str())
143 .map(str::trim)
144 .filter(|s| !s.is_empty())
145 .ok_or_else(|| {
146 ::zeroclaw_log::record!(
147 WARN,
148 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
149 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
150 .with_attrs(::serde_json::json!({"param": "dest_path"})),
151 "file_download: missing dest_path parameter"
152 );
153 anyhow::Error::msg("Missing 'dest_path' parameter")
154 })?;
155
156 let full = self.security.resolve_tool_path(dest_path);
159
160 let file_name = match full.file_name().and_then(|s| s.to_str()) {
161 Some(name) if name != "." && name != ".." => name.to_string(),
162 _ => {
163 return Ok(ToolResult {
164 success: false,
165 output: String::new(),
166 error: Some(format!(
167 "Invalid dest_path '{dest_path}': must end in a concrete file name"
168 )),
169 });
170 }
171 };
172
173 let Some(parent) = full.parent() else {
174 return Ok(ToolResult {
175 success: false,
176 output: String::new(),
177 error: Some(format!(
178 "Invalid dest_path '{dest_path}': has no parent directory"
179 )),
180 });
181 };
182
183 let canonical_parent = match tokio::fs::canonicalize(parent).await {
187 Ok(p) => p,
188 Err(e) => {
189 return Ok(ToolResult {
190 success: false,
191 output: String::new(),
192 error: Some(format!(
193 "Cannot resolve destination directory for '{dest_path}': {e}"
194 )),
195 });
196 }
197 };
198
199 if !self.security.is_resolved_path_allowed(&canonical_parent) {
200 return Ok(ToolResult {
201 success: false,
202 output: String::new(),
203 error: Some(
204 self.security
205 .resolved_path_violation_message(&canonical_parent),
206 ),
207 });
208 }
209
210 let dest = canonical_parent.join(&file_name);
211 if !self.security.is_resolved_path_allowed(&dest) {
212 return Ok(ToolResult {
213 success: false,
214 output: String::new(),
215 error: Some(self.security.resolved_path_violation_message(&dest)),
216 });
217 }
218
219 if !self.security.record_action() {
222 return Ok(ToolResult {
223 success: false,
224 output: String::new(),
225 error: Some("Rate limit exceeded: action budget exhausted".into()),
226 });
227 }
228
229 let builder = reqwest::Client::builder()
233 .timeout(Duration::from_secs(self.config.timeout_secs))
234 .connect_timeout(Duration::from_secs(10))
235 .redirect(reqwest::redirect::Policy::none());
236 let builder =
237 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.file_download");
238 let client = match builder.build() {
239 Ok(c) => c,
240 Err(e) => {
241 return Ok(ToolResult {
242 success: false,
243 output: String::new(),
244 error: Some(format!("Failed to build download client: {e}")),
245 });
246 }
247 };
248
249 let mut request = client.get(url).query(&[("document_id", document_id)]);
250 for (k, v) in &self.config.headers {
251 request = request.header(k.as_str(), v.as_str());
252 }
253
254 let response = match request.send().await {
255 Ok(r) => r,
256 Err(e) => {
257 return Ok(ToolResult {
258 success: false,
259 output: String::new(),
260 error: Some(format!("Download request failed: {e}")),
261 });
262 }
263 };
264
265 let status = response.status();
266
267 if !status.is_success() {
268 let raw_body = response.text().await.unwrap_or_default();
269 let truncated = if raw_body.len() > RESPONSE_BODY_LIMIT_BYTES {
270 let mut cut = RESPONSE_BODY_LIMIT_BYTES;
276 while cut > 0 && !raw_body.is_char_boundary(cut) {
277 cut -= 1;
278 }
279 format!(
280 "{}... [truncated {} bytes]",
281 &raw_body[..cut],
282 raw_body.len() - cut
283 )
284 } else {
285 raw_body
286 };
287 return Ok(ToolResult {
288 success: false,
289 output: truncated,
290 error: Some(format!("Download endpoint returned status {status}")),
291 });
292 }
293
294 if let Some(len) = response.content_length()
297 && len > self.config.max_file_size_bytes
298 {
299 return Ok(ToolResult {
300 success: false,
301 output: String::new(),
302 error: Some(format!(
303 "Download too large: endpoint reports {len} bytes (limit: {} bytes)",
304 self.config.max_file_size_bytes
305 )),
306 });
307 }
308
309 let nanos = SystemTime::now()
313 .duration_since(UNIX_EPOCH)
314 .map(|d| d.as_nanos())
315 .unwrap_or(0);
316 let temp_path = canonical_parent.join(format!(".{file_name}.part-{nanos}"));
317
318 match Self::stream_to_temp(response, &temp_path, self.config.max_file_size_bytes).await {
319 Ok(written) => match tokio::fs::rename(&temp_path, &dest).await {
320 Ok(()) => Ok(ToolResult {
321 success: true,
322 output: format!("Downloaded {written} bytes to {dest_path} ({status})"),
323 error: None,
324 }),
325 Err(e) => {
326 let _ = tokio::fs::remove_file(&temp_path).await;
327 Ok(ToolResult {
328 success: false,
329 output: String::new(),
330 error: Some(format!("Failed to move downloaded file into place: {e}")),
331 })
332 }
333 },
334 Err(msg) => {
335 let _ = tokio::fs::remove_file(&temp_path).await;
336 Ok(ToolResult {
337 success: false,
338 output: String::new(),
339 error: Some(msg),
340 })
341 }
342 }
343 }
344}
345
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;
    use wiremock::matchers::{header, method, path, query_param};
    use wiremock::{Mock, MockServer, ResponseTemplate};
    use zeroclaw_config::autonomy::AutonomyLevel;

    /// Endpoint used by tests that never reach the network.
    const EXAMPLE_URL: &str = "https://example.com/download";

    /// Security policy rooted at `workspace` with the given autonomy level.
    fn test_security(workspace: PathBuf, level: AutonomyLevel) -> Arc<SecurityPolicy> {
        let policy = SecurityPolicy {
            autonomy: level,
            max_actions_per_hour: 100,
            workspace_dir: workspace,
            ..SecurityPolicy::default()
        };
        Arc::new(policy)
    }

    /// Default download config with only the endpoint URL overridden.
    fn cfg(url: Option<String>) -> FileDownloadConfig {
        FileDownloadConfig {
            url,
            ..FileDownloadConfig::default()
        }
    }

    /// Paths in `dir` whose file name contains the `.part-` temp-file marker.
    fn part_files(dir: &Path) -> Vec<PathBuf> {
        fs::read_dir(dir)
            .unwrap()
            .flatten()
            .map(|entry| entry.path())
            .filter(|candidate| {
                matches!(
                    candidate.file_name().and_then(|s| s.to_str()),
                    Some(name) if name.contains(".part-")
                )
            })
            .collect()
    }

    /// URL of the `/download` route on the mock server.
    fn download_url(server: &MockServer) -> String {
        format!("{}/download", server.uri())
    }

    /// Tool whose workspace is `workspace`, with the given autonomy and config.
    fn tool_in(
        workspace: &TempDir,
        level: AutonomyLevel,
        config: FileDownloadConfig,
    ) -> FileDownloadTool {
        FileDownloadTool::new(
            test_security(workspace.path().to_path_buf(), level),
            config,
        )
    }

    /// Runs the tool with both arguments supplied and unwraps the outer result.
    async fn fetch(tool: &FileDownloadTool, document_id: &str, dest_path: &str) -> ToolResult {
        tool.execute(json!({ "document_id": document_id, "dest_path": dest_path }))
            .await
            .unwrap()
    }

    #[test]
    fn tool_name_and_description() {
        let tmp = TempDir::new().unwrap();
        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(EXAMPLE_URL.into())));
        assert_eq!(tool.name(), "file_download");
        assert!(!tool.description().is_empty());
    }

    #[test]
    fn schema_requires_document_id_and_dest_path() {
        let tmp = TempDir::new().unwrap();
        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(EXAMPLE_URL.into())));
        let schema = tool.parameters_schema();
        assert_eq!(schema["type"], "object");
        let required = schema["required"].as_array().unwrap();
        for key in ["document_id", "dest_path"] {
            assert!(required.contains(&serde_json::Value::String(key.into())));
        }
    }

    #[tokio::test]
    async fn execute_fails_when_url_unset() {
        let tmp = TempDir::new().unwrap();
        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(None));

        let result = fetch(&tool, "doc-1", "out.bin").await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("disabled"));
        assert!(!tmp.path().join("out.bin").exists());
    }

    #[tokio::test]
    async fn execute_blocks_readonly_autonomy() {
        let tmp = TempDir::new().unwrap();
        let tool = tool_in(&tmp, AutonomyLevel::ReadOnly, cfg(Some(EXAMPLE_URL.into())));

        let result = fetch(&tool, "doc-1", "out.bin").await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("read-only"));
        assert!(!tmp.path().join("out.bin").exists());
    }

    #[tokio::test]
    async fn execute_errors_on_missing_arguments() {
        let tmp = TempDir::new().unwrap();
        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(EXAMPLE_URL.into())));

        // Missing document_id.
        let no_id = tool.execute(json!({ "dest_path": "out.bin" })).await;
        assert!(no_id.is_err());

        // Missing dest_path.
        let no_dest = tool.execute(json!({ "document_id": "doc-1" })).await;
        assert!(no_dest.is_err());

        // Blank document_id is treated like a missing one.
        let blank_id = tool
            .execute(json!({ "document_id": " ", "dest_path": "out.bin" }))
            .await;
        assert!(blank_id.is_err());
    }

    #[tokio::test]
    async fn execute_rejects_traversal_dest_path() {
        let tmp = TempDir::new().unwrap();
        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(EXAMPLE_URL.into())));

        let result = fetch(&tool, "doc-1", "nested/..").await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("concrete file name"));
    }

    #[tokio::test]
    async fn execute_rejects_dest_outside_workspace() {
        let server = MockServer::start().await;
        let workspace = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        // The endpoint must never be contacted for a rejected destination.
        Mock::given(method("GET"))
            .and(path("/download"))
            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"should-not-arrive".to_vec()))
            .expect(0)
            .mount(&server)
            .await;

        let dest_abs = outside.path().join("escape.bin");
        let tool = tool_in(
            &workspace,
            AutonomyLevel::Full,
            cfg(Some(download_url(&server))),
        );

        let result = fetch(&tool, "doc-1", &dest_abs.to_string_lossy()).await;

        assert!(!result.success);
        assert!(
            !dest_abs.exists(),
            "no file should be written outside workspace"
        );
    }

    #[tokio::test]
    async fn execute_downloads_file_to_dest() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();
        let body = b"the-downloaded-bytes-\x00\x01\x02".to_vec();

        Mock::given(method("GET"))
            .and(path("/download"))
            .and(query_param("document_id", "doc-123"))
            .respond_with(ResponseTemplate::new(200).set_body_bytes(body.clone()))
            .expect(1)
            .mount(&server)
            .await;

        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(download_url(&server))));

        let result = fetch(&tool, "doc-123", "out.bin").await;

        assert!(result.success, "expected success, got {result:?}");
        let written = fs::read(tmp.path().join("out.bin")).unwrap();
        assert_eq!(written, body);
        assert!(result.output.contains("out.bin"));
        assert!(
            part_files(tmp.path()).is_empty(),
            "temp file must be cleaned up"
        );
    }

    #[tokio::test]
    async fn execute_sends_configured_bearer_header() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();

        Mock::given(method("GET"))
            .and(path("/download"))
            .and(header("Authorization", "Bearer secret-token"))
            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"ok".to_vec()))
            .expect(1)
            .mount(&server)
            .await;

        let config = FileDownloadConfig {
            headers: HashMap::from([("Authorization".into(), "Bearer secret-token".into())]),
            ..cfg(Some(download_url(&server)))
        };
        let tool = tool_in(&tmp, AutonomyLevel::Full, config);

        let result = fetch(&tool, "doc-1", "out.bin").await;

        assert!(result.success, "expected success, got {result:?}");
        assert_eq!(fs::read(tmp.path().join("out.bin")).unwrap(), b"ok");
    }

    #[tokio::test]
    async fn execute_reports_non_2xx_without_writing() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();

        Mock::given(method("GET"))
            .and(path("/download"))
            .respond_with(ResponseTemplate::new(404).set_body_string("not_found"))
            .expect(1)
            .mount(&server)
            .await;

        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(download_url(&server))));

        let result = fetch(&tool, "missing", "out.bin").await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("404"));
        assert!(!tmp.path().join("out.bin").exists());
        assert!(part_files(tmp.path()).is_empty());
    }

    #[tokio::test]
    async fn execute_rejects_oversized_via_content_length() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();

        Mock::given(method("GET"))
            .and(path("/download"))
            .respond_with(ResponseTemplate::new(200).set_body_bytes(vec![0u8; 2048]))
            .mount(&server)
            .await;

        let config = FileDownloadConfig {
            max_file_size_bytes: 1024,
            ..cfg(Some(download_url(&server)))
        };
        let tool = tool_in(&tmp, AutonomyLevel::Full, config);

        let result = fetch(&tool, "big", "out.bin").await;

        assert!(!result.success);
        assert!(
            result.error.unwrap().contains("endpoint reports"),
            "expected the Content-Length fast-reject path"
        );
        assert!(!tmp.path().join("out.bin").exists());
        assert!(
            part_files(tmp.path()).is_empty(),
            "no partial file may remain"
        );
    }

    #[tokio::test]
    async fn execute_rejects_oversized_while_streaming_without_content_length() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();

        let chunked_response = ResponseTemplate::new(200)
            .insert_header("Transfer-Encoding", "chunked")
            .set_body_bytes(vec![0u8; 4096]);
        Mock::given(method("GET"))
            .and(path("/download"))
            .respond_with(chunked_response)
            .mount(&server)
            .await;

        let config = FileDownloadConfig {
            max_file_size_bytes: 1024,
            ..cfg(Some(download_url(&server)))
        };
        let tool = tool_in(&tmp, AutonomyLevel::Full, config);

        let result = fetch(&tool, "big", "out.bin").await;

        assert!(!result.success);
        assert!(
            result.error.unwrap().contains("exceeded limit"),
            "expected the streaming size-cap path"
        );
        assert!(!tmp.path().join("out.bin").exists());
        assert!(
            part_files(tmp.path()).is_empty(),
            "no partial file may remain"
        );
    }

    #[tokio::test]
    async fn execute_does_not_follow_redirects_from_configured_endpoint() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();

        let redirect = ResponseTemplate::new(302)
            .insert_header("location", format!("{}/elsewhere", server.uri()));
        Mock::given(method("GET"))
            .and(path("/download"))
            .respond_with(redirect)
            .expect(1)
            .mount(&server)
            .await;
        // The redirect target must never be requested.
        Mock::given(method("GET"))
            .and(path("/elsewhere"))
            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"redirected-bytes".to_vec()))
            .expect(0)
            .mount(&server)
            .await;

        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(download_url(&server))));

        let result = fetch(&tool, "doc-1", "out.bin").await;

        assert!(!result.success);
        assert!(
            result.error.as_deref().unwrap_or("").contains("302"),
            "expected the 302 status to surface; got {result:?}"
        );
        assert!(
            !tmp.path().join("out.bin").exists(),
            "no file may be written when the configured endpoint returns 3xx"
        );
        assert!(
            part_files(tmp.path()).is_empty(),
            "no partial file may remain after a 3xx response"
        );
    }

    #[tokio::test]
    async fn execute_truncates_non_ascii_error_body_safely() {
        let server = MockServer::start().await;
        let tmp = TempDir::new().unwrap();

        // 4094 ASCII bytes followed by three-byte characters, so byte offset
        // 4096 falls inside a multi-byte character.
        let body = ["x".repeat(4094).as_str(), "世界世界世界世界世界世界"].concat();
        assert!(!body.is_char_boundary(4096));

        Mock::given(method("GET"))
            .and(path("/download"))
            .respond_with(ResponseTemplate::new(500).set_body_string(body.clone()))
            .expect(1)
            .mount(&server)
            .await;

        let tool = tool_in(&tmp, AutonomyLevel::Full, cfg(Some(download_url(&server))));

        let result = fetch(&tool, "doc-1", "out.bin").await;

        assert!(!result.success);
        assert!(result.error.as_deref().unwrap_or("").contains("500"));
        assert!(result.output.contains("[truncated"));
        assert!(
            result.output.len() < body.len(),
            "expected the body to be shortened"
        );
        assert!(!tmp.path().join("out.bin").exists());
    }
}