Skip to main content

zeroclaw_tools/
file_download.rs

1use async_trait::async_trait;
2use futures_util::StreamExt;
3use serde_json::json;
4use std::path::Path;
5use std::sync::Arc;
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7use tokio::io::AsyncWriteExt;
8use zeroclaw_api::tool::{Tool, ToolResult, with_ephemeral_workspace_warning};
9use zeroclaw_config::policy::SecurityPolicy;
10use zeroclaw_config::schema::FileDownloadConfig;
11
/// Upper bound, in bytes, on how much of a non-success response body is echoed
/// back in the tool output. Longer bodies are cut at a UTF-8 char boundary at
/// or below this limit and suffixed with a note of how many bytes were dropped.
const RESPONSE_BODY_LIMIT_BYTES: usize = 4 * 1024;
13
/// Tool that fetches a document from the host-configured endpoint and writes
/// it to a destination path that has been validated against the security
/// policy.
pub struct FileDownloadTool {
    /// Policy consulted for the autonomy gate, rate limiting, the action
    /// budget, and destination-path resolution/validation.
    security: Arc<SecurityPolicy>,
    /// Host-side `[file_download]` settings read by this tool: endpoint URL,
    /// extra request headers, request timeout, and the maximum file size.
    config: FileDownloadConfig,
    /// Whether the downloaded file persists on the host filesystem. `false` on
    /// an ephemeral runtime (Docker tmpfs / no volume mount), where the file is
    /// written inside the container but invisible on the host and discarded at
    /// session end. When `false`, a successful download carries a loud
    /// ephemeral-workspace warning. Mirrors
    /// [`super::file_write::FileWriteTool`]. See issue #4627.
    persistent_writes: bool,
}
25
26impl FileDownloadTool {
27    pub fn new(security: Arc<SecurityPolicy>, config: FileDownloadConfig) -> Self {
28        Self {
29            security,
30            config,
31            persistent_writes: true,
32        }
33    }
34
35    /// Construct with an explicit persistence flag derived from the active
36    /// runtime adapter's `has_filesystem_access()`. Mirrors
37    /// [`super::file_write::FileWriteTool::new_with_persistence`].
38    pub fn new_with_persistence(
39        security: Arc<SecurityPolicy>,
40        config: FileDownloadConfig,
41        persistent_writes: bool,
42    ) -> Self {
43        Self {
44            security,
45            config,
46            persistent_writes,
47        }
48    }
49
50    /// Stream a response body into `temp_path`, treating `max_bytes` as a hard
51    /// ceiling so an unbounded or oversized body never fully buffers in memory.
52    /// Returns the number of bytes written, or an error message. The caller is
53    /// responsible for removing `temp_path` on any error.
54    async fn stream_to_temp(
55        response: reqwest::Response,
56        temp_path: &Path,
57        max_bytes: u64,
58    ) -> Result<u64, String> {
59        let mut file = tokio::fs::File::create(temp_path)
60            .await
61            .map_err(|e| format!("Failed to create temporary download file: {e}"))?;
62
63        let mut stream = response.bytes_stream();
64        let mut written: u64 = 0;
65        while let Some(chunk) = stream.next().await {
66            let chunk = chunk.map_err(|e| format!("Failed while reading response body: {e}"))?;
67            written = written.saturating_add(chunk.len() as u64);
68            if written > max_bytes {
69                return Err(format!(
70                    "Download too large: exceeded limit of {max_bytes} bytes"
71                ));
72            }
73            file.write_all(&chunk)
74                .await
75                .map_err(|e| format!("Failed while writing downloaded bytes: {e}"))?;
76        }
77
78        file.flush()
79            .await
80            .map_err(|e| format!("Failed to flush downloaded file: {e}"))?;
81        Ok(written)
82    }
83}
84
85#[async_trait]
86impl Tool for FileDownloadTool {
87    fn name(&self) -> &str {
88        "file_download"
89    }
90
91    fn description(&self) -> &str {
92        "Download a file from the configured remote endpoint and write it to the \
93         agent's workspace. Supply the identifier of the document to fetch and a \
94         workspace-relative destination path; the endpoint URL is fixed by host \
95         config and is never model-controlled. Bytes are streamed straight to \
96         disk and are not loaded into model context. Returns the HTTP status, \
97         the number of bytes written, and the destination path."
98    }
99
100    fn parameters_schema(&self) -> serde_json::Value {
101        json!({
102            "type": "object",
103            "properties": {
104                "document_id": {
105                    "type": "string",
106                    "description": "Identifier of the document to fetch from the configured endpoint."
107                },
108                "dest_path": {
109                    "type": "string",
110                    "description": "Workspace-relative path to write the file to. The parent directory must already exist."
111                }
112            },
113            "required": ["document_id", "dest_path"]
114        })
115    }
116
117    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
118        let Some(url) = self
119            .config
120            .url
121            .as_deref()
122            .map(str::trim)
123            .filter(|s| !s.is_empty())
124        else {
125            return Ok(ToolResult {
126                success: false,
127                output: String::new(),
128                error: Some(
129                    "file_download is disabled: [file_download].url is not configured".into(),
130                ),
131            });
132        };
133
134        if !self.security.can_act() {
135            return Ok(ToolResult {
136                success: false,
137                output: String::new(),
138                error: Some("Action blocked: autonomy is read-only".into()),
139            });
140        }
141
142        if self.security.is_rate_limited() {
143            return Ok(ToolResult {
144                success: false,
145                output: String::new(),
146                error: Some("Rate limit exceeded: too many actions in the last hour".into()),
147            });
148        }
149
150        let document_id = args
151            .get("document_id")
152            .and_then(|v| v.as_str())
153            .map(str::trim)
154            .filter(|s| !s.is_empty())
155            .ok_or_else(|| {
156                ::zeroclaw_log::record!(
157                    WARN,
158                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
159                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
160                        .with_attrs(::serde_json::json!({"param": "document_id"})),
161                    "file_download: missing document_id parameter"
162                );
163                anyhow::Error::msg("Missing 'document_id' parameter")
164            })?;
165
166        let dest_path = args
167            .get("dest_path")
168            .and_then(|v| v.as_str())
169            .map(str::trim)
170            .filter(|s| !s.is_empty())
171            .ok_or_else(|| {
172                ::zeroclaw_log::record!(
173                    WARN,
174                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
175                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
176                        .with_attrs(::serde_json::json!({"param": "dest_path"})),
177                    "file_download: missing dest_path parameter"
178                );
179                anyhow::Error::msg("Missing 'dest_path' parameter")
180            })?;
181
182        // The downloaded bytes are attacker-influenceable, so the write target
183        // must resolve inside the workspace allowlist before any network call.
184        let full = self.security.resolve_tool_path(dest_path);
185
186        let file_name = match full.file_name().and_then(|s| s.to_str()) {
187            Some(name) if name != "." && name != ".." => name.to_string(),
188            _ => {
189                return Ok(ToolResult {
190                    success: false,
191                    output: String::new(),
192                    error: Some(format!(
193                        "Invalid dest_path '{dest_path}': must end in a concrete file name"
194                    )),
195                });
196            }
197        };
198
199        let Some(parent) = full.parent() else {
200            return Ok(ToolResult {
201                success: false,
202                output: String::new(),
203                error: Some(format!(
204                    "Invalid dest_path '{dest_path}': has no parent directory"
205                )),
206            });
207        };
208
209        // Canonicalize the parent (which must already exist) so a symlinked
210        // parent cannot redirect the write outside the workspace. `full` itself
211        // does not exist yet, so it is never canonicalized.
212        let canonical_parent = match tokio::fs::canonicalize(parent).await {
213            Ok(p) => p,
214            Err(e) => {
215                return Ok(ToolResult {
216                    success: false,
217                    output: String::new(),
218                    error: Some(format!(
219                        "Cannot resolve destination directory for '{dest_path}': {e}"
220                    )),
221                });
222            }
223        };
224
225        if !self.security.is_resolved_path_allowed(&canonical_parent) {
226            return Ok(ToolResult {
227                success: false,
228                output: String::new(),
229                error: Some(
230                    self.security
231                        .resolved_path_violation_message(&canonical_parent),
232                ),
233            });
234        }
235
236        let dest = canonical_parent.join(&file_name);
237        if !self.security.is_resolved_path_allowed(&dest) {
238            return Ok(ToolResult {
239                success: false,
240                output: String::new(),
241                error: Some(self.security.resolved_path_violation_message(&dest)),
242            });
243        }
244
245        // Debit the action budget only once the request is validated, mirroring
246        // file_upload — right before the network call.
247        if !self.security.record_action() {
248            return Ok(ToolResult {
249                success: false,
250                output: String::new(),
251                error: Some("Rate limit exceeded: action budget exhausted".into()),
252            });
253        }
254
255        // Disable redirect-following: the configured `[file_download].url` is
256        // the operator-approved endpoint, so a 3xx response from it must surface
257        // as a non-success status rather than silently rehome the request.
258        let builder = reqwest::Client::builder()
259            .timeout(Duration::from_secs(self.config.timeout_secs))
260            .connect_timeout(Duration::from_secs(10))
261            .redirect(reqwest::redirect::Policy::none());
262        let builder =
263            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.file_download");
264        let client = match builder.build() {
265            Ok(c) => c,
266            Err(e) => {
267                return Ok(ToolResult {
268                    success: false,
269                    output: String::new(),
270                    error: Some(format!("Failed to build download client: {e}")),
271                });
272            }
273        };
274
275        let mut request = client.get(url).query(&[("document_id", document_id)]);
276        for (k, v) in &self.config.headers {
277            request = request.header(k.as_str(), v.as_str());
278        }
279
280        let response = match request.send().await {
281            Ok(r) => r,
282            Err(e) => {
283                return Ok(ToolResult {
284                    success: false,
285                    output: String::new(),
286                    error: Some(format!("Download request failed: {e}")),
287                });
288            }
289        };
290
291        let status = response.status();
292
293        if !status.is_success() {
294            let raw_body = response.text().await.unwrap_or_default();
295            let truncated = if raw_body.len() > RESPONSE_BODY_LIMIT_BYTES {
296                // The body is attacker-influenceable, so split on a char boundary
297                // to avoid panicking when the byte cutoff lands inside a
298                // multi-byte UTF-8 sequence. floor_char_boundary is unstable, so
299                // walk down at most three bytes — a UTF-8 code point is at most
300                // four bytes wide, so a boundary is always within reach.
301                let mut cut = RESPONSE_BODY_LIMIT_BYTES;
302                while cut > 0 && !raw_body.is_char_boundary(cut) {
303                    cut -= 1;
304                }
305                format!(
306                    "{}... [truncated {} bytes]",
307                    &raw_body[..cut],
308                    raw_body.len() - cut
309                )
310            } else {
311                raw_body
312            };
313            return Ok(ToolResult {
314                success: false,
315                output: truncated,
316                error: Some(format!("Download endpoint returned status {status}")),
317            });
318        }
319
320        // Fast-reject when the endpoint advertises an oversized body, before
321        // opening the destination file at all.
322        if let Some(len) = response.content_length()
323            && len > self.config.max_file_size_bytes
324        {
325            return Ok(ToolResult {
326                success: false,
327                output: String::new(),
328                error: Some(format!(
329                    "Download too large: endpoint reports {len} bytes (limit: {} bytes)",
330                    self.config.max_file_size_bytes
331                )),
332            });
333        }
334
335        // Stream into a temp file in the destination directory so a failed or
336        // oversized transfer never leaves a partial artifact at `dest`; on
337        // success the rename is atomic within the same directory.
338        let nanos = SystemTime::now()
339            .duration_since(UNIX_EPOCH)
340            .map(|d| d.as_nanos())
341            .unwrap_or(0);
342        let temp_path = canonical_parent.join(format!(".{file_name}.part-{nanos}"));
343
344        match Self::stream_to_temp(response, &temp_path, self.config.max_file_size_bytes).await {
345            Ok(written) => match tokio::fs::rename(&temp_path, &dest).await {
346                Ok(()) => {
347                    let output = format!("Downloaded {written} bytes to {dest_path} ({status})");
348                    // The download landed in an ephemeral workspace and will not
349                    // reach the host — warn loudly rather than report a bare
350                    // success (issue #4627).
351                    let output = if self.persistent_writes {
352                        output
353                    } else {
354                        with_ephemeral_workspace_warning(&output)
355                    };
356                    Ok(ToolResult {
357                        success: true,
358                        output,
359                        error: None,
360                    })
361                }
362                Err(e) => {
363                    let _ = tokio::fs::remove_file(&temp_path).await;
364                    Ok(ToolResult {
365                        success: false,
366                        output: String::new(),
367                        error: Some(format!("Failed to move downloaded file into place: {e}")),
368                    })
369                }
370            },
371            Err(msg) => {
372                let _ = tokio::fs::remove_file(&temp_path).await;
373                Ok(ToolResult {
374                    success: false,
375                    output: String::new(),
376                    error: Some(msg),
377                })
378            }
379        }
380    }
381}
382
383#[cfg(test)]
384mod tests {
385    use super::*;
386    use std::collections::HashMap;
387    use std::fs;
388    use std::path::PathBuf;
389    use tempfile::TempDir;
390    use wiremock::matchers::{header, method, path, query_param};
391    use wiremock::{Mock, MockServer, ResponseTemplate};
392    use zeroclaw_config::autonomy::AutonomyLevel;
393
394    fn test_security(workspace: PathBuf, level: AutonomyLevel) -> Arc<SecurityPolicy> {
395        Arc::new(SecurityPolicy {
396            autonomy: level,
397            max_actions_per_hour: 100,
398            workspace_dir: workspace,
399            ..SecurityPolicy::default()
400        })
401    }
402
403    fn cfg(url: Option<String>) -> FileDownloadConfig {
404        FileDownloadConfig {
405            url,
406            ..FileDownloadConfig::default()
407        }
408    }
409
410    /// Count files in `dir` whose name marks an in-progress download temp file.
411    fn part_files(dir: &Path) -> Vec<PathBuf> {
412        fs::read_dir(dir)
413            .unwrap()
414            .filter_map(|e| e.ok().map(|e| e.path()))
415            .filter(|p| {
416                p.file_name()
417                    .and_then(|s| s.to_str())
418                    .is_some_and(|n| n.contains(".part-"))
419            })
420            .collect()
421    }
422
423    #[test]
424    fn tool_name_and_description() {
425        let tmp = TempDir::new().unwrap();
426        let tool = FileDownloadTool::new(
427            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
428            cfg(Some("https://example.com/download".into())),
429        );
430        assert_eq!(tool.name(), "file_download");
431        assert!(!tool.description().is_empty());
432    }
433
434    #[test]
435    fn schema_requires_document_id_and_dest_path() {
436        let tmp = TempDir::new().unwrap();
437        let tool = FileDownloadTool::new(
438            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
439            cfg(Some("https://example.com/download".into())),
440        );
441        let schema = tool.parameters_schema();
442        assert_eq!(schema["type"], "object");
443        let required = schema["required"].as_array().unwrap();
444        assert!(required.contains(&serde_json::Value::String("document_id".into())));
445        assert!(required.contains(&serde_json::Value::String("dest_path".into())));
446    }
447
448    #[tokio::test]
449    async fn execute_fails_when_url_unset() {
450        let tmp = TempDir::new().unwrap();
451        let tool = FileDownloadTool::new(
452            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
453            cfg(None),
454        );
455
456        let result = tool
457            .execute(json!({ "document_id": "doc-1", "dest_path": "out.bin" }))
458            .await
459            .unwrap();
460        assert!(!result.success);
461        assert!(result.error.unwrap().contains("disabled"));
462        assert!(!tmp.path().join("out.bin").exists());
463    }
464
465    #[tokio::test]
466    async fn execute_blocks_readonly_autonomy() {
467        let tmp = TempDir::new().unwrap();
468        let tool = FileDownloadTool::new(
469            test_security(tmp.path().to_path_buf(), AutonomyLevel::ReadOnly),
470            cfg(Some("https://example.com/download".into())),
471        );
472
473        let result = tool
474            .execute(json!({ "document_id": "doc-1", "dest_path": "out.bin" }))
475            .await
476            .unwrap();
477        assert!(!result.success);
478        assert!(result.error.unwrap().contains("read-only"));
479        assert!(!tmp.path().join("out.bin").exists());
480    }
481
482    #[tokio::test]
483    async fn execute_errors_on_missing_arguments() {
484        let tmp = TempDir::new().unwrap();
485        let tool = FileDownloadTool::new(
486            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
487            cfg(Some("https://example.com/download".into())),
488        );
489
490        assert!(
491            tool.execute(json!({ "dest_path": "out.bin" }))
492                .await
493                .is_err()
494        );
495        assert!(
496            tool.execute(json!({ "document_id": "doc-1" }))
497                .await
498                .is_err()
499        );
500        // Present-but-empty values are treated the same as missing.
501        assert!(
502            tool.execute(json!({ "document_id": "  ", "dest_path": "out.bin" }))
503                .await
504                .is_err()
505        );
506    }
507
508    #[tokio::test]
509    async fn execute_rejects_traversal_dest_path() {
510        let tmp = TempDir::new().unwrap();
511        let tool = FileDownloadTool::new(
512            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
513            cfg(Some("https://example.com/download".into())),
514        );
515
516        // A dest_path that terminates in `..` has no concrete file name.
517        let result = tool
518            .execute(json!({ "document_id": "doc-1", "dest_path": "nested/.." }))
519            .await
520            .unwrap();
521        assert!(!result.success);
522        assert!(result.error.unwrap().contains("concrete file name"));
523    }
524
525    #[tokio::test]
526    async fn execute_rejects_dest_outside_workspace() {
527        let server = MockServer::start().await;
528        let workspace = TempDir::new().unwrap();
529        let outside = TempDir::new().unwrap();
530
531        // The endpoint must never be contacted when the destination is rejected.
532        Mock::given(method("GET"))
533            .and(path("/download"))
534            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"should-not-arrive".to_vec()))
535            .expect(0)
536            .mount(&server)
537            .await;
538
539        let dest_abs = outside.path().join("escape.bin");
540        let config = FileDownloadConfig {
541            url: Some(format!("{}/download", server.uri())),
542            ..FileDownloadConfig::default()
543        };
544        let tool = FileDownloadTool::new(
545            test_security(workspace.path().to_path_buf(), AutonomyLevel::Full),
546            config,
547        );
548
549        let result = tool
550            .execute(json!({
551                "document_id": "doc-1",
552                "dest_path": dest_abs.to_string_lossy(),
553            }))
554            .await
555            .unwrap();
556
557        assert!(!result.success);
558        assert!(
559            !dest_abs.exists(),
560            "no file should be written outside workspace"
561        );
562    }
563
564    #[tokio::test]
565    async fn execute_downloads_file_to_dest() {
566        let server = MockServer::start().await;
567        let tmp = TempDir::new().unwrap();
568        let body = b"the-downloaded-bytes-\x00\x01\x02".to_vec();
569
570        Mock::given(method("GET"))
571            .and(path("/download"))
572            .and(query_param("document_id", "doc-123"))
573            .respond_with(ResponseTemplate::new(200).set_body_bytes(body.clone()))
574            .expect(1)
575            .mount(&server)
576            .await;
577
578        let config = FileDownloadConfig {
579            url: Some(format!("{}/download", server.uri())),
580            ..FileDownloadConfig::default()
581        };
582        let tool = FileDownloadTool::new(
583            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
584            config,
585        );
586
587        let result = tool
588            .execute(json!({ "document_id": "doc-123", "dest_path": "out.bin" }))
589            .await
590            .unwrap();
591
592        assert!(result.success, "expected success, got {result:?}");
593        let written = fs::read(tmp.path().join("out.bin")).unwrap();
594        assert_eq!(written, body);
595        assert!(result.output.contains("out.bin"));
596        assert!(
597            part_files(tmp.path()).is_empty(),
598            "temp file must be cleaned up"
599        );
600    }
601
602    /// On an ephemeral runtime a successful download lands in a workspace that
603    /// won't persist; the output must carry the loud warning while preserving
604    /// the original status, and the bytes must still be written (issue #4627).
605    #[tokio::test]
606    async fn execute_warns_on_ephemeral_workspace() {
607        let server = MockServer::start().await;
608        let tmp = TempDir::new().unwrap();
609        let body = b"downloaded-bytes".to_vec();
610
611        Mock::given(method("GET"))
612            .and(path("/download"))
613            .and(query_param("document_id", "doc-eph"))
614            .respond_with(ResponseTemplate::new(200).set_body_bytes(body.clone()))
615            .expect(1)
616            .mount(&server)
617            .await;
618
619        let config = FileDownloadConfig {
620            url: Some(format!("{}/download", server.uri())),
621            ..FileDownloadConfig::default()
622        };
623        let tool = FileDownloadTool::new_with_persistence(
624            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
625            config,
626            false,
627        );
628
629        let result = tool
630            .execute(json!({ "document_id": "doc-eph", "dest_path": "out.bin" }))
631            .await
632            .unwrap();
633
634        assert!(result.success, "expected success, got {result:?}");
635        assert!(
636            result.output.contains("EPHEMERAL WORKSPACE"),
637            "ephemeral warning must be present, got: {}",
638            result.output
639        );
640        assert!(result.output.contains("mount_workspace"));
641        assert!(
642            result.output.contains("out.bin"),
643            "original download status must be preserved, got: {}",
644            result.output
645        );
646        assert_eq!(fs::read(tmp.path().join("out.bin")).unwrap(), body);
647    }
648
649    #[tokio::test]
650    async fn execute_sends_configured_bearer_header() {
651        let server = MockServer::start().await;
652        let tmp = TempDir::new().unwrap();
653
654        Mock::given(method("GET"))
655            .and(path("/download"))
656            .and(header("Authorization", "Bearer secret-token"))
657            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"ok".to_vec()))
658            .expect(1)
659            .mount(&server)
660            .await;
661
662        let mut headers = HashMap::new();
663        headers.insert("Authorization".into(), "Bearer secret-token".into());
664        let config = FileDownloadConfig {
665            url: Some(format!("{}/download", server.uri())),
666            headers,
667            ..FileDownloadConfig::default()
668        };
669        let tool = FileDownloadTool::new(
670            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
671            config,
672        );
673
674        let result = tool
675            .execute(json!({ "document_id": "doc-1", "dest_path": "out.bin" }))
676            .await
677            .unwrap();
678
679        // The mock only matches when the Bearer header is present, so success
680        // proves the configured header was attached to the request.
681        assert!(result.success, "expected success, got {result:?}");
682        assert_eq!(fs::read(tmp.path().join("out.bin")).unwrap(), b"ok");
683    }
684
685    #[tokio::test]
686    async fn execute_reports_non_2xx_without_writing() {
687        let server = MockServer::start().await;
688        let tmp = TempDir::new().unwrap();
689
690        Mock::given(method("GET"))
691            .and(path("/download"))
692            .respond_with(ResponseTemplate::new(404).set_body_string("not_found"))
693            .expect(1)
694            .mount(&server)
695            .await;
696
697        let config = FileDownloadConfig {
698            url: Some(format!("{}/download", server.uri())),
699            ..FileDownloadConfig::default()
700        };
701        let tool = FileDownloadTool::new(
702            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
703            config,
704        );
705
706        let result = tool
707            .execute(json!({ "document_id": "missing", "dest_path": "out.bin" }))
708            .await
709            .unwrap();
710
711        assert!(!result.success);
712        assert!(result.error.unwrap().contains("404"));
713        assert!(!tmp.path().join("out.bin").exists());
714        assert!(part_files(tmp.path()).is_empty());
715    }
716
717    #[tokio::test]
718    async fn execute_rejects_oversized_via_content_length() {
719        let server = MockServer::start().await;
720        let tmp = TempDir::new().unwrap();
721
722        // Body of 2048 bytes; wiremock serves it with a Content-Length header.
723        Mock::given(method("GET"))
724            .and(path("/download"))
725            .respond_with(ResponseTemplate::new(200).set_body_bytes(vec![0u8; 2048]))
726            .mount(&server)
727            .await;
728
729        let mut config = FileDownloadConfig {
730            url: Some(format!("{}/download", server.uri())),
731            ..FileDownloadConfig::default()
732        };
733        config.max_file_size_bytes = 1024;
734        let tool = FileDownloadTool::new(
735            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
736            config,
737        );
738
739        let result = tool
740            .execute(json!({ "document_id": "big", "dest_path": "out.bin" }))
741            .await
742            .unwrap();
743
744        assert!(!result.success);
745        // The advertised Content-Length must trigger the fast pre-stream reject.
746        assert!(
747            result.error.unwrap().contains("endpoint reports"),
748            "expected the Content-Length fast-reject path"
749        );
750        assert!(!tmp.path().join("out.bin").exists());
751        assert!(
752            part_files(tmp.path()).is_empty(),
753            "no partial file may remain"
754        );
755    }
756
757    #[tokio::test]
758    async fn execute_rejects_oversized_while_streaming_without_content_length() {
759        let server = MockServer::start().await;
760        let tmp = TempDir::new().unwrap();
761
762        // `Transfer-Encoding: chunked` makes the served response omit
763        // Content-Length, so the size ceiling can only be enforced by the
764        // streaming accumulator rather than the fast Content-Length check.
765        Mock::given(method("GET"))
766            .and(path("/download"))
767            .respond_with(
768                ResponseTemplate::new(200)
769                    .insert_header("Transfer-Encoding", "chunked")
770                    .set_body_bytes(vec![0u8; 4096]),
771            )
772            .mount(&server)
773            .await;
774
775        let mut config = FileDownloadConfig {
776            url: Some(format!("{}/download", server.uri())),
777            ..FileDownloadConfig::default()
778        };
779        config.max_file_size_bytes = 1024;
780        let tool = FileDownloadTool::new(
781            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
782            config,
783        );
784
785        let result = tool
786            .execute(json!({ "document_id": "big", "dest_path": "out.bin" }))
787            .await
788            .unwrap();
789
790        assert!(!result.success);
791        // With no Content-Length, only the streaming accumulator can catch the
792        // overage, which emits this distinct message.
793        assert!(
794            result.error.unwrap().contains("exceeded limit"),
795            "expected the streaming size-cap path"
796        );
797        assert!(!tmp.path().join("out.bin").exists());
798        assert!(
799            part_files(tmp.path()).is_empty(),
800            "no partial file may remain"
801        );
802    }
803
804    #[tokio::test]
805    async fn execute_does_not_follow_redirects_from_configured_endpoint() {
806        let server = MockServer::start().await;
807        let tmp = TempDir::new().unwrap();
808
809        // The configured endpoint returns a 302 pointing at a sibling path.
810        // With redirects disabled, the tool must surface the 302 itself as a
811        // non-success status and must never contact the redirect target.
812        Mock::given(method("GET"))
813            .and(path("/download"))
814            .respond_with(
815                ResponseTemplate::new(302)
816                    .insert_header("location", format!("{}/elsewhere", server.uri())),
817            )
818            .expect(1)
819            .mount(&server)
820            .await;
821        Mock::given(method("GET"))
822            .and(path("/elsewhere"))
823            .respond_with(ResponseTemplate::new(200).set_body_bytes(b"redirected-bytes".to_vec()))
824            .expect(0)
825            .mount(&server)
826            .await;
827
828        let config = FileDownloadConfig {
829            url: Some(format!("{}/download", server.uri())),
830            ..FileDownloadConfig::default()
831        };
832        let tool = FileDownloadTool::new(
833            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
834            config,
835        );
836
837        let result = tool
838            .execute(json!({ "document_id": "doc-1", "dest_path": "out.bin" }))
839            .await
840            .unwrap();
841
842        assert!(!result.success);
843        assert!(
844            result.error.as_deref().unwrap_or("").contains("302"),
845            "expected the 302 status to surface; got {result:?}"
846        );
847        assert!(
848            !tmp.path().join("out.bin").exists(),
849            "no file may be written when the configured endpoint returns 3xx"
850        );
851        assert!(
852            part_files(tmp.path()).is_empty(),
853            "no partial file may remain after a 3xx response"
854        );
855    }
856
857    #[tokio::test]
858    async fn execute_truncates_non_ascii_error_body_safely() {
859        let server = MockServer::start().await;
860        let tmp = TempDir::new().unwrap();
861
862        // Build a non-2xx body that is longer than RESPONSE_BODY_LIMIT_BYTES
863        // (4096) and where the byte at offset 4096 lands inside a multi-byte
864        // UTF-8 sequence. Pre-truncation pad — 4094 ASCII bytes — places the
865        // first byte of the next 3-byte character ("界") at offset 4094, so
866        // offset 4096 lies in the middle of that code point.
867        let mut body = "x".repeat(4094);
868        body.push_str("世界世界世界世界世界世界");
869        assert!(!body.is_char_boundary(4096));
870
871        Mock::given(method("GET"))
872            .and(path("/download"))
873            .respond_with(ResponseTemplate::new(500).set_body_string(body.clone()))
874            .expect(1)
875            .mount(&server)
876            .await;
877
878        let config = FileDownloadConfig {
879            url: Some(format!("{}/download", server.uri())),
880            ..FileDownloadConfig::default()
881        };
882        let tool = FileDownloadTool::new(
883            test_security(tmp.path().to_path_buf(), AutonomyLevel::Full),
884            config,
885        );
886
887        // Must not panic when slicing the body at a non-char-boundary byte
888        // index. The truncated output must still be valid UTF-8 and must
889        // include the "[truncated ...]" marker.
890        let result = tool
891            .execute(json!({ "document_id": "doc-1", "dest_path": "out.bin" }))
892            .await
893            .unwrap();
894
895        assert!(!result.success);
896        assert!(result.error.as_deref().unwrap_or("").contains("500"));
897        assert!(result.output.contains("[truncated"));
898        assert!(
899            result.output.len() < body.len(),
900            "expected the body to be shortened"
901        );
902        assert!(!tmp.path().join("out.bin").exists());
903    }
904}