Skip to main content

zeroclaw_tools/
web_search_tool.rs

1use super::web_search_provider_routing::{WebSearchProviderRoute, resolve_web_search_provider};
2use async_trait::async_trait;
3use regex::Regex;
4use serde_json::json;
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7use zeroclaw_api::tool::{Tool, ToolResult};
8
/// Web search tool for searching the internet.
///
/// Supports multiple search providers: DuckDuckGo (free), Brave (requires API key),
/// Tavily (requires API key), SearXNG (self-hosted, requires instance URL).
///
/// API keys are resolved lazily at execution time: if the boot-time key
/// is missing or still encrypted, the tool re-reads `config.toml`, decrypts the
/// corresponding `[web_search]` field, and uses the result. This ensures that
/// keys set or rotated after boot, and encrypted keys, are correctly picked up.
pub struct WebSearchTool {
    /// Search-provider selector as configured by the user, stored trimmed and
    /// lowercased by the constructors. It is mapped to a concrete provider
    /// route when the tool executes.
    model_provider: String,
    /// Boot-time Brave key snapshot (may be `None` if not yet configured at startup).
    boot_brave_api_key: Option<String>,
    /// Boot-time Tavily key snapshot (may be `None` if not yet configured at startup).
    boot_tavily_api_key: Option<String>,
    /// SearXNG instance base URL (e.g. `"https://searx.example.com"`).
    searxng_instance_url: Option<String>,
    /// Maximum number of results included in the formatted output; the
    /// constructors clamp it to `1..=10`.
    max_results: usize,
    /// HTTP request timeout in seconds; the constructors enforce a minimum of 1.
    timeout_secs: u64,
    /// Path to `config.toml` for lazy re-read of keys at execution time.
    config_path: PathBuf,
    /// Whether secret encryption is enabled (needed to create a `SecretStore`).
    secrets_encrypt: bool,
}
33
34impl WebSearchTool {
35    pub fn new(
36        model_provider: String,
37        brave_api_key: Option<String>,
38        max_results: usize,
39        timeout_secs: u64,
40    ) -> Self {
41        Self {
42            model_provider: model_provider.trim().to_lowercase(),
43            boot_brave_api_key: brave_api_key,
44            boot_tavily_api_key: None,
45            searxng_instance_url: None,
46            max_results: max_results.clamp(1, 10),
47            timeout_secs: timeout_secs.max(1),
48            config_path: PathBuf::new(),
49            secrets_encrypt: false,
50        }
51    }
52
53    /// Create a `WebSearchTool` with config-reload and decryption support.
54    ///
55    /// `config_path` is the path to `config.toml` so the tool can re-read API
56    /// keys at execution time. `secrets_encrypt` controls whether the keys are
57    /// decrypted via `SecretStore`.
58    #[allow(clippy::too_many_arguments)]
59    pub fn new_with_config(
60        model_provider: String,
61        brave_api_key: Option<String>,
62        tavily_api_key: Option<String>,
63        searxng_instance_url: Option<String>,
64        max_results: usize,
65        timeout_secs: u64,
66        config_path: PathBuf,
67        secrets_encrypt: bool,
68    ) -> Self {
69        Self {
70            model_provider: model_provider.trim().to_lowercase(),
71            boot_brave_api_key: brave_api_key,
72            boot_tavily_api_key: tavily_api_key,
73            searxng_instance_url,
74            max_results: max_results.clamp(1, 10),
75            timeout_secs: timeout_secs.max(1),
76            config_path,
77            secrets_encrypt,
78        }
79    }
80
81    /// Resolve the Brave API key, preferring the boot-time value but falling
82    /// back to a fresh config read + decryption when the boot-time value is
83    /// absent.
84    fn resolve_brave_api_key(&self) -> anyhow::Result<String> {
85        // Fast path: boot-time key is present and usable (not an encrypted blob).
86        if let Some(ref key) = self.boot_brave_api_key
87            && !key.is_empty()
88            && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
89        {
90            return Ok(key.clone());
91        }
92
93        // Slow path: re-read config.toml to pick up keys set/rotated after boot.
94        self.reload_brave_api_key()
95    }
96
97    /// Re-read `config.toml` and decrypt `[web_search] brave_api_key`.
98    fn reload_brave_api_key(&self) -> anyhow::Result<String> {
99        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
100            ::zeroclaw_log::record!(
101                ERROR,
102                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
103                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
104                    .with_attrs(::serde_json::json!({
105                        "path": self.config_path.display().to_string(),
106                        "search_provider": "brave",
107                        "error": format!("{}", e),
108                    })),
109                "web_search: failed to read config for Brave API key"
110            );
111            anyhow::Error::msg(format!(
112                "Failed to read config file {} for Brave API key: {e}",
113                self.config_path.display()
114            ))
115        })?;
116
117        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
118            ::zeroclaw_log::record!(
119                ERROR,
120                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
121                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
122                    .with_attrs(::serde_json::json!({
123                        "path": self.config_path.display().to_string(),
124                        "search_provider": "brave",
125                        "error": format!("{}", e),
126                    })),
127                "web_search: failed to parse config for Brave API key"
128            );
129            anyhow::Error::msg(format!(
130                "Failed to parse config file {} for Brave API key: {e}",
131                self.config_path.display()
132            ))
133        })?;
134
135        let raw_key = config
136            .web_search
137            .brave_api_key
138            .filter(|k| !k.is_empty())
139            .ok_or_else(|| {
140                ::zeroclaw_log::record!(
141                    ERROR,
142                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
143                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
144                        .with_attrs(::serde_json::json!({"search_provider": "brave"})),
145                    "web_search: Brave API key not configured"
146                );
147                anyhow::Error::msg("Brave API key not configured")
148            })?;
149
150        // Decrypt if necessary.
151        if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
152            let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
153            let store =
154                zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
155            let plaintext = store.decrypt(&raw_key)?;
156            if plaintext.is_empty() {
157                anyhow::bail!("Brave API key not configured (decrypted value is empty)");
158            }
159            Ok(plaintext)
160        } else {
161            Ok(raw_key)
162        }
163    }
164
165    async fn search_duckduckgo(&self, query: &str) -> anyhow::Result<String> {
166        self.search_duckduckgo_at("https://html.duckduckgo.com/html/", query)
167            .await
168    }
169
170    /// Inner DuckDuckGo request implementation, parameterized on the endpoint URL
171    /// so request-flow tests can target a local mock server. Production calls
172    /// always go through [`Self::search_duckduckgo`].
173    async fn search_duckduckgo_at(
174        &self,
175        endpoint_url: &str,
176        query: &str,
177    ) -> anyhow::Result<String> {
178        let encoded_query = urlencoding::encode(query);
179        let search_url = format!("{}?q={}", endpoint_url, encoded_query);
180
181        let builder = reqwest::Client::builder()
182            .timeout(Duration::from_secs(self.timeout_secs))
183            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
184        let builder =
185            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
186        let client = builder.build()?;
187
188        let response = client.get(&search_url).send().await?;
189        let status = response.status();
190        let final_url_is_block =
191            contains_ascii_case_insensitive(response.url().as_str(), "/wr.do?");
192
193        if !status.is_success() {
194            if let Some(message) = duckduckgo_block_message(status, final_url_is_block, false) {
195                anyhow::bail!(message);
196            }
197            anyhow::bail!("DuckDuckGo search failed with status: {}", status);
198        }
199
200        let html = response.text().await?;
201        let html_contains_block = contains_ascii_case_insensitive(&html, "/wr.do?")
202            || contains_ascii_case_insensitive(&html, "anomaly-modal");
203        if let Some(message) =
204            duckduckgo_block_message(status, final_url_is_block, html_contains_block)
205        {
206            anyhow::bail!(message);
207        }
208        self.parse_duckduckgo_results(&html, query)
209    }
210
211    fn parse_duckduckgo_results(&self, html: &str, query: &str) -> anyhow::Result<String> {
212        // Extract result links: <a class="result__a" href="...">Title</a>
213        let link_regex = Regex::new(
214            r#"<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>"#,
215        )?;
216
217        // Extract snippets: <a class="result__snippet">...</a>
218        let snippet_regex = Regex::new(r#"<a class="result__snippet[^"]*"[^>]*>([\s\S]*?)</a>"#)?;
219
220        let link_matches: Vec<_> = link_regex
221            .captures_iter(html)
222            .take(self.max_results + 2)
223            .collect();
224
225        let snippet_matches: Vec<_> = snippet_regex
226            .captures_iter(html)
227            .take(self.max_results + 2)
228            .collect();
229
230        if link_matches.is_empty() {
231            return Ok(format!("No results found for: {}", query));
232        }
233
234        let mut lines = vec![format!("Search results for: {} (via DuckDuckGo)", query)];
235
236        let count = link_matches.len().min(self.max_results);
237
238        for i in 0..count {
239            let caps = &link_matches[i];
240            let url_str = decode_ddg_redirect_url(&caps[1]);
241            let title = strip_tags(&caps[2]);
242
243            lines.push(format!("{}. {}", i + 1, title.trim()));
244            lines.push(format!("   {}", url_str.trim()));
245
246            // Add snippet if available
247            if i < snippet_matches.len() {
248                let snippet = strip_tags(&snippet_matches[i][1]);
249                let snippet = snippet.trim();
250                if !snippet.is_empty() {
251                    lines.push(format!("   {}", snippet));
252                }
253            }
254        }
255
256        Ok(lines.join("\n"))
257    }
258
259    async fn search_brave(&self, query: &str) -> anyhow::Result<String> {
260        let api_key = self.resolve_brave_api_key()?;
261
262        let encoded_query = urlencoding::encode(query);
263        let search_url = format!(
264            "https://api.search.brave.com/res/v1/web/search?q={}&count={}",
265            encoded_query, self.max_results
266        );
267
268        let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
269        let builder =
270            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
271        let client = builder.build()?;
272
273        let response = client
274            .get(&search_url)
275            .header("Accept", "application/json")
276            .header("X-Subscription-Token", &api_key)
277            .send()
278            .await?;
279
280        if !response.status().is_success() {
281            anyhow::bail!("Brave search failed with status: {}", response.status());
282        }
283
284        let json: serde_json::Value = response.json().await?;
285        self.parse_brave_results(&json, query)
286    }
287
288    /// Resolve the Tavily API key from the boot-time snapshot, falling back
289    /// to a fresh config read + decryption when the boot-time value is absent.
290    fn resolve_tavily_api_key(&self) -> anyhow::Result<String> {
291        if let Some(ref key) = self.boot_tavily_api_key
292            && !key.is_empty()
293            && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
294        {
295            return Ok(key.clone());
296        }
297        self.reload_tavily_api_key()
298    }
299
300    /// Re-read `config.toml` and decrypt `[web_search] tavily_api_key`.
301    fn reload_tavily_api_key(&self) -> anyhow::Result<String> {
302        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
303            ::zeroclaw_log::record!(
304                ERROR,
305                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
306                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
307                    .with_attrs(::serde_json::json!({
308                        "path": self.config_path.display().to_string(),
309                        "search_provider": "tavily",
310                        "error": format!("{}", e),
311                    })),
312                "web_search: failed to read config for Tavily API key"
313            );
314            anyhow::Error::msg(format!(
315                "Failed to read config file {} for Tavily API key: {e}",
316                self.config_path.display()
317            ))
318        })?;
319
320        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
321            ::zeroclaw_log::record!(
322                ERROR,
323                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
324                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
325                    .with_attrs(::serde_json::json!({
326                        "path": self.config_path.display().to_string(),
327                        "search_provider": "tavily",
328                        "error": format!("{}", e),
329                    })),
330                "web_search: failed to parse config for Tavily API key"
331            );
332            anyhow::Error::msg(format!(
333                "Failed to parse config file {} for Tavily API key: {e}",
334                self.config_path.display()
335            ))
336        })?;
337
338        let raw_key = config
339            .web_search
340            .tavily_api_key
341            .filter(|k| !k.is_empty())
342            .ok_or_else(|| {
343                ::zeroclaw_log::record!(
344                    ERROR,
345                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
346                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
347                        .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
348                    "web_search: Tavily API key not configured"
349                );
350                anyhow::Error::msg("Tavily API key not configured")
351            })?;
352
353        if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
354            let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
355            let store =
356                zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
357            let plaintext = store.decrypt(&raw_key)?;
358            if plaintext.is_empty() {
359                anyhow::bail!("Tavily API key not configured (decrypted value is empty)");
360            }
361            Ok(plaintext)
362        } else {
363            Ok(raw_key)
364        }
365    }
366
367    async fn search_tavily(&self, query: &str) -> anyhow::Result<String> {
368        let client = self.build_tavily_client()?;
369        self.search_tavily_with_client(&client, "https://api.tavily.com/search", query)
370            .await
371    }
372
373    /// Build the production HTTP client for Tavily, wired through the
374    /// process-global runtime proxy state. Extracted so the
375    /// `search_tavily_with_client` test path can substitute a fresh
376    /// client and stay isolated from concurrent tests that mutate
377    /// `RUNTIME_PROXY_CONFIG` (a request built off a stale "enabled"
378    /// proxy snapshot otherwise routes through a non-existent proxy
379    /// and the wiremock connection fails).
380    fn build_tavily_client(&self) -> anyhow::Result<reqwest::Client> {
381        let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
382        let builder =
383            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
384        Ok(builder.build()?)
385    }
386
387    /// Inner Tavily request implementation, parameterized on the HTTP
388    /// client and endpoint URL so request-shape tests can target a local
389    /// mock server with a client that doesn't read process-global proxy
390    /// state. Production calls always go through [`Self::search_tavily`].
391    async fn search_tavily_with_client(
392        &self,
393        client: &reqwest::Client,
394        url: &str,
395        query: &str,
396    ) -> anyhow::Result<String> {
397        let api_key = self.resolve_tavily_api_key()?;
398
399        // Tavily authenticates via `Authorization: Bearer <key>` per
400        // https://docs.tavily.com/documentation/api-reference/endpoint/search
401        // (the API also tolerates `api_key` in the body for legacy clients,
402        // but bearer-header is the documented contract).
403        let body = serde_json::json!({
404            "query": query,
405            "max_results": self.max_results,
406            "search_depth": "basic",
407            "include_answer": false,
408            "include_raw_content": false,
409        });
410
411        let response = client
412            .post(url)
413            .bearer_auth(&api_key)
414            .json(&body)
415            .send()
416            .await?;
417
418        if !response.status().is_success() {
419            anyhow::bail!("Tavily search failed with status: {}", response.status());
420        }
421
422        let json: serde_json::Value = response.json().await?;
423        self.parse_tavily_results(&json, query)
424    }
425
426    fn parse_tavily_results(
427        &self,
428        json: &serde_json::Value,
429        query: &str,
430    ) -> anyhow::Result<String> {
431        let results = json
432            .get("results")
433            .and_then(|r| r.as_array())
434            .ok_or_else(|| {
435                ::zeroclaw_log::record!(
436                    ERROR,
437                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
438                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
439                        .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
440                    "web_search: invalid Tavily response"
441                );
442                anyhow::Error::msg("Invalid Tavily API response")
443            })?;
444
445        if results.is_empty() {
446            return Ok(format!("No results found for: {}", query));
447        }
448
449        let mut lines = vec![format!("Search results for: {} (via Tavily)", query)];
450
451        for (i, result) in results.iter().take(self.max_results).enumerate() {
452            let title = result
453                .get("title")
454                .and_then(|t| t.as_str())
455                .unwrap_or("No title");
456            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
457            // Tavily returns a pre-cleaned `content` field (not just a snippet),
458            // so it doubles as the description for the LLM caller.
459            let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
460
461            lines.push(format!("{}. {}", i + 1, title));
462            lines.push(format!("   {}", url));
463            if !content.is_empty() {
464                lines.push(format!("   {}", content));
465            }
466        }
467
468        Ok(lines.join("\n"))
469    }
470
471    fn parse_brave_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
472        let results = json
473            .get("web")
474            .and_then(|w| w.get("results"))
475            .and_then(|r| r.as_array())
476            .ok_or_else(|| {
477                ::zeroclaw_log::record!(
478                    ERROR,
479                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
480                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
481                        .with_attrs(::serde_json::json!({"search_provider": "brave"})),
482                    "web_search: invalid Brave response"
483                );
484                anyhow::Error::msg("Invalid Brave API response")
485            })?;
486
487        if results.is_empty() {
488            return Ok(format!("No results found for: {}", query));
489        }
490
491        let mut lines = vec![format!("Search results for: {} (via Brave)", query)];
492
493        for (i, result) in results.iter().take(self.max_results).enumerate() {
494            let title = result
495                .get("title")
496                .and_then(|t| t.as_str())
497                .unwrap_or("No title");
498            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
499            let description = result
500                .get("description")
501                .and_then(|d| d.as_str())
502                .unwrap_or("");
503
504            lines.push(format!("{}. {}", i + 1, title));
505            lines.push(format!("   {}", url));
506            if !description.is_empty() {
507                lines.push(format!("   {}", description));
508            }
509        }
510
511        Ok(lines.join("\n"))
512    }
513
514    /// Resolve the SearXNG instance URL from the boot-time config or by
515    /// re-reading `config.toml` at runtime.
516    fn resolve_searxng_instance_url(&self) -> anyhow::Result<String> {
517        if let Some(ref url) = self.searxng_instance_url
518            && !url.is_empty()
519        {
520            return Ok(url.clone());
521        }
522
523        // Slow path: re-read config.toml to pick up values set after boot.
524        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
525            ::zeroclaw_log::record!(
526                ERROR,
527                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
528                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
529                    .with_attrs(::serde_json::json!({
530                        "path": self.config_path.display().to_string(),
531                        "search_provider": "searxng",
532                        "error": format!("{}", e),
533                    })),
534                "web_search: failed to read config for SearXNG URL"
535            );
536            anyhow::Error::msg(format!(
537                "Failed to read config file {} for SearXNG instance URL: {e}",
538                self.config_path.display()
539            ))
540        })?;
541
542        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
543            ::zeroclaw_log::record!(
544                ERROR,
545                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
546                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
547                    .with_attrs(::serde_json::json!({
548                        "path": self.config_path.display().to_string(),
549                        "search_provider": "searxng",
550                        "error": format!("{}", e),
551                    })),
552                "web_search: failed to parse config for SearXNG URL"
553            );
554            anyhow::Error::msg(format!(
555                "Failed to parse config file {} for SearXNG instance URL: {e}",
556                self.config_path.display()
557            ))
558        })?;
559
560        config
561            .web_search
562            .searxng_instance_url
563            .filter(|u| !u.is_empty())
564            .ok_or_else(|| {
565                ::zeroclaw_log::record!(
566                    ERROR,
567                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
568                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
569                        .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
570                    "web_search: SearXNG instance URL not configured"
571                );
572                anyhow::Error::msg(
573                    "SearXNG instance URL not configured. Set [web_search] searxng_instance_url \
574                     in config.toml or the SEARXNG_INSTANCE_URL environment variable.",
575                )
576            })
577    }
578
579    async fn search_searxng(&self, query: &str) -> anyhow::Result<String> {
580        let instance_url = self.resolve_searxng_instance_url()?;
581        let base_url = instance_url.trim_end_matches('/');
582
583        let encoded_query = urlencoding::encode(query);
584        let search_url = format!(
585            "{}/search?q={}&format=json&pageno=1",
586            base_url, encoded_query
587        );
588
589        let builder = reqwest::Client::builder()
590            .timeout(Duration::from_secs(self.timeout_secs))
591            .user_agent("ZeroClaw/1.0");
592        let builder =
593            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
594        let client = builder.build()?;
595
596        let response = client
597            .get(&search_url)
598            .header("Accept", "application/json")
599            .send()
600            .await?;
601
602        if !response.status().is_success() {
603            anyhow::bail!("SearXNG search failed with status: {}", response.status());
604        }
605
606        let json: serde_json::Value = response.json().await?;
607        self.parse_searxng_results(&json, query)
608    }
609
610    fn parse_searxng_results(
611        &self,
612        json: &serde_json::Value,
613        query: &str,
614    ) -> anyhow::Result<String> {
615        let results = json
616            .get("results")
617            .and_then(|r| r.as_array())
618            .ok_or_else(|| {
619                ::zeroclaw_log::record!(
620                    ERROR,
621                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
622                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
623                        .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
624                    "web_search: invalid SearXNG response"
625                );
626                anyhow::Error::msg("Invalid SearXNG API response")
627            })?;
628
629        if results.is_empty() {
630            return Ok(format!("No results found for: {}", query));
631        }
632
633        let mut lines = vec![format!("Search results for: {} (via SearXNG)", query)];
634
635        for (i, result) in results.iter().take(self.max_results).enumerate() {
636            let title = result
637                .get("title")
638                .and_then(|t| t.as_str())
639                .unwrap_or("No title");
640            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
641            let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
642
643            lines.push(format!("{}. {}", i + 1, title));
644            lines.push(format!("   {}", url));
645            if !content.is_empty() {
646                lines.push(format!("   {}", content));
647            }
648        }
649
650        Ok(lines.join("\n"))
651    }
652}
653
654fn decode_ddg_redirect_url(raw_url: &str) -> String {
655    if let Some(index) = raw_url.find("uddg=") {
656        let encoded = &raw_url[index + 5..];
657        let encoded = encoded.split('&').next().unwrap_or(encoded);
658        if let Ok(decoded) = urlencoding::decode(encoded) {
659            return decoded.into_owned();
660        }
661    }
662
663    raw_url.to_string()
664}
665
/// Error text returned when a DuckDuckGo response is classified as a block
/// of the automated request; it points the user at the other providers.
const DUCKDUCKGO_BLOCK_MESSAGE: &str = "DuckDuckGo blocked the automated search request. Try configuring SearXNG, Brave, or Tavily as the web search provider.";
667
668fn duckduckgo_block_message(
669    status: reqwest::StatusCode,
670    final_url_is_block: bool,
671    html_contains_block: bool,
672) -> Option<&'static str> {
673    if status == reqwest::StatusCode::FORBIDDEN || final_url_is_block || html_contains_block {
674        Some(DUCKDUCKGO_BLOCK_MESSAGE)
675    } else {
676        None
677    }
678}
679
/// Report whether `needle` occurs anywhere in `haystack`, comparing bytes
/// ASCII case-insensitively (non-ASCII bytes must match exactly).
///
/// An empty `needle` is considered contained in every haystack, matching
/// the behaviour of `str::contains("")`.
fn contains_ascii_case_insensitive(haystack: &str, needle: &str) -> bool {
    let needle = needle.as_bytes();
    if needle.is_empty() {
        // `slice::windows(0)` panics, so handle the empty needle up front.
        return true;
    }

    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle))
}
686
687fn strip_tags(content: &str) -> String {
688    let re = Regex::new(r"<[^>]+>").unwrap();
689    re.replace_all(content, "").to_string()
690}
691
692#[async_trait]
693impl Tool for WebSearchTool {
694    fn name(&self) -> &str {
695        "web_search_tool"
696    }
697
698    fn description(&self) -> &str {
699        "Search the web for information. Returns relevant search results with titles, URLs, and descriptions. Use this to find current information, news, or research topics."
700    }
701
702    fn parameters_schema(&self) -> serde_json::Value {
703        json!({
704            "type": "object",
705            "properties": {
706                "query": {
707                    "type": "string",
708                    "description": "The search query. Be specific for better results."
709                }
710            },
711            "required": ["query"]
712        })
713    }
714
715    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
716        let query = args.get("query").and_then(|q| q.as_str()).ok_or_else(|| {
717            ::zeroclaw_log::record!(
718                WARN,
719                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
720                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
721                    .with_attrs(::serde_json::json!({"param": "query"})),
722                "web_search: missing query parameter"
723            );
724            anyhow::Error::msg("Missing required parameter: query")
725        })?;
726
727        if query.trim().is_empty() {
728            anyhow::bail!("Search query cannot be empty");
729        }
730
731        ::zeroclaw_log::record!(
732            INFO,
733            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
734            &format!("Searching web for: {}", query)
735        );
736
737        let resolution = resolve_web_search_provider(&self.model_provider);
738        if resolution.used_fallback {
739            ::zeroclaw_log::record!(
740                WARN,
741                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
742                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
743                &format!(
744                    "Unknown web search model_provider '{}'; falling back to '{}'",
745                    self.model_provider, resolution.canonical_provider
746                )
747            );
748        }
749
750        let result = match resolution.route {
751            WebSearchProviderRoute::DuckDuckGo => self.search_duckduckgo(query).await?,
752            WebSearchProviderRoute::Brave => self.search_brave(query).await?,
753            WebSearchProviderRoute::Tavily => self.search_tavily(query).await?,
754            WebSearchProviderRoute::SearXNG => self.search_searxng(query).await?,
755        };
756
757        Ok(ToolResult {
758            success: true,
759            output: result,
760            error: None,
761        })
762    }
763}
764
765#[cfg(test)]
766mod tests {
767    use super::*;
768
769    #[test]
770    fn test_tool_name() {
771        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
772        assert_eq!(tool.name(), "web_search_tool");
773    }
774
775    #[test]
776    fn test_tool_description() {
777        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
778        assert!(tool.description().contains("Search the web"));
779    }
780
781    #[test]
782    fn test_parameters_schema() {
783        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
784        let schema = tool.parameters_schema();
785        assert_eq!(schema["type"], "object");
786        assert!(schema["properties"]["query"].is_object());
787    }
788
789    #[test]
790    fn test_strip_tags() {
791        let html = "<b>Hello</b> <i>World</i>";
792        assert_eq!(strip_tags(html), "Hello World");
793    }
794
795    #[test]
796    fn test_parse_duckduckgo_results_empty() {
797        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
798        let result = tool
799            .parse_duckduckgo_results("<html>No results here</html>", "test")
800            .unwrap();
801        assert!(result.contains("No results found"));
802    }
803
804    #[test]
805    fn test_parse_duckduckgo_results_with_data() {
806        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
807        let html = r#"
808            <a class="result__a" href="https://example.com">Example Title</a>
809            <a class="result__snippet">This is a description</a>
810        "#;
811        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
812        assert!(result.contains("Example Title"));
813        assert!(result.contains("https://example.com"));
814    }
815
816    #[test]
817    fn test_parse_duckduckgo_results_decodes_redirect_url() {
818        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
819        let html = r#"
820            <a class="result__a" href="https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpath%3Fa%3D1&amp;rut=test">Example Title</a>
821            <a class="result__snippet">This is a description</a>
822        "#;
823        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
824        assert!(result.contains("https://example.com/path?a=1"));
825        assert!(!result.contains("rut=test"));
826    }
827
828    #[test]
829    fn test_duckduckgo_block_detection_reports_forbidden_status() {
830        let message = duckduckgo_block_message(reqwest::StatusCode::FORBIDDEN, false, false)
831            .expect("403 responses should be classified as a DuckDuckGo block");
832
833        assert!(message.contains("DuckDuckGo blocked"));
834        assert!(message.contains("SearXNG"));
835    }
836
837    #[test]
838    fn test_duckduckgo_block_detection_reports_verification_redirect() {
839        let message = duckduckgo_block_message(reqwest::StatusCode::OK, true, false)
840            .expect("verification redirects should be classified as a DuckDuckGo block");
841
842        assert!(message.contains("DuckDuckGo blocked"));
843        assert!(message.contains("SearXNG"));
844    }
845
846    #[test]
847    fn test_duckduckgo_block_detection_reports_verification_form_in_html() {
848        let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, true)
849            .expect("verification form HTML should be classified as a DuckDuckGo block");
850
851        assert!(message.contains("DuckDuckGo blocked"));
852        assert!(message.contains("SearXNG"));
853    }
854
855    #[test]
856    fn test_duckduckgo_block_detection_ignores_normal_empty_results() {
857        let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, false);
858
859        assert!(message.is_none());
860    }
861
862    #[test]
863    fn test_duckduckgo_block_detection_is_case_insensitive_without_allocating_html() {
864        assert!(contains_ascii_case_insensitive(
865            r#"<form action="/WR.DO?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
866            "/wr.do?"
867        ));
868    }
869
870    #[tokio::test]
871    async fn test_duckduckgo_request_reports_forbidden_status() {
872        use wiremock::matchers::{method, path, query_param};
873        use wiremock::{Mock, MockServer, ResponseTemplate};
874
875        let server = MockServer::start().await;
876        Mock::given(method("GET"))
877            .and(path("/html/"))
878            .and(query_param("q", "test"))
879            .respond_with(ResponseTemplate::new(403))
880            .mount(&server)
881            .await;
882
883        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
884        let err = tool
885            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
886            .await
887            .expect_err("403 should be reported as a DuckDuckGo block");
888
889        assert!(err.to_string().contains("DuckDuckGo blocked"));
890        assert!(err.to_string().contains("SearXNG"));
891    }
892
893    #[tokio::test]
894    async fn test_duckduckgo_request_reports_verification_redirect_url() {
895        use wiremock::matchers::{method, path, query_param};
896        use wiremock::{Mock, MockServer, ResponseTemplate};
897
898        let server = MockServer::start().await;
899        Mock::given(method("GET"))
900            .and(path("/html/"))
901            .and(query_param("q", "test"))
902            .respond_with(
903                ResponseTemplate::new(302)
904                    .insert_header("location", format!("{}/wr.do?u=blocked", server.uri())),
905            )
906            .mount(&server)
907            .await;
908        Mock::given(method("GET"))
909            .and(path("/wr.do"))
910            .respond_with(ResponseTemplate::new(200).set_body_string("<html></html>"))
911            .mount(&server)
912            .await;
913
914        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
915        let err = tool
916            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
917            .await
918            .expect_err("verification redirects should be reported as a DuckDuckGo block");
919
920        assert!(err.to_string().contains("DuckDuckGo blocked"));
921        assert!(err.to_string().contains("SearXNG"));
922    }
923
924    #[tokio::test]
925    async fn test_duckduckgo_request_reports_verification_form_html() {
926        use wiremock::matchers::{method, path, query_param};
927        use wiremock::{Mock, MockServer, ResponseTemplate};
928
929        let server = MockServer::start().await;
930        Mock::given(method("GET"))
931            .and(path("/html/"))
932            .and(query_param("q", "test"))
933            .respond_with(ResponseTemplate::new(200).set_body_string(
934                r#"<form action="/wr.do?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
935            ))
936            .mount(&server)
937            .await;
938
939        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
940        let err = tool
941            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
942            .await
943            .expect_err("verification HTML should be reported as a DuckDuckGo block");
944
945        assert!(err.to_string().contains("DuckDuckGo blocked"));
946        assert!(err.to_string().contains("SearXNG"));
947    }
948
949    #[tokio::test]
950    async fn test_duckduckgo_request_reports_anomaly_modal_block() {
951        // Regression for #6373: DuckDuckGo's anti-bot page now ships an
952        // `anomaly-modal` interstitial (HTTP 200/202, no `/wr.do?` redirect,
953        // no verification form), and the old detector slid past it,
954        // returning a misleading "No results found" message to the agent.
955        use wiremock::matchers::{method, path, query_param};
956        use wiremock::{Mock, MockServer, ResponseTemplate};
957
958        let server = MockServer::start().await;
959        Mock::given(method("GET"))
960            .and(path("/html/"))
961            .and(query_param("q", "test"))
962            .respond_with(ResponseTemplate::new(202).set_body_string(
963                r#"<html><body><div class="anomaly-modal__title">Unusual Traffic Detected</div></body></html>"#,
964            ))
965            .mount(&server)
966            .await;
967
968        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
969        let err = tool
970            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
971            .await
972            .expect_err("anomaly-modal page should be reported as a DuckDuckGo block");
973
974        assert!(err.to_string().contains("DuckDuckGo blocked"));
975        assert!(err.to_string().contains("SearXNG"));
976    }
977
978    #[tokio::test]
979    async fn test_duckduckgo_request_preserves_normal_empty_results() {
980        use wiremock::matchers::{method, path, query_param};
981        use wiremock::{Mock, MockServer, ResponseTemplate};
982
983        let server = MockServer::start().await;
984        Mock::given(method("GET"))
985            .and(path("/html/"))
986            .and(query_param("q", "test"))
987            .respond_with(
988                ResponseTemplate::new(200).set_body_string("<html>No results here</html>"),
989            )
990            .mount(&server)
991            .await;
992
993        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
994        let result = tool
995            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
996            .await
997            .expect("normal empty result HTML should still parse");
998
999        assert!(result.contains("No results found"));
1000    }
1001
1002    #[test]
1003    fn test_constructor_clamps_web_search_limits() {
1004        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 0, 0);
1005        let html = r#"
1006            <a class="result__a" href="https://example.com">Example Title</a>
1007            <a class="result__snippet">This is a description</a>
1008        "#;
1009        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
1010        assert!(result.contains("Example Title"));
1011    }
1012
1013    #[tokio::test]
1014    async fn test_execute_missing_query() {
1015        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
1016        let result = tool.execute(json!({})).await;
1017        assert!(result.is_err());
1018    }
1019
1020    #[tokio::test]
1021    async fn test_execute_empty_query() {
1022        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
1023        let result = tool.execute(json!({"query": ""})).await;
1024        assert!(result.is_err());
1025    }
1026
1027    #[tokio::test]
1028    async fn test_execute_brave_without_api_key() {
1029        let tool = WebSearchTool::new("brave".to_string(), None, 5, 15);
1030        let result = tool.execute(json!({"query": "test"})).await;
1031        assert!(result.is_err());
1032        assert!(result.unwrap_err().to_string().contains("API key"));
1033    }
1034
1035    #[test]
1036    fn test_resolve_brave_api_key_uses_boot_key() {
1037        let tool = WebSearchTool::new(
1038            "brave".to_string(),
1039            Some("sk-plaintext-key".to_string()),
1040            5,
1041            15,
1042        );
1043        let key = tool.resolve_brave_api_key().unwrap();
1044        assert_eq!(key, "sk-plaintext-key");
1045    }
1046
1047    #[test]
1048    fn test_resolve_brave_api_key_reloads_from_config() {
1049        let tmp = tempfile::TempDir::new().unwrap();
1050        let config_path = tmp.path().join("config.toml");
1051        std::fs::write(
1052            &config_path,
1053            "[web_search]\nbrave_api_key = \"fresh-key-from-disk\"\n",
1054        )
1055        .unwrap();
1056
1057        // No boot key -- forces reload from config
1058        let tool = WebSearchTool::new_with_config(
1059            "brave".to_string(),
1060            None,
1061            None,
1062            None,
1063            5,
1064            15,
1065            config_path,
1066            false,
1067        );
1068        let key = tool.resolve_brave_api_key().unwrap();
1069        assert_eq!(key, "fresh-key-from-disk");
1070    }
1071
1072    #[test]
1073    fn test_resolve_brave_api_key_decrypts_encrypted_key() {
1074        let tmp = tempfile::TempDir::new().unwrap();
1075        let store = zeroclaw_config::secrets::SecretStore::new(tmp.path(), true);
1076        let encrypted = store.encrypt("brave-secret-key").unwrap();
1077
1078        let config_path = tmp.path().join("config.toml");
1079        std::fs::write(
1080            &config_path,
1081            format!("[web_search]\nbrave_api_key = \"{}\"\n", encrypted),
1082        )
1083        .unwrap();
1084
1085        // Boot key is the encrypted blob -- should trigger reload + decrypt
1086        let tool = WebSearchTool::new_with_config(
1087            "brave".to_string(),
1088            Some(encrypted),
1089            None,
1090            None,
1091            5,
1092            15,
1093            config_path,
1094            true,
1095        );
1096        let key = tool.resolve_brave_api_key().unwrap();
1097        assert_eq!(key, "brave-secret-key");
1098    }
1099
1100    #[tokio::test]
1101    async fn test_execute_searxng_without_instance_url() {
1102        let tmp = tempfile::TempDir::new().unwrap();
1103        let config_path = tmp.path().join("config.toml");
1104        std::fs::write(&config_path, "[web_search]\n").unwrap();
1105
1106        let tool = WebSearchTool::new_with_config(
1107            "searxng".to_string(),
1108            None,
1109            None,
1110            None,
1111            5,
1112            15,
1113            config_path,
1114            false,
1115        );
1116        let result = tool.execute(json!({"query": "test"})).await;
1117        assert!(result.is_err());
1118        assert!(
1119            result
1120                .unwrap_err()
1121                .to_string()
1122                .contains("SearXNG instance URL not configured")
1123        );
1124    }
1125
1126    #[test]
1127    fn test_parse_tavily_results_empty() {
1128        let tool = WebSearchTool::new("tavily".to_string(), None, 5, 15);
1129        let json = serde_json::json!({"results": []});
1130        let result = tool.parse_tavily_results(&json, "test").unwrap();
1131        assert!(result.contains("No results found"));
1132    }
1133
1134    #[test]
1135    fn test_parse_tavily_results_with_data() {
1136        let tool = WebSearchTool::new("tavily".to_string(), None, 5, 15);
1137        let json = serde_json::json!({
1138            "query": "test",
1139            "results": [
1140                {
1141                    "title": "Tavily Example",
1142                    "url": "https://example.com",
1143                    "content": "Pre-cleaned summary content from Tavily",
1144                    "score": 0.91
1145                },
1146                {
1147                    "title": "Another Result",
1148                    "url": "https://example.org",
1149                    "content": "Second result body"
1150                }
1151            ]
1152        });
1153        let result = tool.parse_tavily_results(&json, "test").unwrap();
1154        assert!(result.contains("Tavily Example"));
1155        assert!(result.contains("https://example.com"));
1156        assert!(result.contains("Pre-cleaned summary content from Tavily"));
1157        assert!(result.contains("via Tavily"));
1158    }
1159
1160    #[test]
1161    fn test_parse_tavily_results_invalid_response() {
1162        let tool = WebSearchTool::new("tavily".to_string(), None, 5, 15);
1163        let json = serde_json::json!({"error": "bad api key"});
1164        let result = tool.parse_tavily_results(&json, "test");
1165        assert!(result.is_err());
1166        assert!(
1167            result
1168                .unwrap_err()
1169                .to_string()
1170                .contains("Invalid Tavily API response")
1171        );
1172    }
1173
1174    #[tokio::test]
1175    async fn test_execute_tavily_without_api_key() {
1176        // No boot key + no config field → resolve_tavily_api_key must error
1177        // before any network call is attempted.
1178        let tmp = tempfile::tempdir().unwrap();
1179        let config_path = tmp.path().join("config.toml");
1180        std::fs::write(&config_path, "[web_search]\n").unwrap();
1181
1182        let tool = WebSearchTool::new_with_config(
1183            "tavily".to_string(),
1184            None,
1185            None,
1186            None,
1187            5,
1188            15,
1189            config_path,
1190            false,
1191        );
1192        let result = tool.execute(json!({"query": "test"})).await;
1193        assert!(result.is_err());
1194        assert!(
1195            result
1196                .unwrap_err()
1197                .to_string()
1198                .contains("Tavily API key not configured")
1199        );
1200    }
1201
1202    #[test]
1203    fn test_resolve_tavily_api_key_uses_boot_key() {
1204        let tool = WebSearchTool::new_with_config(
1205            "tavily".to_string(),
1206            None,
1207            Some("tvly-boot-key".to_string()),
1208            None,
1209            5,
1210            15,
1211            PathBuf::new(),
1212            false,
1213        );
1214        let key = tool.resolve_tavily_api_key().unwrap();
1215        assert_eq!(key, "tvly-boot-key");
1216    }
1217
1218    #[test]
1219    fn test_resolve_tavily_api_key_reloads_from_config() {
1220        let tmp = tempfile::tempdir().unwrap();
1221        let config_path = tmp.path().join("config.toml");
1222        std::fs::write(
1223            &config_path,
1224            "[web_search]\ntavily_api_key = \"tvly-fresh-from-disk\"\n",
1225        )
1226        .unwrap();
1227
1228        // No boot key — forces reload from config
1229        let tool = WebSearchTool::new_with_config(
1230            "tavily".to_string(),
1231            None,
1232            None,
1233            None,
1234            5,
1235            15,
1236            config_path,
1237            false,
1238        );
1239        let key = tool.resolve_tavily_api_key().unwrap();
1240        assert_eq!(key, "tvly-fresh-from-disk");
1241    }
1242
1243    #[test]
1244    fn test_resolve_tavily_api_key_decrypts_encrypted_key() {
1245        let tmp = tempfile::TempDir::new().unwrap();
1246        let store = zeroclaw_config::secrets::SecretStore::new(tmp.path(), true);
1247        let encrypted = store.encrypt("tvly-secret-key").unwrap();
1248
1249        let config_path = tmp.path().join("config.toml");
1250        std::fs::write(
1251            &config_path,
1252            format!("[web_search]\ntavily_api_key = \"{}\"\n", encrypted),
1253        )
1254        .unwrap();
1255
1256        // Boot key is the encrypted blob -- should trigger reload + decrypt
1257        let tool = WebSearchTool::new_with_config(
1258            "tavily".to_string(),
1259            None,
1260            Some(encrypted),
1261            None,
1262            5,
1263            15,
1264            config_path,
1265            true,
1266        );
1267        let key = tool.resolve_tavily_api_key().unwrap();
1268        assert_eq!(key, "tvly-secret-key");
1269    }
1270
1271    /// Regression: Tavily auth must travel as `Authorization: Bearer <key>`
1272    /// (the documented contract per
1273    /// https://docs.tavily.com/documentation/api-reference/endpoint/search),
1274    /// NOT as an `api_key` field in the JSON body. The previous shape worked
1275    /// against the live service for legacy reasons, but the docs identify
1276    /// bearer-header as the canonical method.
1277    #[tokio::test]
1278    async fn test_tavily_request_uses_bearer_auth_header_not_body_field() {
1279        use wiremock::matchers::{header, method, path};
1280        use wiremock::{Mock, MockServer, ResponseTemplate};
1281
1282        let server = MockServer::start().await;
1283
1284        Mock::given(method("POST"))
1285            .and(path("/search"))
1286            .and(header("authorization", "Bearer tvly-test-key"))
1287            .and(header("content-type", "application/json"))
1288            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
1289                "query": "what is rust",
1290                "results": []
1291            })))
1292            .mount(&server)
1293            .await;
1294
1295        let tool = WebSearchTool::new_with_config(
1296            "tavily".to_string(),
1297            None,
1298            Some("tvly-test-key".to_string()),
1299            None,
1300            5,
1301            15,
1302            PathBuf::new(),
1303            false,
1304        );
1305
1306        // Isolated client so the request shape under test isn't affected
1307        // by `RUNTIME_PROXY_CONFIG` mutations from sibling proxy_config
1308        // tests running concurrently in the same process.
1309        let client = reqwest::Client::builder()
1310            .timeout(Duration::from_secs(15))
1311            .build()
1312            .expect("client builder should succeed without a proxy");
1313        let result = tool
1314            .search_tavily_with_client(&client, &format!("{}/search", server.uri()), "what is rust")
1315            .await
1316            .expect("request should succeed against the mock");
1317        assert!(
1318            result.contains("No results found"),
1319            "parser should report empty results: {result}"
1320        );
1321
1322        let recorded = server
1323            .received_requests()
1324            .await
1325            .expect("wiremock should have captured the request");
1326        assert_eq!(recorded.len(), 1, "expected exactly one POST /search");
1327
1328        let body: serde_json::Value =
1329            serde_json::from_slice(&recorded[0].body).expect("body should be JSON");
1330
1331        // Auth must NOT leak into the body — bearer header is the only auth channel.
1332        assert!(
1333            body.get("api_key").is_none(),
1334            "api_key must not appear in the request body; got: {body}"
1335        );
1336
1337        // The documented body fields must still be present so the search
1338        // contract continues to match the upstream API spec.
1339        assert_eq!(body["query"], "what is rust");
1340        assert_eq!(body["search_depth"], "basic");
1341        assert_eq!(body["max_results"], 5);
1342        assert_eq!(body["include_answer"], false);
1343        assert_eq!(body["include_raw_content"], false);
1344    }
1345
1346    #[test]
1347    fn test_parse_searxng_results_empty() {
1348        let tool = WebSearchTool::new("searxng".to_string(), None, 5, 15);
1349        let json = serde_json::json!({"results": []});
1350        let result = tool.parse_searxng_results(&json, "test").unwrap();
1351        assert!(result.contains("No results found"));
1352    }
1353
1354    #[test]
1355    fn test_parse_searxng_results_with_data() {
1356        let tool = WebSearchTool::new("searxng".to_string(), None, 5, 15);
1357        let json = serde_json::json!({
1358            "results": [
1359                {
1360                    "title": "SearXNG Example",
1361                    "url": "https://example.com",
1362                    "content": "A privacy-respecting metasearch engine"
1363                },
1364                {
1365                    "title": "Another Result",
1366                    "url": "https://example.org",
1367                    "content": "More information here"
1368                }
1369            ]
1370        });
1371        let result = tool.parse_searxng_results(&json, "test").unwrap();
1372        assert!(result.contains("SearXNG Example"));
1373        assert!(result.contains("https://example.com"));
1374        assert!(result.contains("A privacy-respecting metasearch engine"));
1375        assert!(result.contains("via SearXNG"));
1376    }
1377
1378    #[test]
1379    fn test_parse_searxng_results_invalid_response() {
1380        let tool = WebSearchTool::new("searxng".to_string(), None, 5, 15);
1381        let json = serde_json::json!({"error": "bad request"});
1382        let result = tool.parse_searxng_results(&json, "test");
1383        assert!(result.is_err());
1384        assert!(
1385            result
1386                .unwrap_err()
1387                .to_string()
1388                .contains("Invalid SearXNG API response")
1389        );
1390    }
1391
1392    #[test]
1393    fn test_resolve_searxng_instance_url_from_boot() {
1394        let tool = WebSearchTool {
1395            model_provider: "searxng".into(),
1396            boot_brave_api_key: None,
1397            boot_tavily_api_key: None,
1398            searxng_instance_url: Some("https://searx.example.com".to_string()),
1399            max_results: 5,
1400            timeout_secs: 15,
1401            config_path: PathBuf::new(),
1402            secrets_encrypt: false,
1403        };
1404        let url = tool.resolve_searxng_instance_url().unwrap();
1405        assert_eq!(url, "https://searx.example.com");
1406    }
1407
1408    #[test]
1409    fn test_resolve_searxng_instance_url_reloads_from_config() {
1410        let tmp = tempfile::TempDir::new().unwrap();
1411        let config_path = tmp.path().join("config.toml");
1412        std::fs::write(
1413            &config_path,
1414            "[web_search]\nsearxng_instance_url = \"https://search.local\"\n",
1415        )
1416        .unwrap();
1417
1418        let tool = WebSearchTool::new_with_config(
1419            "searxng".to_string(),
1420            None,
1421            None,
1422            None,
1423            5,
1424            15,
1425            config_path,
1426            false,
1427        );
1428        let url = tool.resolve_searxng_instance_url().unwrap();
1429        assert_eq!(url, "https://search.local");
1430    }
1431
1432    #[test]
1433    fn test_resolve_brave_api_key_picks_up_runtime_update() {
1434        let tmp = tempfile::TempDir::new().unwrap();
1435        let config_path = tmp.path().join("config.toml");
1436
1437        // Start with no key in config
1438        std::fs::write(&config_path, "[web_search]\n").unwrap();
1439
1440        let tool = WebSearchTool::new_with_config(
1441            "brave".to_string(),
1442            None,
1443            None,
1444            None,
1445            5,
1446            15,
1447            config_path.clone(),
1448            false,
1449        );
1450
1451        // Key not configured yet -- should fail
1452        assert!(tool.resolve_brave_api_key().is_err());
1453
1454        // Simulate runtime config update (e.g. via web_search_config set)
1455        std::fs::write(
1456            &config_path,
1457            "[web_search]\nbrave_api_key = \"runtime-updated-key\"\n",
1458        )
1459        .unwrap();
1460
1461        // Now should succeed with the updated key
1462        let key = tool.resolve_brave_api_key().unwrap();
1463        assert_eq!(key, "runtime-updated-key");
1464    }
1465}