Skip to main content

zeroclaw_config/
env_overrides.rs

1//! V0.8.0 env-var override mechanism.
2//!
3//! Grammar: `ZEROCLAW_<dotted_path_with_double_underscores>=<value>`.
4//! Each `__` (double underscore) is a path separator (`.` in the TOML); each
5//! single `_` is either a snake-case joiner inside a field name (which the
6//! walker keeps verbatim, because prop paths use the same snake-case names)
7//! or a literal char inside an alias key.
8//!
9//! Schema-derived: [`map_key_sections`] gives HashMap positions (one alias
10//! token consumed; alias chars are `[a-z0-9_]`); [`prop_fields`] gives every
11//! other leaf path. No string-literal pattern matching, no hardcoded family
12//! names.
13//!
14//! Bootstrap exception: `ZEROCLAW_WORKSPACE` and `ZEROCLAW_CONFIG_DIR` keep
15//! their UPPERCASE form. The case rule (lowercase tail = config-tree,
16//! uppercase tail = bootstrap) does the disambiguation work without an
17//! exemption list.
18//!
19//! Persistence boundary: each overridden path's pre-override raw value is
20//! snapshotted (post-`decrypt_secrets`, so secrets are plaintext) and used
21//! by [`mask_env_overrides_for_save`] to restore disk-or-default values
22//! before `encrypt_secrets()` runs. Env-injected values never reach disk.
23//!
24//! [`map_key_sections`]: crate::schema::Config::map_key_sections
25//! [`prop_fields`]: crate::schema::Config::prop_fields
26
27use crate::schema::Config;
28use anyhow::{Context, Result};
29use std::collections::{HashMap, HashSet};
30use std::sync::LazyLock;
31
/// Prefix every override env var starts with; the remainder of the name (the
/// "tail") is what gets resolved against the schema.
const PREFIX: &str = "ZEROCLAW_";

/// Env-form path separator: one `__` in a tail stands for one `.` in the
/// dotted prop path.
const SEP: &str = "__";

/// Prop paths that appear in `prop_fields()` yet must never be set from the
/// environment.
///
/// The only entry today is `schema_version`, spelled in the snake-case form
/// `prop_fields()` emits. The migration engine derives it from the on-disk
/// file, so an env override would either skip needed migrations or trigger a
/// no-op rerun. Kept as a lazily built `HashSet` so membership checks stay
/// O(1) as more reserved paths are added.
static NON_OVERRIDABLE_PATHS: LazyLock<HashSet<&'static str>> =
    LazyLock::new(|| ["schema_version"].into_iter().collect());
43
/// What [`apply_env_overrides`] hands back to its caller.
///
/// Carries both the set of prop paths that received an env value and, for
/// each of them, the raw value the config held just before the override was
/// written. [`mask_env_overrides_for_save`] consumes the snapshots to put
/// those earlier values back into a save-bound config, so secret fields get
/// their original plaintext back (for `encrypt_secrets()` to re-encrypt) and
/// non-secret fields get their disk-or-default value back.
#[derive(Clone, Debug, Default)]
pub struct AppliedOverrides {
    /// Dotted prop paths that were overridden from the environment.
    pub paths: HashSet<String>,
    /// Pre-override raw value for each overridden path, keyed by that path.
    pub snapshots: HashMap<String, String>,
}
54
55/// Apply every `ZEROCLAW_<lowercase>` env var to `config`. Returns the set of
56/// dotted prop-paths that were overridden plus the pre-override raw values
57/// for each. Hard-errors on any env var that doesn't resolve to a known
58/// schema path or whose alias fails validation.
59pub fn apply_env_overrides(config: &mut Config) -> Result<AppliedOverrides> {
60    let mut entries: Vec<(String, String, String)> = std::env::vars()
61        .filter_map(|(k, v)| {
62            let tail = k.strip_prefix(PREFIX)?;
63            (!tail.is_empty()
64                && tail
65                    .chars()
66                    .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'))
67            .then(|| (k.clone(), v, tail.to_string()))
68        })
69        .collect();
70    entries.sort_by(|a, b| a.0.cmp(&b.0));
71
72    let mut paths: HashSet<String> = HashSet::with_capacity(entries.len());
73    let mut snapshots: HashMap<String, String> = HashMap::with_capacity(entries.len());
74    for (env_name, value, tail) in entries {
75        let path = resolve_path(&tail, config)
76            .with_context(|| format!("{env_name} did not resolve to a schema path"))?;
77        if NON_OVERRIDABLE_PATHS.contains(path.as_str()) {
78            ::zeroclaw_log::record!(
79                WARN,
80                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
81                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
82                    .with_attrs(::serde_json::json!({"env_var": env_name, "path": path})),
83                "env override rejected: field is not overridable"
84            );
85            anyhow::bail!("{env_name} -> {path}: this field is not overridable via env vars");
86        }
87        // Snapshot the pre-override raw value via TOML serde walk. Bypasses
88        // `Config::get_prop`'s unconditional secret mask: secret fields on
89        // `config` carry plaintext (post-`decrypt_secrets`), so the snapshot
90        // captures the real value that should be restored at save time.
91        let snapshot = raw_value_for_path(config, &path).unwrap_or_default();
92        snapshots.insert(path.clone(), snapshot);
93
94        config
95            .set_prop(&path, &value)
96            .with_context(|| format!("{env_name} → {path}"))?;
97        if Config::prop_is_secret(&path) {
98            ::zeroclaw_log::record!(
99                WARN,
100                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
101                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
102                    .with_attrs(::serde_json::json!({"path": path, "env_var": env_name})),
103                "Secret applied from env override"
104            );
105        } else {
106            ::zeroclaw_log::record!(
107                DEBUG,
108                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
109                    .with_attrs(::serde_json::json!({"path": path, "env_var": env_name})),
110                "Env override applied"
111            );
112        }
113        paths.insert(path);
114    }
115    if !paths.is_empty() {
116        ::zeroclaw_log::record!(
117            INFO,
118            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
119                .with_attrs(::serde_json::json!({"count": paths.len()})),
120            "Applied env-var config overrides"
121        );
122    }
123    Ok(AppliedOverrides { paths, snapshots })
124}
125
126/// Walk an env-var tail against the schema. Map-keyed positions consume one
127/// `__`-delimited alias token (which may contain single `_` per the alias
128/// validator); everything else resolves via `prop_fields()` lookup.
129fn resolve_path(tail: &str, config: &mut Config) -> Result<String> {
130    let mut sections = Config::map_key_sections();
131    sections.sort_by_key(|s| std::cmp::Reverse(s.path.len()));
132    for section in sections {
133        let env_pfx: String = section.path.replace('.', SEP);
134        let with_sep = format!("{env_pfx}{SEP}");
135        let Some(rest) = tail.strip_prefix(&with_sep) else {
136            continue;
137        };
138        let mut parts = rest.splitn(2, SEP);
139        let alias = parts.next().filter(|s| !s.is_empty()).ok_or_else(|| {
140            ::zeroclaw_log::record!(
141                WARN,
142                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
143                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
144                    .with_attrs(::serde_json::json!({"section": section.path, "tail": tail})),
145                "env override path missing alias segment"
146            );
147            anyhow::Error::msg(format!("missing alias after `{}`", section.path))
148        })?;
149        let inner = parts.next().unwrap_or("");
150        // Propagate the alias-validator's specific error so operators see
151        // *why* their alias was rejected (leading underscore, uppercase, …)
152        // instead of the generic "Unknown property" that would surface from
153        // a downstream `set_prop` against a non-existent map key.
154        config.create_map_key(section.path, alias).map_err(|e| {
155            ::zeroclaw_log::record!(
156                WARN,
157                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
158                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
159                    .with_attrs(::serde_json::json!({
160                        "section": section.path,
161                        "alias": alias,
162                        "error": format!("{}", e),
163                    })),
164                "env override alias rejected by validator"
165            );
166            anyhow::Error::msg(format!(
167                "invalid alias `{alias}` for `{}`: {e}",
168                section.path
169            ))
170        })?;
171        let path = if inner.is_empty() {
172            format!("{}.{}", section.path, alias)
173        } else {
174            // Inner segments are `__`-separated snake-case field names — the
175            // same casing the prop-path uses, so join them verbatim.
176            let inner_path = inner.split(SEP).collect::<Vec<_>>().join(".");
177            format!("{}.{}.{}", section.path, alias, inner_path)
178        };
179        return Ok(path);
180    }
181
182    // Non-map path: prop_fields() entries are dotted snake-case field
183    // names. Convert to env-form (`.` → `__`) and compare.
184    config
185        .prop_fields()
186        .into_iter()
187        .find(|f| f.name.replace('.', SEP) == tail)
188        .map(|f| f.name)
189        .ok_or_else(|| {
190            ::zeroclaw_log::record!(
191                WARN,
192                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
193                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
194                    .with_attrs(::serde_json::json!({"tail": tail})),
195                "env override path does not match any schema field"
196            );
197            anyhow::Error::msg(format!("no schema field has env-form `{tail}`"))
198        })
199}
200
201/// Read the raw string value at a dotted (kebab-cased) prop path from a
202/// serializable Config struct, bypassing the `is_secret` masking that
203/// `Config::get_prop` applies. Returns `None` when the path doesn't resolve
204/// (e.g. the alias entry hasn't been created yet on disk).
205///
206/// Walks the TOML serialization. Each segment is resolved value-aware:
207/// tried verbatim first so hyphenated map keys (aliases, model names like
208/// `claude-opus-4-8`) survive, then snake-cased only as a fallback for a
209/// kebab field segment. Used by [`apply_env_overrides`] so the pre-override
210/// snapshot of a secret field captures the real plaintext rather than the
211/// display mask.
212fn raw_value_for_path(source: &Config, path: &str) -> Option<String> {
213    let table = toml::Value::try_from(source).ok()?;
214    let mut current: &toml::Value = &table;
215    for segment in path.split('.') {
216        let tbl = current.as_table()?;
217        current = match tbl.get(segment) {
218            Some(v) => v,
219            None => tbl.get(&segment.replace('-', "_"))?,
220        };
221    }
222    Some(match current {
223        toml::Value::String(s) => s.clone(),
224        other => other.to_string(),
225    })
226}
227
228/// Restore env-overridden paths in a save-bound clone to their pre-override
229/// snapshots, so env-injected values never reach `encrypt_secrets()` or the
230/// on-disk TOML.
231///
232/// Snapshots come from [`apply_env_overrides`] which captures the
233/// post-`decrypt_secrets` plaintext for secret fields. After this restore,
234/// `encrypt_secrets()` re-encrypts the recovered plaintext to fresh
235/// ciphertext that decrypts to the same value — preserving the operator's
236/// real on-disk credential across env-override + save cycles.
237pub fn mask_env_overrides_for_save(
238    config_to_save: &mut Config,
239    snapshots: &HashMap<String, String>,
240) -> Result<()> {
241    for (path, value) in snapshots {
242        if let Err(err) = config_to_save.set_prop(path, value) {
243            ::zeroclaw_log::record!(
244                WARN,
245                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
246                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
247                    .with_attrs(::serde_json::json!({"path": path, "error": format!("{}", err)})),
248                "Save-mask reset failed; field retains default"
249            );
250        }
251    }
252    Ok(())
253}
254
/// Acquire the single, process-wide lock that serializes tests which mutate
/// `ZEROCLAW_*` environment variables.
///
/// The environment is shared by every test in the process, so any test that
/// sets or removes one of these variables must hold this guard for its whole
/// body. The mutex lives in one place and is exposed `pub(crate)` so that all
/// test modules in the crate coordinate on the same instance. Compiled only
/// under `#[cfg(test)]`, so it never reaches production builds.
#[cfg(test)]
pub(crate) async fn env_test_lock() -> tokio::sync::MutexGuard<'static, ()> {
    use tokio::sync::Mutex;

    static ENV_MUTEX: Mutex<()> = Mutex::const_new(());
    ENV_MUTEX.lock().await
}
265
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::Config;

    /// Sets a `ZEROCLAW_*` variable for the duration of a test and removes it
    /// again on drop, so a failed assertion does not leak state into sibling
    /// tests. Declare it *after* taking `env_test_lock()` so the variable is
    /// removed before the lock is released.
    struct EnvVarGuard(&'static str);
    impl EnvVarGuard {
        fn set(name: &'static str, value: &str) -> Self {
            // SAFETY: tests serialize on `env_test_lock()`.
            unsafe { std::env::set_var(name, value) };
            Self(name)
        }
    }
    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: tests serialize on `env_test_lock()`.
            unsafe { std::env::remove_var(self.0) };
        }
    }

    /// API key stored under `providers.models.anthropic["default"]`, if the
    /// alias exists and has one.
    fn anthropic_default_api_key(config: &Config) -> Option<&str> {
        config
            .providers
            .models
            .anthropic
            .get("default")
            .and_then(|entry| entry.base.api_key.as_deref())
    }

    #[tokio::test]
    async fn walker_resolves_typed_family_alias_default() {
        let _guard = super::env_test_lock().await;
        let _v = EnvVarGuard::set(
            "ZEROCLAW_providers__models__anthropic__default__api_key",
            "sk-ant-fixture",
        );

        let mut config = Config::default();
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");

        assert!(
            applied
                .paths
                .contains("providers.models.anthropic.default.api_key"),
            "resolved snake-case path should be recorded: {:?}",
            applied.paths,
        );
        // Secret field round-trips through set_prop into the typed alias.
        assert_eq!(anthropic_default_api_key(&config), Some("sk-ant-fixture"));
    }

    #[tokio::test]
    async fn walker_accepts_alias_with_underscore() {
        let _guard = super::env_test_lock().await;
        let _v1 = EnvVarGuard::set(
            "ZEROCLAW_providers__models__openrouter__prod_v2__api_key",
            "sk-or-fixture",
        );
        let _v2 = EnvVarGuard::set(
            "ZEROCLAW_providers__models__openrouter__prod_v2__model",
            "anthropic/claude-sonnet-4-6",
        );

        let mut config = Config::default();
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");

        assert!(
            applied
                .paths
                .contains("providers.models.openrouter.prod_v2.api_key"),
        );
        assert!(
            applied
                .paths
                .contains("providers.models.openrouter.prod_v2.model"),
        );
        let entry = config
            .providers
            .models
            .openrouter
            .get("prod_v2")
            .expect("alias created");
        assert_eq!(entry.base.api_key.as_deref(), Some("sk-or-fixture"));
        assert_eq!(
            entry.base.model.as_deref(),
            Some("anthropic/claude-sonnet-4-6"),
        );
    }

    #[tokio::test]
    async fn walker_resolves_non_map_gateway_path() {
        let _guard = super::env_test_lock().await;
        let _v = EnvVarGuard::set("ZEROCLAW_gateway__request_timeout_secs", "120");

        let mut config = Config::default();
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");

        assert!(applied.paths.contains("gateway.request_timeout_secs"));
        assert_eq!(config.gateway.request_timeout_secs, 120);
    }

    #[tokio::test]
    async fn walker_rejects_unknown_path() {
        let _guard = super::env_test_lock().await;
        let _v = EnvVarGuard::set("ZEROCLAW_no__such__field", "x");

        let mut config = Config::default();
        let err = apply_env_overrides(&mut config).expect_err("must hard-error");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("ZEROCLAW_no__such__field") && msg.contains("did not resolve"),
            "error must name the env var and the failure: {msg}",
        );
    }

    #[tokio::test]
    async fn walker_propagates_alias_validator_error() {
        let _guard = super::env_test_lock().await;
        // `_invalid` starts with `_`, which the alias validator rejects.
        // The walker's tail filter accepts `[a-z0-9_]+` so this gets past
        // the prefilter, and the failure must surface as the validator's
        // specific message — not a generic "Unknown property".
        let _v = EnvVarGuard::set(
            "ZEROCLAW_providers__models__anthropic___invalid__api_key",
            "x",
        );

        let mut config = Config::default();
        let err = apply_env_overrides(&mut config).expect_err("must hard-error");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("invalid alias") && msg.contains("_invalid"),
            "error must surface the alias validator's message: {msg}",
        );
    }

    #[tokio::test]
    async fn mask_restores_pre_override_snapshot_for_non_secret() {
        let _guard = super::env_test_lock().await;
        let _v = EnvVarGuard::set("ZEROCLAW_gateway__request_timeout_secs", "999");

        let mut config = Config::default();
        let original_timeout = config.gateway.request_timeout_secs;
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");
        assert_eq!(config.gateway.request_timeout_secs, 999);

        let mut to_save = config.clone();
        mask_env_overrides_for_save(&mut to_save, &applied.snapshots).expect("mask succeeds");
        assert_eq!(
            to_save.gateway.request_timeout_secs, original_timeout,
            "non-secret path resets to pre-override snapshot",
        );
        // In-memory config is unchanged — env value still effective for the
        // running process.
        assert_eq!(config.gateway.request_timeout_secs, 999);
    }

    #[tokio::test]
    async fn mask_restores_pre_override_plaintext_for_secret() {
        let _guard = super::env_test_lock().await;
        let _v = EnvVarGuard::set(
            "ZEROCLAW_providers__models__anthropic__default__api_key",
            "sk-ant-from-env",
        );

        // Pre-existing alias with a real plaintext credential (the state
        // after `Config::load_or_init` calls `decrypt_secrets`).
        let mut config = Config::default();
        config
            .providers
            .models
            .ensure("anthropic", "default")
            .expect("typed slot")
            .api_key = Some("sk-ant-on-disk".to_string());

        let applied = apply_env_overrides(&mut config).expect("apply succeeds");
        assert!(
            applied
                .paths
                .contains("providers.models.anthropic.default.api_key"),
        );
        // Env value is live in memory.
        assert_eq!(anthropic_default_api_key(&config), Some("sk-ant-from-env"));

        // Save-bound clone restores the pre-override plaintext, NOT the
        // display mask. This is the regression bar for the data-loss bug
        // identified in PR #6523 review.
        let mut to_save = config.clone();
        mask_env_overrides_for_save(&mut to_save, &applied.snapshots).expect("mask succeeds");
        assert_eq!(
            anthropic_default_api_key(&to_save),
            Some("sk-ant-on-disk"),
            "secret resets to pre-override plaintext (not the `**** (encrypted)` mask)",
        );
        assert_ne!(
            anthropic_default_api_key(&to_save),
            Some("**** (encrypted)"),
            "must not corrupt the field with the display mask",
        );
    }

    #[tokio::test]
    async fn schema_version_override_rejected() {
        let _guard = super::env_test_lock().await;
        let _v = EnvVarGuard::set("ZEROCLAW_schema_version", "99");

        let mut config = Config::default();
        let err = apply_env_overrides(&mut config).expect_err("must hard-error");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("schema_version") && msg.contains("not overridable"),
            "error must name the path and the reason: {msg}",
        );
    }
}