Skip to main content

zeroclaw_config/
env_overrides.rs

1//! V0.8.0 env-var override mechanism.
2//!
3//! Grammar: `ZEROCLAW_<dotted_path_with_double_underscores>=<value>`.
4//! Each `__` (double underscore) is a path separator (`.` in the TOML); each
5//! single `_` is either a snake-case joiner inside a field name (which the
6//! walker converts to kebab `-` for `set_prop`) or a literal char inside an
7//! alias key.
8//!
9//! Schema-derived: [`map_key_sections`] gives HashMap positions (one alias
10//! token consumed; alias chars are `[a-z0-9_]`); [`prop_fields`] gives every
11//! other leaf path. No string-literal pattern matching, no hardcoded family
12//! names.
13//!
14//! Bootstrap exception: `ZEROCLAW_WORKSPACE` and `ZEROCLAW_CONFIG_DIR` keep
15//! their UPPERCASE form. The case rule (lowercase tail = config-tree,
16//! uppercase tail = bootstrap) does the disambiguation work without an
17//! exemption list.
18//!
19//! Persistence boundary: each overridden path's pre-override raw value is
20//! snapshotted (post-`decrypt_secrets`, so secrets are plaintext) and used
21//! by [`mask_env_overrides_for_save`] to restore disk-or-default values
22//! before `encrypt_secrets()` runs. Env-injected values never reach disk.
23//!
24//! [`map_key_sections`]: crate::schema::Config::map_key_sections
25//! [`prop_fields`]: crate::schema::Config::prop_fields
26
27use crate::schema::Config;
28use anyhow::{Context, Result};
29use std::collections::{HashMap, HashSet};
30use std::sync::LazyLock;
31
/// Leading marker shared by every ZeroClaw env var; whatever follows it is
/// the "tail" that gets matched against the config schema.
const PREFIX: &str = "ZEROCLAW_";

/// Path separator inside an env-var tail: each `__` stands for one `.` of the
/// dotted prop-path.
const SEP: &str = "__";
34
/// Schema paths that show up in `prop_fields()` yet must never be set from
/// the environment.
///
/// The only entry today is `schema-version`, spelled in the kebab form that
/// `prop_fields()` emits. The migration engine derives it from the on-disk
/// file, so an env override would either skip needed migrations or trigger a
/// no-op rerun. Kept as a lazily built `HashSet` so membership checks stay
/// cheap as more reserved paths are added.
static NON_OVERRIDABLE_PATHS: LazyLock<HashSet<&'static str>> =
    LazyLock::new(|| ["schema-version"].into_iter().collect());
43
/// Outcome of [`apply_env_overrides`]: the set of overridden paths plus the
/// per-path snapshot of pre-override raw values. The snapshot drives
/// [`mask_env_overrides_for_save`] so secret fields recover their original
/// plaintext (which `encrypt_secrets()` then re-encrypts), and non-secret
/// fields recover their disk-or-default value.
///
/// Because `snapshots` can hold plaintext secrets, `Debug` is implemented by
/// hand and prints only the snapshot *paths*, never the values, so a stray
/// `{:?}` in a log line or panic message cannot leak a credential.
#[derive(Default, Clone)]
pub struct AppliedOverrides {
    /// Dotted, kebab-cased prop-paths that an env var overrode.
    pub paths: HashSet<String>,
    /// Pre-override raw value for each overridden path, keyed by that path.
    /// May contain plaintext secrets.
    pub snapshots: HashMap<String, String>,
}

impl std::fmt::Debug for AppliedOverrides {
    /// Renders both path lists in sorted order and redacts every snapshot
    /// value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut paths: Vec<&str> = self.paths.iter().map(String::as_str).collect();
        paths.sort_unstable();
        let mut snapshot_paths: Vec<&str> = self.snapshots.keys().map(String::as_str).collect();
        snapshot_paths.sort_unstable();
        f.debug_struct("AppliedOverrides")
            .field("paths", &paths)
            .field(
                "snapshots",
                &format_args!("{snapshot_paths:?} (values redacted)"),
            )
            .finish()
    }
}
54
55/// Apply every `ZEROCLAW_<lowercase>` env var to `config`. Returns the set of
56/// dotted prop-paths that were overridden plus the pre-override raw values
57/// for each. Hard-errors on any env var that doesn't resolve to a known
58/// schema path or whose alias fails validation.
59pub fn apply_env_overrides(config: &mut Config) -> Result<AppliedOverrides> {
60    let mut entries: Vec<(String, String, String)> = std::env::vars()
61        .filter_map(|(k, v)| {
62            let tail = k.strip_prefix(PREFIX)?;
63            (!tail.is_empty()
64                && tail
65                    .chars()
66                    .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'))
67            .then(|| (k.clone(), v, tail.to_string()))
68        })
69        .collect();
70    entries.sort_by(|a, b| a.0.cmp(&b.0));
71
72    let mut paths: HashSet<String> = HashSet::with_capacity(entries.len());
73    let mut snapshots: HashMap<String, String> = HashMap::with_capacity(entries.len());
74    for (env_name, value, tail) in entries {
75        let path = resolve_path(&tail, config)
76            .with_context(|| format!("{env_name} did not resolve to a schema path"))?;
77        if NON_OVERRIDABLE_PATHS.contains(path.as_str()) {
78            ::zeroclaw_log::record!(
79                WARN,
80                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
81                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
82                    .with_attrs(::serde_json::json!({"env_var": env_name, "path": path})),
83                "env override rejected: field is not overridable"
84            );
85            anyhow::bail!("{env_name} -> {path}: this field is not overridable via env vars");
86        }
87        // Snapshot the pre-override raw value via TOML serde walk. Bypasses
88        // `Config::get_prop`'s unconditional secret mask: secret fields on
89        // `config` carry plaintext (post-`decrypt_secrets`), so the snapshot
90        // captures the real value that should be restored at save time.
91        let snapshot = raw_value_for_path(config, &path).unwrap_or_default();
92        snapshots.insert(path.clone(), snapshot);
93
94        config
95            .set_prop(&path, &value)
96            .with_context(|| format!("{env_name} → {path}"))?;
97        if Config::prop_is_secret(&path) {
98            ::zeroclaw_log::record!(
99                WARN,
100                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
101                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
102                    .with_attrs(::serde_json::json!({"path": path, "env_var": env_name})),
103                "Secret applied from env override"
104            );
105        } else {
106            ::zeroclaw_log::record!(
107                DEBUG,
108                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
109                    .with_attrs(::serde_json::json!({"path": path, "env_var": env_name})),
110                "Env override applied"
111            );
112        }
113        paths.insert(path);
114    }
115    if !paths.is_empty() {
116        ::zeroclaw_log::record!(
117            INFO,
118            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
119                .with_attrs(::serde_json::json!({"count": paths.len()})),
120            "Applied env-var config overrides"
121        );
122    }
123    Ok(AppliedOverrides { paths, snapshots })
124}
125
126/// Walk an env-var tail against the schema. Map-keyed positions consume one
127/// `__`-delimited alias token (which may contain single `_` per the alias
128/// validator); everything else resolves via `prop_fields()` lookup.
129fn resolve_path(tail: &str, config: &mut Config) -> Result<String> {
130    let mut sections = Config::map_key_sections();
131    sections.sort_by_key(|s| std::cmp::Reverse(s.path.len()));
132    for section in sections {
133        let env_pfx: String = section.path.replace('.', SEP);
134        let with_sep = format!("{env_pfx}{SEP}");
135        let Some(rest) = tail.strip_prefix(&with_sep) else {
136            continue;
137        };
138        let mut parts = rest.splitn(2, SEP);
139        let alias = parts.next().filter(|s| !s.is_empty()).ok_or_else(|| {
140            ::zeroclaw_log::record!(
141                WARN,
142                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
143                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
144                    .with_attrs(::serde_json::json!({"section": section.path, "tail": tail})),
145                "env override path missing alias segment"
146            );
147            anyhow::Error::msg(format!("missing alias after `{}`", section.path))
148        })?;
149        let inner = parts.next().unwrap_or("");
150        // Propagate the alias-validator's specific error so operators see
151        // *why* their alias was rejected (leading underscore, uppercase, …)
152        // instead of the generic "Unknown property" that would surface from
153        // a downstream `set_prop` against a non-existent map key.
154        config.create_map_key(section.path, alias).map_err(|e| {
155            ::zeroclaw_log::record!(
156                WARN,
157                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
158                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
159                    .with_attrs(::serde_json::json!({
160                        "section": section.path,
161                        "alias": alias,
162                        "error": format!("{}", e),
163                    })),
164                "env override alias rejected by validator"
165            );
166            anyhow::Error::msg(format!(
167                "invalid alias `{alias}` for `{}`: {e}",
168                section.path
169            ))
170        })?;
171        let path = if inner.is_empty() {
172            format!("{}.{}", section.path, alias)
173        } else {
174            // Inner segments are `__`-separated; each segment is a snake-case
175            // field name that maps to kebab in the prop-path.
176            let inner_path = inner
177                .split(SEP)
178                .map(|seg| seg.replace('_', "-"))
179                .collect::<Vec<_>>()
180                .join(".");
181            format!("{}.{}.{}", section.path, alias, inner_path)
182        };
183        return Ok(path);
184    }
185
186    // Non-map path: prop_fields() entries are dotted with kebab fields.
187    // Convert to env-form (`.` → `__`, `-` → `_`) and compare.
188    config
189        .prop_fields()
190        .into_iter()
191        .find(|f| f.name.replace('.', SEP).replace('-', "_") == tail)
192        .map(|f| f.name)
193        .ok_or_else(|| {
194            ::zeroclaw_log::record!(
195                WARN,
196                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
197                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
198                    .with_attrs(::serde_json::json!({"tail": tail})),
199                "env override path does not match any schema field"
200            );
201            anyhow::Error::msg(format!("no schema field has env-form `{tail}`"))
202        })
203}
204
205/// Read the raw string value at a dotted (kebab-cased) prop path from a
206/// serializable Config struct, bypassing the `is_secret` masking that
207/// `Config::get_prop` applies. Returns `None` when the path doesn't resolve
208/// (e.g. the alias entry hasn't been created yet on disk).
209///
210/// Walks the TOML serialization with `kebab → snake` field-name conversion
211/// (HashMap aliases keep `_` natively per the validator). Used by
212/// [`apply_env_overrides`] so the pre-override snapshot of a secret field
213/// captures the real plaintext rather than the display mask.
214fn raw_value_for_path(source: &Config, path: &str) -> Option<String> {
215    let table = toml::Value::try_from(source).ok()?;
216    let mut current: &toml::Value = &table;
217    for segment in path.split('.') {
218        let key = segment.replace('-', "_");
219        current = current.as_table()?.get(&key)?;
220    }
221    Some(match current {
222        toml::Value::String(s) => s.clone(),
223        other => other.to_string(),
224    })
225}
226
227/// Restore env-overridden paths in a save-bound clone to their pre-override
228/// snapshots, so env-injected values never reach `encrypt_secrets()` or the
229/// on-disk TOML.
230///
231/// Snapshots come from [`apply_env_overrides`] which captures the
232/// post-`decrypt_secrets` plaintext for secret fields. After this restore,
233/// `encrypt_secrets()` re-encrypts the recovered plaintext to fresh
234/// ciphertext that decrypts to the same value — preserving the operator's
235/// real on-disk credential across env-override + save cycles.
236pub fn mask_env_overrides_for_save(
237    config_to_save: &mut Config,
238    snapshots: &HashMap<String, String>,
239) -> Result<()> {
240    for (path, value) in snapshots {
241        if let Err(err) = config_to_save.set_prop(path, value) {
242            ::zeroclaw_log::record!(
243                WARN,
244                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
245                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
246                    .with_attrs(::serde_json::json!({"path": path, "error": format!("{}", err)})),
247                "Save-mask reset failed; field retains default"
248            );
249        }
250    }
251    Ok(())
252}
253
/// Acquire the process-wide lock that serializes env-mutating tests.
///
/// `env_overrides::tests` and `schema::tests` both read and write
/// `ZEROCLAW_*` env vars, so they must coordinate through one shared mutex.
/// It is defined once here and exposed as `pub(crate)` so there is a single
/// coordinator. Gated on `#[cfg(test)]`, so it never lands in production
/// builds. The returned guard releases the lock when dropped.
#[cfg(test)]
pub(crate) async fn env_test_lock() -> tokio::sync::MutexGuard<'static, ()> {
    use tokio::sync::Mutex;

    static ENV_MUTEX: Mutex<()> = Mutex::const_new(());
    ENV_MUTEX.lock().await
}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::Config;

    /// Env var / prop-path pair for the Anthropic `default` alias API key,
    /// shared by several tests below.
    const ANTHROPIC_KEY_ENV: &str = "ZEROCLAW_providers__models__anthropic__default__api_key";
    const ANTHROPIC_KEY_PATH: &str = "providers.models.anthropic.default.api-key";
    /// Env var for the non-map gateway timeout field.
    const GATEWAY_TIMEOUT_ENV: &str = "ZEROCLAW_gateway__request_timeout_secs";

    /// Drop-based cleanup helper: sets the named ZEROCLAW_* var on
    /// construction and removes it on drop, so a failed assert does not leak
    /// state into sibling tests.
    struct EnvVarGuard(&'static str);

    impl EnvVarGuard {
        fn set(name: &'static str, value: &str) -> Self {
            // SAFETY: tests serialize on `env_test_lock()`.
            unsafe { std::env::set_var(name, value) };
            Self(name)
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: tests serialize on `env_test_lock()`.
            unsafe { std::env::remove_var(self.0) };
        }
    }

    /// API key currently stored on the Anthropic `default` alias, if any.
    fn anthropic_default_api_key(config: &Config) -> Option<&str> {
        config
            .providers
            .models
            .anthropic
            .get("default")
            .and_then(|entry| entry.base.api_key.as_deref())
    }

    /// Runs the override pass on `config` and renders the expected failure
    /// with its full context chain.
    fn expect_override_failure(config: &mut Config) -> String {
        let err = apply_env_overrides(config).expect_err("must hard-error");
        format!("{err:#}")
    }

    #[tokio::test]
    async fn walker_resolves_typed_family_alias_default() {
        let _lock = super::env_test_lock().await;
        let _env = EnvVarGuard::set(ANTHROPIC_KEY_ENV, "sk-ant-fixture");

        let mut config = Config::default();
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");

        assert!(
            applied.paths.contains(ANTHROPIC_KEY_PATH),
            "kebab-translated path should be recorded: {:?}",
            applied.paths,
        );
        // The secret lands in the typed alias entry via set_prop.
        assert_eq!(anthropic_default_api_key(&config), Some("sk-ant-fixture"));
    }

    #[tokio::test]
    async fn walker_accepts_alias_with_underscore() {
        let _lock = super::env_test_lock().await;
        let _key_env = EnvVarGuard::set(
            "ZEROCLAW_providers__models__openrouter__prod_v2__api_key",
            "sk-or-fixture",
        );
        let _model_env = EnvVarGuard::set(
            "ZEROCLAW_providers__models__openrouter__prod_v2__model",
            "anthropic/claude-sonnet-4-6",
        );

        let mut config = Config::default();
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");

        // The alias keeps its single `_`; only field names go kebab.
        assert!(
            applied
                .paths
                .contains("providers.models.openrouter.prod_v2.api-key"),
        );
        assert!(
            applied
                .paths
                .contains("providers.models.openrouter.prod_v2.model"),
        );

        let created = config
            .providers
            .models
            .openrouter
            .get("prod_v2")
            .expect("alias created");
        assert_eq!(created.base.api_key.as_deref(), Some("sk-or-fixture"));
        assert_eq!(
            created.base.model.as_deref(),
            Some("anthropic/claude-sonnet-4-6"),
        );
    }

    #[tokio::test]
    async fn walker_resolves_non_map_gateway_path() {
        let _lock = super::env_test_lock().await;
        let _env = EnvVarGuard::set(GATEWAY_TIMEOUT_ENV, "120");

        let mut config = Config::default();
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");

        assert!(applied.paths.contains("gateway.request-timeout-secs"));
        assert_eq!(config.gateway.request_timeout_secs, 120);
    }

    #[tokio::test]
    async fn walker_rejects_unknown_path() {
        let _lock = super::env_test_lock().await;
        let _env = EnvVarGuard::set("ZEROCLAW_no__such__field", "x");

        let mut config = Config::default();
        let msg = expect_override_failure(&mut config);
        assert!(
            msg.contains("ZEROCLAW_no__such__field") && msg.contains("did not resolve"),
            "error must name the env var and the failure: {msg}",
        );
    }

    #[tokio::test]
    async fn walker_propagates_alias_validator_error() {
        let _lock = super::env_test_lock().await;
        // The alias `_invalid` begins with `_`, which the alias validator
        // rejects. It still passes the `[a-z0-9_]+` tail prefilter, so the
        // failure has to come back as the validator's specific message
        // rather than a generic "Unknown property".
        let _env = EnvVarGuard::set(
            "ZEROCLAW_providers__models__anthropic___invalid__api_key",
            "x",
        );

        let mut config = Config::default();
        let msg = expect_override_failure(&mut config);
        assert!(
            msg.contains("invalid alias") && msg.contains("_invalid"),
            "error must surface the alias validator's message: {msg}",
        );
    }

    #[tokio::test]
    async fn mask_restores_pre_override_snapshot_for_non_secret() {
        let _lock = super::env_test_lock().await;
        let _env = EnvVarGuard::set(GATEWAY_TIMEOUT_ENV, "999");

        let mut config = Config::default();
        let original_timeout = config.gateway.request_timeout_secs;
        let applied = apply_env_overrides(&mut config).expect("apply succeeds");
        assert_eq!(config.gateway.request_timeout_secs, 999);

        let mut to_save = config.clone();
        mask_env_overrides_for_save(&mut to_save, &applied.snapshots).expect("mask succeeds");
        assert_eq!(
            to_save.gateway.request_timeout_secs, original_timeout,
            "non-secret path resets to pre-override snapshot",
        );
        // Masking touched only the clone: the running process still sees
        // the env-supplied value.
        assert_eq!(config.gateway.request_timeout_secs, 999);
    }

    #[tokio::test]
    async fn mask_restores_pre_override_plaintext_for_secret() {
        let _lock = super::env_test_lock().await;
        let _env = EnvVarGuard::set(ANTHROPIC_KEY_ENV, "sk-ant-from-env");

        // Seed an existing alias holding a real plaintext credential — the
        // state after `Config::load_or_init` calls `decrypt_secrets`.
        let mut config = Config::default();
        config
            .providers
            .models
            .ensure("anthropic", "default")
            .expect("typed slot")
            .api_key = Some("sk-ant-on-disk".to_string());

        let applied = apply_env_overrides(&mut config).expect("apply succeeds");
        assert!(applied.paths.contains(ANTHROPIC_KEY_PATH));
        // The env value is what the live config now carries.
        assert_eq!(anthropic_default_api_key(&config), Some("sk-ant-from-env"));

        // The save-bound clone must get the pre-override plaintext back, NOT
        // the display mask. Regression bar for the data-loss bug identified
        // in PR #6523 review.
        let mut to_save = config.clone();
        mask_env_overrides_for_save(&mut to_save, &applied.snapshots).expect("mask succeeds");
        assert_eq!(
            anthropic_default_api_key(&to_save),
            Some("sk-ant-on-disk"),
            "secret resets to pre-override plaintext (not the `**** (encrypted)` mask)",
        );
        assert_ne!(
            anthropic_default_api_key(&to_save),
            Some("**** (encrypted)"),
            "must not corrupt the field with the display mask",
        );
    }

    #[tokio::test]
    async fn schema_version_override_rejected() {
        let _lock = super::env_test_lock().await;
        let _env = EnvVarGuard::set("ZEROCLAW_schema_version", "99");

        let mut config = Config::default();
        let msg = expect_override_failure(&mut config);
        assert!(
            msg.contains("schema_version") && msg.contains("not overridable"),
            "error must name the path and the reason: {msg}",
        );
    }
}