Skip to main content

zeroclaw_memory/
hygiene.rs

1use crate::policy::PolicyEnforcer;
2use anyhow::Result;
3use chrono::{DateTime, Duration, Local, NaiveDate, Utc};
4use rusqlite::{Connection, params};
5use serde::{Deserialize, Serialize};
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::time::{Duration as StdDuration, SystemTime};
9use zeroclaw_config::schema::MemoryConfig;
10
/// Minimum number of hours that must separate two hygiene passes.
const HYGIENE_INTERVAL_HOURS: i64 = 12;
/// File name of the JSON state file kept under the workspace's `state` directory.
const STATE_FILE: &str = "memory_hygiene_state.json";
13
/// Per-pass counters describing what a hygiene run changed.
///
/// Stored inside [`HygieneState`] and serialized to the hygiene state file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct HygieneReport {
    /// Daily memory files moved into `memory/archive`.
    archived_memory_files: u64,
    /// Session files moved into `sessions/archive`.
    archived_session_files: u64,
    /// Files deleted from `memory/archive`.
    purged_memory_archives: u64,
    /// Files deleted from `sessions/archive`.
    purged_session_archives: u64,
    /// Conversation rows deleted from the `memories` table.
    pruned_conversation_rows: u64,
}
22
23impl HygieneReport {
24    fn total_actions(&self) -> u64 {
25        self.archived_memory_files
26            + self.archived_session_files
27            + self.purged_memory_archives
28            + self.purged_session_archives
29            + self.pruned_conversation_rows
30    }
31}
32
/// Persistent record of the most recent hygiene pass, stored as JSON in the state file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct HygieneState {
    /// RFC 3339 timestamp of the last completed pass; `None` if no pass has been recorded.
    last_run_at: Option<String>,
    /// Counters produced by the last completed pass.
    last_report: HygieneReport,
}
38
39/// Run memory/session hygiene if the cadence window has elapsed.
40///
41/// This function is intentionally best-effort: callers should log and continue on failure.
42pub fn run_if_due(config: &MemoryConfig, workspace_dir: &Path) -> Result<()> {
43    if !config.hygiene_enabled {
44        return Ok(());
45    }
46
47    if !should_run_now(workspace_dir)? {
48        return Ok(());
49    }
50
51    // Use policy engine for per-category retention overrides.
52    let enforcer = PolicyEnforcer::new(&config.policy);
53    let conversation_retention = enforcer.retention_days_for_category(
54        &crate::traits::MemoryCategory::Conversation,
55        config.conversation_retention_days,
56    );
57
58    let report = HygieneReport {
59        archived_memory_files: archive_daily_memory_files(
60            workspace_dir,
61            config.archive_after_days,
62        )?,
63        archived_session_files: archive_session_files(workspace_dir, config.archive_after_days)?,
64        purged_memory_archives: purge_memory_archives(workspace_dir, config.purge_after_days)?,
65        purged_session_archives: purge_session_archives(workspace_dir, config.purge_after_days)?,
66        pruned_conversation_rows: prune_conversation_rows(workspace_dir, conversation_retention)?,
67    };
68
69    // Prune audit entries if audit is enabled.
70    if config.audit_enabled
71        && let Err(e) = prune_audit_entries(workspace_dir, config.audit_retention_days)
72    {
73        ::zeroclaw_log::record!(
74            DEBUG,
75            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
76                .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
77            "audit pruning skipped"
78        );
79    }
80
81    write_state(workspace_dir, &report)?;
82
83    if report.total_actions() > 0 {
84        ::zeroclaw_log::record!(
85            INFO,
86            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
87            &format!(
88                "memory hygiene complete: archived_memory={} archived_sessions={} purged_memory={} purged_sessions={} pruned_conversation_rows={}",
89                report.archived_memory_files,
90                report.archived_session_files,
91                report.purged_memory_archives,
92                report.purged_session_archives,
93                report.pruned_conversation_rows
94            )
95        );
96    }
97
98    Ok(())
99}
100
101fn should_run_now(workspace_dir: &Path) -> Result<bool> {
102    let path = state_path(workspace_dir);
103    if !path.exists() {
104        return Ok(true);
105    }
106
107    let raw = fs::read_to_string(&path)?;
108    let state: HygieneState = match serde_json::from_str(&raw) {
109        Ok(s) => s,
110        Err(_) => return Ok(true),
111    };
112
113    let Some(last_run_at) = state.last_run_at else {
114        return Ok(true);
115    };
116
117    let last = match DateTime::parse_from_rfc3339(&last_run_at) {
118        Ok(ts) => ts.with_timezone(&Utc),
119        Err(_) => return Ok(true),
120    };
121
122    Ok(Utc::now().signed_duration_since(last) >= Duration::hours(HYGIENE_INTERVAL_HOURS))
123}
124
125fn write_state(workspace_dir: &Path, report: &HygieneReport) -> Result<()> {
126    let path = state_path(workspace_dir);
127    if let Some(parent) = path.parent() {
128        fs::create_dir_all(parent)?;
129    }
130
131    let state = HygieneState {
132        last_run_at: Some(Utc::now().to_rfc3339()),
133        last_report: report.clone(),
134    };
135    let json = serde_json::to_vec_pretty(&state)?;
136    fs::write(path, json)?;
137    Ok(())
138}
139
140fn state_path(workspace_dir: &Path) -> PathBuf {
141    workspace_dir.join("state").join(STATE_FILE)
142}
143
144fn archive_daily_memory_files(workspace_dir: &Path, archive_after_days: u32) -> Result<u64> {
145    if archive_after_days == 0 {
146        return Ok(0);
147    }
148
149    let memory_dir = workspace_dir.join("memory");
150    if !memory_dir.is_dir() {
151        return Ok(0);
152    }
153
154    let archive_dir = memory_dir.join("archive");
155    fs::create_dir_all(&archive_dir)?;
156
157    let cutoff = Local::now().date_naive() - Duration::days(i64::from(archive_after_days));
158    let mut moved = 0_u64;
159
160    for entry in fs::read_dir(&memory_dir)? {
161        let entry = entry?;
162        let path = entry.path();
163
164        if path.is_dir() {
165            continue;
166        }
167        if path.extension().and_then(|e| e.to_str()) != Some("md") {
168            continue;
169        }
170
171        let Some(filename) = path.file_name().and_then(|f| f.to_str()) else {
172            continue;
173        };
174
175        let Some(file_date) = memory_date_from_filename(filename) else {
176            continue;
177        };
178
179        if file_date < cutoff {
180            move_to_archive(&path, &archive_dir)?;
181            moved += 1;
182        }
183    }
184
185    Ok(moved)
186}
187
188fn archive_session_files(workspace_dir: &Path, archive_after_days: u32) -> Result<u64> {
189    if archive_after_days == 0 {
190        return Ok(0);
191    }
192
193    let sessions_dir = workspace_dir.join("sessions");
194    if !sessions_dir.is_dir() {
195        return Ok(0);
196    }
197
198    let archive_dir = sessions_dir.join("archive");
199    fs::create_dir_all(&archive_dir)?;
200
201    let cutoff_date = Local::now().date_naive() - Duration::days(i64::from(archive_after_days));
202    let cutoff_time = SystemTime::now()
203        .checked_sub(StdDuration::from_secs(
204            u64::from(archive_after_days) * 24 * 60 * 60,
205        ))
206        .unwrap_or(SystemTime::UNIX_EPOCH);
207
208    let mut moved = 0_u64;
209    for entry in fs::read_dir(&sessions_dir)? {
210        let entry = entry?;
211        let path = entry.path();
212
213        if path.is_dir() {
214            continue;
215        }
216
217        let Some(filename) = path.file_name().and_then(|f| f.to_str()) else {
218            continue;
219        };
220
221        let is_old = if let Some(date) = date_prefix(filename) {
222            date < cutoff_date
223        } else {
224            is_older_than(&path, cutoff_time)
225        };
226
227        if is_old {
228            move_to_archive(&path, &archive_dir)?;
229            moved += 1;
230        }
231    }
232
233    Ok(moved)
234}
235
236fn purge_memory_archives(workspace_dir: &Path, purge_after_days: u32) -> Result<u64> {
237    if purge_after_days == 0 {
238        return Ok(0);
239    }
240
241    let archive_dir = workspace_dir.join("memory").join("archive");
242    if !archive_dir.is_dir() {
243        return Ok(0);
244    }
245
246    let cutoff = Local::now().date_naive() - Duration::days(i64::from(purge_after_days));
247    let mut removed = 0_u64;
248
249    for entry in fs::read_dir(&archive_dir)? {
250        let entry = entry?;
251        let path = entry.path();
252
253        if path.is_dir() {
254            continue;
255        }
256
257        let Some(filename) = path.file_name().and_then(|f| f.to_str()) else {
258            continue;
259        };
260
261        let Some(file_date) = memory_date_from_filename(filename) else {
262            continue;
263        };
264
265        if file_date < cutoff {
266            fs::remove_file(&path)?;
267            removed += 1;
268        }
269    }
270
271    Ok(removed)
272}
273
274fn purge_session_archives(workspace_dir: &Path, purge_after_days: u32) -> Result<u64> {
275    if purge_after_days == 0 {
276        return Ok(0);
277    }
278
279    let archive_dir = workspace_dir.join("sessions").join("archive");
280    if !archive_dir.is_dir() {
281        return Ok(0);
282    }
283
284    let cutoff_date = Local::now().date_naive() - Duration::days(i64::from(purge_after_days));
285    let cutoff_time = SystemTime::now()
286        .checked_sub(StdDuration::from_secs(
287            u64::from(purge_after_days) * 24 * 60 * 60,
288        ))
289        .unwrap_or(SystemTime::UNIX_EPOCH);
290
291    let mut removed = 0_u64;
292    for entry in fs::read_dir(&archive_dir)? {
293        let entry = entry?;
294        let path = entry.path();
295
296        if path.is_dir() {
297            continue;
298        }
299
300        let Some(filename) = path.file_name().and_then(|f| f.to_str()) else {
301            continue;
302        };
303
304        let is_old = if let Some(date) = date_prefix(filename) {
305            date < cutoff_date
306        } else {
307            is_older_than(&path, cutoff_time)
308        };
309
310        if is_old {
311            fs::remove_file(&path)?;
312            removed += 1;
313        }
314    }
315
316    Ok(removed)
317}
318
319fn prune_conversation_rows(workspace_dir: &Path, retention_days: u32) -> Result<u64> {
320    if retention_days == 0 {
321        return Ok(0);
322    }
323
324    let db_path = workspace_dir.join("memory").join("brain.db");
325    if !db_path.exists() {
326        return Ok(0);
327    }
328
329    let conn = Connection::open(db_path)?;
330    // Use WAL so hygiene pruning doesn't block agent reads
331    conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL;")?;
332    let cutoff = (Local::now() - Duration::days(i64::from(retention_days))).to_rfc3339();
333
334    let affected = conn.execute(
335        "DELETE FROM memories WHERE category = 'conversation' AND updated_at < ?1",
336        params![cutoff],
337    )?;
338
339    Ok(u64::try_from(affected).unwrap_or(0))
340}
341
342fn prune_audit_entries(workspace_dir: &Path, retention_days: u32) -> Result<()> {
343    if retention_days == 0 {
344        return Ok(());
345    }
346
347    let db_path = workspace_dir.join("memory").join("audit.db");
348    if !db_path.exists() {
349        return Ok(());
350    }
351
352    let conn = Connection::open(db_path)?;
353    conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL;")?;
354    let cutoff = (Local::now() - Duration::days(i64::from(retention_days))).to_rfc3339();
355
356    let affected = conn.execute(
357        "DELETE FROM memory_audit WHERE timestamp < ?1",
358        params![cutoff],
359    )?;
360
361    if affected > 0 {
362        ::zeroclaw_log::record!(
363            DEBUG,
364            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_attrs(
365                ::serde_json::json!({"affected": affected, "retention_days": retention_days})
366            ),
367            "pruned  audit entries older than  days"
368        );
369    }
370
371    Ok(())
372}
373
374fn memory_date_from_filename(filename: &str) -> Option<NaiveDate> {
375    let stem = filename.strip_suffix(".md")?;
376    let date_part = stem.split('_').next().unwrap_or(stem);
377    NaiveDate::parse_from_str(date_part, "%Y-%m-%d").ok()
378}
379
380fn date_prefix(filename: &str) -> Option<NaiveDate> {
381    if filename.len() < 10 {
382        return None;
383    }
384    let boundary = {
385        let mut i = 10.min(filename.len());
386        while i > 0 && !filename.is_char_boundary(i) {
387            i -= 1;
388        }
389        i
390    };
391    NaiveDate::parse_from_str(&filename[..boundary], "%Y-%m-%d").ok()
392}
393
/// Report whether the file at `path` was last modified strictly before `cutoff`.
///
/// Returns `false` when the metadata or the modification time cannot be read.
fn is_older_than(path: &Path, cutoff: SystemTime) -> bool {
    let Ok(modified) = fs::metadata(path).and_then(|meta| meta.modified()) else {
        return false;
    };
    modified < cutoff
}
400
401fn move_to_archive(src: &Path, archive_dir: &Path) -> Result<()> {
402    let Some(filename) = src.file_name().and_then(|f| f.to_str()) else {
403        return Ok(());
404    };
405
406    let target = unique_archive_target(archive_dir, filename);
407    fs::rename(src, target)?;
408    Ok(())
409}
410
411fn unique_archive_target(archive_dir: &Path, filename: &str) -> PathBuf {
412    let direct = archive_dir.join(filename);
413    if !direct.exists() {
414        return direct;
415    }
416
417    let (stem, ext) = split_name(filename);
418    for i in 1..10_000 {
419        let candidate = if ext.is_empty() {
420            archive_dir.join(format!("{stem}_{i}"))
421        } else {
422            archive_dir.join(format!("{stem}_{i}.{ext}"))
423        };
424        if !candidate.exists() {
425            return candidate;
426        }
427    }
428
429    direct
430}
431
/// Split `filename` at its last `.` into `(stem, extension)`.
///
/// The extension is empty when the name contains no `.`.
fn split_name(filename: &str) -> (&str, &str) {
    filename.rsplit_once('.').unwrap_or((filename, ""))
}
438
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sqlite::SqliteMemory;
    use crate::traits::{Memory, MemoryCategory};
    use tempfile::TempDir;

    /// Configuration with every setting at its default.
    fn default_cfg() -> MemoryConfig {
        MemoryConfig::default()
    }

    /// Local calendar date `days` days before today, formatted as `YYYY-MM-DD`.
    fn days_ago(days: i64) -> String {
        (Local::now().date_naive() - Duration::days(days))
            .format("%Y-%m-%d")
            .to_string()
    }

    #[test]
    fn archives_old_daily_memory_files() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let memory_dir = workspace.join("memory");
        fs::create_dir_all(&memory_dir).unwrap();

        let old = days_ago(10);
        let today = days_ago(0);

        let old_file = memory_dir.join(format!("{old}.md"));
        let today_file = memory_dir.join(format!("{today}.md"));
        fs::write(&old_file, "old note").unwrap();
        fs::write(&today_file, "fresh note").unwrap();

        run_if_due(&default_cfg(), workspace).unwrap();

        let archived = memory_dir.join("archive").join(format!("{old}.md"));
        assert!(!old_file.exists(), "old daily file should be archived");
        assert!(
            archived.exists(),
            "old daily file should exist in memory/archive"
        );
        assert!(today_file.exists(), "today file should remain in place");
    }

    #[test]
    fn archives_old_session_files() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let sessions_dir = workspace.join("sessions");
        fs::create_dir_all(&sessions_dir).unwrap();

        let old_name = format!("{}-agent.log", days_ago(10));
        let old_file = sessions_dir.join(&old_name);
        fs::write(&old_file, "old session").unwrap();

        run_if_due(&default_cfg(), workspace).unwrap();

        let archived = sessions_dir.join("archive").join(&old_name);
        assert!(!old_file.exists(), "old session file should be archived");
        assert!(archived.exists(), "archived session file should exist");
    }

    #[test]
    fn skips_second_run_within_cadence_window() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let memory_dir = workspace.join("memory");
        fs::create_dir_all(&memory_dir).unwrap();

        let file_a = memory_dir.join(format!("{}.md", days_ago(10)));
        fs::write(&file_a, "first").unwrap();

        run_if_due(&default_cfg(), workspace).unwrap();
        assert!(!file_a.exists(), "first old file should be archived");

        let file_b = memory_dir.join(format!("{}.md", days_ago(9)));
        fs::write(&file_b, "second").unwrap();

        // The state written by the first pass throttles this immediate second call.
        run_if_due(&default_cfg(), workspace).unwrap();
        assert!(
            file_b.exists(),
            "second file should remain because run is throttled"
        );
    }

    #[test]
    fn purges_old_memory_archives() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();
        let archive_dir = workspace.join("memory").join("archive");
        fs::create_dir_all(&archive_dir).unwrap();

        let old_file = archive_dir.join(format!("{}.md", days_ago(40)));
        let keep_file = archive_dir.join(format!("{}.md", days_ago(5)));
        fs::write(&old_file, "expired").unwrap();
        fs::write(&keep_file, "recent").unwrap();

        run_if_due(&default_cfg(), workspace).unwrap();

        assert!(!old_file.exists(), "old archived file should be purged");
        assert!(keep_file.exists(), "recent archived file should remain");
    }

    #[tokio::test]
    async fn prunes_old_conversation_rows_in_sqlite_backend() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path();

        // Seed one conversation row and one core row, then release the backend.
        {
            let mem = SqliteMemory::new("sqlite", workspace).unwrap();
            mem.store("conv_old", "outdated", MemoryCategory::Conversation, None)
                .await
                .unwrap();
            mem.store("core_keep", "durable", MemoryCategory::Core, None)
                .await
                .unwrap();
        }

        // Backdate the conversation row so it falls outside the retention window.
        {
            let db_path = workspace.join("memory").join("brain.db");
            let conn = Connection::open(&db_path).unwrap();
            let old_cutoff = (Local::now() - Duration::days(60)).to_rfc3339();
            conn.execute(
                "UPDATE memories SET created_at = ?1, updated_at = ?1 WHERE key = 'conv_old'",
                params![old_cutoff],
            )
            .unwrap();
        }

        // Disable file archiving/purging so only row pruning is exercised.
        let mut cfg = default_cfg();
        cfg.archive_after_days = 0;
        cfg.purge_after_days = 0;
        cfg.conversation_retention_days = 30;

        run_if_due(&cfg, workspace).unwrap();

        let reopened = SqliteMemory::new("sqlite", workspace).unwrap();
        let pruned = reopened.get("conv_old").await.unwrap();
        assert!(pruned.is_none(), "old conversation rows should be pruned");
        let kept = reopened.get("core_keep").await.unwrap();
        assert!(kept.is_some(), "core memory should remain");
    }
}