Skip to main content

zeroclaw_memory/
markdown.rs

1use super::traits::{Memory, MemoryCategory, MemoryEntry, is_recent_recall_query};
2use async_trait::async_trait;
3use chrono::{DateTime, FixedOffset, Local, NaiveDate};
4use std::path::{Path, PathBuf};
5use tokio::fs;
6
7/// Decide whether a markdown entry's `timestamp` stem falls inside the
8/// recall `[since, until]` window. Markdown timestamps are file stems, not
9/// RFC 3339 strings: daily logs use a bare `YYYY-MM-DD` date and the core
10/// file uses `MEMORY.md`. We therefore (1) try RFC 3339, (2) fall back to a
11/// `NaiveDate` compared at day granularity, and (3) leave non-date stems
12/// (e.g. `MEMORY.md`) unfiltered so evergreen core memories still surface.
13fn entry_in_window(
14    timestamp: &str,
15    since: Option<&DateTime<FixedOffset>>,
16    until: Option<&DateTime<FixedOffset>>,
17) -> bool {
18    if let Ok(ts) = DateTime::parse_from_rfc3339(timestamp) {
19        if let Some(s) = since
20            && ts < *s
21        {
22            return false;
23        }
24        if let Some(u) = until
25            && ts > *u
26        {
27            return false;
28        }
29        return true;
30    }
31    if let Ok(date) = NaiveDate::parse_from_str(timestamp, "%Y-%m-%d") {
32        if let Some(s) = since
33            && date < s.date_naive()
34        {
35            return false;
36        }
37        if let Some(u) = until
38            && date > u.date_naive()
39        {
40            return false;
41        }
42        return true;
43    }
44    // Non-date stems (e.g. MEMORY.md) are evergreen; never window-filtered.
45    true
46}
47
/// File-backed memory store in which plain markdown files are the source of
/// truth.
///
/// On-disk layout, relative to the workspace directory:
///
/// - `MEMORY.md` — curated long-term memory (core)
/// - `memory/YYYY-MM-DD.md` — daily logs (append-only)
pub struct MarkdownMemory {
    /// Alias given at construction time.
    alias: String,
    /// Root directory that holds `MEMORY.md` and the `memory/` subdirectory.
    workspace_dir: PathBuf,
}
57
58impl MarkdownMemory {
59    pub fn new(alias: &str, workspace_dir: &Path) -> Self {
60        Self {
61            alias: alias.to_string(),
62            workspace_dir: workspace_dir.to_path_buf(),
63        }
64    }
65
66    fn memory_dir(&self) -> PathBuf {
67        self.workspace_dir.join("memory")
68    }
69
70    fn core_path(&self) -> PathBuf {
71        self.workspace_dir.join("MEMORY.md")
72    }
73
74    fn daily_path(&self) -> PathBuf {
75        let date = Local::now().format("%Y-%m-%d").to_string();
76        self.memory_dir().join(format!("{date}.md"))
77    }
78
79    async fn ensure_dirs(&self) -> anyhow::Result<()> {
80        fs::create_dir_all(self.memory_dir()).await?;
81        Ok(())
82    }
83
84    async fn append_to_file(&self, path: &Path, content: &str) -> anyhow::Result<()> {
85        self.ensure_dirs().await?;
86
87        let existing = if path.exists() {
88            fs::read_to_string(path).await.unwrap_or_default()
89        } else {
90            String::new()
91        };
92
93        let updated = if existing.is_empty() {
94            let header = if path == self.core_path() {
95                "# Long-Term Memory\n\n"
96            } else {
97                let date = Local::now().format("%Y-%m-%d").to_string();
98                &format!("# Daily Log — {date}\n\n")
99            };
100            format!("{header}{content}\n")
101        } else {
102            format!("{existing}\n{content}\n")
103        };
104
105        fs::write(path, updated).await?;
106        Ok(())
107    }
108
109    fn parse_entries_from_file(
110        path: &Path,
111        content: &str,
112        category: &MemoryCategory,
113    ) -> Vec<MemoryEntry> {
114        let filename = path
115            .file_stem()
116            .and_then(|s| s.to_str())
117            .unwrap_or("unknown");
118
119        content
120            .lines()
121            .filter(|line| {
122                let trimmed = line.trim();
123                !trimmed.is_empty() && !trimmed.starts_with('#')
124            })
125            .enumerate()
126            .map(|(i, line)| {
127                let trimmed = line.trim();
128                let clean = trimmed.strip_prefix("- ").unwrap_or(trimmed);
129                MemoryEntry {
130                    id: format!("{filename}:{i}"),
131                    key: format!("{filename}:{i}"),
132                    content: clean.to_string(),
133                    category: category.clone(),
134                    timestamp: filename.to_string(),
135                    session_id: None,
136                    score: None,
137                    namespace: "default".into(),
138                    importance: None,
139                    superseded_by: None,
140                    agent_alias: None,
141                    agent_id: None,
142                }
143            })
144            .collect()
145    }
146
147    async fn read_all_entries(&self) -> anyhow::Result<Vec<MemoryEntry>> {
148        let mut entries = Vec::new();
149
150        // Read MEMORY.md (core)
151        let core_path = self.core_path();
152        if core_path.exists() {
153            let content = fs::read_to_string(&core_path).await?;
154            entries.extend(Self::parse_entries_from_file(
155                &core_path,
156                &content,
157                &MemoryCategory::Core,
158            ));
159        }
160
161        // Read daily logs
162        let mem_dir = self.memory_dir();
163        if mem_dir.exists() {
164            let mut dir = fs::read_dir(&mem_dir).await?;
165            while let Some(entry) = dir.next_entry().await? {
166                let path = entry.path();
167                if path.extension().and_then(|e| e.to_str()) == Some("md") {
168                    let content = fs::read_to_string(&path).await?;
169                    entries.extend(Self::parse_entries_from_file(
170                        &path,
171                        &content,
172                        &MemoryCategory::Daily,
173                    ));
174                }
175            }
176        }
177
178        entries.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
179        Ok(entries)
180    }
181}
182
183#[async_trait]
184impl Memory for MarkdownMemory {
185    fn name(&self) -> &str {
186        "markdown"
187    }
188
189    async fn store(
190        &self,
191        key: &str,
192        content: &str,
193        category: MemoryCategory,
194        _session_id: Option<&str>,
195    ) -> anyhow::Result<()> {
196        let entry = format!("- **{key}**: {content}");
197        let path = match category {
198            MemoryCategory::Core => self.core_path(),
199            _ => self.daily_path(),
200        };
201        self.append_to_file(&path, &entry).await
202    }
203
204    async fn recall(
205        &self,
206        query: &str,
207        limit: usize,
208        _session_id: Option<&str>,
209        since: Option<&str>,
210        until: Option<&str>,
211    ) -> anyhow::Result<Vec<MemoryEntry>> {
212        let since_dt = since
213            .map(chrono::DateTime::parse_from_rfc3339)
214            .transpose()
215            .map_err(|e| {
216                ::zeroclaw_log::record!(
217                    WARN,
218                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
219                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
220                        .with_attrs(
221                            ::serde_json::json!({"field": "since", "error": format!("{}", e)})
222                        ),
223                    "recall window bound rejected"
224                );
225                anyhow::Error::msg(format!("invalid 'since' date (expected RFC 3339): {e}"))
226            })?;
227        let until_dt = until
228            .map(chrono::DateTime::parse_from_rfc3339)
229            .transpose()
230            .map_err(|e| {
231                ::zeroclaw_log::record!(
232                    WARN,
233                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
234                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
235                        .with_attrs(
236                            ::serde_json::json!({"field": "until", "error": format!("{}", e)})
237                        ),
238                    "recall window bound rejected"
239                );
240                anyhow::Error::msg(format!("invalid 'until' date (expected RFC 3339): {e}"))
241            })?;
242        if let (Some(s), Some(u)) = (&since_dt, &until_dt)
243            && s >= u
244        {
245            anyhow::bail!("'since' must be before 'until'");
246        }
247
248        let all = self.read_all_entries().await?;
249        let keywords: Vec<String> = if is_recent_recall_query(query) {
250            Vec::new()
251        } else {
252            query
253                .to_lowercase()
254                .split_whitespace()
255                .map(str::to_string)
256                .collect()
257        };
258
259        let mut scored: Vec<MemoryEntry> = all
260            .into_iter()
261            .filter_map(|mut entry| {
262                if !entry_in_window(&entry.timestamp, since_dt.as_ref(), until_dt.as_ref()) {
263                    return None;
264                }
265                if keywords.is_empty() {
266                    entry.score = Some(1.0);
267                    return Some(entry);
268                }
269                let content_lower = entry.content.to_lowercase();
270                let matched = keywords
271                    .iter()
272                    .filter(|kw| content_lower.contains(kw.as_str()))
273                    .count();
274                if matched > 0 {
275                    #[allow(clippy::cast_precision_loss)]
276                    let score = matched as f64 / keywords.len() as f64;
277                    entry.score = Some(score);
278                    Some(entry)
279                } else {
280                    None
281                }
282            })
283            .collect();
284
285        scored.sort_by(|a, b| {
286            if keywords.is_empty() {
287                b.timestamp.as_str().cmp(a.timestamp.as_str())
288            } else {
289                b.score
290                    .partial_cmp(&a.score)
291                    .unwrap_or(std::cmp::Ordering::Equal)
292            }
293        });
294        scored.truncate(limit);
295        Ok(scored)
296    }
297
298    async fn get(&self, key: &str) -> anyhow::Result<Option<MemoryEntry>> {
299        let all = self.read_all_entries().await?;
300        Ok(all
301            .into_iter()
302            .find(|e| e.key == key || e.content.contains(key)))
303    }
304
305    async fn list(
306        &self,
307        category: Option<&MemoryCategory>,
308        _session_id: Option<&str>,
309    ) -> anyhow::Result<Vec<MemoryEntry>> {
310        let all = self.read_all_entries().await?;
311        match category {
312            Some(cat) => Ok(all.into_iter().filter(|e| &e.category == cat).collect()),
313            None => Ok(all),
314        }
315    }
316
317    async fn forget(&self, _key: &str) -> anyhow::Result<bool> {
318        // Markdown memory is append-only by design (audit trail)
319        // Return false to indicate the entry wasn't removed
320        Ok(false)
321    }
322
323    async fn forget_for_agent(&self, _key: &str, _agent_id: &str) -> anyhow::Result<bool> {
324        Ok(false)
325    }
326
327    async fn count(&self) -> anyhow::Result<usize> {
328        let all = self.read_all_entries().await?;
329        Ok(all.len())
330    }
331
332    async fn health_check(&self) -> bool {
333        self.workspace_dir.exists()
334    }
335
336    async fn store_with_agent(
337        &self,
338        key: &str,
339        content: &str,
340        category: MemoryCategory,
341        session_id: Option<&str>,
342        _namespace: Option<&str>,
343        _importance: Option<f64>,
344        _agent_id: Option<&str>,
345    ) -> anyhow::Result<()> {
346        // Markdown's per-agent attribution is the on-disk path: the
347        // backend writes into `<workspace_dir>/MEMORY.md` and the
348        // workspace_dir is owned by the agent that constructed this
349        // backend. The agent_id parameter is redundant and ignored at
350        // the trait boundary; cross-agent reads merge multiple
351        // MarkdownMemory instances at the `AgentScopedMarkdownMemory`
352        // wrapper layer.
353        self.store(key, content, category, session_id).await
354    }
355
356    async fn recall_for_agents(
357        &self,
358        _allowed_agent_ids: &[&str],
359        query: &str,
360        limit: usize,
361        session_id: Option<&str>,
362        since: Option<&str>,
363        until: Option<&str>,
364    ) -> anyhow::Result<Vec<MemoryEntry>> {
365        // Same per-agent-path attribution model as `store_with_agent`:
366        // a single MarkdownMemory instance reads only its own
367        // workspace_dir. Cross-agent recall is composed by
368        // `AgentScopedMarkdownMemory`, which holds an own
369        // MarkdownMemory plus a Vec<(alias, MarkdownMemory)> peer set
370        // and unions their results with attribution.
371        self.recall(query, limit, session_id, since, until).await
372    }
373}
374
375impl ::zeroclaw_api::attribution::Attributable for MarkdownMemory {
376    fn role(&self) -> ::zeroclaw_api::attribution::Role {
377        ::zeroclaw_api::attribution::Role::Memory(::zeroclaw_api::attribution::MemoryKind::Markdown)
378    }
379    fn alias(&self) -> &str {
380        &self.alias
381    }
382}
383
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;
    use tempfile::TempDir;

    /// Fresh backend rooted in a temporary directory. The `TempDir` guard is
    /// returned so the directory outlives the test body.
    fn temp_workspace() -> (TempDir, MarkdownMemory) {
        let tmp = TempDir::new().unwrap();
        let mem = MarkdownMemory::new("markdown", tmp.path());
        (tmp, mem)
    }

    #[tokio::test]
    async fn markdown_name() {
        let (_tmp, mem) = temp_workspace();
        assert_eq!(mem.name(), "markdown");
    }

    #[tokio::test]
    async fn markdown_health_check() {
        let (_tmp, mem) = temp_workspace();
        assert!(mem.health_check().await);
    }

    #[tokio::test]
    async fn markdown_store_core() {
        let (_tmp, mem) = temp_workspace();
        mem.store("pref", "User likes Rust", MemoryCategory::Core, None)
            .await
            .unwrap();
        let content = fs::read_to_string(mem.core_path()).await.unwrap();
        assert!(content.contains("User likes Rust"));
    }

    #[tokio::test]
    async fn markdown_store_daily() {
        let (_tmp, mem) = temp_workspace();
        mem.store("note", "Finished tests", MemoryCategory::Daily, None)
            .await
            .unwrap();
        let path = mem.daily_path();
        let content = fs::read_to_string(path).await.unwrap();
        assert!(content.contains("Finished tests"));
    }

    #[tokio::test]
    async fn markdown_recall_keyword() {
        let (_tmp, mem) = temp_workspace();
        mem.store("a", "Rust is fast", MemoryCategory::Core, None)
            .await
            .unwrap();
        mem.store("b", "Python is slow", MemoryCategory::Core, None)
            .await
            .unwrap();
        mem.store("c", "Rust and safety", MemoryCategory::Core, None)
            .await
            .unwrap();

        let results = mem.recall("Rust", 10, None, None, None).await.unwrap();
        assert!(results.len() >= 2);
        assert!(
            results
                .iter()
                .all(|r| r.content.to_lowercase().contains("rust"))
        );
    }

    #[tokio::test]
    async fn markdown_recall_no_match() {
        let (_tmp, mem) = temp_workspace();
        mem.store("a", "Rust is great", MemoryCategory::Core, None)
            .await
            .unwrap();
        let results = mem
            .recall("javascript", 10, None, None, None)
            .await
            .unwrap();
        assert!(results.is_empty());
    }

    #[tokio::test]
    async fn markdown_recall_star_query_returns_recent_entries() {
        let (_tmp, mem) = temp_workspace();
        mem.store("a", "first memory", MemoryCategory::Core, None)
            .await
            .unwrap();
        mem.store("b", "second memory", MemoryCategory::Daily, None)
            .await
            .unwrap();

        let results = mem.recall("*", 10, None, None, None).await.unwrap();
        assert_eq!(results.len(), 2);
        assert!(
            results
                .iter()
                .any(|entry| entry.content.contains("first memory"))
        );
        assert!(
            results
                .iter()
                .any(|entry| entry.content.contains("second memory"))
        );
    }

    #[tokio::test]
    async fn markdown_count() {
        let (_tmp, mem) = temp_workspace();
        mem.store("a", "first", MemoryCategory::Core, None)
            .await
            .unwrap();
        mem.store("b", "second", MemoryCategory::Core, None)
            .await
            .unwrap();
        let count = mem.count().await.unwrap();
        assert!(count >= 2);
    }

    #[tokio::test]
    async fn markdown_list_by_category() {
        let (_tmp, mem) = temp_workspace();
        mem.store("a", "core fact", MemoryCategory::Core, None)
            .await
            .unwrap();
        mem.store("b", "daily note", MemoryCategory::Daily, None)
            .await
            .unwrap();

        let core = mem.list(Some(&MemoryCategory::Core), None).await.unwrap();
        assert!(core.iter().all(|e| e.category == MemoryCategory::Core));

        let daily = mem.list(Some(&MemoryCategory::Daily), None).await.unwrap();
        assert!(daily.iter().all(|e| e.category == MemoryCategory::Daily));
    }

    #[tokio::test]
    async fn markdown_forget_is_noop() {
        let (_tmp, mem) = temp_workspace();
        mem.store("a", "permanent", MemoryCategory::Core, None)
            .await
            .unwrap();
        let removed = mem.forget("a").await.unwrap();
        assert!(!removed, "Markdown memory is append-only");
    }

    #[tokio::test]
    async fn markdown_empty_recall() {
        let (_tmp, mem) = temp_workspace();
        let results = mem.recall("anything", 10, None, None, None).await.unwrap();
        assert!(results.is_empty());
    }

    #[tokio::test]
    async fn markdown_empty_count() {
        let (_tmp, mem) = temp_workspace();
        assert_eq!(mem.count().await.unwrap(), 0);
    }

    // Markdown has no agents table and no UUID indirection. Rows return
    // `agent_alias = agent_id = None`; the dashboard renders these as
    // "unattributed". This locks that contract so a future change can't
    // silently leak a synthesized UUID into `agent_alias` (the bug that
    // bit the SQL backends before the JOIN landed).
    #[tokio::test]
    async fn markdown_entries_carry_no_agent_attribution() {
        let (_tmp, mem) = temp_workspace();
        mem.store("k", "v", MemoryCategory::Core, None)
            .await
            .unwrap();
        // Entry keys are `<file stem>:<index>`, and the stem of `MEMORY.md`
        // is `MEMORY`. The lookup must succeed, otherwise the assertions
        // below would be skipped and the test would pass vacuously.
        let entry = mem
            .get("MEMORY:0")
            .await
            .unwrap()
            .expect("the stored core entry must be retrievable by its positional key");
        assert!(
            entry.agent_alias.is_none(),
            "markdown rows must never claim an agent alias"
        );
        assert!(
            entry.agent_id.is_none(),
            "markdown rows must never claim a raw agent id either"
        );
        // list path must show the same shape regardless of how a row is
        // surfaced (keyed lookup vs. enumeration).
        let rows = mem.list(None, None).await.unwrap();
        assert!(!rows.is_empty(), "list must surface the stored entry");
        for row in rows {
            assert!(
                row.agent_alias.is_none(),
                "list path must not synthesize aliases"
            );
            assert!(row.agent_id.is_none(), "list path must not synthesize ids");
        }
    }

    // Markdown entry timestamps are file stems (a bare `YYYY-MM-DD` for daily
    // logs), not RFC 3339. `recall` must still honour the `since`/`until`
    // window: a daily entry is dropped when the window ends before its date
    // and surfaces when the window opens in the past. Evergreen `MEMORY.md`
    // entries (non-date stems) must NOT be filtered out by the window.
    #[tokio::test]
    async fn markdown_recall_since_until_filters_daily() {
        let (_tmp, mem) = temp_workspace();
        mem.store("today", "daily standup note", MemoryCategory::Daily, None)
            .await
            .unwrap();
        mem.store("core", "evergreen daily fact", MemoryCategory::Core, None)
            .await
            .unwrap();

        let today = Local::now().date_naive();
        let yesterday = (today - chrono::Duration::days(1))
            .and_hms_opt(23, 59, 59)
            .unwrap();
        let yesterday_rfc = Local.from_local_datetime(&yesterday).unwrap().to_rfc3339();
        let past = (today - chrono::Duration::days(7))
            .and_hms_opt(0, 0, 0)
            .unwrap();
        let past_rfc = Local.from_local_datetime(&past).unwrap().to_rfc3339();

        // until = yesterday: today's daily entry is outside the window and
        // must be dropped, but the evergreen MEMORY.md entry must survive.
        let bounded = mem
            .recall("daily", 10, None, None, Some(&yesterday_rfc))
            .await
            .unwrap();
        assert!(
            !bounded.iter().any(|e| e.content.contains("standup")),
            "today's daily entry must be excluded when until=yesterday"
        );
        assert!(
            bounded.iter().any(|e| e.content.contains("evergreen")),
            "evergreen MEMORY.md entry must not be window-filtered"
        );

        // since = a week ago: today's daily entry is inside the window.
        let recent = mem
            .recall("daily", 10, None, Some(&past_rfc), None)
            .await
            .unwrap();
        assert!(
            recent.iter().any(|e| e.content.contains("standup")),
            "today's daily entry must be included when since is in the past"
        );
    }
}