Files
the-nexus/nexus/mnemosyne/cli.py
Alexander Whitestone b5ed262581
Some checks failed
CI / test (pull_request) Failing after 9s
CI / validate (pull_request) Failing after 15s
Review Approval Gate / verify-review (pull_request) Failing after 2s
feat(mnemosyne): implement discover() — serendipitous entry exploration (#1271)
- Added discover() method to archive.py (probabilistic, vitality-weighted)
- Added cmd_discover CLI handler with subparser
- Supports: -n COUNT, -t TOPIC, --vibrant flag
- prefer_fading=True surfaces neglected entries
2026-04-12 12:07:28 -04:00

578 lines
21 KiB
Python

"""CLI interface for Mnemosyne.
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
mnemosyne topics, mnemosyne remove, mnemosyne export,
mnemosyne clusters, mnemosyne hubs, mnemosyne bridges, mnemosyne rebuild,
mnemosyne tag, mnemosyne untag, mnemosyne retag,
mnemosyne timeline, mnemosyne neighbors, mnemosyne path,
mnemosyne touch, mnemosyne decay, mnemosyne vitality,
mnemosyne fading, mnemosyne vibrant,
mnemosyne snapshot create|list|restore|diff,
mnemosyne resonance
"""
from __future__ import annotations
import argparse
import json
import sys
from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry
from nexus.mnemosyne.ingest import ingest_event, ingest_directory
def cmd_stats(args):
archive = MnemosyneArchive()
stats = archive.stats()
print(json.dumps(stats, indent=2))
def cmd_search(args):
from nexus.mnemosyne.embeddings import get_embedding_backend
backend = None
if getattr(args, "backend", "auto") != "auto":
backend = get_embedding_backend(prefer=args.backend)
elif getattr(args, "semantic", False):
try:
backend = get_embedding_backend()
except Exception:
pass
archive = MnemosyneArchive(embedding_backend=backend)
if getattr(args, "semantic", False):
results = archive.semantic_search(args.query, limit=args.limit)
else:
results = archive.search(args.query, limit=args.limit)
if not results:
print("No results found.")
return
for entry in results:
linked = len(entry.links)
print(f"[{entry.id[:8]}] {entry.title}")
print(f" Source: {entry.source} | Topics: {', '.join(entry.topics)} | Links: {linked}")
print(f" {entry.content[:120]}...")
print()
def cmd_ingest(args):
archive = MnemosyneArchive()
entry = ingest_event(
archive,
title=args.title,
content=args.content,
topics=args.topics.split(",") if args.topics else [],
)
print(f"Ingested: [{entry.id[:8]}] {entry.title} ({len(entry.links)} links)")
def cmd_ingest_dir(args):
archive = MnemosyneArchive()
ext = [e.strip() for e in args.ext.split(",")] if args.ext else None
added = ingest_directory(archive, args.path, extensions=ext)
print(f"Ingested {added} new entries from {args.path}")
def cmd_link(args):
archive = MnemosyneArchive()
entry = archive.get(args.entry_id)
if not entry:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
linked = archive.get_linked(entry.id, depth=args.depth)
if not linked:
print("No linked entries found.")
return
for e in linked:
print(f" [{e.id[:8]}] {e.title} (source: {e.source})")
def cmd_topics(args):
archive = MnemosyneArchive()
counts = archive.topic_counts()
if not counts:
print("No topics found.")
return
for topic, count in counts.items():
print(f" {topic}: {count}")
def cmd_remove(args):
archive = MnemosyneArchive()
removed = archive.remove(args.entry_id)
if removed:
print(f"Removed entry: {args.entry_id}")
else:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
def cmd_export(args):
archive = MnemosyneArchive()
topics = [t.strip() for t in args.topics.split(",")] if args.topics else None
data = archive.export(query=args.query or None, topics=topics)
print(json.dumps(data, indent=2))
def cmd_clusters(args):
archive = MnemosyneArchive()
clusters = archive.graph_clusters(min_size=args.min_size)
if not clusters:
print("No clusters found.")
return
for c in clusters:
print(f"Cluster {c['cluster_id']}: {c['size']} entries, density={c['density']}")
print(f" Topics: {', '.join(c['top_topics']) if c['top_topics'] else '(none)'}")
if args.verbose:
for eid in c["entries"]:
entry = archive.get(eid)
if entry:
print(f" [{eid[:8]}] {entry.title}")
print()
def cmd_hubs(args):
archive = MnemosyneArchive()
hubs = archive.hub_entries(limit=args.limit)
if not hubs:
print("No hubs found.")
return
for h in hubs:
e = h["entry"]
print(f"[{e.id[:8]}] {e.title}")
print(f" Degree: {h['degree']} (in: {h['inbound']}, out: {h['outbound']})")
print(f" Topics: {', '.join(h['topics']) if h['topics'] else '(none)'}")
print()
def cmd_bridges(args):
archive = MnemosyneArchive()
bridges = archive.bridge_entries()
if not bridges:
print("No bridge entries found.")
return
for b in bridges:
e = b["entry"]
print(f"[{e.id[:8]}] {e.title}")
print(f" Bridges {b['components_after_removal']} components (cluster: {b['cluster_size']} entries)")
print(f" Topics: {', '.join(b['topics']) if b['topics'] else '(none)'}")
print()
def cmd_rebuild(args):
archive = MnemosyneArchive()
threshold = args.threshold if args.threshold else None
total = archive.rebuild_links(threshold=threshold)
print(f"Rebuilt links: {total} connections across {archive.count} entries")
def cmd_tag(args):
archive = MnemosyneArchive()
tags = [t.strip() for t in args.tags.split(",") if t.strip()]
try:
entry = archive.add_tags(args.entry_id, tags)
except KeyError:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
print(f"[{entry.id[:8]}] {entry.title}")
print(f" Topics: {', '.join(entry.topics) if entry.topics else '(none)'}")
def cmd_untag(args):
archive = MnemosyneArchive()
tags = [t.strip() for t in args.tags.split(",") if t.strip()]
try:
entry = archive.remove_tags(args.entry_id, tags)
except KeyError:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
print(f"[{entry.id[:8]}] {entry.title}")
print(f" Topics: {', '.join(entry.topics) if entry.topics else '(none)'}")
def cmd_retag(args):
archive = MnemosyneArchive()
tags = [t.strip() for t in args.tags.split(",") if t.strip()]
try:
entry = archive.retag(args.entry_id, tags)
except KeyError:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
print(f"[{entry.id[:8]}] {entry.title}")
print(f" Topics: {', '.join(entry.topics) if entry.topics else '(none)'}")
def cmd_timeline(args):
archive = MnemosyneArchive()
try:
results = archive.by_date_range(args.start, args.end)
except ValueError as e:
print(f"Invalid date format: {e}")
sys.exit(1)
if not results:
print("No entries found in that date range.")
return
for entry in results:
print(f"[{entry.id[:8]}] {entry.created_at[:10]} {entry.title}")
print(f" Topics: {', '.join(entry.topics) if entry.topics else '(none)'}")
print()
def cmd_path(args):
archive = MnemosyneArchive(archive_path=args.archive) if args.archive else MnemosyneArchive()
path = archive.shortest_path(args.start, args.end)
if path is None:
print(f"No path found between {args.start} and {args.end}")
return
steps = archive.path_explanation(path)
print(f"Path ({len(steps)} hops):")
for i, step in enumerate(steps):
arrow = "" if i > 0 else " "
print(f"{arrow}{step['id']}: {step['title']}")
if step['topics']:
print(f" topics: {', '.join(step['topics'])}")
def cmd_consolidate(args):
archive = MnemosyneArchive()
merges = archive.consolidate(threshold=args.threshold, dry_run=args.dry_run)
if not merges:
print("No duplicates found.")
return
label = "[DRY RUN] " if args.dry_run else ""
for m in merges:
print(f"{label}Merge ({m['reason']}, score={m['score']:.4f}):")
print(f" kept: {m['kept'][:8]}")
print(f" removed: {m['removed'][:8]}")
if args.dry_run:
print(f"\n{len(merges)} pair(s) would be merged. Re-run without --dry-run to apply.")
else:
print(f"\nMerged {len(merges)} duplicate pair(s).")
def cmd_neighbors(args):
archive = MnemosyneArchive()
try:
results = archive.temporal_neighbors(args.entry_id, window_days=args.days)
except KeyError:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
if not results:
print("No temporal neighbors found.")
return
for entry in results:
print(f"[{entry.id[:8]}] {entry.created_at[:10]} {entry.title}")
print(f" Topics: {', '.join(entry.topics) if entry.topics else '(none)'}")
print()
def cmd_touch(args):
archive = MnemosyneArchive()
try:
entry = archive.touch(args.entry_id)
except KeyError:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
v = archive.get_vitality(entry.id)
print(f"[{entry.id[:8]}] {entry.title}")
print(f" Vitality: {v['vitality']:.4f} (boosted)")
def cmd_decay(args):
archive = MnemosyneArchive()
result = archive.apply_decay()
print(f"Applied decay to {result['total_entries']} entries")
print(f" Decayed: {result['decayed_count']}")
print(f" Avg vitality: {result['avg_vitality']:.4f}")
print(f" Fading (<0.3): {result['fading_count']}")
print(f" Vibrant (>0.7): {result['vibrant_count']}")
def cmd_vitality(args):
archive = MnemosyneArchive()
try:
v = archive.get_vitality(args.entry_id)
except KeyError:
print(f"Entry not found: {args.entry_id}")
sys.exit(1)
print(f"[{v['entry_id'][:8]}] {v['title']}")
print(f" Vitality: {v['vitality']:.4f}")
print(f" Last accessed: {v['last_accessed'] or 'never'}")
print(f" Age: {v['age_days']} days")
def cmd_fading(args):
archive = MnemosyneArchive()
results = archive.fading(limit=args.limit)
if not results:
print("Archive is empty.")
return
for v in results:
print(f"[{v['entry_id'][:8]}] {v['title']}")
print(f" Vitality: {v['vitality']:.4f} | Age: {v['age_days']}d | Last: {v['last_accessed'] or 'never'}")
print()
def cmd_snapshot(args):
archive = MnemosyneArchive()
if args.snapshot_cmd == "create":
result = archive.snapshot_create(label=args.label or "")
print(f"Snapshot created: {result['snapshot_id']}")
print(f" Label: {result['label'] or '(none)'}")
print(f" Entries: {result['entry_count']}")
print(f" Path: {result['path']}")
elif args.snapshot_cmd == "list":
snapshots = archive.snapshot_list()
if not snapshots:
print("No snapshots found.")
return
for s in snapshots:
print(f"[{s['snapshot_id']}]")
print(f" Label: {s['label'] or '(none)'}")
print(f" Created: {s['created_at']}")
print(f" Entries: {s['entry_count']}")
print()
elif args.snapshot_cmd == "restore":
try:
result = archive.snapshot_restore(args.snapshot_id)
except FileNotFoundError as e:
print(str(e))
sys.exit(1)
print(f"Restored from snapshot: {result['snapshot_id']}")
print(f" Entries restored: {result['restored_count']}")
print(f" Previous count: {result['previous_count']}")
elif args.snapshot_cmd == "diff":
try:
diff = archive.snapshot_diff(args.snapshot_id)
except FileNotFoundError as e:
print(str(e))
sys.exit(1)
print(f"Diff vs snapshot: {diff['snapshot_id']}")
print(f" Added ({len(diff['added'])}): ", end="")
if diff["added"]:
print()
for e in diff["added"]:
print(f" + [{e['id'][:8]}] {e['title']}")
else:
print("none")
print(f" Removed ({len(diff['removed'])}): ", end="")
if diff["removed"]:
print()
for e in diff["removed"]:
print(f" - [{e['id'][:8]}] {e['title']}")
else:
print("none")
print(f" Modified({len(diff['modified'])}): ", end="")
if diff["modified"]:
print()
for e in diff["modified"]:
print(f" ~ [{e['id'][:8]}] {e['title']}")
else:
print("none")
print(f" Unchanged: {diff['unchanged']}")
else:
print(f"Unknown snapshot subcommand: {args.snapshot_cmd}")
sys.exit(1)
def cmd_resonance(args):
archive = MnemosyneArchive()
topic = args.topic if args.topic else None
pairs = archive.resonance(threshold=args.threshold, limit=args.limit, topic=topic)
if not pairs:
print("No resonant pairs found.")
return
for p in pairs:
a = p["entry_a"]
b = p["entry_b"]
print(f"Score: {p['score']:.4f}")
print(f" [{a['id'][:8]}] {a['title']}")
print(f" Topics: {', '.join(a['topics']) if a['topics'] else '(none)'}")
print(f" [{b['id'][:8]}] {b['title']}")
print(f" Topics: {', '.join(b['topics']) if b['topics'] else '(none)'}")
print()
def cmd_discover(args):
archive = MnemosyneArchive()
topic = args.topic if args.topic else None
results = archive.discover(
count=args.count,
prefer_fading=not args.vibrant,
topic=topic,
)
if not results:
print("No entries to discover.")
return
for entry in results:
v = archive.get_vitality(entry.id)
print(f"[{entry.id[:8]}] {entry.title}")
print(f" Topics: {', '.join(entry.topics) if entry.topics else '(none)'}")
print(f" Vitality: {v['vitality']:.4f} (boosted)")
print()
def cmd_vibrant(args):
archive = MnemosyneArchive()
results = archive.vibrant(limit=args.limit)
if not results:
print("Archive is empty.")
return
for v in results:
print(f"[{v['entry_id'][:8]}] {v['title']}")
print(f" Vitality: {v['vitality']:.4f} | Age: {v['age_days']}d | Last: {v['last_accessed'] or 'never'}")
print()
def main():
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
sub = parser.add_subparsers(dest="command")
sub.add_parser("stats", help="Show archive statistics")
s = sub.add_parser("search", help="Search the archive")
s.add_argument("query", help="Search query")
s.add_argument("-n", "--limit", type=int, default=10)
s.add_argument("--semantic", action="store_true", help="Use holographic linker similarity scoring")
i = sub.add_parser("ingest", help="Ingest a new entry")
i.add_argument("--title", required=True)
i.add_argument("--content", required=True)
i.add_argument("--topics", default="", help="Comma-separated topics")
id_ = sub.add_parser("ingest-dir", help="Ingest a directory of files")
id_.add_argument("path", help="Directory to ingest")
id_.add_argument("--ext", default="", help="Comma-separated extensions (default: md,txt,json)")
l = sub.add_parser("link", help="Show linked entries")
l.add_argument("entry_id", help="Entry ID (or prefix)")
l.add_argument("-d", "--depth", type=int, default=1)
sub.add_parser("topics", help="List all topics with entry counts")
r = sub.add_parser("remove", help="Remove an entry by ID")
r.add_argument("entry_id", help="Entry ID to remove")
ex = sub.add_parser("export", help="Export filtered archive data as JSON")
ex.add_argument("-q", "--query", default="", help="Keyword filter")
ex.add_argument("-t", "--topics", default="", help="Comma-separated topic filter")
cl = sub.add_parser("clusters", help="Show graph clusters (connected components)")
cl.add_argument("-m", "--min-size", type=int, default=1, help="Minimum cluster size")
cl.add_argument("-v", "--verbose", action="store_true", help="List entries in each cluster")
hu = sub.add_parser("hubs", help="Show most connected entries (hub analysis)")
hu.add_argument("-n", "--limit", type=int, default=10, help="Max hubs to show")
sub.add_parser("bridges", help="Show bridge entries (articulation points)")
rb = sub.add_parser("rebuild", help="Recompute all links from scratch")
rb.add_argument("-t", "--threshold", type=float, default=None, help="Similarity threshold override")
tg = sub.add_parser("tag", help="Add tags to an existing entry")
tg.add_argument("entry_id", help="Entry ID")
tg.add_argument("tags", help="Comma-separated tags to add")
ut = sub.add_parser("untag", help="Remove tags from an existing entry")
ut.add_argument("entry_id", help="Entry ID")
ut.add_argument("tags", help="Comma-separated tags to remove")
rt = sub.add_parser("retag", help="Replace all tags on an existing entry")
rt.add_argument("entry_id", help="Entry ID")
rt.add_argument("tags", help="Comma-separated new tag list")
tl = sub.add_parser("timeline", help="Show entries within an ISO date range")
tl.add_argument("start", help="Start datetime (ISO format, e.g. 2024-01-01 or 2024-01-01T00:00:00Z)")
tl.add_argument("end", help="End datetime (ISO format)")
nb = sub.add_parser("neighbors", help="Show entries temporally near a given entry")
nb.add_argument("entry_id", help="Anchor entry ID")
nb.add_argument("--days", type=int, default=7, help="Window in days (default: 7)")
pa = sub.add_parser("path", help="Find shortest path between two memories")
pa.add_argument("start", help="Starting entry ID")
pa.add_argument("end", help="Target entry ID")
pa.add_argument("--archive", default=None, help="Archive path")
co = sub.add_parser("consolidate", help="Merge duplicate/near-duplicate entries")
co.add_argument("--dry-run", action="store_true", help="Show what would be merged without applying")
co.add_argument("--threshold", type=float, default=0.9, help="Similarity threshold (default: 0.9)")
tc = sub.add_parser("touch", help="Boost an entry's vitality by accessing it")
tc.add_argument("entry_id", help="Entry ID to touch")
dc = sub.add_parser("decay", help="Apply time-based decay to all entries")
vy = sub.add_parser("vitality", help="Show an entry's vitality status")
vy.add_argument("entry_id", help="Entry ID to check")
fg = sub.add_parser("fading", help="Show most neglected entries (lowest vitality)")
fg.add_argument("-n", "--limit", type=int, default=10, help="Max entries to show")
vb = sub.add_parser("vibrant", help="Show most alive entries (highest vitality)")
vb.add_argument("-n", "--limit", type=int, default=10, help="Max entries to show")
rs = sub.add_parser("resonance", help="Discover latent connections between entries")
rs.add_argument("-t", "--threshold", type=float, default=0.3, help="Minimum similarity score (default: 0.3)")
rs.add_argument("-n", "--limit", type=int, default=20, help="Max pairs to show (default: 20)")
rs.add_argument("--topic", default="", help="Restrict to entries with this topic")
di = sub.add_parser("discover", help="Serendipitous entry exploration")
di.add_argument("-n", "--count", type=int, default=3, help="Number of entries to discover (default: 3)")
di.add_argument("-t", "--topic", default="", help="Filter to entries with this topic")
di.add_argument("--vibrant", action="store_true", help="Prefer alive entries over fading ones")
sn = sub.add_parser("snapshot", help="Point-in-time backup and restore")
sn_sub = sn.add_subparsers(dest="snapshot_cmd")
sn_create = sn_sub.add_parser("create", help="Create a new snapshot")
sn_create.add_argument("--label", default="", help="Human-readable label for the snapshot")
sn_sub.add_parser("list", help="List available snapshots")
sn_restore = sn_sub.add_parser("restore", help="Restore archive from a snapshot")
sn_restore.add_argument("snapshot_id", help="Snapshot ID to restore")
sn_diff = sn_sub.add_parser("diff", help="Show what changed since a snapshot")
sn_diff.add_argument("snapshot_id", help="Snapshot ID to compare against")
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
if args.command == "snapshot" and not args.snapshot_cmd:
sn.print_help()
sys.exit(1)
dispatch = {
"stats": cmd_stats,
"search": cmd_search,
"ingest": cmd_ingest,
"ingest-dir": cmd_ingest_dir,
"link": cmd_link,
"topics": cmd_topics,
"remove": cmd_remove,
"export": cmd_export,
"clusters": cmd_clusters,
"hubs": cmd_hubs,
"bridges": cmd_bridges,
"rebuild": cmd_rebuild,
"tag": cmd_tag,
"untag": cmd_untag,
"retag": cmd_retag,
"timeline": cmd_timeline,
"neighbors": cmd_neighbors,
"consolidate": cmd_consolidate,
"path": cmd_path,
"touch": cmd_touch,
"decay": cmd_decay,
"vitality": cmd_vitality,
"fading": cmd_fading,
"vibrant": cmd_vibrant,
"resonance": cmd_resonance,
"discover": cmd_discover,
"snapshot": cmd_snapshot,
}
dispatch[args.command](args)
if __name__ == "__main__":
main()