Files
the-nexus/mnemosyne/cli.py
2026-04-11 20:15:58 -04:00

164 lines
4.9 KiB
Python

"""
Mnemosyne CLI
Usage:
mnemosyne ingest <path> [--db PATH] [--chunk-size N] [--overlap N]
mnemosyne query <text> [--db PATH] [--limit N]
mnemosyne list [--db PATH] [--limit N]
mnemosyne stats [--db PATH]
mnemosyne doc <id> [--db PATH]
"""
import argparse
import json
import sys
from pathlib import Path
from .ingest import ingest_file, ingest_directory, get_stats, DEFAULT_DB_PATH, DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP
from .index import query, list_documents, get_document
def cmd_ingest(args):
    """Ingest a single file or a whole directory into the archive.

    Reads ``args.path``, ``args.db`` (falling back to ``DEFAULT_DB_PATH``),
    ``args.chunk_size`` and ``args.overlap``. A summary is printed to stdout.
    When the path is neither a directory nor a regular file, an error is
    written to stderr and the process exits with status 1.
    """
    target = Path(args.path)
    db_path = args.db or DEFAULT_DB_PATH

    if target.is_dir():
        summary = ingest_directory(
            str(target),
            db_path=db_path,
            chunk_size=args.chunk_size,
            chunk_overlap=args.overlap,
        )
        print(f"Ingested: {summary['ingested']} files")
        print(f"Skipped (duplicates): {summary['skipped']}")
        failures = summary["errors"]
        if failures:
            print(f"Errors: {len(failures)}")
            for failure in failures:
                print(f" {failure['file']}: {failure['error']}")
        return

    if target.is_file():
        doc_id = ingest_file(
            str(target),
            db_path=db_path,
            chunk_size=args.chunk_size,
            chunk_overlap=args.overlap,
        )
        # ingest_file returning None is reported as a duplicate.
        if doc_id is None:
            print(f"Skipped (duplicate): {target.name}")
        else:
            print(f"Ingested: {target.name} (doc_id={doc_id})")
        return

    print(f"Error: {args.path} not found", file=sys.stderr)
    sys.exit(1)
def cmd_query(args):
    """Search the archive and print the matches as a numbered list.

    Reads ``args.text``, ``args.limit`` and ``args.db`` (falling back to
    ``DEFAULT_DB_PATH``). For each result the title, source, score, retrieval
    method(s) and a content preview of at most 200 characters are printed.
    Prints "No results found." when the query returns nothing.
    """
    db = args.db or DEFAULT_DB_PATH
    results = query(args.text, db_path=db, limit=args.limit)
    if not results:
        print("No results found.")
        return

    preview_len = 200
    for i, r in enumerate(results, 1):
        # Use `or` fallbacks so keys that are present but None/empty do not
        # crash Path(), the float format, or str.join below.
        source = r.get("source") or "?"
        title = r.get("title") or Path(source).name
        score = r.get("rrf_score") or r.get("score") or 0
        methods = r.get("methods") or [r.get("method") or "?"]

        content = r["content"]
        content_preview = content[:preview_len].replace("\n", " ")
        # Only mark the preview as truncated when text was actually cut off.
        ellipsis = "..." if len(content) > preview_len else ""

        print(f"[{i}] {title}")
        print(f" Source: {source}")
        print(f" Score: {score:.4f} ({', '.join(methods)})")
        print(f" {content_preview}{ellipsis}")
        print()
def cmd_list(args):
    """Print a table of the documents stored in the archive.

    Reads ``args.db`` (falling back to ``DEFAULT_DB_PATH``) and ``args.limit``.
    Each row shows the document id, chunk count, title (cut to 40 characters)
    and the file-name part of its source (cut to 30 characters). Prints
    "Archive is empty." when there are no documents.
    """
    documents = list_documents(db_path=args.db or DEFAULT_DB_PATH, limit=args.limit)
    if not documents:
        print("Archive is empty.")
        return

    header = f"{'ID':>5} {'Chunks':>6} {'Title':<40} Source"
    print(header)
    print("-" * 90)

    for doc in documents:
        short_title = (doc["title"] or "?")[:40]
        raw_source = doc["source"]
        if raw_source:
            source_name = Path(raw_source).name[:30]
        else:
            # Missing or empty source is shown as a placeholder.
            source_name = "?"
        print(f"{doc['id']:>5} {doc['chunks']:>6} {short_title:<40} {source_name}")
def cmd_stats(args):
    """Print the archive's document, chunk and source counts.

    Reads ``args.db``, falling back to ``DEFAULT_DB_PATH`` when it is unset.
    """
    stats = get_stats(db_path=args.db or DEFAULT_DB_PATH)
    # (label shown to the user, key in the stats mapping), in output order.
    rows = (
        ("Documents", "documents"),
        ("Chunks", "chunks"),
        ("Sources", "sources"),
    )
    for label, key in rows:
        print(f"{label}: {stats[key]}")
def cmd_doc(args):
    """Print one document's details followed by its full content.

    Reads ``args.id`` and ``args.db`` (falling back to ``DEFAULT_DB_PATH``).
    When the lookup returns nothing, an error is written to stderr and the
    process exits with status 1.
    """
    db = args.db or DEFAULT_DB_PATH
    d = get_document(args.id, db_path=db)
    if not d:
        # Diagnostics go to stderr (as cmd_ingest does for a missing path) so
        # stdout carries only document output when piped or redirected.
        print(f"Document #{args.id} not found.", file=sys.stderr)
        sys.exit(1)

    print(f"ID: {d['id']}")
    print(f"Title: {d['title']}")
    print(f"Source: {d['source']}")
    print(f"Ingested: {d['ingested_at']}")
    print(f"Metadata: {json.dumps(d['metadata'], indent=2)}")
    print(f"\n--- Content ({len(d['content'])} chars) ---\n")
    print(d["content"])
def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    ``--db`` is accepted both before the subcommand
    (``mnemosyne --db X list``) and after it (``mnemosyne list --db X``);
    when given in both places the one after the subcommand wins. With no
    subcommand the top-level help is printed.
    """
    db_help = "Database path (default: mnemosyne.db)"

    parser = argparse.ArgumentParser(
        prog="mnemosyne",
        description="Mnemosyne — The Living Holographic Archive",
    )
    parser.add_argument("--db", help=db_help)

    # Shared options for every subcommand, so the documented form
    # `mnemosyne <command> ... --db PATH` is accepted. SUPPRESS matters here:
    # argparse copies a subparser's namespace over the main one, so a plain
    # default of None would erase a --db given before the subcommand.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("--db", default=argparse.SUPPRESS, help=db_help)

    sub = parser.add_subparsers(dest="command")

    # ingest
    p_ingest = sub.add_parser(
        "ingest", parents=[common], help="Ingest files or directories"
    )
    p_ingest.add_argument("path", help="File or directory to ingest")
    p_ingest.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE)
    p_ingest.add_argument("--overlap", type=int, default=DEFAULT_CHUNK_OVERLAP)

    # query
    p_query = sub.add_parser("query", parents=[common], help="Search the archive")
    p_query.add_argument("text", help="Search query")
    p_query.add_argument("--limit", type=int, default=10)

    # list
    p_list = sub.add_parser(
        "list", parents=[common], help="List documents in archive"
    )
    p_list.add_argument("--limit", type=int, default=50)

    # stats
    sub.add_parser("stats", parents=[common], help="Show archive statistics")

    # doc
    p_doc = sub.add_parser("doc", parents=[common], help="Show document by ID")
    p_doc.add_argument("id", type=int, help="Document ID")

    handlers = {
        "ingest": cmd_ingest,
        "query": cmd_query,
        "list": cmd_list,
        "stats": cmd_stats,
        "doc": cmd_doc,
    }

    args = parser.parse_args()
    # args.command is None when no subcommand was given.
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return
    handler(args)
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()