164 lines
4.9 KiB
Python
164 lines
4.9 KiB
Python
"""
Mnemosyne CLI

Usage:
    mnemosyne ingest <path> [--db PATH] [--chunk-size N] [--overlap N]
    mnemosyne query <text> [--db PATH] [--limit N]
    mnemosyne list [--db PATH] [--limit N]
    mnemosyne stats [--db PATH]
    mnemosyne doc <id> [--db PATH]
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from .ingest import ingest_file, ingest_directory, get_stats, DEFAULT_DB_PATH, DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP
|
|
from .index import query, list_documents, get_document
|
|
|
|
|
|
def cmd_ingest(args):
    """Ingest a single file or a whole directory into the archive.

    Args:
        args: Parsed CLI namespace providing ``path``, ``db``,
            ``chunk_size`` and ``overlap``.

    Summary lines are printed to stdout. Diagnostics (the per-file errors
    reported by ``ingest_directory`` and the "not found" message) are
    printed to stderr so they stay separate from the normal output.

    Raises:
        SystemExit: With status 1 when ``args.path`` is neither an existing
            directory nor an existing regular file.
    """
    target = Path(args.path)
    db = args.db or DEFAULT_DB_PATH

    if target.is_dir():
        result = ingest_directory(
            str(target),
            db_path=db,
            chunk_size=args.chunk_size,
            chunk_overlap=args.overlap,
        )
        print(f"Ingested: {result['ingested']} files")
        print(f"Skipped (duplicates): {result['skipped']}")
        errors = result["errors"]
        if errors:
            # Failures are diagnostics, not results: keep them off stdout,
            # consistent with the "not found" branch below.
            print(f"Errors: {len(errors)}", file=sys.stderr)
            for err in errors:
                print(f" {err['file']}: {err['error']}", file=sys.stderr)
        return

    if target.is_file():
        doc_id = ingest_file(
            str(target),
            db_path=db,
            chunk_size=args.chunk_size,
            chunk_overlap=args.overlap,
        )
        # A ``None`` id is reported as a duplicate that was skipped.
        if doc_id is not None:
            print(f"Ingested: {target.name} (doc_id={doc_id})")
        else:
            print(f"Skipped (duplicate): {target.name}")
        return

    print(f"Error: {args.path} not found", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
def cmd_query(args):
    """Search the archive and print the matching chunks.

    Args:
        args: Parsed CLI namespace providing ``text``, ``db`` and ``limit``.

    Each result is read as a mapping with a ``content`` entry and optional
    ``source``, ``title``, ``rrf_score``/``score`` and ``methods``/``method``
    entries. Entries that are missing or ``None`` fall back to a placeholder
    instead of raising while formatting.
    """
    db = args.db or DEFAULT_DB_PATH
    results = query(args.text, db_path=db, limit=args.limit)

    if not results:
        print("No results found.")
        return

    preview_len = 200
    for i, r in enumerate(results, 1):
        # ``or`` fallbacks also cover keys that are present but None, which
        # would otherwise break Path(), the float format or str.join below.
        source = r.get("source") or "?"
        title = r.get("title") or Path(source).name
        score = r.get("rrf_score") or r.get("score") or 0
        methods = r.get("methods") or [r.get("method") or "?"]

        content = r.get("content") or ""
        content_preview = content[:preview_len].replace("\n", " ")
        # Only mark the preview as cut off when text was actually dropped.
        if len(content) > preview_len:
            content_preview += "..."

        print(f"[{i}] {title}")
        print(f" Source: {source}")
        print(f" Score: {score:.4f} ({', '.join(str(m) for m in methods)})")
        print(f" {content_preview}")
        print()
|
|
|
|
|
|
def cmd_list(args):
    """Print a table of the documents stored in the archive.

    Args:
        args: Parsed CLI namespace providing ``db`` and ``limit``.

    Prints "Archive is empty." when no documents are returned; otherwise a
    header, a rule, and one row per document with its id, chunk count,
    title (cut to 40 characters) and source file name (cut to 30).
    """
    documents = list_documents(db_path=args.db or DEFAULT_DB_PATH, limit=args.limit)
    if not documents:
        print("Archive is empty.")
        return

    header = f"{'ID':>5} {'Chunks':>6} {'Title':<40} Source"
    print(header)
    print("-" * 90)

    for doc in documents:
        short_title = (doc["title"] or "?")[:40]
        # Show only the file name of the source, or a placeholder if unset.
        if doc["source"]:
            source_name = Path(doc["source"]).name[:30]
        else:
            source_name = "?"
        print(f"{doc['id']:>5} {doc['chunks']:>6} {short_title:<40} {source_name}")
|
|
|
|
|
|
def cmd_stats(args):
    """Print the archive's document, chunk and source counts.

    Args:
        args: Parsed CLI namespace providing ``db``.
    """
    stats = get_stats(db_path=args.db or DEFAULT_DB_PATH)
    # (label shown to the user, key in the stats mapping), in print order.
    rows = (
        ("Documents", "documents"),
        ("Chunks", "chunks"),
        ("Sources", "sources"),
    )
    for label, key in rows:
        print(f"{label}: {stats[key]}")
|
|
|
|
|
|
def cmd_doc(args):
    """Print one document's fields and full content, looked up by ID.

    Args:
        args: Parsed CLI namespace providing ``id`` and ``db``.

    Raises:
        SystemExit: With status 1 when no document is returned for
            ``args.id``; the message is written to stderr.
    """
    db = args.db or DEFAULT_DB_PATH
    d = get_document(args.id, db_path=db)
    if not d:
        # This is a failure exit, so report on stderr like cmd_ingest does.
        print(f"Document #{args.id} not found.", file=sys.stderr)
        sys.exit(1)

    print(f"ID: {d['id']}")
    print(f"Title: {d['title']}")
    print(f"Source: {d['source']}")
    print(f"Ingested: {d['ingested_at']}")
    print(f"Metadata: {json.dumps(d['metadata'], indent=2)}")
    print(f"\n--- Content ({len(d['content'])} chars) ---\n")
    print(d["content"])
|
|
|
|
|
|
def main(argv=None):
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    ``--db`` is accepted both before the subcommand
    (``mnemosyne --db X list``) and after it (``mnemosyne list --db X``).
    With no subcommand, the help text is printed.
    """
    db_help = "Database path (default: mnemosyne.db)"

    parser = argparse.ArgumentParser(
        prog="mnemosyne",
        description="Mnemosyne — The Living Holographic Archive",
    )
    parser.add_argument("--db", help=db_help)

    # Options shared by every subcommand. Without this, ``--db`` placed after
    # the command name is rejected as an unrecognized argument. SUPPRESS
    # means an omitted sub-level ``--db`` sets nothing, so it cannot
    # overwrite a value given before the command name.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("--db", default=argparse.SUPPRESS, help=db_help)

    sub = parser.add_subparsers(dest="command")

    # ingest
    p_ingest = sub.add_parser(
        "ingest", parents=[common], help="Ingest files or directories"
    )
    p_ingest.add_argument("path", help="File or directory to ingest")
    p_ingest.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE)
    p_ingest.add_argument("--overlap", type=int, default=DEFAULT_CHUNK_OVERLAP)

    # query
    p_query = sub.add_parser("query", parents=[common], help="Search the archive")
    p_query.add_argument("text", help="Search query")
    p_query.add_argument("--limit", type=int, default=10)

    # list
    p_list = sub.add_parser(
        "list", parents=[common], help="List documents in archive"
    )
    p_list.add_argument("--limit", type=int, default=50)

    # stats
    sub.add_parser("stats", parents=[common], help="Show archive statistics")

    # doc
    p_doc = sub.add_parser("doc", parents=[common], help="Show document by ID")
    p_doc.add_argument("id", type=int, help="Document ID")

    args = parser.parse_args(argv)

    handlers = {
        "ingest": cmd_ingest,
        "query": cmd_query,
        "list": cmd_list,
        "stats": cmd_stats,
        "doc": cmd_doc,
    }
    handler = handlers.get(args.command)
    if handler is None:
        # No subcommand was given.
        parser.print_help()
        return
    handler(args)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|