259 lines
7.1 KiB
Python
259 lines
7.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
memory_mine.py — Mine session transcripts into MemPalace.

Reads Hermes session logs (JSONL format) and stores summaries
in the palace. Supports batch mining, single-file processing,
and live directory watching.

Usage:
    # Mine a single session file
    python3 bin/memory_mine.py ~/.hermes/sessions/2026-04-13.jsonl

    # Mine all sessions from last 7 days
    python3 bin/memory_mine.py --days 7

    # Mine a specific wing's sessions
    python3 bin/memory_mine.py --wing wing_bezalel --days 14

    # Dry run — show what would be mined
    python3 bin/memory_mine.py --dry-run --days 7
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Configure logging at import time: INFO level, timestamped and
# level-tagged lines.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("memory-mine")

# Two directory levels above this file; it is placed at the front of
# sys.path (once) so that imports such as ``agent.memory`` can resolve
# when the script is executed directly.
REPO_ROOT = Path(__file__).resolve().parent.parent
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))
|
|
|
|
|
|
def parse_session_file(path: Path) -> list[dict]:
    """
    Parse a JSONL session file into turns.

    Each non-blank line is expected to be a JSON object with:
    - role: "user" | "assistant" | "system" | "tool"
    - content: text
    - timestamp: ISO string (optional)

    Blank lines, lines that are not valid JSON, and lines whose JSON value
    is not an object are skipped (logged at DEBUG) so that a single bad
    record does not abort the whole file.

    Args:
        path: Location of the JSONL session file.

    Returns:
        The decoded turn objects, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    turns: list[dict] = []
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default locale encoding. "utf-8-sig" also swallows a leading BOM,
    # which would otherwise make the first record fail to parse, and
    # undecodable bytes are replaced rather than raising mid-file.
    with open(path, encoding="utf-8-sig", errors="replace") as f:
        for lineno, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue
            try:
                turn = json.loads(line)
            except json.JSONDecodeError:
                logger.debug(f"Skipping malformed line {lineno} in {path}")
                continue
            if not isinstance(turn, dict):
                # Valid JSON but not an object (e.g. a bare list or string);
                # consumers of the result call .get() on every turn.
                logger.debug(f"Skipping non-object line {lineno} in {path}")
                continue
            turns.append(turn)
    return turns
|
|
|
|
|
|
def summarize_session(turns: list[dict], agent_name: str = "unknown") -> str:
    """
    Generate a compact summary of a session's turns.

    Keeps user messages and key agent responses, strips noise.

    Turns are bucketed by their "role" key: "user", "assistant" and "tool".
    Any other role is ignored. At most five user messages, three assistant
    messages and five tool calls are listed; message text is cut to 200
    characters and tool output to 150.

    Args:
        turns: Decoded session turns. Entries that are not dicts are skipped.
        agent_name: Label used in the summary's header line.

    Returns:
        A multi-line summary, or "Empty session." when ``turns`` is empty.
    """
    if not turns:
        return "Empty session."

    user_msgs: list[str] = []
    agent_msgs: list[str] = []
    tool_calls: list[str] = []

    for turn in turns:
        if not isinstance(turn, dict):
            # Tolerate stray non-object records instead of crashing on .get().
            continue

        raw_content = turn.get("content")
        # A JSON null content must not end up as the literal text "None"
        # in the stored summary.
        content = "" if raw_content is None else str(raw_content)
        role = turn.get("role", "")

        if role == "user":
            user_msgs.append(content[:200])
        elif role == "assistant":
            agent_msgs.append(content[:200])
        elif role == "tool":
            tool_name = turn.get("name", turn.get("tool", "unknown"))
            tool_calls.append(f"{tool_name}: {content[:150]}")

    parts = [f"Session by {agent_name}:"]

    if user_msgs:
        parts.append(f"\nUser asked ({len(user_msgs)} messages):")
        parts.extend(f" - {msg}" for msg in user_msgs[:5])
        if len(user_msgs) > 5:
            parts.append(f" ... and {len(user_msgs) - 5} more")

    if agent_msgs:
        parts.append(f"\nAgent responded ({len(agent_msgs)} messages):")
        parts.extend(f" - {msg}" for msg in agent_msgs[:3])

    if tool_calls:
        parts.append(f"\nTools used ({len(tool_calls)} calls):")
        parts.extend(f" - {tc}" for tc in tool_calls[:5])

    return "\n".join(parts)
|
|
|
|
|
|
def mine_session(
    path: Path,
    wing: str,
    palace_path: Optional[Path] = None,
    dry_run: bool = False,
) -> Optional[str]:
    """
    Mine a single session file into MemPalace.

    The file is parsed and summarised first. In dry-run mode the summary is
    printed and nothing is stored, so the storage backend is not needed.
    Otherwise the summary is handed to ``AgentMemory.remember`` in the
    "hermes" room together with metadata about the source file.

    Args:
        path: Session JSONL file to mine.
        wing: Wing name; a leading "wing_" is stripped to get the agent name.
        palace_path: Optional palace location passed through to AgentMemory.
        dry_run: When true, print a preview instead of storing.

    Returns:
        The value returned by ``AgentMemory.remember`` (the document ID) if
        stored; None on failure, for an empty/unreadable file, or on dry run.
    """
    try:
        turns = parse_session_file(path)
    except (OSError, UnicodeDecodeError) as exc:
        # One unreadable file must not abort a whole batch run.
        logger.error(f"Cannot read session file {path}: {exc}")
        return None

    if not turns:
        logger.debug(f"Empty session file: {path}")
        return None

    # Strip only a leading "wing_"; str.replace would also rewrite any later
    # occurrence inside the name.
    prefix = "wing_"
    agent_name = wing[len(prefix):] if wing.startswith(prefix) else wing
    summary = summarize_session(turns, agent_name)

    if dry_run:
        print(f"\n--- {path.name} ---")
        print(summary[:500])
        print(f"({len(turns)} turns)")
        return None

    # Imported lazily, and only after the dry-run exit, so that previews work
    # even when the agent package is not importable.
    try:
        from agent.memory import AgentMemory
    except ImportError:
        logger.error("Cannot import agent.memory — is the repo in PYTHONPATH?")
        return None

    mem = AgentMemory(agent_name=agent_name, wing=wing, palace_path=palace_path)
    doc_id = mem.remember(
        summary,
        room="hermes",
        source_file=str(path),
        metadata={
            "type": "mined_session",
            "source": str(path),
            "turn_count": len(turns),
            "agent": agent_name,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        },
    )

    if doc_id:
        logger.info(f"Mined {path.name} → {doc_id} ({len(turns)} turns)")
    else:
        logger.warning(f"Failed to mine {path.name}")

    return doc_id
|
|
|
|
|
|
def find_session_files(
    sessions_dir: Path,
    days: int = 7,
    pattern: str = "*.jsonl",
) -> list[Path]:
    """
    Find session files from the last N days.

    A file qualifies when it is a regular file directly inside
    ``sessions_dir``, matches ``pattern``, and was modified no earlier than
    ``days`` days ago. The modification time is used as a proxy for the
    session date.

    Args:
        sessions_dir: Directory to search (not recursive).
        days: Size of the look-back window in days.
        pattern: Glob pattern applied inside ``sessions_dir``.

    Returns:
        Matching paths in sorted order; an empty list (after a warning) when
        ``sessions_dir`` is not an existing directory.
    """
    # Compare aware UTC datetimes so the window is an exact elapsed duration
    # and is not skewed by local daylight-saving transitions.
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    files: list[Path] = []

    if not sessions_dir.is_dir():
        logger.warning(f"Sessions directory not found: {sessions_dir}")
        return files

    for path in sorted(sessions_dir.glob(pattern)):
        try:
            # A directory whose name matches the pattern is not a session.
            if not path.is_file():
                continue
            mtime = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
        except OSError:
            # The file vanished or became unreadable between glob and stat.
            continue
        if mtime >= cutoff:
            files.append(path)

    return files
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """
    Command-line entry point: mine session files into MemPalace.

    Explicit file arguments take precedence; without them, files modified
    within the last ``--days`` days are taken from ``--sessions-dir``. The
    wing comes from ``--wing``, then the MEMPALACE_WING environment variable,
    then "wing_timmy".

    Args:
        argv: Argument list to parse; None lets argparse read sys.argv.

    Returns:
        Always 0; per-file failures are reported in the log only.
    """
    parser = argparse.ArgumentParser(
        description="Mine session transcripts into MemPalace"
    )
    parser.add_argument(
        "files", nargs="*", help="Session files to mine (JSONL format)"
    )
    parser.add_argument(
        "--days", type=int, default=7,
        help="Mine sessions from last N days (default: 7)"
    )
    parser.add_argument(
        "--sessions-dir",
        default=str(Path.home() / ".hermes" / "sessions"),
        help="Directory containing session JSONL files"
    )
    parser.add_argument(
        "--wing", default=None,
        help="Wing name (default: auto-detect from MEMPALACE_WING env or 'wing_timmy')"
    )
    parser.add_argument(
        "--palace-path", default=None,
        help="Override palace path"
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Show what would be mined without storing"
    )

    args = parser.parse_args(argv)

    wing = args.wing or os.environ.get("MEMPALACE_WING", "wing_timmy")
    palace_path = Path(args.palace_path) if args.palace_path else None

    if args.files:
        files = [Path(f) for f in args.files]
    else:
        sessions_dir = Path(args.sessions_dir)
        files = find_session_files(sessions_dir, days=args.days)

    if not files:
        logger.info("No session files found to mine.")
        return 0

    logger.info(f"Mining {len(files)} session files (wing={wing})")

    mined = 0
    failed = 0
    for path in files:
        result = mine_session(path, wing=wing, palace_path=palace_path, dry_run=args.dry_run)
        if args.dry_run:
            # Dry runs never store anything, so there is nothing to tally.
            continue
        if result:
            mined += 1
        else:
            # Any falsy result (None or an empty ID) is a failure, so that
            # mined + failed always equals the number of files processed.
            failed += 1

    if args.dry_run:
        logger.info(f"Dry run complete — {len(files)} files would be mined")
    else:
        logger.info(f"Mining complete — {mined} mined, {failed} failed")

    return 0
|
|
|
|
|
|
# Script entry point: run the CLI and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|