diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 899d567c8..c41ddaa26 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5004,7 +5004,7 @@ For more help on a command: # ========================================================================= sessions_parser = subparsers.add_parser( "sessions", - help="Manage session history (list, rename, export, prune, delete)", + help="Manage session history (list, rename, export, prune, gc, delete)", description="View and manage the SQLite session store" ) sessions_subparsers = sessions_parser.add_subparsers(dest="sessions_action") @@ -5027,6 +5027,14 @@ For more help on a command: sessions_prune.add_argument("--source", help="Only prune sessions from this source") sessions_prune.add_argument("--yes", "-y", action="store_true", help="Skip confirmation") + sessions_gc = sessions_subparsers.add_parser("gc", help="Garbage-collect empty/trivial sessions") + sessions_gc.add_argument("--empty-hours", type=int, default=24, help="Delete empty (0-msg) sessions older than N hours (default: 24)") + sessions_gc.add_argument("--trivial-days", type=int, default=7, help="Delete trivial (1-5 msg) sessions older than N days (default: 7)") + sessions_gc.add_argument("--trivial-max", type=int, default=5, help="Max messages to consider trivial (default: 5)") + sessions_gc.add_argument("--source", help="Only GC sessions from this source") + sessions_gc.add_argument("--dry-run", action="store_true", help="Show what would be deleted without deleting") + sessions_gc.add_argument("--yes", "-y", action="store_true", help="Skip confirmation") + sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics") sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title") @@ -5196,6 +5204,49 @@ For more help on a command: size_mb = os.path.getsize(db_path) / (1024 * 1024) print(f"Database size: {size_mb:.1f} MB") + elif action == "gc": + dry_run = getattr(args, "dry_run", 
def garbage_collect(
    self,
    empty_older_than_hours: int = 24,
    trivial_max_messages: int = 5,
    trivial_older_than_days: int = 7,
    source: str = None,
    dry_run: bool = False,
) -> Dict[str, int]:
    """Delete empty and trivial sessions based on age.

    Policy (matches #315):
    - Empty sessions (0 messages) older than ``empty_older_than_hours``
    - Trivial sessions (1..``trivial_max_messages`` msgs) older than
      ``trivial_older_than_days``
    - Sessions with more than ``trivial_max_messages`` are kept indefinitely
    - Active (not ended) sessions are never deleted

    Sessions reference their parent through ``parent_session_id``, so a
    matched session can only be removed together with its whole subtree of
    descendants.  To keep the two "never deleted" guarantees above, a
    matched session is skipped (and its subtree left untouched) when any
    descendant is still active or has more than ``trivial_max_messages``
    messages.  Otherwise the descendants are deleted with it, regardless of
    their own age.

    Args:
        empty_older_than_hours: Minimum age, in hours, of a 0-message
            session before it is collected.
        trivial_max_messages: Largest message count still considered
            trivial.
        trivial_older_than_days: Minimum age, in days, of a trivial session
            before it is collected.
        source: When truthy, only sessions with this ``source`` are matched.
        dry_run: When true, compute the counts but delete nothing.

    Returns:
        A dict with counts ``empty``, ``trivial`` and ``total``.  The counts
        cover only the sessions matched by the policy; descendants removed
        along with them are not counted.  A dry run reports the same
        numbers the real run would.
    """
    now = time.time()
    empty_cutoff = now - (empty_older_than_hours * 3600)
    trivial_cutoff = now - (trivial_older_than_days * 86400)

    def _do(conn):
        def _matching_ids(query, params):
            # Apply the optional source filter and return the matched ids.
            if source:
                query += " AND source = ?"
                params.append(source)
            return [row[0] for row in conn.execute(query, params).fetchall()]

        def _collectable_subtree(root_id):
            # Map every session in root_id's subtree (root included) to its
            # direct child ids.  Returns None when the subtree contains a
            # session the policy promises to keep.
            links = {}
            pending = [root_id]
            while pending:
                sid = pending.pop()
                if sid in links:
                    continue
                rows = conn.execute(
                    "SELECT id, message_count, ended_at FROM sessions "
                    "WHERE parent_session_id = ?",
                    (sid,),
                ).fetchall()
                child_ids = []
                for row in rows:
                    still_active = row[2] is None
                    substantial = (row[1] or 0) > trivial_max_messages
                    if still_active or substantial:
                        return None
                    child_ids.append(row[0])
                links[sid] = child_ids
                pending.extend(child_ids)
            return links

        empty_ids = _matching_ids(
            "SELECT id FROM sessions "
            "WHERE message_count = 0 AND started_at < ? AND ended_at IS NOT NULL",
            [empty_cutoff],
        )
        trivial_ids = _matching_ids(
            "SELECT id FROM sessions "
            "WHERE message_count BETWEEN 1 AND ? AND started_at < ? AND ended_at IS NOT NULL",
            [trivial_max_messages, trivial_cutoff],
        )

        # Keep only the matches whose subtree may be removed, remembering
        # the parent -> children links for the deletion ordering below.
        children_of = {}
        collected = {"empty": [], "trivial": []}
        for label, ids in (("empty", empty_ids), ("trivial", trivial_ids)):
            for sid in ids:
                links = _collectable_subtree(sid)
                if links is None:
                    continue
                collected[label].append(sid)
                children_of.update(links)

        counts = {
            "empty": len(collected["empty"]),
            "trivial": len(collected["trivial"]),
            "total": len(set(collected["empty"]) | set(collected["trivial"])),
        }
        if dry_run:
            return counts

        # Post-order walk: every session is emitted after all of its
        # children, so no row is deleted while another row still
        # references it (FK constraint).
        deletion_order = []
        visited = set()
        for start in children_of:
            if start in visited:
                continue
            visited.add(start)
            stack = [(start, False)]
            while stack:
                sid, expanded = stack.pop()
                if expanded:
                    deletion_order.append(sid)
                    continue
                stack.append((sid, True))
                for cid in children_of[sid]:
                    if cid not in visited:
                        visited.add(cid)
                        stack.append((cid, False))

        for sid in deletion_order:
            conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
            conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))

        return counts

    return self._execute_write(_do)
WHERE id = ?", + (time.time() - 48 * 3600, "empty_old"), # 48 hours ago + ) + db._conn.commit() + + # Recent empty session should be kept + db.create_session(session_id="empty_new", source="cli") + db.end_session("empty_new", end_reason="done") + + result = db.garbage_collect() + assert result["empty"] == 1 + assert result["trivial"] == 0 + assert result["total"] == 1 + assert db.get_session("empty_old") is None + assert db.get_session("empty_new") is not None + + def test_gc_deletes_trivial_old_sessions(self, db): + """Sessions with 1-5 messages older than 7 days should be deleted.""" + db.create_session(session_id="trivial_old", source="cli") + for i in range(3): + db.append_message("trivial_old", role="user", content=f"msg {i}") + db.end_session("trivial_old", end_reason="done") + db._conn.execute( + "UPDATE sessions SET started_at = ? WHERE id = ?", + (time.time() - 10 * 86400, "trivial_old"), # 10 days ago + ) + db._conn.commit() + + result = db.garbage_collect() + assert result["trivial"] == 1 + assert result["total"] == 1 + assert db.get_session("trivial_old") is None + + def test_gc_keeps_active_sessions(self, db): + """Active (not ended) sessions should never be deleted.""" + db.create_session(session_id="active_old", source="cli") + # Backdate but don't end + db._conn.execute( + "UPDATE sessions SET started_at = ? WHERE id = ?", + (time.time() - 48 * 3600, "active_old"), + ) + db._conn.commit() + + result = db.garbage_collect() + assert result["total"] == 0 + assert db.get_session("active_old") is not None + + def test_gc_keeps_substantial_sessions(self, db): + """Sessions with >5 messages should never be deleted.""" + db.create_session(session_id="big_old", source="cli") + for i in range(10): + db.append_message("big_old", role="user", content=f"msg {i}") + db.end_session("big_old", end_reason="done") + db._conn.execute( + "UPDATE sessions SET started_at = ? 
WHERE id = ?", + (time.time() - 365 * 86400, "big_old"), # 1 year ago + ) + db._conn.commit() + + result = db.garbage_collect() + assert result["total"] == 0 + assert db.get_session("big_old") is not None + + def test_gc_dry_run_does_not_delete(self, db): + """dry_run=True should return counts but not delete anything.""" + db.create_session(session_id="empty_old", source="cli") + db.end_session("empty_old", end_reason="done") + db._conn.execute( + "UPDATE sessions SET started_at = ? WHERE id = ?", + (time.time() - 48 * 3600, "empty_old"), + ) + db._conn.commit() + + result = db.garbage_collect(dry_run=True) + assert result["total"] == 1 + assert db.get_session("empty_old") is not None # Still exists + + def test_gc_with_source_filter(self, db): + """--source should only GC sessions from that source.""" + for sid, src in [("old_cli", "cli"), ("old_tg", "telegram")]: + db.create_session(session_id=sid, source=src) + db.end_session(sid, end_reason="done") + db._conn.execute( + "UPDATE sessions SET started_at = ? WHERE id = ?", + (time.time() - 48 * 3600, sid), + ) + db._conn.commit() + + result = db.garbage_collect(source="cli") + assert result["total"] == 1 + assert db.get_session("old_cli") is None + assert db.get_session("old_tg") is not None + + def test_gc_handles_child_sessions(self, db): + """Child sessions should be deleted when parent is GC'd.""" + db.create_session(session_id="parent_old", source="cli") + db.end_session("parent_old", end_reason="done") + db._conn.execute( + "UPDATE sessions SET started_at = ? 
WHERE id = ?", + (time.time() - 48 * 3600, "parent_old"), + ) + # Create child session + db.create_session(session_id="child", source="cli", parent_session_id="parent_old") + db.end_session("child", end_reason="done") + db._conn.commit() + + result = db.garbage_collect() + assert result["total"] == 1 + assert db.get_session("parent_old") is None + assert db.get_session("child") is None + # Schema and WAL mode # =========================================================================