Compare commits
7 Commits
fix/9-auto
...
burn/17-se
| Author | SHA1 | Date |
|---|---|---|
|  | 55797c8a3e |  |
|  | 8d716ff03f |  |
|  | 920510996e |  |
|  | 1fafeaf5a4 |  |
|  | 36b440f998 |  |
|  | 9f3caabf42 |  |
|  | a21f3a44e1 |  |
@@ -1,10 +0,0 @@
{
  "last_harvest": "2026-04-14T18:04:45.484759+00:00",
  "harvested_sessions": [
    "20260413_175935_20cb44",
    "20260413_171106_62c276",
    "20260413_181734_aed35b"
  ],
  "total_sessions_processed": 3,
  "total_facts_extracted": 59
}
@@ -1,597 +1,6 @@
{
  "version": 1,
  "last_updated": "2026-04-14T18:04:45.484238+00:00",
  "total_facts": 59,
  "facts": [
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_z8ielhro/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477585+00:00",
      "harvested_at": "2026-04-14T18:04:45.479057+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: crons.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477603+00:00",
      "harvested_at": "2026-04-14T18:04:45.479059+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: 300.07",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477614+00:00",
      "harvested_at": "2026-04-14T18:04:45.479060+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox__3wxy21d/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477622+00:00",
      "harvested_at": "2026-04-14T18:04:45.479061+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_dimnu9ba/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477633+00:00",
      "harvested_at": "2026-04-14T18:04:45.479062+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: nhermes_cli/cron.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477664+00:00",
      "harvested_at": "2026-04-14T18:04:45.479062+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: hermes_cli/cron.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477793+00:00",
      "harvested_at": "2026-04-14T18:04:45.479063+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: config.yaml",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.477921+00:00",
      "harvested_at": "2026-04-14T18:04:45.479064+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: ~/.hermes",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478092+00:00",
      "harvested_at": "2026-04-14T18:04:45.479065+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: ncli.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478281+00:00",
      "harvested_at": "2026-04-14T18:04:45.479065+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: 300.17",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478293+00:00",
      "harvested_at": "2026-04-14T18:04:45.479066+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: 10.88",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478370+00:00",
      "harvested_at": "2026-04-14T18:04:45.479067+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: k2.5",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478386+00:00",
      "harvested_at": "2026-04-14T18:04:45.479067+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: 300.92",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478410+00:00",
      "harvested_at": "2026-04-14T18:04:45.479068+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Successful command pattern: python observatory.py --check ",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478498+00:00",
      "harvested_at": "2026-04-14T18:04:45.479069+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: devkit/health.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478571+00:00",
      "harvested_at": "2026-04-14T18:04:45.479069+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: CHANGELOG.md",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478608+00:00",
      "harvested_at": "2026-04-14T18:04:45.479070+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: 300.06",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478635+00:00",
      "harvested_at": "2026-04-14T18:04:45.479071+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: 300.03",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478658+00:00",
      "harvested_at": "2026-04-14T18:04:45.479072+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: crons.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478703+00:00",
      "harvested_at": "2026-04-14T18:04:45.479072+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: crons.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478757+00:00",
      "harvested_at": "2026-04-14T18:04:45.479073+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_1h5nj9lg/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478778+00:00",
      "harvested_at": "2026-04-14T18:04:45.479074+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: job.get",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478833+00:00",
      "harvested_at": "2026-04-14T18:04:45.479074+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: CreateIssueOption.Labels",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.478975+00:00",
      "harvested_at": "2026-04-14T18:04:45.479075+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Successful command pattern: git process seems to be running in this repository",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_175935_20cb44",
      "extracted_at": "2026-04-14T18:04:45.479018+00:00",
      "harvested_at": "2026-04-14T18:04:45.479076+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_175935_20cb44.json"
    },
    {
      "fact": "Error encountered with file: ~/.hermes",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.479242+00:00",
      "harvested_at": "2026-04-14T18:04:45.482379+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: pokayoke/hermes_constants.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.479346+00:00",
      "harvested_at": "2026-04-14T18:04:45.482380+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: Path.home",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.479565+00:00",
      "harvested_at": "2026-04-14T18:04:45.482380+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_5pwgex20/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.479901+00:00",
      "harvested_at": "2026-04-14T18:04:45.482381+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: 300.11",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.480675+00:00",
      "harvested_at": "2026-04-14T18:04:45.482382+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: AIAgent.__init__",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.480862+00:00",
      "harvested_at": "2026-04-14T18:04:45.482383+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: job.ge",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481044+00:00",
      "harvested_at": "2026-04-14T18:04:45.482383+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: cron/scheduler.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481254+00:00",
      "harvested_at": "2026-04-14T18:04:45.482384+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: __main__.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481644+00:00",
      "harvested_at": "2026-04-14T18:04:45.482385+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: tests/test_prompt_injection_defense.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481654+00:00",
      "harvested_at": "2026-04-14T18:04:45.482385+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_v2umc709/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481666+00:00",
      "harvested_at": "2026-04-14T18:04:45.482386+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: pytest.mark",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481733+00:00",
      "harvested_at": "2026-04-14T18:04:45.482387+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: ntests/test_prompt_injection_defense.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481788+00:00",
      "harvested_at": "2026-04-14T18:04:45.482388+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: result.get",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.481979+00:00",
      "harvested_at": "2026-04-14T18:04:45.482388+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: concurrent.future",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.482228+00:00",
      "harvested_at": "2026-04-14T18:04:45.482389+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: 0.0",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.482252+00:00",
      "harvested_at": "2026-04-14T18:04:45.482390+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_mjbblg0z/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_171106_62c276",
      "extracted_at": "2026-04-14T18:04:45.482315+00:00",
      "harvested_at": "2026-04-14T18:04:45.482390+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_171106_62c276.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_u2ngkm60/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.482463+00:00",
      "harvested_at": "2026-04-14T18:04:45.484207+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_i63vbaem/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.482569+00:00",
      "harvested_at": "2026-04-14T18:04:45.484208+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: 3.12",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.482589+00:00",
      "harvested_at": "2026-04-14T18:04:45.484209+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Successful command pattern: git restore --staged ",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.482629+00:00",
      "harvested_at": "2026-04-14T18:04:45.484209+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: forge.alexanderwhitestone",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.482645+00:00",
      "harvested_at": "2026-04-14T18:04:45.484210+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Successful command pattern: git restore --staged ",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483301+00:00",
      "harvested_at": "2026-04-14T18:04:45.484211+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: ntests/test_repo_truth.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483472+00:00",
      "harvested_at": "2026-04-14T18:04:45.484211+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Successful command pattern: git restore --staged ",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483479+00:00",
      "harvested_at": "2026-04-14T18:04:45.484212+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: 300.02",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483596+00:00",
      "harvested_at": "2026-04-14T18:04:45.484213+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Successful command pattern: git restore --staged ",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483603+00:00",
      "harvested_at": "2026-04-14T18:04:45.484213+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Successful command pattern: git restore --staged ",
      "category": "pattern",
      "repo": "global",
      "confidence": 0.6,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483697+00:00",
      "harvested_at": "2026-04-14T18:04:45.484214+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: 300.37",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483785+00:00",
      "harvested_at": "2026-04-14T18:04:45.484215+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_2k0n79t8/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483792+00:00",
      "harvested_at": "2026-04-14T18:04:45.484216+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: 300.19",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483864+00:00",
      "harvested_at": "2026-04-14T18:04:45.484216+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: /private/var/folders/9k/v07xkpp133v03yynn9nx80fr0000gn/T/hermes_sandbox_qxzsy_kv/script.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483919+00:00",
      "harvested_at": "2026-04-14T18:04:45.484217+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: CreateIssueOption.Labels",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483930+00:00",
      "harvested_at": "2026-04-14T18:04:45.484218+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    },
    {
      "fact": "Error encountered with file: verify_triage_status.py",
      "category": "pitfall",
      "repo": "global",
      "confidence": 0.7,
      "session_id": "20260413_181734_aed35b",
      "extracted_at": "2026-04-14T18:04:45.483963+00:00",
      "harvested_at": "2026-04-14T18:04:45.484218+00:00",
      "session_path": "/Users/apayne/.hermes/sessions/session_20260413_181734_aed35b.json"
    }
  ]
  "last_updated": "2026-04-13T20:00:00Z",
  "total_facts": 0,
  "facts": []
}
@@ -1,350 +0,0 @@
#!/usr/bin/env python3
"""
Session Harvester for Compounding Intelligence.
Extracts durable knowledge from completed sessions and updates the knowledge store.
"""

import json
import os
import sys
import logging
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from session_reader import SessionReader

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(Path(__file__).parent.parent / 'metrics' / 'harvester.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


class KnowledgeHarvester:
    """Extracts knowledge from completed sessions."""

    def __init__(self, repo_root: str = None):
        """Initialize the harvester."""
        if repo_root is None:
            repo_root = str(Path(__file__).parent.parent)
        self.repo_root = Path(repo_root)
        self.knowledge_dir = self.repo_root / "knowledge"
        self.index_path = self.knowledge_dir / "index.json"
        self.prompt_path = self.repo_root / "templates" / "harvest-prompt.md"

        # Load or create knowledge index
        self.index = self._load_index()

        # Initialize session reader
        self.reader = SessionReader()

        # Harvest state file
        self.state_path = self.knowledge_dir / "harvest_state.json"
        self.state = self._load_state()

    def _load_index(self) -> Dict[str, Any]:
        """Load or create the knowledge index."""
        if self.index_path.exists():
            with open(self.index_path, 'r') as f:
                return json.load(f)
        else:
            return {
                "version": 1,
                "last_updated": datetime.now(timezone.utc).isoformat(),
                "total_facts": 0,
                "facts": []
            }

    def _save_index(self):
        """Save the knowledge index."""
        self.index["last_updated"] = datetime.now(timezone.utc).isoformat()
        with open(self.index_path, 'w') as f:
            json.dump(self.index, f, indent=2)

    def _load_state(self) -> Dict[str, Any]:
        """Load harvest state."""
        if self.state_path.exists():
            with open(self.state_path, 'r') as f:
                return json.load(f)
        else:
            return {
                "last_harvest": None,
                "harvested_sessions": [],
                "total_sessions_processed": 0,
                "total_facts_extracted": 0
            }

    def _save_state(self):
        """Save harvest state."""
        with open(self.state_path, 'w') as f:
            json.dump(self.state, f, indent=2)

    def get_sessions_to_harvest(self, max_age_hours: float = 24) -> List[Dict[str, Any]]:
        """
        Get sessions that need harvesting.

        Args:
            max_age_hours: Only harvest sessions modified within this many hours

        Returns:
            List of session data dictionaries
        """
        # Get sessions modified since last harvest
        since = None
        if self.state["last_harvest"]:
            try:
                since = datetime.fromisoformat(self.state["last_harvest"].replace('Z', '+00:00'))
            except (ValueError, AttributeError):
                pass

        # If no last harvest, use max_age_hours
        if since is None:
            since = datetime.now(timezone.utc) - timedelta(hours=max_age_hours)

        # Get recent sessions
        sessions = self.reader.list_sessions(since=since)

        # Filter out already harvested sessions
        harvested = set(self.state["harvested_sessions"])
        to_harvest = []

        for path in sessions:
            session = self.reader.read_session(path)
            if "error" in session:
                logger.warning(f"Error reading session {path}: {session['error']}")
                continue

            # Skip if already harvested
            if session["session_id"] in harvested:
                continue

            # Skip if session is still active
            if not self.reader.is_session_complete(session):
                continue

            to_harvest.append(session)

        return to_harvest

    def extract_knowledge_from_session(self, session: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Extract knowledge from a single session.

        This is a simplified extraction that looks for patterns in the session.
        In a full implementation, this would use an LLM with the harvest prompt.

        Args:
            session: Session data dictionary

        Returns:
            List of extracted knowledge items
        """
        knowledge_items = []

        # Get messages from session
        messages = session.get("messages", [])

        # Simple pattern-based extraction
        for i, msg in enumerate(messages):
            if not isinstance(msg, dict):
                continue

            role = msg.get("role", "")
            content = msg.get("content", "")

            if not content or not isinstance(content, str):
                continue

            # Look for error patterns
            if "error" in content.lower() or "Error" in content:
                # Extract error context
                context = content[:200]  # First 200 chars

                # Look for file paths
                import re
                file_paths = re.findall(r'[~/.]?[\w/]+\.\w+', context)

                if file_paths:
                    knowledge_items.append({
                        "fact": f"Error encountered with file: {file_paths[0]}",
                        "category": "pitfall",
                        "repo": "global",
                        "confidence": 0.7,
                        "session_id": session["session_id"],
                        "extracted_at": datetime.now(timezone.utc).isoformat()
                    })

            # Look for successful patterns
            if "success" in content.lower() or "Success" in content:
                # Extract success context
                context = content[:200]

                # Look for commands or actions
                import re
                commands = re.findall(r'(?:git|npm|pip|python|curl|ssh)\s+[\w\s\-\.]+', context)

                if commands:
                    knowledge_items.append({
                        "fact": f"Successful command pattern: {commands[0]}",
                        "category": "pattern",
                        "repo": "global",
                        "confidence": 0.6,
                        "session_id": session["session_id"],
                        "extracted_at": datetime.now(timezone.utc).isoformat()
                    })

        return knowledge_items

    def harvest_session(self, session: Dict[str, Any]) -> Dict[str, Any]:
        """
        Harvest knowledge from a single session.

        Args:
            session: Session data dictionary

        Returns:
            Harvest result dictionary
        """
        session_id = session["session_id"]
        logger.info(f"Harvesting session: {session_id}")

        try:
            # Extract knowledge
            knowledge_items = self.extract_knowledge_from_session(session)

            # Add to index
            for item in knowledge_items:
                # Add metadata
                item["harvested_at"] = datetime.now(timezone.utc).isoformat()
                item["session_path"] = session.get("path", "")

                # Add to facts
                self.index["facts"].append(item)

            # Update state
            self.state["harvested_sessions"].append(session_id)
            self.state["total_sessions_processed"] += 1
            self.state["total_facts_extracted"] += len(knowledge_items)

            result = {
                "session_id": session_id,
                "success": True,
                "facts_extracted": len(knowledge_items),
                "knowledge_items": knowledge_items
            }

            logger.info(f"Extracted {len(knowledge_items)} facts from session {session_id}")

        except Exception as e:
            logger.error(f"Error harvesting session {session_id}: {e}")
            result = {
                "session_id": session_id,
                "success": False,
                "error": str(e),
                "facts_extracted": 0
            }

        return result

    def harvest_batch(self, max_sessions: int = 10, max_age_hours: float = 24) -> Dict[str, Any]:
        """
        Harvest a batch of sessions.

        Args:
            max_sessions: Maximum number of sessions to harvest
            max_age_hours: Only harvest sessions modified within this many hours

        Returns:
            Batch harvest result
        """
        logger.info(f"Starting harvest batch (max {max_sessions} sessions, max age {max_age_hours}h)")

        # Get sessions to harvest
        sessions = self.get_sessions_to_harvest(max_age_hours)

        if not sessions:
            logger.info("No sessions to harvest")
            return {
                "success": True,
                "sessions_processed": 0,
                "facts_extracted": 0,
                "results": []
            }

        # Limit to max_sessions
        sessions = sessions[:max_sessions]

        results = []
        total_facts = 0

        for session in sessions:
            result = self.harvest_session(session)
            results.append(result)

            if result["success"]:
                total_facts += result["facts_extracted"]

        # Update index and state
        self.index["total_facts"] = len(self.index["facts"])
        self._save_index()

        self.state["last_harvest"] = datetime.now(timezone.utc).isoformat()
        self._save_state()

        batch_result = {
            "success": True,
            "sessions_processed": len(sessions),
            "facts_extracted": total_facts,
            "results": results,
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

        logger.info(f"Harvest batch complete: {len(sessions)} sessions, {total_facts} facts")

        return batch_result


def main():
    """Main entry point for the harvester."""
    import argparse

    parser = argparse.ArgumentParser(description="Harvest knowledge from completed sessions")
    parser.add_argument("--max-sessions", type=int, default=10, help="Maximum sessions to harvest")
    parser.add_argument("--max-age-hours", type=float, default=24, help="Max age in hours")
    parser.add_argument("--dry-run", action="store_true", help="Don't save, just report")

    args = parser.parse_args()

    harvester = KnowledgeHarvester()

    if args.dry_run:
        sessions = harvester.get_sessions_to_harvest(args.max_age_hours)
        print(f"Would harvest {len(sessions)} sessions:")
        for session in sessions[:5]:  # Show first 5
            print(f"  - {session['session_id']} ({session['message_count']} messages)")
        if len(sessions) > 5:
            print(f"  ... and {len(sessions) - 5} more")
        return

    result = harvester.harvest_batch(
        max_sessions=args.max_sessions,
        max_age_hours=args.max_age_hours
    )

    if result["success"]:
        print(f"Harvest complete: {result['sessions_processed']} sessions, {result['facts_extracted']} facts")
    else:
        print(f"Harvest failed: {result.get('error', 'Unknown error')}")
        sys.exit(1)


if __name__ == "__main__":
    main()
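Reviewer note: the bare-number "facts" in the index diff above (e.g. "Error encountered with file: 300.07", "job.get", "0.0") fall out of this harvester's file-path regex, which also matches decimals and dotted attribute accesses. A minimal sketch reproducing the behavior; the sample strings are hypothetical:

```python
import re

# Same pattern used by extract_knowledge_from_session() above.
FILE_PATH_RE = re.compile(r'[~/.]?[\w/]+\.\w+')

# Hypothetical message snippets: decimals and attribute accesses
# satisfy the pattern just as well as real file paths do.
samples = [
    "Error: exit code 1 after 300.07 seconds",   # -> "300.07"
    "Error in hermes_cli/cron.py line 42",        # -> "hermes_cli/cron.py"
    "Error calling job.get() on the queue",       # -> "job.get"
]
for s in samples:
    print(FILE_PATH_RE.findall(s)[0])
```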
scripts/sampler.py (Normal file, 353 lines)
@@ -0,0 +1,353 @@
#!/usr/bin/env python3
"""
sampler.py — Score and rank sessions by harvest value.

With 20k+ sessions on disk, we can't harvest all at once. This script
scores each session by how likely it is to contain valuable knowledge,
so the harvester processes the best ones first.

Scoring strategy:
- Recency: last 7d=3pts, last 30d=2pts, older=1pt
- Length: >50 messages=3pts, >20=2pts, <20=1pt
- Repo uniqueness: first session for a repo=5pts, otherwise=1pt
- Outcome: failure=3pts (most to learn), success=2pts, unknown=1pt
- Tool calls: >10 tool invocations=2pts (complex sessions)

Usage:
    python3 sampler.py --count 100                       # Top 100 sessions
    python3 sampler.py --repo the-nexus --count 20       # Top 20 for a repo
    python3 sampler.py --since 2026-04-01                # All sessions since date
    python3 sampler.py --count 50 --min-score 8          # Only high-value sessions
    python3 sampler.py --count 100 --output sample.json  # Save to file
"""

import argparse
import json
import os
import sys
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional


# --- Fast session scanning (no full parse) ---

def scan_session_fast(path: str) -> dict:
    """Extract scoring metadata from a session without parsing the full JSONL.

    Reads only: first line, last ~20 lines, and line count. This processes
    20k sessions in seconds instead of minutes.
    """
    meta = {
        'path': path,
        'message_count': 0,
        'has_tool_calls': False,
        'tool_call_count': 0,
        'first_timestamp': '',
        'last_timestamp': '',
        'is_failure': False,
        'repos_mentioned': set(),
        'first_role': '',
        'last_content_preview': '',
    }

    try:
        file_size = os.path.getsize(path)
        if file_size == 0:
            return meta

        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            # Read first line for timestamp + role
            first_line = f.readline().strip()
            if first_line:
                try:
                    first_msg = json.loads(first_line)
                    meta['first_timestamp'] = first_msg.get('timestamp', '')
                    meta['first_role'] = first_msg.get('role', '')
                except json.JSONDecodeError:
                    pass

            # Fast line count + collect tail lines
            # For the tail, seek to near end of file
            tail_lines = []
            line_count = 1  # already read first

            if file_size > 8192:
                # Seek to last 8KB for tail sampling
                f.seek(max(0, file_size - 8192))
                f.readline()  # skip partial line
                for line in f:
                    line = line.strip()
                    if line:
                        tail_lines.append(line)
                        line_count += 1
                # We lost the exact count for big files — estimate from file size
                # Average JSONL line is ~500 bytes
                if line_count < 100:
                    line_count = max(line_count, file_size // 500)
            else:
                # Small file — read all
                for line in f:
                    line = line.strip()
                    if line:
                        tail_lines.append(line)
                        line_count += 1

        meta['message_count'] = line_count

        # Parse tail lines for outcome, tool calls, repos
        for line in tail_lines[-30:]:  # last 30 non-empty lines
            try:
                msg = json.loads(line)

                # Track last timestamp
                ts = msg.get('timestamp', '')
                if ts:
                    meta['last_timestamp'] = ts

                # Count tool calls
                if msg.get('tool_calls'):
                    meta['has_tool_calls'] = True
                    meta['tool_call_count'] += len(msg['tool_calls'])

                # Detect failure signals in content
                content = ''
                if isinstance(msg.get('content'), str):
                    content = msg['content'].lower()
                elif isinstance(msg.get('content'), list):
                    for part in msg['content']:
                        if isinstance(part, dict) and part.get('type') == 'text':
                            content += part.get('text', '').lower()

                if content:
                    meta['last_content_preview'] = content[:200]
                    failure_signals = ['error', 'failed', 'cannot', 'unable',
                                       'exception', 'traceback', 'rejected', 'denied']
                    if any(sig in content for sig in failure_signals):
                        meta['is_failure'] = True

                # Extract repo references from tool call arguments
                if msg.get('tool_calls'):
                    for tc in msg['tool_calls']:
                        args = tc.get('function', {}).get('arguments', '')
                        if isinstance(args, str):
                            # Look for repo patterns
                            for pattern in ['Timmy_Foundation/', 'Rockachopa/', 'compounding-intelligence', 'the-nexus', 'timmy-home', 'hermes-agent', 'the-beacon', 'the-door']:
                                if pattern in args:
                                    repo = pattern.rstrip('/')
                                    meta['repos_mentioned'].add(repo)

            except json.JSONDecodeError:
                continue

    except (IOError, OSError):
        pass

    meta['repos_mentioned'] = list(meta['repos_mentioned'])
    return meta


# --- Filename timestamp parsing ---

def parse_session_timestamp(filename: str) -> Optional[datetime]:
    """Parse timestamp from session filename.

    Common formats:
        session_20260413_123456_hash.jsonl
        20260413_123456_hash.jsonl
    """
    stem = Path(filename).stem
    parts = stem.split('_')

    # Try session_YYYYMMDD_HHMMSS format
    for i, part in enumerate(parts):
        if len(part) == 8 and part.isdigit():
            date_part = part
            time_part = parts[i + 1] if i + 1 < len(parts) and len(parts[i + 1]) == 6 else '000000'
            try:
                return datetime.strptime(f"{date_part}_{time_part}", '%Y%m%d_%H%M%S').replace(tzinfo=timezone.utc)
            except ValueError:
                continue

    # Fallback: use file modification time
    return None


# --- Scoring ---

def score_session(meta: dict, now: datetime, seen_repos: set) -> tuple[int, dict]:
    """Score a session for harvest value. Returns (score, breakdown)."""
    score = 0
    breakdown = {}

    # 1. Recency
    ts = parse_session_timestamp(os.path.basename(meta['path']))
    if ts is None:
        # Fallback to mtime
        try:
            ts = datetime.fromtimestamp(os.path.getmtime(meta['path']), tz=timezone.utc)
        except OSError:
            ts = now - timedelta(days=365)

    age_days = (now - ts).days
    if age_days <= 7:
        recency = 3
    elif age_days <= 30:
        recency = 2
    else:
        recency = 1
    score += recency
    breakdown['recency'] = recency

    # 2. Length
    count = meta['message_count']
    if count > 50:
        length = 3
    elif count > 20:
        length = 2
    else:
        length = 1
    score += length
    breakdown['length'] = length

    # 3. Repo uniqueness (first session mentioning a repo gets bonus)
    repo_score = 0
    for repo in meta.get('repos_mentioned', []):
        if repo not in seen_repos:
            seen_repos.add(repo)
            repo_score = max(repo_score, 5)
        else:
            repo_score = max(repo_score, 1)
    score += repo_score
    breakdown['repo_unique'] = repo_score

    # 4. Outcome
    if meta.get('is_failure'):
        outcome = 3
    elif meta.get('last_content_preview', '').strip():
        outcome = 2  # has some content = likely completed
    else:
        outcome = 1
    score += outcome
    breakdown['outcome'] = outcome

    # 5. Tool calls
    if meta.get('tool_call_count', 0) > 10:
        tool = 2
    else:
        tool = 0
    score += tool
    breakdown['tool_calls'] = tool

    return score, breakdown


# --- Main ---

def main():
    parser = argparse.ArgumentParser(description="Score and rank sessions for harvesting")
    parser.add_argument('--sessions-dir', default=os.path.expanduser('~/.hermes/sessions'),
                        help='Directory containing session files')
    parser.add_argument('--count', type=int, default=100, help='Number of top sessions to return')
    parser.add_argument('--repo', default='', help='Filter to sessions mentioning this repo')
    parser.add_argument('--since', default='', help='Only score sessions after this date (YYYY-MM-DD)')
    parser.add_argument('--min-score', type=int, default=0, help='Minimum score threshold')
    parser.add_argument('--output', default='', help='Output file (JSON). Default: stdout')
    parser.add_argument('--format', choices=['json', 'paths', 'table'], default='table',
                        help='Output format: json (full), paths (one per line), table (human)')
    parser.add_argument('--top-percent', type=float, default=0, help='Return top N%% instead of --count')

    args = parser.parse_args()

    sessions_dir = Path(args.sessions_dir)
    if not sessions_dir.is_dir():
        print(f"ERROR: Sessions directory not found: {sessions_dir}", file=sys.stderr)
        sys.exit(1)

    # Find all JSONL files
    print(f"Scanning {sessions_dir}...", file=sys.stderr)
    t0 = time.time()

    session_files = list(sessions_dir.glob('*.jsonl'))
    total = len(session_files)
    print(f"Found {total} session files", file=sys.stderr)

    # Parse since date
    since_dt = None
    if args.since:
        since_dt = datetime.strptime(args.since, '%Y-%m-%d').replace(tzinfo=timezone.utc)

    # Score all sessions
    now = datetime.now(timezone.utc)
    seen_repos = set()  # Track repos for uniqueness scoring
    scored = []

    for i, sf in enumerate(session_files):
        # Date filter (fast path: check filename first)
        if since_dt:
            ts = parse_session_timestamp(sf.name)
            if ts and ts < since_dt:
                continue

        meta = scan_session_fast(str(sf))

        # Repo filter
        if args.repo:
            repos = meta.get('repos_mentioned', [])
            if args.repo.lower() not in [r.lower() for r in repos]:
                # Also check filename
                if args.repo.lower() not in sf.name.lower():
                    continue

        score, breakdown = score_session(meta, now, seen_repos)

        if score >= args.min_score:
            scored.append({
                'path': str(sf),
                'filename': sf.name,
                'score': score,
                'breakdown': breakdown,
                'message_count': meta['message_count'],
                'repos': meta['repos_mentioned'],
                'is_failure': meta['is_failure'],
            })

        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t0
            print(f"  Scanned {i + 1}/{total} ({elapsed:.1f}s)", file=sys.stderr)

    elapsed = time.time() - t0
    print(f"Scored {len(scored)} sessions in {elapsed:.1f}s", file=sys.stderr)

    # Sort by score descending
    scored.sort(key=lambda x: x['score'], reverse=True)

    # Apply count or percent
    if args.top_percent > 0:
        count = max(1, int(len(scored) * args.top_percent / 100))
    else:
        count = args.count
    scored = scored[:count]

    # Output
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            json.dump(scored, f, indent=2)
        print(f"Wrote {len(scored)} sessions to {args.output}", file=sys.stderr)
    elif args.format == 'json':
        json.dump(scored, sys.stdout, indent=2)
    elif args.format == 'paths':
        for s in scored:
            print(s['path'])
    else:  # table
        print(f"{'SCORE':>5} {'MSGS':>5} {'REPOS':<25} {'FILE'}")
        print(f"{'-'*5} {'-'*5} {'-'*25} {'-'*40}")
        for s in scored:
            repos = ', '.join(s['repos'][:2]) if s['repos'] else '-'
            fail = ' FAIL' if s['is_failure'] else ''
            print(f"{s['score']:>5} {s['message_count']:>5} {repos:<25} {s['filename'][:40]}{fail}")


if __name__ == '__main__':
    main()
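Reviewer note: with --output (or --format json), sampler.py emits the ranked list as a JSON array whose entries carry path, filename, score, breakdown, message_count, repos, and is_failure (see scored.append(...) above), so downstream harvesting can consume it directly. A minimal consumption sketch, assuming invocation from the repo root; this glue code is hypothetical and not part of the change:

```python
import json
import subprocess

# Rank the 100 highest-value sessions and save them to sample.json.
subprocess.run(
    ["python3", "scripts/sampler.py", "--count", "100",
     "--min-score", "8", "--output", "sample.json"],
    check=True,
)

with open("sample.json", encoding="utf-8") as f:
    ranked = json.load(f)  # already sorted by score, descending

for entry in ranked:
    # Keys match the dicts sampler.py appends to `scored`.
    print(entry["score"], entry["path"])
```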
@@ -1,194 +0,0 @@
#!/usr/bin/env python3
"""
Session reader for Compounding Intelligence.
Reads and parses Hermes session files from ~/.hermes/sessions/.
"""

import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Dict, Any, Optional


class SessionReader:
    """Reads and parses Hermes session files."""

    def __init__(self, sessions_dir: str = None):
        """Initialize with sessions directory path."""
        if sessions_dir is None:
            sessions_dir = os.path.expanduser("~/.hermes/sessions")
        self.sessions_dir = Path(sessions_dir)
        self.supported_extensions = {'.json', '.jsonl'}

    def list_sessions(self, since: Optional[datetime] = None, limit: int = None) -> List[Path]:
        """
        List session files, optionally filtered by modification time.

        Args:
            since: Only return sessions modified after this datetime
            limit: Maximum number of sessions to return

        Returns:
            List of Path objects to session files
        """
        if not self.sessions_dir.exists():
            return []

        sessions = []
        for f in self.sessions_dir.iterdir():
            if f.suffix in self.supported_extensions:
                if since is not None:
                    mtime = datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc)
                    if mtime <= since:
                        continue
                sessions.append(f)

        # Sort by modification time (newest first)
        sessions.sort(key=lambda p: p.stat().st_mtime, reverse=True)

        if limit:
            sessions = sessions[:limit]

        return sessions

    def read_session(self, path: Path) -> Dict[str, Any]:
        """
        Read a session file and return structured data.

        Args:
            path: Path to session file

        Returns:
            Dictionary with session data
        """
        try:
            if path.suffix == '.jsonl':
                return self._read_jsonl_session(path)
            elif path.suffix == '.json':
                return self._read_json_session(path)
            else:
                return {"error": f"Unsupported format: {path.suffix}"}
        except Exception as e:
            return {"error": str(e), "path": str(path)}

    def _read_json_session(self, path: Path) -> Dict[str, Any]:
        """Read a JSON format session file."""
        with open(path, 'r') as f:
            data = json.load(f)

        return {
            "session_id": data.get("session_id", path.stem),
            "model": data.get("model", "unknown"),
            "created_at": data.get("session_start"),
            "last_updated": data.get("last_updated"),
            "message_count": data.get("message_count", len(data.get("messages", []))),
            "messages": data.get("messages", []),
            "path": str(path),
            "format": "json"
        }

    def _read_jsonl_session(self, path: Path) -> Dict[str, Any]:
        """Read a JSONL format session file."""
        messages = []
        session_meta = None

        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                    if entry.get("role") == "session_meta":
                        session_meta = entry
                    else:
                        messages.append(entry)
                except json.JSONDecodeError:
                    continue

        session_id = path.stem
        if session_meta:
            session_id = session_meta.get("session_id", session_id)

        return {
            "session_id": session_id,
            "model": session_meta.get("model", "unknown") if session_meta else "unknown",
            "created_at": session_meta.get("timestamp") if session_meta else None,
            "last_updated": messages[-1].get("timestamp") if messages else None,
            "message_count": len(messages),
            "messages": messages,
            "path": str(path),
            "format": "jsonl",
            "meta": session_meta
        }

    def get_session_age_hours(self, session_data: Dict[str, Any]) -> float:
        """Get session age in hours."""
        last_updated = session_data.get("last_updated")
        if not last_updated:
            return float('inf')

        try:
            if isinstance(last_updated, str):
                # Handle various timestamp formats
                for fmt in [
                    "%Y-%m-%dT%H:%M:%S.%fZ",
                    "%Y-%m-%dT%H:%M:%SZ",
                    "%Y-%m-%dT%H:%M:%S.%f",
                    "%Y-%m-%dT%H:%M:%S"
                ]:
                    try:
                        dt = datetime.strptime(last_updated, fmt)
                        dt = dt.replace(tzinfo=timezone.utc)
                        break
                    except ValueError:
                        continue
                else:
                    # Try parsing with fromisoformat
                    dt = datetime.fromisoformat(last_updated.replace('Z', '+00:00'))
            else:
                dt = last_updated

            now = datetime.now(timezone.utc)
            age = now - dt
            return age.total_seconds() / 3600
        except Exception:
            return float('inf')

    def is_session_complete(self, session_data: Dict[str, Any]) -> bool:
        """
        Check if a session appears to be complete (not actively running).

        Heuristic: If last update was more than 5 minutes ago, consider it complete.
        """
        age_hours = self.get_session_age_hours(session_data)
        return age_hours > (5 / 60)  # 5 minutes


def main():
    """Test the session reader."""
    reader = SessionReader()

    # List recent sessions
    sessions = reader.list_sessions(limit=5)
    print(f"Found {len(sessions)} recent sessions")

    for path in sessions:
        session = reader.read_session(path)
        if "error" in session:
            print(f"Error reading {path}: {session['error']}")
            continue

        age_hours = reader.get_session_age_hours(session)
        complete = reader.is_session_complete(session)

        print(f"\nSession: {session['session_id']}")
        print(f"  Model: {session['model']}")
        print(f"  Messages: {session['message_count']}")
        print(f"  Age: {age_hours:.1f} hours")
        print(f"  Complete: {complete}")


if __name__ == "__main__":
    main()
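Reviewer note: the deleted reader treats any JSONL line whose role is "session_meta" as file metadata and every other line as a message. A minimal round-trip sketch of that file shape, assuming session_reader.py is still importable; the session contents are hypothetical:

```python
import tempfile
from pathlib import Path

from session_reader import SessionReader

with tempfile.TemporaryDirectory() as d:
    # One meta line, one message line — the shape _read_jsonl_session() expects.
    p = Path(d) / "session_20260413_175935_demo.jsonl"
    p.write_text(
        '{"role": "session_meta", "session_id": "demo", "model": "hermes", '
        '"timestamp": "2026-04-13T17:59:35Z"}\n'
        '{"role": "user", "content": "hello", '
        '"timestamp": "2026-04-13T18:00:01Z"}\n'
    )
    session = SessionReader(sessions_dir=d).read_session(p)
    print(session["session_id"], session["message_count"])  # -> demo 1
```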
scripts/test_harvest_prompt_comprehensive.py (Normal file, 212 lines)
@@ -0,0 +1,212 @@
#!/usr/bin/env python3
"""
Comprehensive test script for the knowledge extraction prompt.
Validates prompt structure, requirements, and consistency.
"""

import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path


def test_prompt_structure():
    """Test that the prompt has the required structure."""
    prompt_path = Path("templates/harvest-prompt.md")
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"

    content = prompt_path.read_text()

    # Check for required sections
    required_sections = [
        "System Prompt",
        "Instructions",
        "Categories",
        "Output Format",
        "Confidence Scoring",
        "Constraints",
        "Example"
    ]

    for section in required_sections:
        if section.lower() not in content.lower():
            return False, f"Missing required section: {section}"

    # Check for required categories
    required_categories = ["fact", "pitfall", "pattern", "tool-quirk", "question"]
    for category in required_categories:
        if category not in content:
            return False, f"Missing required category: {category}"

    # Check for required output fields
    required_fields = ["fact", "category", "repo", "confidence"]
    for field in required_fields:
        if field not in content:
            return False, f"Missing required output field: {field}"

    # Check prompt size (should be ~1k tokens, roughly 4k chars)
    if len(content) > 5000:
        return False, f"Prompt too large: {len(content)} chars (max ~5000)"

    if len(content) < 1000:
        return False, f"Prompt too small: {len(content)} chars (min ~1000)"

    return True, "Prompt structure is valid"


def test_confidence_scoring():
    """Test that confidence scoring is properly defined."""
    prompt_path = Path("templates/harvest-prompt.md")
    content = prompt_path.read_text()

    # Check for confidence scale definitions
    confidence_levels = [
        ("0.9-1.0", "explicitly stated"),
        ("0.7-0.8", "clearly implied"),
        ("0.5-0.6", "suggested"),
        ("0.3-0.4", "inferred"),
        ("0.1-0.2", "speculative")
    ]

    for level, description in confidence_levels:
        if level not in content:
            return False, f"Missing confidence level: {level}"
        if description.lower() not in content.lower():
            return False, f"Missing confidence description: {description}"

    return True, "Confidence scoring is properly defined"


def test_example_quality():
    """Test that examples are clear and complete."""
    prompt_path = Path("templates/harvest-prompt.md")
    content = prompt_path.read_text()

    # Check for example input/output
    if "example" not in content.lower():
        return False, "No examples provided"

    # Limit the search to the example section
    example_section = content[content.lower().find("example"):]

    # Look for a JSON example
    json_match = re.search(r'\{[\s\S]*"knowledge"[\s\S]*\}', example_section)
    if not json_match:
        return False, "No JSON example found"

    example_json = json_match.group(0)

    # Check that the example covers all categories
    for category in ["fact", "pitfall", "pattern", "tool-quirk", "question"]:
        if category not in example_json:
            return False, f"Example missing category: {category}"

    return True, "Examples are clear and complete"


def test_constraint_coverage():
    """Test that constraints cover all requirements."""
    prompt_path = Path("templates/harvest-prompt.md")
    content = prompt_path.read_text()

    required_constraints = [
        "No hallucination",
        "only extract",
        "explicitly",
        "partial",
        "failed sessions",
        "1k tokens"
    ]

    for constraint in required_constraints:
        if constraint.lower() not in content.lower():
            return False, f"Missing constraint: {constraint}"

    return True, "Constraints cover all requirements"


def test_test_sessions():
    """Test that test sessions exist and are valid."""
    test_sessions_dir = Path("test_sessions")
    if not test_sessions_dir.exists():
        return False, "test_sessions directory not found"

    session_files = list(test_sessions_dir.glob("*.jsonl"))
    if len(session_files) < 5:
        return False, f"Only {len(session_files)} test sessions found, need 5"

    # Check each session file
    for session_file in session_files:
        content = session_file.read_text()
        lines = content.strip().split("\n")

        # Check that each line is valid JSON
        for i, line in enumerate(lines, 1):
            try:
                json.loads(line)
            except json.JSONDecodeError as e:
                return False, f"Invalid JSON in {session_file.name}, line {i}: {e}"

    return True, f"Found {len(session_files)} valid test sessions"


def run_all_tests():
    """Run all tests and return results."""
    tests = [
        ("Prompt Structure", test_prompt_structure),
        ("Confidence Scoring", test_confidence_scoring),
        ("Example Quality", test_example_quality),
        ("Constraint Coverage", test_constraint_coverage),
        ("Test Sessions", test_test_sessions)
    ]

    results = []
    all_passed = True

    for test_name, test_func in tests:
        try:
            passed, message = test_func()
            results.append({
                "test": test_name,
                "passed": passed,
                "message": message
            })
            if not passed:
                all_passed = False
        except Exception as e:
            results.append({
                "test": test_name,
                "passed": False,
                "message": f"Error: {str(e)}"
            })
            all_passed = False

    # Print results
    print("=" * 60)
    print("HARVEST PROMPT TEST RESULTS")
    print("=" * 60)

    for result in results:
        status = "✓ PASS" if result["passed"] else "✗ FAIL"
        print(f"{status}: {result['test']}")
        print(f"  {result['message']}")
        print()

    print("=" * 60)
    if all_passed:
        print("ALL TESTS PASSED!")
    else:
        print("SOME TESTS FAILED!")
    print("=" * 60)

    return all_passed, results


if __name__ == "__main__":
    all_passed, results = run_all_tests()

    # Save results to file
    with open("test_results.json", "w") as f:
        json.dump({
            "all_passed": all_passed,
            "results": results,
            # Record the actual run time rather than a fixed literal
            "timestamp": datetime.now(timezone.utc).isoformat()
        }, f, indent=2)

    print("Results saved to test_results.json")

    # Exit with appropriate code
    sys.exit(0 if all_passed else 1)
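The report the script writes can be consumed downstream; a short sketch of a hypothetical follow-up step, with the schema taken from the json.dump call above:

# Hypothetical CI follow-up: summarize failures from test_results.json.
import json

with open("test_results.json") as f:
    report = json.load(f)

failed = [r["test"] for r in report["results"] if not r["passed"]]
if failed:
    print("Failing checks:", ", ".join(failed))
else:
    print("All checks passed at", report["timestamp"])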
test_sessions/session_failure.jsonl (new file, 18 lines)
@@ -0,0 +1,18 @@
{"role": "user", "content": "Deploy the application to production"}
{"role": "assistant", "content": "I'll deploy the application to production.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ssh deploy@prod-server 'cd /app && git pull && docker-compose up -d'"}}]}
{"role": "tool", "content": "ssh: connect to host prod-server port 22: Connection refused"}
{"role": "assistant", "content": "Connection refused. Let me check the server status.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ping -c 3 prod-server"}}]}
{"role": "tool", "content": "PING prod-server (192.168.1.100): 56 data bytes\nRequest timeout for icmp_seq 0\nRequest timeout for icmp_seq 1\nRequest timeout for icmp_seq 2\n--- prod-server ping statistics ---\n3 packets transmitted, 0 packets received, 100.0% packet loss"}
{"role": "assistant", "content": "The server appears to be down. Let me check if there's an alternative deployment method."}
{"role": "user", "content": "Try using the VPS instead"}
{"role": "assistant", "content": "I'll try deploying to the VPS.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ssh deploy@vps-server 'cd /app && git pull && docker-compose up -d'"}}]}
{"role": "tool", "content": "Permission denied (publickey)."}
{"role": "assistant", "content": "Permission denied. The SSH key might not be set up correctly. Let me check the SSH configuration.", "tool_calls": [{"function": "terminal", "arguments": {"command": "cat ~/.ssh/config"}}]}
{"role": "tool", "content": "Host vps-server\n HostName 167.99.126.228\n User deploy\n IdentityFile ~/.ssh/id_rsa_vps"}
{"role": "assistant", "content": "The SSH config looks correct. Let me check if the key exists.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ls -la ~/.ssh/id_rsa_vps"}}]}
{"role": "tool", "content": "ls: /Users/user/.ssh/id_rsa_vps: No such file or directory"}
{"role": "assistant", "content": "The SSH key doesn't exist. This is a pitfall - we need to generate the SSH key first."}
{"role": "user", "content": "Generate a new SSH key for the VPS"}
{"role": "assistant", "content": "I'll generate a new SSH key.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa_vps -N ''"}}]}
{"role": "tool", "content": "Generating public/private rsa key pair.\nYour identification has been saved in /Users/user/.ssh/id_rsa_vps\nYour public key has been saved in /Users/user/.ssh/id_rsa_vps.pub\nThe key fingerprint is:\nSHA256:abc123... user@machine"}
{"role": "assistant", "content": "SSH key generated. Now I need to add it to the VPS."}
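For context, one illustrative entry this fixture is meant to yield. Field names follow the required output fields checked in the test script; the values are hypothetical, not actual harvester output:

# Hypothetical extraction from session_failure.jsonl; illustrative values only.
expected_pitfall = {
    "fact": "SSH to vps-server fails with 'Permission denied (publickey)' when ~/.ssh/id_rsa_vps is missing; generate the key with ssh-keygen first.",
    "category": "pitfall",
    "repo": "global",  # placeholder scope
    "confidence": 0.9,
}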
test_sessions/session_partial.jsonl (new file, 14 lines)
@@ -0,0 +1,14 @@
{"role": "user", "content": "Check the Gitea API for open issues"}
{"role": "assistant", "content": "I'll check the Gitea API for open issues.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -H 'Authorization: token $GITEA_TOKEN' https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/compounding-intelligence/issues?state=open"}}]}
{"role": "tool", "content": "curl: (6) Could not resolve host: forge.alexanderwhitestone.com"}
{"role": "assistant", "content": "DNS resolution failed. Let me check the network connectivity.", "tool_calls": [{"function": "terminal", "arguments": {"command": "nslookup forge.alexanderwhitestone.com"}}]}
{"role": "tool", "content": "Server: 8.8.8.8\nAddress: 8.8.8.8#53\n\n** server can't find forge.alexanderwhitestone.com: NXDOMAIN"}
{"role": "assistant", "content": "The domain doesn't resolve. This might be a DNS issue. Let me try using the IP address directly.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -H 'Authorization: token $GITEA_TOKEN' https://167.99.126.228/api/v1/repos/Timmy_Foundation/compounding-intelligence/issues?state=open"}}]}
{"role": "tool", "content": "curl: (60) SSL certificate problem: self signed certificate"}
{"role": "assistant", "content": "SSL certificate issue. The server is using a self-signed certificate. Let me try with -k flag to skip SSL verification.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -k -H 'Authorization: token $GITEA_TOKEN' https://167.99.126.228/api/v1/repos/Timmy_Foundation/compounding-intelligence/issues?state=open"}}]}
{"role": "tool", "content": "[]"}
{"role": "assistant", "content": "No open issues found. The API returned an empty array."}
{"role": "user", "content": "Check if there are any pull requests"}
{"role": "assistant", "content": "Let me check for pull requests.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -k -H 'Authorization: token $GITEA_TOKEN' https://167.99.126.228/api/v1/repos/Timmy_Foundation/compounding-intelligence/pulls?state=open"}}]}
{"role": "tool", "content": "[{\"id\": 20, \"title\": \"feat: add harvester.py + session_reader.py — session knowledge extractor (closes #8)\", \"state\": \"open\", \"user\": {\"login\": \"Rockachopa\"}, \"created_at\": \"2026-04-14T17:28:20Z\"}]"}
{"role": "assistant", "content": "Found 1 open pull request: PR #20 - \"feat: add harvester.py + session_reader.py — session knowledge extractor (closes #8)\" by Rockachopa."}
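Likewise, an illustrative entry for this fixture (hypothetical values):

# Hypothetical extraction from session_partial.jsonl; illustrative values only.
expected_tool_quirk = {
    "fact": "The Gitea API at 167.99.126.228 presents a self-signed certificate, so curl needs -k to skip SSL verification.",
    "category": "tool-quirk",
    "repo": "global",  # placeholder scope
    "confidence": 0.8,
}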
test_sessions/session_patterns.jsonl (new file, 10 lines)
@@ -0,0 +1,10 @@
{"role": "user", "content": "How do I deploy a new version of the application?"}
{"role": "assistant", "content": "Here's the deployment process:\n1. Run tests: pytest\n2. Build Docker image: docker build -t app:latest .\n3. Push to registry: docker push registry.example.com/app:latest\n4. Update production: ssh prod 'cd /app && docker-compose pull && docker-compose up -d'\n5. Verify: curl https://app.example.com/health"}
{"role": "user", "content": "What if tests fail?"}
{"role": "assistant", "content": "If tests fail:\n1. Check the error logs: pytest -v\n2. Fix the failing tests\n3. Re-run tests: pytest\n4. If still failing, check recent changes: git log --oneline -10\n5. Consider reverting: git revert HEAD"}
{"role": "user", "content": "What if the Docker build fails?"}
{"role": "assistant", "content": "If Docker build fails:\n1. Check Dockerfile syntax: docker build --no-cache -t app:latest .\n2. Check for missing dependencies: cat requirements.txt\n3. Check disk space: df -h\n4. Clean up Docker: docker system prune -f\n5. Try building with verbose output: docker build --progress=plain -t app:latest ."}
{"role": "user", "content": "What if the push to registry fails?"}
{"role": "assistant", "content": "If push to registry fails:\n1. Check registry credentials: docker login registry.example.com\n2. Check registry space: curl https://registry.example.com/v2/_catalog\n3. Check image size: docker images app:latest\n4. Try tagging with version: docker tag app:latest registry.example.com/app:v1.0.0\n5. Push with version tag: docker push registry.example.com/app:v1.0.0"}
{"role": "user", "content": "What if the production update fails?"}
{"role": "assistant", "content": "If production update fails:\n1. Check SSH connection: ssh prod 'echo connected'\n2. Check disk space on prod: ssh prod 'df -h'\n3. Check Docker status: ssh prod 'docker ps -a'\n4. Check logs: ssh prod 'docker-compose logs --tail=100'\n5. Rollback: ssh prod 'cd /app && git checkout HEAD~1 && docker-compose up -d'"}
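Likewise, an illustrative entry for this fixture (hypothetical values):

# Hypothetical extraction from session_patterns.jsonl; illustrative values only.
expected_pattern = {
    "fact": "Deployment flow: pytest, docker build, push to the registry, docker-compose pull && up -d on prod, then curl the /health endpoint.",
    "category": "pattern",
    "repo": "global",  # placeholder scope
    "confidence": 0.9,
}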
test_sessions/session_questions.jsonl (new file, 12 lines)
@@ -0,0 +1,12 @@
{"role": "user", "content": "I need to implement a new feature for the application"}
{"role": "assistant", "content": "I can help you implement a new feature. What feature would you like to add?"}
{"role": "user", "content": "I want to add user authentication"}
{"role": "assistant", "content": "User authentication is a good feature. I have a few questions:\n1. What authentication method do you prefer? (JWT, OAuth, Session-based)\n2. Do you need social login? (Google, GitHub, etc.)\n3. What user data do you need to store?\n4. Do you need role-based access control?\n5. What's your database? (PostgreSQL, MySQL, SQLite)"}
{"role": "user", "content": "I prefer JWT, no social login, just email and password, no roles, PostgreSQL"}
{"role": "assistant", "content": "Great choices. Here's the implementation plan:\n1. Create user model with email and password hash\n2. Create JWT token generation and validation\n3. Create login endpoint: POST /api/auth/login\n4. Create register endpoint: POST /api/auth/register\n5. Create middleware for protected routes\n6. Add password hashing with bcrypt\n7. Add rate limiting for login attempts\n8. Add password reset functionality\n9. Add email verification\n10. Test all endpoints"}
{"role": "user", "content": "What about password reset?"}
{"role": "assistant", "content": "For password reset:\n1. Create reset token table\n2. Create forgot password endpoint: POST /api/auth/forgot-password\n3. Send reset email with token\n4. Create reset password endpoint: POST /api/auth/reset-password\n5. Validate reset token and expiration\n6. Update password and delete token\n7. Send confirmation email"}
{"role": "user", "content": "What about email verification?"}
{"role": "assistant", "content": "For email verification:\n1. Add email_verified field to user model\n2. Create verification token table\n3. Send verification email on registration\n4. Create verify email endpoint: GET /api/auth/verify-email?token=...\n5. Validate token and update email_verified\n6. Add middleware to check email_verified for certain routes\n7. Add resend verification email endpoint"}
{"role": "user", "content": "What about rate limiting?"}
{"role": "assistant", "content": "For rate limiting:\n1. Use Redis for rate limit storage\n2. Create rate limit middleware\n3. Set limits: 5 login attempts per minute, 3 password resets per hour\n4. Return 429 Too Many Requests when limit exceeded\n5. Add X-RateLimit headers to responses\n6. Log rate limit violations\n7. Add whitelist for trusted IPs"}
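Likewise, an illustrative entry for this fixture (hypothetical values):

# Hypothetical extraction from session_questions.jsonl; illustrative values only.
expected_question = {
    "fact": "Open question: which provider should send the verification and password-reset emails for the JWT auth feature?",
    "category": "question",
    "repo": "global",  # placeholder scope
    "confidence": 0.5,
}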
test_sessions/session_success.jsonl (new file, 13 lines)
@@ -0,0 +1,13 @@
{"role": "user", "content": "Clone the repo at forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence"}
{"role": "assistant", "content": "I'll clone the repository for you.", "tool_calls": [{"function": "terminal", "arguments": {"command": "git clone https://forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence.git"}}]}
{"role": "tool", "content": "Cloning into 'compounding-intelligence'...\nremote: Enumerating objects: 15, done.\nremote: Counting objects: 100% (15/15), done.\nremote: Compressing objects: 100% (12/12), done.\nremote: Total 15 (delta 2), reused 0 (delta 0), pack-reused 0\nReceiving objects: 100% (15/15), done.\nResolving deltas: 100% (2/2), done."}
{"role": "assistant", "content": "Repository cloned successfully. Let me check the structure.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ls -la compounding-intelligence/"}}]}
{"role": "tool", "content": "total 8\ndrwxr-xr-x 5 user staff 160 Apr 14 17:21 .\ndrwxr-xr-x 12 user staff 384 Apr 14 17:21 ..\ndrwxr-xr-x 13 user staff 416 Apr 14 17:21 .git\n-rw-r--r-- 1 user staff 1234 Apr 14 17:21 README.md\ndrwxr-xr-x 3 user staff 96 Apr 14 17:21 knowledge\n-rw-r--r-- 1 user staff 456 Apr 14 17:21 metrics\ndrwxr-xr-x 4 user staff 128 Apr 14 17:21 scripts\ndrwxr-xr-x 3 user staff 96 Apr 14 17:21 templates"}
{"role": "user", "content": "What files are in the templates directory?"}
{"role": "assistant", "content": "Let me check the templates directory.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ls -la compounding-intelligence/templates/"}}]}
{"role": "tool", "content": "total 0\ndrwxr-xr-x 3 user staff 96 Apr 14 17:21 .\ndrwxr-xr-x 5 user staff 160 Apr 14 17:21 ..\n-rw-r--r-- 1 user staff 0 Apr 14 17:21 .gitkeep"}
{"role": "assistant", "content": "The templates directory currently only contains a .gitkeep file. No templates have been added yet."}
{"role": "user", "content": "Create a harvest-prompt.md template in the templates directory."}
{"role": "assistant", "content": "I'll create the harvest-prompt.md template for you.", "tool_calls": [{"function": "terminal", "arguments": {"command": "cat > compounding-intelligence/templates/harvest-prompt.md << 'EOF'\n# Knowledge Extraction Prompt\n\nExtract knowledge from session transcripts.\nEOF"}}]}
{"role": "tool", "content": ""}
{"role": "assistant", "content": "Template created successfully. The harvest-prompt.md file is now in the templates directory."}
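Likewise, an illustrative entry for this fixture (hypothetical values):

# Hypothetical extraction from session_success.jsonl; illustrative values only.
expected_fact = {
    "fact": "The compounding-intelligence repo is hosted at forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence and keeps harvest-prompt.md in templates/.",
    "category": "fact",
    "repo": "global",  # placeholder scope
    "confidence": 0.9,
}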