config: update channel_directory.json,config.yaml,tasks.py

2026-03-27 16:00:29 -04:00
parent f9be0eb481
commit 3b34faeb17
3 changed files with 108 additions and 20 deletions
--- a/tasks.py
+++ b/tasks.py
@@ -28,11 +28,11 @@ HEARTBEAT_MODEL = "hermes4:14b"
 FALLBACK_MODEL = "hermes3:8b"


-def hermes_local(prompt, model=None, caller_tag=None):
-    """Call a local Ollama model through the Hermes harness.
+def hermes_local(prompt, model=None, caller_tag=None, toolsets=None):
+    """Call a local model through the Hermes harness.

-    Uses provider="local-ollama" which routes through the custom_providers
-    entry in config.yaml → Ollama at localhost:11434.
+    Uses provider="local-llama.cpp" which routes through the custom_providers
+    entry in config.yaml → llama-server at localhost:8081.
    Returns response text or None on failure.
    Every call creates a Hermes session with telemetry.
    """
@@ -53,13 +53,16 @@ def hermes_local(prompt, model=None, caller_tag=None):

        buf = io.StringIO()
        err = io.StringIO()
-        with redirect_stdout(buf), redirect_stderr(err):
-            hermes_main(
+        kwargs = dict(
                query=tagged,
                model=_model,
-                provider="local-ollama",
+                provider="local-llama.cpp",
                quiet=True,
            )
+        if toolsets:
+            kwargs["toolsets"] = toolsets
+        with redirect_stdout(buf), redirect_stderr(err):
+            hermes_main(**kwargs)
        output = buf.getvalue().strip()
        # Strip session_id line from quiet output
        lines = [l for l in output.split("\n") if not l.startswith("session_id:")]
@@ -98,6 +101,92 @@ def hermes_local(prompt, model=None, caller_tag=None):
        os.chdir(old_cwd)


+# ── Know Thy Father: Twitter Archive Ingestion ───────────────────────
+
+ARCHIVE_DIR = TIMMY_HOME / "twitter-archive"  # presumably the ~/.timmy/twitter-archive/ workspace named in the prompts — confirm TIMMY_HOME
+ARCHIVE_CHECKPOINT = ARCHIVE_DIR / "checkpoint.json"  # its absence selects the first-run prompt
+ARCHIVE_LOCK = ARCHIVE_DIR / ".lock"  # NOTE(review): not referenced in the visible code — confirm it is still needed
+
+ARCHIVE_PROMPT = (  # resume prompt, sent once checkpoint.json exists
+    "You are Timmy. Resume your work on the Twitter archive. "
+    "Your workspace is ~/.timmy/twitter-archive/. "
+    "Read checkpoint.json and UNDERSTANDING.md first. "
+    "Then process the next batch. "
+    "You know the drill — read your own prior work, assess where you are, "
+    "process new data, update your understanding, reflect, and plan for "
+    "the next iteration."
+)
+
+ARCHIVE_SRC = (  # only interpolated into the first-run prompt text; "~" is not expanded by this code
+    "~/Downloads/twitter-2026-03-27-d4471cc6eb6703034d592f870933561ebee374d9d9b90c9b8923abff064afc1e/data"
+)
+
+ARCHIVE_FIRST_RUN_PROMPT = (  # bootstrap prompt, sent while checkpoint.json does not exist yet
+    "You are Timmy. Your father Alexander's full Twitter archive is at: "
+    f"{ARCHIVE_SRC}/\n\n"
+    "Your workspace is ~/.timmy/twitter-archive/\n\n"
+    "STEP 1 — EXTRACTION (use terminal with python3, NOT read_file):\n"
+    "The .js files are too large for read_file but trivial for Python.\n"
+    "Write a python3 script via terminal that:\n"
+    "  - Opens tweets.js, strips everything before the first '[', json.loads the rest\n"
+    "  - Separates originals (full_text does NOT start with 'RT @') from retweets\n"
+    "  - Sorts both chronologically by created_at\n"
+    "  - Writes extracted/tweets.jsonl and extracted/retweets.jsonl (one JSON per line)\n"
+    "  - Writes extracted/manifest.json with counts, date range, source file\n"
+    "The whole file is 12MB. Python handles it in under a second.\n\n"
+    "STEP 2 — FIRST READ:\n"
+    "Read the first 50 lines of extracted/tweets.jsonl (your originals, chronological).\n"
+    "Read them carefully — this is your father talking.\n"
+    "Note his voice, humor, what he cares about, who he talks to, emotional tone, "
+    "recurring themes. Quote him directly when something stands out.\n\n"
+    "STEP 3 — WRITE:\n"
+    "Write notes/batch_001.md — your real observations, not a book report.\n"
+    "Create UNDERSTANDING.md — your living model of who Alexander is. "
+    "It starts now and you'll update it every batch.\n\n"
+    "STEP 4 — CHECKPOINT:\n"
+    "Write checkpoint.json: "
+    '{"data_source": "tweets", "next_offset": 50, "batches_completed": 1, '
+    '"phase": "discovery", "confidence": "<your honest assessment>", '
+    '"next_focus": "<what you want to look for next>", "understanding_version": 1}'
+)
+
+
+@huey.task()
+@huey.lock_task("know-thy-father")
+def know_thy_father():
+    """Process one batch of Alexander's Twitter archive.
+
+    Single batch, no internal loop; the lock prevents overlapping runs.
+    Uses the first-run prompt until checkpoint.json exists, then the resume one.
+    Returns {"status": "error", ...} on empty output, else checkpoint progress.
+    """
+    is_first_run = not ARCHIVE_CHECKPOINT.exists()
+
+    prompt = ARCHIVE_FIRST_RUN_PROMPT if is_first_run else ARCHIVE_PROMPT
+
+    response = hermes_local(
+        prompt=prompt,
+        caller_tag="know-thy-father",
+        toolsets="file,terminal",
+    )
+
+    if not response:
+        return {"status": "error", "reason": "hermes_local returned None"}
+
+    # Checkpoint is model-written: may be missing, malformed, or not an object.
+    try:
+        cp = json.loads(ARCHIVE_CHECKPOINT.read_text())
+    except (OSError, ValueError):  # unreadable file, bad JSON, or bad encoding
+        cp = {}
+    cp = cp if isinstance(cp, dict) else {}  # the .get() calls need a mapping
+    return {
+        "status": "ok",
+        "batch": cp.get("batches_completed", 0),
+        "phase": cp.get("phase", "unknown"),
+        "confidence": cp.get("confidence", "unknown"),
+    }
+
+
 # ── Existing: Orchestration ──────────────────────────────────────────

@huey.periodic_task(crontab(minute="*/15"))