diff --git a/cron/jobs.json b/cron/jobs.json index 3acc6ce9..ab4aac63 100644 --- a/cron/jobs.json +++ b/cron/jobs.json @@ -168,7 +168,35 @@ "paused_reason": null, "skills": [], "skill": null + }, + { + "id": "overnight-rd-nightly", + "name": "Overnight R&D Loop", + "prompt": "Run the overnight R&D automation: Deep Dive paper synthesis, tightening loop for tool-use training data, DPO export sweep, morning briefing prep. All local inference via Ollama.", + "schedule": { + "kind": "cron", + "expr": "0 2 * * *", + "display": "0 2 * * * (10 PM EDT)" + }, + "schedule_display": "Nightly at 10 PM EDT", + "repeat": { + "times": null, + "completed": 0 + }, + "enabled": true, + "created_at": "2026-04-13T02:00:00+00:00", + "next_run_at": null, + "last_run_at": null, + "last_status": null, + "last_error": null, + "deliver": "local", + "origin": "perplexity/overnight-rd-automation", + "state": "scheduled", + "paused_at": null, + "paused_reason": null, + "skills": [], + "skill": null } ], - "updated_at": "2026-04-07T15:00:00+00:00" + "updated_at": "2026-04-13T02:00:00+00:00" } diff --git a/docs/overnight-rd.md b/docs/overnight-rd.md new file mode 100644 index 00000000..8e390cf9 --- /dev/null +++ b/docs/overnight-rd.md @@ -0,0 +1,68 @@ +# Overnight R&D Automation + +**Schedule**: Nightly at 10 PM EDT (02:00 UTC) +**Duration**: ~2-4 hours (self-limiting, finishes before 6 AM morning report) +**Cost**: $0 — all local Ollama inference + +## Phases + +### Phase 1: Deep Dive Intelligence +Runs the `intelligence/deepdive/pipeline.py` from the-nexus: +- Aggregates arXiv CS.AI, CS.CL, CS.LG RSS feeds (last 24h) +- Fetches OpenAI, Anthropic, DeepMind blog updates +- Filters for relevance using sentence-transformers embeddings +- Synthesizes a briefing using local Gemma 4 12B +- Saves briefing to `~/briefings/` + +### Phase 2: Tightening Loop +Exercises Timmy's local tool-use capability: +- 10 tasks × 3 cycles = 30 task attempts per night +- File reading, writing, searching against real 
def _load_overnight_rd_summary():
    """Load the latest overnight R&D summary for morning report enrichment.

    Reads ``overnight-rd/latest_summary.md`` under ``TIMMY_HOME`` and returns
    its text only when the ``Started:`` stamp inside it is recent enough.

    Freshness rules:
      * A full ISO-8601 stamp (``Started: 2026-04-13T02:00:00+00:00``) must be
        at most 24 hours old.
      * A date-only stamp (``Started: 2026-04-13``) falls back to a day-level
        check and is accepted when at most one calendar day old (UTC).
      * A summary without any ``Started:`` line is returned unchanged.

    Returns:
        The summary text, or ``None`` when the file is missing, unreadable,
        carries an unparseable date, or is stale.
    """
    import re
    from datetime import timedelta

    summary_path = TIMMY_HOME / "overnight-rd" / "latest_summary.md"
    try:
        text = summary_path.read_text()
    except (OSError, ValueError):
        # Best-effort enrichment: a missing or undecodable file simply means
        # there is no overnight section to add. (UnicodeDecodeError is a
        # ValueError subclass.)
        return None

    stamp = re.search(r"Started: (\d{4}-\d{2}-\d{2})(\S*)", text)
    if stamp is None:
        return text

    date_part, time_part = stamp.group(1), stamp.group(2)
    now = datetime.now(timezone.utc)

    try:
        started = datetime.fromisoformat(date_part + time_part) if time_part else None
    except ValueError:
        # Trailing characters that are not a valid ISO time: use the date only.
        started = None

    if started is not None:
        if started.tzinfo is None:
            # Naive stamp: assume UTC so it can be compared with `now`.
            started = started.replace(tzinfo=timezone.utc)
        is_stale = now - started > timedelta(hours=24)
    else:
        try:
            summary_date = datetime.strptime(date_part, "%Y-%m-%d").date()
        except ValueError:
            return None
        is_stale = (now.date() - summary_date).days > 1

    return None if is_stale else text
Is Anthropic present anywhere?", + "toolsets": "file", + }, + { + "id": "write-overnight-log", + "prompt": "Write a file to {results_dir}/overnight_checkpoint.md with: # Overnight Checkpoint\nTimestamp: {timestamp}\nModel: {model}\nStatus: Running\nSovereignty and service always.", + "toolsets": "file", + }, + { + "id": "search-cloud-markers", + "prompt": "Search files in ~/.hermes/bin/ for the string 'chatgpt.com'. Report which files and lines.", + "toolsets": "file", + }, + { + "id": "read-decisions", + "prompt": "Read ~/.timmy/decisions.md. What is the most recent decision?", + "toolsets": "file", + }, + { + "id": "multi-read-sovereignty", + "prompt": "Read both ~/.timmy/SOUL.md and ~/.hermes/config.yaml. Does the config honor the soul's sovereignty requirement? Yes or no with evidence.", + "toolsets": "file", + }, + { + "id": "search-hermes-skills", + "prompt": "Search for *.md files in ~/.hermes/skills/. List the first 10 skill names.", + "toolsets": "file", + }, + { + "id": "read-heartbeat", + "prompt": "Read the most recent file in ~/.timmy/heartbeat/. 
Summarize what Timmy perceived.", + "toolsets": "file", + }, +] + + +def _run_overnight_tightening_task(task, cycle, results_dir, model): + """Run a single tightening task through Hermes with explicit Ollama provider.""" + from datetime import datetime + task_id = task["id"] + prompt = task["prompt"].replace( + "{results_dir}", str(results_dir) + ).replace( + "{timestamp}", datetime.now().isoformat() + ).replace( + "{model}", model + ) + + result = { + "task_id": task_id, + "cycle": cycle, + "started_at": datetime.now(timezone.utc).isoformat(), + "prompt": prompt, + } + + started = time.time() + try: + hermes_result = run_hermes_local( + prompt=prompt, + model=model, + caller_tag=f"overnight-rd-{task_id}", + system_prompt=OVERNIGHT_RD_SYSTEM_PROMPT, + skip_context_files=True, + skip_memory=True, + max_iterations=5, + ) + elapsed = time.time() - started + result["elapsed_seconds"] = round(elapsed, 2) + + if hermes_result: + result["response"] = hermes_result.get("response", "")[:2000] + result["session_id"] = hermes_result.get("session_id") + result["status"] = "pass" if hermes_result.get("response") else "empty" + else: + result["status"] = "empty" + result["response"] = "" + + except Exception as exc: + result["elapsed_seconds"] = round(time.time() - started, 2) + result["status"] = "error" + result["error"] = str(exc)[:500] + + result["finished_at"] = datetime.now(timezone.utc).isoformat() + return result + + +def _run_deepdive_phase(config_path=None): + """Run the Deep Dive aggregation + synthesis pipeline. + + Uses the existing pipeline.py from the-nexus/intelligence/deepdive. + Returns path to generated briefing or None. 
# NOTE(review): this fires at hour 22 on whatever clock the Huey consumer
# uses, while cron/jobs.json and docs/overnight-rd.md describe 02:00 UTC
# (10 PM EDT) — confirm the two schedules agree.
@huey.periodic_task(crontab(hour="22", minute="0"))  # 10 PM daily (server time)
def overnight_rd():
    """Run one pass of the overnight R&D pipeline and write its summary.

    The phases run sequentially within a single invocation:
      1. Deep Dive: call ``_run_deepdive_phase()`` and log its result dict.
      2. Tightening loop: run every entry of ``OVERNIGHT_TIGHTENING_TASKS``
         for three cycles on ``hermes4:14b``; an attempt that errors is
         retried once on ``gemma4:12b`` unless the error mentions
         ``Unknown provider``.
      3. DPO export: run ``session_export``.
      4. Summary: write ``rd_summary.md`` into the run directory and refresh
         ``overnight-rd/latest_summary.md`` under ``TIMMY_HOME``.

    Every phase-1 and phase-2 record is appended to ``rd_log.jsonl`` in the
    run directory as one JSON object per line.

    Returns:
        dict with ``run_id``, ``phases`` (per-phase result dicts) and
        ``summary_path`` (string path of this run's ``rd_summary.md``).
    """
    now = datetime.now(timezone.utc)
    run_id = now.strftime("%Y%m%d_%H%M%S")
    results_dir = TIMMY_HOME / "overnight-rd" / run_id
    results_dir.mkdir(parents=True, exist_ok=True)

    rd_log = results_dir / "rd_log.jsonl"
    rd_summary = results_dir / "rd_summary.md"

    def log_event(record):
        # Open/append/close per record so earlier lines survive a crash.
        with open(rd_log, "a") as f:
            f.write(json.dumps(record) + "\n")

    phases = {}

    # ── Phase 1: Deep Dive ──────────────────────────────────────────
    phase1_start = time.time()
    deepdive_result = _run_deepdive_phase()
    phases["deepdive"] = {
        "elapsed_seconds": round(time.time() - phase1_start, 2),
        **deepdive_result,
    }
    log_event({"phase": "deepdive", "timestamp": now.isoformat(), **deepdive_result})

    # ── Phase 2: Tightening Loop (3 cycles) ─────────────────────────
    tightening_model = "hermes4:14b"
    fallback_model = "gemma4:12b"
    max_cycles = 3
    tightening_results = []

    for cycle in range(1, max_cycles + 1):
        for task in OVERNIGHT_TIGHTENING_TASKS:
            used_model = tightening_model
            result = _run_overnight_tightening_task(task, cycle, results_dir, used_model)

            # Retry once on the fallback model; an "Unknown provider" error
            # is a configuration problem a different model would not fix.
            if result["status"] == "error" and "Unknown provider" not in result.get("error", ""):
                used_model = fallback_model
                result = _run_overnight_tightening_task(task, cycle, results_dir, used_model)

            # Record which model produced this row; without it the log
            # cannot distinguish primary from fallback responses.
            result["model"] = used_model
            tightening_results.append(result)
            log_event(result)

            time.sleep(2)  # Pace local inference

        if cycle < max_cycles:
            time.sleep(10)  # Pause between cycles (pointless after the last)

    passes = sum(1 for r in tightening_results if r["status"] == "pass")
    errors = sum(1 for r in tightening_results if r["status"] == "error")
    total = len(tightening_results)
    avg_time = sum(r.get("elapsed_seconds", 0) for r in tightening_results) / max(total, 1)

    phases["tightening"] = {
        "cycles": max_cycles,
        "total_tasks": total,
        "passes": passes,
        "errors": errors,
        "avg_response_time": round(avg_time, 2),
        "pass_rate": f"{100 * passes // max(total, 1)}%",
    }

    # ── Phase 3: DPO Export Sweep ───────────────────────────────────
    # Calling a Huey task object directly only enqueues it and returns a
    # result handle, which would always be reported as "ok" below. Run it
    # in-process when the wrapper offers call_local(); a plain function is
    # called as before.
    # NOTE(review): assumes session_export is a Huey task — confirm.
    try:
        run_export = getattr(session_export, "call_local", session_export)
        export_result = run_export()
        phases["dpo_export"] = export_result if isinstance(export_result, dict) else {"status": "ok"}
    except Exception as exc:
        phases["dpo_export"] = {"status": "error", "error": str(exc)}

    # ── Phase 4: Compile Summary ────────────────────────────────────
    # The "Started:" line below is what the morning-report loader parses.
    summary_lines = [
        f"# Overnight R&D Summary — {now.strftime('%Y-%m-%d')}",
        f"Run ID: {run_id}",
        f"Started: {now.isoformat()}",
        f"Finished: {datetime.now(timezone.utc).isoformat()}",
        "",
        "## Deep Dive",
        f"- Status: {phases['deepdive'].get('status', 'unknown')}",
        f"- Elapsed: {phases['deepdive'].get('elapsed_seconds', '?')}s",
    ]

    if phases["deepdive"].get("briefing_path"):
        summary_lines.append(f"- Briefing: {phases['deepdive']['briefing_path']}")

    summary_lines.extend([
        "",
        "## Tightening Loop",
        f"- Cycles: {max_cycles}",
        f"- Pass rate: {phases['tightening']['pass_rate']} ({passes}/{total})",
        f"- Avg response time: {avg_time:.1f}s",
        f"- Errors: {errors}",
        "",
        "## DPO Export",
        f"- Status: {phases.get('dpo_export', {}).get('status', 'unknown')}",
        "",
        "## Error Details",
    ])

    error_lines = [
        f"- {r['task_id']} (cycle {r['cycle']}): {r.get('error', '?')[:100]}"
        for r in tightening_results
        if r["status"] == "error"
    ]
    # An explicit marker keeps the section from looking truncated.
    summary_lines.extend(error_lines or ["- None"])

    summary_text = "\n".join(summary_lines) + "\n"
    rd_summary.write_text(summary_text)

    # Publish the "latest" copy via temp file + rename so a reader never
    # sees a half-written summary.
    latest_summary = TIMMY_HOME / "overnight-rd" / "latest_summary.md"
    tmp_summary = latest_summary.with_name(latest_summary.name + ".tmp")
    tmp_summary.write_text(summary_text)
    tmp_summary.replace(latest_summary)

    return {
        "run_id": run_id,
        "phases": phases,
        "summary_path": str(rd_summary),
    }