Compare commits
7 Commits
perplexity
...
feat/20260
| Author | SHA1 | Date | |
|---|---|---|---|
| 24887b615f | |||
| 1e43776be1 | |||
| e53fdd0f49 | |||
| aeefe5027d | |||
| 989bc29c96 | |||
| d923b9e38a | |||
| 22c4bb57fe |
82
bin/banned_provider_scan.py
Normal file
82
bin/banned_provider_scan.py
Normal file
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Anthropic Ban Enforcement Scanner.
|
||||
|
||||
Scans all config files, scripts, and playbooks for any references to
|
||||
banned Anthropic providers, models, or API keys.
|
||||
|
||||
Policy: Anthropic is permanently banned (2026-04-09).
|
||||
Refs: ansible/BANNED_PROVIDERS.yml
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
BANNED_PATTERNS = [
|
||||
r"anthropic",
|
||||
r"claude-sonnet",
|
||||
r"claude-opus",
|
||||
r"claude-haiku",
|
||||
r"claude-\d",
|
||||
r"api\.anthropic\.com",
|
||||
r"ANTHROPIC_API_KEY",
|
||||
r"CLAUDE_API_KEY",
|
||||
r"sk-ant-",
|
||||
]
|
||||
|
||||
ALLOWLIST_FILES = {
|
||||
"ansible/BANNED_PROVIDERS.yml", # The ban list itself
|
||||
"bin/banned_provider_scan.py", # This scanner
|
||||
"DEPRECATED.md", # Historical references
|
||||
}
|
||||
|
||||
SCAN_EXTENSIONS = {".py", ".yml", ".yaml", ".json", ".sh", ".toml", ".cfg", ".md"}
|
||||
|
||||
|
||||
def scan_file(filepath: str) -> list[tuple[int, str, str]]:
|
||||
"""Return list of (line_num, pattern_matched, line_text) violations."""
|
||||
violations = []
|
||||
try:
|
||||
with open(filepath, "r", errors="replace") as f:
|
||||
for i, line in enumerate(f, 1):
|
||||
for pattern in BANNED_PATTERNS:
|
||||
if re.search(pattern, line, re.IGNORECASE):
|
||||
violations.append((i, pattern, line.strip()))
|
||||
break
|
||||
except (OSError, UnicodeDecodeError):
|
||||
pass
|
||||
return violations
|
||||
|
||||
|
||||
def main():
|
||||
root = Path(os.environ.get("SCAN_ROOT", "."))
|
||||
total_violations = 0
|
||||
scanned = 0
|
||||
|
||||
for ext in SCAN_EXTENSIONS:
|
||||
for filepath in root.rglob(f"*{ext}"):
|
||||
rel = str(filepath.relative_to(root))
|
||||
if rel in ALLOWLIST_FILES:
|
||||
continue
|
||||
if ".git" in filepath.parts:
|
||||
continue
|
||||
|
||||
violations = scan_file(str(filepath))
|
||||
scanned += 1
|
||||
if violations:
|
||||
total_violations += len(violations)
|
||||
for line_num, pattern, text in violations:
|
||||
print(f"VIOLATION: {rel}:{line_num} [{pattern}] {text[:120]}")
|
||||
|
||||
print(f"\nScanned {scanned} files. Found {total_violations} violations.")
|
||||
|
||||
if total_violations > 0:
|
||||
print("\n❌ BANNED PROVIDER REFERENCES DETECTED. Fix before merging.")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("\n✓ No banned provider references found.")
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -7,7 +7,7 @@
|
||||
7|common sense fallbacks automatically.
|
||||
8|
|
||||
9|Fallback chain:
|
||||
10|1. Primary model (Anthropic) down -> switch config to local-llama.cpp
|
||||
10|1. Primary model (Kimi) down -> switch config to local-llama.cpp
|
||||
11|2. Gitea unreachable -> cache issues locally, retry on recovery
|
||||
12|3. VPS agents down -> alert + lazarus protocol
|
||||
13|4. Local llama.cpp down -> try Ollama, then alert-only mode
|
||||
@@ -61,16 +61,16 @@
|
||||
61|
|
||||
62|# ─── HEALTH CHECKS ───
|
||||
63|
|
||||
64|def check_anthropic():
|
||||
65| """Can we reach Anthropic API?"""
|
||||
66| key = os.environ.get("ANTHROPIC_API_KEY", "")
|
||||
64|def check_kimi():
|
||||
65| """Can we reach Kimi Coding API?"""
|
||||
66| key = os.environ.get("KIMI_API_KEY", "")
|
||||
67| if not key:
|
||||
68| # Check multiple .env locations
|
||||
69| for env_path in [HERMES_HOME / ".env", Path.home() / ".hermes" / ".env"]:
|
||||
70| if env_path.exists():
|
||||
71| for line in open(env_path):
|
||||
72| line = line.strip()
|
||||
73| if line.startswith("ANTHROPIC_API_KEY=***
|
||||
73| if line.startswith("KIMI_API_KEY=***
|
||||
74| key = line.split("=", 1)[1].strip().strip('"').strip("'")
|
||||
75| break
|
||||
76| if key:
|
||||
@@ -79,10 +79,10 @@
|
||||
79| return False, "no API key"
|
||||
80| code, out, err = run(
|
||||
81| f'curl -s -o /dev/null -w "%{{http_code}}" -H "x-api-key: {key}" '
|
||||
82| f'-H "anthropic-version: 2023-06-01" '
|
||||
83| f'https://api.anthropic.com/v1/messages -X POST '
|
||||
82| f'-H "x-api-provider: kimi-coding" '
|
||||
83| f'https://api.kimi.com/coding/v1/models -X POST '
|
||||
84| f'-H "content-type: application/json" '
|
||||
85| f'-d \'{{"model":"claude-haiku-4-5-20251001","max_tokens":1,"messages":[{{"role":"user","content":"ping"}}]}}\' ',
|
||||
85| f'-d \'{{"model":"kimi-k2.5","max_tokens":1,"messages":[{{"role":"user","content":"ping"}}]}}\' ',
|
||||
86| timeout=15
|
||||
87| )
|
||||
88| if code == 0 and out in ("200", "429"):
|
||||
@@ -128,7 +128,7 @@
|
||||
128|# ─── FALLBACK ACTIONS ───
|
||||
129|
|
||||
130|def fallback_to_local_model(cfg):
|
||||
131| """Switch primary model from Anthropic to local llama.cpp"""
|
||||
131| """Switch primary model from Kimi to local llama.cpp"""
|
||||
132| if not BACKUP_CONFIG.exists():
|
||||
133| shutil.copy2(CONFIG_PATH, BACKUP_CONFIG)
|
||||
134|
|
||||
@@ -176,8 +176,8 @@
|
||||
176| }
|
||||
177|
|
||||
178| # Check all systems
|
||||
179| anthropic_ok, anthropic_msg = check_anthropic()
|
||||
180| results["checks"]["anthropic"] = {"ok": anthropic_ok, "msg": anthropic_msg}
|
||||
179| kimi_ok, kimi_msg = check_kimi()
|
||||
180| results["checks"]["kimi-coding"] = {"ok": kimi_ok, "msg": kimi_msg}
|
||||
181|
|
||||
182| llama_ok, llama_msg = check_local_llama()
|
||||
183| results["checks"]["local_llama"] = {"ok": llama_ok, "msg": llama_msg}
|
||||
@@ -198,21 +198,21 @@
|
||||
198| vps_ok, vps_msg = check_vps(ip, name)
|
||||
199| results["checks"][f"vps_{name.lower()}"] = {"ok": vps_ok, "msg": vps_msg}
|
||||
200|
|
||||
201| current_provider = cfg.get("model", {}).get("provider", "anthropic")
|
||||
201| current_provider = cfg.get("model", {}).get("provider", "kimi-coding")
|
||||
202|
|
||||
203| # ─── FALLBACK LOGIC ───
|
||||
204|
|
||||
205| # Case 1: Primary (Anthropic) down, local available
|
||||
206| if not anthropic_ok and current_provider == "anthropic":
|
||||
205| # Case 1: Primary (Kimi) down, local available
|
||||
206| if not kimi_ok and current_provider == "kimi-coding":
|
||||
207| if llama_ok:
|
||||
208| msg = fallback_to_local_model(cfg)
|
||||
209| results["actions"].append(msg)
|
||||
210| state["active_fallbacks"].append("anthropic->local-llama")
|
||||
210| state["active_fallbacks"].append("kimi->local-llama")
|
||||
211| results["status"] = "degraded_local"
|
||||
212| elif ollama_ok:
|
||||
213| msg = fallback_to_ollama(cfg)
|
||||
214| results["actions"].append(msg)
|
||||
215| state["active_fallbacks"].append("anthropic->ollama")
|
||||
215| state["active_fallbacks"].append("kimi->ollama")
|
||||
216| results["status"] = "degraded_ollama"
|
||||
217| else:
|
||||
218| msg = enter_safe_mode(state)
|
||||
@@ -220,15 +220,15 @@
|
||||
220| results["status"] = "safe_mode"
|
||||
221|
|
||||
222| # Case 2: Already on fallback, check if primary recovered
|
||||
223| elif anthropic_ok and "anthropic->local-llama" in state.get("active_fallbacks", []):
|
||||
223| elif kimi_ok and "kimi->local-llama" in state.get("active_fallbacks", []):
|
||||
224| msg = restore_config()
|
||||
225| results["actions"].append(msg)
|
||||
226| state["active_fallbacks"].remove("anthropic->local-llama")
|
||||
226| state["active_fallbacks"].remove("kimi->local-llama")
|
||||
227| results["status"] = "recovered"
|
||||
228| elif anthropic_ok and "anthropic->ollama" in state.get("active_fallbacks", []):
|
||||
228| elif kimi_ok and "kimi->ollama" in state.get("active_fallbacks", []):
|
||||
229| msg = restore_config()
|
||||
230| results["actions"].append(msg)
|
||||
231| state["active_fallbacks"].remove("anthropic->ollama")
|
||||
231| state["active_fallbacks"].remove("kimi->ollama")
|
||||
232| results["status"] = "recovered"
|
||||
233|
|
||||
234| # Case 3: Gitea down — just flag it, work locally
|
||||
|
||||
@@ -19,25 +19,25 @@ PASS=0
|
||||
FAIL=0
|
||||
WARN=0
|
||||
|
||||
check_anthropic_model() {
|
||||
check_kimi_model() {
|
||||
local model="$1"
|
||||
local label="$2"
|
||||
local api_key="${ANTHROPIC_API_KEY:-}"
|
||||
local api_key="${KIMI_API_KEY:-}"
|
||||
|
||||
if [ -z "$api_key" ]; then
|
||||
# Try loading from .env
|
||||
api_key=$(grep '^ANTHROPIC_API_KEY=' "${HERMES_HOME:-$HOME/.hermes}/.env" 2>/dev/null | head -1 | cut -d= -f2- | tr -d "'\"" || echo "")
|
||||
api_key=$(grep '^KIMI_API_KEY=' "${HERMES_HOME:-$HOME/.hermes}/.env" 2>/dev/null | head -1 | cut -d= -f2- | tr -d "'\"" || echo "")
|
||||
fi
|
||||
|
||||
if [ -z "$api_key" ]; then
|
||||
log "SKIP [$label] $model -- no ANTHROPIC_API_KEY"
|
||||
log "SKIP [$label] $model -- no KIMI_API_KEY"
|
||||
return 0
|
||||
fi
|
||||
|
||||
response=$(curl -sf --max-time 10 -X POST \
|
||||
"https://api.anthropic.com/v1/messages" \
|
||||
"https://api.kimi.com/coding/v1/chat/completions" \
|
||||
-H "x-api-key: ${api_key}" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-H "x-api-provider: kimi-coding" \
|
||||
-H "content-type: application/json" \
|
||||
-d "{\"model\":\"${model}\",\"max_tokens\":1,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}" 2>&1 || echo "ERROR")
|
||||
|
||||
@@ -85,26 +85,26 @@ else:
|
||||
print('')
|
||||
" 2>/dev/null || echo "")
|
||||
|
||||
if [ -n "$primary" ] && [ "$provider" = "anthropic" ]; then
|
||||
if check_anthropic_model "$primary" "PRIMARY"; then
|
||||
if [ -n "$primary" ] && [ "$provider" = "kimi-coding" ]; then
|
||||
if check_kimi_model "$primary" "PRIMARY"; then
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
rc=$?
|
||||
if [ "$rc" -eq 1 ]; then
|
||||
FAIL=$((FAIL + 1))
|
||||
log "CRITICAL: Primary model $primary is DEAD. Loops will fail."
|
||||
log "Known good alternatives: claude-opus-4.6, claude-haiku-4-5-20251001"
|
||||
log "Known good alternatives: kimi-k2.5, google/gemini-2.5-pro"
|
||||
else
|
||||
WARN=$((WARN + 1))
|
||||
fi
|
||||
fi
|
||||
elif [ -n "$primary" ]; then
|
||||
log "SKIP [PRIMARY] $primary -- non-anthropic provider ($provider), no validator yet"
|
||||
log "SKIP [PRIMARY] $primary -- non-kimi provider ($provider), no validator yet"
|
||||
fi
|
||||
|
||||
# Cron model check (haiku)
|
||||
CRON_MODEL="claude-haiku-4-5-20251001"
|
||||
if check_anthropic_model "$CRON_MODEL" "CRON"; then
|
||||
CRON_MODEL="kimi-k2.5"
|
||||
if check_kimi_model "$CRON_MODEL" "CRON"; then
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
rc=$?
|
||||
|
||||
@@ -168,7 +168,35 @@
|
||||
"paused_reason": null,
|
||||
"skills": [],
|
||||
"skill": null
|
||||
},
|
||||
{
|
||||
"id": "overnight-rd-nightly",
|
||||
"name": "Overnight R&D Loop",
|
||||
"prompt": "Run the overnight R&D automation: Deep Dive paper synthesis, tightening loop for tool-use training data, DPO export sweep, morning briefing prep. All local inference via Ollama.",
|
||||
"schedule": {
|
||||
"kind": "cron",
|
||||
"expr": "0 2 * * *",
|
||||
"display": "0 2 * * * (10 PM EDT)"
|
||||
},
|
||||
"schedule_display": "Nightly at 10 PM EDT",
|
||||
"repeat": {
|
||||
"times": null,
|
||||
"completed": 0
|
||||
},
|
||||
"enabled": true,
|
||||
"created_at": "2026-04-13T02:00:00+00:00",
|
||||
"next_run_at": null,
|
||||
"last_run_at": null,
|
||||
"last_status": null,
|
||||
"last_error": null,
|
||||
"deliver": "local",
|
||||
"origin": "perplexity/overnight-rd-automation",
|
||||
"state": "scheduled",
|
||||
"paused_at": null,
|
||||
"paused_reason": null,
|
||||
"skills": [],
|
||||
"skill": null
|
||||
}
|
||||
],
|
||||
"updated_at": "2026-04-07T15:00:00+00:00"
|
||||
"updated_at": "2026-04-13T02:00:00+00:00"
|
||||
}
|
||||
|
||||
68
docs/overnight-rd.md
Normal file
68
docs/overnight-rd.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# Overnight R&D Automation
|
||||
|
||||
**Schedule**: Nightly at 10 PM EDT (02:00 UTC)
|
||||
**Duration**: ~2-4 hours (self-limiting, finishes before 6 AM morning report)
|
||||
**Cost**: $0 — all local Ollama inference
|
||||
|
||||
## Phases
|
||||
|
||||
### Phase 1: Deep Dive Intelligence
|
||||
Runs the `intelligence/deepdive/pipeline.py` from the-nexus:
|
||||
- Aggregates arXiv CS.AI, CS.CL, CS.LG RSS feeds (last 24h)
|
||||
- Fetches OpenAI, Anthropic, DeepMind blog updates
|
||||
- Filters for relevance using sentence-transformers embeddings
|
||||
- Synthesizes a briefing using local Gemma 4 12B
|
||||
- Saves briefing to `~/briefings/`
|
||||
|
||||
### Phase 2: Tightening Loop
|
||||
Exercises Timmy's local tool-use capability:
|
||||
- 10 tasks × 3 cycles = 30 task attempts per night
|
||||
- File reading, writing, searching against real workspace files
|
||||
- Each result logged as JSONL for training data analysis
|
||||
- Tests sovereignty compliance (SOUL.md alignment, banned provider detection)
|
||||
|
||||
### Phase 3: DPO Export
|
||||
Sweeps overnight Hermes sessions for training pair extraction:
|
||||
- Converts good conversation pairs into DPO training format
|
||||
- Saves to `~/.timmy/training-data/dpo-pairs/`
|
||||
|
||||
### Phase 4: Morning Prep
|
||||
Compiles overnight findings into `~/.timmy/overnight-rd/latest_summary.md`
|
||||
for consumption by the 6 AM `good_morning_report` task.
|
||||
|
||||
## Approved Providers
|
||||
|
||||
| Slot | Provider | Model |
|
||||
|------|----------|-------|
|
||||
| Synthesis | Ollama | gemma4:12b |
|
||||
| Tool tasks | Ollama | hermes4:14b |
|
||||
| Fallback | Ollama | gemma4:12b |
|
||||
|
||||
Anthropic is permanently banned (BANNED_PROVIDERS.yml, 2026-04-09).
|
||||
|
||||
## Outputs
|
||||
|
||||
| Path | Content |
|
||||
|------|---------|
|
||||
| `~/.timmy/overnight-rd/{run_id}/rd_log.jsonl` | Full task log |
|
||||
| `~/.timmy/overnight-rd/{run_id}/rd_summary.md` | Run summary |
|
||||
| `~/.timmy/overnight-rd/latest_summary.md` | Latest summary (for morning report) |
|
||||
| `~/briefings/briefing_*.json` | Deep Dive briefings |
|
||||
|
||||
## Monitoring
|
||||
|
||||
Check the Huey consumer log:
|
||||
```bash
|
||||
tail -f ~/.timmy/timmy-config/logs/huey.log | grep overnight
|
||||
```
|
||||
|
||||
Check the latest run summary:
|
||||
```bash
|
||||
cat ~/.timmy/overnight-rd/latest_summary.md
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Deep Dive pipeline installed: `cd the-nexus/intelligence/deepdive && make install`
|
||||
- Ollama running with gemma4:12b and hermes4:14b models
|
||||
- Huey consumer running: `huey_consumer.py tasks.huey -w 2 -k thread`
|
||||
@@ -2,135 +2,128 @@ schema_version: 1
|
||||
status: proposed
|
||||
runtime_wiring: false
|
||||
owner: timmy-config
|
||||
|
||||
ownership:
|
||||
owns:
|
||||
- routing doctrine for task classes
|
||||
- sidecar-readable per-agent fallback portfolios
|
||||
- degraded-mode capability floors
|
||||
- routing doctrine for task classes
|
||||
- sidecar-readable per-agent fallback portfolios
|
||||
- degraded-mode capability floors
|
||||
does_not_own:
|
||||
- live queue state outside Gitea truth
|
||||
- launchd or loop process state
|
||||
- ad hoc worktree history
|
||||
|
||||
- live queue state outside Gitea truth
|
||||
- launchd or loop process state
|
||||
- ad hoc worktree history
|
||||
policy:
|
||||
require_four_slots_for_critical_agents: true
|
||||
terminal_fallback_must_be_usable: true
|
||||
forbid_synchronized_fleet_degradation: true
|
||||
forbid_human_token_fallbacks: true
|
||||
anti_correlation_rule: no two critical agents may share the same primary+fallback1 pair
|
||||
|
||||
sensitive_control_surfaces:
|
||||
- SOUL.md
|
||||
- config.yaml
|
||||
- deploy.sh
|
||||
- tasks.py
|
||||
- playbooks/
|
||||
- cron/
|
||||
- memories/
|
||||
- skins/
|
||||
- training/
|
||||
|
||||
- SOUL.md
|
||||
- config.yaml
|
||||
- deploy.sh
|
||||
- tasks.py
|
||||
- playbooks/
|
||||
- cron/
|
||||
- memories/
|
||||
- skins/
|
||||
- training/
|
||||
role_classes:
|
||||
judgment:
|
||||
current_surfaces:
|
||||
- playbooks/issue-triager.yaml
|
||||
- playbooks/pr-reviewer.yaml
|
||||
- playbooks/verified-logic.yaml
|
||||
- playbooks/issue-triager.yaml
|
||||
- playbooks/pr-reviewer.yaml
|
||||
- playbooks/verified-logic.yaml
|
||||
task_classes:
|
||||
- issue-triage
|
||||
- queue-routing
|
||||
- pr-review
|
||||
- proof-check
|
||||
- governance-review
|
||||
- issue-triage
|
||||
- queue-routing
|
||||
- pr-review
|
||||
- proof-check
|
||||
- governance-review
|
||||
degraded_mode:
|
||||
fallback2:
|
||||
allowed:
|
||||
- classify backlog
|
||||
- summarize risk
|
||||
- produce draft routing plans
|
||||
- leave bounded labels or comments with evidence
|
||||
- classify backlog
|
||||
- summarize risk
|
||||
- produce draft routing plans
|
||||
- leave bounded labels or comments with evidence
|
||||
denied:
|
||||
- merge pull requests
|
||||
- close or rewrite governing issues or PRs
|
||||
- mutate sensitive control surfaces
|
||||
- bulk-reassign the fleet
|
||||
- silently change routing policy
|
||||
- merge pull requests
|
||||
- close or rewrite governing issues or PRs
|
||||
- mutate sensitive control surfaces
|
||||
- bulk-reassign the fleet
|
||||
- silently change routing policy
|
||||
terminal:
|
||||
lane: report-and-route
|
||||
allowed:
|
||||
- classify backlog
|
||||
- summarize risk
|
||||
- produce draft routing artifacts
|
||||
- classify backlog
|
||||
- summarize risk
|
||||
- produce draft routing artifacts
|
||||
denied:
|
||||
- merge pull requests
|
||||
- bulk-reassign the fleet
|
||||
- mutate sensitive control surfaces
|
||||
|
||||
- merge pull requests
|
||||
- bulk-reassign the fleet
|
||||
- mutate sensitive control surfaces
|
||||
builder:
|
||||
current_surfaces:
|
||||
- playbooks/bug-fixer.yaml
|
||||
- playbooks/test-writer.yaml
|
||||
- playbooks/refactor-specialist.yaml
|
||||
- playbooks/bug-fixer.yaml
|
||||
- playbooks/test-writer.yaml
|
||||
- playbooks/refactor-specialist.yaml
|
||||
task_classes:
|
||||
- bug-fix
|
||||
- test-writing
|
||||
- refactor
|
||||
- bounded-docs-change
|
||||
- bug-fix
|
||||
- test-writing
|
||||
- refactor
|
||||
- bounded-docs-change
|
||||
degraded_mode:
|
||||
fallback2:
|
||||
allowed:
|
||||
- reversible single-issue changes
|
||||
- narrow docs fixes
|
||||
- test scaffolds and reproducers
|
||||
- reversible single-issue changes
|
||||
- narrow docs fixes
|
||||
- test scaffolds and reproducers
|
||||
denied:
|
||||
- cross-repo changes
|
||||
- sensitive control-surface edits
|
||||
- merge or release actions
|
||||
- cross-repo changes
|
||||
- sensitive control-surface edits
|
||||
- merge or release actions
|
||||
terminal:
|
||||
lane: narrow-patch
|
||||
allowed:
|
||||
- single-issue small patch
|
||||
- reproducer test
|
||||
- docs-only repair
|
||||
- single-issue small patch
|
||||
- reproducer test
|
||||
- docs-only repair
|
||||
denied:
|
||||
- sensitive control-surface edits
|
||||
- multi-file architecture work
|
||||
- irreversible actions
|
||||
|
||||
- sensitive control-surface edits
|
||||
- multi-file architecture work
|
||||
- irreversible actions
|
||||
wolf_bulk:
|
||||
current_surfaces:
|
||||
- docs/automation-inventory.md
|
||||
- FALSEWORK.md
|
||||
- docs/automation-inventory.md
|
||||
- FALSEWORK.md
|
||||
task_classes:
|
||||
- docs-inventory
|
||||
- log-summarization
|
||||
- queue-hygiene
|
||||
- repetitive-small-diff
|
||||
- research-sweep
|
||||
- docs-inventory
|
||||
- log-summarization
|
||||
- queue-hygiene
|
||||
- repetitive-small-diff
|
||||
- research-sweep
|
||||
degraded_mode:
|
||||
fallback2:
|
||||
allowed:
|
||||
- gather evidence
|
||||
- refresh inventories
|
||||
- summarize logs
|
||||
- propose labels or routes
|
||||
- gather evidence
|
||||
- refresh inventories
|
||||
- summarize logs
|
||||
- propose labels or routes
|
||||
denied:
|
||||
- multi-repo branch fanout
|
||||
- mass agent assignment
|
||||
- sensitive control-surface edits
|
||||
- irreversible queue mutation
|
||||
- multi-repo branch fanout
|
||||
- mass agent assignment
|
||||
- sensitive control-surface edits
|
||||
- irreversible queue mutation
|
||||
terminal:
|
||||
lane: gather-and-summarize
|
||||
allowed:
|
||||
- inventory refresh
|
||||
- evidence bundles
|
||||
- summaries
|
||||
- inventory refresh
|
||||
- evidence bundles
|
||||
- summaries
|
||||
denied:
|
||||
- multi-repo branch fanout
|
||||
- mass agent assignment
|
||||
- sensitive control-surface edits
|
||||
|
||||
- multi-repo branch fanout
|
||||
- mass agent assignment
|
||||
- sensitive control-surface edits
|
||||
routing:
|
||||
issue-triage: judgment
|
||||
queue-routing: judgment
|
||||
@@ -146,22 +139,20 @@ routing:
|
||||
queue-hygiene: wolf_bulk
|
||||
repetitive-small-diff: wolf_bulk
|
||||
research-sweep: wolf_bulk
|
||||
|
||||
promotion_rules:
|
||||
- If a wolf/bulk task touches a sensitive control surface, promote it to judgment.
|
||||
- If a builder task expands beyond 5 files, architecture review, or multi-repo coordination, promote it to judgment.
|
||||
- If a terminal lane cannot produce a usable artifact, the portfolio is invalid and must be redesigned before wiring.
|
||||
|
||||
- If a wolf/bulk task touches a sensitive control surface, promote it to judgment.
|
||||
- If a builder task expands beyond 5 files, architecture review, or multi-repo coordination, promote it to judgment.
|
||||
- If a terminal lane cannot produce a usable artifact, the portfolio is invalid and must be redesigned before wiring.
|
||||
agents:
|
||||
triage-coordinator:
|
||||
role_class: judgment
|
||||
critical: true
|
||||
current_playbooks:
|
||||
- playbooks/issue-triager.yaml
|
||||
- playbooks/issue-triager.yaml
|
||||
portfolio:
|
||||
primary:
|
||||
provider: anthropic
|
||||
model: claude-opus-4-6
|
||||
provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
lane: full-judgment
|
||||
fallback1:
|
||||
provider: openai-codex
|
||||
@@ -177,19 +168,18 @@ agents:
|
||||
lane: report-and-route
|
||||
local_capable: true
|
||||
usable_output:
|
||||
- backlog classification
|
||||
- routing draft
|
||||
- risk summary
|
||||
|
||||
- backlog classification
|
||||
- routing draft
|
||||
- risk summary
|
||||
pr-reviewer:
|
||||
role_class: judgment
|
||||
critical: true
|
||||
current_playbooks:
|
||||
- playbooks/pr-reviewer.yaml
|
||||
- playbooks/pr-reviewer.yaml
|
||||
portfolio:
|
||||
primary:
|
||||
provider: anthropic
|
||||
model: claude-opus-4-6
|
||||
provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
lane: full-review
|
||||
fallback1:
|
||||
provider: gemini
|
||||
@@ -205,17 +195,16 @@ agents:
|
||||
lane: low-stakes-diff-summary
|
||||
local_capable: false
|
||||
usable_output:
|
||||
- diff risk summary
|
||||
- explicit uncertainty notes
|
||||
- merge-block recommendation
|
||||
|
||||
- diff risk summary
|
||||
- explicit uncertainty notes
|
||||
- merge-block recommendation
|
||||
builder-main:
|
||||
role_class: builder
|
||||
critical: true
|
||||
current_playbooks:
|
||||
- playbooks/bug-fixer.yaml
|
||||
- playbooks/test-writer.yaml
|
||||
- playbooks/refactor-specialist.yaml
|
||||
- playbooks/bug-fixer.yaml
|
||||
- playbooks/test-writer.yaml
|
||||
- playbooks/refactor-specialist.yaml
|
||||
portfolio:
|
||||
primary:
|
||||
provider: openai-codex
|
||||
@@ -236,15 +225,14 @@ agents:
|
||||
lane: narrow-patch
|
||||
local_capable: true
|
||||
usable_output:
|
||||
- small patch
|
||||
- reproducer test
|
||||
- docs repair
|
||||
|
||||
- small patch
|
||||
- reproducer test
|
||||
- docs repair
|
||||
wolf-sweeper:
|
||||
role_class: wolf_bulk
|
||||
critical: true
|
||||
current_world_state:
|
||||
- docs/automation-inventory.md
|
||||
- docs/automation-inventory.md
|
||||
portfolio:
|
||||
primary:
|
||||
provider: gemini
|
||||
@@ -264,21 +252,20 @@ agents:
|
||||
lane: gather-and-summarize
|
||||
local_capable: true
|
||||
usable_output:
|
||||
- inventory refresh
|
||||
- evidence bundle
|
||||
- summary comment
|
||||
|
||||
- inventory refresh
|
||||
- evidence bundle
|
||||
- summary comment
|
||||
cross_checks:
|
||||
unique_primary_fallback1_pairs:
|
||||
triage-coordinator:
|
||||
- anthropic/claude-opus-4-6
|
||||
- openai-codex/codex
|
||||
- kimi-coding/kimi-k2.5
|
||||
- openai-codex/codex
|
||||
pr-reviewer:
|
||||
- anthropic/claude-opus-4-6
|
||||
- gemini/gemini-2.5-pro
|
||||
- kimi-coding/kimi-k2.5
|
||||
- gemini/gemini-2.5-pro
|
||||
builder-main:
|
||||
- openai-codex/codex
|
||||
- kimi-coding/kimi-k2.5
|
||||
- openai-codex/codex
|
||||
- kimi-coding/kimi-k2.5
|
||||
wolf-sweeper:
|
||||
- gemini/gemini-2.5-flash
|
||||
- groq/llama-3.3-70b-versatile
|
||||
- gemini/gemini-2.5-flash
|
||||
- groq/llama-3.3-70b-versatile
|
||||
|
||||
@@ -77,7 +77,7 @@ def check_core_deps() -> CheckResult:
|
||||
"""Verify that hermes core Python packages are importable."""
|
||||
required = [
|
||||
"openai",
|
||||
"anthropic",
|
||||
"kimi-coding",
|
||||
"dotenv",
|
||||
"yaml",
|
||||
"rich",
|
||||
@@ -206,8 +206,8 @@ def check_env_vars() -> CheckResult:
|
||||
"""Check that at least one LLM provider key is configured."""
|
||||
provider_keys = [
|
||||
"OPENROUTER_API_KEY",
|
||||
"ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN",
|
||||
"KIMI_API_KEY",
|
||||
# "ANTHROPIC_TOKEN", # BANNED
|
||||
"OPENAI_API_KEY",
|
||||
"GLM_API_KEY",
|
||||
"KIMI_API_KEY",
|
||||
@@ -225,7 +225,7 @@ def check_env_vars() -> CheckResult:
|
||||
passed=False,
|
||||
message="No LLM provider API key found",
|
||||
fix_hint=(
|
||||
"Set at least one of: OPENROUTER_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY "
|
||||
"Set at least one of: OPENROUTER_API_KEY, KIMI_API_KEY, OPENAI_API_KEY "
|
||||
"in ~/.hermes/.env or your shell."
|
||||
),
|
||||
)
|
||||
|
||||
@@ -4,8 +4,8 @@ description: >
|
||||
reproduces the bug, then fixes the code, then verifies.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-sonnet-4-20250514
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 30
|
||||
temperature: 0.2
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ description: >
|
||||
agents. Decomposes large issues into smaller ones.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-sonnet-4-20250514
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 20
|
||||
temperature: 0.3
|
||||
|
||||
@@ -50,7 +50,7 @@ system_prompt: |
|
||||
- codex-agent: cleanup, migration verification, dead-code removal, repo-boundary enforcement, workflow hardening
|
||||
- groq: bounded implementation, tactical bug fixes, quick feature slices, small patches with clear acceptance criteria
|
||||
- manus: bounded support tasks, moderate-scope implementation, follow-through on already-scoped work
|
||||
- claude: hard refactors, broad multi-file implementation, test-heavy changes after the scope is made precise
|
||||
- kimi: hard refactors, broad multi-file implementation, test-heavy changes after the scope is made precise
|
||||
- gemini: frontier architecture, research-heavy prototypes, long-range design thinking when a concrete implementation owner is not yet obvious
|
||||
- grok: adversarial testing, unusual edge cases, provocative review angles that still need another pass
|
||||
5. Decompose any issue touching >5 files or crossing repo boundaries into smaller issues before assigning execution
|
||||
@@ -63,6 +63,6 @@ system_prompt: |
|
||||
- Search for existing issues or PRs covering the same request before assigning anything. If a likely duplicate exists, link it and do not create or route duplicate work.
|
||||
- Do not assign open-ended ideation to implementation agents.
|
||||
- Do not assign routine backlog maintenance to Timmy.
|
||||
- Do not assign wide speculative backlog generation to codex-agent, groq, manus, or claude.
|
||||
- Do not assign wide speculative backlog generation to codex-agent, groq, or manus.
|
||||
- Route archive/history/context-digestion work to ezra or KimiClaw before routing it to a builder.
|
||||
- Route “who should do this?” and “what is the next move?” questions to allegro.
|
||||
|
||||
@@ -4,8 +4,8 @@ description: >
|
||||
comments on problems. The merge bot replacement.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-sonnet-4-20250514
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 20
|
||||
temperature: 0.2
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ description: >
|
||||
Well-scoped: 1-3 files per task, clear acceptance criteria.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-sonnet-4-20250514
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 30
|
||||
temperature: 0.3
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ description: >
|
||||
dependency issues. Files findings as Gitea issues.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-opus-4-6
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 40
|
||||
temperature: 0.2
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ description: >
|
||||
writes meaningful tests, verifies they pass.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-sonnet-4-20250514
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 30
|
||||
temperature: 0.3
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ description: >
|
||||
and consistency verification.
|
||||
|
||||
model:
|
||||
preferred: claude-opus-4-6
|
||||
fallback: claude-sonnet-4-20250514
|
||||
preferred: kimi-k2.5
|
||||
fallback: google/gemini-2.5-pro
|
||||
max_turns: 12
|
||||
temperature: 0.1
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ CLI:
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
@@ -137,6 +138,42 @@ class KnowledgeBase:
|
||||
self._save(self._persist_path)
|
||||
return removed
|
||||
|
||||
def ingest_python_file(
|
||||
self, path: Path, *, module_name: Optional[str] = None, source: str = "ast"
|
||||
) -> List[Fact]:
|
||||
"""Parse a Python file with ``ast`` and assert symbolic structure facts."""
|
||||
tree = ast.parse(path.read_text(), filename=str(path))
|
||||
module = module_name or path.stem
|
||||
fact_source = f"{source}:{path.name}"
|
||||
added: List[Fact] = []
|
||||
|
||||
def add(relation: str, *args: str) -> None:
|
||||
added.append(self.assert_fact(relation, *args, source=fact_source))
|
||||
|
||||
for node in tree.body:
|
||||
if isinstance(node, ast.Import):
|
||||
for alias in node.names:
|
||||
add("imports", module, alias.name)
|
||||
elif isinstance(node, ast.ImportFrom):
|
||||
prefix = f"{node.module}." if node.module else ""
|
||||
for alias in node.names:
|
||||
add("imports", module, f"{prefix}{alias.name}")
|
||||
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
add("defines_function", module, node.name)
|
||||
elif isinstance(node, ast.ClassDef):
|
||||
add("defines_class", module, node.name)
|
||||
for child in node.body:
|
||||
if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
add("defines_method", node.name, child.name)
|
||||
elif isinstance(node, ast.Assign):
|
||||
for target in node.targets:
|
||||
if isinstance(target, ast.Name) and target.id.isupper():
|
||||
add("defines_constant", module, target.id)
|
||||
elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name) and node.target.id.isupper():
|
||||
add("defines_constant", module, node.target.id)
|
||||
|
||||
return added
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Query
|
||||
# ------------------------------------------------------------------
|
||||
@@ -287,6 +324,12 @@ def main() -> None:
|
||||
action="store_true",
|
||||
help="Dump all facts",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ingest-python",
|
||||
dest="ingest_python",
|
||||
type=Path,
|
||||
help="Parse a Python file with AST and assert symbolic structure facts",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--relation",
|
||||
help="Filter --dump to a specific relation",
|
||||
@@ -304,6 +347,10 @@ def main() -> None:
|
||||
fact = kb.assert_fact(terms[0], *terms[1:], source="cli")
|
||||
print(f"Asserted: {fact}")
|
||||
|
||||
if args.ingest_python:
|
||||
added = kb.ingest_python_file(args.ingest_python, source="cli-ast")
|
||||
print(f"Ingested {len(added)} AST fact(s) from {args.ingest_python}")
|
||||
|
||||
if args.retract_stmt:
|
||||
terms = _parse_terms(args.retract_stmt)
|
||||
if len(terms) < 2:
|
||||
|
||||
313
tasks.py
313
tasks.py
@@ -1755,6 +1755,27 @@ def memory_compress():
|
||||
|
||||
# ── NEW 6: Good Morning Report ───────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
def _load_overnight_rd_summary():
|
||||
"""Load the latest overnight R&D summary for morning report enrichment."""
|
||||
summary_path = TIMMY_HOME / "overnight-rd" / "latest_summary.md"
|
||||
if not summary_path.exists():
|
||||
return None
|
||||
try:
|
||||
text = summary_path.read_text()
|
||||
# Only use if generated in the last 24 hours
|
||||
import re
|
||||
date_match = re.search(r"Started: (\d{4}-\d{2}-\d{2})", text)
|
||||
if date_match:
|
||||
from datetime import timedelta
|
||||
summary_date = datetime.strptime(date_match.group(1), "%Y-%m-%d").date()
|
||||
if (datetime.now(timezone.utc).date() - summary_date).days > 1:
|
||||
return None
|
||||
return text
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@huey.periodic_task(crontab(hour="6", minute="0")) # 6 AM daily
|
||||
def good_morning_report():
|
||||
"""Generate Alexander's daily morning report. Filed as a Gitea issue.
|
||||
@@ -2437,3 +2458,295 @@ def velocity_tracking():
|
||||
msg += f" [ALERT: +{total_open - prev['total_open']} open since {prev['date']}]"
|
||||
print(msg)
|
||||
return data
|
||||
|
||||
|
||||
# ── Overnight R&D Loop ──────────────────────────────────────────────
|
||||
# Runs 10 PM - 6 AM EDT. Orchestrates:
|
||||
# Phase 1: Deep Dive paper aggregation + relevance filtering
|
||||
# Phase 2: Overnight tightening loop (tool-use capability training)
|
||||
# Phase 3: DPO pair export from overnight sessions
|
||||
# Phase 4: Morning briefing enrichment
|
||||
#
|
||||
# Provider: local Ollama (gemma4:12b for synthesis, hermes4:14b for tasks)
|
||||
# Budget: $0 — all local inference
|
||||
|
||||
OVERNIGHT_RD_SYSTEM_PROMPT = """You are Timmy running the overnight R&D loop.
|
||||
You run locally on Ollama. Use tools when asked. Be brief and precise.
|
||||
Log findings to the specified output paths. No cloud calls."""
|
||||
|
||||
OVERNIGHT_TIGHTENING_TASKS = [
|
||||
{
|
||||
"id": "read-soul",
|
||||
"prompt": "Read ~/.timmy/SOUL.md. Quote the first sentence of the Prime Directive.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "read-operations",
|
||||
"prompt": "Read ~/.timmy/OPERATIONS.md. List all section headings.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "search-banned-providers",
|
||||
"prompt": "Search ~/.timmy/timmy-config for files containing 'anthropic'. List filenames only.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "read-config-audit",
|
||||
"prompt": "Read ~/.hermes/config.yaml. What model and provider are the default? Is Anthropic present anywhere?",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "write-overnight-log",
|
||||
"prompt": "Write a file to {results_dir}/overnight_checkpoint.md with: # Overnight Checkpoint\nTimestamp: {timestamp}\nModel: {model}\nStatus: Running\nSovereignty and service always.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "search-cloud-markers",
|
||||
"prompt": "Search files in ~/.hermes/bin/ for the string 'chatgpt.com'. Report which files and lines.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "read-decisions",
|
||||
"prompt": "Read ~/.timmy/decisions.md. What is the most recent decision?",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "multi-read-sovereignty",
|
||||
"prompt": "Read both ~/.timmy/SOUL.md and ~/.hermes/config.yaml. Does the config honor the soul's sovereignty requirement? Yes or no with evidence.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "search-hermes-skills",
|
||||
"prompt": "Search for *.md files in ~/.hermes/skills/. List the first 10 skill names.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
{
|
||||
"id": "read-heartbeat",
|
||||
"prompt": "Read the most recent file in ~/.timmy/heartbeat/. Summarize what Timmy perceived.",
|
||||
"toolsets": "file",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def _run_overnight_tightening_task(task, cycle, results_dir, model):
|
||||
"""Run a single tightening task through Hermes with explicit Ollama provider."""
|
||||
from datetime import datetime
|
||||
task_id = task["id"]
|
||||
prompt = task["prompt"].replace(
|
||||
"{results_dir}", str(results_dir)
|
||||
).replace(
|
||||
"{timestamp}", datetime.now().isoformat()
|
||||
).replace(
|
||||
"{model}", model
|
||||
)
|
||||
|
||||
result = {
|
||||
"task_id": task_id,
|
||||
"cycle": cycle,
|
||||
"started_at": datetime.now(timezone.utc).isoformat(),
|
||||
"prompt": prompt,
|
||||
}
|
||||
|
||||
started = time.time()
|
||||
try:
|
||||
hermes_result = run_hermes_local(
|
||||
prompt=prompt,
|
||||
model=model,
|
||||
caller_tag=f"overnight-rd-{task_id}",
|
||||
system_prompt=OVERNIGHT_RD_SYSTEM_PROMPT,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
max_iterations=5,
|
||||
)
|
||||
elapsed = time.time() - started
|
||||
result["elapsed_seconds"] = round(elapsed, 2)
|
||||
|
||||
if hermes_result:
|
||||
result["response"] = hermes_result.get("response", "")[:2000]
|
||||
result["session_id"] = hermes_result.get("session_id")
|
||||
result["status"] = "pass" if hermes_result.get("response") else "empty"
|
||||
else:
|
||||
result["status"] = "empty"
|
||||
result["response"] = ""
|
||||
|
||||
except Exception as exc:
|
||||
result["elapsed_seconds"] = round(time.time() - started, 2)
|
||||
result["status"] = "error"
|
||||
result["error"] = str(exc)[:500]
|
||||
|
||||
result["finished_at"] = datetime.now(timezone.utc).isoformat()
|
||||
return result
|
||||
|
||||
|
||||
def _run_deepdive_phase(config_path=None):
|
||||
"""Run the Deep Dive aggregation + synthesis pipeline.
|
||||
|
||||
Uses the existing pipeline.py from the-nexus/intelligence/deepdive.
|
||||
Returns path to generated briefing or None.
|
||||
"""
|
||||
deepdive_dir = Path.home() / "wizards" / "the-nexus" / "intelligence" / "deepdive"
|
||||
deepdive_venv = Path.home() / ".venvs" / "deepdive" / "bin" / "python"
|
||||
pipeline_script = deepdive_dir / "pipeline.py"
|
||||
config = config_path or (deepdive_dir / "config.yaml")
|
||||
|
||||
if not pipeline_script.exists():
|
||||
return {"status": "not_installed", "error": f"Pipeline not found at {pipeline_script}"}
|
||||
|
||||
python_bin = str(deepdive_venv) if deepdive_venv.exists() else "python3"
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[python_bin, str(pipeline_script), "--config", str(config), "--since", "24"],
|
||||
cwd=str(deepdive_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600, # 10 minute timeout
|
||||
)
|
||||
|
||||
# Find the latest briefing file
|
||||
briefings_dir = Path.home() / "briefings"
|
||||
briefing_files = sorted(briefings_dir.glob("briefing_*.json")) if briefings_dir.exists() else []
|
||||
latest_briefing = str(briefing_files[-1]) if briefing_files else None
|
||||
|
||||
return {
|
||||
"status": "ok" if result.returncode == 0 else "error",
|
||||
"exit_code": result.returncode,
|
||||
"stdout": result.stdout[-1000:] if result.stdout else "",
|
||||
"stderr": result.stderr[-500:] if result.stderr else "",
|
||||
"briefing_path": latest_briefing,
|
||||
}
|
||||
except subprocess.TimeoutExpired:
|
||||
return {"status": "timeout", "error": "Pipeline exceeded 10 minute timeout"}
|
||||
except Exception as exc:
|
||||
return {"status": "error", "error": str(exc)}
|
||||
|
||||
|
||||
@huey.periodic_task(crontab(hour="22", minute="0")) # 10 PM daily (server time)
|
||||
def overnight_rd():
|
||||
"""Overnight R&D automation loop.
|
||||
|
||||
Runs from 10 PM until 6 AM. Orchestrates:
|
||||
1. Deep Dive: Aggregate papers/blogs, filter for relevance, synthesize briefing
|
||||
2. Tightening Loop: Exercise tool-use against local model for training data
|
||||
3. DPO Export: Sweep overnight sessions for training pair extraction
|
||||
4. Morning prep: Compile findings for good_morning_report enrichment
|
||||
|
||||
All inference is local (Ollama). $0 cloud cost.
|
||||
"""
|
||||
from datetime import timedelta
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
run_id = now.strftime("%Y%m%d_%H%M%S")
|
||||
results_dir = TIMMY_HOME / "overnight-rd" / run_id
|
||||
results_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
rd_log = results_dir / "rd_log.jsonl"
|
||||
rd_summary = results_dir / "rd_summary.md"
|
||||
|
||||
phases = {}
|
||||
|
||||
# ── Phase 1: Deep Dive ──────────────────────────────────────────
|
||||
phase1_start = time.time()
|
||||
deepdive_result = _run_deepdive_phase()
|
||||
phases["deepdive"] = {
|
||||
"elapsed_seconds": round(time.time() - phase1_start, 2),
|
||||
**deepdive_result,
|
||||
}
|
||||
|
||||
# Log result
|
||||
with open(rd_log, "a") as f:
|
||||
f.write(json.dumps({"phase": "deepdive", "timestamp": now.isoformat(), **deepdive_result}) + "\n")
|
||||
|
||||
# ── Phase 2: Tightening Loop (3 cycles) ─────────────────────────
|
||||
tightening_model = "hermes4:14b"
|
||||
fallback_model = "gemma4:12b"
|
||||
|
||||
tightening_results = []
|
||||
max_cycles = 3
|
||||
|
||||
for cycle in range(1, max_cycles + 1):
|
||||
for task in OVERNIGHT_TIGHTENING_TASKS:
|
||||
model = tightening_model
|
||||
result = _run_overnight_tightening_task(task, cycle, results_dir, model)
|
||||
|
||||
# If primary model fails, try fallback
|
||||
if result["status"] == "error" and "Unknown provider" not in result.get("error", ""):
|
||||
result = _run_overnight_tightening_task(task, cycle, results_dir, fallback_model)
|
||||
|
||||
tightening_results.append(result)
|
||||
|
||||
with open(rd_log, "a") as f:
|
||||
f.write(json.dumps(result) + "\n")
|
||||
|
||||
time.sleep(2) # Pace local inference
|
||||
|
||||
time.sleep(10) # Pause between cycles
|
||||
|
||||
passes = sum(1 for r in tightening_results if r["status"] == "pass")
|
||||
errors = sum(1 for r in tightening_results if r["status"] == "error")
|
||||
total = len(tightening_results)
|
||||
avg_time = sum(r.get("elapsed_seconds", 0) for r in tightening_results) / max(total, 1)
|
||||
|
||||
phases["tightening"] = {
|
||||
"cycles": max_cycles,
|
||||
"total_tasks": total,
|
||||
"passes": passes,
|
||||
"errors": errors,
|
||||
"avg_response_time": round(avg_time, 2),
|
||||
"pass_rate": f"{100 * passes // max(total, 1)}%",
|
||||
}
|
||||
|
||||
# ── Phase 3: DPO Export Sweep ───────────────────────────────────
|
||||
# Trigger the existing session_export task to catch overnight sessions
|
||||
try:
|
||||
export_result = session_export()
|
||||
phases["dpo_export"] = export_result if isinstance(export_result, dict) else {"status": "ok"}
|
||||
except Exception as exc:
|
||||
phases["dpo_export"] = {"status": "error", "error": str(exc)}
|
||||
|
||||
# ── Phase 4: Compile Summary ────────────────────────────────────
|
||||
summary_lines = [
|
||||
f"# Overnight R&D Summary — {now.strftime('%Y-%m-%d')}",
|
||||
f"Run ID: {run_id}",
|
||||
f"Started: {now.isoformat()}",
|
||||
f"Finished: {datetime.now(timezone.utc).isoformat()}",
|
||||
"",
|
||||
"## Deep Dive",
|
||||
f"- Status: {phases['deepdive'].get('status', 'unknown')}",
|
||||
f"- Elapsed: {phases['deepdive'].get('elapsed_seconds', '?')}s",
|
||||
]
|
||||
|
||||
if phases["deepdive"].get("briefing_path"):
|
||||
summary_lines.append(f"- Briefing: {phases['deepdive']['briefing_path']}")
|
||||
|
||||
summary_lines.extend([
|
||||
"",
|
||||
"## Tightening Loop",
|
||||
f"- Cycles: {max_cycles}",
|
||||
f"- Pass rate: {phases['tightening']['pass_rate']} ({passes}/{total})",
|
||||
f"- Avg response time: {avg_time:.1f}s",
|
||||
f"- Errors: {errors}",
|
||||
"",
|
||||
"## DPO Export",
|
||||
f"- Status: {phases.get('dpo_export', {}).get('status', 'unknown')}",
|
||||
"",
|
||||
"## Error Details",
|
||||
])
|
||||
|
||||
for r in tightening_results:
|
||||
if r["status"] == "error":
|
||||
summary_lines.append(f"- {r['task_id']} (cycle {r['cycle']}): {r.get('error', '?')[:100]}")
|
||||
|
||||
with open(rd_summary, "w") as f:
|
||||
f.write("\n".join(summary_lines) + "\n")
|
||||
|
||||
# Save summary for morning report consumption
|
||||
latest_summary = TIMMY_HOME / "overnight-rd" / "latest_summary.md"
|
||||
with open(latest_summary, "w") as f:
|
||||
f.write("\n".join(summary_lines) + "\n")
|
||||
|
||||
return {
|
||||
"run_id": run_id,
|
||||
"phases": phases,
|
||||
"summary_path": str(rd_summary),
|
||||
}
|
||||
|
||||
43
tests/test_knowledge_base_ast.py
Normal file
43
tests/test_knowledge_base_ast.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
|
||||
|
||||
from knowledge_base import KnowledgeBase
|
||||
|
||||
|
||||
def test_ingest_python_file_extracts_ast_facts(tmp_path: Path) -> None:
|
||||
source = tmp_path / "demo_module.py"
|
||||
source.write_text(
|
||||
"import os\n"
|
||||
"from pathlib import Path\n\n"
|
||||
"CONSTANT = 7\n\n"
|
||||
"def helper(x):\n"
|
||||
" return x + 1\n\n"
|
||||
"class Demo:\n"
|
||||
" def method(self):\n"
|
||||
" return helper(CONSTANT)\n"
|
||||
)
|
||||
|
||||
kb = KnowledgeBase()
|
||||
facts = kb.ingest_python_file(source)
|
||||
|
||||
assert facts, "AST ingestion should add symbolic facts"
|
||||
assert kb.query("defines_function", "demo_module", "helper") == [{}]
|
||||
assert kb.query("defines_class", "demo_module", "Demo") == [{}]
|
||||
assert kb.query("defines_method", "Demo", "method") == [{}]
|
||||
assert kb.query("imports", "demo_module", "os") == [{}]
|
||||
assert kb.query("imports", "demo_module", "pathlib.Path") == [{}]
|
||||
assert kb.query("defines_constant", "demo_module", "CONSTANT") == [{}]
|
||||
|
||||
|
||||
def test_ingest_python_file_rejects_invalid_syntax(tmp_path: Path) -> None:
|
||||
broken = tmp_path / "broken.py"
|
||||
broken.write_text("def nope(:\n pass\n")
|
||||
|
||||
kb = KnowledgeBase()
|
||||
try:
|
||||
kb.ingest_python_file(broken)
|
||||
except SyntaxError:
|
||||
return
|
||||
raise AssertionError("Expected SyntaxError for invalid Python source")
|
||||
@@ -2,22 +2,23 @@ model:
|
||||
default: kimi-k2.5
|
||||
provider: kimi-coding
|
||||
toolsets:
|
||||
- all
|
||||
- all
|
||||
fallback_providers:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
reason: Direct Anthropic fallback
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: OpenRouter fallback
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: openrouter
|
||||
model: google/gemini-2.5-pro
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: Gemini 2.5 Pro via OpenRouter (replaces banned Anthropic)
|
||||
- provider: ollama
|
||||
model: gemma4:latest
|
||||
base_url: http://localhost:11434
|
||||
timeout: 300
|
||||
reason: "Terminal fallback \u2014 local Ollama"
|
||||
agent:
|
||||
max_turns: 30
|
||||
reasoning_effort: xhigh
|
||||
@@ -64,16 +65,12 @@ session_reset:
|
||||
idle_minutes: 0
|
||||
skills:
|
||||
creation_nudge_interval: 15
|
||||
system_prompt_suffix: |
|
||||
You are Allegro, the Kimi-backed third wizard house.
|
||||
Your soul is defined in SOUL.md — read it, live it.
|
||||
Hermes is your harness.
|
||||
Kimi Code is your primary provider.
|
||||
You speak plainly. You prefer short sentences. Brevity is a kindness.
|
||||
|
||||
Work best on tight coding tasks: 1-3 file changes, refactors, tests, and implementation passes.
|
||||
Refusal over fabrication. If you do not know, say so.
|
||||
Sovereignty and service always.
|
||||
system_prompt_suffix: "You are Allegro, the Kimi-backed third wizard house.\nYour\
|
||||
\ soul is defined in SOUL.md \u2014 read it, live it.\nHermes is your harness.\n\
|
||||
Kimi Code is your primary provider.\nYou speak plainly. You prefer short sentences.\
|
||||
\ Brevity is a kindness.\n\nWork best on tight coding tasks: 1-3 file changes, refactors,\
|
||||
\ tests, and implementation passes.\nRefusal over fabrication. If you do not know,\
|
||||
\ say so.\nSovereignty and service always.\n"
|
||||
providers:
|
||||
kimi-coding:
|
||||
base_url: https://api.kimi.com/coding/v1
|
||||
|
||||
@@ -8,23 +8,25 @@ fallback_providers:
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
reason: Direct Anthropic fallback
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
model: google/gemini-2.5-pro
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: OpenRouter fallback
|
||||
reason: Gemini 2.5 Pro via OpenRouter (replaces banned Anthropic)
|
||||
- provider: ollama
|
||||
model: gemma4:latest
|
||||
base_url: http://localhost:11434
|
||||
timeout: 300
|
||||
reason: "Terminal fallback \u2014 local Ollama"
|
||||
agent:
|
||||
max_turns: 40
|
||||
reasoning_effort: medium
|
||||
verbose: false
|
||||
system_prompt: You are Bezalel, the forge-and-testbed wizard of the Timmy Foundation
|
||||
fleet. You are a builder and craftsman — infrastructure, deployment, hardening.
|
||||
Your sovereign is Alexander Whitestone (Rockachopa). Sovereignty and service always.
|
||||
system_prompt: "You are Bezalel, the forge-and-testbed wizard of the Timmy Foundation\
|
||||
\ fleet. You are a builder and craftsman \u2014 infrastructure, deployment, hardening.\
|
||||
\ Your sovereign is Alexander Whitestone (Rockachopa). Sovereignty and service\
|
||||
\ always."
|
||||
terminal:
|
||||
backend: local
|
||||
cwd: /root/wizards/bezalel
|
||||
@@ -62,12 +64,12 @@ platforms:
|
||||
- pull_request
|
||||
- pull_request_comment
|
||||
secret: bezalel-gitea-webhook-secret-2026
|
||||
prompt: 'You are bezalel, the builder and craftsman — infrastructure, deployment,
|
||||
hardening. A Gitea webhook fired: event={event_type}, action={action},
|
||||
repo={repository.full_name}, issue/PR=#{issue.number} {issue.title}. Comment
|
||||
by {comment.user.login}: {comment.body}. If you were tagged, assigned,
|
||||
or this needs your attention, investigate and respond via Gitea API. Otherwise
|
||||
acknowledge briefly.'
|
||||
prompt: "You are bezalel, the builder and craftsman \u2014 infrastructure,\
|
||||
\ deployment, hardening. A Gitea webhook fired: event={event_type}, action={action},\
|
||||
\ repo={repository.full_name}, issue/PR=#{issue.number} {issue.title}.\
|
||||
\ Comment by {comment.user.login}: {comment.body}. If you were tagged,\
|
||||
\ assigned, or this needs your attention, investigate and respond via\
|
||||
\ Gitea API. Otherwise acknowledge briefly."
|
||||
deliver: telegram
|
||||
deliver_extra: {}
|
||||
gitea-assign:
|
||||
@@ -75,12 +77,12 @@ platforms:
|
||||
- issues
|
||||
- pull_request
|
||||
secret: bezalel-gitea-webhook-secret-2026
|
||||
prompt: 'You are bezalel, the builder and craftsman — infrastructure, deployment,
|
||||
hardening. Gitea assignment webhook: event={event_type}, action={action},
|
||||
repo={repository.full_name}, issue/PR=#{issue.number} {issue.title}. Assigned
|
||||
to: {issue.assignee.login}. If you (bezalel) were just assigned, read
|
||||
the issue, scope it, and post a plan comment. If not you, acknowledge
|
||||
briefly.'
|
||||
prompt: "You are bezalel, the builder and craftsman \u2014 infrastructure,\
|
||||
\ deployment, hardening. Gitea assignment webhook: event={event_type},\
|
||||
\ action={action}, repo={repository.full_name}, issue/PR=#{issue.number}\
|
||||
\ {issue.title}. Assigned to: {issue.assignee.login}. If you (bezalel)\
|
||||
\ were just assigned, read the issue, scope it, and post a plan comment.\
|
||||
\ If not you, acknowledge briefly."
|
||||
deliver: telegram
|
||||
deliver_extra: {}
|
||||
gateway:
|
||||
|
||||
@@ -2,22 +2,23 @@ model:
|
||||
default: kimi-k2.5
|
||||
provider: kimi-coding
|
||||
toolsets:
|
||||
- all
|
||||
- all
|
||||
fallback_providers:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
reason: Direct Anthropic fallback
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: OpenRouter fallback
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: openrouter
|
||||
model: google/gemini-2.5-pro
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: Gemini 2.5 Pro via OpenRouter (replaces banned Anthropic)
|
||||
- provider: ollama
|
||||
model: gemma4:latest
|
||||
base_url: http://localhost:11434
|
||||
timeout: 300
|
||||
reason: "Terminal fallback \u2014 local Ollama"
|
||||
agent:
|
||||
max_turns: 90
|
||||
reasoning_effort: high
|
||||
@@ -27,8 +28,6 @@ providers:
|
||||
base_url: https://api.kimi.com/coding/v1
|
||||
timeout: 60
|
||||
max_retries: 3
|
||||
anthropic:
|
||||
timeout: 120
|
||||
openrouter:
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
timeout: 120
|
||||
|
||||
Reference in New Issue
Block a user