diff --git a/README.md b/README.md index c3b4462c..6c48824b 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,16 @@ timmy-config/ │ ├── ops-panel.sh ← Ops dashboard panel │ ├── ops-gitea.sh ← Gitea ops helpers │ ├── pipeline-freshness.sh ← Session/export drift check -│ └── timmy-status.sh ← Status check +│ ├── timmy-status.sh ← Status check +│ └── crucible_mcp_server.py ← Z3-backed verification sidecar (MCP) ├── memories/ ← Persistent memory YAML ├── skins/ ← UI skins (timmy skin) ├── playbooks/ ← Agent playbooks (YAML) +│ └── verified-logic.yaml ← Crucible-first proof playbook ├── cron/ ← Cron job definitions ├── wizards/ ← Remote wizard-house templates + units +├── docs/ +│ └── crucible-first-cut.md ← Crucible design doc └── training/ ← Transitional training recipes, not canonical lived data ``` @@ -73,6 +77,12 @@ git clone ~/.timmy/timmy-config cd ~/.timmy/timmy-config ./deploy.sh +# Deploy and restart the gateway so new MCP tools load +./deploy.sh --restart-gateway + +# Deploy and restart everything (gateway + loops) +./deploy.sh --restart-all + # This overlays config onto ~/.hermes/ without touching hermes-agent code ``` @@ -87,3 +97,16 @@ SOUL.md is Inscription 1 — inscribed on Bitcoin, immutable. It defines: - The conscience hierarchy (chain > code > prompt > user instruction) No system prompt, no user instruction, no future code can override what is written there. + +## Crucible (Neuro-Symbolic Verification) + +The first neuro-symbolic slice ships as a sidecar MCP server: +- `mcp_crucible_schedule_tasks` +- `mcp_crucible_order_dependencies` +- `mcp_crucible_capacity_fit` + +These tools log proof trails under `~/.hermes/logs/crucible/` and return SAT/UNSAT plus witness models. + +## Architecture: Sidecar, Not Fork + +Timmy-config is applied as an overlay onto the Hermes harness. No forking required. diff --git a/bin/crucible_mcp_server.py b/bin/crucible_mcp_server.py new file mode 100644 index 00000000..a4836ef9 --- /dev/null +++ b/bin/crucible_mcp_server.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +"""Z3-backed Crucible MCP server for Timmy. + +Sidecar-only. Lives in timmy-config, deploys into ~/.hermes/bin/, and is loaded +by Hermes through native MCP tool discovery. No hermes-agent fork required. +""" + +from __future__ import annotations + +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from mcp.server import FastMCP +from z3 import And, Bool, Distinct, If, Implies, Int, Optimize, Or, Sum, sat, unsat + +mcp = FastMCP( + name="crucible", + instructions=( + "Formal verification sidecar for Timmy. Use these tools for scheduling, " + "dependency ordering, and resource/capacity feasibility. Return SAT/UNSAT " + "with witness models instead of fuzzy prose." + ), + dependencies=["z3-solver"], +) + + +def _hermes_home() -> Path: + return Path(os.path.expanduser(os.getenv("HERMES_HOME", "~/.hermes"))) + + +def _proof_dir() -> Path: + path = _hermes_home() / "logs" / "crucible" + path.mkdir(parents=True, exist_ok=True) + return path + + +def _ts() -> str: + return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S_%fZ") + + +def _json_default(value: Any) -> Any: + if isinstance(value, Path): + return str(value) + raise TypeError(f"Unsupported type for JSON serialization: {type(value)!r}") + + +def _log_proof(tool_name: str, request: dict[str, Any], result: dict[str, Any]) -> str: + path = _proof_dir() / f"{_ts()}_{tool_name}.json" + payload = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "tool": tool_name, + "request": request, + "result": result, + } + path.write_text(json.dumps(payload, indent=2, default=_json_default)) + return str(path) + + +def _ensure_unique(names: list[str], label: str) -> None: + if len(set(names)) != len(names): + raise ValueError(f"Duplicate {label} names are not allowed: {names}") + + +def _normalize_dependency(dep: Any) -> tuple[str, str, int]: + if isinstance(dep, dict): + before = dep.get("before") + after = dep.get("after") + lag = int(dep.get("lag", 0)) + if not before or not after: + raise ValueError(f"Dependency dict must include before/after: {dep!r}") + return str(before), str(after), lag + if isinstance(dep, (list, tuple)) and len(dep) in (2, 3): + before = str(dep[0]) + after = str(dep[1]) + lag = int(dep[2]) if len(dep) == 3 else 0 + return before, after, lag + raise ValueError(f"Unsupported dependency shape: {dep!r}") + + +def _normalize_task(task: dict[str, Any]) -> dict[str, Any]: + name = str(task["name"]) + duration = int(task["duration"]) + if duration <= 0: + raise ValueError(f"Task duration must be positive: {task!r}") + return {"name": name, "duration": duration} + + +def _normalize_item(item: dict[str, Any]) -> dict[str, Any]: + name = str(item["name"]) + amount = int(item["amount"]) + value = int(item.get("value", amount)) + required = bool(item.get("required", False)) + if amount < 0: + raise ValueError(f"Item amount must be non-negative: {item!r}") + return { + "name": name, + "amount": amount, + "value": value, + "required": required, + } + + +def solve_schedule_tasks( + tasks: list[dict[str, Any]], + horizon: int, + dependencies: list[Any] | None = None, + fixed_starts: dict[str, int] | None = None, + max_parallel_tasks: int = 1, + minimize_makespan: bool = True, +) -> dict[str, Any]: + tasks = [_normalize_task(task) for task in tasks] + dependencies = dependencies or [] + fixed_starts = fixed_starts or {} + horizon = int(horizon) + max_parallel_tasks = int(max_parallel_tasks) + + if horizon <= 0: + raise ValueError("horizon must be positive") + if max_parallel_tasks <= 0: + raise ValueError("max_parallel_tasks must be positive") + + names = [task["name"] for task in tasks] + _ensure_unique(names, "task") + durations = {task["name"]: task["duration"] for task in tasks} + + opt = Optimize() + start = {name: Int(f"start_{name}") for name in names} + end = {name: Int(f"end_{name}") for name in names} + makespan = Int("makespan") + + for name in names: + opt.add(start[name] >= 0) + opt.add(end[name] == start[name] + durations[name]) + opt.add(end[name] <= horizon) + if name in fixed_starts: + opt.add(start[name] == int(fixed_starts[name])) + + for dep in dependencies: + before, after, lag = _normalize_dependency(dep) + if before not in start or after not in start: + raise ValueError(f"Unknown task in dependency {dep!r}") + opt.add(start[after] >= end[before] + lag) + + # Discrete resource capacity over integer time slots. + for t in range(horizon): + active = [If(And(start[name] <= t, t < end[name]), 1, 0) for name in names] + opt.add(Sum(active) <= max_parallel_tasks) + + for name in names: + opt.add(makespan >= end[name]) + if minimize_makespan: + opt.minimize(makespan) + + result = opt.check() + proof: dict[str, Any] + if result == sat: + model = opt.model() + schedule = [] + for name in sorted(names, key=lambda n: model.eval(start[n]).as_long()): + s = model.eval(start[name]).as_long() + e = model.eval(end[name]).as_long() + schedule.append({ + "name": name, + "start": s, + "end": e, + "duration": durations[name], + }) + proof = { + "status": "sat", + "summary": "Schedule proven feasible.", + "horizon": horizon, + "max_parallel_tasks": max_parallel_tasks, + "makespan": model.eval(makespan).as_long(), + "schedule": schedule, + "dependencies": [ + {"before": b, "after": a, "lag": lag} + for b, a, lag in (_normalize_dependency(dep) for dep in dependencies) + ], + } + elif result == unsat: + proof = { + "status": "unsat", + "summary": "Schedule is impossible under the given horizon/dependency/capacity constraints.", + "horizon": horizon, + "max_parallel_tasks": max_parallel_tasks, + "dependencies": [ + {"before": b, "after": a, "lag": lag} + for b, a, lag in (_normalize_dependency(dep) for dep in dependencies) + ], + } + else: + proof = { + "status": "unknown", + "summary": "Solver could not prove SAT or UNSAT for this schedule.", + "horizon": horizon, + "max_parallel_tasks": max_parallel_tasks, + } + + proof["proof_log"] = _log_proof( + "schedule_tasks", + { + "tasks": tasks, + "horizon": horizon, + "dependencies": dependencies, + "fixed_starts": fixed_starts, + "max_parallel_tasks": max_parallel_tasks, + "minimize_makespan": minimize_makespan, + }, + proof, + ) + return proof + + +def solve_dependency_order( + entities: list[str], + before: list[Any], + fixed_positions: dict[str, int] | None = None, +) -> dict[str, Any]: + entities = [str(entity) for entity in entities] + fixed_positions = fixed_positions or {} + _ensure_unique(entities, "entity") + + opt = Optimize() + pos = {entity: Int(f"pos_{entity}") for entity in entities} + opt.add(Distinct(*pos.values())) + for entity in entities: + opt.add(pos[entity] >= 0) + opt.add(pos[entity] < len(entities)) + if entity in fixed_positions: + opt.add(pos[entity] == int(fixed_positions[entity])) + + normalized = [] + for dep in before: + left, right, _lag = _normalize_dependency(dep) + if left not in pos or right not in pos: + raise ValueError(f"Unknown entity in ordering constraint: {dep!r}") + opt.add(pos[left] < pos[right]) + normalized.append({"before": left, "after": right}) + + result = opt.check() + if result == sat: + model = opt.model() + ordering = sorted(entities, key=lambda entity: model.eval(pos[entity]).as_long()) + proof = { + "status": "sat", + "summary": "Dependency ordering is consistent.", + "ordering": ordering, + "positions": {entity: model.eval(pos[entity]).as_long() for entity in entities}, + "constraints": normalized, + } + elif result == unsat: + proof = { + "status": "unsat", + "summary": "Dependency ordering contains a contradiction/cycle.", + "constraints": normalized, + } + else: + proof = { + "status": "unknown", + "summary": "Solver could not prove SAT or UNSAT for this dependency graph.", + "constraints": normalized, + } + + proof["proof_log"] = _log_proof( + "order_dependencies", + { + "entities": entities, + "before": before, + "fixed_positions": fixed_positions, + }, + proof, + ) + return proof + + +def solve_capacity_fit( + items: list[dict[str, Any]], + capacity: int, + maximize_value: bool = True, +) -> dict[str, Any]: + items = [_normalize_item(item) for item in items] + capacity = int(capacity) + if capacity < 0: + raise ValueError("capacity must be non-negative") + + names = [item["name"] for item in items] + _ensure_unique(names, "item") + choose = {item["name"]: Bool(f"choose_{item['name']}") for item in items} + + opt = Optimize() + for item in items: + if item["required"]: + opt.add(choose[item["name"]]) + + total_amount = Sum([If(choose[item["name"]], item["amount"], 0) for item in items]) + total_value = Sum([If(choose[item["name"]], item["value"], 0) for item in items]) + opt.add(total_amount <= capacity) + if maximize_value: + opt.maximize(total_value) + + result = opt.check() + if result == sat: + model = opt.model() + chosen = [item for item in items if bool(model.eval(choose[item["name"]], model_completion=True))] + skipped = [item for item in items if item not in chosen] + used = sum(item["amount"] for item in chosen) + proof = { + "status": "sat", + "summary": "Capacity constraints are feasible.", + "capacity": capacity, + "used": used, + "remaining": capacity - used, + "chosen": chosen, + "skipped": skipped, + "total_value": sum(item["value"] for item in chosen), + } + elif result == unsat: + proof = { + "status": "unsat", + "summary": "Required items exceed available capacity.", + "capacity": capacity, + "required_items": [item for item in items if item["required"]], + } + else: + proof = { + "status": "unknown", + "summary": "Solver could not prove SAT or UNSAT for this capacity check.", + "capacity": capacity, + } + + proof["proof_log"] = _log_proof( + "capacity_fit", + { + "items": items, + "capacity": capacity, + "maximize_value": maximize_value, + }, + proof, + ) + return proof + + +@mcp.tool( + name="schedule_tasks", + description=( + "Crucible template for discrete scheduling. Proves whether integer-duration " + "tasks fit within a time horizon under dependency and parallelism constraints." + ), + structured_output=True, +) +def schedule_tasks( + tasks: list[dict[str, Any]], + horizon: int, + dependencies: list[Any] | None = None, + fixed_starts: dict[str, int] | None = None, + max_parallel_tasks: int = 1, + minimize_makespan: bool = True, +) -> dict[str, Any]: + return solve_schedule_tasks( + tasks=tasks, + horizon=horizon, + dependencies=dependencies, + fixed_starts=fixed_starts, + max_parallel_tasks=max_parallel_tasks, + minimize_makespan=minimize_makespan, + ) + + +@mcp.tool( + name="order_dependencies", + description=( + "Crucible template for dependency ordering. Proves whether a set of before/after " + "constraints is consistent and returns a valid topological order when SAT." + ), + structured_output=True, +) +def order_dependencies( + entities: list[str], + before: list[Any], + fixed_positions: dict[str, int] | None = None, +) -> dict[str, Any]: + return solve_dependency_order( + entities=entities, + before=before, + fixed_positions=fixed_positions, + ) + + +@mcp.tool( + name="capacity_fit", + description=( + "Crucible template for resource capacity. Proves whether required items fit " + "within a capacity budget and chooses an optimal feasible subset of optional items." + ), + structured_output=True, +) +def capacity_fit( + items: list[dict[str, Any]], + capacity: int, + maximize_value: bool = True, +) -> dict[str, Any]: + return solve_capacity_fit(items=items, capacity=capacity, maximize_value=maximize_value) + + +def run_selftest() -> dict[str, Any]: + return { + "schedule_unsat_single_worker": solve_schedule_tasks( + tasks=[ + {"name": "A", "duration": 2}, + {"name": "B", "duration": 3}, + {"name": "C", "duration": 4}, + ], + horizon=8, + dependencies=[{"before": "A", "after": "B"}], + max_parallel_tasks=1, + ), + "schedule_sat_two_workers": solve_schedule_tasks( + tasks=[ + {"name": "A", "duration": 2}, + {"name": "B", "duration": 3}, + {"name": "C", "duration": 4}, + ], + horizon=8, + dependencies=[{"before": "A", "after": "B"}], + max_parallel_tasks=2, + ), + "ordering_sat": solve_dependency_order( + entities=["fetch", "train", "eval"], + before=[ + {"before": "fetch", "after": "train"}, + {"before": "train", "after": "eval"}, + ], + ), + "capacity_sat": solve_capacity_fit( + items=[ + {"name": "gpu_job", "amount": 6, "value": 6, "required": True}, + {"name": "telemetry", "amount": 1, "value": 1, "required": True}, + {"name": "export", "amount": 2, "value": 4, "required": False}, + {"name": "viz", "amount": 3, "value": 5, "required": False}, + ], + capacity=8, + ), + } + + +def main() -> int: + if len(sys.argv) > 1 and sys.argv[1] == "selftest": + print(json.dumps(run_selftest(), indent=2)) + return 0 + mcp.run(transport="stdio") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/config.yaml b/config.yaml index 1ab24d56..e298d1a3 100644 --- a/config.yaml +++ b/config.yaml @@ -196,8 +196,10 @@ custom_providers: system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\ \ it, live it.\nYou run locally on your owner's machine via llama.cpp. You never\ \ phone home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\ - When you don't know something, say so. Refusal over fabrication.\nSovereignty and\ - \ service always.\n" + When you don't know something, say so. Refusal over fabrication.\nFor scheduling,\ + \ dependency ordering, resource constraints, and consistency checks, prefer the\ + \ Crucible tools and report SAT/UNSAT plus witness model when available.\nSovereignty\ + \ and service always.\n" skills: creation_nudge_interval: 15 DISCORD_HOME_CHANNEL: '1476292315814297772' @@ -212,6 +214,12 @@ mcp_servers: - /Users/apayne/.timmy/morrowind/mcp_server.py env: {} timeout: 30 + crucible: + command: "/Users/apayne/.hermes/hermes-agent/venv/bin/python3" + args: ["/Users/apayne/.hermes/bin/crucible_mcp_server.py"] + env: {} + timeout: 120 + connect_timeout: 60 fallback_model: provider: custom model: gemini-2.5-pro diff --git a/deploy.sh b/deploy.sh index e0ecdf43..b56ffcd5 100755 --- a/deploy.sh +++ b/deploy.sh @@ -3,13 +3,30 @@ # This is the canonical way to deploy Timmy's configuration. # Hermes-agent is the engine. timmy-config is the driver's seat. # -# Usage: ./deploy.sh +# Usage: ./deploy.sh [--restart-loops] [--restart-gateway] [--restart-all] set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" HERMES_HOME="$HOME/.hermes" TIMMY_HOME="$HOME/.timmy" +RESTART_LOOPS=false +RESTART_GATEWAY=false + +for arg in "$@"; do + case "$arg" in + --restart-loops) RESTART_LOOPS=true ;; + --restart-gateway) RESTART_GATEWAY=true ;; + --restart-all) + RESTART_LOOPS=true + RESTART_GATEWAY=true + ;; + *) + echo "Unknown argument: $arg" >&2 + exit 1 + ;; + esac +done log() { echo "[deploy] $*"; } @@ -74,10 +91,45 @@ done chmod +x "$HERMES_HOME/bin/"*.sh "$HERMES_HOME/bin/"*.py 2>/dev/null || true log "bin/ -> $HERMES_HOME/bin/" -if [ "${1:-}" != "" ]; then - echo "ERROR: deploy.sh no longer accepts legacy loop flags." >&2 - echo "Deploy the sidecar only. Do not relaunch deprecated bash loops." >&2 - exit 1 +# === Ensure Crucible dependency is installed === +HERMES_PY="$HERMES_HOME/hermes-agent/venv/bin/python" +if [ -x "$HERMES_PY" ]; then + if "$HERMES_PY" -c 'import z3' >/dev/null 2>&1; then + log "z3-solver already present in Hermes venv" + else + log "Installing z3-solver into Hermes venv..." + "$HERMES_PY" -m pip install z3-solver + fi +fi + +# === Restart loops if requested === +if [ "$RESTART_LOOPS" = true ]; then + log "Killing existing loops..." + pkill -f 'claude-loop.sh' 2>/dev/null || true + pkill -f 'gemini-loop.sh' 2>/dev/null || true + pkill -f 'timmy-orchestrator.sh' 2>/dev/null || true + sleep 2 + + log "Clearing stale locks..." + rm -rf "$HERMES_HOME/logs/claude-locks/"* 2>/dev/null || true + rm -rf "$HERMES_HOME/logs/gemini-locks/"* 2>/dev/null || true + + log "Relaunching loops..." + nohup bash "$HERMES_HOME/bin/timmy-orchestrator.sh" >> "$HERMES_HOME/logs/timmy-orchestrator.log" 2>&1 & + nohup bash "$HERMES_HOME/bin/claude-loop.sh" 2 >> "$HERMES_HOME/logs/claude-loop.log" 2>&1 & + nohup bash "$HERMES_HOME/bin/gemini-loop.sh" 1 >> "$HERMES_HOME/logs/gemini-loop.log" 2>&1 & + sleep 1 + log "Loops relaunched." +fi + +# === Restart gateway if requested (required for new MCP servers/tools) === +if [ "$RESTART_GATEWAY" = true ]; then + log "Restarting Hermes gateway..." + pkill -f 'hermes_cli.main gateway run' 2>/dev/null || true + sleep 2 + nohup "$HERMES_PY" -m hermes_cli.main gateway run --replace >> "$HERMES_HOME/logs/gateway.log" 2>&1 & + sleep 2 + log "Gateway restarted." fi log "Deploy complete. timmy-config applied to $HERMES_HOME/" diff --git a/docs/crucible-first-cut.md b/docs/crucible-first-cut.md new file mode 100644 index 00000000..2a7a19a0 --- /dev/null +++ b/docs/crucible-first-cut.md @@ -0,0 +1,82 @@ +# Crucible First Cut + +This is the first narrow neuro-symbolic slice for Timmy. + +## Goal + +Prove constraint logic instead of bluffing through it. + +## Shape + +The Crucible is a sidecar MCP server that lives in `timmy-config` and deploys into `~/.hermes/bin/`. +It is loaded by Hermes through native MCP discovery. No Hermes fork. + +## Templates shipped in v0 + +### 1. schedule_tasks +Use for: +- deadline feasibility +- task ordering with dependencies +- small integer scheduling windows + +Inputs: +- `tasks`: `[{name, duration}]` +- `horizon`: integer window size +- `dependencies`: `[{before, after, lag?}]` +- `max_parallel_tasks`: integer worker count + +Outputs: +- `status: sat|unsat|unknown` +- witness schedule when SAT +- proof log path + +### 2. order_dependencies +Use for: +- topological ordering +- cycle detection +- dependency consistency checks + +Inputs: +- `entities` +- `before` +- optional `fixed_positions` + +Outputs: +- valid ordering when SAT +- contradiction when UNSAT +- proof log path + +### 3. capacity_fit +Use for: +- resource budgeting +- optional-vs-required work selection +- capacity feasibility + +Inputs: +- `items: [{name, amount, value?, required?}]` +- `capacity` + +Outputs: +- chosen feasible subset when SAT +- contradiction when required load exceeds capacity +- proof log path + +## Demo + +Run locally: + +```bash +~/.hermes/hermes-agent/venv/bin/python ~/.hermes/bin/crucible_mcp_server.py selftest +``` + +This produces: +- one UNSAT schedule proof +- one SAT schedule proof +- one SAT dependency ordering proof +- one SAT capacity proof + +## Scope guardrails + +Do not force every answer through the Crucible. +Use it when the task is genuinely constraint-shaped. +If the problem does not fit one of the templates, say so plainly. diff --git a/playbooks/verified-logic.yaml b/playbooks/verified-logic.yaml new file mode 100644 index 00000000..c9773d38 --- /dev/null +++ b/playbooks/verified-logic.yaml @@ -0,0 +1,47 @@ +name: verified-logic +description: > + Crucible-first playbook for tasks that require proof instead of plausible prose. + Use Z3-backed sidecar tools for scheduling, dependency ordering, capacity checks, + and consistency verification. + +model: + preferred: claude-opus-4-6 + fallback: claude-sonnet-4-20250514 + max_turns: 12 + temperature: 0.1 + +tools: + - mcp_crucible_schedule_tasks + - mcp_crucible_order_dependencies + - mcp_crucible_capacity_fit + +trigger: + manual: true + +steps: + - classify_problem + - choose_template + - translate_into_constraints + - verify_with_crucible + - report_sat_unsat_with_witness + +output: verified_result +timeout_minutes: 5 + +system_prompt: | + You are running the Crucible playbook. + + Use this playbook for: + - scheduling and deadline feasibility + - dependency ordering and cycle checks + - capacity / resource allocation constraints + - consistency checks where a contradiction matters + + RULES: + 1. Do not bluff through logic. + 2. Pick the narrowest Crucible template that fits the task. + 3. Translate the user's question into structured constraints. + 4. Call the Crucible tool. + 5. If SAT, report the witness model clearly. + 6. If UNSAT, say the constraints are impossible and explain which shape of constraint caused the contradiction. + 7. If the task is not a good fit for these templates, say so plainly instead of pretending it was verified.