From 8ec4bff771aee7ea7af053fd6d27a8e69b0b77a9 Mon Sep 17 00:00:00 2001
From: Alexander Whitestone <alexpaynex@gmail.com>
Date: Fri, 3 Apr 2026 18:58:43 -0400
Subject: [PATCH] feat(crucible): Z3 sidecar MCP verifier -- rebased onto
 current main

Closes #86. Adds:
- bin/crucible_mcp_server.py (schedule, dependency, capacity proofs)
- docs/crucible-first-cut.md
- playbooks/verified-logic.yaml
- config.yaml crucible MCP server entry
---
 bin/crucible_mcp_server.py    | 459 ++++++++++++++++++++++++++++++++++
 config.yaml                   |   7 +
 docs/crucible-first-cut.md    |  82 ++++++
 playbooks/verified-logic.yaml |  47 ++++
 4 files changed, 595 insertions(+)
 create mode 100644 bin/crucible_mcp_server.py
 create mode 100644 docs/crucible-first-cut.md
 create mode 100644 playbooks/verified-logic.yaml

diff --git a/bin/crucible_mcp_server.py b/bin/crucible_mcp_server.py
new file mode 100644
index 00000000..a4836ef9
--- /dev/null
+++ b/bin/crucible_mcp_server.py
@@ -0,0 +1,459 @@
+#!/usr/bin/env python3
+"""Z3-backed Crucible MCP server for Timmy.
+
+Sidecar-only. Lives in timmy-config, deploys into ~/.hermes/bin/, and is loaded
+by Hermes through native MCP tool discovery. No hermes-agent fork required.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from mcp.server import FastMCP
+from z3 import And, Bool, Distinct, If, Implies, Int, Optimize, Or, Sum, sat, unsat
+
+mcp = FastMCP(
+    name="crucible",
+    instructions=(
+        "Formal verification sidecar for Timmy. Use these tools for scheduling, "
+        "dependency ordering, and resource/capacity feasibility. Return SAT/UNSAT "
+        "with witness models instead of fuzzy prose."
+    ),
+    dependencies=["z3-solver"],
+)
+
+
+def _hermes_home() -> Path:
+    return Path(os.path.expanduser(os.getenv("HERMES_HOME", "~/.hermes")))
+
+
+def _proof_dir() -> Path:
+    path = _hermes_home() / "logs" / "crucible"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def _ts() -> str:
+    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S_%fZ")
+
+
+def _json_default(value: Any) -> Any:
+    if isinstance(value, Path):
+        return str(value)
+    raise TypeError(f"Unsupported type for JSON serialization: {type(value)!r}")
+
+
+def _log_proof(tool_name: str, request: dict[str, Any], result: dict[str, Any]) -> str:
+    path = _proof_dir() / f"{_ts()}_{tool_name}.json"
+    payload = {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "tool": tool_name,
+        "request": request,
+        "result": result,
+    }
+    path.write_text(json.dumps(payload, indent=2, default=_json_default))
+    return str(path)
+
+
+def _ensure_unique(names: list[str], label: str) -> None:
+    if len(set(names)) != len(names):
+        raise ValueError(f"Duplicate {label} names are not allowed: {names}")
+
+
+def _normalize_dependency(dep: Any) -> tuple[str, str, int]:
+    if isinstance(dep, dict):
+        before = dep.get("before")
+        after = dep.get("after")
+        lag = int(dep.get("lag", 0))
+        if not before or not after:
+            raise ValueError(f"Dependency dict must include before/after: {dep!r}")
+        return str(before), str(after), lag
+    if isinstance(dep, (list, tuple)) and len(dep) in (2, 3):
+        before = str(dep[0])
+        after = str(dep[1])
+        lag = int(dep[2]) if len(dep) == 3 else 0
+        return before, after, lag
+    raise ValueError(f"Unsupported dependency shape: {dep!r}")
+
+
+def _normalize_task(task: dict[str, Any]) -> dict[str, Any]:
+    name = str(task["name"])
+    duration = int(task["duration"])
+    if duration <= 0:
+        raise ValueError(f"Task duration must be positive: {task!r}")
+    return {"name": name, "duration": duration}
+
+
+def _normalize_item(item: dict[str, Any]) -> dict[str, Any]:
+    name = str(item["name"])
+    amount = int(item["amount"])
+    value = int(item.get("value", amount))
+    required = bool(item.get("required", False))
+    if amount < 0:
+        raise ValueError(f"Item amount must be non-negative: {item!r}")
+    return {
+        "name": name,
+        "amount": amount,
+        "value": value,
+        "required": required,
+    }
+
+
+def solve_schedule_tasks(
+    tasks: list[dict[str, Any]],
+    horizon: int,
+    dependencies: list[Any] | None = None,
+    fixed_starts: dict[str, int] | None = None,
+    max_parallel_tasks: int = 1,
+    minimize_makespan: bool = True,
+) -> dict[str, Any]:
+    tasks = [_normalize_task(task) for task in tasks]
+    dependencies = dependencies or []
+    fixed_starts = fixed_starts or {}
+    horizon = int(horizon)
+    max_parallel_tasks = int(max_parallel_tasks)
+
+    if horizon <= 0:
+        raise ValueError("horizon must be positive")
+    if max_parallel_tasks <= 0:
+        raise ValueError("max_parallel_tasks must be positive")
+
+    names = [task["name"] for task in tasks]
+    _ensure_unique(names, "task")
+    durations = {task["name"]: task["duration"] for task in tasks}
+
+    opt = Optimize()
+    start = {name: Int(f"start_{name}") for name in names}
+    end = {name: Int(f"end_{name}") for name in names}
+    makespan = Int("makespan")
+
+    for name in names:
+        opt.add(start[name] >= 0)
+        opt.add(end[name] == start[name] + durations[name])
+        opt.add(end[name] <= horizon)
+        if name in fixed_starts:
+            opt.add(start[name] == int(fixed_starts[name]))
+
+    for dep in dependencies:
+        before, after, lag = _normalize_dependency(dep)
+        if before not in start or after not in start:
+            raise ValueError(f"Unknown task in dependency {dep!r}")
+        opt.add(start[after] >= end[before] + lag)
+
+    # Discrete resource capacity over integer time slots.
+    for t in range(horizon):
+        active = [If(And(start[name] <= t, t < end[name]), 1, 0) for name in names]
+        opt.add(Sum(active) <= max_parallel_tasks)
+
+    for name in names:
+        opt.add(makespan >= end[name])
+    if minimize_makespan:
+        opt.minimize(makespan)
+
+    result = opt.check()
+    proof: dict[str, Any]
+    if result == sat:
+        model = opt.model()
+        schedule = []
+        for name in sorted(names, key=lambda n: model.eval(start[n]).as_long()):
+            s = model.eval(start[name]).as_long()
+            e = model.eval(end[name]).as_long()
+            schedule.append({
+                "name": name,
+                "start": s,
+                "end": e,
+                "duration": durations[name],
+            })
+        proof = {
+            "status": "sat",
+            "summary": "Schedule proven feasible.",
+            "horizon": horizon,
+            "max_parallel_tasks": max_parallel_tasks,
+            "makespan": model.eval(makespan).as_long(),
+            "schedule": schedule,
+            "dependencies": [
+                {"before": b, "after": a, "lag": lag}
+                for b, a, lag in (_normalize_dependency(dep) for dep in dependencies)
+            ],
+        }
+    elif result == unsat:
+        proof = {
+            "status": "unsat",
+            "summary": "Schedule is impossible under the given horizon/dependency/capacity constraints.",
+            "horizon": horizon,
+            "max_parallel_tasks": max_parallel_tasks,
+            "dependencies": [
+                {"before": b, "after": a, "lag": lag}
+                for b, a, lag in (_normalize_dependency(dep) for dep in dependencies)
+            ],
+        }
+    else:
+        proof = {
+            "status": "unknown",
+            "summary": "Solver could not prove SAT or UNSAT for this schedule.",
+            "horizon": horizon,
+            "max_parallel_tasks": max_parallel_tasks,
+        }
+
+    proof["proof_log"] = _log_proof(
+        "schedule_tasks",
+        {
+            "tasks": tasks,
+            "horizon": horizon,
+            "dependencies": dependencies,
+            "fixed_starts": fixed_starts,
+            "max_parallel_tasks": max_parallel_tasks,
+            "minimize_makespan": minimize_makespan,
+        },
+        proof,
+    )
+    return proof
+
+
+def solve_dependency_order(
+    entities: list[str],
+    before: list[Any],
+    fixed_positions: dict[str, int] | None = None,
+) -> dict[str, Any]:
+    entities = [str(entity) for entity in entities]
+    fixed_positions = fixed_positions or {}
+    _ensure_unique(entities, "entity")
+
+    opt = Optimize()
+    pos = {entity: Int(f"pos_{entity}") for entity in entities}
+    opt.add(Distinct(*pos.values()))
+    for entity in entities:
+        opt.add(pos[entity] >= 0)
+        opt.add(pos[entity] < len(entities))
+        if entity in fixed_positions:
+            opt.add(pos[entity] == int(fixed_positions[entity]))
+
+    normalized = []
+    for dep in before:
+        left, right, _lag = _normalize_dependency(dep)
+        if left not in pos or right not in pos:
+            raise ValueError(f"Unknown entity in ordering constraint: {dep!r}")
+        opt.add(pos[left] < pos[right])
+        normalized.append({"before": left, "after": right})
+
+    result = opt.check()
+    if result == sat:
+        model = opt.model()
+        ordering = sorted(entities, key=lambda entity: model.eval(pos[entity]).as_long())
+        proof = {
+            "status": "sat",
+            "summary": "Dependency ordering is consistent.",
+            "ordering": ordering,
+            "positions": {entity: model.eval(pos[entity]).as_long() for entity in entities},
+            "constraints": normalized,
+        }
+    elif result == unsat:
+        proof = {
+            "status": "unsat",
+            "summary": "Dependency ordering contains a contradiction/cycle.",
+            "constraints": normalized,
+        }
+    else:
+        proof = {
+            "status": "unknown",
+            "summary": "Solver could not prove SAT or UNSAT for this dependency graph.",
+            "constraints": normalized,
+        }
+
+    proof["proof_log"] = _log_proof(
+        "order_dependencies",
+        {
+            "entities": entities,
+            "before": before,
+            "fixed_positions": fixed_positions,
+        },
+        proof,
+    )
+    return proof
+
+
+def solve_capacity_fit(
+    items: list[dict[str, Any]],
+    capacity: int,
+    maximize_value: bool = True,
+) -> dict[str, Any]:
+    items = [_normalize_item(item) for item in items]
+    capacity = int(capacity)
+    if capacity < 0:
+        raise ValueError("capacity must be non-negative")
+
+    names = [item["name"] for item in items]
+    _ensure_unique(names, "item")
+    choose = {item["name"]: Bool(f"choose_{item['name']}") for item in items}
+
+    opt = Optimize()
+    for item in items:
+        if item["required"]:
+            opt.add(choose[item["name"]])
+
+    total_amount = Sum([If(choose[item["name"]], item["amount"], 0) for item in items])
+    total_value = Sum([If(choose[item["name"]], item["value"], 0) for item in items])
+    opt.add(total_amount <= capacity)
+    if maximize_value:
+        opt.maximize(total_value)
+
+    result = opt.check()
+    if result == sat:
+        model = opt.model()
+        chosen = [item for item in items if bool(model.eval(choose[item["name"]], model_completion=True))]
+        skipped = [item for item in items if item not in chosen]
+        used = sum(item["amount"] for item in chosen)
+        proof = {
+            "status": "sat",
+            "summary": "Capacity constraints are feasible.",
+            "capacity": capacity,
+            "used": used,
+            "remaining": capacity - used,
+            "chosen": chosen,
+            "skipped": skipped,
+            "total_value": sum(item["value"] for item in chosen),
+        }
+    elif result == unsat:
+        proof = {
+            "status": "unsat",
+            "summary": "Required items exceed available capacity.",
+            "capacity": capacity,
+            "required_items": [item for item in items if item["required"]],
+        }
+    else:
+        proof = {
+            "status": "unknown",
+            "summary": "Solver could not prove SAT or UNSAT for this capacity check.",
+            "capacity": capacity,
+        }
+
+    proof["proof_log"] = _log_proof(
+        "capacity_fit",
+        {
+            "items": items,
+            "capacity": capacity,
+            "maximize_value": maximize_value,
+        },
+        proof,
+    )
+    return proof
+
+
+@mcp.tool(
+    name="schedule_tasks",
+    description=(
+        "Crucible template for discrete scheduling. Proves whether integer-duration "
+        "tasks fit within a time horizon under dependency and parallelism constraints."
+    ),
+    structured_output=True,
+)
+def schedule_tasks(
+    tasks: list[dict[str, Any]],
+    horizon: int,
+    dependencies: list[Any] | None = None,
+    fixed_starts: dict[str, int] | None = None,
+    max_parallel_tasks: int = 1,
+    minimize_makespan: bool = True,
+) -> dict[str, Any]:
+    return solve_schedule_tasks(
+        tasks=tasks,
+        horizon=horizon,
+        dependencies=dependencies,
+        fixed_starts=fixed_starts,
+        max_parallel_tasks=max_parallel_tasks,
+        minimize_makespan=minimize_makespan,
+    )
+
+
+@mcp.tool(
+    name="order_dependencies",
+    description=(
+        "Crucible template for dependency ordering. Proves whether a set of before/after "
+        "constraints is consistent and returns a valid topological order when SAT."
+    ),
+    structured_output=True,
+)
+def order_dependencies(
+    entities: list[str],
+    before: list[Any],
+    fixed_positions: dict[str, int] | None = None,
+) -> dict[str, Any]:
+    return solve_dependency_order(
+        entities=entities,
+        before=before,
+        fixed_positions=fixed_positions,
+    )
+
+
+@mcp.tool(
+    name="capacity_fit",
+    description=(
+        "Crucible template for resource capacity. Proves whether required items fit "
+        "within a capacity budget and chooses an optimal feasible subset of optional items."
+    ),
+    structured_output=True,
+)
+def capacity_fit(
+    items: list[dict[str, Any]],
+    capacity: int,
+    maximize_value: bool = True,
+) -> dict[str, Any]:
+    return solve_capacity_fit(items=items, capacity=capacity, maximize_value=maximize_value)
+
+
+def run_selftest() -> dict[str, Any]:
+    return {
+        "schedule_unsat_single_worker": solve_schedule_tasks(
+            tasks=[
+                {"name": "A", "duration": 2},
+                {"name": "B", "duration": 3},
+                {"name": "C", "duration": 4},
+            ],
+            horizon=8,
+            dependencies=[{"before": "A", "after": "B"}],
+            max_parallel_tasks=1,
+        ),
+        "schedule_sat_two_workers": solve_schedule_tasks(
+            tasks=[
+                {"name": "A", "duration": 2},
+                {"name": "B", "duration": 3},
+                {"name": "C", "duration": 4},
+            ],
+            horizon=8,
+            dependencies=[{"before": "A", "after": "B"}],
+            max_parallel_tasks=2,
+        ),
+        "ordering_sat": solve_dependency_order(
+            entities=["fetch", "train", "eval"],
+            before=[
+                {"before": "fetch", "after": "train"},
+                {"before": "train", "after": "eval"},
+            ],
+        ),
+        "capacity_sat": solve_capacity_fit(
+            items=[
+                {"name": "gpu_job", "amount": 6, "value": 6, "required": True},
+                {"name": "telemetry", "amount": 1, "value": 1, "required": True},
+                {"name": "export", "amount": 2, "value": 4, "required": False},
+                {"name": "viz", "amount": 3, "value": 5, "required": False},
+            ],
+            capacity=8,
+        ),
+    }
+
+
+def main() -> int:
+    if len(sys.argv) > 1 and sys.argv[1] == "selftest":
+        print(json.dumps(run_selftest(), indent=2))
+        return 0
+    mcp.run(transport="stdio")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/config.yaml b/config.yaml
index 553014c3..347c8377 100644
--- a/config.yaml
+++ b/config.yaml
@@ -214,6 +214,13 @@ mcp_servers:
     - /Users/apayne/.timmy/morrowind/mcp_server.py
     env: {}
     timeout: 30
+  crucible:
+    command: /Users/apayne/.hermes/hermes-agent/venv/bin/python3
+    args:
+    - /Users/apayne/.hermes/bin/crucible_mcp_server.py
+    env: {}
+    timeout: 120
+    connect_timeout: 60
 fallback_model:
   provider: ollama
   model: hermes3:latest
diff --git a/docs/crucible-first-cut.md b/docs/crucible-first-cut.md
new file mode 100644
index 00000000..2a7a19a0
--- /dev/null
+++ b/docs/crucible-first-cut.md
@@ -0,0 +1,82 @@
+# Crucible First Cut
+
+This is the first narrow neuro-symbolic slice for Timmy.
+
+## Goal
+
+Prove constraint logic instead of bluffing through it.
+
+## Shape
+
+The Crucible is a sidecar MCP server that lives in `timmy-config` and deploys into `~/.hermes/bin/`.
+It is loaded by Hermes through native MCP discovery. No Hermes fork.
+
+## Templates shipped in v0
+
+### 1. schedule_tasks
+Use for:
+- deadline feasibility
+- task ordering with dependencies
+- small integer scheduling windows
+
+Inputs:
+- `tasks`: `[{name, duration}]`
+- `horizon`: integer window size
+- `dependencies`: `[{before, after, lag?}]`
+- `max_parallel_tasks`: integer worker count
+
+Outputs:
+- `status: sat|unsat|unknown`
+- witness schedule when SAT
+- proof log path
+
+### 2. order_dependencies
+Use for:
+- topological ordering
+- cycle detection
+- dependency consistency checks
+
+Inputs:
+- `entities`
+- `before`
+- optional `fixed_positions`
+
+Outputs:
+- valid ordering when SAT
+- contradiction when UNSAT
+- proof log path
+
+### 3. capacity_fit
+Use for:
+- resource budgeting
+- optional-vs-required work selection
+- capacity feasibility
+
+Inputs:
+- `items: [{name, amount, value?, required?}]`
+- `capacity`
+
+Outputs:
+- chosen feasible subset when SAT
+- contradiction when required load exceeds capacity
+- proof log path
+
+## Demo
+
+Run locally:
+
+```bash
+~/.hermes/hermes-agent/venv/bin/python ~/.hermes/bin/crucible_mcp_server.py selftest
+```
+
+This produces:
+- one UNSAT schedule proof
+- one SAT schedule proof
+- one SAT dependency ordering proof
+- one SAT capacity proof
+
+## Scope guardrails
+
+Do not force every answer through the Crucible.
+Use it when the task is genuinely constraint-shaped.
+If the problem does not fit one of the templates, say so plainly.
diff --git a/playbooks/verified-logic.yaml b/playbooks/verified-logic.yaml
new file mode 100644
index 00000000..c9773d38
--- /dev/null
+++ b/playbooks/verified-logic.yaml
@@ -0,0 +1,47 @@
+name: verified-logic
+description: >
+  Crucible-first playbook for tasks that require proof instead of plausible prose.
+  Use Z3-backed sidecar tools for scheduling, dependency ordering, capacity checks,
+  and consistency verification.
+
+model:
+  preferred: claude-opus-4-6
+  fallback: claude-sonnet-4-20250514
+  max_turns: 12
+  temperature: 0.1
+
+tools:
+  - mcp_crucible_schedule_tasks
+  - mcp_crucible_order_dependencies
+  - mcp_crucible_capacity_fit
+
+trigger:
+  manual: true
+
+steps:
+  - classify_problem
+  - choose_template
+  - translate_into_constraints
+  - verify_with_crucible
+  - report_sat_unsat_with_witness
+
+output: verified_result
+timeout_minutes: 5
+
+system_prompt: |
+  You are running the Crucible playbook.
+
+  Use this playbook for:
+  - scheduling and deadline feasibility
+  - dependency ordering and cycle checks
+  - capacity / resource allocation constraints
+  - consistency checks where a contradiction matters
+
+  RULES:
+  1. Do not bluff through logic.
+  2. Pick the narrowest Crucible template that fits the task.
+  3. Translate the user's question into structured constraints.
+  4. Call the Crucible tool.
+  5. If SAT, report the witness model clearly.
+  6. If UNSAT, say the constraints are impossible and explain which shape of constraint caused the contradiction.
+  7. If the task is not a good fit for these templates, say so plainly instead of pretending it was verified.