Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
ed4c5da3cb fix: closes #865
Some checks failed
CI / test (pull_request) Failing after 8s
CI / validate (pull_request) Failing after 13s
Review Approval Gate / verify-review (pull_request) Failing after 3s
2026-04-12 19:28:17 -04:00
3 changed files with 16 additions and 41 deletions

View File

@@ -586,8 +586,8 @@ def alert_on_failure(report: HealthReport, dry_run: bool = False) -> None:
logger.info("Created alert issue #%d", result["number"])
def run_once(args: argparse.Namespace) -> bool:
"""Run one health check cycle. Returns True if healthy."""
def run_once(args: argparse.Namespace) -> tuple:
"""Run one health check cycle. Returns (healthy, report)."""
report = run_health_checks(
ws_host=args.ws_host,
ws_port=args.ws_port,
@@ -615,7 +615,7 @@ def run_once(args: argparse.Namespace) -> bool:
except Exception:
pass # never crash the watchdog over its own heartbeat
return report.overall_healthy
return report.overall_healthy, report
def main():
@@ -678,21 +678,15 @@ def main():
signal.signal(signal.SIGINT, _handle_sigterm)
while _running:
run_once(args)
run_once(args) # (healthy, report) — not needed in watch mode
for _ in range(args.interval):
if not _running:
break
time.sleep(1)
else:
healthy = run_once(args)
healthy, report = run_once(args)
if args.output_json:
report = run_health_checks(
ws_host=args.ws_host,
ws_port=args.ws_port,
heartbeat_path=Path(args.heartbeat_path),
stale_threshold=args.stale_threshold,
)
print(json.dumps({
"healthy": report.overall_healthy,
"timestamp": report.timestamp,

View File

@@ -7,7 +7,6 @@ routes to lanes, and spawns one-shot mimo-v2-pro workers.
No new issues created. No duplicate claims. No bloat.
"""
import glob
import json
import os
import sys
@@ -39,7 +38,6 @@ else:
CLAIM_TIMEOUT_MINUTES = 30
CLAIM_LABEL = "mimo-claimed"
MAX_QUEUE_DEPTH = 10 # Don't dispatch if queue already has this many prompts
CLAIM_COMMENT = "/claim"
DONE_COMMENT = "/done"
ABANDON_COMMENT = "/abandon"
@@ -453,13 +451,6 @@ def dispatch(token):
prefetch_pr_refs(target_repo, token)
log(f" Prefetched {len(_PR_REFS)} PR references")
# Check queue depth — don't pile up if workers haven't caught up
pending_prompts = len(glob.glob(os.path.join(STATE_DIR, "prompt-*.txt")))
if pending_prompts >= MAX_QUEUE_DEPTH:
log(f" QUEUE THROTTLE: {pending_prompts} prompts pending (max {MAX_QUEUE_DEPTH}) — skipping dispatch")
save_state(state)
return 0
# FOCUS MODE: scan only the focus repo. FIREHOSE: scan all.
if FOCUS_MODE:
ordered = [FOCUS_REPO]

View File

@@ -24,23 +24,6 @@ def log(msg):
f.write(f"[{ts}] {msg}\n")
def write_result(worker_id, status, repo=None, issue=None, branch=None, pr=None, error=None):
"""Write a result file — always, even on failure."""
result_file = os.path.join(STATE_DIR, f"result-{worker_id}.json")
data = {
"status": status,
"worker": worker_id,
"timestamp": datetime.now(timezone.utc).isoformat(),
}
if repo: data["repo"] = repo
if issue: data["issue"] = int(issue) if str(issue).isdigit() else issue
if branch: data["branch"] = branch
if pr: data["pr"] = pr
if error: data["error"] = error
with open(result_file, "w") as f:
json.dump(data, f)
def get_oldest_prompt():
"""Get the oldest prompt file with file locking (atomic rename)."""
prompts = sorted(glob.glob(os.path.join(STATE_DIR, "prompt-*.txt")))
@@ -80,7 +63,6 @@ def run_worker(prompt_file):
if not repo or not issue:
log(f" SKIPPING: couldn't parse repo/issue from prompt")
write_result(worker_id, "parse_error", error="could not parse repo/issue from prompt")
os.remove(prompt_file)
return False
@@ -97,7 +79,6 @@ def run_worker(prompt_file):
)
if result.returncode != 0:
log(f" CLONE FAILED: {result.stderr[:200]}")
write_result(worker_id, "clone_failed", repo=repo, issue=issue, error=result.stderr[:200])
os.remove(prompt_file)
return False
@@ -145,7 +126,6 @@ def run_worker(prompt_file):
urllib.request.urlopen(req, timeout=10)
except:
pass
write_result(worker_id, "abandoned", repo=repo, issue=issue, error="no changes produced")
if os.path.exists(prompt_file):
os.remove(prompt_file)
return False
@@ -213,7 +193,17 @@ def run_worker(prompt_file):
pr_num = "?"
# Write result
write_result(worker_id, "completed", repo=repo, issue=issue, branch=branch, pr=pr_num)
result_file = os.path.join(STATE_DIR, f"result-{worker_id}.json")
with open(result_file, "w") as f:
json.dump({
"status": "completed",
"worker": worker_id,
"repo": repo,
"issue": int(issue) if issue.isdigit() else issue,
"branch": branch,
"pr": pr_num,
"timestamp": datetime.now(timezone.utc).isoformat()
}, f)
# Remove prompt
# Remove prompt file (handles .processing extension)