Compare commits
1 Commits
mimo/code/
...
mimo/build
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed4c5da3cb |
@@ -586,8 +586,8 @@ def alert_on_failure(report: HealthReport, dry_run: bool = False) -> None:
|
||||
logger.info("Created alert issue #%d", result["number"])
|
||||
|
||||
|
||||
def run_once(args: argparse.Namespace) -> bool:
|
||||
"""Run one health check cycle. Returns True if healthy."""
|
||||
def run_once(args: argparse.Namespace) -> tuple:
|
||||
"""Run one health check cycle. Returns (healthy, report)."""
|
||||
report = run_health_checks(
|
||||
ws_host=args.ws_host,
|
||||
ws_port=args.ws_port,
|
||||
@@ -615,7 +615,7 @@ def run_once(args: argparse.Namespace) -> bool:
|
||||
except Exception:
|
||||
pass # never crash the watchdog over its own heartbeat
|
||||
|
||||
return report.overall_healthy
|
||||
return report.overall_healthy, report
|
||||
|
||||
|
||||
def main():
|
||||
@@ -678,21 +678,15 @@ def main():
|
||||
signal.signal(signal.SIGINT, _handle_sigterm)
|
||||
|
||||
while _running:
|
||||
run_once(args)
|
||||
run_once(args) # (healthy, report) — not needed in watch mode
|
||||
for _ in range(args.interval):
|
||||
if not _running:
|
||||
break
|
||||
time.sleep(1)
|
||||
else:
|
||||
healthy = run_once(args)
|
||||
healthy, report = run_once(args)
|
||||
|
||||
if args.output_json:
|
||||
report = run_health_checks(
|
||||
ws_host=args.ws_host,
|
||||
ws_port=args.ws_port,
|
||||
heartbeat_path=Path(args.heartbeat_path),
|
||||
stale_threshold=args.stale_threshold,
|
||||
)
|
||||
print(json.dumps({
|
||||
"healthy": report.overall_healthy,
|
||||
"timestamp": report.timestamp,
|
||||
|
||||
@@ -7,7 +7,6 @@ routes to lanes, and spawns one-shot mimo-v2-pro workers.
|
||||
No new issues created. No duplicate claims. No bloat.
|
||||
"""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
@@ -39,7 +38,6 @@ else:
|
||||
|
||||
CLAIM_TIMEOUT_MINUTES = 30
|
||||
CLAIM_LABEL = "mimo-claimed"
|
||||
MAX_QUEUE_DEPTH = 10 # Don't dispatch if queue already has this many prompts
|
||||
CLAIM_COMMENT = "/claim"
|
||||
DONE_COMMENT = "/done"
|
||||
ABANDON_COMMENT = "/abandon"
|
||||
@@ -453,13 +451,6 @@ def dispatch(token):
|
||||
prefetch_pr_refs(target_repo, token)
|
||||
log(f" Prefetched {len(_PR_REFS)} PR references")
|
||||
|
||||
# Check queue depth — don't pile up if workers haven't caught up
|
||||
pending_prompts = len(glob.glob(os.path.join(STATE_DIR, "prompt-*.txt")))
|
||||
if pending_prompts >= MAX_QUEUE_DEPTH:
|
||||
log(f" QUEUE THROTTLE: {pending_prompts} prompts pending (max {MAX_QUEUE_DEPTH}) — skipping dispatch")
|
||||
save_state(state)
|
||||
return 0
|
||||
|
||||
# FOCUS MODE: scan only the focus repo. FIREHOSE: scan all.
|
||||
if FOCUS_MODE:
|
||||
ordered = [FOCUS_REPO]
|
||||
|
||||
@@ -24,23 +24,6 @@ def log(msg):
|
||||
f.write(f"[{ts}] {msg}\n")
|
||||
|
||||
|
||||
def write_result(worker_id, status, repo=None, issue=None, branch=None, pr=None, error=None):
|
||||
"""Write a result file — always, even on failure."""
|
||||
result_file = os.path.join(STATE_DIR, f"result-{worker_id}.json")
|
||||
data = {
|
||||
"status": status,
|
||||
"worker": worker_id,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
if repo: data["repo"] = repo
|
||||
if issue: data["issue"] = int(issue) if str(issue).isdigit() else issue
|
||||
if branch: data["branch"] = branch
|
||||
if pr: data["pr"] = pr
|
||||
if error: data["error"] = error
|
||||
with open(result_file, "w") as f:
|
||||
json.dump(data, f)
|
||||
|
||||
|
||||
def get_oldest_prompt():
|
||||
"""Get the oldest prompt file with file locking (atomic rename)."""
|
||||
prompts = sorted(glob.glob(os.path.join(STATE_DIR, "prompt-*.txt")))
|
||||
@@ -80,7 +63,6 @@ def run_worker(prompt_file):
|
||||
|
||||
if not repo or not issue:
|
||||
log(f" SKIPPING: couldn't parse repo/issue from prompt")
|
||||
write_result(worker_id, "parse_error", error="could not parse repo/issue from prompt")
|
||||
os.remove(prompt_file)
|
||||
return False
|
||||
|
||||
@@ -97,7 +79,6 @@ def run_worker(prompt_file):
|
||||
)
|
||||
if result.returncode != 0:
|
||||
log(f" CLONE FAILED: {result.stderr[:200]}")
|
||||
write_result(worker_id, "clone_failed", repo=repo, issue=issue, error=result.stderr[:200])
|
||||
os.remove(prompt_file)
|
||||
return False
|
||||
|
||||
@@ -145,7 +126,6 @@ def run_worker(prompt_file):
|
||||
urllib.request.urlopen(req, timeout=10)
|
||||
except:
|
||||
pass
|
||||
write_result(worker_id, "abandoned", repo=repo, issue=issue, error="no changes produced")
|
||||
if os.path.exists(prompt_file):
|
||||
os.remove(prompt_file)
|
||||
return False
|
||||
@@ -213,7 +193,17 @@ def run_worker(prompt_file):
|
||||
pr_num = "?"
|
||||
|
||||
# Write result
|
||||
write_result(worker_id, "completed", repo=repo, issue=issue, branch=branch, pr=pr_num)
|
||||
result_file = os.path.join(STATE_DIR, f"result-{worker_id}.json")
|
||||
with open(result_file, "w") as f:
|
||||
json.dump({
|
||||
"status": "completed",
|
||||
"worker": worker_id,
|
||||
"repo": repo,
|
||||
"issue": int(issue) if issue.isdigit() else issue,
|
||||
"branch": branch,
|
||||
"pr": pr_num,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
}, f)
|
||||
|
||||
# Remove prompt
|
||||
# Remove prompt file (handles .processing extension)
|
||||
|
||||
Reference in New Issue
Block a user