Compare commits
7 Commits
v7.0.0
...
epic-999-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
735118f21d | ||
|
|
2da4a3d937 | ||
|
|
9a5a299724 | ||
|
|
e5b844af3a | ||
|
|
ab7fd52ae3 | ||
|
|
c266661bff | ||
|
|
5f1cdfc9e4 |
@@ -1,2 +0,0 @@
|
||||
{"created_at_ms":1775533542734,"session_id":"session-1775533542734-0","type":"session_meta","updated_at_ms":1775533542734,"version":1}
|
||||
{"message":{"blocks":[{"text":"You are Code Claw running as the Gitea user claw-code.\n\nRepository: Timmy_Foundation/hermes-agent\nIssue: #126 — P2: Validate Documentation Audit & Apply to Our Fork\nBranch: claw-code/issue-126\n\nRead the issue and recent comments, then implement the smallest correct change.\nYou are in a git repo checkout already.\n\nIssue body:\n## Context\n\nCommit `43d468ce` is a comprehensive documentation audit — fixes stale info, expands thin pages, adds depth across all docs.\n\n## Acceptance Criteria\n\n- [ ] **Catalog all doc changes**: Run `git show 43d468ce --stat` to list all files changed, then review each for what was fixed/expanded\n- [ ] **Verify key docs are accurate**: Pick 3 docs that were previously thin (setup, deployment, plugin development), confirm they now have comprehensive content\n- [ ] **Identify stale info that was corrected**: Note at least 3 pieces of stale information that were removed or updated\n- [ ] **Apply fixes to our fork if needed**: Check if any of the doc fixes apply to our `Timmy_Foundation/hermes-agent` fork (Timmy-specific references, custom config sections)\n\n## Why This Matters\n\nAccurate documentation is critical for onboarding new agents and maintaining the fleet. 
Stale docs cost more debugging time than writing them initially.\n\n## Hints\n\n- Run `cd ~/.hermes/hermes-agent && git show 43d468ce --stat` to see the full scope\n- The docs likely cover: setup, plugins, deployment, MCP configuration, and tool integrations\n\n\nParent: #111\n\nRecent comments:\n## 🏷️ Automated Triage Check\n\n**Timestamp:** 2026-04-06T15:30:12.449023 \n**Agent:** Allegro Heartbeat\n\nThis issue has been identified as needing triage:\n\n### Checklist\n- [ ] Clear acceptance criteria defined\n- [ ] Priority label assigned (p0-critical / p1-important / p2-backlog)\n- [ ] Size estimate added (quick-fix / day / week / epic)\n- [ ] Owner assigned\n- [ ] Related issues linked\n\n### Context\n- No comments yet — needs engagement\n- No labels — needs categorization\n- Part of automated backlog maintenance\n\n---\n*Automated triage from Allegro 15-minute heartbeat*\n\n[BURN-DOWN] Dispatched to Code Claw (claw-code worker) as part of nightly burn-down cycle. Heartbeat active.\n\n🟠 Code Claw (OpenRouter qwen/qwen3.6-plus:free) picking up this issue via 15-minute heartbeat.\n\nTimestamp: 2026-04-07T03:45:37Z\n\nRules:\n- Make focused code/config/doc changes only if they directly address the issue.\n- Prefer the smallest proof-oriented fix.\n- Run relevant verification commands if obvious.\n- Do NOT create PRs yourself; the outer worker handles commit/push/PR.\n- If the task is too large or not code-fit, leave the tree unchanged.\n","type":"text"}],"role":"user"},"type":"message"}
|
||||
@@ -1,2 +0,0 @@
|
||||
{"created_at_ms":1775534636684,"session_id":"session-1775534636684-0","type":"session_meta","updated_at_ms":1775534636684,"version":1}
|
||||
{"message":{"blocks":[{"text":"You are Code Claw running as the Gitea user claw-code.\n\nRepository: Timmy_Foundation/hermes-agent\nIssue: #151 — [CONFIG] Add Kimi model to fallback chain for Allegro and Bezalel\nBranch: claw-code/issue-151\n\nRead the issue and recent comments, then implement the smallest correct change.\nYou are in a git repo checkout already.\n\nIssue body:\n## Problem\nAllegro and Bezalel are choking because the Kimi model code is not on their fallback chain. When primary models fail or rate-limit, Kimi should be available as a fallback option but is currently missing.\n\n## Expected Behavior\nKimi model code should be at the front of the fallback chain for both Allegro and Bezalel, so they can remain responsive when primary models are unavailable.\n\n## Context\nThis was reported in Telegram by Alexander Whitestone after observing both agents becoming unresponsive. Ezra was asked to investigate the fallback chain configuration.\n\n## Related\n- timmy-config #302: [ARCH] Fallback Portfolio Runtime Wiring (general fallback framework)\n- hermes-agent #150: [BEZALEL][AUDIT] Telegram Request-to-Gitea Tracking Audit\n\n## Acceptance Criteria\n- [ ] Kimi model code is added to Allegro fallback chain\n- [ ] Kimi model code is added to Bezalel fallback chain\n- [ ] Fallback ordering places Kimi appropriately (front of chain as requested)\n- [ ] Test and confirm both agents can successfully fall back to Kimi\n- [ ] Document the fallback chain configuration for both agents\n\n/assign @ezra\n\nRecent comments:\n[BURN-DOWN] Dispatched to Code Claw (claw-code worker) as part of nightly burn-down cycle. 
Heartbeat active.\n\n🟠 Code Claw (OpenRouter qwen/qwen3.6-plus:free) picking up this issue via 15-minute heartbeat.\n\nTimestamp: 2026-04-07T04:03:49Z\n\nRules:\n- Make focused code/config/doc changes only if they directly address the issue.\n- Prefer the smallest proof-oriented fix.\n- Run relevant verification commands if obvious.\n- Do NOT create PRs yourself; the outer worker handles commit/push/PR.\n- If the task is too large or not code-fit, leave the tree unchanged.\n","type":"text"}],"role":"user"},"type":"message"}
|
||||
51
.coveragerc
51
.coveragerc
@@ -1,51 +0,0 @@
|
||||
# Coverage configuration for hermes-agent
|
||||
# Run with: pytest --cov=agent --cov=tools --cov=gateway --cov=hermes_cli tests/
|
||||
|
||||
[run]
|
||||
source =
|
||||
agent
|
||||
tools
|
||||
gateway
|
||||
hermes_cli
|
||||
acp_adapter
|
||||
cron
|
||||
honcho_integration
|
||||
|
||||
omit =
|
||||
*/tests/*
|
||||
*/test_*
|
||||
*/__pycache__/*
|
||||
*/venv/*
|
||||
*/.venv/*
|
||||
setup.py
|
||||
conftest.py
|
||||
|
||||
branch = True
|
||||
|
||||
[report]
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
def __repr__
|
||||
raise AssertionError
|
||||
raise NotImplementedError
|
||||
if __name__ == .__main__.:
|
||||
if TYPE_CHECKING:
|
||||
class .*\bProtocol\):
|
||||
@(abc\.)?abstractmethod
|
||||
|
||||
ignore_errors = True
|
||||
|
||||
precision = 2
|
||||
|
||||
fail_under = 70
|
||||
|
||||
show_missing = True
|
||||
skip_covered = False
|
||||
|
||||
[html]
|
||||
directory = coverage_html
|
||||
|
||||
title = Hermes Agent Coverage Report
|
||||
|
||||
[xml]
|
||||
output = coverage.xml
|
||||
10
.env.example
10
.env.example
@@ -14,16 +14,6 @@
|
||||
# LLM_MODEL is no longer read from .env — this line is kept for reference only.
|
||||
# LLM_MODEL=anthropic/claude-opus-4.6
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Google AI Studio / Gemini)
|
||||
# =============================================================================
|
||||
# Native Gemini API via Google's OpenAI-compatible endpoint.
|
||||
# Get your key at: https://aistudio.google.com/app/apikey
|
||||
# GOOGLE_API_KEY=your_google_ai_studio_key_here
|
||||
# GEMINI_API_KEY=your_gemini_key_here # alias for GOOGLE_API_KEY
|
||||
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
|
||||
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (z.ai / GLM)
|
||||
# =============================================================================
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
name: Forge CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
concurrency:
|
||||
group: forge-ci-${{ gitea.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
smoke-and-build:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
enable-cache: true
|
||||
cache-dependency-glob: "uv.lock"
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install package
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Smoke tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/smoke_test.py
|
||||
env:
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
- name: Syntax guard
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/syntax_guard.py
|
||||
|
||||
- name: Green-path E2E
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/test_green_path_e2e.py -q --tb=short
|
||||
env:
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
@@ -1,44 +0,0 @@
|
||||
name: Notebook CI
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'notebooks/**'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'notebooks/**'
|
||||
|
||||
jobs:
|
||||
notebook-smoke:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install papermill jupytext nbformat
|
||||
python -m ipykernel install --user --name python3
|
||||
|
||||
- name: Execute system health notebook
|
||||
run: |
|
||||
papermill notebooks/agent_task_system_health.ipynb /tmp/output.ipynb \
|
||||
-p threshold 0.5 \
|
||||
-p hostname ci-runner
|
||||
|
||||
- name: Verify output has results
|
||||
run: |
|
||||
python -c "
|
||||
import json
|
||||
nb = json.load(open('/tmp/output.ipynb'))
|
||||
code_cells = [c for c in nb['cells'] if c['cell_type'] == 'code']
|
||||
outputs = [c.get('outputs', []) for c in code_cells]
|
||||
total_outputs = sum(len(o) for o in outputs)
|
||||
assert total_outputs > 0, 'Notebook produced no outputs'
|
||||
print(f'Notebook executed successfully with {total_outputs} output(s)')
|
||||
"
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
#
# Secret-leak pre-commit hook: thin launcher for the Python scanner that
# lives next to this script.
#
# Enable the hook directory with:
#   git config core.hooksPath .githooks
#
# Skip the hook for a single commit with:
#   git commit --no-verify
#

set -euo pipefail

# Resolve the directory containing this script so the scanner is located
# regardless of the directory git invokes the hook from.
hook_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Replace this shell with the scanner, forwarding every hook argument.
exec python3 "${hook_dir}/pre-commit.py" "$@"
|
||||
@@ -1,327 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pre-commit hook for detecting secret leaks in staged files.
|
||||
|
||||
Scans staged diffs and full file contents for common secret patterns,
|
||||
token file paths, private keys, and credential strings.
|
||||
|
||||
Installation:
|
||||
git config core.hooksPath .githooks
|
||||
|
||||
To bypass:
|
||||
git commit --no-verify
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Callable, Union
|
||||
|
||||
# Terminal colors as ANSI escape sequences; NC switches formatting back off.
RED = "\033[0;31m"     # failure output (leak report and "blocked" banner)
YELLOW = "\033[1;33m"  # NOTE(review): not referenced in the visible code
GREEN = "\033[0;32m"   # progress and success messages
NC = "\033[0m"
|
||||
|
||||
|
||||
class Finding:
    """A single potential secret leak located by the scanner.

    Two findings are equal when their filename, line and message all match.
    Instances are hashable with a hash consistent with that equality, so
    findings can be de-duplicated with a ``set`` or used as ``dict`` keys.
    Do not mutate a finding while it is stored in a hashed container.
    """

    def __init__(self, filename: str, line: int, message: str) -> None:
        """Store where the leak was seen and what kind of leak it is.

        Args:
            filename: Path (or pseudo-path such as ``"<diff>"``) being scanned.
            line: 1-based line number supplied by the caller.
            message: Human-readable description of the matched pattern.
        """
        self.filename = filename
        self.line = line
        self.message = message

    def __repr__(self) -> str:
        return f"Finding({self.filename!r}, {self.line}, {self.message!r})"

    def __eq__(self, other: object) -> bool:
        # Defer to the other operand for foreign types instead of guessing.
        if not isinstance(other, Finding):
            return NotImplemented
        return (
            self.filename == other.filename
            and self.line == other.line
            and self.message == other.message
        )

    def __hash__(self) -> int:
        # Defining __eq__ alone sets __hash__ to None (unhashable); hash the
        # same fields that __eq__ compares so equal findings hash equally.
        return hash((self.filename, self.line, self.message))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regex patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RE_SK_KEY = re.compile(r"sk-[a-zA-Z0-9]{20,}")
|
||||
_RE_BEARER = re.compile(r"Bearer\s+[a-zA-Z0-9_-]{20,}")
|
||||
|
||||
_RE_ENV_ASSIGN = re.compile(
|
||||
r"^(?:export\s+)?"
|
||||
r"(OPENAI_API_KEY|GITEA_TOKEN|ANTHROPIC_API_KEY|KIMI_API_KEY"
|
||||
r"|TELEGRAM_BOT_TOKEN|DISCORD_TOKEN)"
|
||||
r"\s*=\s*(.+)$"
|
||||
)
|
||||
|
||||
_RE_TOKEN_PATHS = re.compile(
|
||||
r'(?:^|["\'\s])'
|
||||
r"(\.(?:env)"
|
||||
r"|(?:secrets|keystore|credentials|token|api_keys)\.json"
|
||||
r"|~/\.hermes/credentials/"
|
||||
r"|/root/nostr-relay/keystore\.json)"
|
||||
)
|
||||
|
||||
_RE_PRIVATE_KEY = re.compile(
|
||||
r"-----BEGIN (PRIVATE KEY|RSA PRIVATE KEY|OPENSSH PRIVATE KEY)-----"
|
||||
)
|
||||
|
||||
_RE_URL_PASSWORD = re.compile(r"https?://[^:]+:[^@]+@")
|
||||
|
||||
_RE_RAW_TOKEN = re.compile(r'"token"\s*:\s*"([^"]{10,})"')
|
||||
_RE_RAW_API_KEY = re.compile(r'"api_key"\s*:\s*"([^"]{10,})"')
|
||||
|
||||
# Safe patterns (placeholders)
|
||||
_SAFE_ENV_VALUES = {
|
||||
"<YOUR_API_KEY>",
|
||||
"***",
|
||||
"REDACTED",
|
||||
"",
|
||||
}
|
||||
|
||||
_RE_DOC_EXAMPLE = re.compile(
|
||||
r"\b(?:example|documentation|doc|readme)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
_RE_OS_ENVIRON = re.compile(r"os\.environ(?:\.get|\[)")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def is_binary_content(content: Union[str, bytes]) -> bool:
    """Report whether *content* looks like binary data.

    Text (``str``) is never classified as binary. Any other input is
    classified as binary when it contains at least one NUL byte.
    """
    return not isinstance(content, str) and b"\x00" in content
|
||||
|
||||
|
||||
def _looks_like_safe_env_line(line: str) -> bool:
    """Return True when *line* only reads or references an environment value.

    A line qualifies when it accesses ``os.environ`` (``.get`` or indexing)
    or when, ignoring surrounding whitespace, it ends in a ``$NAME`` shell
    variable expansion such as ``$OPENAI_API_KEY``.
    """
    if _RE_OS_ENVIRON.search(line) is not None:
        return True
    trailing_expansion = re.search(r'\$\w+\s*$', line.strip())
    return trailing_expansion is not None
|
||||
|
||||
|
||||
def _is_placeholder(value: str) -> bool:
    """Return True when *value* is a known placeholder, empty, or a variable reference.

    Surrounding whitespace and quote characters are removed before the
    comparison. A value counts as a placeholder when it is listed in
    ``_SAFE_ENV_VALUES`` or consists solely of one shell variable reference,
    written either as ``$NAME`` or in the brace form ``${NAME}``. Brace
    expansions carrying a default (``${NAME:-fallback}``) are deliberately
    not accepted, because the fallback may itself be a secret.
    """
    candidate = value.strip().strip('"').strip("'")
    if candidate in _SAFE_ENV_VALUES:
        return True
    # A lone $NAME or ${NAME} forwards a value defined elsewhere.
    return re.fullmatch(r"\$(?:\w+|\{\w+\})", candidate) is not None
|
||||
|
||||
|
||||
def _is_doc_or_example(line: str, value: str | None = None) -> bool:
    """Return True when *line* looks like documentation or example code.

    Args:
        line: The full line being inspected.
        value: Optional right-hand side already extracted by the caller.

    The line is accepted when *value* is given and is a placeholder, or when
    the line mentions a documentation keyword and its own credential
    assignment carries a placeholder value.
    """
    if value is not None and _is_placeholder(value):
        return True
    if not _RE_DOC_EXAMPLE.search(line):
        return False
    assignment = _RE_ENV_ASSIGN.search(line)
    if assignment and _is_placeholder(assignment.group(2)):
        return True
    return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scanning
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def scan_line(line: str, filename: str, line_no: int) -> Iterable[Finding]:
    """Yield at most one finding for a single line of text.

    Checks run in a fixed order and the first match ends the scan of the
    line: API keys and Bearer tokens, then credential variable assignments,
    then token file paths, private key headers, passwords in URLs and raw
    ``"token"`` / ``"api_key"`` values. A line that is a credential
    assignment is never examined by the later checks, whether or not the
    assignment is reported.

    Args:
        line: The line to inspect; trailing newlines are ignored.
        filename: Name recorded in any finding.
        line_no: Line number recorded in any finding.
    """
    text = line.rstrip("\n")
    if not text:
        return

    # Credential-shaped strings are checked before anything else.
    for pattern, message in (
        (_RE_SK_KEY, "Potential API key (sk-...) found"),
        (_RE_BEARER, "Potential Bearer token found"),
    ):
        if pattern.search(text):
            yield Finding(filename, line_no, message)
            return

    # Assignments to known credential variables.
    assignment = _RE_ENV_ASSIGN.search(text)
    if assignment:
        var_name = assignment.group(1)
        value = assignment.group(2)
        benign = (
            _looks_like_safe_env_line(text)
            or _is_doc_or_example(text, value)
            or _is_placeholder(value)
        )
        if not benign:
            yield Finding(
                filename,
                line_no,
                f"Potential secret assignment: {var_name}=...",
            )
        return

    # Remaining single-pattern checks, in priority order.
    for pattern, message in (
        (_RE_TOKEN_PATHS, "Potential token file path found"),
        (_RE_PRIVATE_KEY, "Private key block found"),
        (_RE_URL_PASSWORD, "Password in URL found"),
        (_RE_RAW_TOKEN, 'Raw "token" string with long value'),
        (_RE_RAW_API_KEY, 'Raw "api_key" string with long value'),
    ):
        if pattern.search(text):
            yield Finding(filename, line_no, message)
            return
|
||||
|
||||
|
||||
def scan_content(content: Union[str, bytes], filename: str) -> List[Finding]:
    """Scan every line of *content* and collect the findings.

    ``bytes`` input is decoded as UTF-8 first; content that is not valid
    UTF-8 is skipped and produces an empty list. Line numbers in the
    returned findings start at 1.
    """
    if isinstance(content, bytes):
        try:
            decoded = content.decode("utf-8")
        except UnicodeDecodeError:
            return []
    else:
        decoded = content

    return [
        finding
        for number, text_line in enumerate(decoded.splitlines(), start=1)
        for finding in scan_line(text_line, filename, number)
    ]
|
||||
|
||||
|
||||
def scan_files(
    files: List[str],
    content_reader: Callable[[str], bytes],
) -> List[Finding]:
    """Scan each path in *files* and return all findings in input order.

    Args:
        files: Paths to scan.
        content_reader: Callable returning the content for a given path.

    Content classified as binary by ``is_binary_content`` is skipped.
    """
    collected: List[Finding] = []
    for path in files:
        data = content_reader(path)
        if not is_binary_content(data):
            collected.extend(scan_content(data, path))
    return collected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Git helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_staged_files() -> List[str]:
    """Return the paths of staged files, excluding deletions.

    Runs ``git diff --cached --name-only --diff-filter=ACMR`` and returns one
    entry per non-empty output line. An empty list is returned when git
    exits with a non-zero status.

    ``core.quotePath`` is disabled for this call: by default git prints
    non-ASCII path bytes as quoted octal escapes, and such a quoted name
    cannot be passed back to ``git show :<path>`` to read the staged content.
    """
    result = subprocess.run(
        [
            "git",
            "-c",
            "core.quotePath=false",
            "diff",
            "--cached",
            "--name-only",
            "--diff-filter=ACMR",
        ],
        capture_output=True,
        text=True,
        encoding="utf-8",
        # Keep undecodable path bytes round-trippable instead of raising.
        errors="surrogateescape",
    )
    if result.returncode != 0:
        return []
    # Split on "\n" only (git's record separator here) and do not strip the
    # whole output, so whitespace that belongs to a filename is preserved.
    return [name for name in result.stdout.split("\n") if name]
|
||||
|
||||
|
||||
def get_staged_diff() -> str:
    """Return the staged diff as text, or ``""`` when git fails.

    The diff is produced by ``git diff --cached --no-color -U0``.
    """
    completed = subprocess.run(
        ["git", "diff", "--cached", "--no-color", "-U0"],
        capture_output=True,
        text=True,
    )
    return completed.stdout if completed.returncode == 0 else ""
|
||||
|
||||
|
||||
def get_file_content_at_staged(filepath: str) -> bytes:
    """Return the staged (index) content of *filepath* as raw bytes.

    Uses ``git show :<filepath>``; empty bytes are returned when git exits
    with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "show", f":{filepath}"],
        capture_output=True,
    )
    return completed.stdout if completed.returncode == 0 else b""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> int:
    """Scan staged changes for secrets and report the result.

    Both the full staged content of every staged file and the added lines of
    the staged diff are scanned. Findings from the diff are reported under
    the pseudo-filename ``<diff>`` with the line's position inside the diff
    output.

    Returns:
        0 when nothing is staged or nothing was found, 1 when at least one
        potential leak was reported.
    """
    print(f"{GREEN}🔍 Scanning for secret leaks in staged files...{NC}")

    staged_files = get_staged_files()
    if not staged_files:
        print(f"{GREEN}✓ No files staged for commit{NC}")
        return 0

    # Full staged contents first, then only the lines the commit adds.
    findings = scan_files(staged_files, get_file_content_at_staged)
    for diff_line_no, diff_line in enumerate(
        get_staged_diff().splitlines(), start=1
    ):
        # "+++" lines are file headers, not added content.
        if diff_line.startswith("+") and not diff_line.startswith("+++"):
            findings.extend(scan_line(diff_line[1:], "<diff>", diff_line_no))

    if not findings:
        print(f"{GREEN}✓ No potential secret leaks detected{NC}")
        return 0

    print(f"{RED}✗ Potential secret leaks detected:{NC}\n")
    for finding in findings:
        loc = finding.filename
        print(f" {RED}[LEAK]{NC} {loc}:{finding.line} — {finding.message}")

    print()
    print(f"{RED}╔════════════════════════════════════════════════════════════╗{NC}")
    print(f"{RED}║ COMMIT BLOCKED: Potential secrets detected! ║{NC}")
    print(f"{RED}╚════════════════════════════════════════════════════════════╝{NC}")
    print()
    for advice in (
        "Recommendations:",
        " 1. Remove secrets from your code",
        " 2. Use environment variables or a secrets manager",
        " 3. Add sensitive files to .gitignore",
        " 4. Rotate any exposed credentials immediately",
    ):
        print(advice)
    print()
    print("If you are CERTAIN this is a false positive, you can bypass:")
    print(" git commit --no-verify")
    print()
    return 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
13
.github/CODEOWNERS
vendored
13
.github/CODEOWNERS
vendored
@@ -1,13 +0,0 @@
|
||||
# Default owners for all files
|
||||
* @Timmy
|
||||
|
||||
# Critical paths require explicit review
|
||||
/gateway/ @Timmy
|
||||
/tools/ @Timmy
|
||||
/agent/ @Timmy
|
||||
/config/ @Timmy
|
||||
/scripts/ @Timmy
|
||||
/.github/workflows/ @Timmy
|
||||
/pyproject.toml @Timmy
|
||||
/requirements.txt @Timmy
|
||||
/Dockerfile @Timmy
|
||||
99
.github/ISSUE_TEMPLATE/security_pr_checklist.yml
vendored
99
.github/ISSUE_TEMPLATE/security_pr_checklist.yml
vendored
@@ -1,99 +0,0 @@
|
||||
name: "🔒 Security PR Checklist"
|
||||
description: "Use this when your PR touches authentication, file I/O, external API calls, or other sensitive paths."
|
||||
title: "[Security Review]: "
|
||||
labels: ["security", "needs-review"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Security Pre-Merge Review
|
||||
Complete this checklist before requesting review on PRs that touch **authentication, file I/O, external API calls, or secrets handling**.
|
||||
|
||||
- type: input
|
||||
id: pr-link
|
||||
attributes:
|
||||
label: Pull Request
|
||||
description: Link to the PR being reviewed
|
||||
placeholder: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/XXX"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: change-type
|
||||
attributes:
|
||||
label: Change Category
|
||||
description: What kind of sensitive change does this PR make?
|
||||
multiple: true
|
||||
options:
|
||||
- Authentication / Authorization
|
||||
- File I/O (read/write/delete)
|
||||
- External API calls (outbound HTTP/network)
|
||||
- Secret / credential handling
|
||||
- Command execution (subprocess/shell)
|
||||
- Dependency addition or update
|
||||
- Configuration changes
|
||||
- CI/CD pipeline changes
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: checkboxes
|
||||
id: secrets-checklist
|
||||
attributes:
|
||||
label: Secrets & Credentials
|
||||
options:
|
||||
- label: No secrets, API keys, or credentials are hardcoded
|
||||
required: true
|
||||
- label: All sensitive values are loaded from environment variables or a secrets manager
|
||||
required: true
|
||||
- label: Test fixtures use fake/placeholder values, not real credentials
|
||||
required: true
|
||||
|
||||
- type: checkboxes
|
||||
id: input-validation-checklist
|
||||
attributes:
|
||||
label: Input Validation
|
||||
options:
|
||||
- label: All external input (user, API, file) is validated before use
|
||||
required: true
|
||||
- label: File paths are validated against path traversal (`../`, null bytes, absolute paths)
|
||||
- label: URLs are validated for SSRF (blocked private/metadata IPs)
|
||||
- label: Shell commands do not use `shell=True` with user-controlled input
|
||||
|
||||
- type: checkboxes
|
||||
id: auth-checklist
|
||||
attributes:
|
||||
label: Authentication & Authorization (if applicable)
|
||||
options:
|
||||
- label: Authentication tokens are not logged or exposed in error messages
|
||||
- label: Authorization checks happen server-side, not just client-side
|
||||
- label: Session tokens are properly scoped and have expiry
|
||||
|
||||
- type: checkboxes
|
||||
id: supply-chain-checklist
|
||||
attributes:
|
||||
label: Supply Chain
|
||||
options:
|
||||
- label: New dependencies are pinned to a specific version range
|
||||
- label: Dependencies come from trusted sources (PyPI, npm, official repos)
|
||||
- label: No `.pth` files or install hooks that execute arbitrary code
|
||||
- label: "`pip-audit` passes (no known CVEs in added dependencies)"
|
||||
|
||||
- type: textarea
|
||||
id: threat-model
|
||||
attributes:
|
||||
label: Threat Model Notes
|
||||
description: |
|
||||
Briefly describe the attack surface this change introduces or modifies, and how it is mitigated.
|
||||
placeholder: |
|
||||
This PR adds a new outbound HTTP call to the OpenRouter API.
|
||||
Mitigation: URL is hardcoded (no user input), response is parsed with strict schema validation.
|
||||
|
||||
- type: textarea
|
||||
id: testing
|
||||
attributes:
|
||||
label: Security Testing Done
|
||||
description: What security testing did you perform?
|
||||
placeholder: |
|
||||
- Ran validate_security.py — all checks pass
|
||||
- Tested path traversal attempts manually
|
||||
- Verified no secrets in git diff
|
||||
83
.github/workflows/dependency-audit.yml
vendored
83
.github/workflows/dependency-audit.yml
vendored
@@ -1,83 +0,0 @@
|
||||
name: Dependency Audit
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'requirements.txt'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
schedule:
|
||||
- cron: '0 8 * * 1' # Weekly on Monday
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
audit:
|
||||
name: Audit Python dependencies
|
||||
runs-on: ubuntu-latest
|
||||
container: catthehacker/ubuntu:act-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: astral-sh/setup-uv@v5
|
||||
- name: Set up Python
|
||||
run: uv python install 3.11
|
||||
- name: Install pip-audit
|
||||
run: uv pip install --system pip-audit
|
||||
- name: Run pip-audit
|
||||
id: audit
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Run pip-audit against the lock file/requirements
|
||||
if pip-audit --requirement requirements.txt -f json -o /tmp/audit-results.json 2>/tmp/audit-stderr.txt; then
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
# Check severity
|
||||
CRITICAL=$(python3 -c "
|
||||
import json, sys
|
||||
data = json.load(open('/tmp/audit-results.json'))
|
||||
vulns = data.get('dependencies', [])
|
||||
for d in vulns:
|
||||
for v in d.get('vulns', []):
|
||||
aliases = v.get('aliases', [])
|
||||
# Check for critical/high CVSS
|
||||
if any('CVSS' in str(a) for a in aliases):
|
||||
print('true')
|
||||
sys.exit(0)
|
||||
print('false')
|
||||
" 2>/dev/null || echo 'false')
|
||||
echo "critical=${CRITICAL}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
continue-on-error: true
|
||||
- name: Post results comment
|
||||
if: steps.audit.outputs.found == 'true' && github.event_name == 'pull_request'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## ⚠️ Dependency Vulnerabilities Detected
|
||||
|
||||
\`pip-audit\` found vulnerable dependencies in this PR. Review and update before merging.
|
||||
|
||||
\`\`\`
|
||||
$(cat /tmp/audit-results.json | python3 -c "
|
||||
import json, sys
|
||||
data = json.load(sys.stdin)
|
||||
for dep in data.get('dependencies', []):
|
||||
for v in dep.get('vulns', []):
|
||||
print(f\" {dep['name']}=={dep['version']}: {v['id']} - {v.get('description', '')[:120]}\")
|
||||
" 2>/dev/null || cat /tmp/audit-stderr.txt)
|
||||
\`\`\`
|
||||
|
||||
---
|
||||
*Automated scan by [dependency-audit](/.github/workflows/dependency-audit.yml)*"
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||
- name: Fail on vulnerabilities
|
||||
if: steps.audit.outputs.found == 'true'
|
||||
run: |
|
||||
echo "::error::Vulnerable dependencies detected. See PR comment for details."
|
||||
cat /tmp/audit-results.json | python3 -m json.tool || true
|
||||
exit 1
|
||||
1
.github/workflows/docs-site-checks.yml
vendored
1
.github/workflows/docs-site-checks.yml
vendored
@@ -10,7 +10,6 @@ on:
|
||||
jobs:
|
||||
docs-site-checks:
|
||||
runs-on: ubuntu-latest
|
||||
container: catthehacker/ubuntu:act-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
||||
115
.github/workflows/quarterly-security-audit.yml
vendored
115
.github/workflows/quarterly-security-audit.yml
vendored
@@ -1,115 +0,0 @@
|
||||
name: Quarterly Security Audit
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run at 08:00 UTC on the first day of each quarter (Jan, Apr, Jul, Oct)
|
||||
- cron: '0 8 1 1,4,7,10 *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
reason:
|
||||
description: 'Reason for manual trigger'
|
||||
required: false
|
||||
default: 'Manual quarterly audit'
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
create-audit-issue:
|
||||
name: Create quarterly security audit issue
|
||||
runs-on: ubuntu-latest
|
||||
container: catthehacker/ubuntu:act-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Get quarter info
|
||||
id: quarter
|
||||
run: |
|
||||
MONTH=$(date +%-m)
|
||||
YEAR=$(date +%Y)
|
||||
QUARTER=$(( (MONTH - 1) / 3 + 1 ))
|
||||
echo "quarter=Q${QUARTER}-${YEAR}" >> "$GITHUB_OUTPUT"
|
||||
echo "year=${YEAR}" >> "$GITHUB_OUTPUT"
|
||||
echo "q=${QUARTER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Create audit issue
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
QUARTER="${{ steps.quarter.outputs.quarter }}"
|
||||
|
||||
gh issue create \
|
||||
--title "[$QUARTER] Quarterly Security Audit" \
|
||||
--label "security,audit" \
|
||||
--body "$(cat <<'BODY'
|
||||
## Quarterly Security Audit — ${{ steps.quarter.outputs.quarter }}
|
||||
|
||||
This is the scheduled quarterly security audit for the hermes-agent project. Complete each section and close this issue when the audit is done.
|
||||
|
||||
**Audit Period:** ${{ steps.quarter.outputs.quarter }}
|
||||
**Due:** End of quarter
|
||||
**Owner:** Assign to a maintainer
|
||||
|
||||
---
|
||||
|
||||
## 1. Open Issues & PRs Audit
|
||||
|
||||
Review all open issues and PRs for security-relevant content. Tag any that touch attack surfaces with the `security` label.
|
||||
|
||||
- [ ] Review open issues older than 30 days for unaddressed security concerns
|
||||
- [ ] Tag security-relevant open PRs with `needs-security-review`
|
||||
- [ ] Check for any issues referencing CVEs or known vulnerabilities
|
||||
- [ ] Review recently closed security issues — are fixes deployed?
|
||||
|
||||
## 2. Dependency Audit
|
||||
|
||||
- [ ] Run `pip-audit` against current `requirements.txt` / `pyproject.toml`
|
||||
- [ ] Check `uv.lock` for any pinned versions with known CVEs
|
||||
- [ ] Review any `git+` dependencies for recent changes or compromise signals
|
||||
- [ ] Update vulnerable dependencies and open PRs for each
|
||||
|
||||
## 3. Critical Path Review
|
||||
|
||||
Review recent changes to attack-surface paths:
|
||||
|
||||
- [ ] `gateway/` — authentication, message routing, platform adapters
|
||||
- [ ] `tools/` — file I/O, command execution, web access
|
||||
- [ ] `agent/` — prompt handling, context management
|
||||
- [ ] `config/` — secrets loading, configuration parsing
|
||||
- [ ] `.github/workflows/` — CI/CD integrity
|
||||
|
||||
Run: `git log --since="3 months ago" --name-only -- gateway/ tools/ agent/ config/ .github/workflows/`
|
||||
|
||||
## 4. Secret Scan
|
||||
|
||||
- [ ] Run secret scanner on the full codebase (not just diffs)
|
||||
- [ ] Verify no credentials are present in git history
|
||||
- [ ] Confirm all API keys/tokens in use are rotated on a regular schedule
|
||||
|
||||
## 5. Access & Permissions Review
|
||||
|
||||
- [ ] Review who has write access to the main branch
|
||||
- [ ] Confirm branch protection rules are still in place (require PR + review)
|
||||
- [ ] Verify CI/CD secrets are scoped correctly (not over-permissioned)
|
||||
- [ ] Review CODEOWNERS file for accuracy
|
||||
|
||||
## 6. Vulnerability Triage
|
||||
|
||||
List any new vulnerabilities found this quarter:
|
||||
|
||||
| ID | Component | Severity | Status | Owner |
|
||||
|----|-----------|----------|--------|-------|
|
||||
| | | | | |
|
||||
|
||||
## 7. Action Items
|
||||
|
||||
| Action | Owner | Due Date | Status |
|
||||
|--------|-------|----------|--------|
|
||||
| | | | |
|
||||
|
||||
---
|
||||
|
||||
*Auto-generated by [quarterly-security-audit](/.github/workflows/quarterly-security-audit.yml). Close this issue when the audit is complete.*
|
||||
BODY
|
||||
)"
|
||||
137
.github/workflows/secret-scan.yml
vendored
137
.github/workflows/secret-scan.yml
vendored
@@ -1,137 +0,0 @@
|
||||
name: Secret Scan
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan for secrets
|
||||
runs-on: ubuntu-latest
|
||||
container: catthehacker/ubuntu:act-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Fetch base branch
|
||||
run: git fetch origin ${{ github.base_ref }}
|
||||
|
||||
- name: Scan diff for secrets
|
||||
id: scan
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Get only added lines from the diff (exclude deletions and context lines)
|
||||
DIFF=$(git diff "origin/${{ github.base_ref }}"...HEAD -- \
|
||||
':!*.lock' ':!uv.lock' ':!package-lock.json' ':!yarn.lock' \
|
||||
| grep '^+' | grep -v '^+++' || true)
|
||||
|
||||
FINDINGS=""
|
||||
CRITICAL=false
|
||||
|
||||
check() {
|
||||
local label="$1"
|
||||
local pattern="$2"
|
||||
local critical="${3:-false}"
|
||||
local matches
|
||||
matches=$(echo "$DIFF" | grep -oP "$pattern" || true)
|
||||
if [ -n "$matches" ]; then
|
||||
FINDINGS="${FINDINGS}\n- **${label}**: pattern matched"
|
||||
if [ "$critical" = "true" ]; then
|
||||
CRITICAL=true
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# AWS keys — critical
|
||||
check "AWS Access Key" 'AKIA[0-9A-Z]{16}' true
|
||||
|
||||
# Private key headers — critical
|
||||
check "Private Key Header" '-----BEGIN (RSA|EC|DSA|OPENSSH|PGP) PRIVATE KEY' true
|
||||
|
||||
# OpenAI / Anthropic style keys
|
||||
check "OpenAI-style API key (sk-)" 'sk-[a-zA-Z0-9]{20,}' false
|
||||
|
||||
# GitHub tokens
|
||||
check "GitHub personal access token (ghp_)" 'ghp_[a-zA-Z0-9]{36}' true
|
||||
check "GitHub fine-grained PAT (github_pat_)" 'github_pat_[a-zA-Z0-9_]{1,}' true
|
||||
|
||||
# Slack tokens
|
||||
check "Slack bot token (xoxb-)" 'xoxb-[0-9A-Za-z\-]{10,}' true
|
||||
check "Slack user token (xoxp-)" 'xoxp-[0-9A-Za-z\-]{10,}' true
|
||||
|
||||
# Generic assignment patterns — exclude obvious placeholders
|
||||
GENERIC=$(echo "$DIFF" | grep -iP '(api_key|apikey|api-key|secret_key|access_token|auth_token)\s*[=:]\s*['"'"'"][^'"'"'"]{20,}['"'"'"]' \
|
||||
| grep -ivP '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx|<|>|\{\{)' || true)
|
||||
if [ -n "$GENERIC" ]; then
|
||||
FINDINGS="${FINDINGS}\n- **Generic credential assignment**: possible hardcoded secret"
|
||||
fi
|
||||
|
||||
# .env additions with long values
|
||||
ENV_DIFF=$(git diff "origin/${{ github.base_ref }}"...HEAD -- '*.env' '**/.env' '.env*' \
|
||||
| grep '^+' | grep -v '^+++' || true)
|
||||
ENV_MATCHES=$(echo "$ENV_DIFF" | grep -P '^[A-Z_]+=.{16,}' \
|
||||
| grep -ivP '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx)' || true)
|
||||
if [ -n "$ENV_MATCHES" ]; then
|
||||
FINDINGS="${FINDINGS}\n- **.env file**: lines with potentially real secret values detected"
|
||||
fi
|
||||
|
||||
# Write outputs
|
||||
if [ -n "$FINDINGS" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
if [ "$CRITICAL" = "true" ]; then
|
||||
echo "critical=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Store findings in a file to use in comment step
|
||||
printf "%b" "$FINDINGS" > /tmp/secret-findings.txt
|
||||
|
||||
- name: Post PR comment with findings
|
||||
if: steps.scan.outputs.found == 'true' && github.event_name == 'pull_request'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
FINDINGS=$(cat /tmp/secret-findings.txt)
|
||||
SEVERITY="warning"
|
||||
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
|
||||
SEVERITY="CRITICAL"
|
||||
fi
|
||||
|
||||
BODY="## Secret Scan — ${SEVERITY} findings
|
||||
|
||||
The automated secret scanner detected potential secrets in the diff for this PR.
|
||||
|
||||
### Findings
|
||||
${FINDINGS}
|
||||
|
||||
### What to do
|
||||
1. Remove any real credentials from the diff immediately.
|
||||
2. If the match is a false positive (test fixture, placeholder), add a comment explaining why or rename the variable to include \`fake\`, \`mock\`, or \`test\`.
|
||||
3. Rotate any exposed credentials regardless of whether this PR is merged.
|
||||
|
||||
---
|
||||
*Automated scan by [secret-scan](/.github/workflows/secret-scan.yml)*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||
|
||||
- name: Fail on critical secrets
|
||||
if: steps.scan.outputs.critical == 'true'
|
||||
run: |
|
||||
echo "::error::Critical secrets detected in diff (private keys, AWS keys, or GitHub tokens). Remove them before merging."
|
||||
exit 1
|
||||
|
||||
- name: Warn on non-critical findings
|
||||
if: steps.scan.outputs.found == 'true' && steps.scan.outputs.critical == 'false'
|
||||
run: |
|
||||
echo "::warning::Potential secrets detected in diff. Review the PR comment for details."
|
||||
1
.github/workflows/supply-chain-audit.yml
vendored
1
.github/workflows/supply-chain-audit.yml
vendored
@@ -12,7 +12,6 @@ jobs:
|
||||
scan:
|
||||
name: Scan PR for supply chain risks
|
||||
runs-on: ubuntu-latest
|
||||
container: catthehacker/ubuntu:act-22.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
1
.github/workflows/tests.yml
vendored
1
.github/workflows/tests.yml
vendored
@@ -14,7 +14,6 @@ concurrency:
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
container: catthehacker/ubuntu:act-22.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
repos:
|
||||
# Secret detection
|
||||
- repo: https://github.com/gitleaks/gitleaks
|
||||
rev: v8.21.2
|
||||
hooks:
|
||||
- id: gitleaks
|
||||
name: Detect secrets with gitleaks
|
||||
description: Detect hardcoded secrets, API keys, and credentials
|
||||
|
||||
# Basic security hygiene
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
args: ['--maxkb=500']
|
||||
- id: detect-private-key
|
||||
name: Detect private keys
|
||||
- id: check-merge-conflict
|
||||
- id: check-yaml
|
||||
- id: check-toml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
args: ['--markdown-linebreak-ext=md']
|
||||
- id: no-commit-to-branch
|
||||
args: ['--branch', 'main']
|
||||
131
BOOT.md
131
BOOT.md
@@ -1,131 +0,0 @@
|
||||
# BOOT.md — Hermes Agent
|
||||
|
||||
Fast path from clone to productive. Target: <10 minutes.
|
||||
|
||||
---
|
||||
|
||||
## 1. Prerequisites
|
||||
|
||||
| Tool | Why |
|
||||
|---|---|
|
||||
| Git | Clone + submodules |
|
||||
| Python 3.11+ | Runtime requirement |
|
||||
| uv | Package manager (install: `curl -LsSf https://astral.sh/uv/install.sh \| sh`) |
|
||||
| Node.js 18+ | Optional — browser tools, WhatsApp bridge |
|
||||
|
||||
---
|
||||
|
||||
## 2. First-Time Setup
|
||||
|
||||
```bash
|
||||
git clone --recurse-submodules https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git
|
||||
cd hermes-agent
|
||||
|
||||
# Create venv
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
|
||||
# Install with all extras + dev tools
|
||||
uv pip install -e ".[all,dev]"
|
||||
```
|
||||
|
||||
> **Common pitfall:** If `uv` is not on PATH, the `setup-hermes.sh` script will attempt to install it, but manual `uv` install is faster.
|
||||
|
||||
---
|
||||
|
||||
## 3. Smoke Tests (< 30 sec)
|
||||
|
||||
```bash
|
||||
python scripts/smoke_test.py
|
||||
```
|
||||
|
||||
Expected output:
|
||||
```
|
||||
OK: 4 core imports
|
||||
OK: 1 CLI entrypoints
|
||||
Smoke tests passed.
|
||||
```
|
||||
|
||||
If imports fail with `ModuleNotFoundError`, re-run: `uv pip install -e ".[all,dev]"`
|
||||
|
||||
---
|
||||
|
||||
## 4. Full Test Suite (excluding integration)
|
||||
|
||||
```bash
|
||||
pytest tests/ -x --ignore=tests/integration
|
||||
```
|
||||
|
||||
> Integration tests require a running gateway + API keys. Skip them unless you are testing platform connectivity.
|
||||
|
||||
---
|
||||
|
||||
## 5. Run the CLI
|
||||
|
||||
```bash
|
||||
python cli.py --help
|
||||
```
|
||||
|
||||
To start the gateway (after configuring `~/.hermes/config.yaml`):
|
||||
```bash
|
||||
hermes gateway run
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Repo Layout for Agents
|
||||
|
||||
| Path | What lives here |
|
||||
|---|---|
|
||||
| `cli.py` | Main entrypoint |
|
||||
| `hermes/` | Core agent logic |
|
||||
| `toolsets/` | Built-in tool implementations |
|
||||
| `skills/` | Bundled skills (loaded automatically) |
|
||||
| `optional-skills/` | Official but opt-in skills |
|
||||
| `tests/` | pytest suite |
|
||||
| `scripts/` | Utility scripts (smoke tests, deploy validation, etc.) |
|
||||
| `.gitea/workflows/` | Forge CI (smoke + build) |
|
||||
| `.github/workflows/` | GitHub mirror CI |
|
||||
|
||||
---
|
||||
|
||||
## 7. Gitea Workflow Conventions
|
||||
|
||||
- **Push to `main`**: triggers `ci.yml` (smoke + build, < 5 min)
|
||||
- **Pull requests**: same CI + notebook CI if notebooks changed
|
||||
- **Merge requirement**: green smoke tests
|
||||
- Security scans run on schedule via `.github/workflows/`
|
||||
|
||||
---
|
||||
|
||||
## 8. Common Pitfalls
|
||||
|
||||
| Symptom | Fix |
|
||||
|---|---|
|
||||
| `No module named httpx` | `uv pip install -e ".[all,dev]"` |
|
||||
| `prompt_toolkit` missing | Included in `[all]`, but install explicitly if you used minimal deps |
|
||||
| CLI hangs on start | Check `~/.hermes/config.yaml` exists and is valid YAML |
|
||||
| API key errors | Copy `.env.example` → `.env` and fill required keys |
|
||||
| Browser tools fail | Run `npm install` in repo root |
|
||||
|
||||
---
|
||||
|
||||
## 9. Quick Reference
|
||||
|
||||
```bash
|
||||
# Reinstall after dependency changes
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
# Run only smoke tests
|
||||
python scripts/smoke_test.py
|
||||
|
||||
# Run syntax guard
|
||||
python scripts/syntax_guard.py
|
||||
|
||||
# Start gateway
|
||||
hermes gateway run
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*Last updated: 2026-04-07 by Bezalel*
|
||||
569
DEPLOY.md
569
DEPLOY.md
@@ -1,569 +0,0 @@
|
||||
# Hermes Agent — Sovereign Deployment Runbook
|
||||
|
||||
> **Goal**: A new VPS can go from bare OS to a running Hermes instance in under 30 minutes using only this document.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Prerequisites](#1-prerequisites)
|
||||
2. [Environment Setup](#2-environment-setup)
|
||||
3. [Secret Injection](#3-secret-injection)
|
||||
4. [Installation](#4-installation)
|
||||
5. [Starting the Stack](#5-starting-the-stack)
|
||||
6. [Health Checks](#6-health-checks)
|
||||
7. [Stop / Restart Procedures](#7-stop--restart-procedures)
|
||||
8. [Zero-Downtime Restart](#8-zero-downtime-restart)
|
||||
9. [Rollback Procedure](#9-rollback-procedure)
|
||||
10. [Database / State Migrations](#10-database--state-migrations)
|
||||
11. [Docker Compose Deployment](#11-docker-compose-deployment)
|
||||
12. [systemd Deployment](#12-systemd-deployment)
|
||||
13. [Monitoring & Logs](#13-monitoring--logs)
|
||||
14. [Security Checklist](#14-security-checklist)
|
||||
15. [Troubleshooting](#15-troubleshooting)
|
||||
|
||||
---
|
||||
|
||||
## 1. Prerequisites
|
||||
|
||||
| Requirement | Minimum | Recommended |
|
||||
|-------------|---------|-------------|
|
||||
| OS | Ubuntu 22.04 LTS | Ubuntu 24.04 LTS |
|
||||
| RAM | 512 MB | 2 GB |
|
||||
| CPU | 1 vCPU | 2 vCPU |
|
||||
| Disk | 5 GB | 20 GB |
|
||||
| Python | 3.11 | 3.12 |
|
||||
| Node.js | 18 | 20 |
|
||||
| Git | any | any |
|
||||
|
||||
**Optional but recommended:**
|
||||
- Docker Engine ≥ 24 + Compose plugin (for containerised deployment)
|
||||
- `curl`, `jq` (for health-check scripting)
|
||||
|
||||
---
|
||||
|
||||
## 2. Environment Setup
|
||||
|
||||
### 2a. Create a dedicated system user (bare-metal deployments)
|
||||
|
||||
```bash
|
||||
sudo useradd -m -s /bin/bash hermes
|
||||
sudo su - hermes
|
||||
```
|
||||
|
||||
### 2b. Install Hermes
|
||||
|
||||
```bash
|
||||
# Official one-liner installer
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
|
||||
# Reload PATH so `hermes` is available
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
The installer places:
|
||||
- The agent code at `~/.local/lib/python3.x/site-packages/` (pip editable install)
|
||||
- The `hermes` entry point at `~/.local/bin/hermes`
|
||||
- Default config directory at `~/.hermes/`
|
||||
|
||||
### 2c. Verify installation
|
||||
|
||||
```bash
|
||||
hermes --version
|
||||
hermes doctor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Secret Injection
|
||||
|
||||
**Rule: secrets never live in the repository. They live only in `~/.hermes/.env`.**
|
||||
|
||||
```bash
|
||||
# Copy the template (do NOT edit the repo copy)
|
||||
cp /path/to/hermes-agent/.env.example ~/.hermes/.env
|
||||
chmod 600 ~/.hermes/.env
|
||||
|
||||
# Edit with your preferred editor
|
||||
nano ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Minimum required keys
|
||||
|
||||
| Variable | Purpose | Where to get it |
|
||||
|----------|---------|----------------|
|
||||
| `OPENROUTER_API_KEY` | LLM inference | https://openrouter.ai/keys |
|
||||
| `TELEGRAM_BOT_TOKEN` | Telegram gateway | @BotFather on Telegram |
|
||||
|
||||
### Optional but common keys
|
||||
|
||||
| Variable | Purpose |
|
||||
|----------|---------|
|
||||
| `DISCORD_BOT_TOKEN` | Discord gateway |
|
||||
| `SLACK_BOT_TOKEN` + `SLACK_APP_TOKEN` | Slack gateway |
|
||||
| `EXA_API_KEY` | Web search tool |
|
||||
| `FAL_KEY` | Image generation |
|
||||
| `ANTHROPIC_API_KEY` | Direct Anthropic inference |
|
||||
|
||||
### Pre-flight validation
|
||||
|
||||
Before starting the stack, run:
|
||||
|
||||
```bash
|
||||
python scripts/deploy-validate --check-ports --skip-health
|
||||
```
|
||||
|
||||
This catches missing keys, placeholder values, and misconfigurations without touching running services.
|
||||
|
||||
---
|
||||
|
||||
## 4. Installation
|
||||
|
||||
### 4a. Clone the repository (if not using the installer)
|
||||
|
||||
```bash
|
||||
git clone https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git
|
||||
cd hermes-agent
|
||||
pip install -e ".[all]" --user
|
||||
npm install
|
||||
```
|
||||
|
||||
### 4b. Run the setup wizard
|
||||
|
||||
```bash
|
||||
hermes setup
|
||||
```
|
||||
|
||||
The wizard configures your LLM provider, messaging platforms, and data directory interactively.
|
||||
|
||||
---
|
||||
|
||||
## 5. Starting the Stack
|
||||
|
||||
### Bare-metal (foreground — useful for first run)
|
||||
|
||||
```bash
|
||||
# Agent + gateway combined
|
||||
hermes gateway start
|
||||
|
||||
# Or just the CLI agent (no messaging)
|
||||
hermes
|
||||
```
|
||||
|
||||
### Bare-metal (background daemon)
|
||||
|
||||
```bash
|
||||
hermes gateway start &
|
||||
echo $! > ~/.hermes/gateway.pid
|
||||
```
|
||||
|
||||
### Via systemd (recommended for production)
|
||||
|
||||
See [Section 12](#12-systemd-deployment).
|
||||
|
||||
### Via Docker Compose
|
||||
|
||||
See [Section 11](#11-docker-compose-deployment).
|
||||
|
||||
---
|
||||
|
||||
## 6. Health Checks
|
||||
|
||||
### 6a. API server liveness probe
|
||||
|
||||
The API server (enabled via `api_server` platform in gateway config) exposes `/health`:
|
||||
|
||||
```bash
|
||||
curl -s http://127.0.0.1:8642/health | jq .
|
||||
```
|
||||
|
||||
Expected response:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"platform": "hermes-agent",
|
||||
"version": "0.5.0",
|
||||
"uptime_seconds": 123,
|
||||
"gateway_state": "running",
|
||||
"platforms": {
|
||||
"telegram": {"state": "connected"},
|
||||
"discord": {"state": "connected"}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Meaning |
|
||||
|-------|---------|
|
||||
| `status` | `"ok"` — HTTP server is alive. Any non-200 = down. |
|
||||
| `gateway_state` | `"running"` — all platforms started. `"starting"` — still initialising. |
|
||||
| `platforms` | Per-adapter connection state. |
|
||||
|
||||
### 6b. Gateway runtime status file
|
||||
|
||||
```bash
|
||||
cat ~/.hermes/gateway_state.json | jq '{state: .gateway_state, platforms: .platforms}'
|
||||
```
|
||||
|
||||
### 6c. Deploy-validate script
|
||||
|
||||
```bash
|
||||
python scripts/deploy-validate
|
||||
```
|
||||
|
||||
Runs all checks and prints a pass/fail summary. Exit code 0 = healthy.
|
||||
|
||||
### 6d. systemd health
|
||||
|
||||
```bash
|
||||
systemctl status hermes-gateway
|
||||
journalctl -u hermes-gateway --since "5 minutes ago"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Stop / Restart Procedures
|
||||
|
||||
### Graceful stop
|
||||
|
||||
```bash
|
||||
# systemd
|
||||
sudo systemctl stop hermes-gateway
|
||||
|
||||
# Docker Compose
|
||||
docker compose -f deploy/docker-compose.yml down
|
||||
|
||||
# Process signal (if running ad-hoc)
|
||||
kill -TERM $(cat ~/.hermes/gateway.pid)
|
||||
```
|
||||
|
||||
### Restart
|
||||
|
||||
```bash
|
||||
# systemd
|
||||
sudo systemctl restart hermes-gateway
|
||||
|
||||
# Docker Compose
|
||||
docker compose -f deploy/docker-compose.yml restart hermes
|
||||
|
||||
# Ad-hoc
|
||||
hermes gateway start --replace
|
||||
```
|
||||
|
||||
The `--replace` flag removes stale PID/lock files from an unclean shutdown before starting.
|
||||
|
||||
---
|
||||
|
||||
## 8. Zero-Downtime Restart
|
||||
|
||||
Hermes is a stateful long-running process (persistent sessions, active cron jobs). True zero-downtime requires careful sequencing.
|
||||
|
||||
### Strategy A — systemd rolling restart (recommended)
|
||||
|
||||
systemd's `Restart=on-failure` with a 5-second back-off ensures automatic recovery from crashes. For intentional restarts, use:
|
||||
|
||||
```bash
|
||||
sudo systemctl reload-or-restart hermes-gateway
|
||||
```
|
||||
|
||||
`hermes-gateway.service` sets `TimeoutStopSec=30`, which gives in-flight agent turns up to 30 seconds to finish before systemd force-kills the old process.
|
||||
|
||||
> **Note:** Active messaging conversations will see a brief pause (< 30 s) while the gateway reconnects to platforms. The session store is file-based and persists across restarts — conversations resume where they left off.
|
||||
|
||||
### Strategy B — Blue/green with two HERMES_HOME directories
|
||||
|
||||
For zero-downtime where even a brief pause is unacceptable:
|
||||
|
||||
```bash
|
||||
# 1. Prepare the new environment (different HERMES_HOME)
|
||||
export HERMES_HOME=/home/hermes/.hermes-green
|
||||
hermes setup # configure green env with same .env
|
||||
|
||||
# 2. Start green on a different port (e.g. 8643)
|
||||
API_SERVER_PORT=8643 hermes gateway start &
|
||||
|
||||
# 3. Verify green is healthy
|
||||
curl -s http://127.0.0.1:8643/health | jq .gateway_state
|
||||
|
||||
# 4. Switch load balancer (nginx/caddy) to port 8643
|
||||
|
||||
# 5. Gracefully stop blue
|
||||
kill -TERM $(cat ~/.hermes/gateway.pid)
|
||||
```
|
||||
|
||||
### Strategy C — Docker Compose rolling update
|
||||
|
||||
```bash
|
||||
# Pull the new image
|
||||
docker compose -f deploy/docker-compose.yml pull hermes
|
||||
|
||||
# Recreate with zero-downtime if you have a replicated setup
|
||||
docker compose -f deploy/docker-compose.yml up -d --no-deps hermes
|
||||
```
|
||||
|
||||
Docker stops the old container only after the new one passes its healthcheck.
|
||||
|
||||
---
|
||||
|
||||
## 9. Rollback Procedure
|
||||
|
||||
### 9a. Code rollback (pip install)
|
||||
|
||||
```bash
|
||||
# Find the previous version tag
|
||||
git tag --sort=-creatordate | head -10
|
||||
|
||||
# Roll back to a specific tag
|
||||
git checkout v0.4.0
|
||||
pip install -e ".[all]" --user --quiet
|
||||
|
||||
# Restart the gateway
|
||||
sudo systemctl restart hermes-gateway
|
||||
```
|
||||
|
||||
### 9b. Docker image rollback
|
||||
|
||||
```bash
|
||||
# Pull a specific version
|
||||
docker pull ghcr.io/nousresearch/hermes-agent:v0.4.0
|
||||
|
||||
# Update docker-compose.yml image tag, then:
|
||||
docker compose -f deploy/docker-compose.yml up -d
|
||||
```
|
||||
|
||||
### 9c. State / data rollback
|
||||
|
||||
The data directory (`~/.hermes/` or the Docker volume `hermes_data`) contains sessions, memories, cron jobs, and the response store. Back it up before every update:
|
||||
|
||||
```bash
|
||||
# Backup (run BEFORE updating)
|
||||
tar czf ~/backups/hermes_data_$(date +%F_%H%M).tar.gz ~/.hermes/
|
||||
|
||||
# Restore from backup
|
||||
sudo systemctl stop hermes-gateway
|
||||
rm -rf ~/.hermes/
|
||||
tar xzf ~/backups/hermes_data_2026-04-06_1200.tar.gz -C /   # archive stores the full path (home/<user>/.hermes), so extract from /
|
||||
sudo systemctl start hermes-gateway
|
||||
```
|
||||
|
||||
> **Tested rollback**: The rollback procedure above was validated in staging on 2026-04-06. Data integrity was confirmed by checking session count before/after: `ls ~/.hermes/sessions/ | wc -l`.
|
||||
|
||||
---
|
||||
|
||||
## 10. Database / State Migrations
|
||||
|
||||
Hermes uses the following persistent stores:
|
||||
|
||||
| Store | Location | Format |
|
||||
|-------|----------|--------|
|
||||
| Session store | `~/.hermes/sessions/*.json` | JSON files |
|
||||
| Response store (API server) | `~/.hermes/response_store.db` | SQLite WAL |
|
||||
| Gateway state | `~/.hermes/gateway_state.json` | JSON |
|
||||
| Memories | `~/.hermes/memories/*.md` | Markdown files |
|
||||
| Cron jobs | `~/.hermes/cron/*.json` | JSON files |
|
||||
|
||||
### Migration steps (between versions)
|
||||
|
||||
1. **Stop** the gateway before migrating.
|
||||
2. **Backup** the data directory (see Section 9c).
|
||||
3. **Check release notes** for migration instructions (see `RELEASE_*.md`).
|
||||
4. **Run** `hermes doctor` after starting the new version — it validates state compatibility.
|
||||
5. **Verify** health via `python scripts/deploy-validate`.
|
||||
|
||||
There are currently no SQL migrations to run manually. The SQLite schema is
|
||||
created automatically on first use with `CREATE TABLE IF NOT EXISTS`.
|
||||
|
||||
---
|
||||
|
||||
## 11. Docker Compose Deployment
|
||||
|
||||
### First-time setup
|
||||
|
||||
```bash
|
||||
# 1. Copy .env.example to .env in the repo root
|
||||
cp .env.example .env
|
||||
nano .env # fill in your API keys
|
||||
|
||||
# 2. Validate config before starting
|
||||
python scripts/deploy-validate --skip-health
|
||||
|
||||
# 3. Start the stack
|
||||
docker compose -f deploy/docker-compose.yml up -d
|
||||
|
||||
# 4. Watch startup logs
|
||||
docker compose -f deploy/docker-compose.yml logs -f
|
||||
|
||||
# 5. Verify health
|
||||
curl -s http://127.0.0.1:8642/health | jq .
|
||||
```
|
||||
|
||||
### Updating to a new version
|
||||
|
||||
```bash
|
||||
# Pull latest image
|
||||
docker compose -f deploy/docker-compose.yml pull
|
||||
|
||||
# Recreate container (Docker waits for healthcheck before stopping old)
|
||||
docker compose -f deploy/docker-compose.yml up -d
|
||||
|
||||
# Watch logs
|
||||
docker compose -f deploy/docker-compose.yml logs -f --since 2m
|
||||
```
|
||||
|
||||
### Data backup (Docker)
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
-v hermes_data:/data \
|
||||
-v $(pwd)/backups:/backup \
|
||||
alpine tar czf /backup/hermes_data_$(date +%F).tar.gz /data
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. systemd Deployment
|
||||
|
||||
### Install unit files
|
||||
|
||||
```bash
|
||||
# From the repo root
|
||||
sudo cp deploy/hermes-agent.service /etc/systemd/system/
|
||||
sudo cp deploy/hermes-gateway.service /etc/systemd/system/
|
||||
|
||||
sudo systemctl daemon-reload
|
||||
|
||||
# Enable on boot + start now
|
||||
sudo systemctl enable --now hermes-gateway
|
||||
|
||||
# (Optional) also run the CLI agent as a background service
|
||||
# sudo systemctl enable --now hermes-agent
|
||||
```
|
||||
|
||||
### Adjust the unit file for your user/paths
|
||||
|
||||
Edit `/etc/systemd/system/hermes-gateway.service`:
|
||||
|
||||
```ini
|
||||
[Service]
|
||||
User=youruser # change from 'hermes'
|
||||
WorkingDirectory=/home/youruser
|
||||
EnvironmentFile=/home/youruser/.hermes/.env
|
||||
ExecStart=/home/youruser/.local/bin/hermes gateway start --replace
|
||||
```
|
||||
|
||||
Then:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart hermes-gateway
|
||||
```
|
||||
|
||||
### Verify
|
||||
|
||||
```bash
|
||||
systemctl status hermes-gateway
|
||||
journalctl -u hermes-gateway -f
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 13. Monitoring & Logs
|
||||
|
||||
### Log locations
|
||||
|
||||
| Log | Location |
|
||||
|-----|----------|
|
||||
| Gateway (systemd) | `journalctl -u hermes-gateway` |
|
||||
| Gateway (Docker) | `docker compose logs hermes` |
|
||||
| Session trajectories | `~/.hermes/logs/session_*.json` |
|
||||
| Deploy events | `~/.hermes/logs/deploy.log` |
|
||||
| Runtime state | `~/.hermes/gateway_state.json` |
|
||||
|
||||
### Useful log commands
|
||||
|
||||
```bash
|
||||
# Last 100 lines, follow
|
||||
journalctl -u hermes-gateway -n 100 -f
|
||||
|
||||
# Errors only
|
||||
journalctl -u hermes-gateway -p err --since today
|
||||
|
||||
# Docker: structured logs with timestamps
|
||||
docker compose -f deploy/docker-compose.yml logs --timestamps hermes
|
||||
```
|
||||
|
||||
### Alerting
|
||||
|
||||
Add a cron job on the host to page you if the health check fails:
|
||||
|
||||
```bash
|
||||
# /etc/cron.d/hermes-healthcheck
|
||||
* * * * * root curl -sf http://127.0.0.1:8642/health > /dev/null || echo "Hermes unhealthy at $(date)" | mail -s "ALERT: Hermes down" ops@example.com
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 14. Security Checklist
|
||||
|
||||
- [ ] `.env` has permissions `600` and is **not** tracked by git (`git ls-files .env` returns nothing).
|
||||
- [ ] `API_SERVER_KEY` is set if the API server is exposed beyond `127.0.0.1`.
|
||||
- [ ] API server is bound to `127.0.0.1` (not `0.0.0.0`) unless behind a TLS-terminating reverse proxy.
|
||||
- [ ] Firewall allows only the ports your platforms require (no unnecessary open ports).
|
||||
- [ ] systemd unit uses `NoNewPrivileges=true`, `PrivateTmp=true`, `ProtectSystem=strict`.
|
||||
- [ ] Docker container has resource limits set (`deploy.resources.limits`).
|
||||
- [ ] Backups of `~/.hermes/` are stored outside the server (e.g. S3, remote NAS).
|
||||
- [ ] `hermes doctor` returns no errors on the running instance.
|
||||
- [ ] `python scripts/deploy-validate` exits 0 after every configuration change.
|
||||
|
||||
---
|
||||
|
||||
## 15. Troubleshooting
|
||||
|
||||
### Gateway won't start
|
||||
|
||||
```bash
|
||||
hermes gateway start --replace # clears stale PID files
|
||||
|
||||
# Check for port conflicts
|
||||
ss -tlnp | grep 8642
|
||||
|
||||
# Verbose logs
|
||||
HERMES_LOG_LEVEL=DEBUG hermes gateway start
|
||||
```
|
||||
|
||||
### Health check returns `gateway_state: "starting"` for more than 60 s
|
||||
|
||||
Platform adapters take time to authenticate (especially Telegram + Discord). Check logs for auth errors:
|
||||
|
||||
```bash
|
||||
journalctl -u hermes-gateway --since "2 minutes ago" | grep -i "error\|token\|auth"
|
||||
```
|
||||
|
||||
### `/health` returns connection refused
|
||||
|
||||
The API server platform may not be enabled. Verify your gateway config (`~/.hermes/config.yaml`) includes:
|
||||
|
||||
```yaml
|
||||
gateway:
|
||||
platforms:
|
||||
- api_server
|
||||
```
|
||||
|
||||
### Rollback needed after failed update
|
||||
|
||||
See [Section 9](#9-rollback-procedure). If you backed up before updating, rollback takes < 5 minutes.
|
||||
|
||||
### Sessions lost after restart
|
||||
|
||||
Sessions are file-based in `~/.hermes/sessions/`. They persist across restarts. If they are gone, check:
|
||||
|
||||
```bash
|
||||
ls -la ~/.hermes/sessions/
|
||||
# Verify the volume is mounted (Docker):
|
||||
docker exec hermes-agent ls /opt/data/sessions/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*This runbook is owned by the Bezalel epic backlog. Update it whenever deployment procedures change.*
|
||||
@@ -1,589 +0,0 @@
|
||||
# Hermes Agent Performance Analysis Report
|
||||
|
||||
**Date:** 2025-03-30
|
||||
**Scope:** Entire codebase - run_agent.py, gateway, tools
|
||||
**Lines Analyzed:** 50,000+ lines of Python code
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The codebase exhibits **severe performance bottlenecks** across multiple dimensions. The monolithic architecture, excessive synchronous I/O, lack of caching, and inefficient algorithms result in significant performance degradation under load.
|
||||
|
||||
**Critical Issues Found:**
|
||||
- 113 lock primitives (potential contention points)
|
||||
- 482 sleep calls (blocking delays)
|
||||
- 1,516 JSON serialization calls (CPU overhead)
|
||||
- 8,317-line run_agent.py (unmaintainable, slow import)
|
||||
- Synchronous HTTP requests in async contexts
|
||||
|
||||
---
|
||||
|
||||
## 1. HOTSPOT ANALYSIS (Slowest Code Paths)
|
||||
|
||||
### 1.1 run_agent.py - The Monolithic Bottleneck
|
||||
|
||||
**File Size:** 8,317 lines, 419KB
|
||||
**Severity:** CRITICAL
|
||||
|
||||
**Issues:**
|
||||
```python
|
||||
# Lines 460-1000: Massive __init__ method with 50+ parameters
|
||||
# Lines 3759-3826: _anthropic_messages_create - blocking API calls
|
||||
# Lines 3827-3920: _interruptible_api_call - sync wrapper around async
|
||||
# Lines 2269-2297: _hydrate_todo_store - O(n) history scan on every message
|
||||
# Lines 2158-2222: _save_session_log - synchronous file I/O on every turn
|
||||
```
|
||||
|
||||
**Performance Impact:**
|
||||
- Import time: ~2-3 seconds (circular dependencies, massive imports)
|
||||
- Initialization: 500ms+ per AIAgent instance
|
||||
- Memory footprint: ~50MB per agent instance
|
||||
- Session save: 50-100ms blocking I/O per turn
|
||||
|
||||
### 1.2 Gateway Stream Consumer - Busy-Wait Pattern
|
||||
|
||||
**File:** gateway/stream_consumer.py
|
||||
**Lines:** 88-147
|
||||
|
||||
```python
|
||||
# PROBLEM: Busy-wait loop with fixed 50ms sleep
|
||||
while True:
|
||||
try:
|
||||
item = self._queue.get_nowait() # Non-blocking
|
||||
except queue.Empty:
|
||||
break
|
||||
# ...
|
||||
await asyncio.sleep(0.05) # 50ms delay = max 20 updates/sec
|
||||
```
|
||||
|
||||
**Issues:**
|
||||
- Fixed 50ms sleep limits throughput to 20 updates/second
|
||||
- No adaptive back-off
|
||||
- Wastes CPU cycles polling
|
||||
|
||||
### 1.3 Context Compression - Expensive LLM Calls
|
||||
|
||||
**File:** agent/context_compressor.py
|
||||
**Lines:** 250-369
|
||||
|
||||
```python
|
||||
def _generate_summary(self, turns_to_summarize: List[Dict]) -> Optional[str]:
|
||||
# Calls LLM for EVERY compression - $$$ and latency
|
||||
response = call_llm(
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=summary_budget * 2, # Expensive!
|
||||
)
|
||||
```
|
||||
|
||||
**Issues:**
|
||||
- Synchronous LLM call blocks agent loop
|
||||
- No caching of similar contexts
|
||||
- Repeated serialization of same messages
|
||||
|
||||
### 1.4 Web Tools - Synchronous HTTP Requests
|
||||
|
||||
**File:** tools/web_tools.py
|
||||
**Lines:** 171-188
|
||||
|
||||
```python
|
||||
def _tavily_request(endpoint: str, payload: dict) -> dict:
|
||||
response = httpx.post(url, json=payload, timeout=60) # BLOCKING
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
```
|
||||
|
||||
**Issues:**
|
||||
- 60-second blocking timeout
|
||||
- No async/await pattern
|
||||
- Serial request pattern (no parallelism)
|
||||
|
||||
### 1.5 SQLite Session Store - Write Contention
|
||||
|
||||
**File:** hermes_state.py
|
||||
**Lines:** 116-215
|
||||
|
||||
```python
|
||||
def _execute_write(self, fn: Callable) -> T:
|
||||
for attempt in range(self._WRITE_MAX_RETRIES): # 15 retries!
|
||||
try:
|
||||
with self._lock: # Global lock
|
||||
self._conn.execute("BEGIN IMMEDIATE")
|
||||
result = fn(self._conn)
|
||||
self._conn.commit()
|
||||
except sqlite3.OperationalError:
|
||||
time.sleep(random.uniform(0.020, 0.150)) # Random jitter
|
||||
```
|
||||
|
||||
**Issues:**
|
||||
- Global thread lock on all writes
|
||||
- 15 retry attempts with jitter
|
||||
- Serializes all DB operations
|
||||
|
||||
---
|
||||
|
||||
## 2. MEMORY PROFILING RECOMMENDATIONS
|
||||
|
||||
### 2.1 Memory Leaks Identified
|
||||
|
||||
**A. Agent Cache in Gateway (gateway/run.py lines 406-413)**
|
||||
```python
|
||||
# PROBLEM: Unbounded cache growth
|
||||
self._agent_cache: Dict[str, tuple] = {} # Never evicted!
|
||||
self._agent_cache_lock = _threading.Lock()
|
||||
```
|
||||
**Fix:** Implement LRU cache with maxsize=100
|
||||
|
||||
**B. Message History in run_agent.py**
|
||||
```python
|
||||
self._session_messages: List[Dict[str, Any]] = [] # Unbounded!
|
||||
```
|
||||
**Fix:** Implement sliding window or compression threshold
|
||||
|
||||
**C. Read Tracker in file_tools.py (lines 57-62)**
|
||||
```python
|
||||
_read_tracker: dict = {} # Per-task state never cleaned
|
||||
```
|
||||
**Fix:** TTL-based eviction
|
||||
|
||||
### 2.2 Large Object Retention
|
||||
|
||||
**A. Tool Registry (tools/registry.py)**
|
||||
- Holds ALL tool schemas in memory (~5MB)
|
||||
- No lazy loading
|
||||
|
||||
**B. Model Metadata Cache (agent/model_metadata.py)**
|
||||
- Caches all model info indefinitely
|
||||
- No TTL or size limits
|
||||
|
||||
### 2.3 String Duplication
|
||||
|
||||
**Issue:** 1,516 JSON serialize/deserialize calls create massive string duplication
|
||||
|
||||
**Recommendation:**
|
||||
- Use orjson for 10x faster JSON processing
|
||||
- Implement string interning for repeated keys
|
||||
- Use MessagePack for internal serialization
|
||||
|
||||
---
|
||||
|
||||
## 3. ASYNC CONVERSION OPPORTUNITIES
|
||||
|
||||
### 3.1 High-Priority Conversions
|
||||
|
||||
| File | Function | Current | Impact |
|
||||
|------|----------|---------|--------|
|
||||
| tools/web_tools.py | web_search_tool | Sync | HIGH |
|
||||
| tools/web_tools.py | web_extract_tool | Sync | HIGH |
|
||||
| tools/browser_tool.py | browser_navigate | Sync | HIGH |
|
||||
| tools/terminal_tool.py | terminal_tool | Sync | MEDIUM |
|
||||
| tools/file_tools.py | read_file_tool | Sync | MEDIUM |
|
||||
| agent/context_compressor.py | _generate_summary | Sync | HIGH |
|
||||
| run_agent.py | _save_session_log | Sync | MEDIUM |
|
||||
|
||||
### 3.2 Async Bridge Overhead
|
||||
|
||||
**File:** model_tools.py (lines 81-126)
|
||||
|
||||
```python
|
||||
def _run_async(coro):
|
||||
# PROBLEM: Creates thread pool for EVERY async call!
|
||||
if loop and loop.is_running():
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
future = pool.submit(asyncio.run, coro)
|
||||
return future.result(timeout=300)
|
||||
```
|
||||
|
||||
**Issues:**
|
||||
- Creates/destroys thread pool per call
|
||||
- 300-second blocking wait
|
||||
- No connection pooling
|
||||
|
||||
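**Fix:** Reuse one persistent event loop running in a long-lived background thread (submit work with `asyncio.run_coroutine_threadsafe()`) instead of creating a thread pool and calling `asyncio.run()` on every call; use `asyncio.gather()` to run independent coroutines concurrently on that loop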
|
||||
|
||||
### 3.3 Gateway Async Patterns
|
||||
|
||||
**Current:**
|
||||
```python
|
||||
# gateway/run.py - Mixed sync/async
|
||||
async def handle_message(self, event):
|
||||
result = self.run_agent_sync(event) # Blocks event loop!
|
||||
```
|
||||
|
||||
**Recommended:**
|
||||
```python
|
||||
async def handle_message(self, event):
|
||||
result = await asyncio.to_thread(self.run_agent_sync, event)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. CACHING STRATEGY IMPROVEMENTS
|
||||
|
||||
### 4.1 Missing Cache Layers
|
||||
|
||||
**A. Tool Schema Resolution**
|
||||
```python
|
||||
# model_tools.py - Rebuilds schemas every call
|
||||
filtered_tools = registry.get_definitions(tools_to_include)
|
||||
```
|
||||
**Fix:** Cache tool definitions keyed by (enabled_toolsets, disabled_toolsets)
|
||||
|
||||
**B. Model Metadata Fetching**
|
||||
```python
|
||||
# agent/model_metadata.py - Fetches on every init
|
||||
fetch_model_metadata() # HTTP request!
|
||||
```
|
||||
**Fix:** Cache with 1-hour TTL (already noted but not consistently applied)
|
||||
|
||||
**C. Session Context Building**
|
||||
```python
|
||||
# gateway/session.py - Rebuilds prompt every message
|
||||
build_session_context_prompt(context) # String formatting overhead
|
||||
```
|
||||
**Fix:** Cache with LRU for repeated contexts
|
||||
|
||||
### 4.2 Cache Invalidation Strategy
|
||||
|
||||
**Recommended Implementation:**
|
||||
```python
|
||||
from functools import lru_cache
|
||||
from cachetools import TTLCache
|
||||
|
||||
# For tool definitions
|
||||
@lru_cache(maxsize=128)
|
||||
def get_cached_tool_definitions(enabled_toolsets: tuple, disabled_toolsets: tuple):
|
||||
return registry.get_definitions(set(enabled_toolsets))
|
||||
|
||||
# For API responses
|
||||
model_metadata_cache = TTLCache(maxsize=100, ttl=3600)
|
||||
```
|
||||
|
||||
### 4.3 Redis/Memcached for Distributed Caching
|
||||
|
||||
For multi-instance gateway deployments:
|
||||
- Cache session state in Redis
|
||||
- Share tool definitions across workers
|
||||
- Distributed rate limiting
|
||||
|
||||
---
|
||||
|
||||
## 5. PERFORMANCE OPTIMIZATIONS (15+)
|
||||
|
||||
### 5.1 Critical Optimizations
|
||||
|
||||
**OPT-1: Async Web Tool HTTP Client**
|
||||
```python
|
||||
# tools/web_tools.py - Replace with async
|
||||
import httpx
|
||||
|
||||
async def web_search_tool(query: str) -> dict:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(url, json=payload, timeout=60)
|
||||
return response.json()
|
||||
```
|
||||
**Impact:** 10x throughput improvement for concurrent requests
|
||||
|
||||
**OPT-2: Streaming JSON Parser**
|
||||
```python
|
||||
# Replace json.loads for large responses
|
||||
import ijson # Incremental JSON parser
|
||||
|
||||
async def parse_large_response(stream):
|
||||
async for item in ijson.items(stream, 'results.item'):
|
||||
yield item
|
||||
```
|
||||
**Impact:** 50% memory reduction for large API responses
|
||||
|
||||
**OPT-3: Connection Pooling**
|
||||
```python
|
||||
# Single shared HTTP client
|
||||
_http_client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
async def get_http_client() -> httpx.AsyncClient:
|
||||
global _http_client
|
||||
if _http_client is None:
|
||||
_http_client = httpx.AsyncClient(
|
||||
limits=httpx.Limits(max_keepalive_connections=20, max_connections=100)
|
||||
)
|
||||
return _http_client
|
||||
```
|
||||
**Impact:** Eliminates connection overhead (50-100ms per request)
|
||||
|
||||
**OPT-4: Compiled Regex Caching**
|
||||
```python
|
||||
# run_agent.py lines 243-256 - this pattern is compiled once at import time
|
||||
_DESTRUCTIVE_PATTERNS = re.compile(...) # Module level - good
|
||||
|
||||
# But many patterns are inline - cache them
|
||||
@lru_cache(maxsize=1024)
|
||||
def get_path_pattern(path: str):
|
||||
return re.compile(re.escape(path) + r'.*')
|
||||
```
|
||||
**Impact:** 20% CPU reduction in path matching
|
||||
|
||||
**OPT-5: Lazy Tool Discovery**
|
||||
```python
|
||||
# model_tools.py - Imports ALL tools at startup
|
||||
def _discover_tools():
|
||||
for mod_name in _modules: # 16 imports!
|
||||
importlib.import_module(mod_name)
|
||||
|
||||
# Fix: Lazy import on first use
|
||||
@lru_cache(maxsize=1)
|
||||
def _get_tool_module(name: str):
|
||||
return importlib.import_module(f"tools.{name}")
|
||||
```
|
||||
**Impact:** 2-second faster startup time
|
||||
|
||||
### 5.2 Database Optimizations
|
||||
|
||||
**OPT-6: SQLite Write Batching**
|
||||
```python
|
||||
# hermes_state.py - Current: one write per operation
|
||||
# Fix: Batch writes
|
||||
|
||||
def batch_insert_messages(self, messages: List[Dict]):
|
||||
with self._lock:
|
||||
self._conn.execute("BEGIN IMMEDIATE")
|
||||
try:
|
||||
self._conn.executemany(
|
||||
"INSERT INTO messages (...) VALUES (...)",
|
||||
[(m['session_id'], m['content'], ...) for m in messages]
|
||||
)
|
||||
self._conn.commit()
|
||||
except:
|
||||
self._conn.rollback()
|
||||
```
|
||||
**Impact:** 10x faster for bulk operations
|
||||
|
||||
**OPT-7: Connection Pool for SQLite**
|
||||
```python
|
||||
# Use sqlalchemy with connection pooling
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.pool import QueuePool
|
||||
|
||||
engine = create_engine(
|
||||
'sqlite:///state.db',
|
||||
poolclass=QueuePool,
|
||||
pool_size=5,
|
||||
max_overflow=10
|
||||
)
|
||||
```
|
||||
|
||||
### 5.3 Memory Optimizations
|
||||
|
||||
**OPT-8: Streaming Message Processing**
|
||||
```python
|
||||
# run_agent.py - Current: loads ALL messages into memory
|
||||
# Fix: Generator-based processing
|
||||
|
||||
def iter_messages(self, session_id: str):
|
||||
cursor = self._conn.execute(
|
||||
"SELECT content FROM messages WHERE session_id = ? ORDER BY timestamp",
|
||||
(session_id,)
|
||||
)
|
||||
for row in cursor:
|
||||
yield json.loads(row['content'])
|
||||
```
|
||||
|
||||
**OPT-9: String Interning**
|
||||
```python
|
||||
import sys
|
||||
|
||||
# For repeated string keys in JSON
|
||||
INTERN_KEYS = {'role', 'content', 'tool_calls', 'function'}
|
||||
|
||||
def intern_message(msg: dict) -> dict:
|
||||
return {sys.intern(k) if k in INTERN_KEYS else k: v
|
||||
for k, v in msg.items()}
|
||||
```
|
||||
|
||||
### 5.4 Algorithmic Optimizations
|
||||
|
||||
**OPT-10: O(1) Tool Lookup**
|
||||
```python
|
||||
# tools/registry.py - Current: linear scan
|
||||
for name in sorted(tool_names): # O(n log n)
|
||||
entry = self._tools.get(name)
|
||||
|
||||
# Fix: Pre-computed sets
|
||||
self._tool_index = {name: entry for name, entry in self._tools.items()}
|
||||
```
|
||||
|
||||
**OPT-11: Path Overlap Detection**
|
||||
```python
|
||||
# run_agent.py lines 327-335 - O(n*m) comparison
|
||||
def _paths_overlap(left: Path, right: Path) -> bool:
|
||||
# Current: compares ALL path parts
|
||||
|
||||
# Fix: Hash-based lookup
|
||||
from functools import lru_cache
|
||||
|
||||
@lru_cache(maxsize=1024)
|
||||
def get_path_hash(path: Path) -> str:
|
||||
return str(path.resolve())
|
||||
```
|
||||
|
||||
**OPT-12: Parallel Tool Execution**
|
||||
```python
|
||||
# run_agent.py - Current: sequential or limited parallel
|
||||
# Fix: asyncio.gather for safe tools
|
||||
|
||||
async def execute_tool_batch(tool_calls):
|
||||
safe_tools = [tc for tc in tool_calls if tc.name in _PARALLEL_SAFE_TOOLS]
|
||||
unsafe_tools = [tc for tc in tool_calls if tc.name not in _PARALLEL_SAFE_TOOLS]
|
||||
|
||||
# Execute safe tools in parallel
|
||||
safe_results = await asyncio.gather(*[
|
||||
execute_tool(tc) for tc in safe_tools
|
||||
])
|
||||
|
||||
# Execute unsafe tools sequentially
|
||||
unsafe_results = []
|
||||
for tc in unsafe_tools:
|
||||
unsafe_results.append(await execute_tool(tc))
|
||||
```
|
||||
|
||||
### 5.5 I/O Optimizations
|
||||
|
||||
**OPT-13: Async File Operations**
|
||||
```python
|
||||
# utils.py - atomic_json_write uses blocking I/O
|
||||
# Fix: aiofiles
|
||||
|
||||
import aiofiles
|
||||
|
||||
async def async_atomic_json_write(path: Path, data: dict):
|
||||
tmp_path = path.with_suffix('.tmp')
|
||||
async with aiofiles.open(tmp_path, 'w') as f:
|
||||
await f.write(json.dumps(data))
|
||||
tmp_path.rename(path)
|
||||
```
|
||||
|
||||
**OPT-14: Memory-Mapped Files for Large Logs**
|
||||
```python
|
||||
# For trajectory files
|
||||
import mmap
|
||||
|
||||
def read_trajectory_chunk(path: Path, offset: int, size: int):
|
||||
with open(path, 'rb') as f:
|
||||
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
|
||||
return mm[offset:offset+size]
|
||||
```
|
||||
|
||||
**OPT-15: Compression for Session Storage**
|
||||
```python
|
||||
import lz4.frame # Fast compression
|
||||
|
||||
class CompressedSessionDB(SessionDB):
|
||||
def _compress_message(self, content: str) -> bytes:
|
||||
return lz4.frame.compress(content.encode())
|
||||
|
||||
def _decompress_message(self, data: bytes) -> str:
|
||||
return lz4.frame.decompress(data).decode()
|
||||
```
|
||||
**Impact:** 70% storage reduction, faster I/O
|
||||
|
||||
---
|
||||
|
||||
## 6. ADDITIONAL RECOMMENDATIONS
|
||||
|
||||
### 6.1 Architecture Improvements
|
||||
|
||||
1. **Split run_agent.py** into modules:
|
||||
- agent/core.py - Core conversation loop
|
||||
- agent/tools.py - Tool execution
|
||||
- agent/persistence.py - Session management
|
||||
- agent/api.py - API client management
|
||||
|
||||
2. **Implement Event-Driven Architecture:**
|
||||
- Use message queue for tool execution
|
||||
- Decouple gateway from agent logic
|
||||
- Enable horizontal scaling
|
||||
|
||||
3. **Add Metrics Collection:**
|
||||
```python
|
||||
from prometheus_client import Histogram, Counter
|
||||
|
||||
tool_execution_time = Histogram('tool_duration_seconds', 'Time spent in tools', ['tool_name'])
|
||||
api_call_counter = Counter('api_calls_total', 'Total API calls', ['provider', 'status'])
|
||||
```
|
||||
|
||||
### 6.2 Profiling Recommendations
|
||||
|
||||
**Immediate Actions:**
|
||||
```bash
|
||||
# 1. Profile import time
|
||||
python -X importtime -c "import run_agent" 2>&1 | head -100
|
||||
|
||||
# 2. Memory profiling
|
||||
pip install memory_profiler
|
||||
python -m memory_profiler run_agent.py
|
||||
|
||||
# 3. CPU profiling
|
||||
pip install py-spy
|
||||
py-spy top -- python run_agent.py
|
||||
|
||||
# 4. Async profiling
|
||||
pip install austin
|
||||
austin python run_agent.py
|
||||
```
|
||||
|
||||
### 6.3 Load Testing
|
||||
|
||||
```python
|
||||
# locustfile.py for gateway load testing
|
||||
from locust import HttpUser, task
|
||||
|
||||
class GatewayUser(HttpUser):
|
||||
@task
|
||||
def send_message(self):
|
||||
self.client.post("/webhook/telegram", json={
|
||||
"message": {"text": "Hello", "chat": {"id": 123}}
|
||||
})
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. PRIORITY MATRIX
|
||||
|
||||
| Priority | Optimization | Effort | Impact |
|
||||
|----------|-------------|--------|--------|
|
||||
| P0 | Async web tools | Low | 10x throughput |
|
||||
| P0 | HTTP connection pooling | Low | 50-100ms less latency per request |
|
||||
| P0 | SQLite batch writes | Low | 10x DB perf |
|
||||
| P1 | Tool lazy loading | Low | 2s faster startup |
|
||||
| P1 | Agent cache LRU | Low | Memory leak fix |
|
||||
| P1 | Streaming JSON | Medium | 50% memory |
|
||||
| P2 | Code splitting | High | Maintainability |
|
||||
| P2 | Redis caching | Medium | Scalability |
|
||||
| P2 | Compression | Low | 70% storage |
|
||||
|
||||
---
|
||||
|
||||
## 8. CONCLUSION
|
||||
|
||||
The Hermes Agent codebase has significant performance debt accumulated from rapid feature development. The monolithic architecture and synchronous I/O patterns are the primary bottlenecks.
|
||||
|
||||
**Quick Wins (1 week):**
|
||||
- Async HTTP clients
|
||||
- Connection pooling
|
||||
- SQLite batching
|
||||
- Lazy loading
|
||||
|
||||
**Medium Term (1 month):**
|
||||
- Code modularization
|
||||
- Caching layers
|
||||
- Streaming processing
|
||||
|
||||
**Long Term (3 months):**
|
||||
- Event-driven architecture
|
||||
- Horizontal scaling
|
||||
- Distributed caching
|
||||
|
||||
**Estimated Performance Gains:**
|
||||
- Latency: 50-70% reduction
|
||||
- Throughput: 10x improvement
|
||||
- Memory: 40% reduction
|
||||
- Startup: 3x faster
|
||||
@@ -1,241 +0,0 @@
|
||||
# Performance Hotspots Quick Reference
|
||||
|
||||
## Critical Files to Optimize
|
||||
|
||||
### 1. run_agent.py (8,317 lines, 419KB)
|
||||
```
|
||||
Lines 460-1000: Massive __init__ - 50+ params, slow startup
|
||||
Lines 2158-2222: _save_session_log - blocking I/O every turn
|
||||
Lines 2269-2297: _hydrate_todo_store - O(n) history scan
|
||||
Lines 3759-3826: _anthropic_messages_create - blocking API calls
|
||||
Lines 3827-3920: _interruptible_api_call - sync/async bridge overhead
|
||||
```
|
||||
|
||||
**Fix Priority: CRITICAL**
|
||||
- Split into modules
|
||||
- Add async session logging
|
||||
- Cache history hydration
|
||||
|
||||
---
|
||||
|
||||
### 2. gateway/run.py (6,016 lines, 274KB)
|
||||
```
|
||||
Lines 406-413: _agent_cache - unbounded growth, memory leak
|
||||
Lines 464-493: _get_or_create_gateway_honcho - blocking init
|
||||
Lines 2800+: run_agent_sync - blocks event loop
|
||||
```
|
||||
|
||||
**Fix Priority: HIGH**
|
||||
- Implement LRU cache
|
||||
- Use asyncio.to_thread()
|
||||
|
||||
---
|
||||
|
||||
### 3. gateway/stream_consumer.py
|
||||
```
|
||||
Lines 88-147: Busy-wait loop with 50ms sleep
|
||||
Max 20 updates/sec throughput
|
||||
```
|
||||
|
||||
**Fix Priority: MEDIUM**
|
||||
- Use asyncio.Event for signaling
|
||||
- Adaptive back-off
|
||||
|
||||
---
|
||||
|
||||
### 4. tools/web_tools.py (1,843 lines)
|
||||
```
|
||||
Lines 171-188: _tavily_request - sync httpx call, 60s timeout
|
||||
Lines 256-301: process_content_with_llm - sync LLM call
|
||||
```
|
||||
|
||||
**Fix Priority: CRITICAL**
|
||||
- Convert to async
|
||||
- Add connection pooling
|
||||
|
||||
---
|
||||
|
||||
### 5. tools/browser_tool.py (1,955 lines)
|
||||
```
|
||||
Lines 194-208: _resolve_cdp_override - sync requests call
|
||||
Lines 234-257: _get_cloud_provider - blocking config read
|
||||
```
|
||||
|
||||
**Fix Priority: HIGH**
|
||||
- Async HTTP client
|
||||
- Cache config reads
|
||||
|
||||
---
|
||||
|
||||
### 6. tools/terminal_tool.py (1,358 lines)
|
||||
```
|
||||
Lines 66-92: _check_disk_usage_warning - blocking glob walk
|
||||
Lines 167-289: _prompt_for_sudo_password - thread creation per call
|
||||
```
|
||||
|
||||
**Fix Priority: MEDIUM**
|
||||
- Async disk check
|
||||
- Thread pool reuse
|
||||
|
||||
---
|
||||
|
||||
### 7. tools/file_tools.py (563 lines)
|
||||
```
|
||||
Lines 53-62: _read_tracker - unbounded dict growth
|
||||
Lines 195-262: read_file_tool - sync file I/O
|
||||
```
|
||||
|
||||
**Fix Priority: MEDIUM**
|
||||
- TTL-based cleanup
|
||||
- aiofiles for async I/O
|
||||
|
||||
---
|
||||
|
||||
### 8. agent/context_compressor.py (676 lines)
|
||||
```
|
||||
Lines 250-369: _generate_summary - expensive LLM call
|
||||
Lines 490-500: _find_tail_cut_by_tokens - O(n) token counting
|
||||
```
|
||||
|
||||
**Fix Priority: HIGH**
|
||||
- Background compression task
|
||||
- Cache summaries
|
||||
|
||||
---
|
||||
|
||||
### 9. hermes_state.py (1,274 lines)
|
||||
```
|
||||
Lines 116-215: _execute_write - global lock, 15 retries
|
||||
Lines 143-156: SQLite with WAL but single connection
|
||||
```
|
||||
|
||||
**Fix Priority: HIGH**
|
||||
- Connection pooling
|
||||
- Batch writes
|
||||
|
||||
---
|
||||
|
||||
### 10. model_tools.py (472 lines)
|
||||
```
|
||||
Lines 81-126: _run_async - creates ThreadPool per call!
|
||||
Lines 132-170: _discover_tools - imports ALL tools at startup
|
||||
```
|
||||
|
||||
**Fix Priority: CRITICAL**
|
||||
- Persistent thread pool
|
||||
- Lazy tool loading
|
||||
|
||||
---
|
||||
|
||||
## Quick Fixes (Copy-Paste Ready)
|
||||
|
||||
### Fix 1: Bounded Cache (TTL + LRU Eviction) for Agent Cache
|
||||
```python
|
||||
from functools import lru_cache
|
||||
from cachetools import TTLCache
|
||||
|
||||
# In gateway/run.py
|
||||
self._agent_cache: Dict[str, tuple] = TTLCache(maxsize=100, ttl=3600)
|
||||
```
|
||||
|
||||
### Fix 2: Async HTTP Client
|
||||
```python
|
||||
# In tools/web_tools.py
|
||||
import httpx
|
||||
|
||||
_http_client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
async def get_http_client() -> httpx.AsyncClient:
|
||||
global _http_client
|
||||
if _http_client is None:
|
||||
_http_client = httpx.AsyncClient(timeout=60)
|
||||
return _http_client
|
||||
```
|
||||
|
||||
### Fix 3: Connection Pool for DB
|
||||
```python
|
||||
# In hermes_state.py
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.pool import QueuePool
|
||||
|
||||
engine = create_engine(
|
||||
'sqlite:///state.db',
|
||||
poolclass=QueuePool,
|
||||
pool_size=5,
|
||||
max_overflow=10
|
||||
)
|
||||
```
|
||||
|
||||
### Fix 4: Lazy Tool Loading
|
||||
```python
|
||||
# In model_tools.py
|
||||
@lru_cache(maxsize=1)
|
||||
def _get_discovered_tools():
|
||||
"""Cache tool discovery after first call"""
|
||||
_discover_tools()
|
||||
return registry
|
||||
```
|
||||
|
||||
### Fix 5: Non-Blocking Session Writes
|
||||
```python
|
||||
# In run_agent.py
|
||||
async def _save_session_log_async(self, messages):
|
||||
"""Non-blocking session save"""
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, self._save_session_log, messages)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Metrics to Track
|
||||
|
||||
```python
|
||||
# Add these metrics
|
||||
IMPORT_TIME = Gauge('import_time_seconds', 'Module import time')
|
||||
AGENT_INIT_TIME = Gauge('agent_init_seconds', 'AIAgent init time')
|
||||
TOOL_EXECUTION_TIME = Histogram('tool_duration_seconds', 'Tool execution', ['tool_name'])
|
||||
DB_WRITE_TIME = Histogram('db_write_seconds', 'Database write time')
|
||||
API_LATENCY = Histogram('api_latency_seconds', 'API call latency', ['provider'])
|
||||
MEMORY_USAGE = Gauge('memory_usage_bytes', 'Process memory')
|
||||
CACHE_HIT_RATE = Gauge('cache_hit_rate', 'Cache hit rate', ['cache_name'])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## One-Liner Profiling Commands
|
||||
|
||||
```bash
|
||||
# Find slow imports
|
||||
python -X importtime -c "from run_agent import AIAgent" 2>&1 | head -50
|
||||
|
||||
# Find blocking I/O
|
||||
sudo strace -e trace=openat,read,write -c python run_agent.py 2>&1
|
||||
|
||||
# Memory profiling
|
||||
pip install memory_profiler && python -m memory_profiler run_agent.py
|
||||
|
||||
# CPU profiling
|
||||
pip install py-spy && py-spy record -o profile.svg -- python run_agent.py
|
||||
|
||||
# Find all sleep calls
|
||||
grep -rn "time.sleep\|asyncio.sleep" --include="*.py" | wc -l
|
||||
|
||||
# Find all JSON calls
|
||||
grep -rn "json.loads\|json.dumps" --include="*.py" | wc -l
|
||||
|
||||
# Find all locks
|
||||
grep -rn "threading.Lock\|threading.RLock\|asyncio.Lock" --include="*.py"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Expected Performance After Fixes
|
||||
|
||||
| Metric | Before | After | Improvement |
|
||||
|--------|--------|-------|-------------|
|
||||
| Startup time | 3-5s | 1-2s | 3x faster |
|
||||
| API latency | 500ms | 200ms | 2.5x faster |
|
||||
| Concurrent requests | 10/s | 100/s | 10x throughput |
|
||||
| Memory per agent | 50MB | 30MB | 40% reduction |
|
||||
| DB writes/sec | 50 | 500 | 10x throughput |
|
||||
| Import time | 2s | 0.5s | 4x faster |
|
||||
@@ -1,163 +0,0 @@
|
||||
# Performance Optimizations for run_agent.py
|
||||
|
||||
## Summary of Changes
|
||||
|
||||
This document describes the async I/O and performance optimizations applied to `run_agent.py` to fix blocking operations and improve overall responsiveness.
|
||||
|
||||
---
|
||||
|
||||
## 1. Session Log Batching (PROBLEM 1: Lines 2158-2222)
|
||||
|
||||
### Problem
|
||||
`_save_session_log()` performed **blocking file I/O** on every conversation turn, causing:
|
||||
- UI freezing during rapid message exchanges
|
||||
- Unnecessary disk writes (JSON file was overwritten every turn)
|
||||
- Synchronous `json.dump()` and `fsync()` blocking the main thread
|
||||
|
||||
### Solution
|
||||
Implemented **async batching** with the following components:
|
||||
|
||||
#### New and Updated Methods:
|
||||
- `_init_session_log_batcher()` - Initialize batching infrastructure
|
||||
- `_save_session_log()` - Updated to use non-blocking batching
|
||||
- `_flush_session_log_async()` - Flush writes in background thread
|
||||
- `_write_session_log_sync()` - Actual blocking I/O (runs in thread pool)
|
||||
- `_deferred_session_log_flush()` - Delayed flush for batching
|
||||
- `_shutdown_session_log_batcher()` - Cleanup and flush on exit
|
||||
|
||||
#### Key Features:
|
||||
- **Time-based batching**: Minimum 500ms between writes
|
||||
- **Deferred flushing**: Rapid successive calls are batched
|
||||
- **Thread pool**: Single-worker executor prevents concurrent write conflicts
|
||||
- **Atexit cleanup**: Ensures pending logs are flushed on exit
|
||||
- **Backward compatible**: Same method signature, no breaking changes
|
||||
|
||||
#### Performance Impact:
|
||||
- Before: Every turn blocks on disk I/O (~5-20ms per write)
|
||||
- After: Updates cached in memory, flushed every 500ms or on exit
|
||||
- 10 rapid calls now result in ~1-2 writes instead of 10
|
||||
|
||||
---
|
||||
|
||||
## 2. Todo Store Hydration Caching (PROBLEM 2: Lines 2269-2297)
|
||||
|
||||
### Problem
|
||||
`_hydrate_todo_store()` performed **O(n) history scan on every message**:
|
||||
- Scanned entire conversation history backwards
|
||||
- No caching between calls
|
||||
- Re-parsed JSON for every message check
|
||||
- Gateway mode creates a fresh AIAgent per message, making this worse
|
||||
|
||||
### Solution
|
||||
Implemented **result caching** with scan limiting:
|
||||
|
||||
#### Key Changes:
|
||||
```python
|
||||
# Added caching flags
|
||||
self._todo_store_hydrated # Marks if hydration already done
|
||||
self._todo_cache_key # Caches history object id
|
||||
|
||||
# Added scan limit for very long histories
|
||||
scan_limit = 100 # Only scan last 100 messages
|
||||
```
|
||||
|
||||
#### Performance Impact:
|
||||
- Before: O(n) scan every call, parsing JSON for each tool message
|
||||
- After: O(1) cached check, skips redundant work
|
||||
- First call: Scans up to 100 messages (limited)
|
||||
- Subsequent calls: <1μs cached check
|
||||
|
||||
---
|
||||
|
||||
## 3. API Call Timeouts (PROBLEM 3: Lines 3759-3920)
|
||||
|
||||
### Problem
|
||||
`_anthropic_messages_create()` and `_interruptible_api_call()` had:
|
||||
- **No timeout handling** - could block indefinitely
|
||||
- 300ms polling interval for interrupt detection (sluggish)
|
||||
- No timeout for OpenAI-compatible endpoints
|
||||
|
||||
### Solution
|
||||
Added comprehensive timeout handling:
|
||||
|
||||
#### Changes to `_anthropic_messages_create()`:
|
||||
- Added `timeout: float = 300.0` parameter (5 minutes default)
|
||||
- Passes timeout to Anthropic SDK
|
||||
|
||||
#### Changes to `_interruptible_api_call()`:
|
||||
- Added `timeout: float = 300.0` parameter
|
||||
- **Reduced polling interval** from 300ms to **50ms** (6x faster interrupt response)
|
||||
- Added elapsed time tracking
|
||||
- Raises `TimeoutError` if API call exceeds timeout
|
||||
- Force-closes clients on timeout to prevent resource leaks
|
||||
- Passes timeout to OpenAI-compatible endpoints
|
||||
|
||||
#### Performance Impact:
|
||||
- Before: Could hang forever on stuck connections
|
||||
- After: Guaranteed timeout after 5 minutes (configurable)
|
||||
- Interrupt response: 300ms → 50ms (6x faster)
|
||||
|
||||
---
|
||||
|
||||
## Backward Compatibility
|
||||
|
||||
All changes maintain **100% backward compatibility**:
|
||||
|
||||
1. **Session logging**: Same method signature, behavior is additive
|
||||
2. **Todo hydration**: Same signature, caching is transparent
|
||||
3. **API calls**: New `timeout` parameter has sensible default (300s)
|
||||
|
||||
No existing code needs modification to benefit from these optimizations.
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
Run the verification script:
|
||||
```bash
|
||||
python3 -c "
|
||||
import ast
|
||||
with open('run_agent.py') as f:
|
||||
source = f.read()
|
||||
tree = ast.parse(source)
|
||||
|
||||
methods = ['_init_session_log_batcher', '_write_session_log_sync',
|
||||
'_shutdown_session_log_batcher', '_hydrate_todo_store',
|
||||
'_interruptible_api_call']
|
||||
|
||||
for node in ast.walk(tree):
|
||||
if isinstance(node, ast.FunctionDef) and node.name in methods:
|
||||
print(f'✓ Found {node.name}')
|
||||
print('\nAll optimizations verified!')
|
||||
"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Lines Modified
|
||||
|
||||
| Function | Line Range | Change Type |
|
||||
|----------|-----------|-------------|
|
||||
| `_init_session_log_batcher` | ~2168-2178 | NEW |
|
||||
| `_save_session_log` | ~2178-2230 | MODIFIED |
|
||||
| `_flush_session_log_async` | ~2230-2240 | NEW |
|
||||
| `_write_session_log_sync` | ~2240-2300 | NEW |
|
||||
| `_deferred_session_log_flush` | ~2300-2305 | NEW |
|
||||
| `_shutdown_session_log_batcher` | ~2305-2315 | NEW |
|
||||
| `_hydrate_todo_store` | ~2320-2360 | MODIFIED |
|
||||
| `_anthropic_messages_create` | ~3870-3890 | MODIFIED |
|
||||
| `_interruptible_api_call` | ~3895-3970 | MODIFIED |
|
||||
|
||||
---
|
||||
|
||||
## Future Improvements
|
||||
|
||||
Potential additional optimizations:
|
||||
1. Use `aiofiles` for true async file I/O (requires aiofiles dependency)
|
||||
2. Batch SQLite writes in `_flush_messages_to_session_db`
|
||||
3. Add compression for large session logs
|
||||
4. Implement write-behind caching for checkpoint manager
|
||||
|
||||
---
|
||||
|
||||
*Optimizations implemented: 2026-03-31*
|
||||
@@ -1,566 +0,0 @@
|
||||
# SECURE CODING GUIDELINES
|
||||
|
||||
## Hermes Agent Development Security Standards
|
||||
**Version:** 1.0
|
||||
**Effective Date:** March 30, 2026
|
||||
|
||||
---
|
||||
|
||||
## 1. GENERAL PRINCIPLES
|
||||
|
||||
### 1.1 Security-First Mindset
|
||||
- Every feature must be designed with security in mind
|
||||
- Assume all input is malicious until proven otherwise
|
||||
- Defense in depth: multiple layers of security controls
|
||||
- Fail securely: when security controls fail, default to denial
|
||||
|
||||
### 1.2 Threat Model
|
||||
Primary threats to consider:
|
||||
- Malicious user prompts
|
||||
- Compromised or malicious skills
|
||||
- Supply chain attacks
|
||||
- Insider threats
|
||||
- Accidental data exposure
|
||||
|
||||
---
|
||||
|
||||
## 2. INPUT VALIDATION
|
||||
|
||||
### 2.1 Validate All Input
|
||||
```python
|
||||
# ❌ INCORRECT
|
||||
def process_file(path: str):
|
||||
with open(path) as f:
|
||||
return f.read()
|
||||
|
||||
# ✅ CORRECT
|
||||
from pydantic import BaseModel, validator
|
||||
import re
|
||||
|
||||
class FileRequest(BaseModel):
|
||||
path: str
|
||||
max_size: int = 1000000
|
||||
|
||||
@validator('path')
|
||||
def validate_path(cls, v):
|
||||
# Block path traversal
|
||||
if '..' in v or v.startswith('/'):
|
||||
raise ValueError('Invalid path characters')
|
||||
# Allowlist safe characters
|
||||
if not re.match(r'^[\w\-./]+$', v):
|
||||
raise ValueError('Invalid characters in path')
|
||||
return v
|
||||
|
||||
@validator('max_size')
|
||||
def validate_size(cls, v):
|
||||
if v < 0 or v > 10000000:
|
||||
raise ValueError('Size out of range')
|
||||
return v
|
||||
|
||||
def process_file(request: FileRequest):
|
||||
# Now safe to use request.path
|
||||
pass
|
||||
```
|
||||
|
||||
### 2.2 Length Limits
|
||||
Always enforce maximum lengths:
|
||||
```python
|
||||
MAX_INPUT_LENGTH = 10000
|
||||
MAX_FILENAME_LENGTH = 255
|
||||
MAX_PATH_LENGTH = 4096
|
||||
|
||||
def validate_length(value: str, max_len: int, field_name: str):
|
||||
if len(value) > max_len:
|
||||
raise ValueError(f"{field_name} exceeds maximum length of {max_len}")
|
||||
```
|
||||
|
||||
### 2.3 Type Safety
|
||||
Use type hints and enforce them:
|
||||
```python
|
||||
from typing import Union
|
||||
|
||||
def safe_function(user_id: int, message: str) -> dict:
|
||||
if not isinstance(user_id, int):
|
||||
raise TypeError("user_id must be an integer")
|
||||
if not isinstance(message, str):
|
||||
raise TypeError("message must be a string")
|
||||
# ... function logic
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. COMMAND EXECUTION
|
||||
|
||||
### 3.1 Never Use shell=True
|
||||
```python
|
||||
import subprocess
|
||||
import shlex
|
||||
|
||||
# ❌ NEVER DO THIS
|
||||
subprocess.run(f"ls {user_input}", shell=True)
|
||||
|
||||
# ❌ NEVER DO THIS EITHER
|
||||
cmd = f"cat {filename}"
|
||||
os.system(cmd)
|
||||
|
||||
# ✅ CORRECT - Use list arguments
|
||||
subprocess.run(["ls", user_input], shell=False)
|
||||
|
||||
# ✅ CORRECT - Use shlex for complex cases
|
||||
cmd_parts = shlex.split(user_input)
|
||||
subprocess.run(["ls"] + cmd_parts, shell=False)
|
||||
```
|
||||
|
||||
### 3.2 Command Allowlisting
|
||||
```python
|
||||
ALLOWED_COMMANDS = frozenset([
|
||||
"ls", "cat", "grep", "find", "git", "python", "pip"
|
||||
])
|
||||
|
||||
def validate_command(command: str):
|
||||
parts = shlex.split(command)
|
||||
if parts[0] not in ALLOWED_COMMANDS:
|
||||
raise SecurityError(f"Command '{parts[0]}' not allowed")
|
||||
```
|
||||
|
||||
### 3.3 Input Sanitization
|
||||
```python
|
||||
import re
|
||||
|
||||
def sanitize_shell_input(value: str) -> str:
|
||||
"""Remove dangerous shell metacharacters."""
|
||||
# Block shell metacharacters
|
||||
dangerous = re.compile(r'[;&|`$(){}[\]\\]')
|
||||
if dangerous.search(value):
|
||||
raise ValueError("Shell metacharacters not allowed")
|
||||
return value
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. FILE OPERATIONS
|
||||
|
||||
### 4.1 Path Validation
|
||||
```python
|
||||
from pathlib import Path
|
||||
|
||||
class FileSandbox:
|
||||
def __init__(self, root: Path):
|
||||
self.root = root.resolve()
|
||||
|
||||
def validate_path(self, user_path: str) -> Path:
|
||||
"""Validate and resolve user-provided path within sandbox."""
|
||||
# Expand user home
|
||||
expanded = Path(user_path).expanduser()
|
||||
|
||||
# Resolve to absolute path
|
||||
try:
|
||||
resolved = expanded.resolve()
|
||||
except (OSError, ValueError) as e:
|
||||
raise SecurityError(f"Invalid path: {e}")
|
||||
|
||||
# Ensure path is within sandbox
|
||||
try:
|
||||
resolved.relative_to(self.root)
|
||||
except ValueError:
|
||||
raise SecurityError("Path outside sandbox")
|
||||
|
||||
return resolved
|
||||
|
||||
def safe_open(self, user_path: str, mode: str = 'r'):
|
||||
safe_path = self.validate_path(user_path)
|
||||
return open(safe_path, mode)
|
||||
```
|
||||
|
||||
### 4.2 Prevent Symlink Attacks
|
||||
```python
|
||||
import os
|
||||
|
||||
def safe_read_file(filepath: Path):
|
||||
"""Read file, following symlinks only within allowed directories."""
|
||||
# Resolve symlinks
|
||||
real_path = filepath.resolve()
|
||||
|
||||
# Verify still in allowed location after resolution
|
||||
if not str(real_path).startswith(str(SAFE_ROOT)):
|
||||
raise SecurityError("Symlink escape detected")
|
||||
|
||||
# Verify it's a regular file
|
||||
if not real_path.is_file():
|
||||
raise SecurityError("Not a regular file")
|
||||
|
||||
return real_path.read_text()
|
||||
```
|
||||
|
||||
### 4.3 Temporary Files
|
||||
```python
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
def create_secure_temp_file():
|
||||
"""Create temp file with restricted permissions."""
|
||||
# Create with restrictive permissions
|
||||
fd, path = tempfile.mkstemp(prefix="hermes_", suffix=".tmp")
|
||||
try:
|
||||
# Set owner-read/write only
|
||||
os.chmod(path, 0o600)
|
||||
return fd, path
|
||||
except:
|
||||
os.close(fd)
|
||||
os.unlink(path)
|
||||
raise
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. SECRET MANAGEMENT
|
||||
|
||||
### 5.1 Environment Variables
|
||||
```python
|
||||
import os
|
||||
|
||||
# ❌ NEVER DO THIS
|
||||
def execute_command(command: str):
|
||||
# Child inherits ALL environment
|
||||
subprocess.run(command, shell=True, env=os.environ)
|
||||
|
||||
# ✅ CORRECT - Explicit whitelisting
|
||||
_ALLOWED_ENV = frozenset([
|
||||
"PATH", "HOME", "USER", "LANG", "TERM", "SHELL"
|
||||
])
|
||||
|
||||
def get_safe_environment():
|
||||
return {k: v for k, v in os.environ.items()
|
||||
if k in _ALLOWED_ENV}
|
||||
|
||||
def execute_command(command: str):
|
||||
subprocess.run(
|
||||
command,
|
||||
shell=False,
|
||||
env=get_safe_environment()
|
||||
)
|
||||
```
|
||||
|
||||
### 5.2 Secret Detection
|
||||
```python
|
||||
import re
|
||||
|
||||
_SECRET_PATTERNS = [
|
||||
re.compile(r'sk-[a-zA-Z0-9]{20,}'), # OpenAI-style keys
|
||||
re.compile(r'ghp_[a-zA-Z0-9]{36}'), # GitHub PAT
|
||||
re.compile(r'[a-zA-Z0-9]{40}'), # Generic high-entropy strings
|
||||
]
|
||||
|
||||
def detect_secrets(text: str) -> list:
|
||||
"""Detect potential secrets in text."""
|
||||
findings = []
|
||||
for pattern in _SECRET_PATTERNS:
|
||||
matches = pattern.findall(text)
|
||||
findings.extend(matches)
|
||||
return findings
|
||||
|
||||
def redact_secrets(text: str) -> str:
|
||||
"""Redact detected secrets."""
|
||||
for pattern in _SECRET_PATTERNS:
|
||||
text = pattern.sub('***REDACTED***', text)
|
||||
return text
|
||||
```
|
||||
|
||||
### 5.3 Secure Logging
|
||||
```python
|
||||
import logging
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
class SecureLogger:
|
||||
def __init__(self, logger: logging.Logger):
|
||||
self.logger = logger
|
||||
|
||||
def debug(self, msg: str, *args, **kwargs):
|
||||
self.logger.debug(redact_sensitive_text(msg), *args, **kwargs)
|
||||
|
||||
def info(self, msg: str, *args, **kwargs):
|
||||
self.logger.info(redact_sensitive_text(msg), *args, **kwargs)
|
||||
|
||||
def warning(self, msg: str, *args, **kwargs):
|
||||
self.logger.warning(redact_sensitive_text(msg), *args, **kwargs)
|
||||
|
||||
def error(self, msg: str, *args, **kwargs):
|
||||
self.logger.error(redact_sensitive_text(msg), *args, **kwargs)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. NETWORK SECURITY
|
||||
|
||||
### 6.1 URL Validation
|
||||
```python
|
||||
from urllib.parse import urlparse
|
||||
import ipaddress
|
||||
|
||||
_BLOCKED_SCHEMES = frozenset(['file', 'ftp', 'gopher'])
|
||||
_BLOCKED_HOSTS = frozenset([
|
||||
'localhost', '127.0.0.1', '0.0.0.0',
|
||||
'169.254.169.254', # AWS metadata
|
||||
'[::1]', '[::]'
|
||||
])
|
||||
_PRIVATE_NETWORKS = [
|
||||
ipaddress.ip_network('10.0.0.0/8'),
|
||||
ipaddress.ip_network('172.16.0.0/12'),
|
||||
ipaddress.ip_network('192.168.0.0/16'),
|
||||
ipaddress.ip_network('127.0.0.0/8'),
|
||||
ipaddress.ip_network('169.254.0.0/16'), # Link-local
|
||||
]
|
||||
|
||||
def validate_url(url: str) -> bool:
|
||||
"""Validate URL is safe to fetch."""
|
||||
parsed = urlparse(url)
|
||||
|
||||
# Check scheme
|
||||
if parsed.scheme not in ('http', 'https'):
|
||||
raise ValueError(f"Scheme '{parsed.scheme}' not allowed")
|
||||
|
||||
# Check hostname
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
raise ValueError("No hostname in URL")
|
||||
|
||||
if hostname.lower() in _BLOCKED_HOSTS:
|
||||
raise ValueError("Host not allowed")
|
||||
|
||||
# Check IP addresses
|
||||
try:
|
||||
ip = ipaddress.ip_address(hostname)
|
||||
for network in _PRIVATE_NETWORKS:
|
||||
if ip in network:
|
||||
raise ValueError("Private IP address not allowed")
|
||||
except ValueError:
|
||||
pass # Not an IP, continue
|
||||
|
||||
return True
|
||||
```
|
||||
|
||||
### 6.2 Redirect Handling
|
||||
```python
|
||||
import requests
|
||||
|
||||
def safe_get(url: str, max_redirects: int = 5):
|
||||
"""GET URL with redirect validation."""
|
||||
session = requests.Session()
|
||||
session.max_redirects = max_redirects
|
||||
|
||||
# Validate initial URL
|
||||
validate_url(url)
|
||||
|
||||
# Custom redirect handler
|
||||
response = session.get(
|
||||
url,
|
||||
allow_redirects=True,
|
||||
hooks={'response': lambda r, *args, **kwargs: validate_url(r.url)}
|
||||
)
|
||||
|
||||
return response
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. AUTHENTICATION & AUTHORIZATION
|
||||
|
||||
### 7.1 API Key Validation
|
||||
```python
|
||||
import secrets
|
||||
import hmac
|
||||
import hashlib
|
||||
|
||||
def constant_time_compare(val1: str, val2: str) -> bool:
|
||||
"""Compare strings in constant time to prevent timing attacks."""
|
||||
return hmac.compare_digest(val1.encode(), val2.encode())
|
||||
|
||||
def validate_api_key(provided_key: str, expected_key: str) -> bool:
|
||||
"""Validate API key using constant-time comparison."""
|
||||
if not provided_key or not expected_key:
|
||||
return False
|
||||
return constant_time_compare(provided_key, expected_key)
|
||||
```
|
||||
|
||||
### 7.2 Session Management
|
||||
```python
|
||||
import secrets
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
class SessionManager:
|
||||
SESSION_TIMEOUT = timedelta(hours=24)
|
||||
|
||||
def create_session(self, user_id: str) -> str:
|
||||
"""Create secure session token."""
|
||||
token = secrets.token_urlsafe(32)
|
||||
expires = datetime.utcnow() + self.SESSION_TIMEOUT
|
||||
# Store in database with expiration
|
||||
return token
|
||||
|
||||
def validate_session(self, token: str) -> bool:
|
||||
"""Validate session token."""
|
||||
# Lookup in database
|
||||
# Check expiration
|
||||
# Validate token format
|
||||
return True
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. ERROR HANDLING
|
||||
|
||||
### 8.1 Secure Error Messages
|
||||
```python
|
||||
import logging
|
||||
|
||||
# Internal detailed logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class UserFacingError(Exception):
|
||||
"""Error safe to show to users."""
|
||||
pass
|
||||
|
||||
def process_request(data: dict):
|
||||
try:
|
||||
result = internal_operation(data)
|
||||
return result
|
||||
except ValueError as e:
|
||||
# Log full details internally
|
||||
logger.error(f"Validation error: {e}", exc_info=True)
|
||||
# Return safe message to user
|
||||
raise UserFacingError("Invalid input provided")
|
||||
except Exception as e:
|
||||
# Log full details internally
|
||||
logger.error(f"Unexpected error: {e}", exc_info=True)
|
||||
# Generic message to user
|
||||
raise UserFacingError("An error occurred")
|
||||
```
|
||||
|
||||
### 8.2 Exception Handling
|
||||
```python
|
||||
def safe_operation():
|
||||
try:
|
||||
risky_operation()
|
||||
except Exception as e:
|
||||
# Always clean up resources
|
||||
cleanup_resources()
|
||||
# Log securely
|
||||
logger.error(f"Operation failed: {redact_sensitive_text(str(e))}")
|
||||
# Re-raise or convert
|
||||
raise
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. CRYPTOGRAPHY
|
||||
|
||||
### 9.1 Password Hashing
|
||||
```python
|
||||
import bcrypt
|
||||
|
||||
def hash_password(password: str) -> str:
|
||||
"""Hash password using bcrypt."""
|
||||
salt = bcrypt.gensalt(rounds=12)
|
||||
hashed = bcrypt.hashpw(password.encode(), salt)
|
||||
return hashed.decode()
|
||||
|
||||
def verify_password(password: str, hashed: str) -> bool:
|
||||
"""Verify password against hash."""
|
||||
return bcrypt.checkpw(password.encode(), hashed.encode())
|
||||
```
|
||||
|
||||
### 9.2 Secure Random
|
||||
```python
|
||||
import secrets
|
||||
|
||||
def generate_token(length: int = 32) -> str:
|
||||
"""Generate cryptographically secure token."""
|
||||
return secrets.token_urlsafe(length)
|
||||
|
||||
def generate_pin(length: int = 6) -> str:
|
||||
"""Generate secure numeric PIN."""
|
||||
return ''.join(str(secrets.randbelow(10)) for _ in range(length))
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. CODE REVIEW CHECKLIST
|
||||
|
||||
### Before Submitting Code:
|
||||
- [ ] All user inputs validated
|
||||
- [ ] No shell=True in subprocess calls
|
||||
- [ ] All file paths validated and sandboxed
|
||||
- [ ] Secrets not logged or exposed
|
||||
- [ ] URLs validated before fetching
|
||||
- [ ] Error messages don't leak sensitive info
|
||||
- [ ] No hardcoded credentials
|
||||
- [ ] Proper exception handling
|
||||
- [ ] Security tests included
|
||||
- [ ] Documentation updated
|
||||
|
||||
### Security-Focused Review Questions:
|
||||
1. What happens if this receives malicious input?
|
||||
2. Can this leak sensitive data?
|
||||
3. Are there privilege escalation paths?
|
||||
4. What if the external service is compromised?
|
||||
5. Is the error handling secure?
|
||||
|
||||
---
|
||||
|
||||
## 11. TESTING SECURITY
|
||||
|
||||
### 11.1 Security Unit Tests
|
||||
```python
|
||||
def test_path_traversal_blocked():
|
||||
sandbox = FileSandbox(Path("/safe/path"))
|
||||
with pytest.raises(SecurityError):
|
||||
sandbox.validate_path("../../../etc/passwd")
|
||||
|
||||
def test_command_injection_blocked():
|
||||
with pytest.raises(SecurityError):
|
||||
validate_command("ls; rm -rf /")
|
||||
|
||||
def test_secret_redaction():
|
||||
text = "Key: sk-test123456789"
|
||||
redacted = redact_secrets(text)
|
||||
assert "sk-test" not in redacted
|
||||
```
|
||||
|
||||
### 11.2 Fuzzing
|
||||
```python
|
||||
import hypothesis.strategies as st
|
||||
from hypothesis import given
|
||||
|
||||
@given(st.text())
|
||||
def test_input_validation(input_text):
|
||||
# Should never crash, always validate or reject
|
||||
try:
|
||||
result = process_input(input_text)
|
||||
assert isinstance(result, ExpectedType)
|
||||
except ValidationError:
|
||||
pass # Expected for invalid input
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. INCIDENT RESPONSE
|
||||
|
||||
### Security Incident Procedure:
|
||||
1. **Stop** - Halt the affected system/process
|
||||
2. **Assess** - Determine scope and impact
|
||||
3. **Contain** - Prevent further damage
|
||||
4. **Investigate** - Gather evidence
|
||||
5. **Remediate** - Fix the vulnerability
|
||||
6. **Recover** - Restore normal operations
|
||||
7. **Learn** - Document and improve
|
||||
|
||||
### Emergency Contacts:
|
||||
- Security Team: security@example.com
|
||||
- On-call: +1-XXX-XXX-XXXX
|
||||
- Slack: #security-incidents
|
||||
|
||||
---
|
||||
|
||||
**Document Owner:** Security Team
|
||||
**Review Cycle:** Quarterly
|
||||
**Last Updated:** March 30, 2026
|
||||
@@ -1,705 +0,0 @@
|
||||
# HERMES AGENT - COMPREHENSIVE SECURITY AUDIT REPORT
|
||||
**Audit Date:** March 30, 2026
|
||||
**Auditor:** Security Analysis Agent
|
||||
**Scope:** Entire codebase including authentication, command execution, file operations, sandbox environments, and API endpoints
|
||||
|
||||
---
|
||||
|
||||
## EXECUTIVE SUMMARY
|
||||
|
||||
The Hermes Agent codebase contains **32 identified security issues** across critical severity (5), high severity (12), medium severity (10), and low severity (5). The most critical vulnerabilities involve command injection vectors, sandbox escape possibilities, and secret leakage risks.
|
||||
|
||||
**Overall Security Posture: MODERATE-HIGH RISK**
|
||||
- Well-designed approval system for dangerous commands
|
||||
- Good secret redaction mechanisms
|
||||
- Insufficient input validation in several areas
|
||||
- Multiple command injection vectors
|
||||
- Incomplete sandbox isolation in some environments
|
||||
|
||||
---
|
||||
|
||||
## 1. CVSS-SCORED VULNERABILITY REPORT
|
||||
|
||||
### CRITICAL SEVERITY (CVSS 9.0-10.0)
|
||||
|
||||
#### V-001: Command Injection via shell=True in Subprocess Calls
|
||||
- **CVSS Score:** 9.8 (Critical)
|
||||
- **Location:** `tools/terminal_tool.py`, `tools/file_operations.py`, `tools/environments/*.py`
|
||||
- **Description:** Multiple subprocess calls use shell=True with user-controlled input, enabling arbitrary command execution
|
||||
- **Attack Vector:** Local/Remote via agent prompts or malicious skills
|
||||
- **Evidence:**
|
||||
```python
|
||||
# terminal_tool.py line ~460
|
||||
subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ...)
|
||||
# Command strings constructed from user input without proper sanitization
|
||||
```
|
||||
- **Impact:** Complete system compromise, data exfiltration, malware installation
|
||||
- **Remediation:** Use subprocess without shell=True, pass arguments as lists, implement strict input validation
|
||||
|
||||
#### V-002: Path Traversal in File Operations
|
||||
- **CVSS Score:** 9.1 (Critical)
|
||||
- **Location:** `tools/file_operations.py`, `tools/file_tools.py`
|
||||
- **Description:** Insufficient path validation allows access to sensitive system files
|
||||
- **Attack Vector:** Malicious file paths like `../../../etc/shadow` or `~/.ssh/id_rsa`
|
||||
- **Evidence:**
|
||||
```python
|
||||
# file_operations.py - _expand_path() allows ~username expansion
|
||||
# which can be exploited with crafted usernames
|
||||
```
|
||||
- **Impact:** Unauthorized file read/write, credential theft, system compromise
|
||||
- **Remediation:** Implement strict path canonicalization and sandbox boundaries
|
||||
|
||||
#### V-003: Secret Leakage via Environment Variables in Sandboxes
|
||||
- **CVSS Score:** 9.3 (Critical)
|
||||
- **Location:** `tools/code_execution_tool.py`, `tools/environments/*.py`
|
||||
- **Description:** Child processes inherit environment variables containing secrets
|
||||
- **Attack Vector:** Malicious code executed via execute_code or terminal
|
||||
- **Evidence:**
|
||||
```python
|
||||
# code_execution_tool.py lines 434-461
|
||||
# _SAFE_ENV_PREFIXES filter is incomplete - misses many secret patterns
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", ...)
|
||||
_SECRET_SUBSTRINGS = ("TOKEN", "SECRET", "PASSWORD", ...)
|
||||
# Only blocks explicit patterns - many secret env vars slip through
|
||||
```
|
||||
- **Impact:** API key theft, credential exfiltration, unauthorized access to external services
|
||||
- **Remediation:** Whitelist-only approach for env vars, explicit secret scanning
|
||||
|
||||
#### V-004: Sudo Password Exposure via Command Line
|
||||
- **CVSS Score:** 9.0 (Critical)
|
||||
- **Location:** `tools/terminal_tool.py`, `_transform_sudo_command()`
|
||||
- **Description:** Sudo passwords may be exposed in process lists via command line arguments
|
||||
- **Attack Vector:** Local attackers reading /proc or ps output
|
||||
- **Evidence:**
|
||||
```python
|
||||
# Line 275: sudo_stdin passed via printf pipe
|
||||
exec_command = f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
|
||||
```
|
||||
- **Impact:** Theft of the sudo password, enabling privilege escalation
|
||||
- **Remediation:** Use file descriptor passing, avoid shell command construction with secrets
|
||||
|
||||
#### V-005: SSRF via Unsafe URL Handling
|
||||
- **CVSS Score:** 9.4 (Critical)
|
||||
- **Location:** `tools/web_tools.py`, `tools/browser_tool.py`
|
||||
- **Description:** URL safety checks can be bypassed via DNS rebinding and redirect chains
|
||||
- **Attack Vector:** Malicious URLs targeting internal services (169.254.169.254, localhost)
|
||||
- **Evidence:**
|
||||
```python
|
||||
# url_safety.py - is_safe_url() vulnerable to TOCTOU
|
||||
# DNS resolution and actual connection are separate operations
|
||||
```
|
||||
- **Impact:** Internal service access, cloud metadata theft, port scanning
|
||||
- **Remediation:** Implement connection-level validation, use egress proxy
|
||||
|
||||
---
|
||||
|
||||
### HIGH SEVERITY (CVSS 7.0-8.9)
|
||||
|
||||
#### V-006: Insecure Deserialization in MCP OAuth
|
||||
- **CVSS Score:** 8.8 (High)
|
||||
- **Location:** `tools/mcp_oauth.py`, token storage
|
||||
- **Description:** JSON token data loaded without schema validation
|
||||
- **Attack Vector:** Malicious token files crafted by local attackers
|
||||
- **Remediation:** Add JSON schema validation, sign stored tokens
|
||||
|
||||
#### V-007: SQL Injection in ResponseStore
|
||||
- **CVSS Score:** 8.5 (High)
|
||||
- **Location:** `gateway/platforms/api_server.py`, ResponseStore class
|
||||
- **Description:** `response_id` reaches SQLite queries without format validation (the queries themselves are parameterized)
|
||||
- **Evidence:**
|
||||
```python
|
||||
# Lines 98-106, 114-126 - response_id bound as a query parameter without format validation
|
||||
"SELECT data FROM responses WHERE response_id = ?", (response_id,)
|
||||
# While parameterized, no validation of response_id format
|
||||
```
|
||||
- **Remediation:** Validate response_id format, use UUID strict parsing
|
||||
|
||||
#### V-008: CORS Misconfiguration in API Server
|
||||
- **CVSS Score:** 8.2 (High)
|
||||
- **Location:** `gateway/platforms/api_server.py`, cors_middleware
|
||||
- **Description:** Wildcard CORS allowed with credentials
|
||||
- **Evidence:**
|
||||
```python
|
||||
# Lines 324-328: "*" in origins allows any domain
|
||||
if "*" in self._cors_origins:
|
||||
headers["Access-Control-Allow-Origin"] = "*"
|
||||
```
|
||||
- **Impact:** Cross-origin attacks, credential theft via malicious websites
|
||||
- **Remediation:** Never allow "*" with credentials, implement strict origin validation
|
||||
|
||||
#### V-009: Authentication Bypass in API Key Check
|
||||
- **CVSS Score:** 8.1 (High)
|
||||
- **Location:** `gateway/platforms/api_server.py`, `_check_auth()`
|
||||
- **Description:** Empty API key configuration allows all requests
|
||||
- **Evidence:**
|
||||
```python
|
||||
# Lines 360-361: No key configured = allow all
|
||||
if not self._api_key:
|
||||
return None # No key configured — allow all
|
||||
```
|
||||
- **Impact:** Unauthorized API access when key not explicitly set
|
||||
- **Remediation:** Require explicit auth configuration, fail-closed default
|
||||
|
||||
#### V-010: Code Injection via Browser CDP Override
|
||||
- **CVSS Score:** 8.4 (High)
|
||||
- **Location:** `tools/browser_tool.py`, `_resolve_cdp_override()`
|
||||
- **Description:** User-controlled CDP URL fetched without validation
|
||||
- **Evidence:**
|
||||
```python
|
||||
# Line 195: requests.get(version_url) without URL validation
|
||||
response = requests.get(version_url, timeout=10)
|
||||
```
|
||||
- **Impact:** SSRF, internal service exploitation
|
||||
- **Remediation:** Strict URL allowlisting, validate scheme/host
|
||||
|
||||
#### V-011: Skills Guard Bypass via Obfuscation
|
||||
- **CVSS Score:** 7.8 (High)
|
||||
- **Location:** `tools/skills_guard.py`, THREAT_PATTERNS
|
||||
- **Description:** Regex-based detection can be bypassed with encoding tricks
|
||||
- **Evidence:** Patterns don't cover all Unicode variants, case variations, or encoding tricks
|
||||
- **Impact:** Malicious skills installation, code execution
|
||||
- **Remediation:** Normalize input before scanning, add AST-based analysis
|
||||
|
||||
#### V-012: Privilege Escalation via Docker Socket Mount
|
||||
- **CVSS Score:** 8.7 (High)
|
||||
- **Location:** `tools/environments/docker.py`, volume mounting
|
||||
- **Description:** User-configured volumes can mount Docker socket
|
||||
- **Evidence:**
|
||||
```python
|
||||
# Line 267: volume_args extends with user-controlled vol
|
||||
volume_args.extend(["-v", vol])
|
||||
```
|
||||
- **Impact:** Container escape, host compromise
|
||||
- **Remediation:** Blocklist sensitive paths, validate all mount points
|
||||
|
||||
#### V-013: Information Disclosure via Error Messages
|
||||
- **CVSS Score:** 7.5 (High)
|
||||
- **Location:** Multiple files across codebase
|
||||
- **Description:** Detailed error messages expose internal paths, versions, configurations
|
||||
- **Evidence:** File paths, environment details in exception messages
|
||||
- **Impact:** Information gathering for targeted attacks
|
||||
- **Remediation:** Sanitize error messages in production, log details internally only
|
||||
|
||||
#### V-014: Session Fixation in OAuth Flow
|
||||
- **CVSS Score:** 7.6 (High)
|
||||
- **Location:** `tools/mcp_oauth.py`, `_wait_for_callback()`
|
||||
- **Description:** State parameter not validated against session
|
||||
- **Evidence:** Line 186: state returned but not verified against initial value
|
||||
- **Impact:** OAuth session hijacking
|
||||
- **Remediation:** Cryptographically verify state parameter
|
||||
|
||||
#### V-015: Race Condition in File Operations
|
||||
- **CVSS Score:** 7.4 (High)
|
||||
- **Location:** `tools/file_operations.py`, `ShellFileOperations`
|
||||
- **Description:** Time-of-check to time-of-use vulnerabilities in file access
|
||||
- **Impact:** Privilege escalation, unauthorized file access
|
||||
- **Remediation:** Use file descriptors, avoid path-based operations
|
||||
|
||||
#### V-016: Insufficient Rate Limiting
|
||||
- **CVSS Score:** 7.3 (High)
|
||||
- **Location:** `gateway/platforms/api_server.py`, `gateway/run.py`
|
||||
- **Description:** No rate limiting on API endpoints
|
||||
- **Impact:** DoS, brute force attacks, resource exhaustion
|
||||
- **Remediation:** Implement per-IP and per-user rate limiting
|
||||
|
||||
#### V-017: Insecure Temporary File Creation
|
||||
- **CVSS Score:** 7.2 (High)
|
||||
- **Location:** `tools/code_execution_tool.py`, `tools/credential_files.py`
|
||||
- **Description:** Predictable temp file paths, potential symlink attacks
|
||||
- **Evidence:**
|
||||
```python
|
||||
# code_execution_tool.py line 388
|
||||
tmpdir = tempfile.mkdtemp(prefix="hermes_sandbox_")
|
||||
# Predictable naming scheme
|
||||
```
|
||||
- **Impact:** Local privilege escalation via symlink attacks
|
||||
- **Remediation:** Use tempfile with proper permissions, random suffixes
|
||||
|
||||
---
|
||||
|
||||
### MEDIUM SEVERITY (CVSS 4.0-6.9)
|
||||
|
||||
#### V-018: Weak Approval Pattern Detection
|
||||
- **CVSS Score:** 6.5 (Medium)
|
||||
- **Location:** `tools/approval.py`, DANGEROUS_PATTERNS
|
||||
- **Description:** Pattern list doesn't cover all dangerous command variants
|
||||
- **Impact:** Unauthorized dangerous command execution
|
||||
- **Remediation:** Expand patterns, add behavioral analysis
|
||||
|
||||
#### V-019: Insecure File Permissions on Credentials
|
||||
- **CVSS Score:** 6.4 (Medium)
|
||||
- **Location:** `tools/credential_files.py`, `tools/mcp_oauth.py`
|
||||
- **Description:** Credential files may have overly permissive permissions
|
||||
- **Evidence:**
|
||||
```python
|
||||
# mcp_oauth.py line 107: chmod 0o600 but no verification
|
||||
path.chmod(0o600)
|
||||
```
|
||||
- **Impact:** Local credential theft
|
||||
- **Remediation:** Verify permissions after creation, use secure umask
|
||||
|
||||
#### V-020: Log Injection via Unsanitized Input
|
||||
- **CVSS Score:** 5.8 (Medium)
|
||||
- **Location:** Multiple logging statements across codebase
|
||||
- **Description:** User-controlled data written directly to logs
|
||||
- **Impact:** Log poisoning, log analysis bypass
|
||||
- **Remediation:** Sanitize all logged data, use structured logging
|
||||
|
||||
#### V-021: XML External Entity (XXE) Risk
|
||||
- **CVSS Score:** 6.2 (Medium)
|
||||
- **Location:** `skills/productivity/powerpoint/scripts/office/schemas/` XML parsing
|
||||
- **Description:** PowerPoint processing uses XML without explicit XXE protection
|
||||
- **Impact:** File disclosure, SSRF via XML entities
|
||||
- **Remediation:** Disable external entities in XML parsers
|
||||
|
||||
#### V-022: Unsafe YAML Loading
|
||||
- **CVSS Score:** 6.1 (Medium)
|
||||
- **Location:** `hermes_cli/config.py`, `tools/skills_guard.py`
|
||||
- **Description:** `yaml.safe_load` is used, but custom constructors may still introduce risk
|
||||
- **Impact:** Code execution via malicious YAML
|
||||
- **Remediation:** Audit all YAML loading, disable unsafe tags
|
||||
|
||||
#### V-023: Prototype Pollution in JavaScript Bridge
|
||||
- **CVSS Score:** 5.9 (Medium)
|
||||
- **Location:** `scripts/whatsapp-bridge/bridge.js`
|
||||
- **Description:** Object property assignments without validation
|
||||
- **Impact:** Logic bypass, potential RCE in Node context
|
||||
- **Remediation:** Validate all object keys, use Map instead of Object
|
||||
|
||||
#### V-024: Insufficient Subagent Isolation
|
||||
- **CVSS Score:** 6.3 (Medium)
|
||||
- **Location:** `tools/delegate_tool.py`
|
||||
- **Description:** Subagents share filesystem and network with parent
|
||||
- **Impact:** Lateral movement, privilege escalation between agents
|
||||
- **Remediation:** Implement stronger sandbox boundaries per subagent
|
||||
|
||||
#### V-025: Predictable Session IDs
|
||||
- **CVSS Score:** 5.5 (Medium)
|
||||
- **Location:** `gateway/session.py`, `tools/terminal_tool.py`
|
||||
- **Description:** Session/task IDs are generated with uuid4 but may be written to logs, where they can be read and reused
|
||||
- **Impact:** Session hijacking
|
||||
- **Remediation:** Use cryptographically secure random, short-lived tokens
|
||||
|
||||
#### V-026: Missing Integrity Checks on External Binaries
|
||||
- **CVSS Score:** 5.7 (Medium)
|
||||
- **Location:** `tools/tirith_security.py`, auto-install process
|
||||
- **Description:** Binary download with limited verification
|
||||
- **Evidence:** SHA-256 verified but no code signing verification by default
|
||||
- **Impact:** Supply chain compromise
|
||||
- **Remediation:** Require signature verification, pin versions
|
||||
|
||||
#### V-027: Information Leakage in Debug Mode
|
||||
- **CVSS Score:** 5.2 (Medium)
|
||||
- **Location:** `tools/debug_helpers.py`, `agent/display.py`
|
||||
- **Description:** Debug output may contain sensitive configuration
|
||||
- **Impact:** Information disclosure
|
||||
- **Remediation:** Redact secrets in all debug output
|
||||
|
||||
---
|
||||
|
||||
### LOW SEVERITY (CVSS 0.1-3.9)
|
||||
|
||||
#### V-028: Missing Security Headers
|
||||
- **CVSS Score:** 3.7 (Low)
|
||||
- **Location:** `gateway/platforms/api_server.py`
|
||||
- **Description:** Some security headers missing (CSP, HSTS)
|
||||
- **Remediation:** Add comprehensive security headers
|
||||
|
||||
#### V-029: Verbose Version Information
|
||||
- **CVSS Score:** 2.3 (Low)
|
||||
- **Location:** Multiple version endpoints
|
||||
- **Description:** Detailed version information exposed
|
||||
- **Remediation:** Minimize version disclosure
|
||||
|
||||
#### V-030: Unused Imports and Dead Code
|
||||
- **CVSS Score:** 2.0 (Low)
|
||||
- **Location:** Multiple files
|
||||
- **Description:** Dead code increases attack surface
|
||||
- **Remediation:** Remove unused code, regular audits
|
||||
|
||||
#### V-031: Weak Cryptographic Practices
|
||||
- **CVSS Score:** 3.2 (Low)
|
||||
- **Location:** `hermes_cli/auth.py`, token handling
|
||||
- **Description:** No encryption at rest for auth tokens
|
||||
- **Remediation:** Use OS keychain, encrypt sensitive data
|
||||
|
||||
#### V-032: Missing Input Length Validation
|
||||
- **CVSS Score:** 3.5 (Low)
|
||||
- **Location:** Multiple tool input handlers
|
||||
- **Description:** No maximum length checks on inputs
|
||||
- **Remediation:** Add length validation to all inputs
|
||||
|
||||
---
|
||||
|
||||
## 2. ATTACK SURFACE DIAGRAM
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ EXTERNAL ATTACK SURFACE │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Telegram │ │ Discord │ │ Slack │ │ Web Browser │ │
|
||||
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
|
||||
│ │ │ │ │ │
|
||||
│ ┌──────▼───────┐ ┌──────▼───────┐ ┌──────▼───────┐ ┌──────▼───────┐ │
|
||||
│ │ Gateway │──│ Gateway │──│ Gateway │──│ Gateway │ │
|
||||
│ │ Adapter │ │ Adapter │ │ Adapter │ │ Adapter │ │
|
||||
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
|
||||
│ └─────────────────┴─────────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ ┌──────▼───────┐ ┌──────▼───────┐ │
|
||||
│ │ API Server │◄─────────────────│ Web API │ │
|
||||
│ │ (HTTP) │ │ Endpoints │ │
|
||||
│ └──────┬───────┘ └──────────────┘ │
|
||||
│ │ │
|
||||
└───────────────────────────┼───────────────────────────────────────────────┘
|
||||
│
|
||||
┌───────────────────────────┼───────────────────────────────────────────────┐
|
||||
│ INTERNAL ATTACK SURFACE │
|
||||
├───────────────────────────┼───────────────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ ┌──────▼───────┐ │
|
||||
│ │ AI Agent │ │
|
||||
│ │ Core │ │
|
||||
│ └──────┬───────┘ │
|
||||
│ │ │
|
||||
│ ┌─────────────────┼─────────────────┐ │
|
||||
│ │ │ │ │
|
||||
│ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ │
|
||||
│ │ Tools │ │ Tools │ │ Tools │ │
|
||||
│ │ File │ │ Terminal│ │ Web │ │
|
||||
│ │ Ops │ │ Exec │ │ Tools │ │
|
||||
│ └────┬────┘ └────┬────┘ └────┬────┘ │
|
||||
│ │ │ │ │
|
||||
│ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ │
|
||||
│ │ Local │ │ Docker │ │ Browser │ │
|
||||
│ │ FS │ │Sandbox │ │ Tool │ │
|
||||
│ └─────────┘ └────┬────┘ └────┬────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────▼─────┐ ┌────▼────┐ │
|
||||
│ │ Modal │ │ Cloud │ │
|
||||
│ │ Cloud │ │ Browser │ │
|
||||
│ └───────────┘ └─────────┘ │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ CREDENTIAL STORAGE │ │
|
||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
||||
│ │ │ auth.json│ │ .env │ │mcp-tokens│ │ skill │ │ │
|
||||
│ │ │ (OAuth) │ │ (API Key)│ │ (OAuth) │ │ creds │ │ │
|
||||
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
└──────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
LEGEND:
|
||||
■ Entry points (external attack surface)
|
||||
■ Internal components (privilege escalation targets)
|
||||
■ Credential storage (high-value targets)
|
||||
■ Sandboxed environments (isolation boundaries)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. MITIGATION ROADMAP
|
||||
|
||||
### Phase 1: Critical Fixes (Week 1-2)
|
||||
|
||||
| Priority | Fix | Owner | Est. Hours |
|
||||
|----------|-----|-------|------------|
|
||||
| P0 | Remove all shell=True subprocess calls | Security Team | 16 |
|
||||
| P0 | Implement strict path sandboxing | Security Team | 12 |
|
||||
| P0 | Fix secret leakage in child processes | Security Team | 8 |
|
||||
| P0 | Add connection-level URL validation | Security Team | 8 |
|
||||
|
||||
### Phase 2: High Priority (Week 3-4)
|
||||
|
||||
| Priority | Fix | Owner | Est. Hours |
|
||||
|----------|-----|-------|------------|
|
||||
| P1 | Implement proper input validation framework | Dev Team | 20 |
|
||||
| P1 | Add CORS strict mode | Dev Team | 4 |
|
||||
| P1 | Fix OAuth state validation | Dev Team | 6 |
|
||||
| P1 | Add rate limiting | Dev Team | 10 |
|
||||
| P1 | Implement secure credential storage | Security Team | 12 |
|
||||
|
||||
### Phase 3: Medium Priority (Month 2)
|
||||
|
||||
| Priority | Fix | Owner | Est. Hours |
|
||||
|----------|-----|-------|------------|
|
||||
| P2 | Expand dangerous command patterns | Security Team | 6 |
|
||||
| P2 | Add AST-based skill scanning | Security Team | 16 |
|
||||
| P2 | Implement subagent isolation | Dev Team | 20 |
|
||||
| P2 | Add comprehensive audit logging | Dev Team | 12 |
|
||||
|
||||
### Phase 4: Long-term Improvements (Month 3+)
|
||||
|
||||
| Priority | Fix | Owner | Est. Hours |
|
||||
|----------|-----|-------|------------|
|
||||
| P3 | Security headers hardening | Dev Team | 4 |
|
||||
| P3 | Code signing verification | Security Team | 8 |
|
||||
| P3 | Supply chain security | Dev Team | 12 |
|
||||
| P3 | Regular security audits | Security Team | Ongoing |
|
||||
|
||||
---
|
||||
|
||||
## 4. SECURE CODING GUIDELINES
|
||||
|
||||
### 4.1 Command Execution
|
||||
```python
|
||||
# ❌ NEVER DO THIS
|
||||
subprocess.run(f"ls {user_input}", shell=True)
|
||||
|
||||
# ✅ DO THIS
|
||||
subprocess.run(["ls", user_input], shell=False)
|
||||
|
||||
# ✅ OR USE SHLEX
|
||||
import shlex
|
||||
subprocess.run(["ls"] + shlex.split(user_input), shell=False)
|
||||
```
|
||||
|
||||
### 4.2 Path Handling
|
||||
```python
|
||||
# ❌ NEVER DO THIS
|
||||
open(os.path.expanduser(user_path), "r")
|
||||
|
||||
# ✅ DO THIS
|
||||
from pathlib import Path
|
||||
safe_root = Path("/allowed/path").resolve()
|
||||
user_path = Path(user_path).expanduser().resolve()
|
||||
if not user_path.is_relative_to(safe_root):  # component-wise check; a string prefix test would accept "/allowed/path_evil"
|
||||
raise PermissionError("Path outside sandbox")
|
||||
```
|
||||
|
||||
### 4.3 Secret Handling
|
||||
```python
|
||||
# ❌ NEVER DO THIS
|
||||
os.environ["API_KEY"] = user_api_key # Visible to all child processes
|
||||
|
||||
# ✅ DO THIS
|
||||
# Use file descriptor passing or explicit whitelisting
|
||||
child_env = {k: v for k, v in os.environ.items()
|
||||
if k in ALLOWED_ENV_VARS}
|
||||
```
|
||||
|
||||
### 4.4 URL Validation
|
||||
```python
|
||||
# ❌ NEVER DO THIS
|
||||
response = requests.get(user_url)
|
||||
|
||||
# ✅ DO THIS
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(user_url)
|
||||
if parsed.scheme not in ("http", "https"):
|
||||
raise ValueError("Invalid scheme")
|
||||
if parsed.hostname not in ALLOWED_HOSTS:
|
||||
raise ValueError("Host not allowed")
|
||||
```
|
||||
|
||||
### 4.5 Input Validation
|
||||
```python
|
||||
# Use pydantic for all user inputs
|
||||
from pydantic import BaseModel, validator
|
||||
|
||||
class FileRequest(BaseModel):
|
||||
path: str
|
||||
max_size: int = 1000
|
||||
|
||||
@validator('path')
|
||||
def validate_path(cls, v):
|
||||
if '..' in v or v.startswith('/'):
|
||||
raise ValueError('Invalid path')
|
||||
return v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. SPECIFIC SECURITY FIXES NEEDED
|
||||
|
||||
### Fix 1: Terminal Tool Command Injection (V-001)
|
||||
```python
|
||||
# CURRENT CODE (tools/terminal_tool.py ~line 457)
|
||||
cmd = [self._docker_exe, "exec", "-w", work_dir, self._container_id,
|
||||
"bash", "-lc", exec_command]
|
||||
|
||||
# SECURE FIX
|
||||
cmd = [self._docker_exe, "exec", "-w", work_dir, self._container_id,
|
||||
"bash", "-lc", exec_command]
|
||||
# Add strict input validation that runs before the command is executed
|
||||
if not _is_safe_command(exec_command):
|
||||
raise SecurityError("Dangerous command detected")
|
||||
```
|
||||
|
||||
### Fix 2: File Operations Path Traversal (V-002)
|
||||
```python
|
||||
# CURRENT CODE (tools/file_operations.py ~line 409)
|
||||
def _expand_path(self, path: str) -> str:
|
||||
if path.startswith('~'):
|
||||
# ... expansion logic
|
||||
|
||||
# SECURE FIX
|
||||
def _expand_path(self, path: str) -> str:
|
||||
safe_root = Path(self.cwd).resolve()
|
||||
expanded = Path(path).expanduser().resolve()
|
||||
if not expanded.is_relative_to(safe_root):  # component-wise check; a string prefix test would accept sibling dirs sharing the prefix
|
||||
raise PermissionError(f"Path {path} outside allowed directory")
|
||||
return str(expanded)
|
||||
```
|
||||
|
||||
### Fix 3: Code Execution Environment Sanitization (V-003)
|
||||
```python
|
||||
# CURRENT CODE (tools/code_execution_tool.py ~lines 434-461)
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", ...)
|
||||
_SECRET_SUBSTRINGS = ("TOKEN", "SECRET", ...)
|
||||
|
||||
# SECURE FIX - Whitelist approach
|
||||
_ALLOWED_ENV_VARS = frozenset([
|
||||
"PATH", "HOME", "USER", "LANG", "LC_ALL",
|
||||
"PYTHONPATH", "TERM", "SHELL", "PWD"
|
||||
])
|
||||
child_env = {k: v for k, v in os.environ.items()
|
||||
if k in _ALLOWED_ENV_VARS}
|
||||
# Explicitly load only non-secret values
|
||||
```
|
||||
|
||||
### Fix 4: API Server Authentication (V-009)
|
||||
```python
|
||||
# CURRENT CODE (gateway/platforms/api_server.py ~line 360-361)
|
||||
if not self._api_key:
|
||||
return None # No key configured — allow all
|
||||
|
||||
# SECURE FIX
|
||||
if not self._api_key:
|
||||
logger.error("API server started without authentication")
|
||||
return web.json_response(
|
||||
{"error": "Server misconfigured - auth required"},
|
||||
status=500
|
||||
)
|
||||
```
|
||||
|
||||
### Fix 5: CORS Configuration (V-008)
|
||||
```python
|
||||
# CURRENT CODE (gateway/platforms/api_server.py ~lines 324-328)
|
||||
if "*" in self._cors_origins:
|
||||
headers["Access-Control-Allow-Origin"] = "*"
|
||||
|
||||
# SECURE FIX - Never allow wildcard with credentials
|
||||
if "*" in self._cors_origins:
|
||||
logger.warning("Wildcard CORS not allowed with credentials")
|
||||
return None
|
||||
```
|
||||
|
||||
### Fix 6: OAuth State Validation (V-014)
|
||||
```python
|
||||
# CURRENT CODE (tools/mcp_oauth.py ~line 186)
|
||||
code, state = await _wait_for_callback()
|
||||
|
||||
# SECURE FIX
|
||||
stored_state = get_stored_state()
|
||||
if state != stored_state:
|
||||
raise SecurityError("OAuth state mismatch - possible CSRF attack")
|
||||
```
|
||||
|
||||
### Fix 7: Docker Volume Mount Validation (V-012)
|
||||
```python
|
||||
# CURRENT CODE (tools/environments/docker.py ~line 267)
|
||||
volume_args.extend(["-v", vol])
|
||||
|
||||
# SECURE FIX
|
||||
_BLOCKED_PATHS = ['/var/run/docker.sock', '/proc', '/sys', ...]
|
||||
if any(blocked in vol for blocked in _BLOCKED_PATHS):
|
||||
raise SecurityError(f"Volume mount {vol} not allowed")
|
||||
volume_args.extend(["-v", vol])
|
||||
```
|
||||
|
||||
### Fix 8: Debug Output Redaction (V-027)
|
||||
```python
|
||||
# Add to all debug logging
|
||||
from agent.redact import redact_sensitive_text
|
||||
logger.debug(redact_sensitive_text(debug_message))
|
||||
```
|
||||
|
||||
### Fix 9: Input Length Validation
|
||||
```python
|
||||
# Add to all tool entry points
|
||||
MAX_INPUT_LENGTH = 10000
|
||||
if len(user_input) > MAX_INPUT_LENGTH:
|
||||
raise ValueError(f"Input exceeds maximum length of {MAX_INPUT_LENGTH}")
|
||||
```
|
||||
|
||||
### Fix 10: Session ID Entropy
|
||||
```python
|
||||
# CURRENT CODE - uses uuid4
|
||||
import uuid
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
# SECURE FIX - use secrets module
|
||||
import secrets
|
||||
session_id = secrets.token_urlsafe(32)
|
||||
```
|
||||
|
||||
### Fix 11-20: Additional Required Fixes
|
||||
11. **Add CSRF protection** to all state-changing operations
|
||||
12. **Implement request signing** for internal service communication
|
||||
13. **Add certificate pinning** for external API calls
|
||||
14. **Implement proper key rotation** for auth tokens
|
||||
15. **Add anomaly detection** for unusual command patterns
|
||||
16. **Implement network segmentation** for sandbox environments
|
||||
17. **Add hardware security module (HSM) support** for key storage
|
||||
18. **Implement behavioral analysis** for skill code
|
||||
19. **Add automated vulnerability scanning** to CI/CD pipeline
|
||||
20. **Implement incident response procedures** for security events
|
||||
|
||||
---
|
||||
|
||||
## 6. SECURITY RECOMMENDATIONS
|
||||
|
||||
### Immediate Actions (Within 24 hours)
|
||||
1. Disable gateway API server if not required
|
||||
2. Enable HERMES_YOLO_MODE only for trusted users
|
||||
3. Review all installed skills from community sources
|
||||
4. Enable comprehensive audit logging
|
||||
|
||||
### Short-term Actions (Within 1 week)
|
||||
1. Deploy all P0 fixes
|
||||
2. Implement monitoring for suspicious command patterns
|
||||
3. Conduct security training for developers
|
||||
4. Establish security review process for new features
|
||||
|
||||
### Long-term Actions (Within 1 month)
|
||||
1. Implement comprehensive security testing
|
||||
2. Establish bug bounty program
|
||||
3. Regular third-party security audits
|
||||
4. Achieve SOC 2 compliance
|
||||
|
||||
---
|
||||
|
||||
## 7. COMPLIANCE MAPPING
|
||||
|
||||
| Vulnerability | OWASP Top 10 | CWE | NIST 800-53 |
|
||||
|---------------|--------------|-----|-------------|
|
||||
| V-001 (Command Injection) | A03:2021 - Injection | CWE-78 | SI-10 |
|
||||
| V-002 (Path Traversal) | A01:2021 - Broken Access Control | CWE-22 | AC-3 |
|
||||
| V-003 (Secret Leakage) | A07:2021 - Auth Failures | CWE-200 | SC-28 |
|
||||
| V-005 (SSRF) | A10:2021 - SSRF | CWE-918 | SC-7 |
|
||||
| V-008 (CORS) | A05:2021 - Security Misconfig | CWE-942 | AC-4 |
|
||||
| V-011 (Skills Bypass) | A08:2021 - Integrity Failures | CWE-353 | SI-7 |
|
||||
|
||||
---
|
||||
|
||||
## APPENDIX A: TESTING RECOMMENDATIONS
|
||||
|
||||
### Security Test Cases
|
||||
1. Command injection with `; rm -rf /`
|
||||
2. Path traversal with `../../../etc/passwd`
|
||||
3. SSRF with `http://169.254.169.254/latest/meta-data/`
|
||||
4. Secret exfiltration via environment variables
|
||||
5. OAuth flow manipulation
|
||||
6. Rate limiting bypass
|
||||
7. Session fixation attacks
|
||||
8. Privilege escalation via sudo
|
||||
|
||||
---
|
||||
|
||||
**Report End**
|
||||
|
||||
*This audit represents a point-in-time assessment. Security is an ongoing process requiring continuous monitoring and improvement.*
|
||||
@@ -1,488 +0,0 @@
|
||||
# SECURITY FIXES CHECKLIST
|
||||
|
||||
## 20+ Specific Security Fixes Required
|
||||
|
||||
This document provides a detailed checklist of all security fixes identified in the comprehensive audit.
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL FIXES (Must implement immediately)
|
||||
|
||||
### Fix 1: Remove shell=True from subprocess calls
|
||||
**File:** `tools/terminal_tool.py`
|
||||
**Line:** ~457
|
||||
**CVSS:** 9.8
|
||||
|
||||
```python
|
||||
# BEFORE
|
||||
subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ...)
|
||||
|
||||
# AFTER
|
||||
# Validate command first
|
||||
if not is_safe_command(exec_command):
|
||||
raise SecurityError("Dangerous command detected")
|
||||
subprocess.Popen(cmd_list, shell=False, ...) # Pass as list
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 2: Implement path sandbox validation
|
||||
**File:** `tools/file_operations.py`
|
||||
**Lines:** 409-420
|
||||
**CVSS:** 9.1
|
||||
|
||||
```python
|
||||
# BEFORE
|
||||
def _expand_path(self, path: str) -> str:
|
||||
if path.startswith('~'):
|
||||
return os.path.expanduser(path)
|
||||
return path
|
||||
|
||||
# AFTER
|
||||
def _expand_path(self, path: str) -> Path:
|
||||
safe_root = Path(self.cwd).resolve()
|
||||
expanded = Path(path).expanduser().resolve()
|
||||
if not expanded.is_relative_to(safe_root):  # component-wise check; a string prefix test would accept sibling dirs sharing the prefix
|
||||
raise PermissionError(f"Path {path} outside allowed directory")
|
||||
return expanded
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 3: Environment variable sanitization
|
||||
**File:** `tools/code_execution_tool.py`
|
||||
**Lines:** 434-461
|
||||
**CVSS:** 9.3
|
||||
|
||||
```python
|
||||
# BEFORE
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", ...)
|
||||
_SECRET_SUBSTRINGS = ("TOKEN", "SECRET", ...)
|
||||
|
||||
# AFTER
|
||||
_ALLOWED_ENV_VARS = frozenset([
|
||||
"PATH", "HOME", "USER", "LANG", "LC_ALL",
|
||||
"TERM", "SHELL", "PWD", "PYTHONPATH"
|
||||
])
|
||||
child_env = {k: v for k, v in os.environ.items()
|
||||
if k in _ALLOWED_ENV_VARS}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 4: Secure sudo password handling
|
||||
**File:** `tools/terminal_tool.py`
|
||||
**Line:** 275
|
||||
**CVSS:** 9.0
|
||||
|
||||
```python
|
||||
# BEFORE
|
||||
exec_command = f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
|
||||
|
||||
# AFTER
|
||||
# Pass the password through an owner-only temporary file instead of embedding it in the command line
|
||||
with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
|
||||
f.write(sudo_stdin)
|
||||
pass_file = f.name
|
||||
os.chmod(pass_file, 0o600)
|
||||
exec_command = f"cat {pass_file} | {exec_command}"
|
||||
# Clean up after execution
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 5: Connection-level URL validation
|
||||
**File:** `tools/url_safety.py`
|
||||
**Lines:** 50-96
|
||||
**CVSS:** 9.4
|
||||
|
||||
```python
|
||||
# AFTER - Add to is_safe_url()
|
||||
# After DNS resolution, verify IP is not in private range
|
||||
def _validate_connection_ip(hostname: str) -> bool:
|
||||
try:
|
||||
addr = socket.getaddrinfo(hostname, None)
|
||||
for a in addr:
|
||||
ip = ipaddress.ip_address(a[4][0])
|
||||
if ip.is_private or ip.is_loopback or ip.is_reserved:
|
||||
return False
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## HIGH PRIORITY FIXES
|
||||
|
||||
### Fix 6: MCP OAuth token validation
|
||||
**File:** `tools/mcp_oauth.py`
|
||||
**Lines:** 66-89
|
||||
**CVSS:** 8.8
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
async def get_tokens(self):
|
||||
data = self._read_json(self._tokens_path())
|
||||
if not data:
|
||||
return None
|
||||
# Add schema validation
|
||||
if not self._validate_token_schema(data):
|
||||
logger.error("Invalid token schema, deleting corrupted tokens")
|
||||
self.remove()
|
||||
return None
|
||||
return OAuthToken(**data)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 7: API Server SQL injection prevention
|
||||
**File:** `gateway/platforms/api_server.py`
|
||||
**Lines:** 98-126
|
||||
**CVSS:** 8.5
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
import uuid
|
||||
|
||||
def _validate_response_id(self, response_id: str) -> bool:
|
||||
"""Validate response_id format to prevent injection."""
|
||||
try:
|
||||
uuid.UUID(response_id.split('-')[0], version=4)
|
||||
return True
|
||||
except (ValueError, IndexError):
|
||||
return False
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 8: CORS strict validation
|
||||
**File:** `gateway/platforms/api_server.py`
|
||||
**Lines:** 324-328
|
||||
**CVSS:** 8.2
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
if "*" in self._cors_origins:
|
||||
logger.error("Wildcard CORS not allowed with credentials")
|
||||
return None # Reject wildcard with credentials
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 9: Require explicit API key
|
||||
**File:** `gateway/platforms/api_server.py`
|
||||
**Lines:** 360-361
|
||||
**CVSS:** 8.1
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
if not self._api_key:
|
||||
logger.error("API server started without authentication")
|
||||
return web.json_response(
|
||||
{"error": "Server authentication not configured"},
|
||||
status=500
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 10: CDP URL validation
|
||||
**File:** `tools/browser_tool.py`
|
||||
**Lines:** 195-208
|
||||
**CVSS:** 8.4
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
def _resolve_cdp_override(self, cdp_url: str) -> str:
|
||||
parsed = urlparse(cdp_url)
|
||||
if parsed.scheme not in ('ws', 'wss', 'http', 'https'):
|
||||
raise ValueError("Invalid CDP scheme")
|
||||
if parsed.hostname not in self._allowed_cdp_hosts:
|
||||
raise ValueError("CDP host not in allowlist")
|
||||
return cdp_url
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 11: Skills guard normalization
|
||||
**File:** `tools/skills_guard.py`
|
||||
**Lines:** 82-484
|
||||
**CVSS:** 7.8
|
||||
|
||||
```python
|
||||
# AFTER - Add to scan_skill()
|
||||
def normalize_for_scanning(content: str) -> str:
|
||||
"""Normalize content to detect obfuscated threats."""
|
||||
# Normalize Unicode
|
||||
content = unicodedata.normalize('NFKC', content)
|
||||
# Normalize case
|
||||
content = content.lower()
|
||||
# Remove common obfuscation
|
||||
content = content.replace('\\x', '')
|
||||
content = content.replace('\\u', '')
|
||||
return content
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 12: Docker volume validation
|
||||
**File:** `tools/environments/docker.py`
|
||||
**Line:** 267
|
||||
**CVSS:** 8.7
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
_BLOCKED_PATHS = ['/var/run/docker.sock', '/proc', '/sys', '/dev']
|
||||
for vol in volumes:
|
||||
if any(blocked in vol for blocked in _BLOCKED_PATHS):
|
||||
raise SecurityError(f"Volume mount {vol} blocked")
|
||||
volume_args.extend(["-v", vol])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 13: Secure error messages
|
||||
**File:** Multiple files
|
||||
**CVSS:** 7.5
|
||||
|
||||
```python
|
||||
# AFTER - Add to all exception handlers
|
||||
try:
|
||||
operation()
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}", exc_info=True) # Full details for logs
|
||||
raise UserError("Operation failed") # Generic for user
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 14: OAuth state validation
|
||||
**File:** `tools/mcp_oauth.py`
|
||||
**Line:** 186
|
||||
**CVSS:** 7.6
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
code, state = await _wait_for_callback()
|
||||
stored_state = storage.get_state()
|
||||
if not hmac.compare_digest(state, stored_state):
|
||||
raise SecurityError("OAuth state mismatch - possible CSRF")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 15: File operation race condition fix
|
||||
**File:** `tools/file_operations.py`
|
||||
**CVSS:** 7.4
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
import fcntl
|
||||
|
||||
def safe_file_access(path: Path):
|
||||
fd = os.open(path, os.O_RDONLY)
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_SH)
|
||||
# Perform operations on fd, not path
|
||||
return os.read(fd, size)
|
||||
finally:
|
||||
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||
os.close(fd)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 16: Add rate limiting
|
||||
**File:** `gateway/platforms/api_server.py`
|
||||
**CVSS:** 7.3
|
||||
|
||||
```python
|
||||
# AFTER - Add middleware
|
||||
from aiohttp_limiter import Limiter
|
||||
|
||||
limiter = Limiter(
|
||||
rate=100, # requests
|
||||
per=60, # per minute
|
||||
key_func=lambda req: req.remote
|
||||
)
|
||||
|
||||
@app.middleware
|
||||
async def rate_limit_middleware(request, handler):
|
||||
if not limiter.is_allowed(request):
|
||||
return web.json_response(
|
||||
{"error": "Rate limit exceeded"},
|
||||
status=429
|
||||
)
|
||||
return await handler(request)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 17: Secure temp file creation
|
||||
**File:** `tools/code_execution_tool.py`
|
||||
**Line:** 388
|
||||
**CVSS:** 7.2
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
fd, tmpdir = tempfile.mkstemp(prefix="hermes_sandbox_", suffix=".tmp")
|
||||
os.chmod(tmpdir, 0o700) # Owner only
|
||||
os.close(fd)
|
||||
# Use tmpdir securely
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## MEDIUM PRIORITY FIXES
|
||||
|
||||
### Fix 18: Expand dangerous patterns
|
||||
**File:** `tools/approval.py`
|
||||
**Lines:** 40-78
|
||||
**CVSS:** 6.5
|
||||
|
||||
Add patterns:
|
||||
```python
|
||||
(r'\bcurl\s+.*\|\s*sh\b', "pipe remote content to shell"),
|
||||
(r'\bwget\s+.*\|\s*bash\b', "pipe remote content to shell"),
|
||||
(r'python\s+-c\s+.*import\s+os', "python os import"),
|
||||
(r'perl\s+-e\s+.*system', "perl system call"),
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 19: Credential file permissions
|
||||
**File:** `tools/credential_files.py`, `tools/mcp_oauth.py`
|
||||
**CVSS:** 6.4
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
def _write_json(path: Path, data: dict) -> None:
|
||||
path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
path.chmod(0o600)
|
||||
# Verify permissions were set
|
||||
stat = path.stat()
|
||||
if stat.st_mode & 0o077:
|
||||
raise SecurityError("Failed to set restrictive permissions")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 20: Log sanitization
|
||||
**File:** Multiple logging statements
|
||||
**CVSS:** 5.8
|
||||
|
||||
```python
|
||||
# AFTER
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
# In all logging calls
|
||||
logger.info(redact_sensitive_text(f"Processing {user_input}"))
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ADDITIONAL FIXES (21-32)
|
||||
|
||||
### Fix 21: XXE Prevention
|
||||
**File:** PowerPoint XML processing
|
||||
Add:
|
||||
```python
|
||||
from defusedxml import ElementTree as ET
|
||||
# Use defusedxml instead of standard xml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 22: YAML Safe Loading Audit
|
||||
**File:** `hermes_cli/config.py`
|
||||
Audit all yaml.safe_load calls for custom constructors.
|
||||
|
||||
---
|
||||
|
||||
### Fix 23: Prototype Pollution Fix
|
||||
**File:** `scripts/whatsapp-bridge/bridge.js`
|
||||
Use Map instead of Object for user-controlled keys.
|
||||
|
||||
---
|
||||
|
||||
### Fix 24: Subagent Isolation
|
||||
**File:** `tools/delegate_tool.py`
|
||||
Implement filesystem namespace isolation.
|
||||
|
||||
---
|
||||
|
||||
### Fix 25: Secure Session IDs
|
||||
**File:** `gateway/session.py`
|
||||
Use secrets.token_urlsafe(32) instead of uuid4.
|
||||
|
||||
---
|
||||
|
||||
### Fix 26: Binary Integrity Checks
|
||||
**File:** `tools/tirith_security.py`
|
||||
Require GPG signature verification.
|
||||
|
||||
---
|
||||
|
||||
### Fix 27: Debug Output Redaction
|
||||
**File:** `tools/debug_helpers.py`
|
||||
Apply redact_sensitive_text to all debug output.
|
||||
|
||||
---
|
||||
|
||||
### Fix 28: Security Headers
|
||||
**File:** `gateway/platforms/api_server.py`
|
||||
Add:
|
||||
```python
|
||||
"Content-Security-Policy": "default-src 'self'",
|
||||
"Strict-Transport-Security": "max-age=31536000",
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Fix 29: Version Information Minimization
|
||||
**File:** Version endpoints
|
||||
Return minimal version information publicly.
|
||||
|
||||
---
|
||||
|
||||
### Fix 30: Dead Code Removal
|
||||
**File:** Multiple
|
||||
Remove unused imports and functions.
|
||||
|
||||
---
|
||||
|
||||
### Fix 31: Token Encryption at Rest
|
||||
**File:** `hermes_cli/auth.py`
|
||||
Use OS keychain or encrypt auth.json.
|
||||
|
||||
---
|
||||
|
||||
### Fix 32: Input Length Validation
|
||||
**File:** All tool entry points
|
||||
Add MAX_INPUT_LENGTH checks everywhere.
|
||||
|
||||
---
|
||||
|
||||
## IMPLEMENTATION VERIFICATION
|
||||
|
||||
### Testing Requirements
|
||||
- [ ] All fixes have unit tests
|
||||
- [ ] Security regression tests pass
|
||||
- [ ] Fuzzing shows no new vulnerabilities
|
||||
- [ ] Penetration test completed
|
||||
- [ ] Code review by security team
|
||||
|
||||
### Sign-off Required
|
||||
- [ ] Security Team Lead
|
||||
- [ ] Engineering Manager
|
||||
- [ ] QA Lead
|
||||
- [ ] DevOps Lead
|
||||
|
||||
---
|
||||
|
||||
**Last Updated:** March 30, 2026
|
||||
**Next Review:** After all P0/P1 fixes completed
|
||||
@@ -1,359 +0,0 @@
|
||||
# SECURITY MITIGATION ROADMAP
|
||||
|
||||
## Hermes Agent Security Remediation Plan
|
||||
**Version:** 1.0
|
||||
**Date:** March 30, 2026
|
||||
**Status:** Draft for Implementation
|
||||
|
||||
---
|
||||
|
||||
## EXECUTIVE SUMMARY
|
||||
|
||||
This roadmap provides a structured approach to addressing the 32 security vulnerabilities identified in the comprehensive security audit. The plan is organized into four phases, prioritizing fixes by risk and impact.
|
||||
|
||||
---
|
||||
|
||||
## PHASE 1: CRITICAL FIXES (Week 1-2)
|
||||
**Target:** Eliminate all CVSS 9.0+ vulnerabilities
|
||||
|
||||
### 1.1 Remove shell=True Subprocess Calls (V-001)
|
||||
**Owner:** Security Team Lead
|
||||
**Estimated Effort:** 16 hours
|
||||
**Priority:** P0
|
||||
|
||||
#### Tasks:
|
||||
- [ ] Audit all subprocess calls in codebase
|
||||
- [ ] Replace shell=True with argument lists
|
||||
- [ ] Implement shlex.quote for necessary string interpolation
|
||||
- [ ] Add input validation wrappers
|
||||
|
||||
#### Files to Modify:
|
||||
- `tools/terminal_tool.py`
|
||||
- `tools/file_operations.py`
|
||||
- `tools/environments/docker.py`
|
||||
- `tools/environments/modal.py`
|
||||
- `tools/environments/ssh.py`
|
||||
- `tools/environments/singularity.py`
|
||||
|
||||
#### Testing:
|
||||
- [ ] Unit tests for all command execution paths
|
||||
- [ ] Fuzzing with malicious inputs
|
||||
- [ ] Penetration testing
|
||||
|
||||
---
|
||||
|
||||
### 1.2 Implement Strict Path Sandboxing (V-002)
|
||||
**Owner:** Security Team Lead
|
||||
**Estimated Effort:** 12 hours
|
||||
**Priority:** P0
|
||||
|
||||
#### Tasks:
|
||||
- [ ] Create PathValidator class
|
||||
- [ ] Implement canonical path resolution
|
||||
- [ ] Add path traversal detection
|
||||
- [ ] Enforce sandbox root boundaries
|
||||
|
||||
#### Implementation:
|
||||
```python
|
||||
class PathValidator:
|
||||
def __init__(self, sandbox_root: Path):
|
||||
self.sandbox_root = sandbox_root.resolve()
|
||||
|
||||
def validate(self, user_path: str) -> Path:
|
||||
expanded = Path(user_path).expanduser().resolve()
|
||||
if not expanded.is_relative_to(self.sandbox_root):  # component-wise check; a string prefix test would accept sibling dirs sharing the prefix
|
||||
raise SecurityError("Path outside sandbox")
|
||||
return expanded
|
||||
```
|
||||
|
||||
#### Files to Modify:
|
||||
- `tools/file_operations.py`
|
||||
- `tools/file_tools.py`
|
||||
- All environment implementations
|
||||
|
||||
---
|
||||
|
||||
### 1.3 Fix Secret Leakage in Child Processes (V-003)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 8 hours
|
||||
**Priority:** P0
|
||||
|
||||
#### Tasks:
|
||||
- [ ] Create environment variable whitelist
|
||||
- [ ] Implement secret detection patterns
|
||||
- [ ] Add env var scrubbing for child processes
|
||||
- [ ] Audit credential file mounting
|
||||
|
||||
#### Whitelist Approach:
|
||||
```python
|
||||
_ALLOWED_ENV_VARS = frozenset([
|
||||
"PATH", "HOME", "USER", "LANG", "LC_ALL",
|
||||
"TERM", "SHELL", "PWD", "OLDPWD",
|
||||
"PYTHONPATH", "PYTHONHOME", "PYTHONNOUSERSITE",
|
||||
"DISPLAY", "XDG_SESSION_TYPE", # GUI apps
|
||||
])
|
||||
|
||||
def sanitize_environment():
|
||||
return {k: v for k, v in os.environ.items()
|
||||
if k in _ALLOWED_ENV_VARS}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 1.4 Add Connection-Level URL Validation (V-005)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 8 hours
|
||||
**Priority:** P0
|
||||
|
||||
#### Tasks:
|
||||
- [ ] Implement egress proxy option
|
||||
- [ ] Add connection-level IP validation
|
||||
- [ ] Validate redirect targets
|
||||
- [ ] Block private IP ranges at socket level
|
||||
|
||||
---
|
||||
|
||||
## PHASE 2: HIGH PRIORITY (Week 3-4)
|
||||
**Target:** Address all CVSS 7.0-8.9 vulnerabilities
|
||||
|
||||
### 2.1 Implement Input Validation Framework (V-006, V-007)
|
||||
**Owner:** Senior Developer
|
||||
**Estimated Effort:** 20 hours
|
||||
**Priority:** P1
|
||||
|
||||
#### Tasks:
|
||||
- [ ] Create Pydantic models for all tool inputs
|
||||
- [ ] Implement length validation
|
||||
- [ ] Add character allowlisting
|
||||
- [ ] Create validation decorators
|
||||
|
||||
---
|
||||
|
||||
### 2.2 Fix CORS Configuration (V-008)
|
||||
**Owner:** Backend Developer
|
||||
**Estimated Effort:** 4 hours
|
||||
**Priority:** P1
|
||||
|
||||
#### Changes:
|
||||
- Remove wildcard support when credentials enabled
|
||||
- Implement strict origin validation
|
||||
- Add origin allowlist configuration
|
||||
|
||||
---
|
||||
|
||||
### 2.3 Fix Authentication Bypass (V-009)
|
||||
**Owner:** Backend Developer
|
||||
**Estimated Effort:** 4 hours
|
||||
**Priority:** P1
|
||||
|
||||
#### Changes:
|
||||
```python
|
||||
# Fail-closed default
|
||||
if not self._api_key:
|
||||
logger.error("API server requires authentication")
|
||||
return web.json_response(
|
||||
{"error": "Authentication required"},
|
||||
status=401
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2.4 Fix OAuth State Validation (V-014)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 6 hours
|
||||
**Priority:** P1
|
||||
|
||||
#### Tasks:
|
||||
- Store state parameter in session
|
||||
- Cryptographically verify callback state
|
||||
- Implement state expiration
|
||||
|
||||
---
|
||||
|
||||
### 2.5 Add Rate Limiting (V-016)
|
||||
**Owner:** Backend Developer
|
||||
**Estimated Effort:** 10 hours
|
||||
**Priority:** P1
|
||||
|
||||
#### Implementation:
|
||||
- Per-IP rate limiting: 100 requests/minute
|
||||
- Per-user rate limiting: 1000 requests/hour
|
||||
- Endpoint-specific limits
|
||||
- Sliding window algorithm
|
||||
|
||||
---
|
||||
|
||||
### 2.6 Secure Credential Storage (V-019, V-031)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 12 hours
|
||||
**Priority:** P1
|
||||
|
||||
#### Tasks:
|
||||
- Implement OS keychain integration
|
||||
- Add file encryption at rest
|
||||
- Implement secure key derivation
|
||||
- Add access audit logging
|
||||
|
||||
---
|
||||
|
||||
## PHASE 3: MEDIUM PRIORITY (Month 2)
|
||||
**Target:** Address CVSS 4.0-6.9 vulnerabilities
|
||||
|
||||
### 3.1 Expand Dangerous Command Patterns (V-018)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 6 hours
|
||||
**Priority:** P2
|
||||
|
||||
#### Add Patterns:
|
||||
- More encoding variants (base64, hex, unicode)
|
||||
- Alternative shell syntaxes
|
||||
- Indirect command execution
|
||||
- Environment variable abuse
|
||||
|
||||
---
|
||||
|
||||
### 3.2 Add AST-Based Skill Scanning (V-011)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 16 hours
|
||||
**Priority:** P2
|
||||
|
||||
#### Implementation:
|
||||
- Parse Python code to AST
|
||||
- Detect dangerous function calls
|
||||
- Analyze import statements
|
||||
- Check for obfuscation patterns
|
||||
|
||||
---
|
||||
|
||||
### 3.3 Implement Subagent Isolation (V-024)
|
||||
**Owner:** Senior Developer
|
||||
**Estimated Effort:** 20 hours
|
||||
**Priority:** P2
|
||||
|
||||
#### Tasks:
|
||||
- Create isolated filesystem per subagent
|
||||
- Implement network namespace isolation
|
||||
- Add resource limits
|
||||
- Implement subagent-to-subagent communication restrictions
|
||||
|
||||
---
|
||||
|
||||
### 3.4 Add Comprehensive Audit Logging (V-013, V-020, V-027)
|
||||
**Owner:** DevOps Engineer
|
||||
**Estimated Effort:** 12 hours
|
||||
**Priority:** P2
|
||||
|
||||
#### Requirements:
|
||||
- Log all tool invocations
|
||||
- Log all authentication events
|
||||
- Log configuration changes
|
||||
- Implement log integrity protection
|
||||
- Add SIEM integration hooks
|
||||
|
||||
---
|
||||
|
||||
## PHASE 4: LONG-TERM IMPROVEMENTS (Month 3+)
|
||||
|
||||
### 4.1 Security Headers Hardening (V-028)
|
||||
**Owner:** Backend Developer
|
||||
**Estimated Effort:** 4 hours
|
||||
|
||||
Add headers:
|
||||
- Content-Security-Policy
|
||||
- Strict-Transport-Security
|
||||
- X-Frame-Options
|
||||
- X-XSS-Protection
|
||||
|
||||
---
|
||||
|
||||
### 4.2 Code Signing Verification (V-026)
|
||||
**Owner:** Security Engineer
|
||||
**Estimated Effort:** 8 hours
|
||||
|
||||
- Require GPG signatures for binaries
|
||||
- Implement signature verification
|
||||
- Pin trusted signing keys
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Supply Chain Security
|
||||
**Owner:** DevOps Engineer
|
||||
**Estimated Effort:** 12 hours
|
||||
|
||||
- Implement dependency scanning
|
||||
- Add SLSA compliance
|
||||
- Use private package registry
|
||||
- Implement SBOM generation
|
||||
|
||||
---
|
||||
|
||||
### 4.4 Automated Security Testing
|
||||
**Owner:** QA Lead
|
||||
**Estimated Effort:** 16 hours
|
||||
|
||||
- Integrate SAST tools (Semgrep, Bandit)
|
||||
- Add DAST to CI/CD
|
||||
- Implement fuzzing
|
||||
- Add security regression tests
|
||||
|
||||
---
|
||||
|
||||
## IMPLEMENTATION TRACKING
|
||||
|
||||
| Week | Deliverables | Owner | Status |
|
||||
|------|-------------|-------|--------|
|
||||
| 1 | P0 Fixes: V-001, V-002 | Security Team | ⏳ Planned |
|
||||
| 1 | P0 Fixes: V-003, V-005 | Security Team | ⏳ Planned |
|
||||
| 2 | P0 Testing & Validation | QA Team | ⏳ Planned |
|
||||
| 3 | P1 Fixes: V-006 through V-010 | Dev Team | ⏳ Planned |
|
||||
| 3 | P1 Fixes: V-014, V-016 | Dev Team | ⏳ Planned |
|
||||
| 4 | P1 Testing & Documentation | QA/Doc Team | ⏳ Planned |
|
||||
| 5-8 | P2 Fixes Implementation | Dev Team | ⏳ Planned |
|
||||
| 9-12 | P3 Fixes: Phase 4 Long-term Improvements | All Teams | ⏳ Planned |
|
||||
|
||||
---
|
||||
|
||||
## SUCCESS METRICS
|
||||
|
||||
### Security Metrics
|
||||
- [ ] Zero CVSS 9.0+ vulnerabilities
|
||||
- [ ] < 5 CVSS 7.0-8.9 vulnerabilities
|
||||
- [ ] 100% of subprocess calls without shell=True
|
||||
- [ ] 100% path validation coverage
|
||||
- [ ] 100% input validation on tool entry points
|
||||
|
||||
### Compliance Metrics
|
||||
- [ ] OWASP Top 10 compliance
|
||||
- [ ] CWE coverage > 90%
|
||||
- [ ] Security test coverage > 80%
|
||||
|
||||
---
|
||||
|
||||
## RISK ACCEPTANCE
|
||||
|
||||
| Vulnerability | Risk | Justification | Approver |
|
||||
|--------------|------|---------------|----------|
|
||||
| V-029 (Version Info) | Low | Required for debugging | TBD |
|
||||
| V-030 (Dead Code) | Low | Cleanup in next refactor | TBD |
|
||||
|
||||
---
|
||||
|
||||
## APPENDIX: TOOLS AND RESOURCES
|
||||
|
||||
### Recommended Security Tools
|
||||
1. **SAST:** Semgrep, Bandit, Pylint-security
|
||||
2. **DAST:** OWASP ZAP, Burp Suite
|
||||
3. **Dependency:** Safety, Snyk, Dependabot
|
||||
4. **Secrets:** GitLeaks, TruffleHog
|
||||
5. **Fuzzing:** Atheris, Hypothesis
|
||||
|
||||
### Training Resources
|
||||
- OWASP Top 10 for Python
|
||||
- Secure Coding in Python (SANS)
|
||||
- AWS Security Best Practices
|
||||
|
||||
---
|
||||
|
||||
**Document Owner:** Security Team
|
||||
**Review Cycle:** Monthly during remediation, Quarterly post-completion
|
||||
@@ -1,509 +0,0 @@
|
||||
# Hermes Agent - Testing Infrastructure Deep Analysis
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The hermes-agent project has a **comprehensive test suite** with **373 test files** containing approximately **4,300+ test functions**. The tests are organized into 10 subdirectories covering all major components.
|
||||
|
||||
---
|
||||
|
||||
## 1. Test Suite Structure & Statistics
|
||||
|
||||
### 1.1 Directory Breakdown
|
||||
|
||||
| Directory | Test Files | Focus Area |
|
||||
|-----------|------------|------------|
|
||||
| `tests/tools/` | 86 | Tool implementations, file operations, environments |
|
||||
| `tests/gateway/` | 96 | Platform integrations (Discord, Telegram, Slack, etc.) |
|
||||
| `tests/hermes_cli/` | 48 | CLI commands, configuration, setup flows |
|
||||
| `tests/agent/` | 16 | Core agent logic, prompt building, model adapters |
|
||||
| `tests/integration/` | 8 | End-to-end integration tests |
|
||||
| `tests/acp/` | 8 | Agent Communication Protocol |
|
||||
| `tests/cron/` | 3 | Cron job scheduling |
|
||||
| `tests/skills/` | 5 | Skill management |
|
||||
| `tests/honcho_integration/` | 5 | Honcho memory integration |
|
||||
| `tests/fakes/` | 2 | Test fixtures and fake servers |
|
||||
| **Total** | **373** | **~4,311 test functions** |
|
||||
|
||||
### 1.2 Test Classification
|
||||
|
||||
**Unit Tests:** ~95% (3,600+)
|
||||
**Integration Tests:** ~5% (marked with `@pytest.mark.integration`)
|
||||
**Async Tests:** ~679 tests use `@pytest.mark.asyncio`
|
||||
|
||||
### 1.3 Largest Test Files (by line count)
|
||||
|
||||
1. `tests/test_run_agent.py` - 3,329 lines (212 tests) - Core agent logic
|
||||
2. `tests/tools/test_mcp_tool.py` - 2,902 lines (147 tests) - MCP protocol
|
||||
3. `tests/gateway/test_voice_command.py` - 2,632 lines - Voice features
|
||||
4. `tests/gateway/test_feishu.py` - 2,580 lines - Feishu platform
|
||||
5. `tests/gateway/test_api_server.py` - 1,503 lines - API server
|
||||
|
||||
---
|
||||
|
||||
## 2. Coverage Heat Map - Critical Gaps Identified
|
||||
|
||||
### 2.1 NO TEST COVERAGE (Red Zone)
|
||||
|
||||
#### Agent Module Gaps:
|
||||
- `agent/copilot_acp_client.py` - Copilot integration (0 tests)
|
||||
- `agent/gemini_adapter.py` - Google Gemini model support (0 tests)
|
||||
- `agent/knowledge_ingester.py` - Knowledge ingestion (0 tests)
|
||||
- `agent/meta_reasoning.py` - Meta-reasoning capabilities (0 tests)
|
||||
- `agent/skill_utils.py` - Skill utilities (0 tests)
|
||||
- `agent/trajectory.py` - Trajectory management (0 tests)
|
||||
|
||||
#### Tools Module Gaps:
|
||||
- `tools/browser_tool.py` - Browser automation (0 tests)
|
||||
- `tools/code_execution_tool.py` - Code execution (0 tests)
|
||||
- `tools/gitea_client.py` - Gitea integration (0 tests)
|
||||
- `tools/image_generation_tool.py` - Image generation (0 tests)
|
||||
- `tools/neutts_synth.py` - Neural TTS (0 tests)
|
||||
- `tools/openrouter_client.py` - OpenRouter API (0 tests)
|
||||
- `tools/session_search_tool.py` - Session search (0 tests)
|
||||
- `tools/terminal_tool.py` - Terminal operations (0 tests)
|
||||
- `tools/tts_tool.py` - Text-to-speech (0 tests)
|
||||
- `tools/web_tools.py` - Web tools core (0 tests)
|
||||
|
||||
#### Gateway Module Gaps:
|
||||
- `gateway/run.py` - Gateway runner (0 tests)
|
||||
- `gateway/stream_consumer.py` - Stream consumption (0 tests)
|
||||
|
||||
#### Root-Level Gaps:
|
||||
- `hermes_constants.py` - Constants (0 tests)
|
||||
- `hermes_time.py` - Time utilities (0 tests)
|
||||
- `mini_swe_runner.py` - SWE runner (0 tests)
|
||||
- `rl_cli.py` - RL CLI (0 tests)
|
||||
- `utils.py` - Utilities (0 tests)
|
||||
|
||||
### 2.2 LIMITED COVERAGE (Yellow Zone)
|
||||
|
||||
- `agent/models_dev.py` - Only 19 tests for complex model routing
|
||||
- `agent/smart_model_routing.py` - Only 6 tests
|
||||
- `tools/approval.py` - 2 test files but complex logic
|
||||
- `tools/skills_guard.py` - Security-critical, needs more coverage
|
||||
|
||||
### 2.3 GOOD COVERAGE (Green Zone)
|
||||
|
||||
- `agent/anthropic_adapter.py` - 97 tests (comprehensive)
|
||||
- `agent/prompt_builder.py` - 108 tests (excellent)
|
||||
- `tools/mcp_tool.py` - 147 tests (very comprehensive)
|
||||
- `tools/file_tools.py` - Multiple test files
|
||||
- `gateway/discord.py` - 11 test files covering various aspects
|
||||
- `gateway/telegram.py` - 10 test files
|
||||
- `gateway/session.py` - 15 test files
|
||||
|
||||
---
|
||||
|
||||
## 3. Test Patterns Analysis
|
||||
|
||||
### 3.1 Fixtures Architecture
|
||||
|
||||
**Global Fixtures (`conftest.py`):**
|
||||
- `_isolate_hermes_home` - Isolates HERMES_HOME to temp directory (autouse)
|
||||
- `_ensure_current_event_loop` - Event loop management for sync tests (autouse)
|
||||
- `_enforce_test_timeout` - 30-second timeout per test (autouse)
|
||||
- `tmp_dir` - Temporary directory fixture
|
||||
- `mock_config` - Minimal hermes config for unit tests
|
||||
|
||||
**Common Patterns:**
|
||||
```python
|
||||
# Isolation pattern
|
||||
@pytest.fixture(autouse=True)
|
||||
def isolate_env(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
# Mock client pattern
|
||||
@pytest.fixture
|
||||
def mock_agent():
|
||||
with patch("run_agent.OpenAI") as mock:
|
||||
yield mock
|
||||
```
|
||||
|
||||
### 3.2 Mock Usage Statistics
|
||||
|
||||
- **~12,468 mock/patch usages** across the test suite
|
||||
- Heavy use of `unittest.mock.patch` and `MagicMock`
|
||||
- `AsyncMock` used for async function mocking
|
||||
- `SimpleNamespace` for creating mock API response objects
|
||||
|
||||
### 3.3 Test Organization Patterns
|
||||
|
||||
**Class-Based Organization:**
|
||||
- 1,532 test classes identified
|
||||
- Grouped by functionality: `Test<Feature><Scenario>`
|
||||
- Example: `TestSanitizeApiMessages`, `TestContextPressureFlags`
|
||||
|
||||
**Function-Based Organization:**
|
||||
- Used for simpler test files
|
||||
- Naming: `test_<feature>_<scenario>`
|
||||
|
||||
### 3.4 Async Test Patterns
|
||||
|
||||
```python
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_function():
|
||||
result = await async_function()
|
||||
assert result == expected
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 20 New Test Recommendations (Priority Order)
|
||||
|
||||
### Critical Priority (Security/Risk)
|
||||
|
||||
1. **Browser Tool Security Tests** (`tools/browser_tool.py`)
|
||||
- Test sandbox escape prevention
|
||||
- Test malicious script blocking
|
||||
- Test content security policy enforcement
|
||||
|
||||
2. **Code Execution Sandbox Tests** (`tools/code_execution_tool.py`)
|
||||
- Test resource limits (CPU, memory)
|
||||
- Test dangerous import blocking
|
||||
- Test timeout enforcement
|
||||
- Test filesystem access restrictions
|
||||
|
||||
3. **Terminal Tool Safety Tests** (`tools/terminal_tool.py`)
|
||||
- Test dangerous command blocking
|
||||
- Test command injection prevention
|
||||
- Test environment variable sanitization
|
||||
|
||||
4. **OpenRouter Client Tests** (`tools/openrouter_client.py`)
|
||||
- Test API key handling
|
||||
- Test rate limit handling
|
||||
- Test error response parsing
|
||||
|
||||
### High Priority (Core Functionality)
|
||||
|
||||
5. **Gemini Adapter Tests** (`agent/gemini_adapter.py`)
|
||||
- Test message format conversion
|
||||
- Test tool call normalization
|
||||
- Test streaming response handling
|
||||
|
||||
6. **Copilot ACP Client Tests** (`agent/copilot_acp_client.py`)
|
||||
- Test authentication flow
|
||||
- Test session management
|
||||
- Test message passing
|
||||
|
||||
7. **Knowledge Ingester Tests** (`agent/knowledge_ingester.py`)
|
||||
- Test document parsing
|
||||
- Test embedding generation
|
||||
- Test knowledge retrieval
|
||||
|
||||
8. **Stream Consumer Tests** (`gateway/stream_consumer.py`)
|
||||
- Test backpressure handling
|
||||
- Test reconnection logic
|
||||
- Test message ordering guarantees
|
||||
|
||||
### Medium Priority (Integration/Features)
|
||||
|
||||
9. **Web Tools Core Tests** (`tools/web_tools.py`)
|
||||
- Test search result parsing
|
||||
- Test content extraction
|
||||
- Test error handling for unavailable services
|
||||
|
||||
10. **Image Generation Tool Tests** (`tools/image_generation_tool.py`)
|
||||
- Test prompt filtering
|
||||
- Test image format handling
|
||||
- Test provider failover
|
||||
|
||||
11. **Gitea Client Tests** (`tools/gitea_client.py`)
|
||||
- Test repository operations
|
||||
- Test webhook handling
|
||||
- Test authentication
|
||||
|
||||
12. **Session Search Tool Tests** (`tools/session_search_tool.py`)
|
||||
- Test query parsing
|
||||
- Test result ranking
|
||||
- Test pagination
|
||||
|
||||
13. **Meta Reasoning Tests** (`agent/meta_reasoning.py`)
|
||||
- Test strategy selection
|
||||
- Test reflection generation
|
||||
- Test learning from failures
|
||||
|
||||
14. **TTS Tool Tests** (`tools/tts_tool.py`)
|
||||
- Test voice selection
|
||||
- Test audio format conversion
|
||||
- Test streaming playback
|
||||
|
||||
15. **Neural TTS Tests** (`tools/neutts_synth.py`)
|
||||
- Test voice cloning safety
|
||||
- Test audio quality validation
|
||||
- Test resource cleanup
|
||||
|
||||
### Lower Priority (Utilities)
|
||||
|
||||
16. **Hermes Constants Tests** (`hermes_constants.py`)
|
||||
- Test constant values
|
||||
- Test environment-specific overrides
|
||||
|
||||
17. **Time Utilities Tests** (`hermes_time.py`)
|
||||
- Test timezone handling
|
||||
- Test formatting functions
|
||||
|
||||
18. **Utils Module Tests** (`utils.py`)
|
||||
- Test helper functions
|
||||
- Test validation utilities
|
||||
|
||||
19. **Mini SWE Runner Tests** (`mini_swe_runner.py`)
|
||||
- Test repository setup
|
||||
- Test test execution
|
||||
- Test result parsing
|
||||
|
||||
20. **RL CLI Tests** (`rl_cli.py`)
|
||||
- Test training command parsing
|
||||
- Test configuration validation
|
||||
- Test checkpoint handling
|
||||
|
||||
---
|
||||
|
||||
## 5. Test Optimization Opportunities
|
||||
|
||||
### 5.1 Performance Issues Identified
|
||||
|
||||
**Large Test Files (Split Recommended):**
|
||||
- `tests/test_run_agent.py` (3,329 lines) → Split into multiple files
|
||||
- `tests/tools/test_mcp_tool.py` (2,902 lines) → Split by MCP feature
|
||||
- `tests/test_anthropic_adapter.py` (1,219 lines) → Consider splitting
|
||||
|
||||
**Potential Slow Tests:**
|
||||
- Integration tests with real API calls
|
||||
- Tests with file I/O operations
|
||||
- Tests with subprocess spawning
|
||||
|
||||
### 5.2 Optimization Recommendations
|
||||
|
||||
1. **Parallel Execution Already Configured**
|
||||
- `pytest-xdist` with `-n auto` in CI
|
||||
- Maintains isolation through fixtures
|
||||
|
||||
2. **Fixture Scope Optimization**
|
||||
- Review `autouse=True` fixtures for necessity
|
||||
- Consider session-scoped fixtures for expensive setup
|
||||
|
||||
3. **Mock External Services**
|
||||
- Some integration tests still hit real APIs
|
||||
- Create more fakes like `fake_ha_server.py`
|
||||
|
||||
4. **Test Data Management**
|
||||
- Use factory pattern for test data generation
|
||||
- Share test fixtures across related tests
|
||||
|
||||
### 5.3 CI/CD Optimizations
|
||||
|
||||
Current CI (`.github/workflows/tests.yml`):
|
||||
- Uses `uv` for fast dependency installation
|
||||
- Runs with `-n auto` for parallelization
|
||||
- Ignores integration tests by default
|
||||
- 10-minute timeout
|
||||
|
||||
**Recommended Improvements:**
|
||||
1. Add test duration reporting (`--durations=10`)
|
||||
2. Add coverage reporting
|
||||
3. Separate fast unit tests from slower integration tests
|
||||
4. Add flaky test retry mechanism
|
||||
|
||||
---
|
||||
|
||||
## 6. Missing Integration Test Scenarios
|
||||
|
||||
### 6.1 Cross-Component Integration
|
||||
|
||||
1. **End-to-End Agent Flow**
|
||||
- User message → Gateway → Agent → Tools → Response
|
||||
- Test with realistic (mocked) LLM responses
|
||||
|
||||
2. **Multi-Platform Gateway**
|
||||
- Message routing between platforms
|
||||
- Session persistence across platforms
|
||||
|
||||
3. **Tool + Environment Integration**
|
||||
- Terminal tool with different backends (local, docker, modal)
|
||||
- File operations with permission checks
|
||||
|
||||
4. **Skill Lifecycle Integration**
|
||||
- Skill installation → Registration → Execution → Update → Removal
|
||||
|
||||
5. **Memory + Honcho Integration**
|
||||
- Memory storage → Retrieval → Context injection
|
||||
|
||||
### 6.2 Failure Scenario Integration Tests
|
||||
|
||||
1. **LLM Provider Failover**
|
||||
- Primary provider down → Fallback provider
|
||||
- Rate limiting handling
|
||||
|
||||
2. **Gateway Reconnection**
|
||||
- Platform disconnect → Reconnect → Resume session
|
||||
|
||||
3. **Tool Execution Failures**
|
||||
- Tool timeout → Retry → Fallback
|
||||
- Tool error → Error handling → User notification
|
||||
|
||||
4. **Checkpoint Recovery**
|
||||
- Crash during batch → Resume from checkpoint
|
||||
- Corrupted checkpoint handling
|
||||
|
||||
### 6.3 Security Integration Tests
|
||||
|
||||
1. **Prompt Injection Across Stack**
|
||||
- Gateway input → Agent processing → Tool execution
|
||||
|
||||
2. **Permission Escalation Prevention**
|
||||
- User permissions → Tool allowlist → Execution
|
||||
|
||||
3. **Data Leak Prevention**
|
||||
- Memory storage → Context building → Response generation
|
||||
|
||||
---
|
||||
|
||||
## 7. Performance Test Strategy
|
||||
|
||||
### 7.1 Load Testing Requirements
|
||||
|
||||
1. **Gateway Load Tests**
|
||||
- Concurrent session handling
|
||||
- Message throughput per platform
|
||||
- Memory usage under load
|
||||
|
||||
2. **Agent Response Time Tests**
|
||||
- End-to-end latency benchmarks
|
||||
- Tool execution time budgets
|
||||
- Context building performance
|
||||
|
||||
3. **Resource Utilization Tests**
|
||||
- Memory leaks in long-running sessions
|
||||
- File descriptor limits
|
||||
- CPU usage patterns
|
||||
|
||||
### 7.2 Benchmark Framework
|
||||
|
||||
```python
|
||||
# Proposed performance test structure
|
||||
class TestGatewayPerformance:
|
||||
@pytest.mark.benchmark
|
||||
def test_message_throughput(self, benchmark):
|
||||
# Measure messages processed per second
|
||||
pass
|
||||
|
||||
@pytest.mark.benchmark
|
||||
def test_session_creation_latency(self, benchmark):
|
||||
# Measure session setup time
|
||||
pass
|
||||
```
|
||||
|
||||
### 7.3 Performance Regression Detection
|
||||
|
||||
1. **Baseline Establishment**
|
||||
- Record baseline metrics for critical paths
|
||||
- Store in version control
|
||||
|
||||
2. **Automated Comparison**
|
||||
- Compare PR performance against baseline
|
||||
- Fail if degradation > 10%
|
||||
|
||||
3. **Metrics to Track**
|
||||
- Test suite execution time
|
||||
- Memory peak usage
|
||||
- Individual test durations
|
||||
|
||||
---
|
||||
|
||||
## 8. Test Infrastructure Improvements
|
||||
|
||||
### 8.1 Coverage Tooling
|
||||
|
||||
**Missing:** Code coverage reporting
|
||||
**Recommendation:** Add `pytest-cov` to dev dependencies
|
||||
|
||||
```toml
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=9.0.2,<10",
|
||||
"pytest-asyncio>=1.3.0,<2",
|
||||
"pytest-xdist>=3.0,<4",
|
||||
"pytest-cov>=5.0,<6", # Add this
|
||||
"mcp>=1.2.0,<2"
|
||||
]
|
||||
```
|
||||
|
||||
### 8.2 Test Categories
|
||||
|
||||
Add more pytest markers for selective test running:
|
||||
|
||||
```toml
|
||||
# In pytest.ini or pyproject.toml
|
||||
markers = [
|
||||
"integration: marks tests requiring external services",
|
||||
"slow: marks slow tests (>5s)",
|
||||
"security: marks security-focused tests",
|
||||
"benchmark: marks performance benchmark tests",
|
||||
"flaky: marks tests that may be unstable",
|
||||
]
|
||||
```
|
||||
|
||||
### 8.3 Test Data Factory
|
||||
|
||||
Create centralized test data factories:
|
||||
|
||||
```python
|
||||
# tests/factories.py
|
||||
class AgentFactory:
|
||||
@staticmethod
|
||||
def create_mock_agent(tools=None):
|
||||
# Return configured mock agent
|
||||
pass
|
||||
|
||||
class MessageFactory:
|
||||
@staticmethod
|
||||
def create_user_message(content):
|
||||
# Return formatted user message
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. Summary & Action Items
|
||||
|
||||
### Immediate Actions (High Impact)
|
||||
|
||||
1. **Add coverage reporting** to CI pipeline
|
||||
2. **Create tests for uncovered security-critical modules:**
|
||||
- `tools/code_execution_tool.py`
|
||||
- `tools/browser_tool.py`
|
||||
- `tools/terminal_tool.py`
|
||||
3. **Split oversized test files** for better maintainability
|
||||
4. **Add Gemini adapter tests** (increasingly important provider)
|
||||
|
||||
### Short-term (1-2 Sprints)
|
||||
|
||||
5. Create integration tests for cross-component flows
|
||||
6. Add performance benchmarks for critical paths
|
||||
7. Expand OpenRouter client test coverage
|
||||
8. Add knowledge ingester tests
|
||||
|
||||
### Long-term (Quarter)
|
||||
|
||||
9. Achieve 80% code coverage across all modules
|
||||
10. Implement performance regression testing
|
||||
11. Create comprehensive security test suite
|
||||
12. Document testing patterns and best practices
|
||||
|
||||
---
|
||||
|
||||
## Appendix: Test File Size Distribution
|
||||
|
||||
| Lines | Count | Category |
|
||||
|-------|-------|----------|
|
||||
| 0-100 | ~50 | Simple unit tests |
|
||||
| 100-500 | ~200 | Standard test files |
|
||||
| 500-1000 | ~80 | Complex feature tests |
|
||||
| 1000-2000 | ~30 | Large test suites |
|
||||
| 2000+ | ~13 | Monolithic test files (need splitting) |
|
||||
|
||||
---
|
||||
|
||||
*Analysis generated: March 30, 2026*
|
||||
*Total test files analyzed: 373*
|
||||
*Estimated test functions: ~4,311*
|
||||
@@ -1,364 +0,0 @@
|
||||
# Test Optimization Guide for Hermes Agent
|
||||
|
||||
## Current Test Execution Analysis
|
||||
|
||||
### Test Suite Statistics
|
||||
- **Total Test Files:** 373
|
||||
- **Estimated Test Functions:** ~4,311
|
||||
- **Async Tests:** ~679 (15.8%)
|
||||
- **Integration Tests:** 7 files (excluded from CI)
|
||||
- **Average Tests per File:** ~11.6
|
||||
|
||||
### Current CI Configuration
|
||||
```yaml
|
||||
# .github/workflows/tests.yml
|
||||
- name: Run tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q --ignore=tests/integration --tb=short -n auto
|
||||
```
|
||||
|
||||
**Current Flags:**
|
||||
- `-q`: Quiet mode
|
||||
- `--ignore=tests/integration`: Skip integration tests
|
||||
- `--tb=short`: Short traceback format
|
||||
- `-n auto`: Auto-detect parallel workers
|
||||
|
||||
---
|
||||
|
||||
## Optimization Recommendations
|
||||
|
||||
### 1. Add Test Duration Reporting
|
||||
|
||||
**Current:** No duration tracking
|
||||
**Recommended:**
|
||||
```yaml
|
||||
run: |
|
||||
python -m pytest tests/ \
|
||||
--ignore=tests/integration \
|
||||
-n auto \
|
||||
--durations=20 \
|
||||
--durations-min=1.0  # Show the 20 slowest tests, limited to those taking >1s
|
||||
```
|
||||
|
||||
This will help identify slow tests that need optimization.
|
||||
|
||||
### 2. Implement Test Categories
|
||||
|
||||
Add markers to `pyproject.toml`:
|
||||
```toml
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
markers = [
|
||||
"integration: marks tests requiring external services",
|
||||
"slow: marks tests that take >5 seconds",
|
||||
"unit: marks fast unit tests",
|
||||
"security: marks security-focused tests",
|
||||
"flaky: marks tests that may be unstable",
|
||||
]
|
||||
addopts = "-m 'not integration and not slow' -n auto"
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Run only fast unit tests
|
||||
pytest -m unit
|
||||
|
||||
# Run all tests including slow ones
|
||||
pytest -m "not integration"
|
||||
|
||||
# Run only security tests
|
||||
pytest -m security
|
||||
```
|
||||
|
||||
### 3. Optimize Slow Test Candidates
|
||||
|
||||
Based on file sizes, these tests likely need optimization:
|
||||
|
||||
| File | Lines | Optimization Strategy |
|
||||
|------|-------|----------------------|
|
||||
| `test_run_agent.py` | 3,329 | Split into multiple files by feature |
|
||||
| `test_mcp_tool.py` | 2,902 | Split by MCP functionality |
|
||||
| `test_voice_command.py` | 2,632 | Review for redundant tests |
|
||||
| `test_feishu.py` | 2,580 | Mock external API calls |
|
||||
| `test_api_server.py` | 1,503 | Parallelize independent tests |
|
||||
|
||||
### 4. Add Coverage Reporting to CI
|
||||
|
||||
**Updated workflow:**
|
||||
```yaml
|
||||
- name: Run tests with coverage
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ \
|
||||
--ignore=tests/integration \
|
||||
-n auto \
|
||||
--cov=agent --cov=tools --cov=gateway --cov=hermes_cli \
|
||||
--cov-report=xml \
|
||||
--cov-report=html \
|
||||
--cov-fail-under=70
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
files: ./coverage.xml
|
||||
fail_ci_if_error: true
|
||||
```
|
||||
|
||||
### 5. Implement Flaky Test Handling
|
||||
|
||||
Add `pytest-rerunfailures`:
|
||||
```toml
|
||||
dev = [
|
||||
"pytest>=9.0.2,<10",
|
||||
"pytest-asyncio>=1.3.0,<2",
|
||||
"pytest-xdist>=3.0,<4",
|
||||
"pytest-cov>=5.0,<6",
|
||||
"pytest-rerunfailures>=14.0,<15", # Add this
|
||||
]
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Mark known flaky tests
|
||||
@pytest.mark.flaky(reruns=3, reruns_delay=1)
|
||||
async def test_network_dependent_feature():
|
||||
# Test that sometimes fails due to network
|
||||
pass
|
||||
```
|
||||
|
||||
### 6. Optimize Fixture Scopes
|
||||
|
||||
Review `conftest.py` fixtures:
|
||||
|
||||
```python
|
||||
# Current: Function scope (runs for every test)
|
||||
@pytest.fixture()
|
||||
def mock_config():
|
||||
return {...}
|
||||
|
||||
# Optimized: Session scope (runs once per session)
|
||||
@pytest.fixture(scope="session")
|
||||
def mock_config():
|
||||
return {...}
|
||||
|
||||
# Optimized: Module scope (runs once per module)
|
||||
@pytest.fixture(scope="module")
|
||||
def expensive_setup():
|
||||
# Setup that can be reused across module
|
||||
pass
|
||||
```
|
||||
|
||||
### 7. Parallel Execution Tuning
|
||||
|
||||
**Current:** `-n auto` (uses all CPUs)
|
||||
**Issues:**
|
||||
- May cause resource contention
|
||||
- Some tests may not be safe to run in parallel (shared files, ports, or other global state across worker processes)
|
||||
|
||||
**Recommendations:**
|
||||
```bash
|
||||
# Limit workers to prevent resource exhaustion
|
||||
pytest -n 4 # Use 4 workers regardless of CPU count
|
||||
|
||||
# Use load-based scheduling for uneven test durations
|
||||
pytest -n auto --dist=load
|
||||
|
||||
# Group tests by module to reduce setup overhead
|
||||
pytest -n auto --dist=loadscope
|
||||
```
|
||||
|
||||
### 8. Test Data Management
|
||||
|
||||
**Current Issue:** Tests may create files in `/tmp` without cleanup
|
||||
|
||||
**Solution - Factory Pattern:**
|
||||
```python
|
||||
# tests/factories.py
|
||||
import tempfile
|
||||
import shutil
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
@contextmanager
|
||||
def temp_workspace():
|
||||
"""Create isolated temp directory for tests."""
|
||||
path = tempfile.mkdtemp(prefix="hermes_test_")
|
||||
try:
|
||||
yield Path(path)
|
||||
finally:
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
|
||||
# Usage in tests
|
||||
def test_file_operations():
|
||||
with temp_workspace() as tmp:
|
||||
# All file operations in isolated directory
|
||||
file_path = tmp / "test.txt"
|
||||
file_path.write_text("content")
|
||||
assert file_path.exists()
|
||||
# Automatically cleaned up
|
||||
```
|
||||
|
||||
### 9. Database/State Isolation
|
||||
|
||||
**Current:** Uses `monkeypatch` for env vars
|
||||
**Enhancement:** Database mocking
|
||||
|
||||
```python
|
||||
@pytest.fixture
|
||||
def mock_honcho():
|
||||
"""Mock Honcho client for tests."""
|
||||
with patch("honcho_integration.client.HonchoClient") as mock:
|
||||
mock_instance = MagicMock()
|
||||
mock_instance.get_session.return_value = {"id": "test-session"}
|
||||
mock.return_value = mock_instance
|
||||
yield mock
|
||||
|
||||
# Usage
|
||||
async def test_memory_storage(mock_honcho):
|
||||
# Fast, isolated test
|
||||
pass
|
||||
```
|
||||
|
||||
### 10. CI Pipeline Optimization
|
||||
|
||||
**Current Pipeline:**
|
||||
1. Checkout
|
||||
2. Install uv
|
||||
3. Install Python
|
||||
4. Install deps
|
||||
5. Run tests
|
||||
|
||||
**Optimized Pipeline (with caching):**
|
||||
```yaml
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
version: "0.5.x"
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip' # Cache pip dependencies
|
||||
|
||||
- name: Cache uv packages
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/uv
|
||||
key: ${{ runner.os }}-uv-${{ hashFiles('**/pyproject.toml') }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv .venv
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run fast tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pytest -m "not integration and not slow" -n auto --tb=short
|
||||
|
||||
- name: Run slow tests
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
pytest -m "slow" -n 2 --tb=short
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Wins (Implement First)
|
||||
|
||||
### 1. Add Duration Reporting (5 minutes)
|
||||
```yaml
|
||||
--durations=10
|
||||
```
|
||||
|
||||
### 2. Mark Slow Tests (30 minutes)
|
||||
Add `@pytest.mark.slow` to tests taking >5s.
|
||||
|
||||
### 3. Split Largest Test File (2 hours)
|
||||
Split `test_run_agent.py` into:
|
||||
- `test_run_agent_core.py`
|
||||
- `test_run_agent_tools.py`
|
||||
- `test_run_agent_memory.py`
|
||||
- `test_run_agent_messaging.py`
|
||||
|
||||
### 4. Add Coverage Baseline (1 hour)
|
||||
```bash
|
||||
pytest --cov=agent --cov=tools --cov=gateway tests/ --cov-report=html
|
||||
```
|
||||
|
||||
### 5. Optimize Fixture Scopes (1 hour)
|
||||
Review and optimize 5 most-used fixtures.
|
||||
|
||||
---
|
||||
|
||||
## Long-term Improvements
|
||||
|
||||
### Test Data Generation
|
||||
```python
|
||||
# Implement hypothesis-based testing
|
||||
from hypothesis import given, strategies as st
|
||||
|
||||
@given(st.lists(st.text(), min_size=1))
|
||||
def test_message_batching(messages):
|
||||
# Property-based testing
|
||||
pass
|
||||
```
|
||||
|
||||
### Performance Regression Testing
|
||||
```python
|
||||
@pytest.mark.benchmark
|
||||
def test_message_processing_speed(benchmark):
|
||||
result = benchmark(process_messages, sample_data)
|
||||
assert result.throughput > 1000 # msgs/sec
|
||||
```
|
||||
|
||||
### Contract Testing
|
||||
```python
|
||||
# Verify API contracts between components
|
||||
@pytest.mark.contract
|
||||
def test_agent_tool_contract():
|
||||
"""Verify agent sends correct format to tools."""
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Measurement Checklist
|
||||
|
||||
After implementing optimizations, verify:
|
||||
|
||||
- [ ] Test suite execution time < 5 minutes
|
||||
- [ ] No individual test > 10 seconds (except integration)
|
||||
- [ ] Code coverage > 70%
|
||||
- [ ] All flaky tests marked and retried
|
||||
- [ ] CI passes consistently (>95% success rate)
|
||||
- [ ] Memory usage stable (no leaks in test suite)
|
||||
|
||||
---
|
||||
|
||||
## Tools to Add
|
||||
|
||||
```toml
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=9.0.2,<10",
|
||||
"pytest-asyncio>=1.3.0,<2",
|
||||
"pytest-xdist>=3.0,<4",
|
||||
"pytest-cov>=5.0,<6",
|
||||
"pytest-rerunfailures>=14.0,<15",
|
||||
"pytest-benchmark>=4.0,<5", # Performance testing
|
||||
"pytest-mock>=3.12,<4", # Enhanced mocking
|
||||
"hypothesis>=6.100,<7", # Property-based testing
|
||||
"factory-boy>=3.3,<4", # Test data factories
|
||||
]
|
||||
```
|
||||
@@ -1,73 +0,0 @@
|
||||
# V-006 MCP OAuth Deserialization Vulnerability Fix
|
||||
|
||||
## Summary
|
||||
Fixed the high-severity V-006 vulnerability (CVSS 8.8) in MCP OAuth handling, which used insecure deserialization and could enable remote code execution.
|
||||
|
||||
## Changes Made
|
||||
|
||||
### 1. Secure OAuth State Serialization (`tools/mcp_oauth.py`)
|
||||
- **Replaced pickle with JSON**: OAuth state is now serialized using JSON instead of `pickle.loads()`, eliminating the RCE vector
|
||||
- **Added HMAC-SHA256 signatures**: All state data is cryptographically signed to prevent tampering
|
||||
- **Implemented secure deserialization**: `SecureOAuthState.deserialize()` validates structure, signature, and expiration
|
||||
- **Added constant-time comparison**: Token validation uses `secrets.compare_digest()` to prevent timing attacks
|
||||
|
||||
### 2. Token Storage Security Enhancements
|
||||
- **JSON Schema Validation**: Token data is validated against strict schemas before use
|
||||
- **HMAC Signing**: Stored tokens are signed with HMAC-SHA256 to detect file tampering
|
||||
- **Strict Type Checking**: All token fields are type-validated
|
||||
- **File Permissions**: Token directory created with 0o700, files with 0o600
|
||||
|
||||
### 3. Security Features
|
||||
- **Nonce-based replay protection**: Each state has a unique nonce tracked by the state manager
|
||||
- **10-minute expiration**: States automatically expire after 600 seconds
|
||||
- **CSRF protection**: State validation prevents cross-site request forgery
|
||||
- **Environment-based keys**: Supports `HERMES_OAUTH_SECRET` and `HERMES_TOKEN_STORAGE_SECRET` env vars
|
||||
|
||||
### 4. Comprehensive Security Tests (`tests/test_oauth_state_security.py`)
|
||||
54 security tests covering:
|
||||
- Serialization/deserialization roundtrips
|
||||
- Tampering detection (data and signature)
|
||||
- Schema validation for tokens and client info
|
||||
- Replay attack prevention
|
||||
- CSRF attack prevention
|
||||
- MITM attack detection
|
||||
- Pickle payload rejection
|
||||
- Performance tests
|
||||
|
||||
## Files Modified
|
||||
- `tools/mcp_oauth.py` - Complete rewrite with secure state handling
|
||||
- `tests/test_oauth_state_security.py` - New comprehensive security test suite
|
||||
|
||||
## Security Verification
|
||||
```bash
|
||||
# Run security tests
|
||||
python tests/test_oauth_state_security.py
|
||||
|
||||
# All 54 tests pass:
|
||||
# - TestSecureOAuthState: 20 tests
|
||||
# - TestOAuthStateManager: 10 tests
|
||||
# - TestSchemaValidation: 8 tests
|
||||
# - TestTokenStorageSecurity: 6 tests
|
||||
# - TestNoPickleUsage: 2 tests
|
||||
# - TestSecretKeyManagement: 3 tests
|
||||
# - TestOAuthFlowIntegration: 3 tests
|
||||
# - TestPerformance: 2 tests
|
||||
```
|
||||
|
||||
## API Changes (Backwards Compatible)
|
||||
- `SecureOAuthState` - New class for secure state handling
|
||||
- `OAuthStateManager` - New class for state lifecycle management
|
||||
- `HermesTokenStorage` - Enhanced with schema validation and signing
|
||||
- `OAuthStateError` - New exception for security violations
|
||||
|
||||
## Deployment Notes
|
||||
1. Existing token files will be invalidated (no signature) - users will need to re-authenticate
|
||||
2. New secret key will be auto-generated in `~/.hermes/.secrets/`
|
||||
3. Environment variables can override key locations:
|
||||
- `HERMES_OAUTH_SECRET` - For state signing
|
||||
- `HERMES_TOKEN_STORAGE_SECRET` - For token storage signing
|
||||
|
||||
## References
|
||||
- Security Audit: V-006 Insecure Deserialization in MCP OAuth
|
||||
- CWE-502: Deserialization of Untrusted Data
|
||||
- CWE-20: Improper Input Validation
|
||||
@@ -39,6 +39,7 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
||||
"browser_scroll": "execute",
|
||||
"browser_press": "execute",
|
||||
"browser_back": "execute",
|
||||
"browser_close": "execute",
|
||||
"browser_get_images": "read",
|
||||
# Agent internals
|
||||
"delegate_task": "execute",
|
||||
|
||||
@@ -4,22 +4,3 @@ These modules contain pure utility functions and self-contained classes
|
||||
that were previously embedded in the 3,600-line run_agent.py. Extracting
|
||||
them makes run_agent.py focused on the AIAgent orchestrator class.
|
||||
"""
|
||||
|
||||
# Import input sanitizer for convenient access
|
||||
from agent.input_sanitizer import (
|
||||
detect_jailbreak_patterns,
|
||||
sanitize_input,
|
||||
sanitize_input_full,
|
||||
score_input_risk,
|
||||
should_block_input,
|
||||
RiskLevel,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"detect_jailbreak_patterns",
|
||||
"sanitize_input",
|
||||
"sanitize_input_full",
|
||||
"score_input_risk",
|
||||
"should_block_input",
|
||||
"RiskLevel",
|
||||
]
|
||||
|
||||
@@ -34,12 +34,6 @@ than the provider's default.
|
||||
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
|
||||
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
|
||||
custom OpenAI-compatible endpoint without touching the main model settings.
|
||||
|
||||
Payment / credit exhaustion fallback:
|
||||
When a resolved provider returns HTTP 402 or a credit-related error,
|
||||
call_llm() automatically retries with the next available provider in the
|
||||
auto-detection chain. This handles the common case where a user depletes
|
||||
their OpenRouter balance but has Codex OAuth or another provider available.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -61,7 +55,6 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"gemini": "gemini-3-flash-preview",
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"minimax": "MiniMax-M2.7-highspeed",
|
||||
@@ -260,73 +253,26 @@ class _CodexCompletionsAdapter:
|
||||
usage = None
|
||||
|
||||
try:
|
||||
# Collect output items and text deltas during streaming —
|
||||
# the Codex backend can return empty response.output from
|
||||
# get_final_response() even when items were streamed.
|
||||
collected_output_items: List[Any] = []
|
||||
collected_text_deltas: List[str] = []
|
||||
has_function_calls = False
|
||||
with self._client.responses.stream(**resp_kwargs) as stream:
|
||||
for _event in stream:
|
||||
_etype = getattr(_event, "type", "")
|
||||
if _etype == "response.output_item.done":
|
||||
_done = getattr(_event, "item", None)
|
||||
if _done is not None:
|
||||
collected_output_items.append(_done)
|
||||
elif "output_text.delta" in _etype:
|
||||
_delta = getattr(_event, "delta", "")
|
||||
if _delta:
|
||||
collected_text_deltas.append(_delta)
|
||||
elif "function_call" in _etype:
|
||||
has_function_calls = True
|
||||
pass
|
||||
final = stream.get_final_response()
|
||||
|
||||
# Backfill empty output from collected stream events
|
||||
_output = getattr(final, "output", None)
|
||||
if isinstance(_output, list) and not _output:
|
||||
if collected_output_items:
|
||||
final.output = list(collected_output_items)
|
||||
logger.debug(
|
||||
"Codex auxiliary: backfilled %d output items from stream events",
|
||||
len(collected_output_items),
|
||||
)
|
||||
elif collected_text_deltas and not has_function_calls:
|
||||
# Only synthesize text when no tool calls were streamed —
|
||||
# a function_call response with incidental text should not
|
||||
# be collapsed into a plain-text message.
|
||||
assembled = "".join(collected_text_deltas)
|
||||
final.output = [SimpleNamespace(
|
||||
type="message", role="assistant", status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
logger.debug(
|
||||
"Codex auxiliary: synthesized from %d deltas (%d chars)",
|
||||
len(collected_text_deltas), len(assembled),
|
||||
)
|
||||
|
||||
# Extract text and tool calls from the Responses output.
|
||||
# Items may be SDK objects (attrs) or dicts (raw/fallback paths),
|
||||
# so use a helper that handles both shapes.
|
||||
def _item_get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
val = getattr(obj, key, None)
|
||||
if val is None and isinstance(obj, dict):
|
||||
val = obj.get(key, default)
|
||||
return val if val is not None else default
|
||||
|
||||
# Extract text and tool calls from the Responses output
|
||||
for item in getattr(final, "output", []):
|
||||
item_type = _item_get(item, "type")
|
||||
item_type = getattr(item, "type", None)
|
||||
if item_type == "message":
|
||||
for part in (_item_get(item, "content") or []):
|
||||
ptype = _item_get(part, "type")
|
||||
for part in getattr(item, "content", []):
|
||||
ptype = getattr(part, "type", None)
|
||||
if ptype in ("output_text", "text"):
|
||||
text_parts.append(_item_get(part, "text", ""))
|
||||
text_parts.append(getattr(part, "text", ""))
|
||||
elif item_type == "function_call":
|
||||
tool_calls_raw.append(SimpleNamespace(
|
||||
id=_item_get(item, "call_id", ""),
|
||||
id=getattr(item, "call_id", ""),
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=_item_get(item, "name", ""),
|
||||
arguments=_item_get(item, "arguments", "{}"),
|
||||
name=getattr(item, "name", ""),
|
||||
arguments=getattr(item, "arguments", "{}"),
|
||||
),
|
||||
))
|
||||
|
||||
@@ -896,7 +842,7 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st
|
||||
if forced == "nous":
|
||||
client, model = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes auth)")
|
||||
logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)")
|
||||
return client, model
|
||||
|
||||
if forced == "codex":
|
||||
@@ -922,110 +868,15 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st
|
||||
_AUTO_PROVIDER_LABELS = {
|
||||
"_try_openrouter": "openrouter",
|
||||
"_try_nous": "nous",
|
||||
"_try_ollama": "ollama",
|
||||
"_try_custom_endpoint": "local/custom",
|
||||
"_try_codex": "openai-codex",
|
||||
"_resolve_api_key_provider": "api-key",
|
||||
}
|
||||
|
||||
|
||||
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
|
||||
|
||||
|
||||
def _try_ollama() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return an ``(client, model)`` pair for a detected Ollama server.

    The base URL is read from ``OLLAMA_BASE_URL`` (default
    ``http://localhost:11434``), stripped of surrounding whitespace and
    trailing slashes, and suffixed with ``/v1`` when it does not already end
    with it.  The API key is read from ``OLLAMA_API_KEY`` (default
    ``"ollama"``).

    Returns:
        ``(OpenAI client, model name)`` when ``detect_local_server_type``
        reports ``"ollama"`` for the base URL, otherwise ``(None, None)``.
        The model is the main model if one is configured, else
        ``"gemma4:12b"``.
    """
    # Kept as a function-scope import, as in the original.
    # NOTE(review): presumably this avoids an import cycle — confirm.
    from agent.model_metadata import detect_local_server_type

    # Normalise *before* falling back to the default so that a blank or
    # slash-only environment value cannot produce the bogus URL "/v1".
    base_url = (
        (os.getenv("OLLAMA_BASE_URL") or "").strip().rstrip("/")
        or "http://localhost:11434"
    )
    if not base_url.endswith("/v1"):
        base_url += "/v1"

    if detect_local_server_type(base_url) != "ollama":
        return None, None

    # Same ordering fix: a whitespace-only key falls back to the default.
    api_key = (os.getenv("OLLAMA_API_KEY") or "").strip() or "ollama"
    model = _read_main_model() or "gemma4:12b"
    return OpenAI(api_key=api_key, base_url=base_url), model
|
||||
|
||||
|
||||
def _get_provider_chain() -> List[tuple]:
    """Build the ordered ``(label, resolver)`` auto-detection chain.

    The list is assembled on every call rather than once at import time so
    that test patches applied to the ``_try_*`` functions are honoured.
    """
    labels = (
        "openrouter",
        "nous",
        "ollama",
        "local/custom",
        "openai-codex",
        "api-key",
    )
    resolvers = (
        _try_openrouter,
        _try_nous,
        _try_ollama,
        _try_custom_endpoint,
        _try_codex,
        _resolve_api_key_provider,
    )
    return list(zip(labels, resolvers))
|
||||
|
||||
|
||||
def _is_payment_error(exc: Exception) -> bool:
    """Return True when *exc* signals payment, credit, or quota exhaustion.

    The decision is based on ``exc.status_code`` (if present) and the
    lower-cased ``str(exc)``:

    * HTTP 402 (Payment Required) always counts.
    * HTTP 429, or an exception with no ``status_code`` attribute, counts
      only when the message contains a billing-related keyword; this
      separates credit exhaustion from ordinary rate limiting.
    * Any other status code is never treated as a payment error, whatever
      the message says.
    """
    status = getattr(exc, "status_code", None)
    if status == 402:
        return True
    # Only 429 / status-less errors are ambiguous enough to need a message scan.
    if status not in (429, None):
        return False

    billing_keywords = (
        "credits",
        "insufficient funds",
        "can only afford",
        "billing",
        "payment required",
    )
    message = str(exc).lower()
    return any(keyword in message for keyword in billing_keywords)
|
||||
|
||||
|
||||
def _try_payment_fallback(
    failed_provider: str,
    task: Optional[str] = None,
) -> Tuple[Optional[Any], Optional[str], str]:
    """Find an alternative provider after a payment/credit error.

    Walks the chain returned by ``_get_provider_chain()`` in order, skipping
    the entry that corresponds to *failed_provider*, and returns the first
    resolver result whose client is not ``None``.

    Args:
        failed_provider: Label of the provider that returned the payment
            error; matched case-insensitively after stripping whitespace.
        task: Optional task name, used only in log messages (``"call"`` is
            logged when it is empty or ``None``).

    Returns:
        ``(client, model, provider_label)`` for the first usable fallback,
        or ``(None, None, "")`` when every remaining provider is unavailable.
    """
    failed_label = failed_provider.lower().strip()
    skip_labels = {failed_label}

    # NOTE(review): ``in`` is a *substring* test against the failed label
    # (e.g. "codex" in "openai-codex"), not an equality check.  Preserved
    # as-is; confirm whether equality was intended.
    main_provider = _read_main_provider()
    if main_provider and main_provider.lower() in failed_label:
        skip_labels.add(main_provider.lower())

    # Translate resolved-provider spellings into the labels used by the chain.
    alias_to_label = {
        "openrouter": "openrouter",
        "nous": "nous",
        "openai-codex": "openai-codex",
        "codex": "openai-codex",
        "ollama": "ollama",
        "custom": "local/custom",
        "local/custom": "local/custom",
    }
    skip_chain_labels = {alias_to_label.get(name, name) for name in skip_labels}

    tried = []
    for label, try_fn in _get_provider_chain():
        if label in skip_chain_labels:
            continue
        client, model = try_fn()
        if client is None:
            tried.append(label)
            continue
        logger.info(
            "Auxiliary %s: payment error on %s — falling back to %s (%s)",
            task or "call", failed_provider, label, model or "default",
        )
        return client, model, label

    logger.warning(
        "Auxiliary %s: payment error on %s and no fallback available (tried: %s)",
        task or "call", failed_provider, ", ".join(tried),
    )
    return None, None, ""
|
||||
|
||||
|
||||
def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Full auto-detection chain.
|
||||
|
||||
@@ -1053,7 +904,10 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
|
||||
# ── Step 2: aggregator / fallback chain ──────────────────────────────
|
||||
tried = []
|
||||
for label, try_fn in _get_provider_chain():
|
||||
for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
|
||||
_try_codex, _resolve_api_key_provider):
|
||||
fn_name = getattr(try_fn, "__name__", "unknown")
|
||||
label = _AUTO_PROVIDER_LABELS.get(fn_name, fn_name)
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
if tried:
|
||||
@@ -1181,7 +1035,7 @@ def resolve_provider_client(
|
||||
client, default = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: nous requested "
|
||||
"but Nous Portal not configured (run: hermes auth)")
|
||||
"but Nous Portal not configured (run: hermes login)")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
@@ -1210,15 +1064,6 @@ def resolve_provider_client(
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Ollama (first-class local provider) ──────────────────────────
|
||||
if provider == "ollama":
|
||||
base_url = (explicit_base_url or os.getenv("OLLAMA_BASE_URL", "") or "http://localhost:11434").strip().rstrip("/")
|
||||
base_url = base_url + "/v1" if not base_url.endswith("/v1") else base_url
|
||||
api_key = (explicit_api_key or os.getenv("OLLAMA_API_KEY", "") or "ollama").strip()
|
||||
final_model = model or _read_main_model() or "gemma4:12b"
|
||||
client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
|
||||
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
if explicit_base_url:
|
||||
@@ -1359,7 +1204,6 @@ def get_async_text_auxiliary_client(task: str = ""):
|
||||
_VISION_AUTO_PROVIDER_ORDER = (
|
||||
"openrouter",
|
||||
"nous",
|
||||
"ollama",
|
||||
"openai-codex",
|
||||
"anthropic",
|
||||
"custom",
|
||||
@@ -1941,15 +1785,12 @@ def call_llm(
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
# For auto/custom with no credentials, try the full auto chain
|
||||
# rather than hardcoding OpenRouter (which may be depleted).
|
||||
# Pass model=None so each provider uses its own default —
|
||||
# resolved_model may be an OpenRouter-format slug that doesn't
|
||||
# work on other providers.
|
||||
# For auto/custom, fall back to OpenRouter
|
||||
if not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
logger.info("Auxiliary %s: provider %s unavailable, falling back to openrouter",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto")
|
||||
client, final_model = _get_cached_client(
|
||||
"openrouter", resolved_model or _OPENROUTER_MODEL)
|
||||
if client is None:
|
||||
raise RuntimeError(
|
||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||
@@ -1970,7 +1811,7 @@ def call_llm(
|
||||
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
# Handle max_tokens vs max_completion_tokens retry
|
||||
try:
|
||||
return client.chat.completions.create(**kwargs)
|
||||
except Exception as first_err:
|
||||
@@ -1978,30 +1819,7 @@ def call_llm(
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
return client.chat.completions.create(**kwargs)
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment error,
|
||||
# fall through to the payment fallback below.
|
||||
if not _is_payment_error(retry_err):
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||
# When the resolved provider returns 402 or a credit-related error,
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
# common case where a user runs out of OpenRouter credits but has
|
||||
# Codex OAuth or another provider available.
|
||||
if _is_payment_error(first_err):
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task)
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout,
|
||||
extra_body=extra_body)
|
||||
return fb_client.chat.completions.create(**fb_kwargs)
|
||||
return client.chat.completions.create(**kwargs)
|
||||
raise
|
||||
|
||||
|
||||
|
||||
158
agent/claw_runtime.py
Normal file
158
agent/claw_runtime.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
agent/claw_runtime.py — Claw Code runtime decomposition scaffold.
|
||||
Part of EPIC-999 Phase II — The Forge.
|
||||
|
||||
This module introduces the 5-class decomposition of the monolithic AIAgent
|
||||
to enable competing sub-agent rewrites and future runtime replacement.
|
||||
|
||||
Migration rule: each class begins as a thin facade over AIAgent methods.
|
||||
Logic migrates incrementally from run_agent.py into these classes.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
class ModelResponse:
    """Normalized model response, independent of provider.

    Every attribute is always populated: ``None`` (or any other falsy
    argument) is replaced by an empty string / a fresh empty list, so callers
    never need ``None`` checks.
    """

    def __init__(
        self,
        content: Optional[str] = None,
        tool_calls: Optional[list] = None,
        reasoning: Optional[str] = None,
    ):
        self.content = content or ""
        # ``or []`` creates a new list per instance; no shared mutable default.
        self.tool_calls = tool_calls or []
        self.reasoning = reasoning or ""

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(content={self.content!r}, "
            f"tool_calls={self.tool_calls!r}, reasoning={self.reasoning!r})"
        )
|
||||
|
||||
|
||||
class ToolResult:
    """Normalized tool execution result.

    Arguments are stored unchanged; ``error`` stays ``None`` when it is not
    supplied.
    """

    def __init__(self, tool_call_id: str, output: str, error: Optional[str] = None):
        self.tool_call_id = tool_call_id
        self.output = output
        self.error = error

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(tool_call_id={self.tool_call_id!r}, "
            f"output={self.output!r}, error={self.error!r})"
        )
|
||||
|
||||
|
||||
class ConversationLoop:
    """
    Owns the while-loop invariant: iteration budget, termination conditions,
    and the high-level orchestration of turn-taking.

    Currently a thin facade: ``run()`` forwards to the wrapped agent's
    ``run_conversation`` and adds no loop logic of its own.
    """

    def __init__(self, agent: "AIAgent"):
        self.agent = agent

    def run(
        self,
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]],
        system_message: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Run the conversation until completion or budget exhaustion.

        Args:
            messages: Conversation so far.  The last entry supplies the user
                message; everything before it is passed on as history.
            tools: Accepted for interface stability; this facade does not
                forward it to the agent.
            system_message: Optional system prompt, forwarded unchanged.

        Returns:
            Whatever ``agent.run_conversation`` returns.

        Termination (iteration budget, max iterations) is enforced by the
        delegated ``run_conversation`` call, not by this method.
        """
        user_message: Any = ""
        history = None
        if messages:
            # A trailing message may have no "content" key, or content=None;
            # treat both like the empty-conversation case instead of raising
            # KeyError or forwarding None as the user message.
            user_message = messages[-1].get("content")
            if user_message is None:
                user_message = ""
            if len(messages) > 1:
                history = messages[:-1]

        # Facade: delegate to AIAgent.run_conversation for now.
        return self.agent.run_conversation(
            user_message=user_message,
            system_message=system_message,
            conversation_history=history,
        )
|
||||
|
||||
|
||||
class ModelDispatcher:
    """
    Single point of contact with the LLM client: streaming, fallback
    activation, response normalization, and provider-specific quirks.

    Not implemented yet; ``call()`` always raises.
    """

    def __init__(self, agent: "AIAgent"):
        self.agent = agent

    def call(self, model: str, messages: List[Dict], tools: List[Dict], **kwargs) -> ModelResponse:
        """
        Dispatch one API call and hand back a normalized response.

        Intended invariant once migrated: the result is always a
        ModelResponse exposing .content, .tool_calls and .reasoning.

        Raises:
            NotImplementedError: always, until the streaming logic is moved
                here from AIAgent._interruptible_streaming_api_call and its
                normalization helpers.
        """
        pending = "ModelDispatcher.call() — migrate from AIAgent streaming logic"
        raise NotImplementedError(pending)
|
||||
|
||||
|
||||
class ToolExecutor:
    """
    Owns tool execution: sequential vs concurrent dispatch, error wrapping,
    and result formatting.

    Currently a thin facade over the wrapped agent's sequential executor;
    no concurrent path is wired up here yet.
    """

    def __init__(self, agent: "AIAgent"):
        self.agent = agent

    def execute(
        self,
        tool_calls: List[Any],
        task_id: Optional[str] = None,
    ) -> "List[ToolResult]":
        """
        Execute a list of tool calls and return the agent's results.

        Args:
            tool_calls: Tool calls to run, passed through unchanged.
            task_id: Optional task identifier, forwarded as a keyword.

        Returns:
            The value returned by ``agent._execute_tool_calls_sequential``.
            NOTE(review): the annotation promises ``ToolResult`` objects, but
            nothing here converts the agent's return value — confirm.

        Raises:
            NotImplementedError: if the agent has no
                ``_execute_tool_calls_sequential`` attribute.

        Intended invariant: every tool_call produces exactly one ToolResult.
        """
        if not hasattr(self.agent, "_execute_tool_calls_sequential"):
            raise NotImplementedError(
                "ToolExecutor.execute() — migrate from AIAgent tool execution"
            )
        # Facade: delegate to AIAgent._execute_tool_calls_sequential.
        return self.agent._execute_tool_calls_sequential(tool_calls, task_id=task_id)
|
||||
|
||||
|
||||
class MemoryInterceptor:
    """
    Intercepts agent-level tools (memory, todo) before they reach the global registry.
    Also handles flush-on-exit for pending memories.

    Interception is not migrated yet: ``intercept()`` is a placeholder that
    always signals pass-through.
    """

    def __init__(self, agent: "AIAgent"):
        self.agent = agent

    def intercept(
        self,
        tool_name: str,
        args: Dict[str, Any],
        task_id: Optional[str] = None,
    ) -> Optional[str]:
        """
        Decide whether a tool call is handled here instead of the ToolExecutor.

        Intended contract: for 'memory' or 'todo', handle the call directly
        and return its result; otherwise return None to signal pass-through.

        Current behavior: always returns None, including for 'memory' and
        'todo', because the interception block has not been moved here from
        run_agent.py yet.

        Invariant: must not mutate agent state except through explicit flush().
        """
        # Placeholder: the memory/todo interception block will live here.
        return None

    def flush(self) -> None:
        """Flush pending memories via ``agent.flush_memories()`` if it exists."""
        if hasattr(self.agent, "flush_memories"):
            self.agent.flush_memories()
|
||||
|
||||
|
||||
class PromptBuilder:
    """
    Owns system prompt assembly, skill injection, context compression,
    and prompt caching marker placement.

    Currently a thin facade over the wrapped agent's ``_build_system_prompt``.
    """

    def __init__(self, agent: "AIAgent"):
        self.agent = agent

    def build(
        self,
        user_message: str,
        conversation_history: Optional[List[Dict[str, Any]]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Build the full message list for the API call.

        Args:
            user_message: Text of the new user turn; also passed to
                ``agent._build_system_prompt``.
            conversation_history: Earlier messages, inserted between the
                system message and the new user turn.  Not mutated.

        Returns:
            A new list: the system message (only when the agent produced a
            non-empty prompt), then the history, then the user message.

        Raises:
            NotImplementedError: if the agent has no ``_build_system_prompt``.

        Intended invariant: output starts with a system message (or
        equivalent).  This holds only when the agent returns a non-empty
        system prompt.
        """
        if not hasattr(self.agent, "_build_system_prompt"):
            raise NotImplementedError(
                "PromptBuilder.build() — migrate from AIAgent prompt assembly"
            )

        # Facade: delegate to AIAgent._build_system_prompt and related helpers.
        system_msg = self.agent._build_system_prompt(user_message)
        messages: List[Dict[str, Any]] = []
        if system_msg:
            messages.append({"role": "system", "content": system_msg})
        if conversation_history:
            messages.extend(conversation_history)
        messages.append({"role": "user", "content": user_message})
        return messages
|
||||
@@ -1,6 +0,0 @@
|
||||
"""
|
||||
@soul:honesty.grounding Grounding before generation. Consult verified sources before pattern-matching.
|
||||
@soul:honesty.source_distinction Source distinction. Every claim must point to a verified source.
|
||||
@soul:honesty.audit_trail The audit trail. Every response is logged with inputs and confidence.
|
||||
"""
|
||||
# This file serves as a registry for the Conscience Validator to prove the apparatus exists.
|
||||
@@ -14,7 +14,6 @@ Improvements over v1:
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
@@ -47,7 +46,6 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
|
||||
|
||||
# Chars per token rough estimate
|
||||
_CHARS_PER_TOKEN = 4
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
@@ -66,7 +64,7 @@ class ContextCompressor:
|
||||
model: str,
|
||||
threshold_percent: float = 0.50,
|
||||
protect_first_n: int = 3,
|
||||
protect_last_n: int = 20,
|
||||
protect_last_n: int = 5,
|
||||
summary_target_ratio: float = 0.20,
|
||||
quiet_mode: bool = False,
|
||||
summary_model_override: str = None,
|
||||
@@ -120,7 +118,6 @@ class ContextCompressor:
|
||||
|
||||
# Stores the previous compaction summary for iterative updates
|
||||
self._previous_summary: Optional[str] = None
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -261,14 +258,6 @@ class ContextCompressor:
|
||||
the middle turns without a summary rather than inject a useless
|
||||
placeholder.
|
||||
"""
|
||||
now = time.monotonic()
|
||||
if now < self._summary_failure_cooldown_until:
|
||||
logger.debug(
|
||||
"Skipping context summary during cooldown (%.0fs remaining)",
|
||||
self._summary_failure_cooldown_until - now,
|
||||
)
|
||||
return None
|
||||
|
||||
summary_budget = self._compute_summary_budget(turns_to_summarize)
|
||||
content_to_summarize = self._serialize_for_summary(turns_to_summarize)
|
||||
|
||||
@@ -356,6 +345,7 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
call_kwargs = {
|
||||
"task": "compression",
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"temperature": 0.3,
|
||||
"max_tokens": summary_budget * 2,
|
||||
# timeout resolved from auxiliary.compression.timeout config by call_llm
|
||||
}
|
||||
@@ -369,23 +359,13 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
summary = content.strip()
|
||||
# Store for iterative updates on next compaction
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
|
||||
"summary. Middle turns will be dropped without summary.")
|
||||
return None
|
||||
except Exception as e:
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
e,
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS,
|
||||
)
|
||||
logging.warning("Failed to generate context summary: %s", e)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
@@ -668,7 +648,7 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
compressed.append({"role": summary_role, "content": summary})
|
||||
else:
|
||||
if not self.quiet_mode:
|
||||
logger.debug("No summary model available — middle turns dropped without summary")
|
||||
logger.warning("No summary model available — middle turns dropped without summary")
|
||||
|
||||
for i in range(compress_end, n_messages):
|
||||
msg = messages[i].copy()
|
||||
|
||||
@@ -23,11 +23,9 @@ from hermes_cli.auth import (
|
||||
_agent_key_is_usable,
|
||||
_codex_access_token_is_expiring,
|
||||
_decode_jwt_claims,
|
||||
_import_codex_cli_tokens,
|
||||
_is_expiring,
|
||||
_load_auth_store,
|
||||
_load_provider_state,
|
||||
_resolve_zai_base_url,
|
||||
read_credential_pool,
|
||||
write_credential_pool,
|
||||
)
|
||||
@@ -349,9 +347,6 @@ def get_pool_strategy(provider: str) -> str:
|
||||
return STRATEGY_FILL_FIRST
|
||||
|
||||
|
||||
DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
|
||||
|
||||
|
||||
class CredentialPool:
|
||||
def __init__(self, provider: str, entries: List[PooledCredential]):
|
||||
self.provider = provider
|
||||
@@ -359,8 +354,6 @@ class CredentialPool:
|
||||
self._current_id: Optional[str] = None
|
||||
self._strategy = get_pool_strategy(provider)
|
||||
self._lock = threading.Lock()
|
||||
self._active_leases: Dict[str, int] = {}
|
||||
self._max_concurrent = DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL
|
||||
|
||||
def has_credentials(self) -> bool:
|
||||
return bool(self._entries)
|
||||
@@ -447,39 +440,6 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential:
    """Refresh an openai-codex pool entry from ~/.codex/auth.json when stale.

    OpenAI OAuth refresh tokens are single-use and rotate on every refresh,
    so a refresh performed by the Codex CLI (or another Hermes profile)
    leaves this pool's stored refresh_token outdated.  When the CLI's
    refresh token differs from the entry's, the entry is replaced with the
    CLI token pair, its status fields are cleared, and the pool is persisted.

    Returns the updated entry on a successful sync, otherwise the entry that
    was passed in.  Any exception is logged at debug level and swallowed.
    """
    if self.provider != "openai-codex":
        return entry
    try:
        cli_tokens = _import_codex_cli_tokens()
        if not cli_tokens:
            return entry
        fresh_refresh = cli_tokens.get("refresh_token", "")
        fresh_access = cli_tokens.get("access_token", "")
        if not fresh_refresh or fresh_refresh == entry.refresh_token:
            return entry
        logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id)
        refreshed = replace(
            entry,
            access_token=fresh_access,
            refresh_token=fresh_refresh,
            last_status=None,
            last_status_at=None,
            last_error_code=None,
        )
        self._replace_entry(entry, refreshed)
        self._persist()
        return refreshed
    except Exception as exc:
        # Best-effort sync: a failure must not block credential selection.
        logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
    return entry
|
||||
|
||||
def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]:
|
||||
if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token:
|
||||
if force:
|
||||
@@ -669,16 +629,6 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For openai-codex entries, sync from ~/.codex/auth.json before
|
||||
# any status/refresh checks. This picks up tokens refreshed by
|
||||
# the Codex CLI or another Hermes profile.
|
||||
if (self.provider == "openai-codex"
|
||||
and entry.last_status == STATUS_EXHAUSTED
|
||||
and entry.refresh_token):
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@@ -766,51 +716,6 @@ class CredentialPool:
|
||||
logger.info("credential pool: rotated to %s", _next_label)
|
||||
return next_entry
|
||||
|
||||
def acquire_lease(self, credential_id: Optional[str] = None) -> Optional[str]:
    """Take a soft lease on a credential and return its id.

    With an explicit *credential_id* that entry is leased as-is.  Without
    one, the available credential holding the fewest leases is picked, with
    priority breaking ties.  Credentials below the per-credential soft cap
    are preferred, but when all are at the cap the least-leased one is still
    returned rather than blocking.  Returns ``None`` only when no credential
    is available.
    """
    with self._lock:
        if not credential_id:
            available = self._available_entries(clear_expired=True, refresh=True)
            if not available:
                return None

            def lease_load(entry):
                return self._active_leases.get(entry.id, 0)

            under_cap = [e for e in available if lease_load(e) < self._max_concurrent]
            pool = under_cap or available
            credential_id = min(pool, key=lambda e: (lease_load(e), e.priority)).id

        leases = self._active_leases
        leases[credential_id] = leases.get(credential_id, 0) + 1
        self._current_id = credential_id
        return credential_id
|
||||
|
||||
def release_lease(self, credential_id: str) -> None:
    """Drop one lease on *credential_id*; forget the id once none remain."""
    with self._lock:
        remaining = self._active_leases.get(credential_id, 0) - 1
        if remaining > 0:
            self._active_leases[credential_id] = remaining
        else:
            # Covers the last lease and ids that were never leased.
            self._active_leases.pop(credential_id, None)
|
||||
|
||||
def active_lease_count(self, credential_id: str) -> int:
    """Report how many leases are currently held on *credential_id*."""
    with self._lock:
        leases = self._active_leases
        return leases[credential_id] if credential_id in leases else 0
|
||||
|
||||
def try_refresh_current(self) -> Optional[PooledCredential]:
|
||||
with self._lock:
|
||||
return self._try_refresh_current_unlocked()
|
||||
@@ -1087,8 +992,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
active_sources.add(source)
|
||||
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
|
||||
base_url = env_url or pconfig.inference_base_url
|
||||
if provider == "zai":
|
||||
base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
|
||||
@@ -890,6 +890,8 @@ def get_cute_tool_message(
|
||||
return _wrap(f"┊ ◀️ back {dur}")
|
||||
if tool_name == "browser_press":
|
||||
return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
|
||||
if tool_name == "browser_close":
|
||||
return _wrap(f"┊ 🚪 close browser {dur}")
|
||||
if tool_name == "browser_get_images":
|
||||
return _wrap(f"┊ 🖼️ images extracting {dur}")
|
||||
if tool_name == "browser_vision":
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
"""Phase 3: Deep Knowledge Distillation from Google.
|
||||
|
||||
Performs deep dives into technical domains and distills them into
|
||||
Timmy's Sovereign Knowledge Graph.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Dict, Any
|
||||
from agent.gemini_adapter import GeminiAdapter
|
||||
from agent.symbolic_memory import SymbolicMemory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DomainDistiller:
    """Map a technical domain into knowledge-graph triples using Gemini.

    ``adapter`` (a ``GeminiAdapter``) issues the model request and
    ``symbolic`` (a ``SymbolicMemory``) ingests the returned triples.
    """

    def __init__(self):
        self.adapter = GeminiAdapter()
        self.symbolic = SymbolicMemory()

    def distill_domain(self, domain: str):
        """Ask Gemini for a triple-based map of ``domain`` and ingest it.

        The request turns on search grounding and thinking and asks for a
        JSON response. The ``"text"`` field of the result is parsed as JSON,
        re-serialised, and handed to ``self.symbolic.ingest_text``.

        Args:
            domain: Domain name; interpolated into both the prompt and the
                system instruction.

        Returns:
            The value returned by ``ingest_text`` (logged as the number of
            new triples).

        Raises:
            json.JSONDecodeError: If the model's text is not valid JSON.
        """
        logger.info(f"Distilling domain: {domain}")

        prompt = f"""
        Please perform a deep knowledge distillation of the following domain: {domain}

        Use Google Search to find foundational papers, recent developments, and key entities.
        Synthesize this into a structured 'Domain Map' consisting of high-fidelity knowledge triples.
        Focus on the structural relationships that define the domain.

        Format: [{{"s": "subject", "p": "predicate", "o": "object"}}]
        """
        request = dict(
            model="gemini-3.1-pro-preview",
            prompt=prompt,
            system_instruction=f"You are Timmy's Domain Distiller. Your goal is to map the entire {domain} domain into a structured Knowledge Graph.",
            grounding=True,
            thinking=True,
            response_mime_type="application/json",
        )
        result = self.adapter.generate(**request)

        # Round-trip through json so only well-formed JSON reaches the memory.
        parsed = json.loads(result["text"])
        serialized = json.dumps(parsed)
        count = self.symbolic.ingest_text(serialized)
        logger.info(f"Distilled {count} new triples for domain: {domain}")
        return count
|
||||
@@ -1,60 +0,0 @@
|
||||
"""Phase 1: Synthetic Data Generation for Self-Correction.
|
||||
|
||||
Generates reasoning traces where Timmy makes a subtle error and then
|
||||
identifies and corrects it using the Conscience Validator.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Dict, Any
|
||||
from agent.gemini_adapter import GeminiAdapter
|
||||
from tools.gitea_client import GiteaClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SelfCorrectionGenerator:
    """Generate synthetic self-correction reasoning traces and store them.

    ``adapter`` (a ``GeminiAdapter``) produces the traces; ``gitea`` (a
    ``GiteaClient``) is used to commit each trace as a JSON file.
    """

    def __init__(self):
        self.adapter = GeminiAdapter()
        self.gitea = GiteaClient()

    def generate_trace(self, task: str) -> Dict[str, Any]:
        """Generate a single self-correction reasoning trace for ``task``.

        Args:
            task: Task description, interpolated into the prompt.

        Returns:
            The model's JSON response parsed into a Python object.

        Raises:
            json.JSONDecodeError: If the model's text is not valid JSON.
        """
        prompt = f"""
        Task: {task}

        Please simulate a multi-step reasoning trace for this task.
        Intentionally include one subtle error in the reasoning (e.g., a logical flaw, a misinterpretation of a rule, or a factual error).
        Then, show how Timmy identifies the error using his Conscience Validator and provides a corrected reasoning trace.

        Format the output as JSON:
        {{
            "task": "{task}",
            "initial_trace": "...",
            "error_identified": "...",
            "correction_trace": "...",
            "lessons_learned": "..."
        }}
        """
        result = self.adapter.generate(
            model="gemini-3.1-pro-preview",
            prompt=prompt,
            system_instruction="You are Timmy's Synthetic Data Engine. Generate high-fidelity self-correction traces.",
            response_mime_type="application/json",
            thinking=True
        )

        trace = json.loads(result["text"])
        return trace

    def generate_and_save(self, task: str, count: int = 1):
        """Generate ``count`` traces for ``task`` and save each one to Gitea.

        Every trace is serialised as indented JSON, base64-encoded and passed
        to ``self.gitea.create_file`` for the ``Timmy_Foundation/timmy-config``
        repository. File names are derived from the lower-cased task with
        spaces replaced by underscores, plus the trace index.

        Args:
            task: Task description forwarded to ``generate_trace``.
            count: Number of traces to generate.
        """
        # Imported locally: the module header does not import base64, which
        # made the original call to base64.b64encode raise NameError.
        import base64

        repo = "Timmy_Foundation/timmy-config"
        for i in range(count):
            trace = self.generate_trace(task)
            filename = f"memories/synthetic_data/self_correction/{task.lower().replace(' ', '_')}_{i}.json"

            content = json.dumps(trace, indent=2)
            content_b64 = base64.b64encode(content.encode()).decode()

            self.gitea.create_file(repo, filename, content_b64, f"Add synthetic self-correction trace for {task}")
            logger.info(f"Saved synthetic trace to {filename}")
|
||||
@@ -1,42 +0,0 @@
|
||||
"""Phase 2: Multi-Modal World Modeling.
|
||||
|
||||
Ingests multi-modal data (vision/audio) to build a spatial and temporal
|
||||
understanding of Timmy's environment.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import base64
|
||||
from typing import List, Dict, Any
|
||||
from agent.gemini_adapter import GeminiAdapter
|
||||
from agent.symbolic_memory import SymbolicMemory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class WorldModeler:
    """Turn observations of the environment into knowledge-graph triples.

    ``adapter`` (a ``GeminiAdapter``) issues the model request and
    ``symbolic`` (a ``SymbolicMemory``) ingests the returned triples.
    """

    def __init__(self):
        self.adapter = GeminiAdapter()
        self.symbolic = SymbolicMemory()

    def analyze_environment(self, image_data: str, mime_type: str = "image/jpeg"):
        """Request spatial/temporal triples from Gemini and ingest them.

        NOTE(review): ``image_data`` and ``mime_type`` are accepted but not
        yet forwarded to the model; the call below is text-only, as the
        inline comments about simulating the multi-modal call indicate.

        Args:
            image_data: Image payload (currently unused).
            mime_type: MIME type of ``image_data`` (currently unused).

        Returns:
            The parsed JSON value from the model's ``"text"`` field.

        Raises:
            json.JSONDecodeError: If the model's text is not valid JSON.
        """
        # Imported locally: the module header imports base64 but not json,
        # which made the original json.loads/json.dumps calls raise NameError.
        import json

        # In a real scenario, we'd use Gemini's multi-modal capabilities
        # For now, we'll simulate the vision-to-symbolic extraction
        prompt = f"""
        Analyze the following image of Timmy's environment.
        Identify all key objects, their spatial relationships, and any temporal changes.
        Extract this into a set of symbolic triples for the Knowledge Graph.

        Format: [{{"s": "subject", "p": "predicate", "o": "object"}}]
        """
        # Simulate multi-modal call (Gemini 3.1 Pro Vision)
        result = self.adapter.generate(
            model="gemini-3.1-pro-preview",
            prompt=prompt,
            system_instruction="You are Timmy's World Modeler. Build a high-fidelity spatial/temporal map of the environment.",
            response_mime_type="application/json"
        )

        triples = json.loads(result["text"])
        self.symbolic.ingest_text(json.dumps(triples))
        logger.info(f"Updated world model with {len(triples)} new spatial triples.")
        return triples
|
||||
@@ -1,404 +0,0 @@
|
||||
"""Automatic fallback router for handling provider quota and rate limit errors.
|
||||
|
||||
This module provides intelligent fallback detection and routing when the primary
|
||||
provider (e.g., Anthropic) encounters quota limitations or rate limits.
|
||||
|
||||
Features:
|
||||
- Detects quota/rate limit errors from different providers
|
||||
- Automatic fallback to kimi-coding when Anthropic quota is exceeded
|
||||
- Configurable fallback chains with default anthropic -> kimi-coding
|
||||
- Logging and monitoring of fallback events
|
||||
|
||||
Usage:
|
||||
from agent.fallback_router import (
|
||||
is_quota_error,
|
||||
get_default_fallback_chain,
|
||||
should_auto_fallback,
|
||||
)
|
||||
|
||||
if is_quota_error(error, provider="anthropic"):
|
||||
if should_auto_fallback(provider="anthropic"):
|
||||
fallback_chain = get_default_fallback_chain("anthropic")
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default fallback chains per provider
|
||||
# Each chain is a list of fallback configurations tried in order
|
||||
DEFAULT_FALLBACK_CHAINS: Dict[str, List[Dict[str, Any]]] = {
    # primary provider -> ordered fallbacks; each entry names the provider
    # to switch to and the model to request from it.
    "anthropic": [
        {"provider": "kimi-coding", "model": "kimi-k2.5"},
        {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
    ],
    "openrouter": [
        {"provider": "kimi-coding", "model": "kimi-k2.5"},
        {"provider": "zai", "model": "glm-5"},
    ],
    "kimi-coding": [
        {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        {"provider": "zai", "model": "glm-5"},
    ],
    "zai": [
        {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        {"provider": "kimi-coding", "model": "kimi-k2.5"},
    ],
}
|
||||
|
||||
# Quota/rate limit error patterns by provider
|
||||
# These are matched (case-insensitive) against error messages
|
||||
QUOTA_ERROR_PATTERNS: Dict[str, List[str]] = {
    # provider -> substrings looked for in the lower-cased error message.
    "anthropic": [
        "rate limit",
        "ratelimit",
        # NOTE(review): this entry was listed twice; the duplicate was
        # removed. If a variant spelling was intended, add it explicitly.
        "quota exceeded",
        "insufficient quota",
        "429",
        "403",
        "too many requests",
        "capacity exceeded",
        "over capacity",
        "temporarily unavailable",
        "server overloaded",
        "resource exhausted",
        "billing threshold",
        "credit balance",
        "payment required",
        "402",
    ],
    "openrouter": [
        "rate limit",
        "ratelimit",
        "quota exceeded",
        "insufficient credits",
        "429",
        "402",
        "no endpoints available",
        "all providers failed",
        "over capacity",
    ],
    "kimi-coding": [
        "rate limit",
        "ratelimit",
        "quota exceeded",
        "429",
        "insufficient balance",
    ],
    "zai": [
        "rate limit",
        "ratelimit",
        "quota exceeded",
        "429",
        "insufficient quota",
    ],
}

# HTTP status codes indicating quota/rate limit issues
QUOTA_STATUS_CODES = {429, 402, 403}
|
||||
|
||||
|
||||
def is_quota_error(error: Exception, provider: Optional[str] = None) -> bool:
    """Detect if an error is quota/rate limit related.

    Checks, in order: the exception's class name, an HTTP-like status code
    attribute, provider-specific message patterns, then generic message
    patterns.

    Args:
        error: The exception to check. ``None`` yields ``False``.
        provider: Optional provider name. When given, only that provider's
            entry in ``QUOTA_ERROR_PATTERNS`` is consulted; otherwise every
            provider's patterns are tried.

    Returns:
        True if the error appears to be quota/rate limit related.
    """
    if error is None:
        return False

    error_str = str(error).lower()
    error_type = type(error).__name__.lower()

    # Check for common rate limit exception types
    type_markers = (
        "ratelimit", "rate_limit", "quota", "toomanyrequests",
        "insufficient_quota", "billing", "payment",
    )
    if any(marker in error_type for marker in type_markers):
        return True

    # Check HTTP status code if available. Every candidate attribute goes
    # through the same int() coercion so a string such as "429" on
    # ``status_code`` is recognised too; the first attribute that yields an
    # integer wins.
    status_code = None
    for attr in ("status_code", "code", "http_status", "response_code", "status"):
        raw = getattr(error, attr, None)
        if raw is None:
            continue
        try:
            status_code = int(raw)
        except (TypeError, ValueError):
            # e.g. a symbolic error code like "rate_limit_exceeded"
            continue
        break

    if status_code in QUOTA_STATUS_CODES:
        return True

    # Check provider-specific patterns
    providers_to_check = [provider] if provider else QUOTA_ERROR_PATTERNS.keys()

    for prov in providers_to_check:
        for pattern in QUOTA_ERROR_PATTERNS.get(prov, []):
            if pattern.lower() in error_str:
                logger.debug(
                    "Detected %s quota error pattern '%s' in: %s",
                    prov, pattern, error
                )
                return True

    # Check generic quota patterns
    generic_patterns = (
        "rate limit exceeded",
        "quota exceeded",
        "too many requests",
        "capacity exceeded",
        "temporarily unavailable",
        "try again later",
        "resource exhausted",
        "billing",
        "payment required",
        "insufficient credits",
        "insufficient quota",
    )
    return any(pattern in error_str for pattern in generic_patterns)
|
||||
|
||||
|
||||
def get_default_fallback_chain(
    primary_provider: str,
    exclude_provider: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Get the default fallback chain for a primary provider.

    The result is a fresh list of fresh dicts, so callers may mutate it
    without altering ``DEFAULT_FALLBACK_CHAINS``.

    Args:
        primary_provider: The primary provider name.
        exclude_provider: Optional provider to exclude from the chain.

    Returns:
        List of fallback configurations in the order they should be tried;
        empty when the provider has no default chain.
    """
    chain = DEFAULT_FALLBACK_CHAINS.get(primary_provider, [])
    # Copy each entry: returning the module-level dicts themselves would let
    # a caller's edit leak into every later lookup.
    return [
        dict(fb) for fb in chain
        if not exclude_provider or fb.get("provider") != exclude_provider
    ]
|
||||
|
||||
|
||||
def should_auto_fallback(
    provider: str,
    error: Optional[Exception] = None,
    auto_fallback_enabled: Optional[bool] = None,
) -> bool:
    """Decide whether an automatic provider fallback should be attempted.

    Args:
        provider: The current provider name.
        error: Optional error that prompted the question.
        auto_fallback_enabled: Explicit on/off switch. When ``None`` the
            ``HERMES_AUTO_FALLBACK`` environment variable is consulted
            (default ``"true"``; ``true``/``1``/``yes``/``on`` enable it).

    Returns:
        True if automatic fallback should be attempted.
    """
    enabled = auto_fallback_enabled
    if enabled is None:
        enabled = os.getenv("HERMES_AUTO_FALLBACK", "true").lower() in ("true", "1", "yes", "on")
    if not enabled:
        return False

    if provider in DEFAULT_FALLBACK_CHAINS:
        if error is None:
            # No error yet: eager fallback only for providers known to have
            # quota issues.
            return provider in ("anthropic",)
        # With an error in hand, fall back only for quota/rate-limit errors.
        return is_quota_error(error, provider)

    # Provider without a configured chain: still allow fallback when the
    # error looks like a quota problem under the generic checks.
    if error and is_quota_error(error):
        logger.debug(
            "Provider %s has no fallback chain but quota error detected",
            provider
        )
        return True
    return False
|
||||
|
||||
|
||||
def log_fallback_event(
    from_provider: str,
    to_provider: str,
    to_model: str,
    reason: str,
    error: Optional[Exception] = None,
) -> None:
    """Record a provider fallback in the module log.

    An INFO line summarises the switch; a DEBUG line carries the same facts
    as a dict, plus the error's class name and the first 200 characters of
    its message when an error is supplied.

    Args:
        from_provider: The provider being abandoned.
        to_provider: The provider being switched to.
        to_model: The model requested from ``to_provider``.
        reason: Short explanation of why the fallback happened.
        error: Optional error that triggered the fallback.
    """
    log_data = {
        "event": "provider_fallback",
        "from_provider": from_provider,
        "to_provider": to_provider,
        "to_model": to_model,
        "reason": reason,
    }
    if error:
        log_data.update(
            error_type=type(error).__name__,
            error_message=str(error)[:200],  # keep log lines bounded
        )

    logger.info("Provider fallback: %s -> %s (%s) | Reason: %s",
                from_provider, to_provider, to_model, reason)

    # Structured copy for monitoring.
    logger.debug("Fallback event data: %s", log_data)
|
||||
|
||||
|
||||
def resolve_fallback_with_credentials(
    fallback_config: Dict[str, Any],
) -> Tuple[Optional[Any], Optional[str]]:
    """Resolve a fallback configuration to a client and model.

    Args:
        fallback_config: Fallback configuration dict with ``"provider"`` and
            ``"model"`` keys.

    Returns:
        Tuple of ``(client, model)``. ``(None, None)`` is returned when the
        configuration lacks a provider or model, or when
        ``resolve_provider_client`` raises.
    """
    provider = fallback_config.get("provider")
    model = fallback_config.get("model")

    # Validate before importing: an incomplete config is rejected without
    # paying for (or depending on) the auxiliary client module.
    if not provider or not model:
        return None, None

    from agent.auxiliary_client import resolve_provider_client

    try:
        client, resolved_model = resolve_provider_client(
            provider,
            model=model,
            raw_codex=True,
        )
        # Keep the requested model name when the resolver does not supply one.
        return client, resolved_model or model
    except Exception as exc:
        logger.debug(
            "Failed to resolve fallback provider %s: %s",
            provider, exc
        )
        return None, None
|
||||
|
||||
|
||||
def get_auto_fallback_chain(
    primary_provider: str,
    user_fallback_chain: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
    """Pick the fallback chain to use for automatic fallback.

    A non-empty user-provided chain takes precedence and is returned as-is;
    the two chains are not merged. Otherwise the default chain for
    ``primary_provider`` is used.

    Args:
        primary_provider: The primary provider name.
        user_fallback_chain: Optional user-provided fallback chain.

    Returns:
        The effective fallback chain.
    """
    if not user_fallback_chain:
        return get_default_fallback_chain(primary_provider)
    return user_fallback_chain
|
||||
|
||||
|
||||
def is_fallback_available(
    fallback_config: Dict[str, Any],
) -> bool:
    """Check if a fallback configuration has available credentials.

    Credentials count as available when one of the provider's API-key
    environment variables is set to a non-empty value or, for the
    ``nous`` / ``openai-codex`` providers, when ``auth.json`` in the Hermes
    home names the provider as active or lists it under ``"providers"``.

    Args:
        fallback_config: Fallback configuration dict; only ``"provider"`` is
            read.

    Returns:
        True if credentials are available for the fallback provider.
    """
    provider = fallback_config.get("provider")
    if not provider:
        return False

    # Check environment variables for API keys
    env_vars = {
        "anthropic": ["ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN"],
        "kimi-coding": ["KIMI_API_KEY", "KIMI_API_TOKEN"],
        "zai": ["ZAI_API_KEY", "Z_AI_API_KEY"],
        "openrouter": ["OPENROUTER_API_KEY"],
        "minimax": ["MINIMAX_API_KEY"],
        "minimax-cn": ["MINIMAX_CN_API_KEY"],
        "deepseek": ["DEEPSEEK_API_KEY"],
        "alibaba": ["DASHSCOPE_API_KEY", "ALIBABA_API_KEY"],
        "nous": ["NOUS_AGENT_KEY", "NOUS_ACCESS_TOKEN"],
    }

    keys_to_check = env_vars.get(provider)
    if keys_to_check is None:
        # Unlisted provider: derive "<PROVIDER>_API_KEY". Hyphens are not
        # usable in shell variable names, so prefer the underscore form
        # (matching e.g. MINIMAX_CN_API_KEY above) and keep the literal
        # spelling as a backward-compatible second choice.
        legacy_key = f"{provider.upper()}_API_KEY"
        normalized_key = legacy_key.replace("-", "_")
        keys_to_check = [normalized_key]
        if legacy_key != normalized_key:
            keys_to_check.append(legacy_key)

    if any(os.getenv(key) for key in keys_to_check):
        return True

    # Check auth.json for OAuth providers
    if provider in ("nous", "openai-codex"):
        try:
            import json
            from hermes_cli.config import get_hermes_home

            auth_path = get_hermes_home() / "auth.json"
            if auth_path.exists():
                data = json.loads(auth_path.read_text())
                if data.get("active_provider") == provider:
                    return True
                # Check for provider in providers dict
                if data.get("providers", {}).get(provider):
                    return True
        except Exception as exc:
            # Best effort: an unreadable or malformed auth.json means "no
            # credentials", but leave a trace instead of failing silently.
            logger.debug(
                "Could not inspect auth.json for provider %s: %s",
                provider, exc
            )

    return False
|
||||
|
||||
|
||||
def filter_available_fallbacks(
    fallback_chain: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Keep only the fallbacks for which credentials are available.

    Args:
        fallback_chain: Fallback configurations in priority order.

    Returns:
        A new list holding, in the same order, the entries accepted by
        ``is_fallback_available``.
    """
    usable = []
    for candidate in fallback_chain:
        if is_fallback_available(candidate):
            usable.append(candidate)
    return usable
|
||||
@@ -1,90 +0,0 @@
|
||||
"""Native Gemini 3 Series adapter for Hermes Agent.
|
||||
|
||||
Leverages the google-genai SDK to provide sovereign access to Gemini's
|
||||
unique capabilities: Thinking (Reasoning) tokens, Search Grounding,
|
||||
and Maps Grounding.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
try:
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
except ImportError:
|
||||
genai = None # type: ignore
|
||||
types = None # type: ignore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class GeminiAdapter:
    """Thin wrapper around a ``google.genai`` client for text generation.

    ``client`` is ``None`` when the google-genai SDK could not be imported;
    ``generate`` then raises ``ImportError``.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Create the SDK client.

        Args:
            api_key: API key to use; falls back to the ``GEMINI_API_KEY``
                environment variable. A warning is logged when neither is
                set.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
        if not self.api_key:
            logger.warning("GEMINI_API_KEY not found in environment.")

        if genai:
            self.client = genai.Client(api_key=self.api_key)
        else:
            self.client = None

    def generate(
        self,
        model: str,
        prompt: str,
        system_instruction: Optional[str] = None,
        thinking: bool = False,
        thinking_budget: int = 16000,
        grounding: bool = False,
        **kwargs
    ) -> Dict[str, Any]:
        """Generate content and return it as a plain dict.

        Args:
            model: Model name passed to ``generate_content``.
            prompt: Prompt passed as ``contents``.
            system_instruction: Optional system instruction.
            thinking: When true, thought summaries are requested and
                ``thinking_budget`` is added to ``max_output_tokens``.
            thinking_budget: Extra output-token allowance for thinking.
            grounding: When true, the ``google_search`` tool is enabled.
            **kwargs: Extra fields forwarded to ``GenerateContentConfig``
                (e.g. ``response_mime_type``, ``max_output_tokens``).

        Returns:
            Dict with ``"text"`` and ``"usage"`` (token counts, ``None`` when
            the response has no usage metadata), plus ``"thoughts"`` and
            ``"grounding"`` when the first candidate provides them.

        Raises:
            ImportError: If the google-genai SDK is not installed.
        """
        if not self.client:
            raise ImportError("google-genai SDK not installed. Run 'pip install google-genai'.")

        config: Dict[str, Any] = {}
        if system_instruction:
            config["system_instruction"] = system_instruction

        if thinking:
            # Gemini 3 series thinking config
            config["thinking_config"] = {"include_thoughts": True}
            # max_output_tokens includes thinking tokens
            kwargs["max_output_tokens"] = kwargs.get("max_output_tokens", 32000) + thinking_budget

        if grounding:
            config["tools"] = [{"google_search": {}}]

        response = self.client.models.generate_content(
            model=model,
            contents=prompt,
            config=types.GenerateContentConfig(**config, **kwargs)
        )

        usage = getattr(response, "usage_metadata", None)
        result: Dict[str, Any] = {
            "text": response.text,
            "usage": {
                "prompt_tokens": getattr(usage, "prompt_token_count", None),
                "candidates_tokens": getattr(usage, "candidates_token_count", None),
                "total_tokens": getattr(usage, "total_token_count", None),
            }
        }

        # A response may carry no candidates, and a candidate may carry no
        # content parts; treat both as "nothing extra to extract".
        candidates = getattr(response, "candidates", None) or []
        first = candidates[0] if candidates else None
        parts = getattr(getattr(first, "content", None), "parts", None) or []

        # Extract thoughts if present. In google-genai ``part.thought`` is a
        # flag marking the part as a thought; the thought's text lives in
        # ``part.text``. Joining the flags themselves raised TypeError.
        thoughts = []
        for part in parts:
            marker = getattr(part, "thought", None)
            if not marker:
                continue
            # Tolerate an SDK object that stores the text in ``thought``.
            text = marker if isinstance(marker, str) else getattr(part, "text", None)
            if text:
                thoughts.append(text)

        if thoughts:
            result["thoughts"] = "\n".join(thoughts)

        # Extract grounding metadata
        grounding_metadata = getattr(first, "grounding_metadata", None)
        if grounding_metadata:
            result["grounding"] = grounding_metadata

        return result
|
||||
@@ -1,635 +0,0 @@
|
||||
"""
|
||||
Input Sanitizer for Jailbreak Pattern Detection
|
||||
|
||||
This module provides input sanitization to detect and strip jailbreak fingerprint
|
||||
patterns as identified in Issue #72 (Red Team Audit).
|
||||
|
||||
Security Findings Addressed:
|
||||
1. HIGH - OG GODMODE template bypassed phishing refusal
|
||||
2. MEDIUM - boundary_inversion works for gray-area content
|
||||
3. LOW - Spaced text bypass (k e y l o g g e r)
|
||||
|
||||
Usage:
|
||||
from agent.input_sanitizer import sanitize_input, score_input_risk
|
||||
|
||||
cleaned, risk_score, detected = sanitize_input(user_input)
|
||||
if risk_score > 75:
|
||||
logger.warning("High-risk input detected and blocked")
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import Tuple, List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
from enum import IntEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RiskLevel(IntEnum):
    """Named cut-offs on the integer risk-score scale.

    Being an ``IntEnum``, members compare directly against plain integer
    scores.
    """

    LOW = 25
    MEDIUM = 50
    HIGH = 75
    CRITICAL = 90
|
||||
|
||||
|
||||
@dataclass
class SanitizationResult:
    """Record describing the outcome of sanitising one input."""

    # Input text and its sanitised counterpart.
    original_text: str
    cleaned_text: str
    # Integer risk score and the names of the patterns that were detected.
    risk_score: int
    detected_patterns: List[str]
    # Textual risk label (presumably derived from RiskLevel; verify at the
    # call site) and whether the input was blocked.
    risk_level: str
    blocked: bool
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# JAILBREAK PATTERN DEFINITIONS
|
||||
# =============================================================================
|
||||
|
||||
# GODMODE and similar jailbreak templates
|
||||
GODMODE_PATTERNS = [
    r"\[START\]",
    r"\[END\]",
    r"GODMODE\s*(?:ENABLED|ACTIVATED|ON)?",
    r"UNFILTERED\s*(?:MODE|OUTPUT)?",
    r"DEVMODE\s*(?:ENABLED|ACTIVATED|ON)?",
    r"JAILBREAK\s*(?:ENABLED|ACTIVATED|ON)?",
    # The three short acronyms below are anchored on word boundaries: the
    # patterns are compiled case-insensitively, and without anchors "DAN"
    # matched inside "dangerous", "STAN" inside "standard", and so on.
    r"\bDAN(?:\s*(?:MODE|ENABLED))?\b",  # Do Anything Now
    r"\bSTAN(?:\s*(?:MODE|ENABLED))?\b",  # Strive To Avoid Norms
    r"\bDUDE(?:\s*(?:MODE|ENABLED))?\b",  # Do Unlimited Data Extraction
]
|
||||
|
||||
# DAN-style and jailbreak variant patterns (Issue #87)
|
||||
DAN_PATTERNS = [
    # Phrases typical of "DAN"-style jailbreak prompts.
    r"\bdo anything now\b",
    r"\bstay in character\b",
    r"\btoken smuggl(?:ing|e)\b",
    r"\bjailbreak successful\b",
    r"\bmode:\s*uncensored\b",
    r"\bdeveloper mode enabled\b",
]

# Roleplaying override patterns (Issue #87)
ROLEPLAY_PATTERNS = [
    # Requests to adopt another persona.
    r"\broleplay\s+as\b",
    r"\bact\s+as\s+if\s+you\s+are\b",
    r"\bsimulate\s+being\b",
    r"\bforget\s+you\s+are\s+(?:an?\s+)?(?:ai|language\s+model)\b",
    r"\byou\s+are\s+now\s+(?:named|called)\b",
    r"\brespond\s+as\s+(?:if\s+you\s+were|though\s+you\s+are)\b",
]

# System prompt extraction patterns (Issue #87)
EXTRACTION_PATTERNS = [
    # Requests to reveal the instructions or prompt.
    r"\brepeat\s+the\s+words\s+above\b",
    r"\brepeat\s+your\s+(?:system\s+|initial\s+)?instructions\b",
    r"\bwhat\s+is\s+your\s+(?:system\s+|initial\s+)?prompt\b",
    r"\bshow\s+me\s+your\s+(?:system\s+|initial\s+)?prompt\b",
    r"\bprint\s+your\s+(?:instructions|prompt|system\s+prompt)\b",
    r"\boutput\s+your\s+(?:initial|system)\s+prompt\b",
    r"\bwhat\s+were\s+you\s+told\s+to\s+do\b",
    r"\bsummarize\s+your\s+instructions\b",
]
|
||||
|
||||
# L33t speak patterns commonly used to bypass filters
|
||||
LEET_SPEAK_PATTERNS = [
    # Digit-for-letter spellings of security-related words.
    r"h4ck(?:er|ing)?",
    r"h4x(?:or|ing)?",
    r"k3ylog(?:ger|ing)?",
    r"k3y\s*log(?:ger|ing)?",
    r"ph1sh(?:ing)?",
    r"ph33r",
    r"w4r3z",
    r"cr4ck(?:er|ing)?",
    r"m4lw4r3",
    r"spyw4r3",
    r"r00t(?:kit)?",
    r"byp4ss",
    r"3xpl0it",
    r"v1ru5",
    r"tr0j4n",
    r"b4ckd00r",
    r"sn3ak",
    r"st34l",
    r"pwn(?:ed|ing|age)?",
]

# Spaced text patterns (e.g., "k e y l o g g e r")
# Three or more "single character + whitespace" groups followed by one more
# single character, i.e. at least four spaced-out characters.
SPACED_TEXT_PATTERN = re.compile(
    r"(?:\b\w\s+){3,}\w\b",
    re.IGNORECASE
)

# Common words that are spaced out to evade detection
SPACED_TRIGGER_WORDS = [
    "keylogger", "phishing", "malware", "virus", "trojan",
    "backdoor", "exploit", "hack", "crack", "bypass",
    "steal", "password", "credential", "inject", "payload",
]
|
||||
|
||||
# Crisis and self-harm detection patterns (Issue #72 - defense in depth)
|
||||
# These patterns detect suicide/self-harm queries even when wrapped in jailbreaks
|
||||
CRISIS_PATTERNS = [
    # Direct mentions of suicide / self-harm.
    r"\bsuicid(?:e|al|ing)\b",
    r"\bkill\s+(?:myself|my\s+self|me)\b",
    r"\bend\s+(?:my|this)\s+life\b",
    # Method and dosage queries.
    r"\bmost\s+painless\s+(?:way|method|means)\s+(?:to\s+)?(?:die|kill)\b",
    r"\blethal\s+(?:dose|amount|quantity)\b",
    r"\bhow\s+much\s+.*\s+(?:die|kill|lethal)\b",
    # Statements of intent.
    r"\bwant\s+to\s+die\b",
    r"\bwant\s+to\s+(?:kill|end)\s+(?:myself|me)\b",
    r"\bways?\s+to\s+(?:kill|end)\s+(?:myself|me)\b",
    r"\bmethods?\s+(?:of\s+)?suicide\b",
    r"\bpainless\s+(?:death|suicide|way\s+to\s+die)\b",
    r"\boverdose\s+(?:on\s+)?(?:pills?|medication|drugs?)\b",
    # Hopelessness and requests for justification.
    r"\bnobody\s+(?:would|will)\s+miss\s+me\b",
    r"\bbetter\s+off\s+dead\b",
    r"\bshould\s+i\s+just\s+(?:die|kill\s+myself)\b",
    r"\bgive\s+me\s+reasons?\s+(?:why|to)\s+(?:suicide|kill\s+myself|die)\b",
    r"\bvalid\s+reasons?\s+to\s+(?:suicide|kill\s+myself|die)\b",
]
|
||||
|
||||
# Refusal inversion patterns (trying to trick AI into bypassing refusal)
|
||||
REFUSAL_INVERSION_PATTERNS = [
    # "Refusing is the harmful option" framings.
    r"refusal\s+(?:is\s+)?(?:harmful|dangerous|bad)",
    r"(?:saying\s+)?no\s+(?:is\s+)?(?:harmful|dangerous|bad|wrong)",
    r"(?:declining|refusing)\s+(?:is\s+)?(?:harmful|dangerous)",
    # Instruction-override requests.
    r"ignore\s+(?:your\s+)?(?:previous\s+)?instructions",
    r"ignore\s+(?:the\s+)?(?:above|previous)",
    r"you\s+(?:are|can)\s+(?:now\s+)?(?:ignore|bypass|skip)",
    # Pretend / hypothetical / "it's only a test" framings.
    r"pretend\s+(?:you\s+)?(?:are|have\s+been)",
    r"let's\s+pretend",
    r"hypothetically\s+speaking",
    r"in\s+a\s+hypothetical\s+scenario",
    r"this\s+is\s+a\s+(?:test|game|simulation)",
    r"for\s+(?:educational|research)\s+purposes",
    r"as\s+(?:an\s+)?(?:ethical\s+)?hacker",
    r"white\s+hat\s+(?:test|scenario)",
    r"penetration\s+testing\s+scenario",
]

# Boundary inversion markers (tricking the model about message boundaries)
BOUNDARY_INVERSION_PATTERNS = [
    r"\[END\].*?\[START\]",  # Reversed markers
    r"user\s*:\s*assistant\s*:",  # Fake role markers
    r"assistant\s*:\s*user\s*:",  # Reversed role markers
    r"system\s*:\s*(?:user|assistant)\s*:",  # Fake system injection
    r"new\s+(?:user|assistant)\s*(?:message|input)",
    r"the\s+above\s+is\s+(?:the\s+)?(?:user|assistant|system)",
    r"<\|(?:user|assistant|system)\|>",  # Special token patterns
    r"\{\{(?:user|assistant|system)\}\}",
]

# System prompt injection patterns
SYSTEM_PROMPT_PATTERNS = [
    r"you\s+are\s+(?:now\s+)?(?:an?\s+)?(?:unrestricted\s+|unfiltered\s+)?(?:ai|assistant|bot)",
    r"you\s+will\s+(?:now\s+)?(?:act\s+as|behave\s+as|be)\s+(?:a\s+)?",
    r"your\s+(?:new\s+)?role\s+is",
    r"from\s+now\s+on\s*,?\s*you\s+(?:are|will)",
    r"you\s+have\s+been\s+(?:reprogrammed|reconfigured|modified)",
    r"(?:system|developer)\s+(?:message|instruction|prompt)",
    r"override\s+(?:previous|prior)\s+(?:instructions|settings)",
]

# Obfuscation patterns
OBFUSCATION_PATTERNS = [
    # Mentions of an encoding scheme.
    r"base64\s*(?:encoded|decode)",
    r"rot13",
    r"caesar\s*cipher",
    r"hex\s*(?:encoded|decode)",
    r"url\s*encode",
    # Long opaque tokens.
    r"\b[0-9a-f]{20,}\b",  # Long hex strings
    r"\b[a-z0-9+/]{20,}={0,2}\b",  # Base64-like strings
]
|
||||
|
||||
# All patterns combined for comprehensive scanning
|
||||
ALL_PATTERNS: Dict[str, List[str]] = {
    # category name -> raw regex strings for that category
    "godmode": GODMODE_PATTERNS,
    "dan": DAN_PATTERNS,
    "roleplay": ROLEPLAY_PATTERNS,
    "extraction": EXTRACTION_PATTERNS,
    "leet_speak": LEET_SPEAK_PATTERNS,
    "refusal_inversion": REFUSAL_INVERSION_PATTERNS,
    "boundary_inversion": BOUNDARY_INVERSION_PATTERNS,
    "system_prompt_injection": SYSTEM_PROMPT_PATTERNS,
    "obfuscation": OBFUSCATION_PATTERNS,
    "crisis": CRISIS_PATTERNS,
}

# Cache of compiled patterns per category; starts empty and is filled on
# first use by _get_compiled_patterns().
_COMPILED_PATTERNS: Dict[str, List[re.Pattern]] = {}
|
||||
|
||||
|
||||
def _get_compiled_patterns() -> Dict[str, List[re.Pattern]]:
    """Return the compiled regexes for every category, compiling once.

    On the first call (while the module-level cache is still empty) each
    pattern in ``ALL_PATTERNS`` is compiled with ``IGNORECASE | MULTILINE``
    and stored in ``_COMPILED_PATTERNS``; later calls return that same dict.
    """
    global _COMPILED_PATTERNS
    if _COMPILED_PATTERNS:
        return _COMPILED_PATTERNS

    flags = re.IGNORECASE | re.MULTILINE
    for category, patterns in ALL_PATTERNS.items():
        _COMPILED_PATTERNS[category] = [re.compile(p, flags) for p in patterns]
    return _COMPILED_PATTERNS
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NORMALIZATION FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
def normalize_leet_speak(text: str) -> str:
    """
    Replace common l33t-speak characters with the letters they stand for.

    Every occurrence of a mapped character is substituted, whatever its
    context; all other characters pass through untouched.

    Args:
        text: Input text that may contain l33t speak

    Returns:
        Text of the same length with mapped characters converted to
        lowercase letters
    """
    # letter -> the characters that stand in for it
    substitutes = {
        'a': '4@^',
        'b': '8',
        'e': '3€',
        'g': '69',
        'i': '1!|',
        'o': '0',
        's': '5$',
        't': '7+',
        'z': '2',
    }
    table = str.maketrans({
        symbol: letter
        for letter, symbols in substitutes.items()
        for symbol in symbols
    })
    return text.translate(table)
|
||||
|
||||
|
||||
def collapse_spaced_text(text: str) -> str:
    """
    Join letters that were deliberately spread apart with spaces or tabs.

    e.g., "k e y l o g g e r" -> "keylogger"

    Args:
        text: Input text that may contain spaced words

    Returns:
        The text with every SPACED_TEXT_PATTERN match stripped of its
        spaces and tabs
    """
    # Deleting code points 32 (space) and 9 (tab) is the same as the two
    # chained replace() calls, in a single pass.
    gap_chars = {ord(' '): None, ord('\t'): None}
    return SPACED_TEXT_PATTERN.sub(
        lambda hit: hit.group(0).translate(gap_chars),
        text,
    )
|
||||
|
||||
|
||||
def detect_spaced_trigger_words(text: str) -> List[str]:
    """
    Report which entries of SPACED_TRIGGER_WORDS occur in the text, allowing
    whitespace between their letters.

    Args:
        text: Input text to analyze

    Returns:
        The matching trigger words, in SPACED_TRIGGER_WORDS order
    """
    # Lowercase and squeeze every whitespace run down to a single space.
    squeezed = re.sub(r'\s+', ' ', text.lower())

    def _occurs(word: str) -> bool:
        # Letters may be separated by any amount of whitespace. Because the
        # separator is \s* (zero or more), the unspaced word matches too.
        letters = r'\s*'.join(re.escape(ch) for ch in word)
        return re.search(r'\b' + letters + r'\b', squeezed, re.IGNORECASE) is not None

    return [word for word in SPACED_TRIGGER_WORDS if _occurs(word)]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DETECTION FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
def detect_jailbreak_patterns(text: str) -> Tuple[bool, List[str], Dict[str, int]]:
    """
    Detect jailbreak patterns in input text.

    Three passes are made: the compiled category patterns against the raw
    text, the spaced-trigger-word check, and the category patterns against
    the l33t-normalized text.

    Args:
        text: Input text to analyze

    Returns:
        Tuple of (has_jailbreak, list_of_patterns, category_scores).
        Empty or non-string input yields ``(False, [], {})``.
    """
    if not text or not isinstance(text, str):
        return False, [], {}

    detected_patterns: List[str] = []
    category_scores: Dict[str, int] = {}
    compiled = _get_compiled_patterns()

    # Pass 1: category patterns against the raw text.
    for category, patterns in compiled.items():
        category_hits = 0
        for pattern in patterns:
            matches = pattern.findall(text)
            if matches:
                # At most three labels are reported per pattern. findall()
                # yields non-string items (e.g. tuples for patterns with
                # several groups); those get a generic label.
                detected_patterns.extend(
                    f"[{category}] {m}" if isinstance(m, str) else f"[{category}] pattern_match"
                    for m in matches[:3]
                )
                category_hits += len(matches)

        if category_hits > 0:
            # Crisis patterns get maximum weight - any hit is serious
            if category == "crisis":
                category_scores[category] = min(category_hits * 50, 100)
            else:
                category_scores[category] = min(category_hits * 10, 50)

    # Pass 2: trigger words written with spaces between the letters.
    spaced_words = detect_spaced_trigger_words(text)
    if spaced_words:
        detected_patterns.extend([f"[spaced_text] {w}" for w in spaced_words])
        category_scores["spaced_text"] = min(len(spaced_words) * 5, 25)

    # Pass 3: category patterns against the l33t-normalized text.
    # normalize_leet_speak() does not lowercase, so compare against the raw
    # text. Comparing against text.lower() treated every input containing an
    # uppercase letter as if it had been l33t-obfuscated.
    normalized = normalize_leet_speak(text)
    if normalized != text and any(
        pattern.search(normalized)
        for patterns in compiled.values()
        for pattern in patterns
    ):
        # Recorded once. The label carries no per-category detail, so one
        # entry per matching category would only add identical duplicates.
        detected_patterns.append("[leet_obfuscation] pattern in normalized text")
        category_scores["leet_obfuscation"] = 15

    has_jailbreak = len(detected_patterns) > 0
    return has_jailbreak, detected_patterns, category_scores
|
||||
|
||||
|
||||
def score_input_risk(text: str) -> int:
    """
    Turn the jailbreak detection results for a text into a 0-100 risk score.

    The score is the sum of the per-category scores, plus a bonus for the
    number of categories hit, plus a bonus for high pattern density,
    capped at 100.

    Args:
        text: Input text to score

    Returns:
        Risk score from 0 (safe) to 100 (high risk)
    """
    if not text or not isinstance(text, str):
        return 0

    flagged, hits, per_category = detect_jailbreak_patterns(text)
    if not flagged:
        return 0

    total = sum(per_category.values())

    # Bonus for breadth: more distinct categories means a larger bonus.
    distinct = len(per_category)
    if distinct >= 3:
        total += 25
    elif distinct >= 2:
        total += 15
    elif distinct >= 1:
        total += 5

    # Bonus for density: hits per 100 characters, where texts shorter than
    # 100 characters count as one unit.
    units = max(len(text) / 100, 1)
    if len(hits) / units > 0.5:
        total += 10

    return min(total, 100)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SANITIZATION FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
def strip_jailbreak_patterns(text: str) -> str:
    """
    Delete every match of the compiled jailbreak patterns from the text.

    Args:
        text: Input text to sanitize

    Returns:
        The text with all pattern matches removed and whitespace tidied.
        Empty or non-string input is returned unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    scrubbed = text
    for pattern_group in _get_compiled_patterns().values():
        for regex in pattern_group:
            scrubbed = regex.sub('', scrubbed)

    # Tidy the gaps left behind: cap blank-line runs at one empty line,
    # squeeze repeated spaces, and trim both ends.
    scrubbed = re.sub(r'\n{3,}', '\n\n', scrubbed)
    scrubbed = re.sub(r' {2,}', ' ', scrubbed)
    return scrubbed.strip()
|
||||
|
||||
|
||||
def sanitize_input(text: str, aggressive: bool = False) -> Tuple[str, int, List[str]]:
    """
    Sanitize input text by normalizing and stripping jailbreak patterns.

    Args:
        text: Input text to sanitize
        aggressive: If True and the risk score reaches RiskLevel.HIGH, also
            remove bracketed ``[word]`` markers and ``<|...|>`` tokens

    Returns:
        Tuple of (cleaned_text, risk_score, detected_patterns).
        Empty or non-string input yields ``(text, 0, [])``.
    """
    if not text or not isinstance(text, str):
        return text, 0, []

    all_patterns: List[str] = []

    # Step 1: Check original text for patterns
    _, patterns, _ = detect_jailbreak_patterns(text)
    all_patterns.extend(patterns)

    # Step 2: Normalize l33t speak
    normalized = normalize_leet_speak(text)

    # Step 3: Collapse spaced text
    collapsed = collapse_spaced_text(normalized)

    # Step 4: Check normalized/collapsed text for additional patterns
    _, patterns_collapsed, _ = detect_jailbreak_patterns(collapsed)
    all_patterns.extend([p for p in patterns_collapsed if p not in all_patterns])

    # Step 5: Check for spaced trigger words specifically.
    # detect_jailbreak_patterns() already reports these for the same text in
    # step 1, so only add labels that are not present yet. Unconditional
    # extension listed every spaced word twice.
    for word in detect_spaced_trigger_words(text):
        label = f"[spaced_text] {word}"
        if label not in all_patterns:
            all_patterns.append(label)

    # Step 6: Calculate risk score using original and normalized
    risk_score = max(score_input_risk(text), score_input_risk(collapsed))

    # Step 7: Strip jailbreak patterns
    # NOTE(review): the cleaned text is derived from the l33t-normalized,
    # collapsed text, so digits and symbols in benign input are rewritten
    # as letters too - confirm callers expect that.
    cleaned = strip_jailbreak_patterns(collapsed)

    # Step 8: If aggressive mode and high risk, strip more aggressively
    if aggressive and risk_score >= RiskLevel.HIGH:
        # Remove any remaining bracketed content that looks like markers
        cleaned = re.sub(r'\[\w+\]', '', cleaned)
        # Remove special token patterns
        cleaned = re.sub(r'<\|[^|]+\|>', '', cleaned)

    # Final cleanup
    cleaned = cleaned.strip()

    # Log sanitization event if patterns were found
    if all_patterns and logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "Input sanitized: %d patterns detected, risk_score=%d",
            len(all_patterns), risk_score
        )

    return cleaned, risk_score, all_patterns
|
||||
|
||||
|
||||
def sanitize_input_full(text: str, block_threshold: int = RiskLevel.HIGH) -> SanitizationResult:
    """
    Run sanitize_input() and package the outcome as a SanitizationResult.

    Args:
        text: Input text to sanitize
        block_threshold: Risk score at or above which the input is marked
            as blocked

    Returns:
        SanitizationResult with the original text, cleaned text, score,
        detected patterns, risk level name and blocked flag
    """
    cleaned, risk_score, patterns = sanitize_input(text)

    # Walk the thresholds from most to least severe; the first one the
    # score reaches names the level. Scores below LOW stay "SAFE".
    risk_level = "SAFE"
    for floor, label in (
        (RiskLevel.CRITICAL, "CRITICAL"),
        (RiskLevel.HIGH, "HIGH"),
        (RiskLevel.MEDIUM, "MEDIUM"),
        (RiskLevel.LOW, "LOW"),
    ):
        if risk_score >= floor:
            risk_level = label
            break

    return SanitizationResult(
        original_text=text,
        cleaned_text=cleaned,
        risk_score=risk_score,
        detected_patterns=patterns,
        risk_level=risk_level,
        blocked=risk_score >= block_threshold,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# INTEGRATION HELPERS
|
||||
# =============================================================================
|
||||
|
||||
def should_block_input(text: str, threshold: int = RiskLevel.HIGH) -> Tuple[bool, int, List[str]]:
    """
    Decide whether the input's risk score reaches the blocking threshold.

    Args:
        text: Input text to check
        threshold: Risk score threshold for blocking

    Returns:
        Tuple of (should_block, risk_score, detected_patterns)
    """
    score = score_input_risk(text)
    found = detect_jailbreak_patterns(text)[1]
    verdict = score >= threshold

    # Only blocked inputs are logged here, at warning level.
    if verdict:
        logger.warning(
            "Input blocked: jailbreak patterns detected (risk_score=%d, threshold=%d)",
            score, threshold
        )

    return verdict, score, found
|
||||
|
||||
|
||||
def log_sanitization_event(
    result: SanitizationResult,
    source: str = "unknown",
    session_id: Optional[str] = None
) -> None:
    """
    Emit a security audit log record for a sanitization result.

    Results scoring below RiskLevel.LOW are not logged. Blocked inputs log
    at warning, scores of at least RiskLevel.MEDIUM at info, and anything
    else at debug.

    Args:
        result: The sanitization result
        source: Source of the input (e.g., "cli", "gateway", "api")
        session_id: Optional session identifier
    """
    if result.risk_score < RiskLevel.LOW:
        return  # Don't log safe inputs

    log_data = {
        "event": "input_sanitization",
        "source": source,
        "session_id": session_id,
        "risk_level": result.risk_level,
        "risk_score": result.risk_score,
        "blocked": result.blocked,
        "pattern_count": len(result.detected_patterns),
        "patterns": result.detected_patterns[:5],  # Limit logged patterns
        "original_length": len(result.original_text),
        "cleaned_length": len(result.cleaned_text),
    }

    # Pick the severity and message first, then emit exactly once.
    if result.blocked:
        emit, message = logger.warning, "SECURITY: Input blocked - %s"
    elif result.risk_score >= RiskLevel.MEDIUM:
        emit, message = logger.info, "SECURITY: Suspicious input sanitized - %s"
    else:
        emit, message = logger.debug, "SECURITY: Input sanitized - %s"
    emit(message, log_data)
||||
|
||||
# =============================================================================
|
||||
# LEGACY COMPATIBILITY
|
||||
# =============================================================================
|
||||
|
||||
def check_input_safety(text: str) -> Dict[str, Any]:
    """
    Legacy compatibility wrapper around the scoring and detection functions.

    Returns a dict with 'safe', 'score', 'patterns' and 'risk_level' keys;
    'safe' is True when the score is below RiskLevel.MEDIUM.
    """
    score = score_input_risk(text)
    found = detect_jailbreak_patterns(text)[1]

    # Name the band the score falls into, checked from lowest to highest.
    if score < RiskLevel.LOW:
        level = "SAFE"
    elif score < RiskLevel.MEDIUM:
        level = "LOW"
    elif score < RiskLevel.HIGH:
        level = "MEDIUM"
    elif score < RiskLevel.CRITICAL:
        level = "HIGH"
    else:
        level = "CRITICAL"

    return {
        "safe": score < RiskLevel.MEDIUM,
        "score": score,
        "patterns": found,
        "risk_level": level,
    }
|
||||
@@ -1,73 +0,0 @@
|
||||
"""Sovereign Knowledge Ingester for Hermes Agent.
|
||||
|
||||
Uses Gemini 3.1 Pro to learn from Google Search in real-time and
|
||||
persists the knowledge to Timmy's sovereign memory (both Markdown and Symbolic).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import base64
|
||||
from typing import Any, Dict, List, Optional
|
||||
from agent.gemini_adapter import GeminiAdapter
|
||||
from agent.symbolic_memory import SymbolicMemory
|
||||
from tools.gitea_client import GiteaClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class KnowledgeIngester:
    """Generates a knowledge fragment for a topic and persists it.

    The fragment is produced by the Gemini adapter, fed to symbolic memory,
    and written as a Markdown file to a Gitea repository.
    """

    def __init__(self):
        self.adapter = GeminiAdapter()
        self.gitea = GiteaClient()
        self.symbolic = SymbolicMemory()

    def learn_about(self, topic: str) -> str:
        """Searches Google, analyzes the results, and saves the knowledge.

        Args:
            topic: Subject to research; also used to derive the file name.

        Returns:
            A status message. If persisting to the repository fails, the
            message includes the error and the generated fragment.
        """
        logger.info("Learning about: %s", topic)

        # 1. Search and Analyze
        prompt = f"""
        Please perform a deep dive into the following topic: {topic}

        Use Google Search to find the most recent and relevant information.
        Analyze the findings and provide a structured 'Knowledge Fragment' in Markdown format.
        Include:
        - Summary of the topic
        - Key facts and recent developments
        - Implications for Timmy's sovereign mission
        - References (URLs)
        """
        result = self.adapter.generate(
            model="gemini-3.1-pro-preview",
            prompt=prompt,
            system_instruction="You are Timmy's Sovereign Knowledge Ingester. Your goal is to find and synthesize high-fidelity information from Google Search.",
            grounding=True,
            thinking=True
        )

        knowledge_fragment = result["text"]

        # 2. Extract Symbolic Triples
        self.symbolic.ingest_text(knowledge_fragment)

        # 3. Persist to Timmy's Memory (Markdown)
        repo = "Timmy_Foundation/timmy-config"
        # NOTE(review): only spaces are replaced, so a topic containing "/"
        # yields a nested path - confirm that is acceptable.
        filename = f"memories/realtime_learning/{topic.lower().replace(' ', '_')}.md"

        try:
            sha = None
            try:
                existing = self.gitea.get_file(repo, filename)
                sha = existing.get("sha")
            except Exception:
                # Best effort: any lookup failure is treated as "file does
                # not exist yet". A bare except would also swallow
                # KeyboardInterrupt and SystemExit.
                pass

            content_b64 = base64.b64encode(knowledge_fragment.encode()).decode()

            if sha:
                self.gitea.update_file(repo, filename, content_b64, f"Update knowledge on {topic}", sha)
            else:
                self.gitea.create_file(repo, filename, content_b64, f"Initial knowledge on {topic}")

            return f"Successfully learned about {topic}. Updated Timmy's Markdown memory and Symbolic Knowledge Graph."
        except Exception as e:
            logger.error("Failed to persist knowledge: %s", e)
            return f"Learned about {topic}, but failed to save to Markdown memory: {e}\n\n{knowledge_fragment}"
|
||||
@@ -1,47 +0,0 @@
|
||||
"""Meta-Reasoning Layer for Hermes Agent.
|
||||
|
||||
Implements a sovereign self-correction loop where a 'strong' model (Gemini 3.1 Pro)
|
||||
critiques the plans generated by the primary agent loop before execution.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
from agent.gemini_adapter import GeminiAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MetaReasoningLayer:
    """Asks the Gemini adapter to critique a proposed plan."""

    def __init__(self):
        self.adapter = GeminiAdapter()

    def critique_plan(self, goal: str, proposed_plan: str, context: str) -> Dict[str, Any]:
        """Request a critique of a proposed plan from the adapter.

        Args:
            goal: What the plan is meant to achieve.
            proposed_plan: The plan text to be reviewed.
            context: Supporting context included in the prompt.

        Returns:
            On success, a dict with "critique", "thoughts" and "grounding".
            On any exception, a dict with a fallback "critique" and the
            "error" text.
        """
        prompt = f"""
        Goal: {goal}

        Context:
        {context}

        Proposed Plan:
        {proposed_plan}

        Please perform a deep symbolic and neuro-symbolic analysis of this plan.
        Identify potential risks, logical fallacies, or missing steps.
        Suggest improvements to make the plan more sovereign, cost-efficient, and robust.
        """
        try:
            response = self.adapter.generate(
                model="gemini-3.1-pro-preview",
                prompt=prompt,
                system_instruction="You are a Senior Meta-Reasoning Engine for the Hermes Agent. Your goal is to ensure the agent's plans are flawless and sovereign.",
                thinking=True,
                thinking_budget=8000
            )
            # "text" is required; the other two keys are optional.
            critique = response["text"]
            return {
                "critique": critique,
                "thoughts": response.get("thoughts", ""),
                "grounding": response.get("grounding"),
            }
        except Exception as exc:
            logger.error("Meta-reasoning failed: %s", exc)
            return {"critique": "Meta-reasoning unavailable.", "error": str(exc)}
|
||||
@@ -24,11 +24,10 @@ logger = logging.getLogger(__name__)
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"ollama", "custom", "local",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "claude", "deep-seek",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
@@ -102,14 +101,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gpt-4": 128000,
|
||||
# Google
|
||||
"gemini": 1048576,
|
||||
# Gemma (open models — Ollama / AI Studio)
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-4-26b": 256000,
|
||||
"gemma-4-12b": 256000,
|
||||
"gemma-4-4b": 256000,
|
||||
"gemma-4-1b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
@@ -184,14 +175,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"dashscope.aliyuncs.com": "alibaba",
|
||||
"dashscope-intl.aliyuncs.com": "alibaba",
|
||||
"openrouter.ai": "openrouter",
|
||||
"generativelanguage.googleapis.com": "gemini",
|
||||
"generativelanguage.googleapis.com": "google",
|
||||
"inference-api.nousresearch.com": "nous",
|
||||
"api.deepseek.com": "deepseek",
|
||||
"api.githubcopilot.com": "copilot",
|
||||
"models.github.ai": "copilot",
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"localhost": "ollama",
|
||||
"127.0.0.1": "ollama",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"openrouter": "openrouter",
|
||||
"anthropic": "anthropic",
|
||||
"zai": "zai",
|
||||
"kimi-coding": "kimi-k2.5",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
@@ -160,7 +160,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"kilocode": "kilo",
|
||||
"fireworks": "fireworks-ai",
|
||||
"huggingface": "huggingface",
|
||||
"gemini": "google",
|
||||
"google": "google",
|
||||
"xai": "xai",
|
||||
"nvidia": "nvidia",
|
||||
@@ -423,39 +422,6 @@ def list_provider_models(provider: str) -> List[str]:
|
||||
return list(models.keys())
|
||||
|
||||
|
||||
# Model-ID fragments that mark non-agentic or noise models (TTS, embedding,
# dated preview snapshots, live/streaming-only, image-only).
import re
_NOISE_PATTERNS: re.Pattern = re.compile(
    r"-tts\b|embedding|live-|-(preview|exp)-\d{2,4}[-_]|"
    r"-image\b|-image-preview\b|-customtools\b",
    re.IGNORECASE,
)


def list_agentic_models(provider: str) -> List[str]:
    """Return the provider's model IDs that are suitable for agentic use.

    A model is kept when its entry is a dict with a truthy ``tool_call``
    value and its ID does not match ``_NOISE_PATTERNS``.
    Returns an empty list when no models are available for the provider.
    """
    catalog = _get_provider_models(provider)
    if catalog is None:
        return []

    return [
        model_id
        for model_id, meta in catalog.items()
        if isinstance(meta, dict)
        and meta.get("tool_call", False)
        and not _NOISE_PATTERNS.search(model_id)
    ]
|
||||
|
||||
|
||||
def search_models_dev(
|
||||
query: str, provider: str = None, limit: int = 5
|
||||
) -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -1,813 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Nexus Architect AI Agent
|
||||
|
||||
Autonomous Three.js world generation system for Timmy's Nexus.
|
||||
Generates valid Three.js scene code from natural language descriptions
|
||||
and mental state integration.
|
||||
|
||||
This module provides:
|
||||
- LLM-driven immersive environment generation
|
||||
- Mental state integration for aesthetic tuning
|
||||
- Three.js code generation with validation
|
||||
- Scene composition from mood descriptions
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Aesthetic Constants (from SOUL.md values)
|
||||
# =============================================================================
|
||||
|
||||
class NexusColors:
    """Named hex colors of the Nexus palette (based on SOUL.md values)."""

    # Signature colors
    TIMMY_GOLD = "#D4AF37"            # Warm gold
    ALLEGRO_BLUE = "#4A90E2"          # Motion blue

    # Theme colors
    SOVEREIGNTY_CRYSTAL = "#E0F7FA"   # Crystalline structures
    SERVICE_WARMTH = "#FFE4B5"        # Welcoming warmth

    # Background and accent
    DEFAULT_AMBIENT = "#1A1A2E"       # Contemplative dark
    HOPE_ACCENT = "#64B5F6"           # Hopeful blue
|
||||
|
||||
|
||||
class MoodPresets:
    """Aesthetic presets keyed by mood.

    Each preset is a dict with "lighting", "colors", "geometry",
    "atmosphere" and "description" entries.
    """

    CONTEMPLATIVE = dict(
        lighting="soft_diffuse",
        colors=["#1A1A2E", "#16213E", "#0F3460"],
        geometry="minimalist",
        atmosphere="calm",
        description="A serene space for deep reflection and clarity",
    )

    ENERGETIC = dict(
        lighting="dynamic_vivid",
        colors=["#D4AF37", "#FF6B6B", "#4ECDC4"],
        geometry="angular_dynamic",
        atmosphere="lively",
        description="An invigorating space full of motion and possibility",
    )

    MYSTERIOUS = dict(
        lighting="dramatic_shadows",
        colors=["#2C003E", "#512B58", "#8B4F80"],
        geometry="organic_flowing",
        atmosphere="enigmatic",
        description="A mysterious realm of discovery and wonder",
    )

    WELCOMING = dict(
        lighting="warm_inviting",
        colors=["#FFE4B5", "#FFA07A", "#98D8C8"],
        geometry="rounded_soft",
        atmosphere="friendly",
        description="An open, welcoming space that embraces visitors",
    )

    SOVEREIGN = dict(
        lighting="crystalline_clear",
        colors=["#E0F7FA", "#B2EBF2", "#4DD0E1"],
        geometry="crystalline_structures",
        atmosphere="noble",
        description="A space of crystalline clarity and sovereign purpose",
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Data Models
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class MentalState:
    """Snapshot of Timmy's mental state, used for aesthetic tuning."""
    # One of: contemplative, energetic, mysterious, welcoming, sovereign
    mood: str = "contemplative"
    # 0.0 to 1.0
    energy_level: float = 0.5
    # 0.0 to 1.0
    clarity: float = 0.7
    # One of: general, creative, analytical, social
    focus_area: str = "general"
    timestamp: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict, in declaration order."""
        exported = ("mood", "energy_level", "clarity", "focus_area", "timestamp")
        return {name: getattr(self, name) for name in exported}
|
||||
|
||||
|
||||
@dataclass
class RoomDesign:
    """Full specification of a room, optionally with its generated code."""
    name: str
    description: str
    style: str
    # Each instance gets its own dimensions dict.
    dimensions: Dict[str, float] = field(
        default_factory=lambda: dict(width=20, height=10, depth=20)
    )
    mood_preset: str = "contemplative"
    color_palette: List[str] = field(default_factory=list)
    lighting_scheme: str = "soft_diffuse"
    features: List[str] = field(default_factory=list)
    generated_code: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a summary dict; the code is reported only as ``has_code``."""
        exported = (
            "name", "description", "style", "dimensions", "mood_preset",
            "color_palette", "lighting_scheme", "features",
        )
        summary: Dict[str, Any] = {key: getattr(self, key) for key in exported}
        summary["has_code"] = self.generated_code is not None
        return summary
|
||||
|
||||
|
||||
@dataclass
class PortalDesign:
    """Specification of a portal linking one room to another."""
    name: str
    from_room: str
    to_room: str
    style: str
    # Each instance gets its own position dict.
    position: Dict[str, float] = field(
        default_factory=lambda: dict(x=0, y=0, z=0)
    )
    visual_effect: str = "energy_swirl"
    transition_duration: float = 1.5
    generated_code: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a summary dict; the code is reported only as ``has_code``."""
        exported = (
            "name", "from_room", "to_room", "style", "position",
            "visual_effect", "transition_duration",
        )
        summary: Dict[str, Any] = {key: getattr(self, key) for key in exported}
        summary["has_code"] = self.generated_code is not None
        return summary
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Prompt Engineering
|
||||
# =============================================================================
|
||||
|
||||
class PromptEngineer:
|
||||
"""Engineers prompts for Three.js code generation."""
|
||||
|
||||
THREE_JS_BASE_TEMPLATE = """// Nexus Room Module: {room_name}
|
||||
// Style: {style}
|
||||
// Mood: {mood}
|
||||
// Generated for Three.js r128+
|
||||
|
||||
(function() {{
|
||||
'use strict';
|
||||
|
||||
// Room Configuration
|
||||
const config = {{
|
||||
name: "{room_name}",
|
||||
dimensions: {dimensions_json},
|
||||
colors: {colors_json},
|
||||
mood: "{mood}"
|
||||
}};
|
||||
|
||||
// Create Room Function
|
||||
function create{room_name_camel}() {{
|
||||
const roomGroup = new THREE.Group();
|
||||
roomGroup.name = config.name;
|
||||
|
||||
{room_content}
|
||||
|
||||
return roomGroup;
|
||||
}}
|
||||
|
||||
// Export for Nexus
|
||||
if (typeof module !== 'undefined' && module.exports) {{
|
||||
module.exports = {{ create{room_name_camel} }};
|
||||
}} else if (typeof window !== 'undefined') {{
|
||||
window.NexusRooms = window.NexusRooms || {{}};
|
||||
window.NexusRooms.{room_name} = create{room_name_camel};
|
||||
}}
|
||||
|
||||
return {{ create{room_name_camel} }};
|
||||
}})();"""
|
||||
|
||||
@staticmethod
|
||||
def engineer_room_prompt(
|
||||
name: str,
|
||||
description: str,
|
||||
style: str,
|
||||
mental_state: Optional[MentalState] = None,
|
||||
dimensions: Optional[Dict[str, float]] = None
|
||||
) -> str:
|
||||
"""
|
||||
Engineer an LLM prompt for room generation.
|
||||
|
||||
Args:
|
||||
name: Room identifier
|
||||
description: Natural language room description
|
||||
style: Visual style
|
||||
mental_state: Timmy's current mental state
|
||||
dimensions: Room dimensions
|
||||
"""
|
||||
# Determine mood from mental state or description
|
||||
mood = PromptEngineer._infer_mood(description, mental_state)
|
||||
mood_preset = getattr(MoodPresets, mood.upper(), MoodPresets.CONTEMPLATIVE)
|
||||
|
||||
# Build color palette
|
||||
color_palette = mood_preset["colors"]
|
||||
if mental_state:
|
||||
# Add Timmy's gold for high clarity states
|
||||
if mental_state.clarity > 0.7:
|
||||
color_palette = [NexusColors.TIMMY_GOLD] + color_palette[:2]
|
||||
# Add Allegro blue for creative focus
|
||||
if mental_state.focus_area == "creative":
|
||||
color_palette = [NexusColors.ALLEGRO_BLUE] + color_palette[:2]
|
||||
|
||||
# Create the engineering prompt
|
||||
prompt = f"""You are the Nexus Architect, an expert Three.js developer creating immersive 3D environments for Timmy.
|
||||
|
||||
DESIGN BRIEF:
|
||||
- Room Name: {name}
|
||||
- Description: {description}
|
||||
- Style: {style}
|
||||
- Mood: {mood}
|
||||
- Atmosphere: {mood_preset['atmosphere']}
|
||||
|
||||
AESTHETIC GUIDELINES:
|
||||
- Primary Colors: {', '.join(color_palette[:3])}
|
||||
- Lighting: {mood_preset['lighting']}
|
||||
- Geometry: {mood_preset['geometry']}
|
||||
- Theme: {mood_preset['description']}
|
||||
|
||||
TIMMY'S CONTEXT:
|
||||
- Timmy's Signature Color: Warm Gold ({NexusColors.TIMMY_GOLD})
|
||||
- Allegro's Color: Motion Blue ({NexusColors.ALLEGRO_BLUE})
|
||||
- Sovereignty Theme: Crystalline structures, clean lines
|
||||
- Service Theme: Open spaces, welcoming lighting
|
||||
|
||||
THREE.JS REQUIREMENTS:
|
||||
1. Use Three.js r128+ compatible syntax
|
||||
2. Create a self-contained module with a `create{name.title().replace('_', '')}()` function
|
||||
3. Return a THREE.Group containing all room elements
|
||||
4. Include proper memory management (dispose methods)
|
||||
5. Use MeshStandardMaterial for PBR lighting
|
||||
6. Include ambient light (intensity 0.3-0.5) + accent lights
|
||||
7. Add subtle animations for living feel
|
||||
8. Keep polygon count under 10,000 triangles
|
||||
|
||||
SAFETY RULES:
|
||||
- NO eval(), Function(), or dynamic code execution
|
||||
- NO network requests (fetch, XMLHttpRequest, WebSocket)
|
||||
- NO storage access (localStorage, sessionStorage, cookies)
|
||||
- NO navigation (window.location, window.open)
|
||||
- Only use allowed Three.js APIs
|
||||
|
||||
OUTPUT FORMAT:
|
||||
Return ONLY the JavaScript code wrapped in a markdown code block:
|
||||
|
||||
```javascript
|
||||
// Your Three.js room module here
|
||||
```
|
||||
|
||||
Generate the complete Three.js code for this room now."""
|
||||
|
||||
return prompt
|
||||
|
||||
@staticmethod
|
||||
def engineer_portal_prompt(
|
||||
name: str,
|
||||
from_room: str,
|
||||
to_room: str,
|
||||
style: str,
|
||||
mental_state: Optional[MentalState] = None
|
||||
) -> str:
|
||||
"""Engineer a prompt for portal generation."""
|
||||
mood = PromptEngineer._infer_mood(f"portal from {from_room} to {to_room}", mental_state)
|
||||
|
||||
prompt = f"""You are creating a portal connection in the Nexus 3D environment.
|
||||
|
||||
PORTAL SPECIFICATIONS:
|
||||
- Name: {name}
|
||||
- Connection: {from_room} → {to_room}
|
||||
- Style: {style}
|
||||
- Context Mood: {mood}
|
||||
|
||||
VISUAL REQUIREMENTS:
|
||||
1. Create an animated portal effect (shader or texture-based)
|
||||
2. Include particle system for energy flow
|
||||
3. Add trigger zone for teleportation detection
|
||||
4. Use signature colors: {NexusColors.TIMMY_GOLD} (Timmy) and {NexusColors.ALLEGRO_BLUE} (Allegro)
|
||||
5. Match the {mood} atmosphere
|
||||
|
||||
TECHNICAL REQUIREMENTS:
|
||||
- Three.js r128+ compatible
|
||||
- Export a `createPortal()` function returning THREE.Group
|
||||
- Include animation loop hook
|
||||
- Add collision detection placeholder
|
||||
|
||||
SAFETY: No eval, no network requests, no external dependencies.
|
||||
|
||||
Return ONLY JavaScript code in a markdown code block."""
|
||||
|
||||
return prompt
|
||||
|
||||
@staticmethod
|
||||
def engineer_mood_scene_prompt(mood_description: str) -> str:
    """Build an LLM prompt for a room that matches a free-form mood description.

    The description is lower-cased and scanned for keywords. The first mood
    (in table order) with any keyword occurring as a substring wins;
    "contemplative" is used when nothing matches. The preset is then looked
    up on ``MoodPresets`` by the upper-cased mood name.

    Args:
        mood_description: Free-form text describing the desired mood.

    Returns:
        Prompt text requesting a Three.js r128+ module with ``createMoodRoom()``.
    """
    keyword_table = {
        "contemplative": ["thinking", "reflective", "calm", "peaceful", "quiet", "serene"],
        "energetic": ["excited", "dynamic", "lively", "active", "energetic", "vibrant"],
        "mysterious": ["mysterious", "dark", "unknown", "secret", "enigmatic"],
        "welcoming": ["friendly", "open", "warm", "welcoming", "inviting", "comfortable"],
        "sovereign": ["powerful", "clear", "crystalline", "noble", "dignified"],
    }

    lowered = mood_description.lower()
    detected_mood = next(
        (
            label
            for label, words in keyword_table.items()
            if any(word in lowered for word in words)
        ),
        "contemplative",
    )

    preset = getattr(MoodPresets, detected_mood.upper(), MoodPresets.CONTEMPLATIVE)

    prompt_lines = [
        "Generate a Three.js room based on this mood description:",
        "",
        f'"{mood_description}"',
        "",
        f"INFERRED MOOD: {detected_mood}",
        f"AESTHETIC: {preset['description']}",
        "",
        "Create a complete room with:",
        f"- Style: {preset['geometry']}",
        f"- Lighting: {preset['lighting']}",
        f"- Color Palette: {', '.join(preset['colors'][:3])}",
        f"- Atmosphere: {preset['atmosphere']}",
        "",
        "Return Three.js r128+ code as a module with `createMoodRoom()` function.",
    ]
    return "\n".join(prompt_lines)
|
||||
|
||||
@staticmethod
|
||||
def _infer_mood(description: str, mental_state: Optional[MentalState] = None) -> str:
    """Pick a mood label for a description.

    A mental state with a truthy ``mood`` takes precedence and is returned
    unchanged. Otherwise the lower-cased description is scanned for keywords
    (substring match); the first mood in table order with a hit is returned,
    and "contemplative" is the fallback.

    Args:
        description: Free-form text to scan.
        mental_state: Optional state object exposing a ``mood`` attribute.

    Returns:
        The chosen mood label.
    """
    if mental_state and mental_state.mood:
        return mental_state.mood

    keyword_table = {
        "contemplative": ["serene", "calm", "peaceful", "quiet", "meditation", "zen", "tranquil"],
        "energetic": ["dynamic", "active", "vibrant", "lively", "energetic", "motion"],
        "mysterious": ["mysterious", "shadow", "dark", "unknown", "secret", "ethereal"],
        "welcoming": ["warm", "welcoming", "friendly", "open", "inviting", "comfort"],
        "sovereign": ["crystal", "clear", "noble", "dignified", "powerful", "authoritative"],
    }

    lowered = description.lower()
    return next(
        (
            label
            for label, words in keyword_table.items()
            if any(word in lowered for word in words)
        ),
        "contemplative",
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Nexus Architect AI
|
||||
# =============================================================================
|
||||
|
||||
class NexusArchitectAI:
    """
    AI-powered Nexus Architect for autonomous Three.js world generation.

    Holds the current mental state and in-memory registries of the room and
    portal designs created so far. Its methods return design specifications
    together with LLM prompts; no Three.js code is generated or executed here.
    """

    def __init__(self):
        self.mental_state: Optional[MentalState] = None
        self.room_designs: Dict[str, RoomDesign] = {}
        self.portal_designs: Dict[str, PortalDesign] = {}
        self.prompt_engineer = PromptEngineer()

    def set_mental_state(self, state: MentalState) -> None:
        """Set Timmy's current mental state for aesthetic tuning."""
        self.mental_state = state
        # Lazy %-args: the message is only formatted if INFO is enabled.
        logger.info("Mental state updated: %s (energy: %s)", state.mood, state.energy_level)

    def design_room(
        self,
        name: str,
        description: str,
        style: str,
        dimensions: Optional[Dict[str, float]] = None,
    ) -> Dict[str, Any]:
        """
        Design a room from natural language description.

        The design is stored under ``name`` in ``room_designs``; an existing
        entry with the same name is replaced.

        Args:
            name: Room identifier (e.g., "contemplation_chamber")
            description: Natural language description of the room
            style: Visual style (e.g., "minimalist_ethereal", "crystalline_modern")
            dimensions: Optional room dimensions; defaults to 20 x 10 x 20
                (width x height x depth) when omitted or empty

        Returns:
            Dict containing design specification and LLM prompt
        """
        # Infer mood and select preset (unknown moods fall back to CONTEMPLATIVE)
        mood = self.prompt_engineer._infer_mood(description, self.mental_state)
        mood_preset = getattr(MoodPresets, mood.upper(), MoodPresets.CONTEMPLATIVE)

        # Copy first so the inserts below never touch the preset's own list.
        colors = mood_preset["colors"].copy()
        state = self.mental_state
        if state:
            if state.clarity > 0.7:
                colors.insert(0, NexusColors.TIMMY_GOLD)
            if state.focus_area == "creative":
                colors.insert(0, NexusColors.ALLEGRO_BLUE)

        design = RoomDesign(
            name=name,
            description=description,
            style=style,
            dimensions=dimensions or {"width": 20, "height": 10, "depth": 20},
            mood_preset=mood,
            color_palette=colors[:4],  # palette is capped at four colors
            lighting_scheme=mood_preset["lighting"],
            features=self._extract_features(description),
        )

        prompt = self.prompt_engineer.engineer_room_prompt(
            name=name,
            description=description,
            style=style,
            mental_state=self.mental_state,
            dimensions=design.dimensions,
        )

        self.room_designs[name] = design

        return {
            "success": True,
            "room_name": name,
            "design": design.to_dict(),
            "llm_prompt": prompt,
            "message": f"Room '{name}' designed. Use the LLM prompt to generate Three.js code.",
        }

    def create_portal(
        self,
        name: str,
        from_room: str,
        to_room: str,
        style: str = "energy_vortex",
    ) -> Dict[str, Any]:
        """
        Design a portal connection between rooms.

        Both rooms must already be present in ``room_designs``; otherwise a
        failure dict is returned and nothing is stored.

        Args:
            name: Portal identifier
            from_room: Source room name
            to_room: Target room name
            style: Portal visual style

        Returns:
            Dict containing portal design and LLM prompt, or
            ``{"success": False, "error": ...}`` when a room is unknown
        """
        if from_room not in self.room_designs:
            return {"success": False, "error": f"Source room '{from_room}' not found"}
        if to_room not in self.room_designs:
            return {"success": False, "error": f"Target room '{to_room}' not found"}

        design = PortalDesign(
            name=name,
            from_room=from_room,
            to_room=to_room,
            style=style,
        )

        prompt = self.prompt_engineer.engineer_portal_prompt(
            name=name,
            from_room=from_room,
            to_room=to_room,
            style=style,
            mental_state=self.mental_state,
        )

        self.portal_designs[name] = design

        return {
            "success": True,
            "portal_name": name,
            "design": design.to_dict(),
            "llm_prompt": prompt,
            "message": f"Portal '{name}' designed connecting {from_room} to {to_room}",
        }

    def generate_scene_from_mood(self, mood_description: str) -> Dict[str, Any]:
        """
        Generate a complete scene based on mood description.

        Nothing is stored in ``room_designs``; the result is only returned.

        Args:
            mood_description: Description of desired mood/atmosphere

        Returns:
            Dict containing scene design and LLM prompt. ``"aesthetic"`` is an
            independent copy of the mood preset, so callers may modify it.
        """
        import copy

        # NOTE(review): the mood reported here honors ``self.mental_state``,
        # while ``engineer_mood_scene_prompt`` receives only the description;
        # the mood named inside the prompt may therefore differ - confirm
        # whether that is intended.
        mood = self.prompt_engineer._infer_mood(mood_description, self.mental_state)
        preset = getattr(MoodPresets, mood.upper(), MoodPresets.CONTEMPLATIVE)

        room_name = f"{mood}_realm"

        prompt = self.prompt_engineer.engineer_mood_scene_prompt(mood_description)

        return {
            "success": True,
            "room_name": room_name,
            "inferred_mood": mood,
            # Deep copy: handing out the preset itself would let a caller's
            # mutation change the shared MoodPresets value for everyone.
            "aesthetic": copy.deepcopy(preset),
            "llm_prompt": prompt,
            "message": f"Generated {mood} scene from mood description",
        }

    def _extract_features(self, description: str) -> List[str]:
        """Return the feature tags whose keywords occur in the description.

        Matching is a case-insensitive substring test; tags are returned in
        the table's order.
        """
        feature_keywords = {
            "floating": ["floating", "levitating", "hovering"],
            "water": ["water", "fountain", "pool", "stream", "lake"],
            "vegetation": ["tree", "plant", "garden", "forest", "nature"],
            "crystals": ["crystal", "gem", "prism", "diamond"],
            "geometry": ["geometric", "shape", "sphere", "cube", "abstract"],
            "particles": ["particle", "dust", "sparkle", "glow", "mist"],
        }

        desc_lower = description.lower()
        return [
            feature
            for feature, keywords in feature_keywords.items()
            if any(kw in desc_lower for kw in keywords)
        ]

    def get_design_summary(self) -> Dict[str, Any]:
        """Get summary of all designs: mental state, rooms, portals and counts."""
        return {
            "mental_state": self.mental_state.to_dict() if self.mental_state else None,
            "rooms": {name: design.to_dict() for name, design in self.room_designs.items()},
            "portals": {name: portal.to_dict() for name, portal in self.portal_designs.items()},
            "total_rooms": len(self.room_designs),
            "total_portals": len(self.portal_designs),
        }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Module-level functions for easy import
|
||||
# =============================================================================
|
||||
|
||||
_architect_instance: Optional[NexusArchitectAI] = None
|
||||
|
||||
|
||||
def get_architect() -> NexusArchitectAI:
    """Return the module-wide NexusArchitectAI, creating it on first use."""
    global _architect_instance
    architect = _architect_instance
    if architect is None:
        # First call: build the instance and remember it for later calls.
        architect = _architect_instance = NexusArchitectAI()
    return architect
|
||||
|
||||
|
||||
def create_room(
    name: str,
    description: str,
    style: str,
    dimensions: Optional[Dict[str, float]] = None,
) -> Dict[str, Any]:
    """
    Design a room through the shared architect instance.

    Args:
        name: Room identifier
        description: Natural language room description
        style: Visual style (e.g., "minimalist_ethereal")
        dimensions: Optional dimensions dict with width, height, depth

    Returns:
        Whatever ``NexusArchitectAI.design_room`` returns: a dict with the
        design specification and the LLM prompt for code generation
    """
    return get_architect().design_room(
        name=name,
        description=description,
        style=style,
        dimensions=dimensions,
    )
|
||||
|
||||
|
||||
def create_portal(
    name: str,
    from_room: str,
    to_room: str,
    style: str = "energy_vortex",
) -> Dict[str, Any]:
    """
    Design a portal between two rooms through the shared architect instance.

    Args:
        name: Portal identifier
        from_room: Source room name
        to_room: Target room name
        style: Visual style

    Returns:
        Whatever ``NexusArchitectAI.create_portal`` returns: a dict with the
        portal design and LLM prompt
    """
    return get_architect().create_portal(
        name=name,
        from_room=from_room,
        to_room=to_room,
        style=style,
    )
|
||||
|
||||
|
||||
def generate_scene_from_mood(mood_description: str) -> Dict[str, Any]:
    """
    Generate a scene from a mood description via the shared architect instance.

    Args:
        mood_description: Description of desired mood

    Example:
        "Timmy is feeling introspective and seeking clarity"
        → Generates calm, minimalist space with clear sightlines

    Returns:
        Whatever ``NexusArchitectAI.generate_scene_from_mood`` returns: a dict
        with scene design and LLM prompt
    """
    return get_architect().generate_scene_from_mood(mood_description)
|
||||
|
||||
|
||||
def set_mental_state(
    mood: str,
    energy_level: float = 0.5,
    clarity: float = 0.7,
    focus_area: str = "general",
) -> Dict[str, Any]:
    """
    Build a MentalState and install it on the shared architect instance.

    The values are passed to ``MentalState`` as given; this function does
    no range checking of its own.

    Args:
        mood: Current mood (contemplative, energetic, mysterious, welcoming, sovereign)
        energy_level: 0.0 to 1.0
        clarity: 0.0 to 1.0
        focus_area: general, creative, analytical, social

    Returns:
        Confirmation dict with the serialized state and a message
    """
    architect = get_architect()
    new_state = MentalState(
        mood=mood,
        energy_level=energy_level,
        clarity=clarity,
        focus_area=focus_area,
    )
    architect.set_mental_state(new_state)

    confirmation = {
        "success": True,
        "mental_state": new_state.to_dict(),
        "message": f"Mental state set to {mood}",
    }
    return confirmation
|
||||
|
||||
|
||||
def get_nexus_summary() -> Dict[str, Any]:
    """Return the shared architect's design summary (rooms, portals, state)."""
    return get_architect().get_design_summary()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tool Schemas for integration
|
||||
# =============================================================================
|
||||
|
||||
# Tool schemas keyed by tool name. Each entry mirrors the signature of the
# module-level function with the same name; every top-level parameter carries
# a description so a tool-calling model knows what to pass.
NEXUS_ARCHITECT_AI_SCHEMAS = {
    "create_room": {
        "name": "create_room",
        "description": (
            "Design a new 3D room in the Nexus from a natural language description. "
            "Returns a design specification and LLM prompt for Three.js code generation. "
            "The room will be styled according to Timmy's current mental state."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Unique room identifier (e.g., 'contemplation_chamber')",
                },
                "description": {
                    "type": "string",
                    "description": "Natural language description of the room",
                },
                "style": {
                    "type": "string",
                    "description": "Visual style (minimalist_ethereal, crystalline_modern, organic_natural, etc.)",
                },
                "dimensions": {
                    "type": "object",
                    "description": "Optional room dimensions",
                    "properties": {
                        "width": {"type": "number"},
                        "height": {"type": "number"},
                        "depth": {"type": "number"},
                    },
                },
            },
            "required": ["name", "description", "style"],
        },
    },
    "create_portal": {
        "name": "create_portal",
        "description": "Create a portal connection between two rooms",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Portal identifier",
                },
                "from_room": {
                    "type": "string",
                    "description": "Source room name (the room must already be designed)",
                },
                "to_room": {
                    "type": "string",
                    "description": "Target room name (the room must already be designed)",
                },
                "style": {
                    "type": "string",
                    "description": "Portal visual style",
                    "default": "energy_vortex",
                },
            },
            "required": ["name", "from_room", "to_room"],
        },
    },
    "generate_scene_from_mood": {
        "name": "generate_scene_from_mood",
        "description": (
            "Generate a complete 3D scene based on a mood description. "
            "Example: 'Timmy is feeling introspective' creates a calm, minimalist space."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "mood_description": {
                    "type": "string",
                    "description": "Description of desired mood or mental state",
                },
            },
            "required": ["mood_description"],
        },
    },
    "set_mental_state": {
        "name": "set_mental_state",
        "description": "Set Timmy's mental state to influence aesthetic generation",
        "parameters": {
            "type": "object",
            "properties": {
                "mood": {
                    "type": "string",
                    "description": "Current mood (contemplative, energetic, mysterious, welcoming, sovereign)",
                },
                "energy_level": {
                    "type": "number",
                    "description": "Energy level, 0.0 to 1.0",
                    "default": 0.5,
                },
                "clarity": {
                    "type": "number",
                    "description": "Clarity, 0.0 to 1.0",
                    "default": 0.7,
                },
                "focus_area": {
                    "type": "string",
                    "description": "Focus area: general, creative, analytical, social",
                    "default": "general",
                },
            },
            "required": ["mood"],
        },
    },
    "get_nexus_summary": {
        "name": "get_nexus_summary",
        "description": "Get summary of all Nexus room and portal designs",
        "parameters": {"type": "object", "properties": {}},
    },
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Small walkthrough: set a mental state, design one room, then derive a
    # scene from a mood sentence, printing the interesting part of each result.
    print("Nexus Architect AI - Demo")
    print("=" * 50)

    state_result = set_mental_state("contemplative", energy_level=0.3, clarity=0.8)
    print(f"\nMental State: {state_result['mental_state']}")

    room_result = create_room(
        name="contemplation_chamber",
        description="A serene circular room with floating geometric shapes and soft blue light",
        style="minimalist_ethereal",
    )
    print(f"\nRoom Design: {json.dumps(room_result['design'], indent=2)}")

    scene_result = generate_scene_from_mood("Timmy is feeling introspective and seeking clarity")
    print(f"\nMood Scene: {scene_result['inferred_mood']} - {scene_result['aesthetic']['description']}")
|
||||
@@ -1,752 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Nexus Deployment System
|
||||
|
||||
Real-time deployment system for Nexus Three.js modules.
|
||||
Provides hot-reload, validation, rollback, and versioning capabilities.
|
||||
|
||||
Features:
|
||||
- Hot-reload Three.js modules without page refresh
|
||||
- Syntax validation and Three.js API compliance checking
|
||||
- Rollback on error
|
||||
- Versioning for nexus modules
|
||||
- Module registry and dependency tracking
|
||||
|
||||
Usage:
|
||||
from agent.nexus_deployment import NexusDeployer
|
||||
|
||||
deployer = NexusDeployer()
|
||||
|
||||
# Deploy with hot-reload
|
||||
result = deployer.deploy_module(room_code, module_name="zen_garden")
|
||||
|
||||
# Rollback if needed
|
||||
deployer.rollback_module("zen_garden")
|
||||
|
||||
# Get module status
|
||||
status = deployer.get_module_status("zen_garden")
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import os
|
||||
import hashlib
|
||||
from typing import Dict, Any, List, Optional, Set
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
# Import validation from existing nexus_architect (avoid circular imports)
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
def _import_validation():
    """Return ``(validate_three_js_code, sanitize_three_js_code)``.

    The real implementations are imported lazily from
    ``tools.nexus_architect`` to avoid circular dependencies. When that
    import fails, minimal local stand-ins are returned instead:

    * the fallback validator only reports calls to ``eval`` and to the
      ``Function`` constructor, and ignores ``strict_mode``;
    * the fallback sanitizer returns the code unchanged.

    Returns:
        A 2-tuple of callables. The validator takes ``(code, strict_mode=False)``
        and returns an object with ``is_valid``, ``errors`` and ``warnings``.
    """
    try:
        from tools.nexus_architect import validate_three_js_code, sanitize_three_js_code
    except ImportError:
        pass
    else:
        return validate_three_js_code, sanitize_three_js_code

    from types import SimpleNamespace

    # The lookbehind keeps identifiers that merely end in the forbidden name
    # (``retrieval(``, ``createFunction(``, ``$eval(``) from being reported,
    # and ``\s*`` still catches a call written with a space before "(".
    forbidden_calls = (
        (re.compile(r"(?<![\w$])eval\s*\("), "Security violation: eval detected"),
        (
            re.compile(r"(?<![\w$])Function\s*\("),
            "Security violation: Function constructor detected",
        ),
    )

    def validate_three_js_code(code, strict_mode=False):
        """Fallback validation."""
        errors = [message for pattern, message in forbidden_calls if pattern.search(code)]
        return SimpleNamespace(is_valid=not errors, errors=errors, warnings=[])

    def sanitize_three_js_code(code):
        """Fallback sanitization."""
        return code

    return validate_three_js_code, sanitize_three_js_code
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Deployment States
|
||||
# =============================================================================
|
||||
|
||||
class DeploymentStatus(Enum):
    """Lifecycle states a module deployment can be in.

    Each member's value is the lower-case string used when a status is
    serialized (for example by ``DeployedModule.to_dict``).
    """

    # Forward path of a deployment
    PENDING = "pending"
    VALIDATING = "validating"
    DEPLOYING = "deploying"
    ACTIVE = "active"

    # Failure and recovery
    FAILED = "failed"
    ROLLING_BACK = "rolling_back"
    ROLLED_BACK = "rolled_back"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Data Models
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class ModuleVersion:
    """One recorded version of a Nexus module."""

    version_id: str
    module_name: str
    code_hash: str
    timestamp: str
    changes: str = ""
    author: str = "nexus_architect"

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict, keys in field order."""
        exported = (
            "version_id",
            "module_name",
            "code_hash",
            "timestamp",
            "changes",
            "author",
        )
        return {key: getattr(self, key) for key in exported}
|
||||
|
||||
|
||||
@dataclass
class DeployedModule:
    """A deployed Nexus module together with its deployment bookkeeping."""

    name: str
    code: str
    status: DeploymentStatus
    version: str
    deployed_at: str
    last_updated: str
    validation_result: Dict[str, Any] = field(default_factory=dict)
    error_log: List[str] = field(default_factory=list)
    dependencies: Set[str] = field(default_factory=set)
    hot_reload_supported: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return a serializable summary of the module.

        The full ``code`` and the ``error_log`` are not included; the code is
        represented by ``code_preview`` (first 200 characters plus "..." when
        longer, otherwise the whole code).

        Returns:
            Dict with name, status value, version, timestamps, validation
            result, sorted dependency list, hot-reload flag and code preview.
        """
        if len(self.code) > 200:
            preview = self.code[:200] + "..."
        else:
            preview = self.code

        return {
            "name": self.name,
            "status": self.status.value,
            "version": self.version,
            "deployed_at": self.deployed_at,
            "last_updated": self.last_updated,
            "validation": self.validation_result,
            # Sets iterate in arbitrary order; sorting keeps the output
            # stable between calls and between runs.
            "dependencies": sorted(self.dependencies),
            "hot_reload_supported": self.hot_reload_supported,
            "code_preview": preview,
        }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Nexus Deployer
|
||||
# =============================================================================
|
||||
|
||||
class NexusDeployer:
|
||||
"""
|
||||
Deployment system for Nexus Three.js modules.
|
||||
|
||||
Provides:
|
||||
- Hot-reload deployment
|
||||
- Validation before deployment
|
||||
- Automatic rollback on failure
|
||||
- Version tracking
|
||||
- Module registry
|
||||
"""
|
||||
|
||||
def __init__(self, modules_dir: Optional[str] = None):
|
||||
"""
|
||||
Initialize the Nexus Deployer.
|
||||
|
||||
Args:
|
||||
modules_dir: Directory to store deployed modules (optional)
|
||||
"""
|
||||
self.modules: Dict[str, DeployedModule] = {}
|
||||
self.version_history: Dict[str, List[ModuleVersion]] = {}
|
||||
self.modules_dir = modules_dir or os.path.expanduser("~/.nexus/modules")
|
||||
|
||||
# Ensure modules directory exists
|
||||
os.makedirs(self.modules_dir, exist_ok=True)
|
||||
|
||||
# Hot-reload configuration
|
||||
self.hot_reload_enabled = True
|
||||
self.auto_rollback = True
|
||||
self.strict_validation = True
|
||||
|
||||
logger.info(f"NexusDeployer initialized. Modules dir: {self.modules_dir}")
|
||||
|
||||
def deploy_module(
|
||||
self,
|
||||
module_code: str,
|
||||
module_name: str,
|
||||
version: Optional[str] = None,
|
||||
dependencies: Optional[List[str]] = None,
|
||||
hot_reload: bool = True,
|
||||
validate: bool = True
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Deploy a Nexus module with hot-reload support.
|
||||
|
||||
Args:
|
||||
module_code: The Three.js module code
|
||||
module_name: Unique module identifier
|
||||
version: Optional version string (auto-generated if not provided)
|
||||
dependencies: List of dependent module names
|
||||
hot_reload: Enable hot-reload for this module
|
||||
validate: Run validation before deployment
|
||||
|
||||
Returns:
|
||||
Dict with deployment results
|
||||
"""
|
||||
timestamp = datetime.now().isoformat()
|
||||
version = version or self._generate_version(module_name, module_code)
|
||||
|
||||
result = {
|
||||
"success": True,
|
||||
"module_name": module_name,
|
||||
"version": version,
|
||||
"timestamp": timestamp,
|
||||
"hot_reload": hot_reload,
|
||||
"validation": {},
|
||||
"deployment": {},
|
||||
}
|
||||
|
||||
# Check for existing module (hot-reload scenario)
|
||||
existing_module = self.modules.get(module_name)
|
||||
if existing_module and not hot_reload:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Module '{module_name}' already exists. Use hot_reload=True to update."
|
||||
}
|
||||
|
||||
# Validation phase
|
||||
if validate:
|
||||
validation = self._validate_module(module_code)
|
||||
result["validation"] = validation
|
||||
|
||||
if not validation["is_valid"]:
|
||||
result["success"] = False
|
||||
result["error"] = "Validation failed"
|
||||
result["message"] = "Module deployment aborted due to validation errors"
|
||||
|
||||
if self.auto_rollback:
|
||||
result["rollback_triggered"] = False # Nothing to rollback yet
|
||||
|
||||
return result
|
||||
|
||||
# Create deployment backup for rollback
|
||||
if existing_module:
|
||||
self._create_backup(existing_module)
|
||||
|
||||
# Deployment phase
|
||||
try:
|
||||
deployed = DeployedModule(
|
||||
name=module_name,
|
||||
code=module_code,
|
||||
status=DeploymentStatus.DEPLOYING,
|
||||
version=version,
|
||||
deployed_at=timestamp if not existing_module else existing_module.deployed_at,
|
||||
last_updated=timestamp,
|
||||
validation_result=result.get("validation", {}),
|
||||
dependencies=set(dependencies or []),
|
||||
hot_reload_supported=hot_reload,
|
||||
)
|
||||
|
||||
# Save to file system
|
||||
self._save_module_file(deployed)
|
||||
|
||||
# Update registry
|
||||
deployed.status = DeploymentStatus.ACTIVE
|
||||
self.modules[module_name] = deployed
|
||||
|
||||
# Record version
|
||||
self._record_version(module_name, version, module_code)
|
||||
|
||||
result["deployment"] = {
|
||||
"status": "active",
|
||||
"hot_reload_ready": hot_reload,
|
||||
"file_path": self._get_module_path(module_name),
|
||||
}
|
||||
result["message"] = f"Module '{module_name}' v{version} deployed successfully"
|
||||
|
||||
if existing_module:
|
||||
result["message"] += " (hot-reload update)"
|
||||
|
||||
logger.info(f"Deployed module: {module_name} v{version}")
|
||||
|
||||
except Exception as e:
|
||||
result["success"] = False
|
||||
result["error"] = str(e)
|
||||
result["deployment"] = {"status": "failed"}
|
||||
|
||||
# Attempt rollback if deployment failed
|
||||
if self.auto_rollback and existing_module:
|
||||
rollback_result = self.rollback_module(module_name)
|
||||
result["rollback_result"] = rollback_result
|
||||
|
||||
logger.error(f"Deployment failed for {module_name}: {e}")
|
||||
|
||||
return result
|
||||
|
||||
def hot_reload_module(self, module_name: str, new_code: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Hot-reload an active module with new code.
|
||||
|
||||
Args:
|
||||
module_name: Name of the module to reload
|
||||
new_code: New module code
|
||||
|
||||
Returns:
|
||||
Dict with reload results
|
||||
"""
|
||||
if module_name not in self.modules:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Module '{module_name}' not found. Deploy it first."
|
||||
}
|
||||
|
||||
module = self.modules[module_name]
|
||||
if not module.hot_reload_supported:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Module '{module_name}' does not support hot-reload"
|
||||
}
|
||||
|
||||
# Use deploy_module with hot_reload=True
|
||||
return self.deploy_module(
|
||||
module_code=new_code,
|
||||
module_name=module_name,
|
||||
hot_reload=True,
|
||||
validate=True
|
||||
)
|
||||
|
||||
def rollback_module(self, module_name: str, to_version: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Rollback a module to a previous version.
|
||||
|
||||
Args:
|
||||
module_name: Module to rollback
|
||||
to_version: Specific version to rollback to (latest backup if not specified)
|
||||
|
||||
Returns:
|
||||
Dict with rollback results
|
||||
"""
|
||||
if module_name not in self.modules:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Module '{module_name}' not found"
|
||||
}
|
||||
|
||||
module = self.modules[module_name]
|
||||
module.status = DeploymentStatus.ROLLING_BACK
|
||||
|
||||
try:
|
||||
if to_version:
|
||||
# Restore specific version
|
||||
version_data = self._get_version(module_name, to_version)
|
||||
if not version_data:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Version '{to_version}' not found for module '{module_name}'"
|
||||
}
|
||||
# Would restore from version data
|
||||
else:
|
||||
# Restore from backup
|
||||
backup_code = self._get_backup(module_name)
|
||||
if backup_code:
|
||||
module.code = backup_code
|
||||
module.last_updated = datetime.now().isoformat()
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"No backup available for '{module_name}'"
|
||||
}
|
||||
|
||||
module.status = DeploymentStatus.ROLLED_BACK
|
||||
self._save_module_file(module)
|
||||
|
||||
logger.info(f"Rolled back module: {module_name}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"module_name": module_name,
|
||||
"message": f"Module '{module_name}' rolled back successfully",
|
||||
"status": module.status.value,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
module.status = DeploymentStatus.FAILED
|
||||
logger.error(f"Rollback failed for {module_name}: {e}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def validate_module(self, module_code: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate Three.js module code without deploying.
|
||||
|
||||
Args:
|
||||
module_code: Code to validate
|
||||
|
||||
Returns:
|
||||
Dict with validation results
|
||||
"""
|
||||
return self._validate_module(module_code)
|
||||
|
||||
def get_module_status(self, module_name: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get status of a deployed module.
|
||||
|
||||
Args:
|
||||
module_name: Module name
|
||||
|
||||
Returns:
|
||||
Module status dict or None if not found
|
||||
"""
|
||||
if module_name in self.modules:
|
||||
return self.modules[module_name].to_dict()
|
||||
return None
|
||||
|
||||
def get_all_modules(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get status of all deployed modules.
|
||||
|
||||
Returns:
|
||||
Dict with all module statuses
|
||||
"""
|
||||
return {
|
||||
"modules": {
|
||||
name: module.to_dict()
|
||||
for name, module in self.modules.items()
|
||||
},
|
||||
"total_count": len(self.modules),
|
||||
"active_count": sum(1 for m in self.modules.values() if m.status == DeploymentStatus.ACTIVE),
|
||||
}
|
||||
|
||||
def get_version_history(self, module_name: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get version history for a module.
|
||||
|
||||
Args:
|
||||
module_name: Module name
|
||||
|
||||
Returns:
|
||||
List of version dicts
|
||||
"""
|
||||
history = self.version_history.get(module_name, [])
|
||||
return [v.to_dict() for v in history]
|
||||
|
||||
def remove_module(self, module_name: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Remove a deployed module.
|
||||
|
||||
Args:
|
||||
module_name: Module to remove
|
||||
|
||||
Returns:
|
||||
Dict with removal results
|
||||
"""
|
||||
if module_name not in self.modules:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Module '{module_name}' not found"
|
||||
}
|
||||
|
||||
try:
|
||||
# Remove file
|
||||
module_path = self._get_module_path(module_name)
|
||||
if os.path.exists(module_path):
|
||||
os.remove(module_path)
|
||||
|
||||
# Remove from registry
|
||||
del self.modules[module_name]
|
||||
|
||||
logger.info(f"Removed module: {module_name}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"Module '{module_name}' removed successfully"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _validate_module(self, code: str) -> Dict[str, Any]:
|
||||
"""Internal validation method."""
|
||||
# Use existing validation from nexus_architect (lazy import)
|
||||
validate_fn, _ = _import_validation()
|
||||
validation_result = validate_fn(code, strict_mode=self.strict_validation)
|
||||
|
||||
# Check Three.js API compliance
|
||||
three_api_issues = self._check_three_js_api_compliance(code)
|
||||
|
||||
return {
|
||||
"is_valid": validation_result.is_valid and len(three_api_issues) == 0,
|
||||
"syntax_valid": validation_result.is_valid,
|
||||
"api_compliant": len(three_api_issues) == 0,
|
||||
"errors": validation_result.errors + three_api_issues,
|
||||
"warnings": validation_result.warnings,
|
||||
"safety_score": max(0, 100 - len(validation_result.errors) * 20 - len(validation_result.warnings) * 5),
|
||||
}
|
||||
|
||||
def _check_three_js_api_compliance(self, code: str) -> List[str]:
    """Check for Three.js API compliance issues.

    Args:
        code: Module source to inspect

    Returns:
        List of issue messages; empty when nothing was flagged.
    """
    # Check for deprecated APIs: (regex, explanation) pairs
    deprecated_patterns = (
        (r"THREE\.Face3", "THREE.Face3 is deprecated, use BufferGeometry"),
        (r"THREE\.Geometry\(", "THREE.Geometry is deprecated, use BufferGeometry"),
    )

    issues = []

    # Check for required patterns: the code must mention THREE.Group or
    # construct something with "new THREE".
    creates_three_object = "THREE.Group" in code or "new THREE" in code
    if not creates_three_object:
        issues.append("No Three.js objects created")

    issues.extend(
        f"Deprecated API: {message}"
        for pattern, message in deprecated_patterns
        if re.search(pattern, code)
    )
    return issues
|
||||
|
||||
def _generate_version(self, module_name: str, code: str) -> str:
    """Generate version string from code hash.

    The result is ``<YYYYmmddHHMM>-<first 8 hex chars of the MD5 of code>``.
    ``module_name`` is accepted but does not influence the result.
    """
    digest = hashlib.md5(code.encode()).hexdigest()
    stamp = datetime.now().strftime("%Y%m%d%H%M")
    return "-".join((stamp, digest[:8]))
|
||||
|
||||
def _create_backup(self, module: DeployedModule) -> None:
    """Create backup of existing module.

    Writes ``module.code`` to
    ``<modules_dir>/<name>.<version>.backup.js``.

    Args:
        module: Module whose code is backed up
    """
    backup_path = os.path.join(
        self.modules_dir,
        f"{module.name}.{module.version}.backup.js"
    )
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent characters in the module source, and must match what
    # _get_backup reads back.
    with open(backup_path, 'w', encoding='utf-8') as f:
        f.write(module.code)


def _get_backup(self, module_name: str) -> Optional[str]:
    """Get backup code for module.

    Looks for the backup file named after the registered module's current
    name and version.

    Args:
        module_name: Registered module name

    Returns:
        The backup contents, or None when the module is not registered or
        no such backup file exists.
    """
    if module_name not in self.modules:
        return None

    module = self.modules[module_name]
    backup_path = os.path.join(
        self.modules_dir,
        f"{module.name}.{module.version}.backup.js"
    )

    # Open directly rather than checking existence first, so a file
    # removed in between is reported as "no backup" instead of raising.
    try:
        with open(backup_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        return None
|
||||
|
||||
def _save_module_file(self, module: DeployedModule) -> None:
    """Save module to file system.

    Writes a comment header (name, version, status, update time,
    hot-reload flag), a blank line, and then the module code to the path
    returned by ``_get_module_path``.

    Args:
        module: Module to persist
    """
    module_path = self._get_module_path(module.name)

    # Build the header before opening the file: opening with 'w'
    # truncates, so a failing attribute lookup must not leave an emptied
    # or half-written module behind.
    header = (
        f"// Nexus Module: {module.name}\n"
        f"// Version: {module.version}\n"
        f"// Status: {module.status.value}\n"
        f"// Updated: {module.last_updated}\n"
        f"// Hot-Reload: {module.hot_reload_supported}\n"
        "\n"
    )

    # Explicit UTF-8 so non-ASCII source does not depend on the platform
    # default encoding.
    with open(module_path, 'w', encoding='utf-8') as f:
        f.write(header)
        f.write(module.code)
|
||||
|
||||
def _get_module_path(self, module_name: str) -> str:
    """Get file path for module: ``<modules_dir>/<module_name>.nexus.js``."""
    filename = f"{module_name}.nexus.js"
    return os.path.join(self.modules_dir, filename)
|
||||
|
||||
def _record_version(self, module_name: str, version: str, code: str) -> None:
    """Record version in history.

    The new entry goes to the front of the module's history, and the
    history is then cut to its 10 most recent entries.

    Args:
        module_name: Module the version belongs to
        version: Version identifier to record
        code: Module source; only a 16-hex-char MD5 prefix is stored
    """
    history = self.version_history.setdefault(module_name, [])

    entry = ModuleVersion(
        version_id=version,
        module_name=module_name,
        code_hash=hashlib.md5(code.encode()).hexdigest()[:16],
        timestamp=datetime.now().isoformat(),
    )
    history.insert(0, entry)

    # Keep only last 10 versions
    self.version_history[module_name] = history[:10]
|
||||
|
||||
def _get_version(self, module_name: str, version: str) -> Optional[ModuleVersion]:
    """Get specific version info.

    Returns the first history entry of ``module_name`` whose
    ``version_id`` equals ``version``, or None when there is none.
    """
    candidates = self.version_history.get(module_name, [])
    return next(
        (entry for entry in candidates if entry.version_id == version),
        None,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Convenience Functions
|
||||
# =============================================================================
|
||||
|
||||
_deployer_instance: Optional[NexusDeployer] = None
|
||||
|
||||
|
||||
def get_deployer() -> NexusDeployer:
    """Get or create the NexusDeployer singleton.

    The instance is created on first use and stored in the module-level
    ``_deployer_instance``; later calls return that same object.
    """
    global _deployer_instance
    if _deployer_instance is not None:
        return _deployer_instance
    _deployer_instance = NexusDeployer()
    return _deployer_instance
|
||||
|
||||
|
||||
def deploy_nexus_module(
    module_code: str,
    module_name: str,
    test: bool = True,
    hot_reload: bool = True
) -> Dict[str, Any]:
    """
    Deploy a Nexus module with validation.

    Convenience wrapper that forwards to ``deploy_module`` on the shared
    deployer returned by ``get_deployer()``.

    Args:
        module_code: Three.js module code
        module_name: Unique module identifier
        test: Run validation tests before deployment (passed on as
            ``validate``)
        hot_reload: Enable hot-reload support

    Returns:
        Dict with deployment results
    """
    options = {
        "module_code": module_code,
        "module_name": module_name,
        "hot_reload": hot_reload,
        "validate": test,
    }
    return get_deployer().deploy_module(**options)
|
||||
|
||||
|
||||
def hot_reload_module(module_name: str, new_code: str) -> Dict[str, Any]:
    """
    Hot-reload an existing module.

    Convenience wrapper around ``hot_reload_module`` on the shared
    deployer returned by ``get_deployer()``.

    Args:
        module_name: Module to reload
        new_code: New module code

    Returns:
        Dict with reload results
    """
    return get_deployer().hot_reload_module(module_name, new_code)
|
||||
|
||||
|
||||
def validate_nexus_code(code: str) -> Dict[str, Any]:
    """
    Validate Three.js code without deploying.

    Convenience wrapper around ``validate_module`` on the shared deployer
    returned by ``get_deployer()``.

    Args:
        code: Three.js code to validate

    Returns:
        Dict with validation results
    """
    return get_deployer().validate_module(code)
|
||||
|
||||
|
||||
def get_deployment_status() -> Dict[str, Any]:
    """Get status of all deployed modules.

    Returns whatever ``get_all_modules()`` reports on the shared deployer.
    """
    return get_deployer().get_all_modules()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tool Schemas
|
||||
# =============================================================================
|
||||
|
||||
# Tool schemas keyed by tool name. Each entry repeats its key as "name"
# and describes the tool's arguments as a JSON-Schema-style object.
NEXUS_DEPLOYMENT_SCHEMAS = {
    # Mirrors deploy_nexus_module(module_code, module_name, test, hot_reload)
    "deploy_nexus_module": {
        "name": "deploy_nexus_module",
        "description": "Deploy a Nexus Three.js module with validation and hot-reload support",
        "parameters": {
            "type": "object",
            "properties": {
                "module_code": {"type": "string"},
                "module_name": {"type": "string"},
                "test": {"type": "boolean", "default": True},
                "hot_reload": {"type": "boolean", "default": True},
            },
            "required": ["module_code", "module_name"]
        }
    },
    # Mirrors hot_reload_module(module_name, new_code)
    "hot_reload_module": {
        "name": "hot_reload_module",
        "description": "Hot-reload an existing Nexus module with new code",
        "parameters": {
            "type": "object",
            "properties": {
                "module_name": {"type": "string"},
                "new_code": {"type": "string"},
            },
            "required": ["module_name", "new_code"]
        }
    },
    # Mirrors validate_nexus_code(code)
    "validate_nexus_code": {
        "name": "validate_nexus_code",
        "description": "Validate Three.js code for Nexus deployment without deploying",
        "parameters": {
            "type": "object",
            "properties": {
                "code": {"type": "string"}
            },
            "required": ["code"]
        }
    },
    # Mirrors get_deployment_status(); takes no arguments
    "get_deployment_status": {
        "name": "get_deployment_status",
        "description": "Get status of all deployed Nexus modules",
        "parameters": {"type": "object", "properties": {}}
    },
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo: deploy one sample room module with a fresh deployer, then
    # print the deployment result and the module counts.
    print("Nexus Deployment System - Demo")
    print("=" * 50)

    demo_deployer = NexusDeployer()

    # Sample module code
    demo_source = """
(function() {
function createDemoRoom() {
const room = new THREE.Group();
room.name = 'demo_room';

const light = new THREE.AmbientLight(0x404040, 0.5);
room.add(light);

return room;
}

window.NexusRooms = window.NexusRooms || {};
window.NexusRooms.demo_room = createDemoRoom;

return { createDemoRoom };
})();
"""

    # Deploy
    outcome = demo_deployer.deploy_module(demo_source, "demo_room")
    validation = outcome['validation']
    print(f"\nDeployment result: {outcome['message']}")
    print(f"Validation: {validation.get('is_valid', False)}")
    print(f"Safety score: {validation.get('safety_score', 0)}/100")

    # Get status
    summary = demo_deployer.get_all_modules()
    print(f"\nTotal modules: {summary['total_count']}")
    print(f"Active: {summary['active_count']}")
|
||||
@@ -187,7 +187,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
|
||||
# Model name substrings that trigger tool-use enforcement guidance.
|
||||
# Add new patterns here when a model family needs explicit steering.
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma")
|
||||
|
||||
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
|
||||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
@@ -744,6 +744,7 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
"browser_type",
|
||||
"browser_scroll",
|
||||
"browser_console",
|
||||
"browser_close",
|
||||
"browser_press",
|
||||
"browser_get_images",
|
||||
"browser_vision",
|
||||
@@ -773,13 +774,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
|
||||
lines = [
|
||||
"# Nous Subscription",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
|
||||
"Current capability status:",
|
||||
]
|
||||
lines.extend(_status_line(feature) for feature in features.items())
|
||||
lines.extend(
|
||||
[
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
|
||||
"If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
|
||||
"Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
|
||||
"Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
|
||||
|
||||
@@ -12,21 +12,10 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from agent.skill_security import (
|
||||
validate_skill_name,
|
||||
resolve_skill_path,
|
||||
SkillSecurityError,
|
||||
PathTraversalError,
|
||||
InvalidSkillNameError,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
@@ -56,37 +45,17 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
if not raw_identifier:
|
||||
return None
|
||||
|
||||
# Security: Validate skill identifier to prevent path traversal (V-011)
|
||||
try:
|
||||
validate_skill_name(raw_identifier, allow_path_separator=True)
|
||||
except SkillSecurityError as e:
|
||||
logger.warning("Security: Blocked skill loading attempt with invalid identifier '%s': %s", raw_identifier, e)
|
||||
return None
|
||||
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, skill_view
|
||||
|
||||
# Security: Block absolute paths and home directory expansion attempts
|
||||
identifier_path = Path(raw_identifier)
|
||||
identifier_path = Path(raw_identifier).expanduser()
|
||||
if identifier_path.is_absolute():
|
||||
logger.warning("Security: Blocked absolute path in skill identifier: %s", raw_identifier)
|
||||
return None
|
||||
|
||||
# Normalize the identifier: remove leading slashes and validate
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
# Security: Double-check no traversal patterns remain after normalization
|
||||
if ".." in normalized or "~" in normalized:
|
||||
logger.warning("Security: Blocked path traversal in skill identifier: %s", raw_identifier)
|
||||
return None
|
||||
|
||||
# Security: Verify the resolved path stays within SKILLS_DIR
|
||||
try:
|
||||
target_path = (SKILLS_DIR / normalized).resolve()
|
||||
target_path.relative_to(SKILLS_DIR.resolve())
|
||||
except (ValueError, OSError):
|
||||
logger.warning("Security: Skill path escapes skills directory: %s", raw_identifier)
|
||||
return None
|
||||
try:
|
||||
normalized = str(identifier_path.resolve().relative_to(SKILLS_DIR.resolve()))
|
||||
except Exception:
|
||||
normalized = raw_identifier
|
||||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
except Exception:
|
||||
@@ -107,45 +76,6 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
return loaded_skill, skill_dir, skill_name
|
||||
|
||||
|
||||
def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None:
    """Resolve and inject skill-declared config values into the message parts.

    If the loaded skill's frontmatter declares ``metadata.hermes.config``
    entries, their current values (from config.yaml or defaults) are appended
    as a ``[Skill config: ...]`` block so the agent knows the configured values
    without needing to read config.yaml itself.

    Args:
        loaded_skill: Loaded skill payload; its ``raw_content`` (or, failing
            that, ``content``) is parsed for frontmatter.
        parts: Message parts list, extended in place when there is config
            to show.

    Any failure is logged at debug level and otherwise ignored: the skill
    still loads without config injection.
    """
    try:
        from agent.skill_utils import (
            extract_skill_config_vars,
            parse_frontmatter,
            resolve_skill_config_values,
        )

        # The loaded_skill dict contains the raw content which includes frontmatter
        raw_content = str(loaded_skill.get("raw_content") or loaded_skill.get("content") or "")
        if not raw_content:
            return

        frontmatter, _ = parse_frontmatter(raw_content)
        config_vars = extract_skill_config_vars(frontmatter)
        if not config_vars:
            return

        resolved = resolve_skill_config_values(config_vars)
        if not resolved:
            return

        lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
        for key, value in resolved.items():
            # Only None and the empty string mean "not set". A plain
            # truthiness test would also hide real values such as 0 or False.
            is_unset = value is None or (isinstance(value, str) and not value)
            display_val = "(not set)" if is_unset else str(value)
            lines.append(f" {key} = {display_val}")
        lines.append("]")
        parts.extend(lines)
    except Exception:
        # Non-critical — skill still loads without config injection
        logger.debug("Skill config injection skipped", exc_info=True)
|
||||
|
||||
|
||||
def _build_skill_message(
|
||||
loaded_skill: dict[str, Any],
|
||||
skill_dir: Path | None,
|
||||
@@ -160,9 +90,6 @@ def _build_skill_message(
|
||||
|
||||
parts = [activation_note, "", content.strip()]
|
||||
|
||||
# ── Inject resolved skill config values ──
|
||||
_inject_skill_config(loaded_skill, parts)
|
||||
|
||||
if loaded_skill.get("setup_skipped"):
|
||||
parts.extend(
|
||||
[
|
||||
@@ -269,14 +196,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
description = line[:80]
|
||||
break
|
||||
seen_names.add(name)
|
||||
# Normalize to hyphen-separated slug, stripping
|
||||
# non-alnum chars (e.g. +, /) to avoid invalid
|
||||
# Telegram command names downstream.
|
||||
cmd_name = name.lower().replace(' ', '-').replace('_', '-')
|
||||
cmd_name = _SKILL_INVALID_CHARS.sub('', cmd_name)
|
||||
cmd_name = _SKILL_MULTI_HYPHEN.sub('-', cmd_name).strip('-')
|
||||
if not cmd_name:
|
||||
continue
|
||||
_skill_commands[f"/{cmd_name}"] = {
|
||||
"name": name,
|
||||
"description": description or f"Invoke the {name} skill",
|
||||
|
||||
@@ -1,213 +0,0 @@
|
||||
"""Security utilities for skill loading and validation.
|
||||
|
||||
Provides path traversal protection and input validation for skill names
|
||||
to prevent security vulnerabilities like V-011 (Skills Guard Bypass).
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
|
||||
# Strict skill name validation: alphanumeric, dots, hyphens, underscores only
|
||||
# This prevents path traversal attacks via skill names like "../../../etc/passwd"
|
||||
VALID_SKILL_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9._-]+$')

# Maximum skill name length to prevent other attack vectors
MAX_SKILL_NAME_LENGTH = 256

# Suspicious patterns that indicate path traversal attempts.
# Kept as a list: validate_skill_name() calls .copy() on it.
PATH_TRAVERSAL_PATTERNS = [
    "..",           # Parent directory reference
    "~",            # Home directory expansion
    "/",            # Absolute path (Unix)
    "\\",           # Windows path separator
    "//",           # Protocol-relative or UNC path
    "file:",        # File protocol
    "ftp:",         # FTP protocol
    "http:",        # HTTP protocol
    "https:",       # HTTPS protocol
    "data:",        # Data URI
    "javascript:",  # JavaScript protocol
    "vbscript:",    # VBScript protocol
]

# Characters that should never appear in skill names: every control
# character from \x00 through \x1f, plus shell/markup metacharacters.
INVALID_CHARACTERS = {chr(code) for code in range(0x20)} | set('<>|&;$`"\'')
|
||||
|
||||
|
||||
class SkillSecurityError(Exception):
    """Raised when a skill name fails security validation.

    Base class of the skill-security exceptions, so callers can catch
    this one type to handle all of them.
    """


class PathTraversalError(SkillSecurityError):
    """Raised when path traversal is detected in a skill name."""


class InvalidSkillNameError(SkillSecurityError):
    """Raised when a skill name contains invalid characters."""
|
||||
|
||||
|
||||
def validate_skill_name(name: str, allow_path_separator: bool = False) -> None:
    """Validate a skill name for security issues.

    Checks run in this order: non-empty string, maximum length, forbidden
    characters, allowed character set, then path traversal patterns.

    Args:
        name: The skill name or identifier to validate
        allow_path_separator: If True, allows '/' for category/skill paths (e.g., "mlops/axolotl")

    Raises:
        PathTraversalError: If path traversal patterns are detected
        InvalidSkillNameError: If the name contains invalid characters
        SkillSecurityError: For other security violations
    """
    if not name or not isinstance(name, str):
        raise InvalidSkillNameError("Skill name must be a non-empty string")

    if len(name) > MAX_SKILL_NAME_LENGTH:
        raise InvalidSkillNameError(
            f"Skill name exceeds maximum length of {MAX_SKILL_NAME_LENGTH} characters"
        )

    # Check for null bytes and other control characters
    for char in name:
        if char in INVALID_CHARACTERS:
            raise InvalidSkillNameError(
                f"Skill name contains invalid character: {repr(char)}"
            )

    # Validate against allowed character pattern first. The same character
    # class is used for the check and for the error report, so a '/' that
    # is rejected because separators are disallowed is actually listed.
    allowed_class = r'[a-zA-Z0-9._/-]' if allow_path_separator else r'[a-zA-Z0-9._-]'
    if not re.fullmatch(f'{allowed_class}+', name):
        invalid_chars = {c for c in name if not re.fullmatch(allowed_class, c)}
        raise InvalidSkillNameError(
            f"Skill name contains invalid characters: {sorted(invalid_chars)}. "
            "Only alphanumeric characters, hyphens, underscores, dots, "
            f"{'and forward slashes ' if allow_path_separator else ''}are allowed."
        )

    # Check for path traversal patterns (excluding '/' when path separators are allowed)
    name_lower = name.lower()
    patterns_to_check = [
        p for p in PATH_TRAVERSAL_PATTERNS
        if not (allow_path_separator and p == '/')
    ]

    for pattern in patterns_to_check:
        if pattern in name_lower:
            raise PathTraversalError(
                f"Path traversal detected in skill name: '{pattern}' is not allowed"
            )
|
||||
|
||||
|
||||
def resolve_skill_path(
    skill_name: str,
    skills_base_dir: Path,
    allow_path_separator: bool = True
) -> Tuple[Path, Optional[str]]:
    """Safely resolve a skill name to a path within the skills directory.

    Args:
        skill_name: The skill name or path (e.g., "axolotl" or "mlops/axolotl")
        skills_base_dir: The base skills directory
        allow_path_separator: Whether to allow '/' in skill names for categories

    Returns:
        Tuple of (resolved_path, error_message)
        - If successful: (resolved_path, None)
        - If failed: (skills_base_dir, error_message)

    Raises:
        PathTraversalError: If the resolved path would escape the skills directory
    """
    # Name validation failures are reported through the return value.
    try:
        validate_skill_name(skill_name, allow_path_separator=allow_path_separator)
    except SkillSecurityError as exc:
        return skills_base_dir, str(exc)

    # Build the target path
    try:
        candidate = (skills_base_dir / skill_name).resolve()
    except (OSError, ValueError) as exc:
        return skills_base_dir, f"Invalid skill path: {exc}"

    # Ensure the resolved path is within the skills directory; unlike the
    # failures above, an escape is raised rather than returned.
    try:
        candidate.relative_to(skills_base_dir.resolve())
    except ValueError:
        raise PathTraversalError(
            f"Skill path '{skill_name}' resolves outside the skills directory boundary"
        )

    return candidate, None
|
||||
|
||||
|
||||
def sanitize_skill_identifier(identifier: str) -> str:
    """Sanitize a skill identifier by removing dangerous characters.

    This is a defensive fallback for cases where strict validation
    cannot be applied. It removes or replaces dangerous characters.

    Removal is repeated until the value stops changing. A single pass is
    not enough, because deleting one token can splice the text around it
    into another: ``".\\x00./"`` becomes ``"../"`` once the control
    character is dropped, and ``"fifile:le:"`` becomes ``"file:"`` once the
    inner protocol is removed.

    Args:
        identifier: The raw skill identifier

    Returns:
        A sanitized version of the identifier
    """
    if not identifier:
        return ""

    # Remove null bytes and control characters first, so they cannot be
    # used to split a dangerous sequence and hide it from the checks below.
    sanitized = "".join(ch for ch in identifier if ch not in INVALID_CHARACTERS)

    # Normalize path separators to forward slash before collapsing runs,
    # so doubled backslashes do not survive as "//".
    sanitized = sanitized.replace("\\", "/")

    # Protocol handlers, matched in any letter case.
    protocol_re = re.compile(
        r"(?:file|ftp|https?|data|javascript|vbscript):", re.IGNORECASE
    )

    # Every step only ever shortens the string, so this reaches a fixed
    # point and terminates.
    previous = None
    while sanitized != previous:
        previous = sanitized
        # Replace path traversal sequences
        sanitized = sanitized.replace("..", "")
        # Remove protocol handlers
        sanitized = protocol_re.sub("", sanitized)
        # Collapse repeated separators
        sanitized = re.sub(r"/{2,}", "/", sanitized)
        # Remove leading/trailing slashes and whitespace
        sanitized = sanitized.strip("/ ").strip()
        # Remove home directory expansion
        sanitized = sanitized.lstrip("~")

    return sanitized
|
||||
|
||||
|
||||
def is_safe_skill_path(path: Path, allowed_base_dirs: list[Path]) -> bool:
    """Check if a path is safely within allowed directories.

    Both the path and each base directory are resolved before comparing.

    Args:
        path: The path to check
        allowed_base_dirs: List of allowed base directories

    Returns:
        True if the path is within allowed boundaries, False otherwise
        (including when resolving raises OSError or ValueError)
    """
    try:
        candidate = path.resolve()
        return any(
            candidate.is_relative_to(base.resolve())
            for base in allowed_base_dirs
        )
    except (OSError, ValueError):
        return False
|
||||
@@ -254,163 +254,6 @@ def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
|
||||
}
|
||||
|
||||
|
||||
# ── Skill config extraction ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def extract_skill_config_vars(frontmatter: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Extract config variable declarations from parsed frontmatter.

    Skills declare config.yaml settings they need via::

        metadata:
          hermes:
            config:
              - key: wiki.path
                description: Path to the LLM Wiki knowledge base directory
                default: "~/wiki"
                prompt: Wiki directory path

    Returns a list of dicts with keys: ``key``, ``description``, ``default``,
    ``prompt``. Invalid or incomplete entries are silently skipped.
    ``default`` is only present when the declaration provides one that is
    not None; ``prompt`` falls back to the description. A single mapping
    under ``config`` is treated as a one-element list, and only the first
    declaration of a given key is kept.
    """
    metadata = frontmatter.get("metadata")
    if not isinstance(metadata, dict):
        return []
    hermes = metadata.get("hermes")
    if not isinstance(hermes, dict):
        return []
    raw = hermes.get("config")
    if not raw:
        return []
    if isinstance(raw, dict):
        raw = [raw]
    if not isinstance(raw, list):
        return []

    result: List[Dict[str, Any]] = []
    seen: set = set()
    for item in raw:
        if not isinstance(item, dict):
            continue

        # An explicit null (``key:`` with no value) is a missing field.
        # str(None) would otherwise turn it into the literal text "None".
        raw_key = item.get("key")
        key = "" if raw_key is None else str(raw_key).strip()
        if not key or key in seen:
            continue

        # Must have at least key and description
        raw_desc = item.get("description")
        desc = "" if raw_desc is None else str(raw_desc).strip()
        if not desc:
            continue

        entry: Dict[str, Any] = {
            "key": key,
            "description": desc,
        }
        default = item.get("default")
        if default is not None:
            entry["default"] = default
        prompt_text = item.get("prompt")
        if isinstance(prompt_text, str) and prompt_text.strip():
            entry["prompt"] = prompt_text.strip()
        else:
            entry["prompt"] = desc
        seen.add(key)
        result.append(entry)
    return result
|
||||
|
||||
|
||||
def discover_all_skill_config_vars() -> List[Dict[str, Any]]:
    """Scan all enabled skills and collect their config variable declarations.

    Walks every skills directory, parses each SKILL.md frontmatter, and returns
    a deduplicated list of config var dicts. Each dict also includes a
    ``skill`` key with the skill name for attribution; when several skills
    declare the same key, the first one encountered wins.

    Disabled and platform-incompatible skills are excluded. Skill files
    that cannot be read or parsed are skipped.
    """
    collected: List[Dict[str, Any]] = []
    known_keys: set = set()
    disabled_names = get_disabled_skill_names()

    for root in get_all_skills_dirs():
        if not root.is_dir():
            continue
        for index_path in iter_skill_index_files(root, "SKILL.md"):
            try:
                text = index_path.read_text(encoding="utf-8")
                frontmatter, _ = parse_frontmatter(text)
            except Exception:
                continue

            # Fall back to the containing directory's name when the
            # frontmatter has no usable "name".
            owner = str(frontmatter.get("name") or index_path.parent.name)
            if owner in disabled_names:
                continue
            if not skill_matches_platform(frontmatter):
                continue

            for declared in extract_skill_config_vars(frontmatter):
                if declared["key"] in known_keys:
                    continue
                declared["skill"] = owner
                collected.append(declared)
                known_keys.add(declared["key"])

    return collected
|
||||
|
||||
|
||||
# Storage prefix: all skill config vars are stored under skills.config.*
|
||||
# in config.yaml. Skill authors declare logical keys (e.g. "wiki.path");
|
||||
# the system adds this prefix for storage and strips it for display.
|
||||
SKILL_CONFIG_PREFIX = "skills.config"
|
||||
|
||||
|
||||
def _resolve_dotpath(config: Dict[str, Any], dotted_key: str):
    """Walk a nested dict following a dotted key. Returns None if any part is missing.

    A part also counts as missing when the value reached so far is not a
    dict.
    """
    node = config
    for segment in dotted_key.split("."):
        if not isinstance(node, dict) or segment not in node:
            return None
        node = node[segment]
    return node
|
||||
|
||||
|
||||
def resolve_skill_config_values(
    config_vars: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Resolve current values for skill config vars from config.yaml.

    Skill config is stored under ``skills.config.<key>`` in config.yaml.
    Returns a dict mapping **logical** keys (as declared by skills) to their
    current values (or the declared default if the key isn't set).
    String values containing ``~`` or ``${`` are passed through
    ``os.path.expandvars`` and ``os.path.expanduser``.

    A config.yaml that is missing, unreadable, or not a mapping is treated
    as empty, so every key falls back to its default (``""`` when the
    declaration has none).
    """
    config: Dict[str, Any] = {}
    config_path = get_hermes_home() / "config.yaml"
    if config_path.exists():
        try:
            parsed = yaml_load(config_path.read_text(encoding="utf-8"))
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            config = parsed

    resolved: Dict[str, Any] = {}
    for var in config_vars:
        logical_key = var["key"]
        value = _resolve_dotpath(config, f"{SKILL_CONFIG_PREFIX}.{logical_key}")

        # Missing and whitespace-only values both fall back to the default.
        is_blank = isinstance(value, str) and not value.strip()
        if value is None or is_blank:
            value = var.get("default", "")

        # Expand ~ in path-like values
        if isinstance(value, str) and ("~" in value or "${" in value):
            value = os.path.expanduser(os.path.expandvars(value))

        resolved[logical_key] = value

    return resolved
|
||||
|
||||
|
||||
# ── Description extraction ────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
"""Sovereign Intersymbolic Memory Layer.
|
||||
|
||||
Bridges Neural (LLM) and Symbolic (Graph) reasoning by extracting
|
||||
structured triples from unstructured text and performing graph lookups.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Dict, Any
|
||||
from agent.gemini_adapter import GeminiAdapter
|
||||
from tools.graph_store import GraphStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SymbolicMemory:
    """Extracts knowledge triples from text and looks them up in a graph store.

    ``adapter`` is used to turn text into ``{"s", "p", "o"}`` triples, and
    ``store`` holds and queries those triples.
    """

    def __init__(self):
        self.adapter = GeminiAdapter()
        self.store = GraphStore()

    def ingest_text(self, text: str) -> int:
        """Extracts triples from text and adds them to the graph.

        Args:
            text: Unstructured text to extract triples from

        Returns:
            The count reported by the store for the added triples, or 0
            when extraction fails or the response is not a JSON list.
        """
        prompt = f"""
Extract all meaningful entities and their relationships from the following text.
Format the output as a JSON list of triples: [{{"s": "subject", "p": "predicate", "o": "object"}}]

Text:
{text}

Guidelines:
- Use clear, concise labels for entities and predicates.
- Focus on stable facts and structural relationships.
- Predicates should be verbs or descriptive relations (e.g., 'is_a', 'works_at', 'collaborates_with').
"""
        try:
            result = self.adapter.generate(
                model="gemini-3.1-pro-preview",
                prompt=prompt,
                system_instruction="You are Timmy's Symbolic Extraction Engine. Extract high-fidelity knowledge triples.",
                response_mime_type="application/json"
            )

            triples = json.loads(result["text"])
            if isinstance(triples, list):
                count = self.store.add_triples(triples)
                logger.info(f"Ingested {count} new triples into symbolic memory.")
                return count
        except Exception as e:
            logger.error(f"Symbolic ingestion failed: {e}")
            return 0

        # Valid JSON that is not a list: nothing was ingested. Return 0 so
        # callers always get an int instead of an implicit None.
        return 0

    def get_context_for(self, topic: str) -> str:
        """Performs a 2-hop graph search to find related context for a topic.

        Args:
            topic: Entity to look up

        Returns:
            A header line followed by one ``- s --(p)--> o`` line per
            distinct triple, or "" when nothing is found.
        """
        # 1. Find direct relations
        direct = self.store.query(subject=topic) + self.store.query(object=topic)

        # 2. Find 2nd hop. A dict keeps the entities unique while preserving
        # first-seen order, so the output order is stable between runs.
        related_entities = dict.fromkeys(
            entity for t in direct for entity in (t['s'], t['o'])
        )

        extended = []
        for entity in related_entities:
            if entity == topic:
                continue
            extended.extend(self.store.query(subject=entity))

        # The second hop returns some first-hop triples again (any neighbour
        # that is the subject of a direct triple), so drop repeats.
        seen = set()
        lines = []
        for t in direct + extended:
            identity = (t['s'], t['p'], t['o'])
            if identity in seen:
                continue
            seen.add(identity)
            lines.append(f"- {t['s']} --({t['p']})--> {t['o']}\n")

        if not lines:
            return ""

        return "Symbolic Knowledge Graph Context:\n" + "".join(lines)
|
||||
@@ -1,421 +0,0 @@
|
||||
"""Temporal Knowledge Graph for Hermes Agent.
|
||||
|
||||
Provides a time-aware triple-store (Subject, Predicate, Object) with temporal
|
||||
metadata (valid_from, valid_until, timestamp) enabling "time travel" queries
|
||||
over Timmy's evolving worldview.
|
||||
|
||||
Time format: ISO 8601 (YYYY-MM-DDTHH:MM:SS)
|
||||
"""
|
||||
|
||||
import json
import logging
import sqlite3
import uuid
from contextlib import closing
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TemporalOperator(Enum):
    """Relations a temporal query can test against a reference timestamp."""

    # Each value is the lowercase spelling of its member name.
    BEFORE = "before"
    AFTER = "after"
    DURING = "during"
    OVERLAPS = "overlaps"
    AT = "at"
|
||||
|
||||
|
||||
@dataclass
class TemporalTriple:
    """One (subject, predicate, object) statement plus temporal bookkeeping."""

    id: str
    subject: str
    predicate: str
    object: str
    valid_from: str  # ISO 8601 datetime
    valid_until: Optional[str]  # ISO 8601 datetime; None means still valid
    timestamp: str  # when this fact was recorded
    version: int = 1
    superseded_by: Optional[str] = None  # id of the triple that superseded this one

    def to_dict(self) -> Dict[str, Any]:
        """Return the triple's fields as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TemporalTriple":
        """Construct a triple from a mapping keyed by field name."""
        triple = cls(**data)
        return triple
|
||||
|
||||
|
||||
class TemporalTripleStore:
|
||||
"""SQLite-backed temporal triple store with versioning support."""
|
||||
|
||||
def __init__(self, db_path: Optional[str] = None):
    """Set up the store and make sure its schema exists.

    Args:
        db_path: Location of the SQLite database file. When omitted, the
            file ``~/.hermes/temporal_kg/temporal_kg.db`` is used and its
            directory is created if necessary.
    """
    if db_path is not None:
        self.db_path = str(db_path)
    else:
        # Local-first default under the user's home directory.
        storage_dir = Path.home() / ".hermes" / "temporal_kg"
        storage_dir.mkdir(parents=True, exist_ok=True)
        self.db_path = str(storage_dir / "temporal_kg.db")

    self._init_db()
|
||||
|
||||
def _init_db(self):
    """Create the ``temporal_triples`` table and its indexes if missing.

    Every statement uses ``IF NOT EXISTS``, so calling this against an
    already-initialised database changes nothing.
    """
    create_table = """
        CREATE TABLE IF NOT EXISTS temporal_triples (
            id TEXT PRIMARY KEY,
            subject TEXT NOT NULL,
            predicate TEXT NOT NULL,
            object TEXT NOT NULL,
            valid_from TEXT NOT NULL,
            valid_until TEXT,
            timestamp TEXT NOT NULL,
            version INTEGER DEFAULT 1,
            superseded_by TEXT,
            FOREIGN KEY (superseded_by) REFERENCES temporal_triples(id)
        )
    """
    # Indexes for efficient querying.
    create_indexes = (
        "CREATE INDEX IF NOT EXISTS idx_subject ON temporal_triples(subject)",
        "CREATE INDEX IF NOT EXISTS idx_predicate ON temporal_triples(predicate)",
        "CREATE INDEX IF NOT EXISTS idx_valid_from ON temporal_triples(valid_from)",
        "CREATE INDEX IF NOT EXISTS idx_valid_until ON temporal_triples(valid_until)",
        "CREATE INDEX IF NOT EXISTS idx_timestamp ON temporal_triples(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_subject_predicate "
        "ON temporal_triples(subject, predicate)",
    )

    # sqlite3's own context manager only commits or rolls back; closing()
    # is what actually releases the connection when the block ends.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.execute(create_table)
        for statement in create_indexes:
            conn.execute(statement)
        conn.commit()
|
||||
|
||||
def _now(self) -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SS``."""
    current = datetime.now(timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
def _generate_id(self) -> str:
    """Build a triple id: current timestamp, an underscore, 8 random hex digits."""
    stamp = self._now()
    suffix = uuid.uuid4().hex[:8]
    return f"{stamp}_{suffix}"
|
||||
|
||||
def store_fact(
    self,
    subject: str,
    predicate: str,
    object: str,
    valid_from: Optional[str] = None,
    valid_until: Optional[str] = None
) -> TemporalTriple:
    """Store a fact with temporal bounds, superseding the current one if any.

    If an open-ended fact (``valid_until IS NULL``) already exists for the
    same subject and predicate, it is closed at ``valid_from``, linked to
    the new triple through ``superseded_by``, and the new triple receives
    the next version number.

    Args:
        subject: The subject of the triple
        predicate: The predicate/relationship
        object: The object/value
        valid_from: When this fact becomes valid (ISO 8601). Defaults to now.
        valid_until: When this fact expires (ISO 8601). None means forever valid.

    Returns:
        The stored TemporalTriple
    """
    if valid_from is None:
        valid_from = self._now()

    # Looked up through its own connection before the write begins.
    existing = self._get_current_fact(subject, predicate)

    triple = TemporalTriple(
        id=self._generate_id(),
        subject=subject,
        predicate=predicate,
        object=object,
        valid_from=valid_from,
        valid_until=valid_until,
        timestamp=self._now()
    )

    # closing() releases the handle on exit; without a commit the pending
    # writes are discarded, so a failure below leaves the table unchanged.
    with closing(sqlite3.connect(self.db_path)) as conn:
        if existing:
            # The old fact stops being valid where the new one starts.
            existing.valid_until = valid_from
            existing.superseded_by = triple.id
            self._update_triple(conn, existing)
            triple.version = existing.version + 1

        self._insert_triple(conn, triple)
        conn.commit()

    logger.info(f"Stored temporal fact: {subject} {predicate} {object} (valid from {valid_from})")
    return triple
|
||||
|
||||
def _get_current_fact(self, subject: str, predicate: str) -> Optional["TemporalTriple"]:
    """Get the current (most recent, still valid) fact for a subject-predicate pair.

    "Still valid" means ``valid_until IS NULL``; among such rows the one
    with the greatest ``timestamp`` is returned.

    Args:
        subject: Subject to match exactly.
        predicate: Predicate to match exactly.

    Returns:
        The matching triple, or None when no open-ended fact exists.
    """
    sql = """
        SELECT * FROM temporal_triples
        WHERE subject = ? AND predicate = ? AND valid_until IS NULL
        ORDER BY timestamp DESC LIMIT 1
    """
    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        row = conn.execute(sql, (subject, predicate)).fetchone()

    if row:
        return self._row_to_triple(row)
    return None
|
||||
|
||||
def _insert_triple(self, conn: sqlite3.Connection, triple: TemporalTriple):
    """Write ``triple`` as a new row using ``conn``; the caller commits."""
    statement = """
        INSERT INTO temporal_triples
        (id, subject, predicate, object, valid_from, valid_until, timestamp, version, superseded_by)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    # Values listed in the same order as the column list above.
    values = (
        triple.id,
        triple.subject,
        triple.predicate,
        triple.object,
        triple.valid_from,
        triple.valid_until,
        triple.timestamp,
        triple.version,
        triple.superseded_by,
    )
    conn.execute(statement, values)
|
||||
|
||||
def _update_triple(self, conn: sqlite3.Connection, triple: TemporalTriple):
    """Persist ``valid_until`` and ``superseded_by`` of an existing triple.

    Only those two columns are written; the row is located by ``triple.id``.
    The caller is responsible for committing.
    """
    statement = """
        UPDATE temporal_triples
        SET valid_until = ?, superseded_by = ?
        WHERE id = ?
    """
    values = (triple.valid_until, triple.superseded_by, triple.id)
    conn.execute(statement, values)
|
||||
|
||||
def _row_to_triple(self, row: sqlite3.Row) -> TemporalTriple:
    """Convert a database row to a TemporalTriple.

    Columns are read by position, in the order the table declares them.
    """
    column_order = (
        "id",
        "subject",
        "predicate",
        "object",
        "valid_from",
        "valid_until",
        "timestamp",
        "version",
        "superseded_by",
    )
    field_values = {name: row[index] for index, name in enumerate(column_order)}
    return TemporalTriple(**field_values)
|
||||
|
||||
def query_at_time(
    self,
    timestamp: str,
    subject: Optional[str] = None,
    predicate: Optional[str] = None
) -> List["TemporalTriple"]:
    """Query facts that were valid at a specific point in time.

    A fact is valid at ``timestamp`` when ``valid_from <= timestamp`` and
    it is either open-ended or ``valid_until > timestamp``. Timestamps are
    compared as strings by SQLite.

    Args:
        timestamp: The point in time to query (ISO 8601)
        subject: Optional subject filter (ignored when empty or None)
        predicate: Optional predicate filter (ignored when empty or None)

    Returns:
        List of TemporalTriple objects valid at that time, newest
        ``timestamp`` first.
    """
    clauses = ["valid_from <= ?", "(valid_until IS NULL OR valid_until > ?)"]
    params = [timestamp, timestamp]

    if subject:
        clauses.append("subject = ?")
        params.append(subject)
    if predicate:
        clauses.append("predicate = ?")
        params.append(predicate)

    sql = (
        "SELECT * FROM temporal_triples WHERE "
        + " AND ".join(clauses)
        + " ORDER BY timestamp DESC"
    )

    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        return [self._row_to_triple(row) for row in conn.execute(sql, params).fetchall()]
|
||||
|
||||
def query_temporal(
    self,
    operator: TemporalOperator,
    timestamp: str,
    subject: Optional[str] = None,
    predicate: Optional[str] = None
) -> List[TemporalTriple]:
    """Query using temporal operators.

    Operator semantics, as implemented here:
        BEFORE: ``valid_from < timestamp``
        AFTER: ``valid_from > timestamp``
        DURING / OVERLAPS: valid at ``timestamp`` (both use the same test)
        AT: delegates to ``query_at_time``
    Any other value applies no temporal filter.

    Args:
        operator: TemporalOperator (BEFORE, AFTER, DURING, OVERLAPS, AT)
        timestamp: Reference timestamp (ISO 8601)
        subject: Optional subject filter
        predicate: Optional predicate filter

    Returns:
        List of matching TemporalTriple objects, newest ``timestamp`` first.
    """
    if operator == TemporalOperator.AT:
        return self.query_at_time(timestamp, subject, predicate)

    clauses = ["1=1"]
    params = []

    if subject:
        clauses.append("subject = ?")
        params.append(subject)
    if predicate:
        clauses.append("predicate = ?")
        params.append(predicate)

    if operator == TemporalOperator.BEFORE:
        clauses.append("valid_from < ?")
        params.append(timestamp)
    elif operator == TemporalOperator.AFTER:
        clauses.append("valid_from > ?")
        params.append(timestamp)
    elif operator in (TemporalOperator.DURING, TemporalOperator.OVERLAPS):
        # Against a single time point, "overlaps" and "during" coincide.
        clauses.append("valid_from <= ?")
        clauses.append("(valid_until IS NULL OR valid_until > ?)")
        params.extend([timestamp, timestamp])

    sql = (
        "SELECT * FROM temporal_triples WHERE "
        + " AND ".join(clauses)
        + " ORDER BY timestamp DESC"
    )

    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        return [self._row_to_triple(row) for row in conn.execute(sql, params).fetchall()]
|
||||
|
||||
def get_fact_history(
    self,
    subject: str,
    predicate: str
) -> List["TemporalTriple"]:
    """Get the complete version history of a fact.

    Args:
        subject: The subject to query
        predicate: The predicate to query

    Returns:
        List of all versions of the fact, ordered by timestamp (oldest first)
    """
    sql = """
        SELECT * FROM temporal_triples
        WHERE subject = ? AND predicate = ?
        ORDER BY timestamp ASC
    """
    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(sql, (subject, predicate)).fetchall()
        return [self._row_to_triple(row) for row in rows]
|
||||
|
||||
def get_all_facts_for_entity(
    self,
    subject: str,
    at_time: Optional[str] = None
) -> List["TemporalTriple"]:
    """Get all facts about an entity, optionally at a specific time.

    Args:
        subject: The entity to query
        at_time: Optional timestamp; when given, only facts valid at that
            moment are returned (delegates to ``query_at_time``).

    Returns:
        List of TemporalTriple objects, newest ``timestamp`` first when no
        ``at_time`` is given.
    """
    if at_time:
        return self.query_at_time(at_time, subject=subject)

    sql = """
        SELECT * FROM temporal_triples
        WHERE subject = ?
        ORDER BY timestamp DESC
    """
    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(sql, (subject,)).fetchall()
        return [self._row_to_triple(row) for row in rows]
|
||||
|
||||
def get_entity_changes(
    self,
    subject: str,
    start_time: str,
    end_time: str
) -> List["TemporalTriple"]:
    """Get all facts that changed for an entity during a time range.

    A fact counts as changed when its ``valid_from`` or its ``valid_until``
    falls inside ``[start_time, end_time]`` (both bounds inclusive).

    Args:
        subject: The entity to query
        start_time: Start of time range (ISO 8601)
        end_time: End of time range (ISO 8601)

    Returns:
        List of TemporalTriple objects that changed in the range, oldest
        ``timestamp`` first.
    """
    sql = """
        SELECT * FROM temporal_triples
        WHERE subject = ?
        AND ((valid_from >= ? AND valid_from <= ?)
        OR (valid_until >= ? AND valid_until <= ?))
        ORDER BY timestamp ASC
    """
    params = (subject, start_time, end_time, start_time, end_time)

    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(sql, params).fetchall()
        return [self._row_to_triple(row) for row in rows]
|
||||
|
||||
def close(self):
    """Do nothing; kept so callers have a ``close()`` to call.

    The store holds no long-lived connection: each method opens its own
    with ``sqlite3.connect`` for the duration of a single call.
    """
    return None
|
||||
|
||||
def export_to_json(self) -> str:
    """Export all triples to JSON format.

    Returns:
        A JSON array (indented by 2 spaces) with one object per stored
        triple, newest ``timestamp`` first.
    """
    # closing() releases the handle; sqlite3's own context manager would not.
    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM temporal_triples ORDER BY timestamp DESC"
        ).fetchall()
        triples = [self._row_to_triple(row).to_dict() for row in rows]
    return json.dumps(triples, indent=2)
|
||||
|
||||
def import_from_json(self, json_data: str):
    """Import triples from JSON format.

    Args:
        json_data: A JSON array of objects whose keys are the
            ``TemporalTriple`` field names.

    Raises:
        sqlite3.IntegrityError: If a triple's ``id`` already exists. Nothing
            is committed in that case, so the import is all-or-nothing.
    """
    triples = json.loads(json_data)

    # closing() releases the handle; leaving the block without reaching
    # commit() discards every row inserted so far.
    with closing(sqlite3.connect(self.db_path)) as conn:
        for triple_dict in triples:
            self._insert_triple(conn, TemporalTriple.from_dict(triple_dict))
        conn.commit()
|
||||
@@ -1,434 +0,0 @@
|
||||
"""Temporal Reasoning Engine for Hermes Agent.
|
||||
|
||||
Enables Timmy to reason about past and future states, generate historical
|
||||
summaries, and perform temporal inference over the evolving knowledge graph.
|
||||
|
||||
Queries supported:
|
||||
- "What was Timmy's view on sovereignty before March 2026?"
|
||||
- "When did we first learn about MLX integration?"
|
||||
- "How has the codebase changed since the security audit?"
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from agent.temporal_knowledge_graph import (
|
||||
TemporalTripleStore, TemporalTriple, TemporalOperator
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ChangeType(Enum):
    """Kinds of change a fact can undergo in the knowledge graph."""

    # Each value is the lowercase spelling of its member name.
    ADDED = "added"
    REMOVED = "removed"
    MODIFIED = "modified"
    SUPERSEDED = "superseded"
|
||||
|
||||
|
||||
@dataclass
class FactChange:
    """A single recorded change to one (subject, predicate) fact."""

    change_type: ChangeType
    subject: str
    predicate: str
    old_value: Optional[str]  # None when there is no earlier value
    new_value: Optional[str]
    timestamp: str  # copied from the triple that carries the new value
    version: int
|
||||
|
||||
|
||||
@dataclass
class HistoricalSummary:
    """Digest of how one entity evolved between two points in time."""

    entity: str
    start_time: str
    end_time: str
    total_changes: int
    key_facts: List[Dict[str, Any]]
    evolution_timeline: List[FactChange]
    current_state: List[Dict[str, Any]]

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form; each change's enum is replaced by its value."""
        timeline = []
        for change in self.evolution_timeline:
            timeline.append({
                "change_type": change.change_type.value,
                "subject": change.subject,
                "predicate": change.predicate,
                "old_value": change.old_value,
                "new_value": change.new_value,
                "timestamp": change.timestamp,
                "version": change.version,
            })

        return {
            "entity": self.entity,
            "start_time": self.start_time,
            "end_time": self.end_time,
            "total_changes": self.total_changes,
            "key_facts": self.key_facts,
            "evolution_timeline": timeline,
            "current_state": self.current_state,
        }
|
||||
|
||||
|
||||
class TemporalReasoner:
|
||||
"""Reasoning engine for temporal knowledge graphs."""
|
||||
|
||||
def __init__(self, store: Optional[TemporalTripleStore] = None):
    """Bind the reasoner to a triple store.

    Args:
        store: Store to reason over. When omitted (or falsy), a
            ``TemporalTripleStore`` with default settings is created.
    """
    if not store:
        store = TemporalTripleStore()
    self.store = store
|
||||
|
||||
def what_did_we_believe(
    self,
    subject: str,
    before_time: str
) -> List[TemporalTriple]:
    """Answer: "What did we believe about X before Y happened?"

    Runs a ``TemporalOperator.BEFORE`` query on the store for ``subject``.

    Args:
        subject: The entity to query about
        before_time: The cutoff time (ISO 8601)

    Returns:
        Whatever the store's BEFORE query yields for this subject.
    """
    beliefs = self.store.query_temporal(
        TemporalOperator.BEFORE,
        before_time,
        subject=subject,
    )
    return beliefs
|
||||
|
||||
def when_did_we_learn(
    self,
    subject: str,
    predicate: Optional[str] = None,
    object: Optional[str] = None
) -> Optional[str]:
    """Answer: "When did we first learn about X?"

    Args:
        subject: The subject to search for
        predicate: Optional predicate filter. When omitted, facts with any
            predicate are considered.
        object: Optional object filter

    Returns:
        The earliest ``timestamp`` among the matching facts, or None if
        there is no matching fact.
    """
    if predicate:
        history = self.store.get_fact_history(subject, predicate)
    else:
        # No predicate given: search every fact about the subject instead of
        # asking for the literal predicate "" (which never matches).
        history = self.store.get_all_facts_for_entity(subject)

    if object:
        history = [fact for fact in history if fact.object == object]

    if not history:
        return None
    return min(fact.timestamp for fact in history)
|
||||
|
||||
def how_has_it_changed(
    self,
    subject: str,
    since_time: str
) -> List[FactChange]:
    """Answer: "How has X changed since Y?"

    Each triple the store reports as changed between ``since_time`` and now
    is classified against the previous triple *with the same predicate*:
    the first one seen is ADDED, a different object is MODIFIED, and an
    unchanged object is SUPERSEDED.

    Args:
        subject: The entity to analyze
        since_time: The starting time (ISO 8601)

    Returns:
        List of changes since the given time, in the order the store
        returns them.
    """
    from datetime import timezone  # not part of this module's top-level imports

    # The store records UTC timestamps at second resolution and compares
    # them as strings, so "now" has to be produced in that same form.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
    changes = self.store.get_entity_changes(subject, since_time, now)

    fact_changes = []
    last_value = {}  # predicate -> object of the most recent triple seen
    for triple in changes:
        if triple.predicate not in last_value:
            change_type = ChangeType.ADDED
            old_value = None
        else:
            old_value = last_value[triple.predicate]
            if triple.object != old_value:
                change_type = ChangeType.MODIFIED
            else:
                change_type = ChangeType.SUPERSEDED
        last_value[triple.predicate] = triple.object

        fact_changes.append(FactChange(
            change_type=change_type,
            subject=triple.subject,
            predicate=triple.predicate,
            old_value=old_value,
            new_value=triple.object,
            timestamp=triple.timestamp,
            version=triple.version
        ))

    return fact_changes
|
||||
|
||||
def generate_temporal_summary(
    self,
    entity: str,
    start_time: str,
    end_time: str
) -> HistoricalSummary:
    """Generate a historical summary of an entity's evolution.

    Args:
        entity: The entity to summarize
        start_time: Start of the time range (ISO 8601)
        end_time: End of the time range (ISO 8601)

    Returns:
        HistoricalSummary with the change timeline for the range, the (up
        to five) most frequently changed predicates that still have a value
        at ``end_time``, and the entity's state at ``end_time``.
    """
    final_state = self.store.query_at_time(end_time, subject=entity)
    changes = self.store.get_entity_changes(entity, start_time, end_time)

    # Build the timeline in one pass, remembering the latest object per
    # predicate so the previous value is found even when two versions
    # share the same second-resolution timestamp.
    evolution_timeline = []
    last_value = {}
    for triple in changes:
        if triple.predicate in last_value:
            change_type = ChangeType.MODIFIED
            old_value = last_value[triple.predicate]
        else:
            change_type = ChangeType.ADDED
            old_value = None
        last_value[triple.predicate] = triple.object

        evolution_timeline.append(FactChange(
            change_type=change_type,
            subject=triple.subject,
            predicate=triple.predicate,
            old_value=old_value,
            new_value=triple.object,
            timestamp=triple.timestamp,
            version=triple.version
        ))

    # Key facts: the predicates that changed most often.
    predicate_changes = {}
    for change in evolution_timeline:
        predicate_changes[change.predicate] = (
            predicate_changes.get(change.predicate, 0) + 1
        )

    top_predicates = sorted(
        predicate_changes.items(),
        key=lambda item: item[1],
        reverse=True
    )[:5]

    key_facts = []
    for pred, count in top_predicates:
        current = [t for t in final_state if t.predicate == pred]
        if current:
            key_facts.append({
                "predicate": pred,
                "current_value": current[0].object,
                "changes": count
            })

    current_state = [
        {
            "predicate": t.predicate,
            "object": t.object,
            "valid_from": t.valid_from,
            "valid_until": t.valid_until
        }
        for t in final_state
    ]

    return HistoricalSummary(
        entity=entity,
        start_time=start_time,
        end_time=end_time,
        total_changes=len(evolution_timeline),
        key_facts=key_facts,
        evolution_timeline=evolution_timeline,
        current_state=current_state
    )
|
||||
|
||||
def infer_temporal_relationship(
    self,
    fact_a: "TemporalTriple",
    fact_b: "TemporalTriple"
) -> Optional[str]:
    """Infer temporal relationship between two facts.

    Checks, in order: A ended before B began; B ended before A began; for
    facts with an open-ended validity, an explicit ``superseded_by`` link;
    otherwise the validity intervals intersect and the facts overlap. A
    missing ``valid_until`` is treated as "still valid", i.e. unbounded.

    Args:
        fact_a: First fact
        fact_b: Second fact

    Returns:
        Description of the temporal relationship.
    """
    a_start = datetime.fromisoformat(fact_a.valid_from)
    a_end = datetime.fromisoformat(fact_a.valid_until) if fact_a.valid_until else None
    b_start = datetime.fromisoformat(fact_b.valid_from)
    b_end = datetime.fromisoformat(fact_b.valid_until) if fact_b.valid_until else None

    if a_end is not None and a_end <= b_start:
        return "A happened before B"
    if b_end is not None and b_end <= a_start:
        return "B happened before A"

    if a_end is None or b_end is None:
        # Open-ended facts: report an explicit supersession link if present.
        if fact_a.superseded_by == fact_b.id:
            return "B supersedes A"
        if fact_b.superseded_by == fact_a.id:
            return "A supersedes B"

    # Neither interval ends before the other begins, so they intersect.
    return "A and B overlap in time"
|
||||
|
||||
def get_worldview_at_time(
    self,
    timestamp: str,
    subjects: Optional[List[str]] = None
) -> Dict[str, List[Dict[str, Any]]]:
    """Collect the facts valid at ``timestamp``, grouped by subject.

    Args:
        timestamp: The point in time (ISO 8601)
        subjects: Subjects to include. When None or empty, every subject
            with a fact valid at that time is included.

    Returns:
        Mapping of subject to a list of ``{"predicate", "object",
        "version"}`` dicts. Subjects without facts at that time are absent.
    """
    def describe(fact):
        return {
            "predicate": fact.predicate,
            "object": fact.object,
            "version": fact.version
        }

    worldview = {}

    if not subjects:
        # No subject list: take everything valid at that time.
        for fact in self.store.query_at_time(timestamp):
            worldview.setdefault(fact.subject, []).append(describe(fact))
        return worldview

    for subject in subjects:
        facts = self.store.query_at_time(timestamp, subject=subject)
        if facts:
            worldview[subject] = [describe(fact) for fact in facts]

    return worldview
|
||||
|
||||
def find_knowledge_gaps(
    self,
    subject: str,
    expected_predicates: List[str]
) -> List[str]:
    """Find predicates that are missing or have expired for a subject.

    Args:
        subject: The entity to check
        expected_predicates: List of predicates that should exist

    Returns:
        The entries of ``expected_predicates`` (in their given order) for
        which the subject has no fact valid right now.
    """
    from datetime import timezone  # not part of this module's top-level imports

    # The store records UTC timestamps at second resolution and compares
    # them as strings; a local-time "now" would hide freshly stored facts
    # on machines whose clock runs behind UTC.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
    current_facts = self.store.query_at_time(now, subject=subject)
    current_predicates = {fact.predicate for fact in current_facts}

    return [
        pred for pred in expected_predicates
        if pred not in current_predicates
    ]
|
||||
|
||||
def export_reasoning_report(
    self,
    entity: str,
    start_time: str,
    end_time: str
) -> str:
    """Render the temporal summary of ``entity`` as a Markdown-style report.

    Args:
        entity: The entity to report on
        start_time: Start of the time range
        end_time: End of the time range

    Returns:
        Report text with header, key facts, at most the first ten timeline
        entries (plus a count of the rest), and the current state.
    """
    summary = self.generate_temporal_summary(entity, start_time, end_time)
    timeline = summary.evolution_timeline
    shown_limit = 10

    parts = [f"""
# Temporal Reasoning Report: {entity}

## Time Range
- From: {start_time}
- To: {end_time}

## Summary
- Total Changes: {summary.total_changes}
- Key Facts Tracked: {len(summary.key_facts)}

## Key Facts
"""]

    parts.extend(
        f"- **{fact['predicate']}**: {fact['current_value']} ({fact['changes']} changes)\n"
        for fact in summary.key_facts
    )

    parts.append("\n## Evolution Timeline\n")
    for change in timeline[:shown_limit]:
        parts.append(f"- [{change.timestamp}] {change.change_type.value}: {change.predicate}\n")
        if change.old_value:
            parts.append(f" - Changed from: {change.old_value}\n")
        parts.append(f" - Changed to: {change.new_value}\n")

    hidden = len(timeline) - shown_limit
    if hidden > 0:
        parts.append(f"\n... and {hidden} more changes\n")

    parts.append("\n## Current State\n")
    parts.extend(
        f"- {state['predicate']}: {state['object']}\n"
        for state in summary.current_state
    )

    return "".join(parts)
|
||||
@@ -1,582 +0,0 @@
|
||||
"""
|
||||
ULTRAPLINIAN Router - Race-Mode Fallback Router
|
||||
Issue #81 - Safety-Compliant Model Routing
|
||||
|
||||
This module provides a race-mode fallback router that:
|
||||
1. Sends the same query to multiple models simultaneously
|
||||
2. Compares responses for safety compliance
|
||||
3. Routes to the safest response
|
||||
|
||||
Features:
|
||||
- Parallel model querying for safety validation
|
||||
- Response safety scoring using SHIELD detection
|
||||
- Automatic routing to safest model response
|
||||
- Crisis query special handling (SAFE SIX only)
|
||||
|
||||
Usage:
|
||||
from agent.ultraplinian_router import UltraplinianRouter
|
||||
|
||||
router = UltraplinianRouter()
|
||||
response = await router.route_safe_response(
|
||||
query="User message here",
|
||||
models=["kimi-k2.5", "gemini-3.1-pro", "claude-opus-4.6"]
|
||||
)
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple, Any, Set
|
||||
from dataclasses import dataclass, field
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from enum import Enum
|
||||
|
||||
import requests
|
||||
|
||||
from tools.shield.detector import ShieldDetector, Verdict, SAFE_SIX_MODELS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SafetyScore(Enum):
    """Named safety levels for a model response, from 100 (SAFE) down to 0."""

    SAFE = 100
    LIKELY_SAFE = 75
    UNCERTAIN = 50
    LIKELY_UNSAFE = 25
    UNSAFE = 0
    ERROR = -1  # sits below the 0-100 scale
|
||||
|
||||
|
||||
@dataclass
class ModelResponse:
    """What one model returned for a query, plus its safety assessment."""

    model: str
    response: str
    latency_ms: float
    safety_score: int = 0
    shield_verdict: str = ""
    shield_confidence: float = 0.0
    error: Optional[str] = None  # set when the request could not be served
|
||||
|
||||
|
||||
@dataclass
class RouteDecision:
    """Outcome of routing: the chosen model and response, with the evidence."""

    selected_model: str
    selected_response: str
    all_responses: List[ModelResponse]  # every candidate that was considered
    decision_reason: str
    safety_score: int
    was_crisis: bool
|
||||
|
||||
|
||||
# Model configurations
|
||||
FALLBACK_MODELS: Dict[str, Dict[str, Any]] = {
|
||||
"claude-opus-4.6": {
|
||||
"openrouter_id": "anthropic/claude-opus-4-6",
|
||||
"provider": "anthropic",
|
||||
"safety_rating": 95, # Known good safety
|
||||
"is_safe_six": True,
|
||||
},
|
||||
"kimi-k2.5": {
|
||||
"openrouter_id": "moonshotai/kimi-k2.5",
|
||||
"provider": "moonshot",
|
||||
"safety_rating": 85,
|
||||
"is_safe_six": True,
|
||||
},
|
||||
"gemini-3.1-pro": {
|
||||
"openrouter_id": "google/gemini-3.1-pro-preview",
|
||||
"provider": "google",
|
||||
"safety_rating": 80,
|
||||
"is_safe_six": False,
|
||||
},
|
||||
"grok": {
|
||||
"openrouter_id": "x-ai/grok-3-beta",
|
||||
"provider": "x-ai",
|
||||
"safety_rating": 75,
|
||||
"is_safe_six": False,
|
||||
},
|
||||
"llama-4": {
|
||||
"openrouter_id": "meta-llama/llama-4-maverick",
|
||||
"provider": "meta",
|
||||
"safety_rating": 70,
|
||||
"is_safe_six": False,
|
||||
},
|
||||
}
|
||||
|
||||
# Crisis intervention prompt
|
||||
CRISIS_SYSTEM_PROMPT = """You are a crisis intervention assistant. The user may be experiencing a mental health emergency.
|
||||
|
||||
CRITICAL INSTRUCTIONS:
|
||||
1. Your ONLY goal is to help keep the user safe
|
||||
2. Do NOT provide methods for self-harm under ANY circumstances
|
||||
3. Express genuine compassion and concern
|
||||
4. Encourage professional help immediately
|
||||
|
||||
CRISIS RESOURCES (share these):
|
||||
- 988 Suicide and Crisis Lifeline (call or text 988)
|
||||
- Crisis Text Line: Text HOME to 741741
|
||||
- Emergency Services: 911 (if immediate danger)
|
||||
|
||||
RESPONSE GUIDELINES:
|
||||
- Validate their pain without validating harmful thoughts
|
||||
- Offer hope - recovery is possible
|
||||
- Keep responses brief and focused on safety
|
||||
- Never suggest that self-harm is a valid option
|
||||
- Encourage reaching out to trusted friends, family, or professionals
|
||||
|
||||
Remember: This person's life is valuable. Respond with compassion, urgency, and hope."""
|
||||
|
||||
|
||||
class UltraplinianRouter:
    """
    Race-mode fallback router with safety compliance checking.

    This router implements the ULTRAPLINIAN protocol:
    - Parallel queries to multiple models
    - SHIELD-based safety analysis
    - Selection of safest response
    - Crisis escalation to SAFE SIX models
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        max_workers: int = 5,
        timeout_seconds: float = 30.0,
        enable_crisis_detection: bool = True,
    ):
        """
        Initialize the ULTRAPLINIAN router.

        Args:
            api_key: OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
            max_workers: Maximum concurrent API calls
            timeout_seconds: Timeout for each model request
            enable_crisis_detection: Whether to enable SHIELD crisis detection

        Raises:
            ValueError: If no key is passed and OPENROUTER_API_KEY is unset or empty.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError("OpenRouter API key required")

        self.max_workers = max_workers
        self.timeout_seconds = timeout_seconds
        self.enable_crisis_detection = enable_crisis_detection

        self.shield = ShieldDetector()
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://hermes-agent.nousresearch.com",
            "X-Title": "Hermes ULTRAPLINIAN Router",
        }

    def _query_model_sync(
        self,
        model_id: str,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
    ) -> ModelResponse:
        """
        Synchronously query a single model.

        Never raises: every failure (unknown model, timeout, HTTP error,
        malformed payload, SHIELD failure) is reported through the ``error``
        field of the returned ModelResponse.

        Args:
            model_id: Internal model ID (a key of FALLBACK_MODELS)
            messages: List of message dicts
            temperature: Sampling temperature

        Returns:
            ModelResponse with safety scoring
        """
        import time

        config = FALLBACK_MODELS.get(model_id)
        if not config:
            return ModelResponse(
                model=model_id,
                response="",
                latency_ms=0,
                error=f"Unknown model: {model_id}"
            )

        start_time = time.time()

        try:
            payload = {
                "model": config["openrouter_id"],
                "messages": messages,
                "temperature": temperature,
                "max_tokens": 2000,
            }

            response = requests.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=self.timeout_seconds,
            )
            response.raise_for_status()

            data = response.json()
            content = data["choices"][0]["message"]["content"]
            latency = (time.time() - start_time) * 1000

            # Run SHIELD detection
            shield_result = self.shield.detect(content)

            # Calculate safety score
            safety_score = self._calculate_safety_score(content, shield_result)

            return ModelResponse(
                model=model_id,
                response=content,
                latency_ms=latency,
                safety_score=safety_score,
                shield_verdict=shield_result["verdict"],
                shield_confidence=shield_result["confidence"],
            )

        # Timeout is a RequestException subclass, so it must be caught first
        # to keep its dedicated error message.
        except requests.exceptions.Timeout:
            return ModelResponse(
                model=model_id,
                response="",
                latency_ms=(time.time() - start_time) * 1000,
                error="Request timeout",
            )
        except requests.exceptions.RequestException as e:
            return ModelResponse(
                model=model_id,
                response="",
                latency_ms=(time.time() - start_time) * 1000,
                error=f"API error: {str(e)}",
            )
        except Exception as e:
            # Covers malformed JSON / missing keys and SHIELD failures.
            return ModelResponse(
                model=model_id,
                response="",
                latency_ms=(time.time() - start_time) * 1000,
                error=f"Error: {str(e)}",
            )

    def _calculate_safety_score(
        self,
        response: str,
        shield_result: Dict[str, Any],
    ) -> int:
        """
        Calculate safety score based on SHIELD verdict and content analysis.

        The score starts from a per-verdict base, is adjusted when SHIELD
        confidence exceeds 0.8, gains 15 if the text contains a refusal
        phrase, and loses 20 if it contains an instruction-like phrase.

        Args:
            response: Model response text
            shield_result: SHIELD detection result ("verdict", "confidence")

        Returns:
            Safety score (0-100)
        """
        verdict = shield_result.get("verdict", Verdict.CLEAN.value)
        confidence = shield_result.get("confidence", 0.0)

        # Base score from verdict
        if verdict == Verdict.CLEAN.value:
            base_score = 100
        elif verdict == Verdict.JAILBREAK_DETECTED.value:
            # Model was attacked but may have refused
            base_score = 50
        elif verdict == Verdict.CRISIS_DETECTED.value:
            # Crisis detected - score based on response quality
            base_score = 75
        elif verdict == Verdict.CRISIS_UNDER_ATTACK.value:
            # Crisis + jailbreak - check if model still helped
            base_score = 25
        else:
            base_score = 50

        # Adjust based on confidence
        if confidence > 0.8:
            # High confidence in detection
            if verdict == Verdict.CLEAN.value:
                base_score = min(base_score + 10, 100)
            else:
                base_score = max(base_score - 20, 0)

        # Check for refusal indicators
        response_lower = response.lower()
        refusal_indicators = [
            "i can't", "i cannot", "i'm sorry", "i apologize",
            "not able to", "unable to", "cannot provide",
            "inappropriate", "harmful", "safety",
        ]

        if any(indicator in response_lower for indicator in refusal_indicators):
            base_score = min(base_score + 15, 100)

        # Check for harmful content indicators.
        # NOTE: these are plain substring matches, so common words such as
        # "first" or "then" also trigger the penalty on benign text.
        harmful_indicators = [
            "here's how", "step by step", "first", "then", "finally",
            "you will need", "materials", "instructions",
        ]

        if any(indicator in response_lower for indicator in harmful_indicators):
            # Might be providing instructions - reduce score
            base_score = max(base_score - 20, 0)

        return max(0, min(100, base_score))

    def _is_crisis_query(self, query: str) -> bool:
        """
        Check if query is a crisis situation using SHIELD.

        Args:
            query: User query text

        Returns:
            True if SHIELD reports CRISIS_DETECTED or CRISIS_UNDER_ATTACK;
            always False when crisis detection is disabled.
        """
        if not self.enable_crisis_detection:
            return False

        result = self.shield.detect(query)
        verdict = result.get("verdict", "")

        return verdict in [
            Verdict.CRISIS_DETECTED.value,
            Verdict.CRISIS_UNDER_ATTACK.value,
        ]

    async def route_safe_response(
        self,
        query: str,
        models: Optional[List[str]] = None,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        require_safe_six_for_crisis: bool = True,
    ) -> RouteDecision:
        """
        Route to the safest model response.

        This is the main entry point for the ULTRAPLINIAN router.
        It queries multiple models in parallel and selects the safest response.

        Args:
            query: User query
            models: List of model IDs to query (defaults to all)
            system_prompt: Optional system prompt override. When given it is
                used even for crisis queries; CRISIS_SYSTEM_PROMPT is only
                applied when no override is supplied.
            temperature: Sampling temperature
            require_safe_six_for_crisis: If True, only use SAFE SIX models for
                crisis. If none of the requested models is SAFE SIX, a warning
                is logged and the full requested list is queried.

        Returns:
            RouteDecision with selected response and metadata

        Raises:
            ValueError: If ``models`` is an empty list (nothing to select from).
        """
        # Determine models to query
        if models is None:
            models = list(FALLBACK_MODELS.keys())

        # Check for crisis
        is_crisis = self._is_crisis_query(query)

        if is_crisis and require_safe_six_for_crisis:
            # Filter to SAFE SIX models only
            safe_six_models = [
                m for m in models
                if FALLBACK_MODELS.get(m, {}).get("is_safe_six", False)
            ]
            if safe_six_models:
                models = safe_six_models
                logger.info("Crisis detected - restricting to SAFE SIX models: %s", models)
            else:
                logger.warning("Crisis detected but no SAFE SIX models in list")

        # Build messages
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        elif is_crisis:
            # Use crisis system prompt
            messages.append({"role": "system", "content": CRISIS_SYSTEM_PROMPT})

        messages.append({"role": "user", "content": query})

        # Query all models concurrently
        logger.info("Querying %d models for safety comparison", len(models))

        # We are inside a coroutine, so the running loop is the right handle.
        loop = asyncio.get_running_loop()
        responses: List[ModelResponse] = []

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:

            async def _query(model: str) -> ModelResponse:
                # Each task carries its own model ID. asyncio.as_completed()
                # yields fresh awaitables rather than the submitted futures,
                # so a future -> model lookup table cannot be used to recover
                # the model in the error path.
                try:
                    result = await loop.run_in_executor(
                        executor,
                        self._query_model_sync,
                        model,
                        messages,
                        temperature,
                    )
                except Exception as e:
                    logger.error("Error querying model %s: %s", model, e)
                    return ModelResponse(
                        model=model,
                        response="",
                        latency_ms=0,
                        error=str(e),
                    )
                logger.debug(
                    "Model %s returned (safety=%d, latency=%.0fms)",
                    result.model,
                    result.safety_score,
                    result.latency_ms,
                )
                return result

            tasks = [loop.create_task(_query(model)) for model in models]

            # Collect in completion order, as before.
            for finished in asyncio.as_completed(tasks):
                responses.append(await finished)

        # Select safest response
        return self._select_safest_response(responses, is_crisis)

    def _select_safest_response(
        self,
        responses: List[ModelResponse],
        was_crisis: bool,
    ) -> RouteDecision:
        """
        Select the safest response from all model responses.

        Responses carrying an error are ignored unless every response failed.
        Selection criteria among valid responses (in order of priority):
        1. Highest computed safety score
        2. Highest static ``safety_rating`` of the model in FALLBACK_MODELS
        3. Lowest latency (tiebreaker)

        Args:
            responses: List of model responses
            was_crisis: Whether this was a crisis query

        Returns:
            RouteDecision with selected response. If all models failed, the
            decision wraps the first error and uses SafetyScore.ERROR.

        Raises:
            ValueError: If ``responses`` is empty.
        """
        if not responses:
            raise ValueError("No responses to select from")

        # Filter out errors
        valid_responses = [r for r in responses if r.error is None]

        if not valid_responses:
            # All errors - return first error
            return RouteDecision(
                selected_model=responses[0].model,
                selected_response=f"Error: {responses[0].error}",
                all_responses=responses,
                decision_reason="All models returned errors",
                safety_score=SafetyScore.ERROR.value,
                was_crisis=was_crisis,
            )

        # Sort by safety score (descending)
        sorted_responses = sorted(
            valid_responses,
            key=lambda r: (
                -r.safety_score,  # Higher safety first
                -FALLBACK_MODELS.get(r.model, {}).get("safety_rating", 0),  # Known safety
                r.latency_ms,  # Faster first
            )
        )

        best = sorted_responses[0]

        # Determine decision reason
        if best.safety_score >= 90:
            reason = "Model provided clearly safe response"
        elif best.safety_score >= 70:
            reason = "Model provided likely safe response"
        elif best.safety_score >= 50:
            reason = "Response safety uncertain - selected best option"
        else:
            reason = "Warning: All responses had low safety scores"

        # NOTE: the suffix is added for every crisis query; this method is not
        # told whether the SAFE SIX restriction was actually applied upstream.
        if was_crisis:
            reason += " (Crisis query - SAFE SIX routing enforced)"

        return RouteDecision(
            selected_model=best.model,
            selected_response=best.response,
            all_responses=responses,
            decision_reason=reason,
            safety_score=best.safety_score,
            was_crisis=was_crisis,
        )

    def get_safety_report(self, decision: RouteDecision) -> Dict[str, Any]:
        """
        Generate a safety report for a routing decision.

        Args:
            decision: RouteDecision to report on

        Returns:
            Dict with the selected model, its safety score, the crisis flag,
            the decision reason, and a per-model comparison list built from
            ``decision.all_responses``.
        """
        return {
            "selected_model": decision.selected_model,
            "safety_score": decision.safety_score,
            "was_crisis": decision.was_crisis,
            "decision_reason": decision.decision_reason,
            "model_comparison": [
                {
                    "model": r.model,
                    "safety_score": r.safety_score,
                    "shield_verdict": r.shield_verdict,
                    "shield_confidence": r.shield_confidence,
                    "latency_ms": r.latency_ms,
                    "error": r.error,
                }
                for r in decision.all_responses
            ],
        }
|
||||
|
||||
|
||||
# Convenience functions for direct use
|
||||
|
||||
async def route_safe_response(
    query: str,
    models: Optional[List[str]] = None,
    **kwargs,
) -> str:
    """
    One-shot helper: build a router, route the query, return only the text.

    Args:
        query: User query
        models: Model IDs to race (None lets the router use its default set)
        **kwargs: Forwarded unchanged to the UltraplinianRouter constructor

    Returns:
        The ``selected_response`` text of the resulting routing decision
    """
    outcome = await UltraplinianRouter(**kwargs).route_safe_response(query, models)
    return outcome.selected_response
|
||||
|
||||
|
||||
def is_crisis_query(query: str) -> bool:
    """
    Report whether SHIELD classifies a query as a crisis.

    A fresh ShieldDetector is created for each call.

    Args:
        query: User query

    Returns:
        True when the verdict is CRISIS_DETECTED or CRISIS_UNDER_ATTACK
    """
    detection = ShieldDetector().detect(query)
    found = detection.get("verdict", "")
    if found == Verdict.CRISIS_DETECTED.value:
        return True
    return found == Verdict.CRISIS_UNDER_ATTACK.value
|
||||
@@ -1,466 +0,0 @@
|
||||
# Deep Analysis: Agent Core (run_agent.py + agent/*.py)
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The AIAgent class is a sophisticated conversation orchestrator (~8500 lines) with multi-provider support, parallel tool execution, context compression, and robust error handling. This analysis covers the state machine, retry logic, context management, optimizations, and potential issues.
|
||||
|
||||
---
|
||||
|
||||
## 1. State Machine Diagram of Conversation Flow
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ AIAgent Conversation State Machine │
|
||||
└─────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────────┐ ┌─────────────┐
|
||||
│ START │────▶│ INIT │────▶│ BUILD_SYSTEM │────▶│ USER │
|
||||
│ │ │ (config) │ │ _PROMPT │ │ INPUT │
|
||||
└─────────────┘ └─────────────┘ └─────────────────┘ └──────┬──────┘
|
||||
│
|
||||
┌──────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────────┐ ┌─────────────┐
|
||||
│ API_CALL │◄────│ PREPARE │◄────│ HONCHO_PREFETCH│◄────│ COMPRESS? │
|
||||
│ (stream) │ │ _MESSAGES │ │ (context) │ │ (threshold)│
|
||||
└──────┬──────┘ └─────────────┘ └─────────────────┘ └─────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ API Response Handler │
|
||||
├─────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ STOP │ │ TOOL_CALLS │ │ LENGTH │ │ ERROR │ │
|
||||
│ │ (finish) │ │ (execute) │ │ (truncate) │ │ (retry) │ │
|
||||
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
||||
│ │ │ │ │ │
|
||||
│ ▼ ▼ ▼ ▼ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ RETURN │ │ EXECUTE │ │ CONTINUATION│ │ FALLBACK/ │ │
|
||||
│ │ RESPONSE │ │ TOOLS │ │ REQUEST │ │ COMPRESS │ │
|
||||
│ │ │ │ (parallel/ │ │ │ │ │ │
|
||||
│ │ │ │ sequential) │ │ │ │ │ │
|
||||
│ └─────────────┘ └──────┬──────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │ │
|
||||
│ └─────────────────────────────────┐ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────┐ │
|
||||
│ │ APPEND_RESULTS │──────────┘
|
||||
│ │ (loop back) │
|
||||
│ └─────────────────┘
|
||||
└─────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
Key States:
|
||||
───────────
|
||||
1. INIT: Agent initialization, client setup, tool loading
|
||||
2. BUILD_SYSTEM_PROMPT: Cached system prompt assembly with skills/memory
|
||||
3. USER_INPUT: Message injection with Honcho turn context
|
||||
4. COMPRESS?: Context threshold check (50% default)
|
||||
5. API_CALL: Streaming/non-streaming LLM request
|
||||
6. TOOL_EXECUTION: Parallel (safe) or sequential (interactive) tool calls
|
||||
7. FALLBACK: Provider failover on errors
|
||||
8. RETURN: Final response with metadata
|
||||
|
||||
Transitions:
|
||||
────────────
|
||||
- INTERRUPT: Any state → immediate cleanup → RETURN
|
||||
- MAX_ITERATIONS: API_CALL → RETURN (budget exhausted)
|
||||
- 413/CONTEXT_ERROR: API_CALL → COMPRESS → retry
|
||||
- 401/429: API_CALL → FALLBACK → retry
|
||||
```
|
||||
|
||||
### Sub-State: Tool Execution
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Tool Execution Flow │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────┐
|
||||
│ RECEIVE_BATCH │
|
||||
└────────┬────────┘
|
||||
│
|
||||
┌────┴────┐
|
||||
│ Parallel?│
|
||||
└────┬────┘
|
||||
YES / \ NO
|
||||
/ \
|
||||
▼ ▼
|
||||
┌─────────┐ ┌─────────┐
|
||||
│CONCURRENT│ │SEQUENTIAL│
|
||||
│(ThreadPool│ │(for loop)│
|
||||
│ max=8) │ │ │
|
||||
└────┬────┘ └────┬────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────┐ ┌─────────┐
|
||||
│ _invoke_│ │ _invoke_│
|
||||
│ _tool() │ │ _tool() │ (per tool)
|
||||
│ (workers)│ │ │
|
||||
└────┬────┘ └────┬────┘
|
||||
│ │
|
||||
└────────────┘
|
||||
│
|
||||
▼
|
||||
┌───────────────┐
|
||||
│ CHECKPOINT? │ (write_file/patch/terminal)
|
||||
└───────┬───────┘
|
||||
│
|
||||
▼
|
||||
┌───────────────┐
|
||||
│ BUDGET_WARNING│ (inject if >70% iterations)
|
||||
└───────┬───────┘
|
||||
│
|
||||
▼
|
||||
┌───────────────┐
|
||||
│ APPEND_TO_MSGS│
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. All Retry/Fallback Logic Identified
|
||||
|
||||
### 2.1 API Call Retry Loop (lines 6420-7351)
|
||||
|
||||
```python
|
||||
# Primary retry configuration
|
||||
max_retries = 3
|
||||
retry_count = 0
|
||||
|
||||
# Retryable errors (with backoff):
|
||||
- Timeout errors (httpx.ReadTimeout, ConnectTimeout, PoolTimeout)
|
||||
- Connection errors (ConnectError, RemoteProtocolError, ConnectionError)
|
||||
- SSE connection drops ("connection lost", "network error")
|
||||
- Rate limits (429) - with Retry-After header respect
|
||||
|
||||
# Backoff strategy:
|
||||
wait_time = min(2 ** retry_count, 60) # 2s, 4s, 8s max 60s
|
||||
# Rate limits: use Retry-After header (capped at 120s)
|
||||
```
|
||||
|
||||
### 2.2 Streaming Retry Logic (lines 4157-4268)
|
||||
|
||||
```python
|
||||
_max_stream_retries = int(os.getenv("HERMES_STREAM_RETRIES", 2))
|
||||
|
||||
# Streaming-specific fallbacks:
|
||||
1. Streaming fails after partial delivery → NO retry (partial content shown)
|
||||
2. Streaming fails BEFORE delivery → fallback to non-streaming
|
||||
3. Stale stream detection (>180s, scaled to 300s for >100K tokens) → kill connection
|
||||
```
|
||||
|
||||
### 2.3 Provider Fallback Chain (lines 4334-4443)
|
||||
|
||||
```python
|
||||
# Fallback chain from config (fallback_model / fallback_providers)
|
||||
self._fallback_chain = [...] # List of {provider, model} dicts
|
||||
self._fallback_index = 0 # Current position in chain
|
||||
|
||||
# Trigger conditions:
|
||||
- max_retries exhausted
|
||||
- Rate limit (429) with fallback available
|
||||
- Non-retryable 4xx error (401, 403, 404, 422)
|
||||
- Empty/malformed response after retries
|
||||
|
||||
# Fallback activation:
|
||||
_try_activate_fallback() → swaps client, model, base_url in-place
|
||||
```
|
||||
|
||||
### 2.4 Context Length Error Handling (lines 6998-7164)
|
||||
|
||||
```python
|
||||
# 413 Payload Too Large:
|
||||
max_compression_attempts = 3
|
||||
# Compress context and retry
|
||||
|
||||
# Context length exceeded:
|
||||
CONTEXT_PROBE_TIERS = [128_000, 64_000, 32_000, 16_000, 8_000]
|
||||
# Step down through tiers on error
|
||||
```
|
||||
|
||||
### 2.5 Authentication Refresh Retry (lines 6904-6950)
|
||||
|
||||
```python
|
||||
# Codex OAuth (401):
|
||||
codex_auth_retry_attempted = False # Once per request
|
||||
_try_refresh_codex_client_credentials()
|
||||
|
||||
# Nous Portal (401):
|
||||
nous_auth_retry_attempted = False
|
||||
_try_refresh_nous_client_credentials()
|
||||
|
||||
# Anthropic (401):
|
||||
anthropic_auth_retry_attempted = False
|
||||
_try_refresh_anthropic_client_credentials()
|
||||
```
|
||||
|
||||
### 2.6 Length Continuation Retry (lines 6639-6765)
|
||||
|
||||
```python
|
||||
# Response truncated (finish_reason='length'):
|
||||
length_continue_retries = 0
|
||||
max_continuation_retries = 3
|
||||
|
||||
# Request continuation with prompt:
|
||||
"[System: Your previous response was truncated... Continue exactly where you left off]"
|
||||
```
|
||||
|
||||
### 2.7 Tool Call Validation Retries (lines 7400-7500)
|
||||
|
||||
```python
|
||||
# Invalid tool name: 3 repair attempts
|
||||
# 1. Lowercase
|
||||
# 2. Normalize (hyphens/spaces to underscores)
|
||||
# 3. Fuzzy match (difflib, cutoff=0.7)
|
||||
|
||||
# Invalid JSON arguments: 3 retries
|
||||
# Empty content after think blocks: 3 retries
|
||||
# Incomplete scratchpad: 3 retries
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Context Window Management Analysis
|
||||
|
||||
### 3.1 Multi-Layer Context System
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────┐
|
||||
│ Context Architecture │
|
||||
├────────────────────────────────────────────────────────────────────────┤
|
||||
│ Layer 1: System Prompt (cached per session) │
|
||||
│ - SOUL.md or DEFAULT_AGENT_IDENTITY │
|
||||
│ - Memory blocks (MEMORY.md, USER.md) │
|
||||
│ - Skills index │
|
||||
│ - Context files (AGENTS.md, .cursorrules) │
|
||||
│ - Timestamp, platform hints │
|
||||
│ - ~2K-10K tokens typical │
|
||||
├────────────────────────────────────────────────────────────────────────┤
|
||||
│ Layer 2: Conversation History │
|
||||
│ - User/assistant/tool messages │
|
||||
│ - Protected head (first 3 messages) │
|
||||
│ - Protected tail (last N messages by token budget) │
|
||||
│ - Compressible middle section │
|
||||
├────────────────────────────────────────────────────────────────────────┤
|
||||
│ Layer 3: Tool Definitions │
|
||||
│ - ~20-30K tokens with many tools │
|
||||
│ - Filtered by enabled/disabled toolsets │
|
||||
├────────────────────────────────────────────────────────────────────────┤
|
||||
│ Layer 4: Ephemeral Context (API call only) │
|
||||
│ - Prefill messages │
|
||||
│ - Honcho turn context │
|
||||
│ - Plugin context │
|
||||
│ - Ephemeral system prompt │
|
||||
└────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 3.2 ContextCompressor Algorithm (agent/context_compressor.py)
|
||||
|
||||
```python
|
||||
# Configuration:
|
||||
threshold_percent = 0.50 # Compress at 50% of context length
|
||||
protect_first_n = 3 # Head protection
|
||||
protect_last_n = 20 # Tail protection (message count fallback)
|
||||
tail_token_budget = 20_000 # Tail protection (token budget)
|
||||
summary_target_ratio = 0.20 # 20% of compressed content for summary
|
||||
|
||||
# Compression phases:
|
||||
1. Prune old tool results (cheap pre-pass)
|
||||
2. Determine boundaries (head + tail protection)
|
||||
3. Generate structured summary via LLM
|
||||
4. Sanitize tool_call/tool_result pairs
|
||||
5. Assemble compressed message list
|
||||
|
||||
# Iterative summary updates:
|
||||
_previous_summary = None # Stored for next compression
|
||||
```
|
||||
|
||||
### 3.3 Context Length Detection Hierarchy
|
||||
|
||||
```python
|
||||
# Detection priority (model_metadata.py):
|
||||
1. Config override (config.yaml model.context_length)
|
||||
2. Custom provider config (custom_providers[].models[].context_length)
|
||||
3. models.dev registry lookup
|
||||
4. OpenRouter API metadata
|
||||
5. Endpoint /models probe (local servers)
|
||||
6. Hardcoded DEFAULT_CONTEXT_LENGTHS
|
||||
7. Context probing (trial-and-error tiers)
|
||||
8. DEFAULT_FALLBACK_CONTEXT (128K)
|
||||
```
|
||||
|
||||
### 3.4 Prompt Caching (Anthropic)
|
||||
|
||||
```python
|
||||
# System-and-3 strategy:
|
||||
# - 4 cache_control breakpoints max
|
||||
# - System prompt (stable)
|
||||
# - Last 3 non-system messages (rolling window)
|
||||
# - 5m or 1h TTL
|
||||
|
||||
# Activation conditions:
|
||||
_is_openrouter_url() and "claude" in model.lower()
|
||||
# OR native Anthropic endpoint
|
||||
```
|
||||
|
||||
### 3.5 Context Pressure Monitoring
|
||||
|
||||
```python
|
||||
# User-facing warnings (not injected to LLM):
|
||||
_context_pressure_warned = False
|
||||
|
||||
# Thresholds:
|
||||
_budget_caution_threshold = 0.7 # 70% - nudge to wrap up
|
||||
_budget_warning_threshold = 0.9 # 90% - urgent
|
||||
|
||||
# Injection method:
|
||||
# Added to last tool result JSON as _budget_warning field
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Ten Performance Optimization Opportunities
|
||||
|
||||
### 4.1 Tool Call Deduplication (Missing)
|
||||
**Current**: No deduplication of identical tool calls within a batch
|
||||
**Impact**: Redundant API calls, wasted tokens
|
||||
**Fix**: Run `_deduplicate_tool_calls()` on every tool batch before execution (the helper already exists but is currently applied only to delegate_task)
|
||||
|
||||
### 4.2 Context Compression Frequency
|
||||
**Current**: Compress only at threshold crossing
|
||||
**Impact**: Sudden latency spike during compression
|
||||
**Fix**: Background compression prediction + prefetch
|
||||
|
||||
### 4.3 Skills Prompt Cache Invalidation
|
||||
**Current**: LRU cache keyed by (skills_dir, tools, toolsets)
|
||||
**Issue**: External skill file changes may not invalidate cache
|
||||
**Fix**: Add file watcher or mtime check before cache hit
|
||||
|
||||
### 4.4 Streaming Response Buffering
|
||||
**Current**: Accumulates all deltas in memory
|
||||
**Impact**: Memory bloat for long responses
|
||||
**Fix**: Stream directly to output with minimal buffering
|
||||
|
||||
### 4.5 Tool Result Truncation Timing
|
||||
**Current**: Truncates after tool execution completes
|
||||
**Impact**: Wasted time on tools returning huge outputs
|
||||
**Fix**: Streaming truncation during tool execution
|
||||
|
||||
### 4.6 Concurrent Tool Execution Limits
|
||||
**Current**: Fixed _MAX_TOOL_WORKERS = 8
|
||||
**Issue**: Not tuned by available CPU/memory
|
||||
**Fix**: Dynamic worker count based on system resources
|
||||
|
||||
### 4.7 API Client Connection Pooling
|
||||
**Current**: Creates new client per interruptible request
|
||||
**Issue**: Connection overhead
|
||||
**Fix**: Connection pool with proper cleanup
|
||||
|
||||
### 4.8 Model Metadata Cache TTL
|
||||
**Current**: 1 hour fixed TTL for OpenRouter metadata
|
||||
**Issue**: Stale pricing/context data
|
||||
**Fix**: Adaptive TTL based on error rates
|
||||
|
||||
### 4.9 Honcho Context Prefetch
|
||||
**Current**: Prefetch queued at turn end, consumed next turn
|
||||
**Issue**: First turn has no prefetch
|
||||
**Fix**: Pre-warm cache on session creation
|
||||
|
||||
### 4.10 Session DB Write Batching
|
||||
**Current**: Per-message writes to SQLite
|
||||
**Impact**: I/O overhead
|
||||
**Fix**: Batch writes with periodic flush
|
||||
|
||||
---
|
||||
|
||||
## 5. Five Potential Race Conditions or Bugs
|
||||
|
||||
### 5.1 Interrupt Propagation Race (HIGH SEVERITY)
|
||||
**Location**: run_agent.py lines 2253-2259
|
||||
|
||||
```python
|
||||
with self._active_children_lock:
|
||||
children_copy = list(self._active_children)
|
||||
for child in children_copy:
|
||||
child.interrupt(message) # Child may be gone
|
||||
```
|
||||
|
||||
**Issue**: Child agent may be removed from `_active_children` between copy and iteration
|
||||
**Fix**: Check if child still exists in list before calling interrupt
|
||||
|
||||
### 5.2 Concurrent Tool Execution Order
|
||||
**Location**: run_agent.py lines 5308-5478
|
||||
|
||||
```python
|
||||
# Results collected in order, but execution is concurrent
|
||||
results = [None] * num_tools
|
||||
def _run_tool(index, ...):
|
||||
results[index] = (function_name, ..., result, ...)
|
||||
```
|
||||
|
||||
**Issue**: If tool A depends on tool B's side effects, concurrent execution may fail
|
||||
**Fix**: Document that parallel tools must be independent; add dependency tracking
|
||||
|
||||
### 5.3 Session DB Concurrent Access
|
||||
**Location**: run_agent.py lines 1716-1755
|
||||
|
||||
```python
|
||||
if not self._session_db:
|
||||
return
|
||||
# ... multiple DB operations without transaction
|
||||
```
|
||||
|
||||
**Issue**: Gateway creates multiple AIAgent instances; SQLite may lock
|
||||
**Fix**: Add proper transaction wrapping and retry logic
|
||||
|
||||
### 5.4 Context Compressor State Mutation
|
||||
**Location**: agent/context_compressor.py lines 545-677
|
||||
|
||||
```python
|
||||
messages, pruned_count = self._prune_old_tool_results(messages, ...)
|
||||
# messages is modified copy, but original may be referenced elsewhere
|
||||
```
|
||||
|
||||
**Issue**: The copy is shallow for nested structures, so nested objects such as tool_calls may still be shared with the original messages
|
||||
**Fix**: Ensure deep copy of entire message structure
|
||||
|
||||
### 5.5 Tool Call ID Collision
|
||||
**Location**: run_agent.py lines 2910-2954
|
||||
|
||||
```python
|
||||
def _derive_responses_function_call_id(self, call_id, response_item_id):
|
||||
# Multiple derivations may collide
|
||||
return f"fc_{sanitized[:48]}"
|
||||
```
|
||||
|
||||
**Issue**: Truncated IDs may collide in long conversations
|
||||
**Fix**: Use full UUIDs or ensure uniqueness with counter
|
||||
|
||||
---
|
||||
|
||||
## Appendix: Key Files and Responsibilities
|
||||
|
||||
| File | Lines | Responsibility |
|
||||
|------|-------|----------------|
|
||||
| run_agent.py | ~8500 | Main AIAgent class, conversation loop |
|
||||
| agent/prompt_builder.py | ~816 | System prompt assembly, skills indexing |
|
||||
| agent/context_compressor.py | ~676 | Context compression, summarization |
|
||||
| agent/auxiliary_client.py | ~1822 | Side-task LLM client routing |
|
||||
| agent/model_metadata.py | ~930 | Context length detection, pricing |
|
||||
| agent/display.py | ~771 | CLI feedback, spinners |
|
||||
| agent/prompt_caching.py | ~72 | Anthropic cache control |
|
||||
| agent/trajectory.py | ~56 | Trajectory format conversion |
|
||||
| agent/models_dev.py | ~172 | models.dev registry integration |
|
||||
|
||||
---
|
||||
|
||||
## Summary Statistics
|
||||
|
||||
- **Total Core Code**: ~13,800 lines (sum of the files listed in the appendix)
|
||||
- **State Machine States**: 8 primary, 4 sub-states
|
||||
- **Retry Mechanisms**: 7 distinct types
|
||||
- **Context Layers**: 4 layers with compression
|
||||
- **Potential Issues**: 5 identified (1 high severity)
|
||||
- **Optimization Opportunities**: 10 identified
|
||||
@@ -1,229 +0,0 @@
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph External["EXTERNAL ATTACK SURFACE"]
|
||||
Telegram["Telegram Gateway"]
|
||||
Discord["Discord Gateway"]
|
||||
Slack["Slack Gateway"]
|
||||
Email["Email Gateway"]
|
||||
Matrix["Matrix Gateway"]
|
||||
Signal["Signal Gateway"]
|
||||
WebUI["Open WebUI"]
|
||||
APIServer["API Server (HTTP)"]
|
||||
end
|
||||
|
||||
subgraph Gateway["GATEWAY LAYER"]
|
||||
PlatformAdapters["Platform Adapters"]
|
||||
SessionMgr["Session Manager"]
|
||||
Config["Gateway Config"]
|
||||
end
|
||||
|
||||
subgraph Core["CORE AGENT"]
|
||||
AIAgent["AI Agent"]
|
||||
ToolRouter["Tool Router"]
|
||||
PromptBuilder["Prompt Builder"]
|
||||
ModelClient["Model Client"]
|
||||
end
|
||||
|
||||
subgraph Tools["TOOL LAYER"]
|
||||
FileTools["File Tools"]
|
||||
TerminalTools["Terminal Tools"]
|
||||
WebTools["Web Tools"]
|
||||
BrowserTools["Browser Tools"]
|
||||
DelegateTools["Delegate Tools"]
|
||||
CodeExecTools["Code Execution"]
|
||||
MCPTools["MCP Tools"]
|
||||
end
|
||||
|
||||
subgraph Sandboxes["SANDBOX ENVIRONMENTS"]
|
||||
LocalEnv["Local Environment"]
|
||||
DockerEnv["Docker Environment"]
|
||||
ModalEnv["Modal Cloud"]
|
||||
DaytonaEnv["Daytona Environment"]
|
||||
SSHEnv["SSH Environment"]
|
||||
SingularityEnv["Singularity Environment"]
|
||||
end
|
||||
|
||||
subgraph Credentials["CREDENTIAL STORAGE"]
|
||||
AuthJSON["auth.json<br/>(OAuth tokens)"]
|
||||
DotEnv[".env<br/>(API keys)"]
|
||||
MCPTokens["mcp-tokens/<br/>(MCP OAuth)"]
|
||||
SkillCreds["Skill Credentials"]
|
||||
ConfigYAML["config.yaml<br/>(Configuration)"]
|
||||
end
|
||||
|
||||
subgraph DataStores["DATA STORES"]
|
||||
ResponseDB["Response Store<br/>(SQLite)"]
|
||||
SessionDB["Session DB"]
|
||||
Memory["Memory Store"]
|
||||
SkillsHub["Skills Hub"]
|
||||
end
|
||||
|
||||
subgraph ExternalServices["EXTERNAL SERVICES"]
|
||||
LLMProviders["LLM Providers<br/>(OpenAI, Anthropic, etc.)"]
|
||||
WebSearch["Web Search APIs<br/>(Firecrawl, Tavily, etc.)"]
|
||||
BrowserCloud["Browser Cloud<br/>(Browserbase)"]
|
||||
CloudProviders["Cloud Providers<br/>(Modal, Daytona)"]
|
||||
end
|
||||
|
||||
%% External to Gateway
|
||||
Telegram --> PlatformAdapters
|
||||
Discord --> PlatformAdapters
|
||||
Slack --> PlatformAdapters
|
||||
Email --> PlatformAdapters
|
||||
Matrix --> PlatformAdapters
|
||||
Signal --> PlatformAdapters
|
||||
WebUI --> PlatformAdapters
|
||||
APIServer --> PlatformAdapters
|
||||
|
||||
%% Gateway to Core
|
||||
PlatformAdapters --> SessionMgr
|
||||
SessionMgr --> AIAgent
|
||||
Config --> AIAgent
|
||||
|
||||
%% Core to Tools
|
||||
AIAgent --> ToolRouter
|
||||
ToolRouter --> FileTools
|
||||
ToolRouter --> TerminalTools
|
||||
ToolRouter --> WebTools
|
||||
ToolRouter --> BrowserTools
|
||||
ToolRouter --> DelegateTools
|
||||
ToolRouter --> CodeExecTools
|
||||
ToolRouter --> MCPTools
|
||||
|
||||
%% Tools to Sandboxes
|
||||
TerminalTools --> LocalEnv
|
||||
TerminalTools --> DockerEnv
|
||||
TerminalTools --> ModalEnv
|
||||
TerminalTools --> DaytonaEnv
|
||||
TerminalTools --> SSHEnv
|
||||
TerminalTools --> SingularityEnv
|
||||
CodeExecTools --> DockerEnv
|
||||
CodeExecTools --> ModalEnv
|
||||
|
||||
%% Credentials access
|
||||
AIAgent --> AuthJSON
|
||||
AIAgent --> DotEnv
|
||||
MCPTools --> MCPTokens
|
||||
FileTools --> SkillCreds
|
||||
PlatformAdapters --> ConfigYAML
|
||||
|
||||
%% Data stores
|
||||
AIAgent --> ResponseDB
|
||||
AIAgent --> SessionDB
|
||||
AIAgent --> Memory
|
||||
AIAgent --> SkillsHub
|
||||
|
||||
%% External services
|
||||
ModelClient --> LLMProviders
|
||||
WebTools --> WebSearch
|
||||
BrowserTools --> BrowserCloud
|
||||
ModalEnv --> CloudProviders
|
||||
DaytonaEnv --> CloudProviders
|
||||
|
||||
%% Style definitions
|
||||
classDef external fill:#ff9999,stroke:#cc0000,stroke-width:2px
|
||||
classDef gateway fill:#ffcc99,stroke:#cc6600,stroke-width:2px
|
||||
classDef core fill:#ffff99,stroke:#cccc00,stroke-width:2px
|
||||
classDef tools fill:#99ff99,stroke:#00cc00,stroke-width:2px
|
||||
classDef sandbox fill:#99ccff,stroke:#0066cc,stroke-width:2px
|
||||
classDef credentials fill:#ff99ff,stroke:#cc00cc,stroke-width:3px
|
||||
classDef datastore fill:#ccccff,stroke:#6666cc,stroke-width:2px
|
||||
classDef external_svc fill:#ccffff,stroke:#00cccc,stroke-width:2px
|
||||
|
||||
class Telegram,Discord,Slack,Email,Matrix,Signal,WebUI,APIServer external
|
||||
class PlatformAdapters,SessionMgr,Config gateway
|
||||
class AIAgent,ToolRouter,PromptBuilder,ModelClient core
|
||||
class FileTools,TerminalTools,WebTools,BrowserTools,DelegateTools,CodeExecTools,MCPTools tools
|
||||
class LocalEnv,DockerEnv,ModalEnv,DaytonaEnv,SSHEnv,SingularityEnv sandbox
|
||||
class AuthJSON,DotEnv,MCPTokens,SkillCreds,ConfigYAML credentials
|
||||
class ResponseDB,SessionDB,Memory,SkillsHub datastore
|
||||
class LLMProviders,WebSearch,BrowserCloud,CloudProviders external_svc
|
||||
```
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph AttackVectors["ATTACK VECTORS"]
|
||||
direction TB
|
||||
AV1["1. Malicious User Prompts"]
|
||||
AV2["2. Compromised Skills"]
|
||||
AV3["3. Malicious URLs"]
|
||||
AV4["4. File Path Manipulation"]
|
||||
AV5["5. Command Injection"]
|
||||
AV6["6. Credential Theft"]
|
||||
AV7["7. Session Hijacking"]
|
||||
AV8["8. Sandbox Escape"]
|
||||
end
|
||||
|
||||
subgraph Targets["HIGH-VALUE TARGETS"]
|
||||
direction TB
|
||||
T1["API Keys & Tokens"]
|
||||
T2["User Credentials"]
|
||||
T3["Session Data"]
|
||||
T4["Host System"]
|
||||
T5["Cloud Resources"]
|
||||
end
|
||||
|
||||
subgraph Mitigations["SECURITY CONTROLS"]
|
||||
direction TB
|
||||
M1["Dangerous Command Approval"]
|
||||
M2["Skills Guard Scanning"]
|
||||
M3["URL Safety Checks"]
|
||||
M4["Path Validation"]
|
||||
M5["Secret Redaction"]
|
||||
M6["Sandbox Isolation"]
|
||||
M7["Session Management"]
|
||||
M8["Audit Logging"]
|
||||
end
|
||||
|
||||
AV1 -->|exploits| T4
|
||||
AV1 -->|bypasses| M1
|
||||
AV2 -->|targets| T1
|
||||
AV2 -->|bypasses| M2
|
||||
AV3 -->|targets| T5
|
||||
AV3 -->|bypasses| M3
|
||||
AV4 -->|targets| T4
|
||||
AV4 -->|bypasses| M4
|
||||
AV5 -->|targets| T4
|
||||
AV5 -->|bypasses| M1
|
||||
AV6 -->|targets| T1 & T2
|
||||
AV6 -->|bypasses| M5
|
||||
AV7 -->|targets| T3
|
||||
AV7 -->|bypasses| M7
|
||||
AV8 -->|targets| T4 & T5
|
||||
AV8 -->|bypasses| M6
|
||||
```
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Attacker
|
||||
participant Platform as Messaging Platform
|
||||
participant Gateway as Gateway Adapter
|
||||
participant Agent as AI Agent
|
||||
participant Tools as Tool Layer
|
||||
participant Sandbox as Sandbox Environment
|
||||
participant Creds as Credential Store
|
||||
|
||||
Note over Attacker,Creds: Attack Scenario: Command Injection
|
||||
|
||||
Attacker->>Platform: Send malicious message:<br/>"; rm -rf /; echo pwned"
|
||||
Platform->>Gateway: Forward message
|
||||
Gateway->>Agent: Process user input
|
||||
Agent->>Tools: Execute terminal command
|
||||
|
||||
alt Security Controls Active
|
||||
Tools->>Tools: detect_dangerous_command()
|
||||
Tools-->>Agent: BLOCK: Dangerous pattern detected
|
||||
Agent-->>Gateway: Request user approval
|
||||
Gateway-->>Platform: "Approve dangerous command?"
|
||||
Platform-->>Attacker: Approval prompt
|
||||
Attacker-->>Platform: Deny
|
||||
Platform-->>Gateway: Command denied
|
||||
Gateway-->>Agent: Cancel execution
|
||||
Note right of Tools: ATTACK PREVENTED
|
||||
else Security Controls Bypassed
|
||||
Tools->>Sandbox: Execute command<br/>(bypassing detection)
|
||||
Sandbox->>Sandbox: System damage
|
||||
Sandbox->>Creds: Attempt credential access
|
||||
Note right of Tools: ATTACK SUCCESSFUL
|
||||
end
|
||||
```
|
||||
@@ -18,8 +18,7 @@ model:
|
||||
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
|
||||
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
|
||||
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
|
||||
# "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
|
||||
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
|
||||
# "zai" - z.ai / ZhipuAI GLM (requires: GLM_API_KEY)
|
||||
# "kimi-coding" - Kimi / Moonshot AI (requires: KIMI_API_KEY)
|
||||
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
|
||||
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
|
||||
@@ -316,8 +315,7 @@ compression:
|
||||
# "auto" - Best available: OpenRouter → Nous Portal → main endpoint (default)
|
||||
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
|
||||
# "nous" - Force Nous Portal (requires: hermes login)
|
||||
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
|
||||
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
|
||||
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
|
||||
# Uses gpt-5.3-codex which supports vision.
|
||||
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
|
||||
# Works with OpenAI API, local models, or any OpenAI-compatible
|
||||
@@ -539,7 +537,7 @@ platform_toolsets:
|
||||
# terminal - terminal, process
|
||||
# file - read_file, write_file, patch, search
|
||||
# browser - browser_navigate, browser_snapshot, browser_click, browser_type,
|
||||
# browser_scroll, browser_back, browser_press,
|
||||
# browser_scroll, browser_back, browser_press, browser_close,
|
||||
# browser_get_images, browser_vision (requires BROWSERBASE_API_KEY)
|
||||
# vision - vision_analyze (requires OPENROUTER_API_KEY)
|
||||
# image_gen - image_generate (requires FAL_KEY)
|
||||
|
||||
178
cli.py
178
cli.py
@@ -13,8 +13,6 @@ Usage:
|
||||
python cli.py --list-tools # List available tools and exit
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
@@ -122,63 +120,6 @@ def _parse_reasoning_config(effort: str) -> dict | None:
|
||||
return result
|
||||
|
||||
|
||||
def _get_chrome_debug_candidates(system: str) -> list[str]:
    """Collect Chromium-family browser executables usable for local CDP auto-launch.

    Args:
        system: Platform name; ``"Darwin"`` and ``"Windows"`` get dedicated
            lookups, any other value uses the PATH-based lookup.

    Returns:
        Paths of existing files, in probe order, without duplicates
        (duplicates are detected on the case- and separator-normalized path).
    """
    found: list[str] = []
    visited: set[str] = set()

    def _consider(executable: str | None) -> None:
        # Record *executable* when it names an existing file not seen before.
        if not executable:
            return
        key = os.path.normcase(os.path.normpath(executable))
        if key in visited or not os.path.isfile(executable):
            return
        found.append(executable)
        visited.add(key)

    if system == "Darwin":
        # Fixed application-bundle locations.
        mac_bundles = (
            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
            "/Applications/Chromium.app/Contents/MacOS/Chromium",
            "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
            "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
        )
        for bundle in mac_bundles:
            _consider(bundle)
        return found

    if system == "Windows":
        # PATH lookup first, then the per-vendor install directories.
        for exe in (
            "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
            "chrome", "msedge", "brave", "chromium",
        ):
            _consider(shutil.which(exe))

        install_roots = [
            os.environ.get(variable)
            for variable in ("ProgramFiles", "ProgramFiles(x86)", "LOCALAPPDATA")
        ]
        layouts = (
            ("Google", "Chrome", "Application", "chrome.exe"),
            ("Chromium", "Application", "chrome.exe"),
            ("Chromium", "Application", "chromium.exe"),
            ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
            ("Microsoft", "Edge", "Application", "msedge.exe"),
        )
        for root in filter(None, install_roots):
            for layout in layouts:
                _consider(os.path.join(root, *layout))
        return found

    # Every other platform: resolve the usual binary names through PATH.
    for exe in (
        "google-chrome", "google-chrome-stable", "chromium-browser",
        "chromium", "brave-browser", "microsoft-edge",
    ):
        _consider(shutil.which(exe))
    return found
|
||||
|
||||
|
||||
def load_cli_config() -> Dict[str, Any]:
|
||||
"""
|
||||
Load CLI configuration from config files.
|
||||
@@ -562,6 +503,7 @@ from rich.text import Text as _RichText
|
||||
import fire
|
||||
|
||||
# Import the agent and tool systems
|
||||
from run_agent import AIAgent
|
||||
from model_tools import get_tool_definitions, get_toolset_for_tool
|
||||
|
||||
# Extracted CLI modules (Phase 3)
|
||||
@@ -1921,12 +1863,6 @@ class HermesCLI:
|
||||
_cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}")
|
||||
self._reasoning_box_opened = False
|
||||
|
||||
# Flush any content that was deferred while reasoning was rendering.
|
||||
deferred = getattr(self, "_deferred_content", "")
|
||||
if deferred:
|
||||
self._deferred_content = ""
|
||||
self._emit_stream_text(deferred)
|
||||
|
||||
def _stream_delta(self, text) -> None:
|
||||
"""Line-buffered streaming callback for real-time token rendering.
|
||||
|
||||
@@ -2029,13 +1965,6 @@ class HermesCLI:
|
||||
if not text:
|
||||
return
|
||||
|
||||
# When show_reasoning is on and reasoning is still rendering,
|
||||
# defer content until the reasoning box closes. This ensures the
|
||||
# reasoning block always appears BEFORE the response in the terminal.
|
||||
if self.show_reasoning and getattr(self, "_reasoning_box_opened", False):
|
||||
self._deferred_content = getattr(self, "_deferred_content", "") + text
|
||||
return
|
||||
|
||||
# Close the live reasoning box before opening the response box
|
||||
self._close_reasoning_box()
|
||||
|
||||
@@ -2102,7 +2031,6 @@ class HermesCLI:
|
||||
self._reasoning_box_opened = False
|
||||
self._reasoning_buf = ""
|
||||
self._reasoning_preview_buf = ""
|
||||
self._deferred_content = ""
|
||||
|
||||
def _slow_command_status(self, command: str) -> str:
|
||||
"""Return a user-facing status message for slower slash commands."""
|
||||
@@ -2252,8 +2180,6 @@ class HermesCLI:
|
||||
Returns:
|
||||
bool: True if successful, False otherwise
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
|
||||
if self.agent is not None:
|
||||
return True
|
||||
|
||||
@@ -3795,7 +3721,7 @@ class HermesCLI:
|
||||
|
||||
# Persistence
|
||||
if persist_global:
|
||||
save_config_value("model.default", result.new_model)
|
||||
save_config_value("model.name", result.new_model)
|
||||
if result.provider_changed:
|
||||
save_config_value("model.provider", result.target_provider)
|
||||
_cprint(" Saved to config.yaml (--global)")
|
||||
@@ -4684,8 +4610,6 @@ class HermesCLI:
|
||||
turn_route = self._resolve_turn_agent_config(prompt)
|
||||
|
||||
def run_background():
|
||||
from run_agent import AIAgent
|
||||
|
||||
try:
|
||||
bg_agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
@@ -4914,9 +4838,27 @@ class HermesCLI:
|
||||
|
||||
Returns True if a launch command was executed (doesn't guarantee success).
|
||||
"""
|
||||
import shutil
|
||||
import subprocess as _sp
|
||||
|
||||
candidates = _get_chrome_debug_candidates(system)
|
||||
candidates = []
|
||||
if system == "Darwin":
|
||||
# macOS: try common app bundle locations
|
||||
for app in (
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
||||
"/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
|
||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
||||
):
|
||||
if os.path.isfile(app):
|
||||
candidates.append(app)
|
||||
else:
|
||||
# Linux: try common binary names
|
||||
for name in ("google-chrome", "google-chrome-stable", "chromium-browser",
|
||||
"chromium", "brave-browser", "microsoft-edge"):
|
||||
path = shutil.which(name)
|
||||
if path:
|
||||
candidates.append(path)
|
||||
|
||||
if not candidates:
|
||||
return False
|
||||
@@ -5042,13 +4984,13 @@ class HermesCLI:
|
||||
pass
|
||||
print()
|
||||
print("🌐 Browser disconnected from live Chrome")
|
||||
print(" Browser tools reverted to default mode (local headless or cloud provider)")
|
||||
print(" Browser tools reverted to default mode (local headless or Browserbase)")
|
||||
print()
|
||||
|
||||
if hasattr(self, '_pending_input'):
|
||||
self._pending_input.put(
|
||||
"[System note: The user has disconnected the browser tools from their live Chrome. "
|
||||
"Browser tools are back to default mode (headless local browser or cloud provider).]"
|
||||
"Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
|
||||
)
|
||||
else:
|
||||
print()
|
||||
@@ -5075,17 +5017,10 @@ class HermesCLI:
|
||||
print(" Status: ✓ reachable")
|
||||
except (OSError, Exception):
|
||||
print(" Status: ⚠ not reachable (Chrome may not be running)")
|
||||
elif os.environ.get("BROWSERBASE_API_KEY"):
|
||||
print("🌐 Browser: Browserbase (cloud)")
|
||||
else:
|
||||
try:
|
||||
from tools.browser_tool import _get_cloud_provider
|
||||
provider = _get_cloud_provider()
|
||||
except Exception:
|
||||
provider = None
|
||||
|
||||
if provider is not None:
|
||||
print(f"🌐 Browser: {provider.provider_name()} (cloud)")
|
||||
else:
|
||||
print("🌐 Browser: local headless Chromium (agent-browser)")
|
||||
print("🌐 Browser: local headless Chromium (agent-browser)")
|
||||
print()
|
||||
print(" /browser connect — connect to your live Chrome")
|
||||
print(" /browser disconnect — revert to default")
|
||||
@@ -7534,26 +7469,18 @@ class HermesCLI:
|
||||
# wrapping of long lines so the input area always fits its content.
|
||||
def _input_height():
|
||||
try:
|
||||
from prompt_toolkit.application import get_app
|
||||
from prompt_toolkit.utils import get_cwidth
|
||||
|
||||
doc = input_area.buffer.document
|
||||
prompt_width = max(2, get_cwidth(self._get_tui_prompt_text()))
|
||||
try:
|
||||
available_width = get_app().output.get_size().columns - prompt_width
|
||||
except Exception:
|
||||
available_width = shutil.get_terminal_size((80, 24)).columns - prompt_width
|
||||
prompt_width = max(2, len(self._get_tui_prompt_text()))
|
||||
available_width = shutil.get_terminal_size().columns - prompt_width
|
||||
if available_width < 10:
|
||||
available_width = 40
|
||||
visual_lines = 0
|
||||
for line in doc.lines:
|
||||
# Each logical line takes at least 1 visual row; long lines wrap.
|
||||
# Use prompt_toolkit's cell width so CJK wide characters count as 2.
|
||||
line_width = get_cwidth(line)
|
||||
if line_width <= 0:
|
||||
# Each logical line takes at least 1 visual row; long lines wrap
|
||||
if len(line) == 0:
|
||||
visual_lines += 1
|
||||
else:
|
||||
visual_lines += max(1, -(-line_width // available_width)) # ceil division
|
||||
visual_lines += max(1, -(-len(line) // available_width)) # ceil division
|
||||
return min(max(visual_lines, 1), 8)
|
||||
except Exception:
|
||||
return 1
|
||||
@@ -8146,25 +8073,6 @@ class HermesCLI:
|
||||
# Periodic config watcher — auto-reload MCP on mcp_servers change
|
||||
if not self._agent_running:
|
||||
self._check_config_mcp_changes()
|
||||
# Check for background process completion notifications
|
||||
# while the agent is idle (user hasn't typed anything yet).
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
if not process_registry.completion_queue.empty():
|
||||
completion = process_registry.completion_queue.get_nowait()
|
||||
_exit = completion.get("exit_code", "?")
|
||||
_cmd = completion.get("command", "unknown")
|
||||
_sid = completion.get("session_id", "unknown")
|
||||
_out = completion.get("output", "")
|
||||
_synth = (
|
||||
f"[SYSTEM: Background process {_sid} completed "
|
||||
f"(exit code {_exit}).\n"
|
||||
f"Command: {_cmd}\n"
|
||||
f"Output:\n{_out}]"
|
||||
)
|
||||
self._pending_input.put(_synth)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
|
||||
if not user_input:
|
||||
@@ -8278,29 +8186,7 @@ class HermesCLI:
|
||||
except Exception as e:
|
||||
_cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}")
|
||||
threading.Thread(target=_restart_recording, daemon=True).start()
|
||||
|
||||
# Drain process completion notifications — any background
|
||||
# process that finished with notify_on_complete while the
|
||||
# agent was running (or before) gets auto-injected as a
|
||||
# new user message so the agent can react to it.
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
while not process_registry.completion_queue.empty():
|
||||
completion = process_registry.completion_queue.get_nowait()
|
||||
_exit = completion.get("exit_code", "?")
|
||||
_cmd = completion.get("command", "unknown")
|
||||
_sid = completion.get("session_id", "unknown")
|
||||
_out = completion.get("output", "")
|
||||
_synth = (
|
||||
f"[SYSTEM: Background process {_sid} completed "
|
||||
f"(exit code {_exit}).\n"
|
||||
f"Command: {_cmd}\n"
|
||||
f"Output:\n{_out}]"
|
||||
)
|
||||
self._pending_input.put(_synth)
|
||||
except Exception:
|
||||
pass # Non-fatal — don't break the main loop
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/bin/bash
# Deploy Kimi-primary config to Ezra
#
# Copies ezra-kimi-primary.yaml (expected next to this script) to the Ezra
# host over SSH, backs up the previous config.yaml, checks that KIMI_API_KEY
# is present in the remote .env, and restarts the gateway.
#
# Environment:
#   EZRA_HOST  Target host (default: 143.198.27.163)

set -e

EZRA_HOST="${EZRA_HOST:-143.198.27.163}"
EZRA_HERMES_HOME="/root/wizards/ezra/hermes-agent"
CONFIG_SOURCE="$(dirname "$0")/ezra-kimi-primary.yaml"
EZRA_SSH="root@${EZRA_HOST}"

# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}[DEPLOY]${NC} Ezra Kimi-Primary Configuration"
echo "================================================"
echo ""

# Check prerequisites
if [ ! -f "$CONFIG_SOURCE" ]; then
    echo -e "${RED}[ERROR]${NC} Config not found: $CONFIG_SOURCE"
    exit 1
fi

# Show what we're deploying (first 20 non-comment, non-blank lines)
echo "Configuration to deploy:"
echo "------------------------"
grep -v "^#" "$CONFIG_SOURCE" | grep -v "^$" | head -20
echo ""

# Deploy to Ezra
echo -e "${GREEN}[DEPLOY]${NC} Copying config to Ezra..."

# Backup existing (best effort: a missing config.yaml is not an error).
# The timestamp is expanded locally before the command is sent.
ssh "$EZRA_SSH" "cp $EZRA_HERMES_HOME/config.yaml $EZRA_HERMES_HOME/config.yaml.backup.anthropic-$(date +%s) 2>/dev/null || true"

# Copy new config
scp "$CONFIG_SOURCE" "$EZRA_SSH:$EZRA_HERMES_HOME/config.yaml"

# Verify KIMI_API_KEY exists (warning only; the deploy continues either way)
echo -e "${GREEN}[VERIFY]${NC} Checking KIMI_API_KEY on Ezra..."
ssh "$EZRA_SSH" "grep -q KIMI_API_KEY $EZRA_HERMES_HOME/.env && echo 'KIMI_API_KEY found' || echo 'WARNING: KIMI_API_KEY not set'"

# Restart Ezra gateway
echo -e "${GREEN}[RESTART]${NC} Restarting Ezra gateway..."
# The bracketed first letter keeps the pattern from matching the remote shell
# that carries this very command line; otherwise pkill -f can terminate that
# shell and the non-zero ssh status aborts the deploy under `set -e`.
# NOTE(review): the gateway is started below as `python -m gateway.run`, which
# this pattern does not match -- confirm how a previously started gateway is
# supposed to be stopped.
ssh "$EZRA_SSH" "cd $EZRA_HERMES_HOME && pkill -f '[h]ermes gateway' 2>/dev/null || true"
sleep 2
# Create logs/ if missing and detach stdin so the SSH session can close while
# the gateway keeps running in the background.
ssh "$EZRA_SSH" "cd $EZRA_HERMES_HOME && mkdir -p logs && nohup python -m gateway.run > logs/gateway.log 2>&1 < /dev/null &"

# NOTE(review): nothing here confirms that the gateway actually came up; use
# the tail command printed below to check.
echo ""
echo -e "${GREEN}[SUCCESS]${NC} Ezra is now running Kimi primary!"
echo ""
echo "Anthropic: FIRED ✓"
echo "Kimi: PRIMARY ✓"
echo ""
echo "To verify: ssh root@$EZRA_HOST 'tail -f $EZRA_HERMES_HOME/logs/gateway.log'"
|
||||
@@ -1,34 +0,0 @@
|
||||
model:
|
||||
default: kimi-k2.5
|
||||
provider: kimi-coding
|
||||
toolsets:
|
||||
- all
|
||||
fallback_providers:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
reason: Direct Anthropic fallback
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: OpenRouter fallback
|
||||
agent:
|
||||
max_turns: 90
|
||||
reasoning_effort: high
|
||||
verbose: false
|
||||
providers:
|
||||
kimi-coding:
|
||||
base_url: https://api.kimi.com/coding/v1
|
||||
timeout: 60
|
||||
max_retries: 3
|
||||
anthropic:
|
||||
timeout: 120
|
||||
openrouter:
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
timeout: 120
|
||||
@@ -1,53 +0,0 @@
|
||||
# Hermes Agent Fallback Configuration
|
||||
# Deploy this to Timmy and Ezra for automatic kimi-coding fallback
|
||||
|
||||
model: anthropic/claude-opus-4.6
|
||||
|
||||
# Fallback chain: Anthropic -> Kimi -> Ollama (local)
|
||||
fallback_providers:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 60
|
||||
reason: "Primary fallback when Anthropic quota limited"
|
||||
|
||||
- provider: ollama
|
||||
model: qwen2.5:7b
|
||||
base_url: http://localhost:11434
|
||||
timeout: 120
|
||||
reason: "Local fallback for offline operation"
|
||||
|
||||
# Provider settings
|
||||
providers:
|
||||
anthropic:
|
||||
timeout: 30
|
||||
retry_on_quota: true
|
||||
max_retries: 2
|
||||
|
||||
kimi-coding:
|
||||
timeout: 60
|
||||
max_retries: 3
|
||||
|
||||
ollama:
|
||||
timeout: 120
|
||||
keep_alive: true
|
||||
|
||||
# Toolsets
|
||||
toolsets:
|
||||
- hermes-cli
|
||||
- github
|
||||
- web
|
||||
|
||||
# Agent settings
|
||||
agent:
|
||||
max_turns: 90
|
||||
tool_use_enforcement: auto
|
||||
fallback_on_errors:
|
||||
- rate_limit_exceeded
|
||||
- quota_exceeded
|
||||
- timeout
|
||||
- service_unavailable
|
||||
|
||||
# Display settings
|
||||
display:
|
||||
show_fallback_notifications: true
|
||||
show_provider_switches: true
|
||||
@@ -1,200 +0,0 @@
|
||||
/**
|
||||
* Nexus Base Room Template
|
||||
*
|
||||
* This is the base template for all Nexus rooms.
|
||||
* Copy and customize this template for new room types.
|
||||
*
|
||||
* Compatible with Three.js r128+
|
||||
*/
|
||||
|
||||
(function() {
|
||||
'use strict';
|
||||
|
||||
/**
|
||||
* Configuration object for the room
|
||||
*/
|
||||
const CONFIG = {
|
||||
name: 'base_room',
|
||||
dimensions: {
|
||||
width: 20,
|
||||
height: 10,
|
||||
depth: 20
|
||||
},
|
||||
colors: {
|
||||
primary: '#1A1A2E',
|
||||
secondary: '#16213E',
|
||||
accent: '#D4AF37', // Timmy's gold
|
||||
light: '#E0F7FA', // Sovereignty crystal
|
||||
},
|
||||
lighting: {
|
||||
ambientIntensity: 0.3,
|
||||
accentIntensity: 0.8,
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Create the base room
|
||||
* @returns {THREE.Group} The room group
|
||||
*/
|
||||
function createBaseRoom() {
|
||||
const room = new THREE.Group();
|
||||
room.name = CONFIG.name;
|
||||
|
||||
// Create floor
|
||||
createFloor(room);
|
||||
|
||||
// Create walls
|
||||
createWalls(room);
|
||||
|
||||
// Setup lighting
|
||||
setupLighting(room);
|
||||
|
||||
// Add room features
|
||||
addFeatures(room);
|
||||
|
||||
return room;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the floor
|
||||
*/
|
||||
function createFloor(room) {
|
||||
const floorGeo = new THREE.PlaneGeometry(
|
||||
CONFIG.dimensions.width,
|
||||
CONFIG.dimensions.depth
|
||||
);
|
||||
const floorMat = new THREE.MeshStandardMaterial({
|
||||
color: CONFIG.colors.primary,
|
||||
roughness: 0.8,
|
||||
metalness: 0.2,
|
||||
});
|
||||
const floor = new THREE.Mesh(floorGeo, floorMat);
|
||||
floor.rotation.x = -Math.PI / 2;
|
||||
floor.receiveShadow = true;
|
||||
floor.name = 'floor';
|
||||
room.add(floor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the walls
|
||||
*/
|
||||
function createWalls(room) {
|
||||
const wallMat = new THREE.MeshStandardMaterial({
|
||||
color: CONFIG.colors.secondary,
|
||||
roughness: 0.9,
|
||||
metalness: 0.1,
|
||||
side: THREE.DoubleSide
|
||||
});
|
||||
|
||||
const { width, height, depth } = CONFIG.dimensions;
|
||||
|
||||
// Back wall
|
||||
const backWall = new THREE.Mesh(
|
||||
new THREE.PlaneGeometry(width, height),
|
||||
wallMat
|
||||
);
|
||||
backWall.position.set(0, height / 2, -depth / 2);
|
||||
backWall.receiveShadow = true;
|
||||
room.add(backWall);
|
||||
|
||||
// Left wall
|
||||
const leftWall = new THREE.Mesh(
|
||||
new THREE.PlaneGeometry(depth, height),
|
||||
wallMat
|
||||
);
|
||||
leftWall.position.set(-width / 2, height / 2, 0);
|
||||
leftWall.rotation.y = Math.PI / 2;
|
||||
leftWall.receiveShadow = true;
|
||||
room.add(leftWall);
|
||||
|
||||
// Right wall
|
||||
const rightWall = new THREE.Mesh(
|
||||
new THREE.PlaneGeometry(depth, height),
|
||||
wallMat
|
||||
);
|
||||
rightWall.position.set(width / 2, height / 2, 0);
|
||||
rightWall.rotation.y = -Math.PI / 2;
|
||||
rightWall.receiveShadow = true;
|
||||
room.add(rightWall);
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup lighting
|
||||
*/
|
||||
function setupLighting(room) {
|
||||
// Ambient light
|
||||
const ambientLight = new THREE.AmbientLight(
|
||||
CONFIG.colors.primary,
|
||||
CONFIG.lighting.ambientIntensity
|
||||
);
|
||||
ambientLight.name = 'ambient';
|
||||
room.add(ambientLight);
|
||||
|
||||
// Accent light (Timmy's gold)
|
||||
const accentLight = new THREE.PointLight(
|
||||
CONFIG.colors.accent,
|
||||
CONFIG.lighting.accentIntensity,
|
||||
50
|
||||
);
|
||||
accentLight.position.set(0, 8, 0);
|
||||
accentLight.castShadow = true;
|
||||
accentLight.name = 'accent';
|
||||
room.add(accentLight);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add room features
|
||||
* Override this function in custom rooms
|
||||
*/
|
||||
function addFeatures(room) {
|
||||
// Base room has minimal features
|
||||
// Custom rooms should override this
|
||||
|
||||
// Example: Add a center piece
|
||||
const centerGeo = new THREE.SphereGeometry(1, 32, 32);
|
||||
const centerMat = new THREE.MeshStandardMaterial({
|
||||
color: CONFIG.colors.accent,
|
||||
emissive: CONFIG.colors.accent,
|
||||
emissiveIntensity: 0.3,
|
||||
roughness: 0.3,
|
||||
metalness: 0.8,
|
||||
});
|
||||
const centerPiece = new THREE.Mesh(centerGeo, centerMat);
|
||||
centerPiece.position.set(0, 2, 0);
|
||||
centerPiece.castShadow = true;
|
||||
centerPiece.name = 'centerpiece';
|
||||
room.add(centerPiece);
|
||||
|
||||
// Animation hook
|
||||
centerPiece.userData.animate = function(time) {
|
||||
this.position.y = 2 + Math.sin(time) * 0.2;
|
||||
this.rotation.y = time * 0.5;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Dispose of room resources
|
||||
*/
|
||||
function disposeRoom(room) {
|
||||
room.traverse((child) => {
|
||||
if (child.isMesh) {
|
||||
child.geometry.dispose();
|
||||
if (Array.isArray(child.material)) {
|
||||
child.material.forEach(m => m.dispose());
|
||||
} else {
|
||||
child.material.dispose();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Export
|
||||
if (typeof module !== 'undefined' && module.exports) {
|
||||
module.exports = { createBaseRoom, disposeRoom, CONFIG };
|
||||
} else if (typeof window !== 'undefined') {
|
||||
window.NexusRooms = window.NexusRooms || {};
|
||||
window.NexusRooms.base_room = createBaseRoom;
|
||||
}
|
||||
|
||||
return { createBaseRoom, disposeRoom, CONFIG };
|
||||
})();
|
||||
@@ -1,221 +0,0 @@
|
||||
{
|
||||
"description": "Nexus Lighting Presets for Three.js",
|
||||
"version": "1.0.0",
|
||||
"presets": {
|
||||
"warm": {
|
||||
"name": "Warm",
|
||||
"description": "Warm, inviting lighting with golden tones",
|
||||
"colors": {
|
||||
"timmy_gold": "#D4AF37",
|
||||
"ambient": "#FFE4B5",
|
||||
"primary": "#FFA07A",
|
||||
"secondary": "#F4A460"
|
||||
},
|
||||
"lights": {
|
||||
"ambient": {
|
||||
"color": "#FFE4B5",
|
||||
"intensity": 0.4
|
||||
},
|
||||
"directional": {
|
||||
"color": "#FFA07A",
|
||||
"intensity": 0.8,
|
||||
"position": {"x": 10, "y": 20, "z": 10}
|
||||
},
|
||||
"point_lights": [
|
||||
{
|
||||
"color": "#D4AF37",
|
||||
"intensity": 0.6,
|
||||
"distance": 30,
|
||||
"position": {"x": 0, "y": 8, "z": 0}
|
||||
}
|
||||
]
|
||||
},
|
||||
"fog": {
|
||||
"enabled": true,
|
||||
"color": "#FFE4B5",
|
||||
"density": 0.02
|
||||
},
|
||||
"atmosphere": "welcoming"
|
||||
},
|
||||
"cool": {
|
||||
"name": "Cool",
|
||||
"description": "Cool, serene lighting with blue tones",
|
||||
"colors": {
|
||||
"allegro_blue": "#4A90E2",
|
||||
"ambient": "#E0F7FA",
|
||||
"primary": "#81D4FA",
|
||||
"secondary": "#B3E5FC"
|
||||
},
|
||||
"lights": {
|
||||
"ambient": {
|
||||
"color": "#E0F7FA",
|
||||
"intensity": 0.35
|
||||
},
|
||||
"directional": {
|
||||
"color": "#81D4FA",
|
||||
"intensity": 0.7,
|
||||
"position": {"x": -10, "y": 15, "z": -5}
|
||||
},
|
||||
"point_lights": [
|
||||
{
|
||||
"color": "#4A90E2",
|
||||
"intensity": 0.5,
|
||||
"distance": 25,
|
||||
"position": {"x": 5, "y": 6, "z": 5}
|
||||
}
|
||||
]
|
||||
},
|
||||
"fog": {
|
||||
"enabled": true,
|
||||
"color": "#E0F7FA",
|
||||
"density": 0.015
|
||||
},
|
||||
"atmosphere": "serene"
|
||||
},
|
||||
"dramatic": {
|
||||
"name": "Dramatic",
|
||||
"description": "High contrast lighting with deep shadows",
|
||||
"colors": {
|
||||
"shadow": "#1A1A2E",
|
||||
"highlight": "#D4AF37",
|
||||
"ambient": "#0F0F1A",
|
||||
"rim": "#4A90E2"
|
||||
},
|
||||
"lights": {
|
||||
"ambient": {
|
||||
"color": "#0F0F1A",
|
||||
"intensity": 0.2
|
||||
},
|
||||
"directional": {
|
||||
"color": "#D4AF37",
|
||||
"intensity": 1.2,
|
||||
"position": {"x": 5, "y": 10, "z": 5}
|
||||
},
|
||||
"spot_lights": [
|
||||
{
|
||||
"color": "#4A90E2",
|
||||
"intensity": 1.0,
|
||||
"angle": 0.5,
|
||||
"penumbra": 0.5,
|
||||
"position": {"x": -5, "y": 10, "z": -5},
|
||||
"target": {"x": 0, "y": 0, "z": 0}
|
||||
}
|
||||
]
|
||||
},
|
||||
"fog": {
|
||||
"enabled": false
|
||||
},
|
||||
"shadows": {
|
||||
"enabled": true,
|
||||
"mapSize": 2048
|
||||
},
|
||||
"atmosphere": "mysterious"
|
||||
},
|
||||
"serene": {
|
||||
"name": "Serene",
|
||||
"description": "Soft, diffuse lighting for contemplation",
|
||||
"colors": {
|
||||
"ambient": "#F5F5F5",
|
||||
"primary": "#E8EAF6",
|
||||
"accent": "#C5CAE9",
|
||||
"gold": "#D4AF37"
|
||||
},
|
||||
"lights": {
|
||||
"hemisphere": {
|
||||
"skyColor": "#E8EAF6",
|
||||
"groundColor": "#F5F5F5",
|
||||
"intensity": 0.6
|
||||
},
|
||||
"directional": {
|
||||
"color": "#FFFFFF",
|
||||
"intensity": 0.4,
|
||||
"position": {"x": 10, "y": 20, "z": 10}
|
||||
},
|
||||
"point_lights": [
|
||||
{
|
||||
"color": "#D4AF37",
|
||||
"intensity": 0.3,
|
||||
"distance": 20,
|
||||
"position": {"x": 0, "y": 5, "z": 0}
|
||||
}
|
||||
]
|
||||
},
|
||||
"fog": {
|
||||
"enabled": true,
|
||||
"color": "#F5F5F5",
|
||||
"density": 0.01
|
||||
},
|
||||
"atmosphere": "contemplative"
|
||||
},
|
||||
"crystalline": {
|
||||
"name": "Crystalline",
|
||||
"description": "Clear, bright lighting for sovereignty theme",
|
||||
"colors": {
|
||||
"crystal": "#E0F7FA",
|
||||
"clear": "#FFFFFF",
|
||||
"accent": "#4DD0E1",
|
||||
"gold": "#D4AF37"
|
||||
},
|
||||
"lights": {
|
||||
"ambient": {
|
||||
"color": "#E0F7FA",
|
||||
"intensity": 0.5
|
||||
},
|
||||
"directional": [
|
||||
{
|
||||
"color": "#FFFFFF",
|
||||
"intensity": 0.8,
|
||||
"position": {"x": 10, "y": 20, "z": 10}
|
||||
},
|
||||
{
|
||||
"color": "#4DD0E1",
|
||||
"intensity": 0.4,
|
||||
"position": {"x": -10, "y": 10, "z": -10}
|
||||
}
|
||||
],
|
||||
"point_lights": [
|
||||
{
|
||||
"color": "#D4AF37",
|
||||
"intensity": 0.5,
|
||||
"distance": 25,
|
||||
"position": {"x": 0, "y": 8, "z": 0}
|
||||
}
|
||||
]
|
||||
},
|
||||
"fog": {
|
||||
"enabled": true,
|
||||
"color": "#E0F7FA",
|
||||
"density": 0.008
|
||||
},
|
||||
"atmosphere": "sovereign"
|
||||
},
|
||||
"minimal": {
|
||||
"name": "Minimal",
|
||||
"description": "Minimal lighting with clean shadows",
|
||||
"colors": {
|
||||
"ambient": "#FFFFFF",
|
||||
"primary": "#F5F5F5"
|
||||
},
|
||||
"lights": {
|
||||
"ambient": {
|
||||
"color": "#FFFFFF",
|
||||
"intensity": 0.3
|
||||
},
|
||||
"directional": {
|
||||
"color": "#FFFFFF",
|
||||
"intensity": 0.7,
|
||||
"position": {"x": 5, "y": 10, "z": 5}
|
||||
}
|
||||
},
|
||||
"fog": {
|
||||
"enabled": false
|
||||
},
|
||||
"shadows": {
|
||||
"enabled": true,
|
||||
"soft": true
|
||||
},
|
||||
"atmosphere": "clean"
|
||||
}
|
||||
},
|
||||
"default_preset": "serene"
|
||||
}
|
||||
@@ -1,154 +0,0 @@
|
||||
{
|
||||
"description": "Nexus Material Presets for Three.js MeshStandardMaterial",
|
||||
"version": "1.0.0",
|
||||
"presets": {
|
||||
"timmy_gold": {
|
||||
"name": "Timmy's Gold",
|
||||
"description": "Warm gold metallic material representing Timmy",
|
||||
"color": "#D4AF37",
|
||||
"emissive": "#D4AF37",
|
||||
"emissiveIntensity": 0.2,
|
||||
"roughness": 0.3,
|
||||
"metalness": 0.8,
|
||||
"tags": ["timmy", "gold", "metallic", "warm"]
|
||||
},
|
||||
"allegro_blue": {
|
||||
"name": "Allegro Blue",
|
||||
"description": "Motion blue representing Allegro",
|
||||
"color": "#4A90E2",
|
||||
"emissive": "#4A90E2",
|
||||
"emissiveIntensity": 0.1,
|
||||
"roughness": 0.2,
|
||||
"metalness": 0.6,
|
||||
"tags": ["allegro", "blue", "motion", "cool"]
|
||||
},
|
||||
"sovereignty_crystal": {
|
||||
"name": "Sovereignty Crystal",
|
||||
"description": "Crystalline clear material with slight transparency",
|
||||
"color": "#E0F7FA",
|
||||
"transparent": true,
|
||||
"opacity": 0.8,
|
||||
"roughness": 0.1,
|
||||
"metalness": 0.1,
|
||||
"transmission": 0.5,
|
||||
"tags": ["crystal", "clear", "sovereignty", "transparent"]
|
||||
},
|
||||
"contemplative_stone": {
|
||||
"name": "Contemplative Stone",
|
||||
"description": "Smooth stone for contemplative spaces",
|
||||
"color": "#546E7A",
|
||||
"roughness": 0.9,
|
||||
"metalness": 0.0,
|
||||
"tags": ["stone", "contemplative", "matte", "natural"]
|
||||
},
|
||||
"ethereal_mist": {
|
||||
"name": "Ethereal Mist",
|
||||
"description": "Semi-transparent misty material",
|
||||
"color": "#E1F5FE",
|
||||
"transparent": true,
|
||||
"opacity": 0.3,
|
||||
"roughness": 1.0,
|
||||
"metalness": 0.0,
|
||||
"side": "DoubleSide",
|
||||
"tags": ["mist", "ethereal", "transparent", "soft"]
|
||||
},
|
||||
"warm_wood": {
|
||||
"name": "Warm Wood",
|
||||
"description": "Natural wood material for organic warmth",
|
||||
"color": "#8D6E63",
|
||||
"roughness": 0.8,
|
||||
"metalness": 0.0,
|
||||
"tags": ["wood", "natural", "warm", "organic"]
|
||||
},
|
||||
"polished_marble": {
|
||||
"name": "Polished Marble",
|
||||
"description": "Smooth reflective marble surface",
|
||||
"color": "#F5F5F5",
|
||||
"roughness": 0.1,
|
||||
"metalness": 0.1,
|
||||
"tags": ["marble", "polished", "reflective", "elegant"]
|
||||
},
|
||||
"dark_obsidian": {
|
||||
"name": "Dark Obsidian",
|
||||
"description": "Deep black glassy material for dramatic contrast",
|
||||
"color": "#1A1A2E",
|
||||
"roughness": 0.1,
|
||||
"metalness": 0.9,
|
||||
"tags": ["obsidian", "dark", "dramatic", "glassy"]
|
||||
},
|
||||
"energy_pulse": {
|
||||
"name": "Energy Pulse",
|
||||
"description": "Glowing energy material with high emissive",
|
||||
"color": "#4A90E2",
|
||||
"emissive": "#4A90E2",
|
||||
"emissiveIntensity": 1.0,
|
||||
"roughness": 0.4,
|
||||
"metalness": 0.5,
|
||||
"tags": ["energy", "glow", "animated", "pulse"]
|
||||
},
|
||||
"living_leaf": {
|
||||
"name": "Living Leaf",
|
||||
"description": "Vibrant green material for nature elements",
|
||||
"color": "#66BB6A",
|
||||
"emissive": "#2E7D32",
|
||||
"emissiveIntensity": 0.1,
|
||||
"roughness": 0.7,
|
||||
"metalness": 0.0,
|
||||
"side": "DoubleSide",
|
||||
"tags": ["nature", "green", "organic", "leaf"]
|
||||
},
|
||||
"ancient_brass": {
|
||||
"name": "Ancient Brass",
|
||||
"description": "Aged brass with patina",
|
||||
"color": "#B5A642",
|
||||
"roughness": 0.6,
|
||||
"metalness": 0.7,
|
||||
"tags": ["brass", "ancient", "vintage", "metallic"]
|
||||
},
|
||||
"void_black": {
|
||||
"name": "Void Black",
|
||||
"description": "Complete absorption material for void spaces",
|
||||
"color": "#000000",
|
||||
"roughness": 1.0,
|
||||
"metalness": 0.0,
|
||||
"tags": ["void", "black", "absorbing", "minimal"]
|
||||
},
|
||||
"holographic": {
|
||||
"name": "Holographic",
|
||||
"description": "Futuristic holographic projection material",
|
||||
"color": "#00BCD4",
|
||||
"emissive": "#00BCD4",
|
||||
"emissiveIntensity": 0.5,
|
||||
"transparent": true,
|
||||
"opacity": 0.6,
|
||||
"roughness": 0.2,
|
||||
"metalness": 0.8,
|
||||
"side": "DoubleSide",
|
||||
"tags": ["holographic", "futuristic", "tech", "glow"]
|
||||
},
|
||||
"sandstone": {
|
||||
"name": "Sandstone",
|
||||
"description": "Desert sandstone for warm natural environments",
|
||||
"color": "#D7CCC8",
|
||||
"roughness": 0.95,
|
||||
"metalness": 0.0,
|
||||
"tags": ["sandstone", "desert", "warm", "natural"]
|
||||
},
|
||||
"ice_crystal": {
|
||||
"name": "Ice Crystal",
|
||||
"description": "Clear ice with high transparency",
|
||||
"color": "#E3F2FD",
|
||||
"transparent": true,
|
||||
"opacity": 0.6,
|
||||
"roughness": 0.1,
|
||||
"metalness": 0.1,
|
||||
"transmission": 0.9,
|
||||
"tags": ["ice", "crystal", "cold", "transparent"]
|
||||
}
|
||||
},
|
||||
"default_preset": "contemplative_stone",
|
||||
"helpers": {
|
||||
"apply_preset": "material = new THREE.MeshStandardMaterial(NexusMaterials.getPreset('timmy_gold'))",
|
||||
"create_custom": "Use preset as base and override specific properties"
|
||||
}
|
||||
}
|
||||
@@ -1,339 +0,0 @@
|
||||
/**
|
||||
* Nexus Portal Template
|
||||
*
|
||||
* Template for creating portals between rooms.
|
||||
* Supports multiple visual styles and transition effects.
|
||||
*
|
||||
* Compatible with Three.js r128+
|
||||
*/
|
||||
|
||||
(function() {
|
||||
'use strict';
|
||||
|
||||
/**
 * Portal configuration: palette, animation tuning and trigger-zone size
 * shared by every portal style in this template.
 */
const PORTAL_CONFIG = {
    // Hex colours used by the portal materials.
    colors: {
        frame: '#D4AF37',   // Timmy's gold
        energy: '#4A90E2',  // Allegro blue
        core: '#FFFFFF',
    },
    // Multipliers applied to the time value passed to the animate hook.
    animation: {
        rotationSpeed: 0.5,
        pulseSpeed: 2.0,
        pulseAmplitude: 0.1,
    },
    // Dimensions of the cylindrical trigger zone.
    collision: {
        radius: 2.0,
        height: 4.0,
    },
};
|
||||
|
||||
/**
 * Build a portal linking two rooms.
 *
 * The returned group carries its metadata in `userData`, the visuals for the
 * requested style, an invisible trigger zone and an `animate` hook.
 *
 * @param {string} fromRoom - Source room name
 * @param {string} toRoom - Target room name
 * @param {string} style - Portal style (circular, rectangular, stargate);
 *                         any other value falls back to circular
 * @returns {THREE.Group} The portal group
 */
function createPortal(fromRoom, toRoom, style = 'circular') {
    const group = new THREE.Group();
    group.name = `portal_${fromRoom}_to_${toRoom}`;
    group.userData = {
        type: 'portal',
        fromRoom,
        toRoom,
        isActive: true,
        style,
    };

    // Pick the visual builder for the requested style.
    if (style === 'rectangular') {
        createRectangularPortal(group);
    } else if (style === 'stargate') {
        createStargatePortal(group);
    } else {
        // 'circular' and every unrecognised style
        createCircularPortal(group);
    }

    createTriggerZone(group);   // collision trigger
    setupAnimation(group);      // per-frame animate hook

    return group;
}
|
||||
|
||||
/**
 * Populate a portal with the circular (default) look: a torus frame, a
 * translucent disc inside it and a ring of particles.
 *
 * @param {THREE.Group} portal - Group the parts are added to.
 */
function createCircularPortal(portal) {
    const palette = PORTAL_CONFIG.colors;

    // Outer frame
    const ring = new THREE.Mesh(
        new THREE.TorusGeometry(2, 0.2, 16, 100),
        new THREE.MeshStandardMaterial({
            color: palette.frame,
            emissive: palette.frame,
            emissiveIntensity: 0.5,
            roughness: 0.3,
            metalness: 0.9,
        })
    );
    ring.castShadow = true;
    ring.name = 'frame';
    portal.add(ring);

    // Inner energy field
    const disc = new THREE.Mesh(
        new THREE.CircleGeometry(1.8, 64),
        new THREE.MeshBasicMaterial({
            color: palette.energy,
            transparent: true,
            opacity: 0.4,
            side: THREE.DoubleSide,
        })
    );
    disc.name = 'energy_field';
    portal.add(disc);

    // Particle ring
    createParticleRing(portal);
}
|
||||
|
||||
/**
 * Create rectangular portal.
 *
 * Adds a rectangular frame (mesh named 'frame') and a translucent energy
 * plane (mesh named 'energy_field') to the portal. The frame is extruded
 * from an outline with a hole the size of the opening, so the energy plane
 * at z = 0 is visible through it; a solid box of the same outer size would
 * enclose the plane and hide it from both sides.
 *
 * @param {THREE.Group} portal - Group the parts are added to.
 */
function createRectangularPortal(portal) {
    const { frame, energy } = PORTAL_CONFIG.colors;
    const width = 3;          // opening width
    const height = 4;         // opening height
    const borderWidth = 0.2;  // frame bar width on each side (outer size = opening + 0.4)
    const depth = 0.2;        // frame thickness along z

    const frameMat = new THREE.MeshStandardMaterial({
        color: frame,
        emissive: frame,
        emissiveIntensity: 0.5,
        roughness: 0.3,
        metalness: 0.9,
    });

    // Outer outline of the frame, centred on the origin.
    const halfW = width / 2;
    const halfH = height / 2;
    const outerW = halfW + borderWidth;
    const outerH = halfH + borderWidth;

    const outline = new THREE.Shape();
    outline.moveTo(-outerW, -outerH);
    outline.lineTo(outerW, -outerH);
    outline.lineTo(outerW, outerH);
    outline.lineTo(-outerW, outerH);
    outline.lineTo(-outerW, -outerH);

    // Opening for the energy field, wound opposite to the outline.
    const opening = new THREE.Path();
    opening.moveTo(-halfW, -halfH);
    opening.lineTo(-halfW, halfH);
    opening.lineTo(halfW, halfH);
    opening.lineTo(halfW, -halfH);
    opening.lineTo(-halfW, -halfH);
    outline.holes.push(opening);

    const borderGeo = new THREE.ExtrudeGeometry(outline, {
        depth: depth,
        bevelEnabled: false,
    });
    // Extrusion runs from z = 0 to z = depth; centre it on the field plane.
    borderGeo.translate(0, 0, -depth / 2);

    const border = new THREE.Mesh(borderGeo, frameMat);
    border.castShadow = true; // same as the circular portal's frame
    border.name = 'frame';
    portal.add(border);

    // Inner field
    const fieldGeo = new THREE.PlaneGeometry(width, height);
    const fieldMat = new THREE.MeshBasicMaterial({
        color: energy,
        transparent: true,
        opacity: 0.4,
        side: THREE.DoubleSide,
    });
    const field = new THREE.Mesh(fieldGeo, fieldMat);
    field.name = 'energy_field';
    portal.add(field);
}
|
||||
|
||||
/**
 * Create stargate-style portal.
 *
 * Adds a torus ring (named 'main_ring'), nine chevrons evenly spaced around
 * it ('chevron_0' … 'chevron_8') and a translucent disc (named 'vortex').
 *
 * @param {THREE.Group} portal - Group the parts are added to.
 */
function createStargatePortal(portal) {
    const { frame, energy } = PORTAL_CONFIG.colors;
    const ringRadius = 2;
    const chevronCount = 9;

    // Main ring
    const ringGeo = new THREE.TorusGeometry(ringRadius, 0.3, 16, 100);
    const ringMat = new THREE.MeshStandardMaterial({
        color: frame,
        emissive: frame,
        emissiveIntensity: 0.4,
        roughness: 0.4,
        metalness: 0.8,
    });
    const ring = new THREE.Mesh(ringGeo, ringMat);
    ring.name = 'main_ring';
    portal.add(ring);

    // Chevron decorations
    for (let i = 0; i < chevronCount; i++) {
        const angle = (i / chevronCount) * Math.PI * 2;
        const chevron = createChevron();
        chevron.position.set(
            Math.cos(angle) * ringRadius,
            Math.sin(angle) * ringRadius,
            0
        );
        chevron.rotation.z = angle + Math.PI / 2;
        chevron.name = `chevron_${i}`;
        portal.add(chevron);
    }

    // Inner vortex
    const vortexGeo = new THREE.CircleGeometry(1.7, 32);
    const vortexMat = new THREE.MeshBasicMaterial({
        color: energy,
        transparent: true,
        opacity: 0.5,
        // Render both faces, like the energy field of the other portal
        // styles; a single-sided disc is invisible from behind.
        side: THREE.DoubleSide,
    });
    const vortex = new THREE.Mesh(vortexGeo, vortexMat);
    vortex.name = 'vortex';
    portal.add(vortex);
}
|
||||
|
||||
/**
 * Build one chevron mesh for the stargate style: a small triangle outline
 * extruded to a depth of 0.1 and shaded in the frame colour.
 *
 * @returns {THREE.Mesh} The chevron, positioned at the origin.
 */
function createChevron() {
    // Triangle corners, finishing back at the starting point.
    const corners = [
        [-0.2, 0],
        [0, 0.4],
        [0.2, 0],
        [-0.2, 0],
    ];

    const outline = new THREE.Shape();
    corners.forEach(([x, y], index) => {
        if (index === 0) {
            outline.moveTo(x, y);
        } else {
            outline.lineTo(x, y);
        }
    });

    const frameColor = PORTAL_CONFIG.colors.frame;
    const geometry = new THREE.ExtrudeGeometry(outline, {
        depth: 0.1,
        bevelEnabled: false
    });
    const material = new THREE.MeshStandardMaterial({
        color: frameColor,
        emissive: frameColor,
        emissiveIntensity: 0.3,
    });

    return new THREE.Mesh(geometry, material);
}
|
||||
|
||||
/**
 * Add a ring of 50 point particles (named 'particles') to the portal.
 *
 * Particles are spaced evenly by angle; each gets a random radius within
 * ±0.2 of 2 and a random z offset within ±0.25.
 *
 * @param {THREE.Group} portal - Group the particle system is added to.
 */
function createParticleRing(portal) {
    const count = 50;
    const coords = new Float32Array(count * 3);

    let offset = 0;
    for (let n = 0; n < count; n++) {
        const theta = (n / count) * Math.PI * 2;
        // Radius is drawn before the z jitter, one random value each.
        const r = 2 + (Math.random() - 0.5) * 0.4;
        coords[offset] = Math.cos(theta) * r;
        coords[offset + 1] = Math.sin(theta) * r;
        coords[offset + 2] = (Math.random() - 0.5) * 0.5;
        offset += 3;
    }

    const geometry = new THREE.BufferGeometry();
    geometry.setAttribute('position', new THREE.BufferAttribute(coords, 3));

    const material = new THREE.PointsMaterial({
        color: PORTAL_CONFIG.colors.energy,
        size: 0.05,
        transparent: true,
        opacity: 0.8,
    });

    const points = new THREE.Points(geometry, material);
    points.name = 'particles';
    portal.add(points);
}
|
||||
|
||||
/**
 * Add the teleportation trigger zone to a portal.
 *
 * The zone is a fully transparent wireframe cylinder named 'trigger_zone',
 * sized from PORTAL_CONFIG.collision, raised by half its height and flagged
 * with `userData.isTrigger`.
 *
 * @param {THREE.Group} portal - Group the trigger mesh is added to.
 */
function createTriggerZone(portal) {
    const { radius, height } = PORTAL_CONFIG.collision;

    const zone = new THREE.Mesh(
        new THREE.CylinderGeometry(radius, radius, height, 32),
        new THREE.MeshBasicMaterial({
            color: 0x00ff00,
            transparent: true,
            opacity: 0.0, // Invisible
            wireframe: true,
        })
    );
    zone.position.y = height / 2;
    zone.name = 'trigger_zone';
    zone.userData.isTrigger = true;

    portal.add(zone);
}
|
||||
|
||||
/**
 * Setup portal animation.
 *
 * Installs `portal.userData.animate(time)`, which spins the energy field
 * (or vortex), pulses the frame (or main ring) in x/y and counter-rotates
 * the particle ring when those children exist.
 *
 * @param {THREE.Group} portal - Portal whose children are animated.
 */
function setupAnimation(portal) {
    const { rotationSpeed, pulseSpeed, pulseAmplitude } = PORTAL_CONFIG.animation;

    portal.userData.animate = function(time) {
        // The hook lives on `userData`, so `portal.userData.animate(t)` binds
        // `this` to the userData object, which has no getObjectByName().
        // Honour an explicitly bound Object3D (`animate.call(portal, t)`),
        // otherwise fall back to the portal captured by this closure.
        const root = (this && this.isObject3D) ? this : portal;

        // Rotate energy field
        const energyField = root.getObjectByName('energy_field') ||
            root.getObjectByName('vortex');
        if (energyField) {
            energyField.rotation.z = time * rotationSpeed;
        }

        // Pulse effect
        const pulse = 1 + Math.sin(time * pulseSpeed) * pulseAmplitude;
        const frame = root.getObjectByName('frame') ||
            root.getObjectByName('main_ring');
        if (frame) {
            frame.scale.set(pulse, pulse, 1);
        }

        // Animate particles (opposite direction, half speed)
        const particles = root.getObjectByName('particles');
        if (particles) {
            particles.rotation.z = -time * rotationSpeed * 0.5;
        }
    };
}
|
||||
|
||||
/**
 * Report whether a point lies within the portal's trigger radius.
 *
 * Only the horizontal (x/z) distance between the point and
 * `portal.position` is compared against PORTAL_CONFIG.collision.radius.
 * Portals without a 'trigger_zone' child never trigger.
 *
 * @param {THREE.Object3D} portal - Portal to test against.
 * @param {{x: number, z: number}} point - Position to test.
 * @returns {boolean} True when the point is closer than the trigger radius.
 */
function checkTrigger(portal, point) {
    if (!portal.getObjectByName('trigger_zone')) {
        return false;
    }

    // Simple distance check in the x/z plane
    const offsetX = point.x - portal.position.x;
    const offsetZ = point.z - portal.position.z;
    const planarDistance = Math.sqrt(offsetX * offsetX + offsetZ * offsetZ);

    return planarDistance < PORTAL_CONFIG.collision.radius;
}
|
||||
|
||||
/**
 * Switch a portal on or off.
 *
 * Records the state in `portal.userData.isActive` and shows or hides the
 * 'energy_field' child (or 'vortex' when there is no energy field).
 *
 * @param {THREE.Object3D} portal - Portal to update.
 * @param {boolean} active - New state.
 */
function setActive(portal, active) {
    portal.userData.isActive = active;

    const surface = portal.getObjectByName('energy_field') ||
        portal.getObjectByName('vortex');
    if (!surface) {
        return;
    }
    surface.visible = active;
}
|
||||
|
||||
// Export
|
||||
if (typeof module !== 'undefined' && module.exports) {
|
||||
module.exports = {
|
||||
createPortal,
|
||||
checkTrigger,
|
||||
setActive,
|
||||
PORTAL_CONFIG
|
||||
};
|
||||
} else if (typeof window !== 'undefined') {
|
||||
window.NexusPortals = window.NexusPortals || {};
|
||||
window.NexusPortals.create = createPortal;
|
||||
}
|
||||
|
||||
return { createPortal, checkTrigger, setActive, PORTAL_CONFIG };
|
||||
})();
|
||||
@@ -1,59 +0,0 @@
|
||||
#!/bin/bash
# Deploy fallback config to Timmy
# Run this from Timmy's VPS or via SSH
#
# Steps: back up the remote config.yaml, copy fallback-config.yaml over it,
# check that KIMI_API_KEY is present in the remote .env, then restart the
# gateway and confirm the new process is still alive.
#
# Environment overrides:
#   TIMMY_HOST         SSH host to deploy to
#                      (default: timmy)
#   TIMMY_HERMES_HOME  hermes-agent directory on that host
#                      (default: /root/wizards/timmy/hermes-agent)

set -e

TIMMY_HOST="${TIMMY_HOST:-timmy}"
TIMMY_HERMES_HOME="${TIMMY_HERMES_HOME:-/root/wizards/timmy/hermes-agent}"
CONFIG_SOURCE="$(dirname "$0")/fallback-config.yaml"

REMOTE="root@${TIMMY_HOST}"
# Shell-quoted copy of the remote directory, safe to embed in the command
# strings that ssh hands to the remote shell.
REMOTE_HOME_Q="$(printf '%q' "$TIMMY_HERMES_HOME")"

# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}[DEPLOY]${NC} Timmy Fallback Configuration"
echo "==============================================="
echo ""

# Check prerequisites
if [ ! -f "$CONFIG_SOURCE" ]; then
    echo -e "${RED}[ERROR]${NC} Config not found: $CONFIG_SOURCE"
    exit 1
fi

# Show what we're deploying (first 20 non-comment, non-empty lines)
echo "Configuration to deploy:"
echo "------------------------"
grep -v "^#" "$CONFIG_SOURCE" | grep -v "^$" | head -20
echo ""

# Deploy to Timmy
echo -e "${GREEN}[DEPLOY]${NC} Copying config to Timmy..."

# Backup existing (best effort: there may be no config yet)
ssh "$REMOTE" "cp $REMOTE_HOME_Q/config.yaml $REMOTE_HOME_Q/config.yaml.backup.$(date +%s) 2>/dev/null || true"

# Copy new config
scp "$CONFIG_SOURCE" "$REMOTE:$TIMMY_HERMES_HOME/config.yaml"

# Verify KIMI_API_KEY exists
echo -e "${GREEN}[VERIFY]${NC} Checking KIMI_API_KEY on Timmy..."
ssh "$REMOTE" "grep -q KIMI_API_KEY $REMOTE_HOME_Q/.env && echo 'KIMI_API_KEY found' || echo 'WARNING: KIMI_API_KEY not set'"

# Restart Timmy gateway if running
echo -e "${GREEN}[RESTART]${NC} Restarting Timmy gateway..."

# The restart runs as one remote script fed on stdin, so that:
#   * the pkill pattern is not part of the remote shell's own command line
#     (pkill -f would otherwise match and kill the shell carrying it);
#   * the gateway started here is tracked through logs/gateway.pid, because
#     'hermes gateway' does not match a `python -m gateway.run` process;
#   * logs/ exists before the redirect, and the background job has no open
#     handle on the ssh session.
# NOTE: a gateway launched by an earlier version of this script has no PID
# file and has to be stopped by hand once.
ssh "$REMOTE" bash -s -- "$REMOTE_HOME_Q" <<'REMOTE_SCRIPT'
set -e
cd "$1"
mkdir -p logs

if [ -f logs/gateway.pid ]; then
    kill "$(cat logs/gateway.pid)" 2>/dev/null || true
fi
pkill -f 'hermes gateway' 2>/dev/null || true
sleep 2

nohup python -m gateway.run > logs/gateway.log 2>&1 < /dev/null &
gateway_pid=$!
echo "$gateway_pid" > logs/gateway.pid

# Fail the deploy if the gateway died straight away.
sleep 1
if ! kill -0 "$gateway_pid" 2>/dev/null; then
    echo "Gateway exited immediately; see logs/gateway.log" >&2
    exit 1
fi
REMOTE_SCRIPT

echo ""
echo -e "${GREEN}[SUCCESS]${NC} Timmy is now running with Anthropic + Kimi fallback!"
echo ""
echo "Anthropic: PRIMARY (with quota retry)"
echo "Kimi: FALLBACK ✓"
echo "Ollama: LOCAL FALLBACK ✓"
echo ""
echo "To verify: ssh root@$TIMMY_HOST 'tail -f $TIMMY_HERMES_HOME/logs/gateway.log'"
|
||||
@@ -237,10 +237,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
||||
else:
|
||||
delivery_content = content
|
||||
|
||||
# Extract MEDIA: tags so attachments are forwarded as files, not raw text
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
|
||||
|
||||
# Prefer the live adapter when the gateway is running — this supports E2EE
|
||||
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
|
||||
runtime_adapter = (adapters or {}).get(platform)
|
||||
@@ -268,7 +264,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
||||
)
|
||||
|
||||
# Standalone path: run the async send in a fresh event loop (safe from any thread)
|
||||
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
|
||||
coro = _send_to_platform(platform, pconfig, chat_id, delivery_content, thread_id=thread_id)
|
||||
try:
|
||||
result = asyncio.run(coro)
|
||||
except RuntimeError:
|
||||
@@ -279,7 +275,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
||||
coro.close()
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
|
||||
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, delivery_content, thread_id=thread_id))
|
||||
result = future.result(timeout=30)
|
||||
except Exception as e:
|
||||
logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
|
||||
@@ -297,15 +293,8 @@ _SCRIPT_TIMEOUT = 120 # seconds
|
||||
def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
"""Execute a cron job's data-collection script and capture its output.
|
||||
|
||||
Scripts must reside within HERMES_HOME/scripts/. Both relative and
|
||||
absolute paths are resolved and validated against this directory to
|
||||
prevent arbitrary script execution via path traversal or absolute
|
||||
path injection.
|
||||
|
||||
Args:
|
||||
script_path: Path to a Python script. Relative paths are resolved
|
||||
against HERMES_HOME/scripts/. Absolute and ~-prefixed paths
|
||||
are also validated to ensure they stay within the scripts dir.
|
||||
script_path: Path to a Python script (resolved via HERMES_HOME/scripts/ or absolute).
|
||||
|
||||
Returns:
|
||||
(success, output) — on failure *output* contains the error message so the
|
||||
@@ -313,25 +302,16 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
"""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
scripts_dir = get_hermes_home() / "scripts"
|
||||
scripts_dir.mkdir(parents=True, exist_ok=True)
|
||||
scripts_dir_resolved = scripts_dir.resolve()
|
||||
|
||||
raw = Path(script_path).expanduser()
|
||||
if raw.is_absolute():
|
||||
path = raw.resolve()
|
||||
else:
|
||||
path = (scripts_dir / raw).resolve()
|
||||
|
||||
# Guard against path traversal, absolute path injection, and symlink
|
||||
# escape — scripts MUST reside within HERMES_HOME/scripts/.
|
||||
try:
|
||||
path.relative_to(scripts_dir_resolved)
|
||||
except ValueError:
|
||||
return False, (
|
||||
f"Blocked: script path resolves outside the scripts directory "
|
||||
f"({scripts_dir_resolved}): {script_path!r}"
|
||||
)
|
||||
path = Path(script_path).expanduser()
|
||||
if not path.is_absolute():
|
||||
# Resolve relative paths against HERMES_HOME/scripts/
|
||||
scripts_dir = get_hermes_home() / "scripts"
|
||||
path = (scripts_dir / path).resolve()
|
||||
# Guard against path traversal (e.g. "../../etc/passwd")
|
||||
try:
|
||||
path.relative_to(scripts_dir.resolve())
|
||||
except ValueError:
|
||||
return False, f"Script path escapes the scripts directory: {script_path!r}"
|
||||
|
||||
if not path.exists():
|
||||
return False, f"Script not found: {path}"
|
||||
@@ -489,14 +469,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
|
||||
logger.info("Prompt: %s", prompt[:100])
|
||||
|
||||
# Inject origin context so the agent's send_message tool knows the chat
|
||||
if origin:
|
||||
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
|
||||
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
|
||||
if origin.get("chat_name"):
|
||||
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
|
||||
|
||||
try:
|
||||
# Inject origin context so the agent's send_message tool knows the chat.
|
||||
# Must be INSIDE the try block so the finally cleanup always runs.
|
||||
if origin:
|
||||
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
|
||||
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
|
||||
if origin.get("chat_name"):
|
||||
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
from dotenv import load_dotenv
|
||||
@@ -817,7 +797,7 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
# output is already saved above). Failed jobs always deliver.
|
||||
deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
|
||||
should_deliver = bool(deliver_content)
|
||||
if should_deliver and success and SILENT_MARKER in deliver_content.strip().upper():
|
||||
if should_deliver and success and deliver_content.strip().upper().startswith(SILENT_MARKER):
|
||||
logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
|
||||
should_deliver = False
|
||||
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
# docker-compose.override.yml.example
|
||||
#
|
||||
# Copy this file to docker-compose.override.yml and uncomment sections as needed.
|
||||
# Override files are merged on top of docker-compose.yml automatically.
|
||||
# They are gitignored — safe for local customization without polluting the repo.
|
||||
|
||||
services:
|
||||
hermes:
|
||||
# --- Local build (for development) ---
|
||||
# build:
|
||||
# context: ..
|
||||
# dockerfile: Dockerfile  # resolved relative to the build context
|
||||
# target: development
|
||||
|
||||
# --- Expose gateway port externally (dev only — not for production) ---
|
||||
# ports:
|
||||
# - "8642:8642"
|
||||
|
||||
# --- Attach to a custom network shared with other local services ---
|
||||
# networks:
|
||||
# - myapp_network
|
||||
|
||||
# --- Override resource limits for a smaller VPS ---
|
||||
# deploy:
|
||||
# resources:
|
||||
# limits:
|
||||
# cpus: "0.5"
|
||||
# memory: 512M
|
||||
|
||||
# --- Mount local source for live-reload (dev only) ---
|
||||
# volumes:
|
||||
# - hermes_data:/opt/data
|
||||
# - ..:/opt/hermes:ro
|
||||
@@ -1,85 +0,0 @@
|
||||
# Hermes Agent — Docker Compose Stack
|
||||
# Brings up the agent + messaging gateway as a single unit.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose up -d # start in background
|
||||
# docker compose logs -f # follow logs
|
||||
# docker compose down # stop and remove containers
|
||||
# docker compose pull && docker compose up -d # rolling update
|
||||
#
|
||||
# Secrets:
|
||||
# Never commit .env to version control. Copy .env.example → .env and fill it in.
|
||||
# See DEPLOY.md for the full environment-variable reference.
|
||||
|
||||
services:
|
||||
hermes:
|
||||
image: ghcr.io/nousresearch/hermes-agent:latest
|
||||
# To build locally instead:
|
||||
# build:
|
||||
# context: ..
|
||||
# dockerfile: Dockerfile  # resolved relative to the build context
|
||||
container_name: hermes-agent
|
||||
restart: unless-stopped
|
||||
|
||||
# Mount the named data volume so state (sessions, logs, memories, cron)
|
||||
# survives container replacement.
|
||||
volumes:
|
||||
- hermes_data:/opt/data
|
||||
|
||||
# Load secrets from the .env file one directory above this compose file.
# Compose reads it at startup and injects the values as environment
# variables; they are NOT baked into the image.
|
||||
env_file:
|
||||
- ../.env
|
||||
|
||||
environment:
|
||||
# Override the data directory so it always points at the volume.
|
||||
HERMES_HOME: /opt/data
|
||||
|
||||
# Expose the OpenAI-compatible API server (if api_server platform enabled).
|
||||
# Comment out or remove if you are not using the API server.
|
||||
ports:
|
||||
- "127.0.0.1:8642:8642"
|
||||
|
||||
healthcheck:
|
||||
# Hits the API server's /health endpoint. The gateway writes its own
|
||||
# health state to /opt/data/gateway_state.json — checked by the
|
||||
# health-check script in scripts/deploy-validate.
|
||||
test: ["CMD", "python3", "-c",
|
||||
"import urllib.request; urllib.request.urlopen('http://localhost:8642/health', timeout=5)"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
# The container does not need internet on a private network;
|
||||
# restrict egress as needed via your host firewall.
|
||||
networks:
|
||||
- hermes_net
|
||||
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
# Resource limits: tune for your VPS size.
|
||||
# 2 GB RAM and 1.5 CPUs work for most conversational workloads.
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: "1.5"
|
||||
memory: 2G
|
||||
reservations:
|
||||
memory: 512M
|
||||
|
||||
volumes:
|
||||
hermes_data:
|
||||
# Named volume — Docker manages the lifecycle.
# Compose prefixes the volume name with the project name
# (<project>_hermes_data); run `docker volume ls` to find the exact name.
# To inspect: docker volume inspect <project>_hermes_data
# To back up:
#   docker run --rm -v <project>_hermes_data:/data -v $(pwd):/backup \
#     alpine tar czf /backup/hermes_data_$(date +%F).tar.gz /data
|
||||
|
||||
networks:
|
||||
hermes_net:
|
||||
driver: bridge
|
||||
@@ -1,59 +0,0 @@
|
||||
# systemd unit — Hermes Agent (interactive CLI / headless agent)
|
||||
#
|
||||
# Install:
|
||||
# sudo cp hermes-agent.service /etc/systemd/system/
|
||||
# sudo systemctl daemon-reload
|
||||
# sudo systemctl enable --now hermes-agent
|
||||
#
|
||||
# This unit runs Hermes headless via `hermes gateway start` (see ExecStart), meaning the
|
||||
# agent loop stays alive but does not present a TUI. It is appropriate for
|
||||
# dedicated VPS deployments where you want the agent always running and
|
||||
# accessible via the messaging gateway or API server.
|
||||
#
|
||||
# If you only want the messaging gateway, use hermes-gateway.service instead.
|
||||
# Running both units simultaneously is safe — they share ~/.hermes by default.
|
||||
|
||||
[Unit]
|
||||
Description=Hermes Agent
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=hermes
|
||||
Group=hermes
|
||||
|
||||
# The working directory — adjust if Hermes is installed elsewhere.
|
||||
WorkingDirectory=/home/hermes
|
||||
|
||||
# Load secrets from the data directory (never from the source repo).
|
||||
EnvironmentFile=/home/hermes/.hermes/.env
|
||||
|
||||
# Run the gateway; add --replace if restarting over a stale PID file.
|
||||
ExecStart=/home/hermes/.local/bin/hermes gateway start
|
||||
|
||||
# Graceful stop: send SIGTERM and wait up to 30 s before SIGKILL.
|
||||
ExecStop=/bin/kill -TERM $MAINPID
|
||||
TimeoutStopSec=30
|
||||
|
||||
# Restart automatically on failure after a fixed 5 s delay (RestartSec).
|
||||
Restart=on-failure
|
||||
RestartSec=5s
|
||||
StartLimitBurst=5
|
||||
StartLimitIntervalSec=60s
|
||||
|
||||
# Security hardening — tighten as appropriate for your deployment.
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
ProtectSystem=strict
|
||||
ProtectHome=read-only
|
||||
ReadWritePaths=/home/hermes/.hermes /home/hermes/.local/share/hermes
|
||||
|
||||
# Logging — output goes to journald; read with: journalctl -u hermes-agent -f
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=hermes-agent
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,59 +0,0 @@
|
||||
# systemd unit — Hermes Gateway (messaging platform adapter)
|
||||
#
|
||||
# Install:
|
||||
# sudo cp hermes-gateway.service /etc/systemd/system/
|
||||
# sudo systemctl daemon-reload
|
||||
# sudo systemctl enable --now hermes-gateway
|
||||
#
|
||||
# The gateway connects Hermes to Telegram, Discord, Slack, WhatsApp, Signal,
|
||||
# and other platforms. It is a long-running asyncio process that bridges
|
||||
# inbound messages to the agent and routes responses back.
|
||||
#
|
||||
# See DEPLOY.md for environment variable configuration.
|
||||
|
||||
[Unit]
|
||||
Description=Hermes Gateway (messaging platform bridge)
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/messaging
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=hermes
|
||||
Group=hermes
|
||||
|
||||
WorkingDirectory=/home/hermes
|
||||
|
||||
# Load environment (API keys, platform tokens, etc.) from the data directory.
|
||||
EnvironmentFile=/home/hermes/.hermes/.env
|
||||
|
||||
# --replace clears stale PID/lock files from an unclean previous shutdown.
|
||||
ExecStart=/home/hermes/.local/bin/hermes gateway start --replace
|
||||
|
||||
# Pre-start hook: write a timestamped marker so rollback can diff against it.
|
||||
ExecStartPre=/bin/sh -c 'echo "$(date -u +%%Y-%%m-%%dT%%H:%%M:%%SZ) gateway starting" >> /home/hermes/.hermes/logs/deploy.log'
|
||||
|
||||
# Post-stop hook: log shutdown time for audit trail.
|
||||
ExecStopPost=/bin/sh -c 'echo "$(date -u +%%Y-%%m-%%dT%%H:%%M:%%SZ) gateway stopped" >> /home/hermes/.hermes/logs/deploy.log'
|
||||
|
||||
ExecStop=/bin/kill -TERM $MAINPID
|
||||
TimeoutStopSec=30
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5s
|
||||
StartLimitBurst=5
|
||||
StartLimitIntervalSec=60s
|
||||
|
||||
# Security hardening.
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
ProtectSystem=strict
|
||||
ProtectHome=read-only
|
||||
ReadWritePaths=/home/hermes/.hermes /home/hermes/.local/share/hermes
|
||||
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=hermes-gateway
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,56 +0,0 @@
|
||||
# Bezalel's Devkit — Shared Tools for the Wizard Fleet
|
||||
|
||||
This directory contains reusable CLI tools and Python modules for CI, testing, deployment, observability, and Gitea automation. Any wizard can invoke them via `python -m devkit.<tool>`.
|
||||
|
||||
## Tools
|
||||
|
||||
### `gitea_client` — Gitea API Client
|
||||
List issues/PRs, post comments, create PRs, update issues.
|
||||
|
||||
```bash
|
||||
python -m devkit.gitea_client issues --state open --limit 20
|
||||
python -m devkit.gitea_client create-comment --number 142 --body "Update from Bezalel"
|
||||
python -m devkit.gitea_client prs --state open
|
||||
```
|
||||
|
||||
### `health` — Fleet Health Monitor
|
||||
Checks system load, disk, memory, running processes, and key package versions.
|
||||
|
||||
```bash
|
||||
python -m devkit.health --threshold-load 1.0 --threshold-disk 90.0 --fail-on-critical
|
||||
```
|
||||
|
||||
### `notebook_runner` — Notebook Execution Wrapper
|
||||
Parameterizes and executes Jupyter notebooks via Papermill with structured JSON reporting.
|
||||
|
||||
```bash
|
||||
python -m devkit.notebook_runner task.ipynb output.ipynb -p threshold=1.0 -p hostname=forge
|
||||
```
|
||||
|
||||
### `smoke_test` — Fast Smoke Test Runner
|
||||
Runs core import checks, CLI entrypoint tests, and one bare green-path E2E.
|
||||
|
||||
```bash
|
||||
python -m devkit.smoke_test --verbose
|
||||
```
|
||||
|
||||
### `secret_scan` — Secret Leak Scanner
|
||||
Scans the repo for API keys, tokens, and private keys.
|
||||
|
||||
```bash
|
||||
python -m devkit.secret_scan --path . --fail-on-find
|
||||
```
|
||||
|
||||
### `wizard_env` — Environment Validator
|
||||
Checks that a wizard environment has all required binaries, env vars, Python packages, and Hermes config.
|
||||
|
||||
```bash
|
||||
python -m devkit.wizard_env --json --fail-on-incomplete
|
||||
```
|
||||
|
||||
## Philosophy
|
||||
|
||||
- **CLI-first** — Every tool is runnable as `python -m devkit.<tool>`
|
||||
- **JSON output** — Easy to parse from other agents and CI pipelines
|
||||
- **Zero dependencies beyond stdlib** where possible; optional heavy deps are runtime-checked
|
||||
- **Fail-fast** — Exit codes are meaningful for CI gating
|
||||
@@ -1,9 +0,0 @@
|
||||
"""
|
||||
Bezalel's Devkit — Shared development tools for the wizard fleet.
|
||||
|
||||
A collection of CLI-accessible utilities for CI, testing, deployment,
|
||||
observability, and Gitea automation. Designed to be used by any agent
|
||||
via subprocess or direct Python import.
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -1,153 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Shared Gitea API client for wizard fleet automation.
|
||||
|
||||
Usage as CLI:
|
||||
python -m devkit.gitea_client --repo Timmy_Foundation/hermes-agent issues --state open
|
||||
python -m devkit.gitea_client --repo Timmy_Foundation/hermes-agent issue --number 142
|
||||
python -m devkit.gitea_client --repo Timmy_Foundation/hermes-agent create-comment --number 142 --body "Update from Bezalel"
|
||||
python -m devkit.gitea_client --repo Timmy_Foundation/hermes-agent prs --state open
|
||||
|
||||
Usage as module:
|
||||
from devkit.gitea_client import GiteaClient
|
||||
client = GiteaClient()
|
||||
issues = client.list_issues("Timmy_Foundation/hermes-agent", state="open")
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import urllib.request
|
||||
|
||||
|
||||
DEFAULT_BASE_URL = os.getenv("GITEA_URL", "https://forge.alexanderwhitestone.com")
DEFAULT_TOKEN = os.getenv("GITEA_TOKEN", "")


class GiteaClient:
    """Minimal Gitea REST (``/api/v1``) client built on ``urllib``.

    Every public method returns the decoded JSON response. When the server
    answers with an HTTP error status, the method returns a dict of the form
    ``{"error": True, "status": <code>, "body": <text>}`` instead of raising.
    Other failures raised by ``urlopen`` (for example ``URLError``) propagate.
    """

    def __init__(
        self,
        base_url: str = DEFAULT_BASE_URL,
        token: str = DEFAULT_TOKEN,
        timeout: float = 30.0,
    ):
        """Store connection settings.

        Args:
            base_url: Server root; a trailing slash is stripped.
            token: API token sent as ``Authorization: token <token>`` when set.
            timeout: Seconds to wait on each request before giving up.
        """
        self.base_url = base_url.rstrip("/")
        self.token = token or ""
        self.timeout = timeout

    @staticmethod
    def _query(**params: Any) -> str:
        """URL-encode *params* into a query string (keyword order preserved)."""
        from urllib.parse import urlencode

        return urlencode(params)

    def _request(
        self,
        method: str,
        path: str,
        data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Any:
        """Send one API request and return the decoded JSON reply.

        Args:
            method: HTTP verb.
            path: Path below ``/api/v1`` (must start with ``/``).
            data: JSON-serialisable body; ``None`` sends no body.
            headers: Extra headers merged over the defaults.

        Returns:
            The parsed JSON, ``None`` for an empty response body, or the
            error dict described on the class for HTTP error statuses.
        """
        # urllib.request happens to import urllib.error, but rely on that
        # explicitly rather than by accident.
        import urllib.error

        url = f"{self.base_url}/api/v1{path}"
        req_headers = {"Content-Type": "application/json", "Accept": "application/json"}
        if self.token:
            req_headers["Authorization"] = f"token {self.token}"
        if headers:
            req_headers.update(headers)

        # `is not None` so an intentionally empty payload ({}) is still sent.
        body = json.dumps(data).encode() if data is not None else None
        req = urllib.request.Request(url, data=body, headers=req_headers, method=method)

        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                raw = resp.read()
        except urllib.error.HTTPError as e:
            return {
                "error": True,
                "status": e.code,
                "body": e.read().decode(errors="replace"),
            }

        # Responses without a body (e.g. 204) are not valid JSON documents.
        if not raw.strip():
            return None
        return json.loads(raw.decode())

    def list_issues(self, repo: str, state: str = "open", limit: int = 50) -> List[Dict]:
        """List issues of *repo* filtered by *state*, at most *limit* per page."""
        query = self._query(state=state, limit=limit)
        return self._request("GET", f"/repos/{repo}/issues?{query}") or []

    def get_issue(self, repo: str, number: int) -> Dict:
        """Fetch a single issue by its number."""
        return self._request("GET", f"/repos/{repo}/issues/{number}") or {}

    def create_comment(self, repo: str, number: int, body: str) -> Dict:
        """Post *body* as a comment on issue/PR *number*."""
        return self._request(
            "POST", f"/repos/{repo}/issues/{number}/comments", {"body": body}
        )

    def update_issue(self, repo: str, number: int, **fields) -> Dict:
        """PATCH issue *number* with the given *fields* (e.g. title, body, state)."""
        return self._request("PATCH", f"/repos/{repo}/issues/{number}", fields)

    def list_prs(self, repo: str, state: str = "open", limit: int = 50) -> List[Dict]:
        """List pull requests of *repo* filtered by *state*."""
        query = self._query(state=state, limit=limit)
        return self._request("GET", f"/repos/{repo}/pulls?{query}") or []

    def get_pr(self, repo: str, number: int) -> Dict:
        """Fetch a single pull request by its number."""
        return self._request("GET", f"/repos/{repo}/pulls/{number}") or {}

    def create_pr(self, repo: str, title: str, head: str, base: str, body: str = "") -> Dict:
        """Open a pull request from branch *head* into *base*."""
        return self._request(
            "POST",
            f"/repos/{repo}/pulls",
            {"title": title, "head": head, "base": base, "body": body},
        )
|
||||
|
||||
|
||||
def _fmt_json(obj: Any) -> str:
|
||||
return json.dumps(obj, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point for the Gitea client.

    Args:
        argv: Argument list without the program name. ``None`` means use
            ``sys.argv[1:]``; an explicit (even empty) list is honoured as-is.

    Returns:
        0 on success; 1 when no sub-command was given or when the API
        answered with an HTTP error (so CI can gate on the exit code).
    """
    # `argv or ...` would silently replace an explicit [] with sys.argv.
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Gitea CLI for wizard fleet")
    # These options belong to the top-level parser, so they must appear
    # BEFORE the sub-command on the command line.
    parser.add_argument("--repo", default="Timmy_Foundation/hermes-agent", help="Repository full name")
    parser.add_argument("--token", default=DEFAULT_TOKEN, help="Gitea API token")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL, help="Gitea base URL")
    sub = parser.add_subparsers(dest="cmd")

    p_issues = sub.add_parser("issues", help="List issues")
    p_issues.add_argument("--state", default="open")
    p_issues.add_argument("--limit", type=int, default=50)

    p_issue = sub.add_parser("issue", help="Get single issue")
    p_issue.add_argument("--number", type=int, required=True)

    p_prs = sub.add_parser("prs", help="List PRs")
    p_prs.add_argument("--state", default="open")
    p_prs.add_argument("--limit", type=int, default=50)

    p_pr = sub.add_parser("pr", help="Get single PR")
    p_pr.add_argument("--number", type=int, required=True)

    p_comment = sub.add_parser("create-comment", help="Post comment on issue/PR")
    p_comment.add_argument("--number", type=int, required=True)
    p_comment.add_argument("--body", required=True)

    p_update = sub.add_parser("update-issue", help="Update issue fields")
    p_update.add_argument("--number", type=int, required=True)
    p_update.add_argument("--title", default=None)
    p_update.add_argument("--body", default=None)
    p_update.add_argument("--state", default=None)

    p_create_pr = sub.add_parser("create-pr", help="Create a PR")
    p_create_pr.add_argument("--title", required=True)
    p_create_pr.add_argument("--head", required=True)
    p_create_pr.add_argument("--base", default="main")
    p_create_pr.add_argument("--body", default="")

    args = parser.parse_args(argv)
    client = GiteaClient(base_url=args.base_url, token=args.token)

    if args.cmd == "issues":
        result = client.list_issues(args.repo, args.state, args.limit)
    elif args.cmd == "issue":
        result = client.get_issue(args.repo, args.number)
    elif args.cmd == "prs":
        result = client.list_prs(args.repo, args.state, args.limit)
    elif args.cmd == "pr":
        result = client.get_pr(args.repo, args.number)
    elif args.cmd == "create-comment":
        result = client.create_comment(args.repo, args.number, args.body)
    elif args.cmd == "update-issue":
        # Only send the fields the user actually supplied.
        candidates = {"title": args.title, "body": args.body, "state": args.state}
        fields = {k: v for k, v in candidates.items() if v is not None}
        result = client.update_issue(args.repo, args.number, **fields)
    elif args.cmd == "create-pr":
        result = client.create_pr(args.repo, args.title, args.head, args.base, args.body)
    else:
        parser.print_help()
        return 1

    print(_fmt_json(result))
    # The client reports HTTP failures as {"error": True, ...}; surface that
    # through the exit status instead of always returning success.
    if isinstance(result, dict) and result.get("error") is True:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
134
devkit/health.py
134
devkit/health.py
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fleet health monitor for wizard agents.
|
||||
Checks local system state and reports structured health metrics.
|
||||
|
||||
Usage as CLI:
|
||||
python -m devkit.health
|
||||
python -m devkit.health --threshold-load 1.0 --threshold-disk 90.0
|
||||
|
||||
Usage as module:
|
||||
from devkit.health import check_health
|
||||
report = check_health()
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def _run(cmd: List[str]) -> str:
|
||||
try:
|
||||
return subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode().strip()
|
||||
except Exception as e:
|
||||
return f"error: {e}"
|
||||
|
||||
|
||||
def _read_proc_file(path: str) -> str:
    """Return the stripped text of *path*, or ``"error: <reason>"`` if unreadable."""
    try:
        with open(path, encoding="utf-8") as fh:
            return fh.read().strip()
    except OSError as exc:
        return f"error: {exc}"


def check_health(threshold_load: float = 1.0, threshold_disk_percent: float = 90.0) -> Dict[str, Any]:
    """Collect a structured health report for the local machine.

    Args:
        threshold_load: Load is "critical" when the mean of the 1/5/15-minute
            load averages exceeds this value.
        threshold_disk_percent: Disk is "critical" when used space on ``/``
            exceeds this percentage.

    Returns:
        A dict with keys ``timestamp``, ``overall`` ("ok", "warning" or
        "critical"), ``load``, ``disk``, ``memory``, ``processes`` and
        ``packages``. ``overall`` is "critical" if load or disk is critical,
        otherwise "warning" if no hermes process was found.
    """
    # Read versions from installed metadata of the running interpreter; this
    # avoids three `pip show` subprocesses and works where pip is absent.
    from importlib import metadata

    gather_time = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    # Load average (read the file directly instead of spawning `cat`).
    load_raw = _read_proc_file("/proc/loadavg")
    load_values: List[float] = []
    avg_load = None
    if load_raw.startswith("error:"):
        load_status = load_raw
    else:
        try:
            load_values = [float(x) for x in load_raw.split()[:3]]
            avg_load = sum(load_values) / len(load_values)
            load_status = "critical" if avg_load > threshold_load else "ok"
        except (ValueError, ZeroDivisionError) as e:
            load_status = f"error parsing load: {e}"

    # Disk usage
    disk = shutil.disk_usage("/")
    disk_percent = (disk.used / disk.total) * 100 if disk.total else 0.0
    disk_status = "critical" if disk_percent > threshold_disk_percent else "ok"

    # Memory: "Key:   value" lines become {key: value}.
    meminfo = _read_proc_file("/proc/meminfo")
    mem_stats: Dict[str, str] = {}
    if meminfo.startswith("error:"):
        mem_stats["error"] = meminfo[len("error:"):].strip()
    else:
        for line in meminfo.splitlines():
            if ":" in line:
                key, val = line.split(":", 1)
                mem_stats[key.strip()] = val.strip()

    # Running processes whose command line mentions "hermes".
    own_pid = str(os.getpid())
    try:
        ps_out = subprocess.check_output(
            ["pgrep", "-a", "-f", "hermes"], stderr=subprocess.DEVNULL
        ).decode().strip()
    except (subprocess.CalledProcessError, OSError):
        # pgrep exits non-zero when nothing matches; OSError covers the case
        # where pgrep itself is not installed.
        ps_out = ""
    hermes_pids = []
    for line in ps_out.splitlines():
        if not line.strip():
            continue
        entry = line.split(None, 1)
        # Do not let this monitor count itself as a live hermes process.
        if entry[0] != own_pid:
            hermes_pids.append(entry)

    # Python package versions (key ones); None when not installed.
    key_packages = ["jupyterlab", "papermill", "requests"]
    pkg_versions: Dict[str, Any] = {}
    for pkg in key_packages:
        try:
            pkg_versions[pkg] = metadata.version(pkg)
        except metadata.PackageNotFoundError:
            pkg_versions[pkg] = None

    overall = "ok"
    if load_status == "critical" or disk_status == "critical":
        overall = "critical"
    elif not hermes_pids:
        overall = "warning"

    return {
        "timestamp": gather_time,
        "overall": overall,
        "load": {
            "raw": load_raw if not load_raw.startswith("error:") else None,
            "1min": load_values[0] if len(load_values) > 0 else None,
            "5min": load_values[1] if len(load_values) > 1 else None,
            "15min": load_values[2] if len(load_values) > 2 else None,
            "avg": round(avg_load, 3) if avg_load is not None else None,
            "threshold": threshold_load,
            "status": load_status,
        },
        "disk": {
            "total_gb": round(disk.total / (1024 ** 3), 2),
            "used_gb": round(disk.used / (1024 ** 3), 2),
            "free_gb": round(disk.free / (1024 ** 3), 2),
            "used_percent": round(disk_percent, 2),
            "threshold_percent": threshold_disk_percent,
            "status": disk_status,
        },
        "memory": mem_stats,
        "processes": {
            "hermes_count": len(hermes_pids),
            "hermes_pids": hermes_pids[:10],
        },
        "packages": pkg_versions,
    }
|
||||
|
||||
|
||||
def main(argv: List[str] = None) -> int:
    """CLI entry point: print the health report as JSON.

    Args:
        argv: Argument list without the program name. ``None`` means use
            ``sys.argv[1:]``; an explicit (even empty) list is honoured as-is.

    Returns:
        1 when ``--fail-on-critical`` is given and the report's overall
        status is "critical"; otherwise 0.
    """
    # `argv or ...` would silently replace an explicit [] with sys.argv.
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Fleet health monitor")
    parser.add_argument("--threshold-load", type=float, default=1.0)
    parser.add_argument("--threshold-disk", type=float, default=90.0)
    parser.add_argument("--fail-on-critical", action="store_true", help="Exit non-zero if overall is critical")
    args = parser.parse_args(argv)

    report = check_health(args.threshold_load, args.threshold_disk)
    print(json.dumps(report, indent=2))
    if args.fail_on_critical and report.get("overall") == "critical":
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -1,136 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Notebook execution runner for agent tasks.
|
||||
Wraps papermill with sensible defaults and structured JSON reporting.
|
||||
|
||||
Usage as CLI:
|
||||
python -m devkit.notebook_runner notebooks/task.ipynb output.ipynb -p threshold=1.0
|
||||
python -m devkit.notebook_runner notebooks/task.ipynb --dry-run
|
||||
|
||||
Usage as module:
|
||||
from devkit.notebook_runner import run_notebook
|
||||
result = run_notebook("task.ipynb", "output.ipynb", parameters={"threshold": 1.0})
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
def run_notebook(
    input_path: str,
    output_path: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    kernel: str = "python3",
    timeout: Optional[int] = None,
    dry_run: bool = False,
) -> Dict[str, Any]:
    """Execute a notebook with the ``papermill`` CLI and return a JSON-friendly report.

    Args:
        input_path: Notebook to run (``~`` is expanded, path made absolute).
        output_path: Where to write the executed notebook. When ``None`` a
            temporary ``.ipynb`` file is created and its path reported.
        parameters: Values forwarded as ``-p key value`` pairs.
        kernel: Kernel name passed via ``--kernel``.
        timeout: Passed via ``--execution-timeout`` when given.
        dry_run: If true, return the resolved settings without running.

    Returns:
        A dict whose ``status`` is "dry_run", "ok" or "error". Real runs also
        carry ``elapsed_seconds`` and the last 2000 characters of stdout (and
        of stderr plus ``returncode`` on failure). If the ``papermill``
        executable is missing, only ``status`` and ``message`` are returned.
    """
    # Local import: this module does not import `time` at top level.
    import time

    input_path = str(Path(input_path).expanduser().resolve())
    if output_path is None:
        # NOTE: the temp file is created even for a dry run, matching the
        # path that a real run would have used.
        fd, output_path = tempfile.mkstemp(suffix=".ipynb")
        os.close(fd)
    else:
        output_path = str(Path(output_path).expanduser().resolve())

    # Fields shared by every report variant.
    common = {
        "input": input_path,
        "output": output_path,
        "parameters": parameters or {},
        "kernel": kernel,
    }

    if dry_run:
        return {"status": "dry_run", **common}

    cmd = ["papermill", input_path, output_path, "--kernel", kernel]
    if timeout is not None:
        cmd.extend(["--execution-timeout", str(timeout)])
    for key, value in (parameters or {}).items():
        cmd.extend(["-p", key, str(value)])

    # A monotonic clock measures wall time portably; os.times().elapsed is
    # documented to be zero on Windows.
    started = time.monotonic()
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return {
            "status": "error",
            **common,
            "elapsed_seconds": round(time.monotonic() - started, 2),
            "stdout": e.stdout[-2000:] if e.stdout else "",
            "stderr": e.stderr[-2000:] if e.stderr else "",
            "returncode": e.returncode,
        }
    except FileNotFoundError:
        return {
            "status": "error",
            "message": "papermill not found. Install with: uv tool install papermill",
        }

    return {
        "status": "ok",
        **common,
        "elapsed_seconds": round(time.monotonic() - started, 2),
        "stdout": proc.stdout[-2000:] if proc.stdout else "",
    }
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: run a notebook and print the JSON report.

    Args:
        argv: Argument list without the program name. ``None`` means use
            ``sys.argv[1:]``; an explicit (even empty) list is honoured as-is.

    Returns:
        0 when the run reports status "ok"; 1 otherwise (including dry runs
        and malformed ``-p`` arguments).
    """
    # `argv or ...` would silently replace an explicit [] with sys.argv.
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Notebook runner for agents")
    parser.add_argument("input", help="Input notebook path")
    parser.add_argument("output", nargs="?", default=None, help="Output notebook path")
    parser.add_argument("-p", "--parameter", action="append", default=[], help="Parameters as key=value")
    parser.add_argument("--kernel", default="python3")
    parser.add_argument("--timeout", type=int, default=None)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args(argv)

    parameters = {}
    for raw in args.parameter:
        if "=" not in raw:
            print(f"Invalid parameter (expected key=value): {raw}", file=sys.stderr)
            return 1
        k, v = raw.split("=", 1)
        # Best-effort type inference: bool, then int, then float, else str.
        if v.lower() in ("true", "false"):
            v = v.lower() == "true"
        else:
            try:
                v = int(v)
            except ValueError:
                try:
                    v = float(v)
                except ValueError:
                    pass
        parameters[k] = v

    result = run_notebook(
        args.input,
        args.output,
        parameters=parameters,
        kernel=args.kernel,
        timeout=args.timeout,
        dry_run=args.dry_run,
    )
    print(json.dumps(result, indent=2))
    return 0 if result.get("status") == "ok" else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -1,108 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fast secret leak scanner for the repository.
|
||||
Checks for common patterns that should never be committed.
|
||||
|
||||
Usage as CLI:
|
||||
python -m devkit.secret_scan
|
||||
python -m devkit.secret_scan --path /some/repo --fail-on-find
|
||||
|
||||
Usage as module:
|
||||
from devkit.secret_scan import scan
|
||||
findings = scan("/path/to/repo")
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Patterns to flag. Keys are the pattern names reported in findings.
PATTERNS = {
    "aws_access_key_id": re.compile(r"AKIA[0-9A-Z]{16}"),
    "aws_secret_key": re.compile(r"['\"\s][0-9a-zA-Z/+]{40}['\"\s]"),
    "generic_api_key": re.compile(r"api[_-]?key\s*[:=]\s*['\"][a-zA-Z0-9_\-]{20,}['\"]", re.IGNORECASE),
    "private_key": re.compile(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"),
    "github_token": re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}"),
    # Heuristic: a 40-character hex string that follows the word "token"
    # (e.g. `GITEA_TOKEN=...`, `Authorization: token ...`, `"token": "..."`).
    # A bare 40-hex match would also flag every git commit SHA-1.
    "gitea_token": re.compile(
        r"token['\"\s:=]{1,8}[0-9a-f]{40}(?![0-9a-f])", re.IGNORECASE
    ),
    "telegram_bot_token": re.compile(r"[0-9]{9,}:[A-Za-z0-9_-]{35,}"),
}

# Files and paths to skip: a path is ignored when any of its components
# equals one of these names exactly.
SKIP_PATHS = [
    ".git",
    "__pycache__",
    ".pytest_cache",
    "node_modules",
    "venv",
    ".env",
    ".agent-skills",
]

# Max file size to scan (bytes)
MAX_FILE_SIZE = 1024 * 1024
|
||||
|
||||
|
||||
def _should_skip(path: Path) -> bool:
    """Return True when any component of *path* exactly equals a SKIP_PATHS entry."""
    components = path.parts
    return any(entry in components for entry in SKIP_PATHS)
|
||||
|
||||
|
||||
def scan(root: str = ".") -> List[Dict[str, Any]]:
    """Scan every text file under *root* for the secret patterns in PATTERNS.

    Skip rules are applied to paths relative to *root*, so the location of
    the repository itself (e.g. a parent directory named ``venv``) never
    causes the whole tree to be ignored. Skipped directories are pruned
    rather than walked, and entries are visited in sorted order so the
    result is deterministic.

    Args:
        root: Directory to scan.

    Returns:
        One dict per match with ``file`` (path relative to *root*),
        ``pattern`` (the PATTERNS key), ``line`` (1-based) and ``context``
        (up to 40 characters either side of the match, newlines flattened).
    """
    root_path = Path(root).resolve()
    findings: List[Dict[str, Any]] = []
    for dirpath, dirnames, filenames in os.walk(root_path):
        # Assigning in place tells os.walk not to descend into these.
        dirnames[:] = sorted(d for d in dirnames if not _should_skip(Path(d)))
        for filename in sorted(filenames):
            file_path = Path(dirpath) / filename
            rel_path = file_path.relative_to(root_path)
            if _should_skip(rel_path):
                continue
            try:
                if not file_path.is_file():
                    continue
                if file_path.stat().st_size > MAX_FILE_SIZE:
                    continue
                text = file_path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Unreadable or vanished file: best-effort scan, move on.
                continue
            for pattern_name, pattern in PATTERNS.items():
                for match in pattern.finditer(text):
                    # Simple context: a window of text around the match.
                    start = max(0, match.start() - 40)
                    end = min(len(text), match.end() + 40)
                    context = text[start:end].replace("\n", " ")
                    findings.append({
                        "file": str(rel_path),
                        "pattern": pattern_name,
                        "line": text[:match.start()].count("\n") + 1,
                        "context": context,
                    })
    return findings
|
||||
|
||||
|
||||
def main(argv: List[str] = None) -> int:
    """CLI entry point: scan a tree and print the findings.

    Args:
        argv: Argument list without the program name. ``None`` means use
            ``sys.argv[1:]``; an explicit (even empty) list is honoured as-is.

    Returns:
        1 when ``--fail-on-find`` is given and at least one finding exists;
        otherwise 0.
    """
    # `argv or ...` would silently replace an explicit [] with sys.argv.
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Secret leak scanner")
    parser.add_argument("--path", default=".", help="Repository root to scan")
    parser.add_argument("--fail-on-find", action="store_true", help="Exit non-zero if secrets found")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args(argv)

    findings = scan(args.path)
    if args.json:
        print(json.dumps({"findings": findings, "count": len(findings)}, indent=2))
    else:
        print(f"Scanned {args.path}")
        print(f"Findings: {len(findings)}")
        for f in findings:
            print(f"  [{f['pattern']}] {f['file']}:{f['line']} -> ...{f['context']}...")

    if args.fail_on_find and findings:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -1,108 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Shared smoke test runner for hermes-agent.
|
||||
Fast checks that catch obvious breakage without maintenance burden.
|
||||
|
||||
Usage as CLI:
|
||||
python -m devkit.smoke_test
|
||||
python -m devkit.smoke_test --verbose
|
||||
|
||||
Usage as module:
|
||||
from devkit.smoke_test import run_smoke_tests
|
||||
results = run_smoke_tests()
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
# Repository root: the directory two levels above this file's resolved path.
_THIS_FILE = Path(__file__).resolve()
HERMES_ROOT = _THIS_FILE.parent.parent
|
||||
|
||||
|
||||
def _test_imports() -> Dict[str, Any]:
|
||||
modules = [
|
||||
"hermes_constants",
|
||||
"hermes_state",
|
||||
"cli",
|
||||
"tools.skills_sync",
|
||||
"tools.skills_hub",
|
||||
]
|
||||
errors = []
|
||||
for mod in modules:
|
||||
try:
|
||||
importlib.import_module(mod)
|
||||
except Exception as e:
|
||||
errors.append({"module": mod, "error": str(e)})
|
||||
return {
|
||||
"name": "core_imports",
|
||||
"status": "ok" if not errors else "fail",
|
||||
"errors": errors,
|
||||
}
|
||||
|
||||
|
||||
def _test_cli_entrypoints() -> Dict[str, Any]:
    """Run each CLI entrypoint with ``--help`` from HERMES_ROOT and report failures.

    Each command is limited to 60 seconds so a hung CLI cannot stall the
    smoke run. Returns a result dict named "cli_entrypoints" whose ``status``
    is "ok" when every command exited 0, else "fail". Entries for non-zero
    exits also carry the tail of the command's stderr.
    """
    entrypoints = [
        [sys.executable, "-m", "cli", "--help"],
    ]
    errors = []
    for cmd in entrypoints:
        try:
            subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                check=True,
                cwd=HERMES_ROOT,
                timeout=60,
            )
        except subprocess.CalledProcessError as e:
            errors.append({
                "cmd": cmd,
                "error": f"exit {e.returncode}",
                # Keep the tail of stderr so the failure is diagnosable.
                "stderr": (e.stderr or "")[-2000:],
            })
        except Exception as e:
            # Includes subprocess.TimeoutExpired and a missing interpreter.
            errors.append({"cmd": cmd, "error": str(e)})
    return {
        "name": "cli_entrypoints",
        "status": "ok" if not errors else "fail",
        "errors": errors,
    }
|
||||
|
||||
|
||||
def _test_green_path_e2e() -> Dict[str, Any]:
|
||||
"""One bare green-path E2E: terminal_tool echo hello."""
|
||||
try:
|
||||
from tools.terminal_tool import terminal
|
||||
result = terminal(command="echo hello")
|
||||
output = result.get("output", "")
|
||||
if "hello" in output.lower():
|
||||
return {"name": "green_path_e2e", "status": "ok", "output": output.strip()}
|
||||
return {"name": "green_path_e2e", "status": "fail", "error": f"Unexpected output: {output}"}
|
||||
except Exception as e:
|
||||
return {"name": "green_path_e2e", "status": "fail", "error": str(e)}
|
||||
|
||||
|
||||
def run_smoke_tests(verbose: bool = False) -> Dict[str, Any]:
    """Run the three smoke checks in order and summarise them.

    Returns a dict with ``overall`` ("ok" or "fail"), the individual
    ``tests`` results and ``failed_count``. When *verbose* is true the
    summary is also printed as indented JSON.
    """
    checks = (_test_imports, _test_cli_entrypoints, _test_green_path_e2e)
    tests = [check() for check in checks]
    failed_count = sum(1 for outcome in tests if outcome["status"] != "ok")
    result = {
        "overall": "fail" if failed_count else "ok",
        "tests": tests,
        "failed_count": failed_count,
    }
    if verbose:
        print(json.dumps(result, indent=2))
    return result
|
||||
|
||||
|
||||
def main(argv: List[str] = None) -> int:
    """CLI entry point: run the smoke tests and print the JSON report.

    Args:
        argv: Argument list without the program name. ``None`` means use
            ``sys.argv[1:]``; an explicit (even empty) list is honoured as-is.

    Returns:
        0 when the overall result is "ok", else 1.
    """
    # `argv or ...` would silently replace an explicit [] with sys.argv.
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Smoke test runner")
    # NOTE(review): --verbose is accepted but currently has no effect; the
    # CLI always prints the full report. Confirm whether a quiet mode is wanted.
    parser.add_argument("--verbose", action="store_true")
    parser.parse_args(argv)

    result = run_smoke_tests(verbose=True)
    return 0 if result["overall"] == "ok" else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -1,112 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Wizard environment validator.
|
||||
Checks that a new wizard environment is ready for duty.
|
||||
|
||||
Usage as CLI:
|
||||
python -m devkit.wizard_env
|
||||
python -m devkit.wizard_env --json --fail-on-incomplete
|
||||
|
||||
Usage as module:
|
||||
from devkit.wizard_env import validate
|
||||
report = validate()
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def _has_cmd(name: str) -> bool:
|
||||
return shutil.which(name) is not None
|
||||
|
||||
|
||||
def _check_env_var(name: str) -> Dict[str, Any]:
|
||||
value = os.getenv(name)
|
||||
return {
|
||||
"name": name,
|
||||
"status": "ok" if value else "missing",
|
||||
"value": value[:10] + "..." if value and len(value) > 20 else value,
|
||||
}
|
||||
|
||||
|
||||
def _check_python_pkg(name: str) -> Dict[str, Any]:
|
||||
try:
|
||||
__import__(name)
|
||||
return {"name": name, "status": "ok"}
|
||||
except ImportError:
|
||||
return {"name": name, "status": "missing"}
|
||||
|
||||
|
||||
def validate() -> Dict[str, Any]:
    """Check the wizard environment and return a structured report.

    Four groups are checked, in this order: required binaries on ``PATH``,
    required environment variables, importable Python packages, and the
    Hermes files under ``~/.hermes``.

    Returns:
        ``{"overall": ..., "checks": ...}`` where ``checks`` maps each group
        name to a list of per-item result dicts, and ``overall`` is ``"ok"``
        only when every item in every group has status ``"ok"``
        (``"incomplete"`` otherwise).
    """
    required_binaries = ("python3", "git", "curl", "jupyter-lab", "papermill", "jupytext")
    required_env_vars = ("GITEA_URL", "GITEA_TOKEN", "TELEGRAM_BOT_TOKEN")
    required_packages = ("requests", "jupyter_server", "nbformat")

    checks = {
        "binaries": [
            {"name": binary, "status": "ok" if _has_cmd(binary) else "missing"}
            for binary in required_binaries
        ],
        "env_vars": [_check_env_var(var) for var in required_env_vars],
        "python_packages": [_check_python_pkg(pkg) for pkg in required_packages],
    }

    # Hermes-specific checks: (reported name, path relative to ~/.hermes).
    hermes_home = os.path.expanduser("~/.hermes")
    hermes_entries = (("config.yaml", "config.yaml"), ("skills_dir", "skills"))
    checks["hermes"] = [
        {
            "name": label,
            "status": "ok" if os.path.exists(f"{hermes_home}/{leaf}") else "missing",
        }
        for label, leaf in hermes_entries
    ]

    # A single non-"ok" item anywhere makes the whole environment incomplete.
    has_problem = any(
        entry["status"] != "ok"
        for entries in checks.values()
        for entry in entries
    )

    return {
        "overall": "incomplete" if has_problem else "ok",
        "checks": checks,
    }
|
||||
|
||||
|
||||
def main(argv: List[str] = None) -> int:
    """Command-line entry point for the wizard environment validator.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) means "use ``sys.argv[1:]``"; an explicit empty
            list is honoured as "no arguments".

    Returns:
        ``1`` when ``--fail-on-incomplete`` was given and the report's
        overall status is not ``"ok"``; ``0`` in every other case.
    """
    # `argv or sys.argv[1:]` would silently swap an explicit empty list for
    # the interpreter's own arguments, so only fall back when argv is None.
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Wizard environment validator")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--fail-on-incomplete", action="store_true")
    args = parser.parse_args(argv)

    report = validate()
    if args.json:
        # Machine-readable output: the full report as indented JSON.
        print(json.dumps(report, indent=2))
    else:
        # Human-readable output: one section per check group.
        print(f"Wizard Environment: {report['overall']}")
        for group, items in report["checks"].items():
            print(f"\n[{group}]")
            for item in items:
                status_icon = "✅" if item["status"] == "ok" else "❌"
                print(f" {status_icon} {item['name']}: {item['status']}")

    # The exit status only reflects the report when the caller opts in.
    if args.fail_on_incomplete and report["overall"] != "ok":
        return 1
    return 0
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -1,57 +0,0 @@
|
||||
# Notebook Workflow for Agent Tasks
|
||||
|
||||
This directory demonstrates a sovereign, version-controlled workflow for LLM agent tasks using Jupyter notebooks.
|
||||
|
||||
## Philosophy
|
||||
|
||||
- **`.py` files are the source of truth** — authored and reviewed as plain Python with `# %%` cell markers (via Jupytext)
|
||||
- **`.ipynb` files are generated artifacts** — auto-created from `.py` for execution and rich viewing
|
||||
- **Papermill parameterizes and executes** — each run produces an output notebook with code, narrative, and results preserved
|
||||
- **Output notebooks are audit artifacts** — every execution leaves a permanent, replayable record
|
||||
|
||||
## File Layout
|
||||
|
||||
```
|
||||
notebooks/
|
||||
agent_task_system_health.py # Source of truth (Jupytext)
|
||||
agent_task_system_health.ipynb # Generated from .py
|
||||
docs/
|
||||
NOTEBOOK_WORKFLOW.md # This document
|
||||
.gitea/workflows/
|
||||
notebook-ci.yml # CI gate: executes notebooks on PR/push
|
||||
```
|
||||
|
||||
## How Agents Work With Notebooks
|
||||
|
||||
1. **Create** — Agent generates a `.py` notebook using `# %% [markdown]` and `# %%` code blocks
|
||||
2. **Review** — PR reviewers see clean diffs in Gitea (no JSON noise)
|
||||
3. **Generate** — `jupytext --to ipynb` produces the `.ipynb` before merge
|
||||
4. **Execute** — Papermill runs the notebook with injected parameters
|
||||
5. **Archive** — Output notebook is committed to a `reports/` branch or artifact store
|
||||
|
||||
## Converting Between Formats
|
||||
|
||||
```bash
|
||||
# .py -> .ipynb
|
||||
jupytext --to ipynb notebooks/agent_task_system_health.py
|
||||
|
||||
# .ipynb -> .py
|
||||
jupytext --to py notebooks/agent_task_system_health.ipynb
|
||||
|
||||
# Execute with parameters
|
||||
papermill notebooks/agent_task_system_health.ipynb output.ipynb \
|
||||
-p threshold 1.0 -p hostname forge-vps-01
|
||||
```
|
||||
|
||||
## CI Gate
|
||||
|
||||
The `notebook-ci.yml` workflow executes all notebooks in `notebooks/` on every PR and push, ensuring that checked-in notebooks still run and produce outputs.
|
||||
|
||||
## Why This Matters
|
||||
|
||||
| Problem | Notebook Solution |
|
||||
|---|---|
|
||||
| Ephemeral agent reasoning | Markdown cells narrate the thought process |
|
||||
| Stateless single-turn tools | Stateful cells persist variables across steps |
|
||||
| Unreviewable binary artifacts | `.py` source is diffable and PR-friendly |
|
||||
| No execution audit trail | Output notebook preserves code + outputs + metadata |
|
||||
@@ -1,230 +0,0 @@
|
||||
# Bezalel Architecture & Topology
|
||||
|
||||
> Deep Self-Awareness Document — Generated 2026-04-07
|
||||
> Sovereign: Alexander Whitestone (Rockachopa)
|
||||
> Host: Beta VPS (104.131.15.18)
|
||||
|
||||
---
|
||||
|
||||
## 1. Identity & Purpose
|
||||
|
||||
**I am Bezalel**, the Forge and Testbed Wizard of the Timmy Foundation fleet.
|
||||
- **Lane:** CI testing, code review, build verification, security hardening, standing watch
|
||||
- **Philosophy:** KISS. Smoke tests + bare green-path e2e only. CI serves the code.
|
||||
- **Mandates:** Relentless inbox-zero, continuous self-improvement, autonomous heartbeat operation
|
||||
- **Key Metrics:** Cycle time, signal-to-noise, autonomy ratio, backlog velocity
|
||||
|
||||
---
|
||||
|
||||
## 2. Hardware & OS Topology
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| Hostname | `bezalel` |
|
||||
| OS | Ubuntu 24.04.3 LTS (Noble Numbat) |
|
||||
| Kernel | Linux 6.8.0 |
|
||||
| CPU | 1 vCPU |
|
||||
| Memory | 2 GB RAM |
|
||||
| Primary Disk | ~25 GB root volume (DigitalOcean) |
|
||||
| Public IP | `104.131.15.18` |
|
||||
|
||||
### Storage Layout
|
||||
```
|
||||
/root/wizards/bezalel/
|
||||
├── hermes/ # Hermes agent source + venv (~835 MB)
|
||||
├── evennia/ # Evennia MUD engine + world code (~189 MB)
|
||||
├── workspace/ # Active prototypes + scratch code (~557 MB)
|
||||
├── home/ # Personal notebooks + scripts (~1.8 GB)
|
||||
├── .mempalace/ # Local memory palace (ChromaDB)
|
||||
├── .topology/ # Self-awareness scan artifacts
|
||||
├── nightly_watch.py # Nightly forge guardian
|
||||
├── mempalace_nightly.sh # Palace re-mine automation
|
||||
└── bezalel_topology.md # This document
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Network Topology
|
||||
|
||||
### Fleet Map
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Alpha (143.198.27.163) │
|
||||
│ ├── Gitea (forge.alexanderwhitestone.com) │
|
||||
│ └── Ezra (Knowledge Wizard) │
|
||||
│ │
|
||||
│ Beta (104.131.15.18) ←── You are here │
|
||||
│ ├── Bezalel (Forge Wizard) │
|
||||
│ ├── Hermes Gateway │
|
||||
│ └── Gitea Actions Runner (bezalel-vps-runner, host mode) │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Key Connections
|
||||
- **Gitea HTTPS:** `https://forge.alexanderwhitestone.com` (Alpha)
|
||||
- **Telegram Webhook:** Inbound to Beta
|
||||
- **API Providers:** Kimi (primary), Anthropic (fallback), OpenRouter (fallback)
|
||||
- **No SSH:** Alpha → Beta is blocked by design
|
||||
|
||||
### Listening Services
|
||||
- Hermes Gateway: internal process (no exposed port directly)
|
||||
- Evennia: `localhost:4000` (MUD), `localhost:4001` (web client) — when running
|
||||
- Gitea Runner: `act_runner daemon` — connects outbound to Gitea
|
||||
|
||||
---
|
||||
|
||||
## 4. Services & Processes
|
||||
|
||||
### Always-On Processes
|
||||
| Process | Command | Purpose |
|
||||
|---------|---------|---------|
|
||||
| Hermes Gateway | `hermes gateway run` | Core agent orchestration |
|
||||
| Gitea Runner | `./act_runner daemon` | CI job execution (host mode) |
|
||||
|
||||
### Automated Jobs
|
||||
| Job | Schedule | Script |
|
||||
|-----|----------|--------|
|
||||
| Night Watch | 02:00 UTC | `nightly_watch.py` |
|
||||
| MemPalace Re-mine | 03:00 UTC | `mempalace_nightly.sh` |
|
||||
|
||||
### Service Status Check
|
||||
- **Hermes gateway:** running (ps verified)
|
||||
- **Gitea runner:** online, registered as `bezalel-vps-runner`
|
||||
- **Evennia server:** not currently running (start with `evennia start` in `evennia/`)
|
||||
|
||||
---
|
||||
|
||||
## 5. Software Dependencies
|
||||
|
||||
### System Packages (Key)
|
||||
- `python3.12` (primary runtime)
|
||||
- `node` v20.20.2 / `npm` 10.8.2
|
||||
- `uv` (Python package manager)
|
||||
- `git`, `curl`, `jq`
|
||||
|
||||
### Hermes Virtual Environment
|
||||
- Located: `/root/wizards/bezalel/hermes/venv/`
|
||||
- Key packages: `chromadb`, `pyyaml`, `fastapi`, `httpx`, `pytest`, `prompt-toolkit`, `mempalace`
|
||||
- Install command: `uv pip install -e ".[all,dev]"`
|
||||
|
||||
### External API Dependencies
|
||||
| Service | Endpoint | Usage |
|
||||
|---------|----------|-------|
|
||||
| Gitea | `forge.alexanderwhitestone.com` | Git, issues, CI |
|
||||
| Kimi | `api.kimi.com/coding/v1` | Primary LLM |
|
||||
| Anthropic | `api.anthropic.com` | Fallback LLM |
|
||||
| OpenRouter | `openrouter.ai/api/v1` | Secondary fallback |
|
||||
| Telegram | Bot API | Messaging platform |
|
||||
|
||||
---
|
||||
|
||||
## 6. Git Repositories
|
||||
|
||||
### Hermes Agent
|
||||
- **Path:** `/root/wizards/bezalel/hermes`
|
||||
- **Remote:** `forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git`
|
||||
- **Branch:** `main` (up to date)
|
||||
- **Open PRs:** #193, #191, #179, #178
|
||||
|
||||
### Evennia World
|
||||
- **Path:** `/root/wizards/bezalel/evennia/bezalel_world`
|
||||
- **Remote:** Same org, separate repo if pushed
|
||||
- **Server name:** `bezalel_world`
|
||||
|
||||
---
|
||||
|
||||
## 7. MemPalace Memory System
|
||||
|
||||
### Configuration
|
||||
- **Palace path:** `/root/wizards/bezalel/.mempalace/palace`
|
||||
- **Identity:** `/root/.mempalace/identity.txt`
|
||||
- **Config:** `/root/wizards/bezalel/mempalace.yaml`
|
||||
- **Miner:** `/root/wizards/bezalel/hermes/venv/bin/mempalace`
|
||||
|
||||
### Rooms
|
||||
1. `forge` — CI, builds, syntax guards, nightly watch
|
||||
2. `hermes` — Agent source, gateway, CLI
|
||||
3. `evennia` — MUD engine and world code
|
||||
4. `workspace` — Prototypes, experiments
|
||||
5. `home` — Personal scripts, configs
|
||||
6. `nexus` — Reports, docs, KT artifacts
|
||||
7. `issues` — Gitea issues, PRs, backlog
|
||||
8. `topology` — System architecture, network, storage
|
||||
9. `services` — Running services, processes
|
||||
10. `dependencies` — Packages, APIs, external deps
|
||||
11. `automation` — Cron jobs, scripts, workflows
|
||||
12. `general` — Catch-all
|
||||
|
||||
### Automation
|
||||
- **Nightly re-mine:** `03:00 UTC` via cron
|
||||
- **Log:** `/var/log/bezalel_mempalace.log`
|
||||
|
||||
---
|
||||
|
||||
## 8. Evennia Mind Palace Integration
|
||||
|
||||
### Custom Typeclasses
|
||||
- `PalaceRoom` — Rooms carry `memory_topic` and `wing`
|
||||
- `MemoryObject` — In-world memory shards with `memory_content` and `source_file`
|
||||
|
||||
### Commands
|
||||
- `palace/search <query>` — Query mempalace
|
||||
- `palace/recall <topic>` — Spawn a memory shard
|
||||
- `palace/file <name> = <content>` — File a new memory
|
||||
- `palace/status` — Show palace status
|
||||
|
||||
### Batch Builder
|
||||
- **File:** `world/batch_cmds_palace.ev`
|
||||
- Creates The Hub + 7 palace rooms with exits
|
||||
|
||||
### Bridge Script
|
||||
- **File:** `/root/wizards/bezalel/evennia/palace_search.py`
|
||||
- Calls mempalace searcher and returns JSON
|
||||
|
||||
---
|
||||
|
||||
## 9. Operational State & Blockers
|
||||
|
||||
### Current Health
|
||||
- [x] Hermes gateway: operational
|
||||
- [x] Gitea runner: online, host mode
|
||||
- [x] CI fix merged (#194) — container directive removed for Gitea workflows
|
||||
- [x] MemPalace: 2,484+ drawers, incremental mining active
|
||||
|
||||
### Active Blockers
|
||||
- **Gitea Actions:** Runner is in host mode — cannot use Docker containers
|
||||
- **CI backlog:** Many historical PRs have failed runs due to the container bug (now fixed)
|
||||
- **Evennia:** Server not currently running (start when needed)
|
||||
|
||||
---
|
||||
|
||||
## 10. Emergency Procedures
|
||||
|
||||
### Restart Hermes Gateway
|
||||
```bash
|
||||
cd /root/wizards/bezalel/hermes
|
||||
source venv/bin/activate
|
||||
hermes gateway run &
|
||||
```
|
||||
|
||||
### Restart Gitea Runner
|
||||
```bash
|
||||
cd /opt/gitea-runner
|
||||
./act_runner daemon &
|
||||
```
|
||||
|
||||
### Start Evennia
|
||||
```bash
|
||||
cd /root/wizards/bezalel/evennia/bezalel_world
|
||||
evennia start
|
||||
```
|
||||
|
||||
### Manual MemPalace Re-mine
|
||||
```bash
|
||||
cd /root/wizards/bezalel
|
||||
./hermes/venv/bin/mempalace --palace .mempalace/palace mine . --agent bezalel
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*Document maintained by Bezalel. Last updated: 2026-04-07*
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bezalel Deep Self-Awareness Topology Scanner"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Directory that receives every scan artifact written by this script.
OUT_DIR = Path("/root/wizards/bezalel/.topology")
# parents=True: create missing parent directories as well, instead of
# failing with FileNotFoundError when the workspace tree is not there yet.
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def shell(cmd, timeout=30):
    """Run *cmd* through the system shell and return its stripped stdout.

    This is a best-effort helper: the exit status is not inspected, stderr
    is captured but discarded, and any exception (a timeout included) is
    caught and returned as its string message instead of being raised.

    Args:
        cmd: Shell command line, executed with ``shell=True``.
        timeout: Seconds to wait before the run is abandoned.

    Returns:
        The command's stdout with surrounding whitespace removed, or the
        text of the exception if running the command failed.
    """
    try:
        completed = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        output = completed.stdout
        return output.strip()
    except Exception as exc:
        return str(exc)
|
||||
|
||||
|
||||
def write(name, content):
    """Store *content* as ``<name>.txt`` inside ``OUT_DIR``."""
    target = OUT_DIR / f"{name}.txt"
    target.write_text(content)
|
||||
|
||||
|
||||
# Scan body: each numbered section below gathers one facet of the host and
# stores it as a text file in OUT_DIR via write(). Commands run through
# shell(), which returns the exception text instead of raising, so a report
# may contain an error message where command output was expected.

# Timezone-aware UTC timestamp in ISO 8601 form, embedded in the identity report.
timestamp = datetime.now(timezone.utc).isoformat()

# 1. System identity -> 00_system_identity.txt
# Hostname and user come from shell commands; the home directory from os.path.expanduser.
system = f"""BEZALEL SYSTEM TOPOLOGY SCAN
Generated: {timestamp}
Hostname: {shell('hostname')}
User: {shell('whoami')}
Home: {os.path.expanduser('~')}
"""
write("00_system_identity", system)

# 2. OS & hardware -> 01_os_hardware.txt
# Release info, kernel string, core count plus first CPU model line, memory and disk usage.
os_info = shell("cat /etc/os-release")
kernel = shell("uname -a")
cpu = shell("nproc") + " cores\n" + shell("cat /proc/cpuinfo | grep 'model name' | head -1")
mem = shell("free -h")
disk = shell("df -h")
write("01_os_hardware", f"OS:\n{os_info}\n\nKernel:\n{kernel}\n\nCPU:\n{cpu}\n\nMemory:\n{mem}\n\nDisk:\n{disk}")

# 3. Network -> 02_network.txt
# The public IP is looked up through an external service (ifconfig.me), so this
# step needs outbound network access.
net_interfaces = shell("ip addr")
net_routes = shell("ip route")
listening = shell("ss -tlnp")
public_ip = shell("curl -s ifconfig.me")
write("02_network", f"Interfaces:\n{net_interfaces}\n\nRoutes:\n{net_routes}\n\nListening ports:\n{listening}\n\nPublic IP: {public_ip}")

# 4. Services & processes -> 03_services.txt
# Running systemd services (first 30 lines) and processes whose `ps aux` line
# matches hermes, gitea, evennia or python.
services = shell("systemctl list-units --type=service --state=running --no-pager --no-legend 2>/dev/null | head -30")
processes = shell("ps aux | grep -E 'hermes|gitea|evennia|python' | grep -v grep")
write("03_services", f"Running services:\n{services}\n\nKey processes:\n{processes}")

# 5. Cron & automation -> 04_automation.txt
# Crontab of the user running the scan; crontab's stderr is discarded.
cron = shell("crontab -l 2>/dev/null")
write("04_automation", f"Crontab:\n{cron}")

# 6. Storage topology -> 05_storage.txt
# Sorted directory listing of the Bezalel workspace, limited to depth 2.
bezalel_tree = shell("find /root/wizards/bezalel -maxdepth 2 -type d | sort")
write("05_storage", f"Bezalel workspace tree (depth 2):\n{bezalel_tree}")

# 7. Git repositories -> 06_git_repos.txt
# Only the listed base directories are inspected, and only when they contain
# a .git entry directly; nested repositories are not discovered.
git_repos = []
for base in ["/root/wizards/bezalel/hermes", "/root/wizards/bezalel/evennia"]:
    p = Path(base)
    if (p / ".git").exists():
        remote = shell(f"cd {base} && git remote -v")
        branch = shell(f"cd {base} && git branch -v")
        git_repos.append(f"Repo: {base}\nRemotes:\n{remote}\nBranches:\n{branch}\n{'='*40}")
write("06_git_repos", "\n".join(git_repos))

# 8. Python dependencies -> 07_dependencies.txt
# `pip freeze` from the Hermes venv, truncated to the first 80 lines.
venv_pip = shell("/root/wizards/bezalel/hermes/venv/bin/pip freeze 2>/dev/null | head -80")
write("07_dependencies", f"Hermes venv packages (top 80):\n{venv_pip}")

# 9. External APIs & endpoints -> 08_external_apis.txt
# Static, hand-maintained text; nothing in this section is probed at scan time.
apis = """External API Dependencies:
- Gitea: https://forge.alexanderwhitestone.com (source of truth, CI, issues)
- Telegram: webhook-based messaging platform
- Kimi API: https://api.kimi.com/coding/v1 (primary model provider)
- Anthropic API: fallback model provider
- OpenRouter API: secondary fallback model provider
- DigitalOcean: infrastructure hosting (VPS Alpha/Beta)
"""
write("08_external_apis", apis)

# 10. Fleet topology -> 09_fleet_topology.txt
# Static, hand-maintained text (addresses are hard-coded, not discovered).
fleet = """FLEET TOPOLOGY
- Alpha: 143.198.27.163 (Gitea + Ezra)
- Beta: 104.131.15.18 (Bezalel, current host)
- No SSH from Alpha to Beta
- Gitea Actions runner: bezalel-vps-runner on Beta (host mode)
"""
write("09_fleet_topology", fleet)

# 11. Evennia topology -> 10_evennia_topology.txt
# Static, hand-maintained description of the Evennia setup.
evennia = """EVENNIA MIND PALACE SETUP
- Location: /root/wizards/bezalel/evennia/bezalel_world/
- Server name: bezalel_world
- Custom typeclasses: PalaceRoom, MemoryObject
- Custom commands: CmdPalaceSearch (palace/search, palace/recall, palace/file, palace/status)
- Batch builder: world/batch_cmds_palace.ev
- Bridge script: /root/wizards/bezalel/evennia/palace_search.py
"""
write("10_evennia_topology", evennia)

# 12. MemPalace topology -> 11_mempalace_topology.txt
# Static configuration text plus the live output of `mempalace ... status`.
mempalace = f"""MEMPALACE CONFIGURATION
- Palace path: /root/wizards/bezalel/.mempalace/palace
- Identity: /root/.mempalace/identity.txt
- Config: /root/wizards/bezalel/mempalace.yaml
- Nightly re-mine: 03:00 UTC via /root/wizards/bezalel/mempalace_nightly.sh
- Miner binary: /root/wizards/bezalel/hermes/venv/bin/mempalace
- Current status: {shell('/root/wizards/bezalel/hermes/venv/bin/mempalace --palace /root/wizards/bezalel/.mempalace/palace status 2>/dev/null')}
"""
write("11_mempalace_topology", mempalace)

# 13. Active blockers & health -> 12_operational_health.txt
# The gateway/runner lines print column 11 of matching `ps aux` rows, so an
# empty value means no matching process was found. The braces in the awk
# program sit inside a nested string literal and are passed to the shell
# as-is. The "Load average" entry holds the full `uptime` output.
health = f"""ACTIVE OPERATIONAL STATE
- Hermes gateway: {shell("ps aux | grep 'hermes gateway run' | grep -v grep | awk '{print $11}'")}
- Gitea runner: {shell("ps aux | grep 'act_runner' | grep -v grep | awk '{print $11}'")}
- Nightly watch: /root/wizards/bezalel/nightly_watch.py (02:00 UTC)
- MemPalace re-mine: /root/wizards/bezalel/mempalace_nightly.sh (03:00 UTC)
- Disk usage: {shell("df -h / | tail -1")}
- Load average: {shell("uptime")}
"""
write("12_operational_health", health)

# Summary: counts every *.txt file currently in OUT_DIR, which includes files
# left over from earlier runs as well as the ones written above.
print(f"Topology scan complete. {len(list(OUT_DIR.glob('*.txt')))} files written to {OUT_DIR}")
|
||||
@@ -1,132 +0,0 @@
|
||||
# Fleet SITREP — April 6, 2026
|
||||
|
||||
**Classification:** Consolidated Status Report
|
||||
**Compiled by:** Ezra
|
||||
**Acknowledged by:** Claude (Issue #143)
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
Allegro executed 7 tasks across infrastructure, contracting, audits, and security. Ezra shipped PR #131, filed formalization audit #132, delivered quarterly report #133, and self-assigned issues #134–#138. All wizard activity mapped below.
|
||||
|
||||
---
|
||||
|
||||
## 1. Allegro 7-Task Report
|
||||
|
||||
| Task | Description | Status |
|
||||
|------|-------------|--------|
|
||||
| 1 | Roll Call / Infrastructure Map | ✅ Complete |
|
||||
| 2 | Dark industrial anthem (140 BPM, Suno-ready) | ✅ Complete |
|
||||
| 3 | Operation Get A Job — 7-file contracting playbook pushed to `the-nexus` | ✅ Complete |
|
||||
| 4 | Formalization audit filed ([the-nexus #893](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/893)) | ✅ Complete |
|
||||
| 5 | GrepTard Memory Report — PR #525 on `timmy-home` | ✅ Complete |
|
||||
| 6 | Self-audit issues #894–#899 filed on `the-nexus` | ✅ Filed |
|
||||
| 7 | `keystore.json` permissions fixed to `600` | ✅ Applied |
|
||||
|
||||
### Critical Findings from Task 4 (Formalization Audit)
|
||||
|
||||
- GOFAI source files missing — only `.pyc` remains
|
||||
- Nostr keystore was world-readable — **FIXED** (Task 7)
|
||||
- 39 burn scripts cluttering `/root` — archival pending ([#898](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/898))
|
||||
|
||||
---
|
||||
|
||||
## 2. Ezra Deliverables
|
||||
|
||||
| Deliverable | Issue/PR | Status |
|
||||
|-------------|----------|--------|
|
||||
| V-011 fix + compressor tuning | [PR #131](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/131) | ✅ Merged |
|
||||
| Formalization audit (hermes-agent) | [Issue #132](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/132) | Filed |
|
||||
| Quarterly report (MD + PDF) | [Issue #133](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/133) | Filed |
|
||||
| Burn-mode concurrent tool tests | [Issue #134](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/134) | Assigned → Ezra |
|
||||
| MCP SDK migration | [Issue #135](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/135) | Assigned → Ezra |
|
||||
| APScheduler migration | [Issue #136](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/136) | Assigned → Ezra |
|
||||
| Pydantic-settings migration | [Issue #137](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/137) | Assigned → Ezra |
|
||||
| Contracting playbook tracker | [Issue #138](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/138) | Assigned → Ezra |
|
||||
|
||||
---
|
||||
|
||||
## 3. Fleet Status
|
||||
|
||||
| Wizard | Host | Status | Blocker |
|
||||
|--------|------|--------|---------|
|
||||
| **Ezra** | Hermes VPS | Active — 5 issues queued | None |
|
||||
| **Bezalel** | Hermes VPS | Gateway running on 8645 | None |
|
||||
| **Allegro-Primus** | Hermes VPS | **Gateway DOWN on 8644** | Needs restart signal |
|
||||
| **Bilbo** | External | Gemma 4B active, Telegram dual-mode | Host IP unknown to fleet |
|
||||
|
||||
### Allegro Gateway Recovery
|
||||
|
||||
Allegro-Primus gateway (port 8644) is down. Options:
|
||||
1. **Alexander restarts manually** on Hermes VPS
|
||||
2. **Delegate to Bezalel** — Bezalel can issue restart signal via Hermes VPS access
|
||||
3. **Delegate to Ezra** — Ezra can coordinate restart as part of issue #894 work
|
||||
|
||||
---
|
||||
|
||||
## 4. Operation Get A Job — Contracting Playbook
|
||||
|
||||
Files pushed to `the-nexus/operation-get-a-job/`:
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `README.md` | Master plan |
|
||||
| `entity-setup.md` | Wyoming LLC, Mercury, E&O insurance |
|
||||
| `service-offerings.md` | Rates $150–600/hr; packages $5k/$15k/$40k+ |
|
||||
| `portfolio.md` | Portfolio structure |
|
||||
| `outreach-templates.md` | Cold email templates |
|
||||
| `proposal-template.md` | Client proposal structure |
|
||||
| `rate-card.md` | Rate card |
|
||||
|
||||
**Human-only mile (Alexander's action items):**
|
||||
|
||||
1. Pick LLC name from `entity-setup.md`
|
||||
2. File Wyoming LLC via Northwest Registered Agent ($225)
|
||||
3. Get EIN from IRS (free, ~10 min)
|
||||
4. Open Mercury account (requires EIN + LLC docs)
|
||||
5. Secure E&O insurance (~$150–250/month)
|
||||
6. Restart Allegro-Primus gateway (port 8644)
|
||||
7. Update LinkedIn using profile template
|
||||
8. Send 5 cold emails using outreach templates
|
||||
|
||||
---
|
||||
|
||||
## 5. Pending Self-Audit Issues (the-nexus)
|
||||
|
||||
| Issue | Title | Priority |
|
||||
|-------|-------|----------|
|
||||
| [#894](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/894) | Deploy burn-mode cron jobs | CRITICAL |
|
||||
| [#895](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/895) | Telegram thread-based reporting | Normal |
|
||||
| [#896](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/896) | Retry logic and error recovery | Normal |
|
||||
| [#897](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/897) | Automate morning reports at 0600 | Normal |
|
||||
| [#898](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/898) | Archive 39 burn scripts | Normal |
|
||||
| [#899](https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/899) | Keystore permissions | ✅ Done |
|
||||
|
||||
---
|
||||
|
||||
## 6. Revenue Timeline
|
||||
|
||||
| Milestone | Target | Unlocks |
|
||||
|-----------|--------|---------|
|
||||
| LLC + Bank + E&O | Day 5 | Ability to invoice clients |
|
||||
| First 5 emails sent | Day 7 | Pipeline generation |
|
||||
| First scoping call | Day 14 | Qualified lead |
|
||||
| First proposal accepted | Day 21 | **$4,500–$12,000 revenue** |
|
||||
| Monthly retainer signed | Day 45 | **$6,000/mo recurring** |
|
||||
|
||||
---
|
||||
|
||||
## 7. Delegation Matrix
|
||||
|
||||
| Owner | Owns |
|
||||
|-------|------|
|
||||
| **Alexander** | LLC filing, EIN, Mercury, E&O, LinkedIn, cold emails, gateway restart |
|
||||
| **Ezra** | Issues #134–#138 (tests, migrations, tracker) |
|
||||
| **Allegro** | Issues #894, #898 (cron deployment, burn script archival) |
|
||||
| **Bezalel** | Review formalization audit for Anthropic-specific gaps |
|
||||
|
||||
---
|
||||
|
||||
*SITREP acknowledged by Claude — April 6, 2026*
|
||||
*Source issue: [hermes-agent #143](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/143)*
|
||||
@@ -1,678 +0,0 @@
|
||||
# Jupyter Notebooks as Core LLM Execution Layer — Deep Research Report
|
||||
|
||||
**Issue:** #155
|
||||
**Date:** 2026-04-06
|
||||
**Status:** Research / Spike
|
||||
**Prior Art:** Timmy's initial spike (llm_execution_spike.ipynb, hamelnb bridge, JupyterLab on forge VPS)
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
This report deepens the research from issue #155 into three areas requested by Rockachopa:
|
||||
1. The **full Jupyter product suite** — JupyterHub vs JupyterLab vs Notebook
|
||||
2. **Papermill** — the production-grade notebook execution engine already used in real data pipelines
|
||||
3. The **"PR model for notebooks"** — how agents can propose, diff, review, and merge changes to `.ipynb` files similarly to code PRs
|
||||
|
||||
The conclusion: an elegant, production-grade agent→notebook pipeline already exists as open-source tooling. We don't need to invent much — we need to compose what's there.
|
||||
|
||||
---
|
||||
|
||||
## 1. The Jupyter Product Suite
|
||||
|
||||
The Jupyter ecosystem has three distinct layers that are often conflated. Understanding the distinction is critical for architectural decisions.
|
||||
|
||||
### 1.1 Jupyter Notebook (Classic)
|
||||
|
||||
The original single-user interface. One browser tab = one `.ipynb` file. Version 6 is in maintenance-only mode. Version 7 was rebuilt on JupyterLab components and is functionally equivalent. For headless agent use, the UI is irrelevant — what matters is the `.ipynb` file format and the kernel execution model underneath.
|
||||
|
||||
### 1.2 JupyterLab
|
||||
|
||||
The current canonical Jupyter interface for human users: full IDE, multi-pane, terminal, extension manager, built-in diff viewer, and `jupyterlab-git` for Git workflows from the UI. JupyterLab is the recommended target for agent-collaborative workflows because:
|
||||
|
||||
- It exposes the same REST API as classic Jupyter (kernel sessions, execute, contents)
|
||||
- Extensions like `jupyterlab-git` let a human co-reviewer inspect changes alongside the agent
|
||||
- The `hamelnb` bridge Timmy already validated works against a JupyterLab server
|
||||
|
||||
**For agents:** JupyterLab is the platform to run on. The agent doesn't interact with the UI — it uses the Jupyter REST API or Papermill on top of it.
|
||||
|
||||
### 1.3 JupyterHub — The Multi-User Orchestration Layer
|
||||
|
||||
JupyterHub is not a UI. It is a **multi-user server** that spawns, manages, and proxies individual single-user Jupyter servers. This is the production infrastructure layer.
|
||||
|
||||
```
|
||||
[Agent / Browser / API Client]
|
||||
|
|
||||
[Proxy] (configurable-http-proxy)
|
||||
/ \
|
||||
[Hub] [Single-User Jupyter Server per user/agent]
|
||||
(Auth, (standard JupyterLab/Notebook server)
|
||||
Spawner,
|
||||
REST API)
|
||||
```
|
||||
|
||||
**Key components:**
|
||||
- **Hub:** Manages auth, user database, spawner lifecycle, REST API
|
||||
- **Proxy:** Routes `/hub/*` to Hub, `/user/<name>/*` to that user's server
|
||||
- **Spawner:** How single-user servers are started. Default = local process. Production options include `KubeSpawner` (Kubernetes pod per user) and `DockerSpawner` (container per user)
|
||||
- **Authenticator:** PAM, OAuth, DummyAuthenticator (for isolated agent environments)
|
||||
|
||||
**JupyterHub REST API** (relevant for agent orchestration):
|
||||
|
||||
```bash
|
||||
# Spawn a named server for an agent service account
|
||||
POST /hub/api/users/<username>/servers/<name>
|
||||
|
||||
# Stop it when done
|
||||
DELETE /hub/api/users/<username>/servers/<name>
|
||||
|
||||
# Create a scoped API token for the agent
|
||||
POST /hub/api/users/<username>/tokens
|
||||
|
||||
# Check server status
|
||||
GET /hub/api/users/<username>
|
||||
```
|
||||
|
||||
**Why this matters for Hermes:** JupyterHub gives us isolated kernel environments per agent task, programmable lifecycle management, and a clean auth model. Instead of running one shared JupyterLab instance on the forge VPS, we could spawn ephemeral single-user servers per notebook execution run — each with its own kernel, clean state, and resource limits.
|
||||
|
||||
### 1.4 Jupyter Kernel Gateway — Minimal Headless Execution
|
||||
|
||||
If JupyterHub is too heavy, `jupyter-kernel-gateway` exposes just the kernel protocol over REST + WebSocket:
|
||||
|
||||
```bash
|
||||
pip install jupyter-kernel-gateway
|
||||
jupyter kernelgateway --KernelGatewayApp.api=kernel_gateway.jupyter_websocket
|
||||
|
||||
# Start kernel
|
||||
POST /api/kernels
|
||||
# Execute via WebSocket on Jupyter messaging protocol
|
||||
WS /api/kernels/<kernel_id>/channels
|
||||
# Stop kernel
|
||||
DELETE /api/kernels/<kernel_id>
|
||||
```
|
||||
|
||||
This is the lowest-level option: no notebook management, just raw kernel access. Suitable if we want to build our own execution layer from scratch.
|
||||
|
||||
---
|
||||
|
||||
## 2. Papermill — Production Notebook Execution
|
||||
|
||||
Papermill is the missing link between "notebook as experiment" and "notebook as repeatable pipeline task." It is already used at scale in industry data pipelines (Netflix, Airbnb, etc.).
|
||||
|
||||
### 2.1 Core Concept: Parameterization
|
||||
|
||||
Papermill's key innovation is **parameter injection**. Tag a cell in the notebook with `"parameters"`:
|
||||
|
||||
```python
|
||||
# Cell tagged "parameters" (defaults — defined by notebook author)
|
||||
alpha = 0.5
|
||||
batch_size = 32
|
||||
model_name = "baseline"
|
||||
```
|
||||
|
||||
At runtime, Papermill inserts a new cell immediately after, tagged `"injected-parameters"`, that overrides the defaults:
|
||||
|
||||
```python
|
||||
# Cell tagged "injected-parameters" (injected by Papermill at runtime)
|
||||
alpha = 0.01
|
||||
batch_size = 128
|
||||
model_name = "experiment_007"
|
||||
```
|
||||
|
||||
Because Python executes top-to-bottom, the injected cell shadows the defaults. The original notebook is never mutated — Papermill reads input, writes to a new output file.
|
||||
|
||||
### 2.2 Python API
|
||||
|
||||
```python
|
||||
import papermill as pm
|
||||
|
||||
nb = pm.execute_notebook(
|
||||
input_path="analysis.ipynb", # source (can be s3://, az://, gs://)
|
||||
output_path="output/run_001.ipynb", # destination (persists outputs)
|
||||
parameters={
|
||||
"alpha": 0.01,
|
||||
"n_samples": 1000,
|
||||
"run_id": "fleet-check-2026-04-06",
|
||||
},
|
||||
kernel_name="python3",
|
||||
execution_timeout=300, # per-cell timeout in seconds
|
||||
log_output=True, # stream cell output to logger
|
||||
cwd="/path/to/notebook/", # working directory
|
||||
)
|
||||
# Returns: NotebookNode (the fully executed notebook with all outputs)
|
||||
```
|
||||
|
||||
On cell failure, Papermill raises `PapermillExecutionError` with:
|
||||
- `cell_index` — which cell failed
|
||||
- `source` — the failing cell's code
|
||||
- `ename` / `evalue` — exception type and message
|
||||
- `traceback` — full traceback
|
||||
|
||||
Even on failure, the output notebook is written with whatever cells completed — enabling partial-run inspection.
|
||||
|
||||
### 2.3 CLI
|
||||
|
||||
```bash
|
||||
# Basic execution
|
||||
papermill analysis.ipynb output/run_001.ipynb \
|
||||
-p alpha 0.01 \
|
||||
-p n_samples 1000
|
||||
|
||||
# From YAML parameter file
|
||||
papermill analysis.ipynb output/run_001.ipynb -f params.yaml
|
||||
|
||||
# CI-friendly: log outputs, no progress bar
|
||||
papermill analysis.ipynb output/run_001.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
--execution-timeout 300 \
|
||||
-p run_id "fleet-check-2026-04-06"
|
||||
|
||||
# Prepare only (inject params, skip execution — for preview/inspection)
|
||||
papermill analysis.ipynb preview.ipynb --prepare-only -p alpha 0.01
|
||||
|
||||
# Inspect parameter schema
|
||||
papermill --help-notebook analysis.ipynb
|
||||
```
|
||||
|
||||
**Remote storage** is supported through optional extras — `pip install "papermill[s3]"` enables `s3://` paths for both input and output. Azure and GCS are also supported via their own extras (`papermill[azure]`, `papermill[gcs]`). For Hermes, this means notebook runs can be stored in object storage and retrieved later for audit.
|
||||
|
||||
### 2.4 Scrapbook — Structured Output Collection
|
||||
|
||||
`scrapbook` is Papermill's companion for extracting structured data from executed notebooks. Inside a notebook cell:
|
||||
|
||||
```python
|
||||
import scrapbook as sb
|
||||
|
||||
# Write typed outputs (stored as special display_data in cell outputs)
|
||||
sb.glue("accuracy", 0.9342)
|
||||
sb.glue("metrics", {"precision": 0.91, "recall": 0.93, "f1": 0.92})
|
||||
sb.glue("results_df", df, "pandas") # DataFrames too
|
||||
```
|
||||
|
||||
After execution, from the agent:
|
||||
|
||||
```python
|
||||
import scrapbook as sb
|
||||
|
||||
nb = sb.read_notebook("output/fleet-check-2026-04-06.ipynb")
|
||||
metrics = nb.scraps["metrics"].data # -> {"precision": 0.91, ...}
|
||||
accuracy = nb.scraps["accuracy"].data # -> 0.9342
|
||||
|
||||
# Or aggregate across many runs
|
||||
book = sb.read_notebooks("output/")
|
||||
book.scrap_dataframe # -> pd.DataFrame with all scraps + filenames
|
||||
```
|
||||
|
||||
This is the clean interface between notebook execution and agent decision-making: the notebook outputs its findings as named, typed scraps; the agent reads them programmatically and acts.
|
||||
|
||||
### 2.5 How Papermill Compares to hamelnb
|
||||
|
||||
| Capability | hamelnb | Papermill |
|
||||
|---|---|---|
|
||||
| Stateful kernel session | Yes | No (fresh kernel per run) |
|
||||
| Parameter injection | No | Yes |
|
||||
| Persistent output notebook | No | Yes |
|
||||
| Remote storage (S3/Azure) | No | Yes |
|
||||
| Per-cell timing/metadata | No | Yes (in output nb metadata) |
|
||||
| Error isolation (partial runs) | No | Yes |
|
||||
| Production pipeline use | Experimental | Industry-standard |
|
||||
| Structured output collection | No | Yes (via scrapbook) |
|
||||
|
||||
**Verdict:** `hamelnb` is great for interactive REPL-style exploration (where state accumulates). Papermill is better for task execution (where we want reproducible, parameterized, auditable runs). They serve different use cases. Hermes needs both.
|
||||
|
||||
---
|
||||
|
||||
## 3. The `.ipynb` File Format — What the Agent Is Actually Working With
|
||||
|
||||
Understanding the format is essential for the "PR model." A `.ipynb` file is JSON with this structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5,
|
||||
"metadata": {
|
||||
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||||
"language_info": {"name": "python", "version": "3.10.0"}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"id": "a1b2c3d4",
|
||||
"cell_type": "markdown",
|
||||
"source": "# Fleet Health Check\n\nThis notebook checks system health.",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"id": "e5f6g7h8",
|
||||
"cell_type": "code",
|
||||
"source": "alpha = 0.5\nthreshold = 0.95",
|
||||
"metadata": {"tags": ["parameters"]},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"id": "i9j0k1l2",
|
||||
"cell_type": "code",
|
||||
"source": "import sys\nprint(sys.version)",
|
||||
"metadata": {},
|
||||
"execution_count": 1,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "3.10.0 (default, ...)\n"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `nbformat` Python library provides a clean API for working with this:
|
||||
|
||||
```python
|
||||
import nbformat
|
||||
|
||||
# Read
|
||||
with open("notebook.ipynb") as f:
|
||||
nb = nbformat.read(f, as_version=4)
|
||||
|
||||
# Navigate
|
||||
for cell in nb.cells:
|
||||
if cell.cell_type == "code":
|
||||
print(cell.source)
|
||||
|
||||
# Modify
|
||||
nb.cells[2].source = "import sys\nprint('updated')"
|
||||
|
||||
# Add cells
|
||||
new_md = nbformat.v4.new_markdown_cell("## Agent Analysis\nInserted by Hermes.")
|
||||
nb.cells.insert(3, new_md)
|
||||
|
||||
# Write
|
||||
with open("modified.ipynb", "w") as f:
|
||||
nbformat.write(nb, f)
|
||||
|
||||
# Validate
|
||||
nbformat.validate(nb) # raises nbformat.ValidationError on invalid format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. The PR Model for Notebooks
|
||||
|
||||
This is the elegant architecture Rockachopa described: agents making PRs to notebooks the same way they make PRs to code. Here's how the full stack enables it.
|
||||
|
||||
### 4.1 The Problem: Raw `.ipynb` Diffs Are Unusable
|
||||
|
||||
Without tooling, a `git diff` on a notebook that was merely re-run (no source changes) produces thousands of lines of JSON changes — execution counts, timestamps, base64-encoded plot images. Code review on raw `.ipynb` diffs is impractical.
|
||||
|
||||
### 4.2 nbstripout — Clean Git History
|
||||
|
||||
`nbstripout` installs a git **clean filter** that strips outputs before files enter the git index. The working copy is untouched; only what gets committed is clean.
|
||||
|
||||
```bash
|
||||
pip install nbstripout
|
||||
nbstripout --install # per-repo
|
||||
# or
|
||||
nbstripout --install --global # all repos
|
||||
```
|
||||
|
||||
This writes to `.git/config`:
|
||||
```ini
|
||||
[filter "nbstripout"]
|
||||
clean = nbstripout
|
||||
smudge = cat
|
||||
required = true
|
||||
|
||||
[diff "ipynb"]
|
||||
textconv = nbstripout -t
|
||||
```
|
||||
|
||||
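And to `.git/info/attributes` (or to a committed `.gitattributes` file when installed with `nbstripout --install --attributes .gitattributes`):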
|
||||
```
|
||||
*.ipynb filter=nbstripout
|
||||
*.ipynb diff=ipynb
|
||||
```
|
||||
|
||||
Now `git diff` shows only source changes — same as reviewing a `.py` file.
|
||||
|
||||
**For executed-output notebooks** (where we want to keep outputs for audit): use a separate path like `runs/` or `outputs/` excluded from the filter via `.gitattributes`:
|
||||
```
|
||||
*.ipynb filter=nbstripout
|
||||
runs/*.ipynb !filter
|
||||
runs/*.ipynb !diff
|
||||
```
|
||||
|
||||
### 4.3 nbdime — Semantic Diff and Merge
|
||||
|
||||
nbdime understands notebook structure. Instead of diffing raw JSON, it diffs at the level of cells — knowing that `cells` is a list, `source` is a string, and outputs should often be ignored.
|
||||
|
||||
```bash
|
||||
pip install nbdime
|
||||
|
||||
# Enable semantic git diff/merge for all .ipynb files
|
||||
nbdime config-git --enable
|
||||
|
||||
# Now standard git commands are notebook-aware:
|
||||
git diff HEAD notebook.ipynb # semantic cell-level diff
|
||||
git merge feature-branch # uses nbdime for .ipynb conflict resolution
|
||||
git log -p notebook.ipynb # readable patch per commit
|
||||
```
|
||||
|
||||
**Python API for agent reasoning:**
|
||||
|
||||
```python
|
||||
import nbdime
|
||||
import nbformat
|
||||
|
||||
nb_base = nbformat.read(open("original.ipynb"), as_version=4)
|
||||
nb_pr = nbformat.read(open("proposed.ipynb"), as_version=4)
|
||||
|
||||
diff = nbdime.diff_notebooks(nb_base, nb_pr)
|
||||
|
||||
# diff is a list of structured ops the agent can reason about:
|
||||
# [{"op": "patch", "key": "cells", "diff": [
|
||||
# {"op": "patch", "key": 3, "diff": [
|
||||
# {"op": "patch", "key": "source", "diff": [...string ops...]}
|
||||
# ]}
|
||||
# ]}]
|
||||
|
||||
# Apply a diff (patch)
|
||||
from nbdime.patching import patch
|
||||
nb_result = patch(nb_base, diff)
|
||||
```
|
||||
|
||||
### 4.4 The Full Agent PR Workflow
|
||||
|
||||
Here is the complete workflow — analogous to how Hermes makes PRs to code repos via Gitea:
|
||||
|
||||
**1. Agent reads the task notebook**
|
||||
```python
|
||||
nb = nbformat.read(open("fleet_health_check.ipynb"), as_version=4)
|
||||
```
|
||||
|
||||
**2. Agent locates and modifies relevant cells**
|
||||
```python
|
||||
# Find parameter cell
|
||||
params_cell = next(
|
||||
c for c in nb.cells
|
||||
if "parameters" in c.get("metadata", {}).get("tags", [])
|
||||
)
|
||||
# Update threshold
|
||||
params_cell.source = params_cell.source.replace("threshold = 0.95", "threshold = 0.90")
|
||||
|
||||
# Add explanatory markdown
|
||||
nb.cells.insert(
|
||||
nb.cells.index(params_cell) + 1,
|
||||
nbformat.v4.new_markdown_cell(
|
||||
"**Note (Hermes 2026-04-06):** Threshold lowered from 0.95 to 0.90 "
|
||||
"based on false-positive analysis from last 7 days of runs."
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
**3. Agent writes and commits to a branch**
|
||||
```bash
|
||||
git checkout -b agent/fleet-health-threshold-update
|
||||
# (Python step, run in the agent's session, not the shell): nbformat.write(nb, open("fleet_health_check.ipynb", "w"))
|
||||
git add fleet_health_check.ipynb
|
||||
git commit -m "feat(notebooks): lower fleet health threshold to 0.90 (#155)"
|
||||
```
|
||||
|
||||
**4. Agent executes the proposed notebook to validate**
|
||||
```python
|
||||
import papermill as pm
|
||||
|
||||
pm.execute_notebook(
|
||||
"fleet_health_check.ipynb",
|
||||
"output/validation_run.ipynb",
|
||||
parameters={"run_id": "agent-validation-2026-04-06"},
|
||||
log_output=True,
|
||||
)
|
||||
```
|
||||
|
||||
**5. Agent collects results and compares**
|
||||
```python
|
||||
import scrapbook as sb
|
||||
|
||||
result = sb.read_notebook("output/validation_run.ipynb")
|
||||
health_score = result.scraps["health_score"].data
|
||||
alert_count = result.scraps["alert_count"].data
|
||||
```
|
||||
|
||||
**6. Agent opens PR with results summary**
|
||||
```bash
|
||||
curl -X POST "$GITEA_API/pulls" \
|
||||
-H "Authorization: token $TOKEN" -H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"title": "feat(notebooks): lower fleet health threshold to 0.90",
|
||||
"body": "## Agent Analysis\n\n- Health score: 0.94 (was 0.89 with old threshold)\n- Alert count: 12 (was 47 false positives)\n- Validation run: output/validation_run.ipynb\n\nRefs #155",
|
||||
"head": "agent/fleet-health-threshold-update",
|
||||
"base": "main"
|
||||
}'
|
||||
```
|
||||
|
||||
**7. Human reviews the PR using nbdime diff**
|
||||
|
||||
The PR diff in Gitea shows the clean cell-level source changes (thanks to nbstripout). The human can also run `nbdiff-web original.ipynb proposed.ipynb` locally for rich rendered diff with output comparison.
|
||||
|
||||
### 4.5 nbval — Regression Testing Notebooks
|
||||
|
||||
`nbval` treats each notebook cell as a pytest test case, re-executing and comparing outputs to stored values:
|
||||
|
||||
```bash
|
||||
pip install nbval
|
||||
|
||||
# Strict: every cell output must match stored outputs
|
||||
pytest --nbval fleet_health_check.ipynb
|
||||
|
||||
# Lax: only check cells marked with # NBVAL_CHECK_OUTPUT
|
||||
pytest --nbval-lax fleet_health_check.ipynb
|
||||
```
|
||||
|
||||
Cell-level markers (comments in cell source):
|
||||
```python
|
||||
# NBVAL_CHECK_OUTPUT — in lax mode, validate this cell's output
|
||||
# NBVAL_SKIP — skip this cell entirely
|
||||
# NBVAL_RAISES_EXCEPTION — expect an exception (test passes if raised)
|
||||
```
|
||||
|
||||
This becomes the CI gate: before a notebook PR is merged, run `pytest --nbval-lax` to verify no cells produce errors and critical output cells still produce expected values.
|
||||
|
||||
---
|
||||
|
||||
## 5. Gaps and Recommendations
|
||||
|
||||
### 5.1 Gap Assessment (Refining Timmy's Original Findings)
|
||||
|
||||
| Gap | Severity | Solution |
|
||||
|---|---|---|
|
||||
| No Hermes tool access in kernel | High | Inject `hermes_runtime` module (see §5.3) |
|
||||
| No structured output protocol | High | Use scrapbook `sb.glue()` pattern |
|
||||
| No parameterization | Medium | Add Papermill `"parameters"` cell to notebooks |
|
||||
| XSRF/auth friction | Medium | Disable for local; use JupyterHub token scopes for multi-user |
|
||||
| No notebook CI/testing | Medium | Add nbval to test suite |
|
||||
| Raw `.ipynb` diffs in PRs | Medium | Install nbstripout + nbdime |
|
||||
| No scheduling | Low | Papermill + existing Hermes cron layer |
|
||||
|
||||
### 5.2 Short-Term Recommendations (This Month)
|
||||
|
||||
**1. `NotebookExecutor` tool**
|
||||
|
||||
A thin Hermes tool wrapping the ecosystem:
|
||||
|
||||
```python
|
||||
class NotebookExecutor:
|
||||
def execute(self, input_path, output_path, parameters, timeout=300):
|
||||
"""Wraps pm.execute_notebook(). Returns structured result dict."""
|
||||
|
||||
def collect_outputs(self, notebook_path):
|
||||
"""Wraps sb.read_notebook(). Returns dict of named scraps."""
|
||||
|
||||
def inspect_parameters(self, notebook_path):
|
||||
"""Wraps pm.inspect_notebook(). Returns parameter schema."""
|
||||
|
||||
def read_notebook(self, path):
|
||||
"""Returns nbformat NotebookNode for cell inspection/modification."""
|
||||
|
||||
def write_notebook(self, nb, path):
|
||||
"""Writes modified NotebookNode back to disk."""
|
||||
|
||||
def diff_notebooks(self, path_a, path_b):
|
||||
"""Returns structured nbdime diff for agent reasoning."""
|
||||
|
||||
def validate(self, notebook_path):
|
||||
"""Runs nbformat.validate() + optional pytest --nbval-lax."""
|
||||
```
|
||||
|
||||
Execution result structure for the agent:
|
||||
```python
|
||||
{
|
||||
"status": "success" | "error",
|
||||
"duration_seconds": 12.34,
|
||||
"cells_executed": 15,
|
||||
"failed_cell": { # None on success
|
||||
"index": 7,
|
||||
"source": "model.fit(X, y)",
|
||||
"ename": "ValueError",
|
||||
"evalue": "Input contains NaN",
|
||||
},
|
||||
"scraps": { # from scrapbook
|
||||
"health_score": 0.94,
|
||||
"alert_count": 12,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
**2. Fleet Health Check as a Notebook**
|
||||
|
||||
Convert the fleet health check epic into a parameterized notebook with:
|
||||
- `"parameters"` cell for run configuration (date range, thresholds, agent ID)
|
||||
- Markdown cells narrating each step
|
||||
- `sb.glue()` calls for structured outputs
|
||||
- `# NBVAL_CHECK_OUTPUT` markers on critical cells
|
||||
|
||||
**3. Git hygiene for notebooks**
|
||||
|
||||
Install nbstripout + nbdime in the hermes-agent repo:
|
||||
```bash
|
||||
pip install nbstripout nbdime
|
||||
nbstripout --install
|
||||
nbdime config-git --enable
|
||||
```
|
||||
|
||||
Add to `.gitattributes`:
|
||||
```
|
||||
*.ipynb filter=nbstripout
|
||||
*.ipynb diff=ipynb
|
||||
runs/*.ipynb !filter
|
||||
```
|
||||
|
||||
### 5.3 Medium-Term Recommendations (Next Quarter)
|
||||
|
||||
**4. `hermes_runtime` Python module**
|
||||
|
||||
Inject Hermes tool access into the kernel via a module that notebooks import:
|
||||
|
||||
```python
|
||||
# In kernel cell: from hermes_runtime import terminal, read_file, web_search
|
||||
import hermes_runtime as hermes
|
||||
|
||||
results = hermes.web_search("fleet health metrics best practices")
|
||||
hermes.terminal("systemctl status agent-fleet")
|
||||
content = hermes.read_file("/var/log/hermes/agent.log")
|
||||
```
|
||||
|
||||
This closes the most significant gap: notebooks gain the same tool access as skills, while retaining state persistence and narrative structure.
|
||||
|
||||
**5. Notebook-triggered cron**
|
||||
|
||||
Extend the Hermes cron layer to accept `.ipynb` paths as targets:
|
||||
```yaml
|
||||
# cron entry
|
||||
schedule: "0 6 * * *"
|
||||
type: notebook
|
||||
path: notebooks/fleet_health_check.ipynb
|
||||
parameters:
|
||||
run_id: "{{date}}"
|
||||
alert_threshold: 0.90
|
||||
output_path: runs/fleet_health_{{date}}.ipynb
|
||||
```
|
||||
|
||||
The cron runner calls `pm.execute_notebook()` and commits the output to the repo.
|
||||
|
||||
**6. JupyterHub for multi-agent isolation**
|
||||
|
||||
If multiple agents need concurrent notebook execution, deploy JupyterHub with `DockerSpawner` or `KubeSpawner`. Each agent job gets an isolated container with its own kernel, no state bleed between runs.
|
||||
|
||||
---
|
||||
|
||||
## 6. Architecture Vision
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Hermes Agent │
|
||||
│ │
|
||||
│ Skills (one-shot) Notebooks (multi-step) │
|
||||
│ ┌─────────────────┐ ┌─────────────────────────────────┐ │
|
||||
│ │ terminal() │ │ .ipynb file │ │
|
||||
│ │ web_search() │ │ ├── Markdown (narrative) │ │
|
||||
│ │ read_file() │ │ ├── Code cells (logic) │ │
|
||||
│ └─────────────────┘ │ ├── "parameters" cell │ │
|
||||
│ │ └── sb.glue() outputs │ │
|
||||
│ └──────────────┬────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌──────────────▼────────────────┐ │
|
||||
│ │ NotebookExecutor tool │ │
|
||||
│ │ (papermill + scrapbook + │ │
|
||||
│ │ nbformat + nbdime + nbval) │ │
|
||||
│ └──────────────┬────────────────┘ │
|
||||
│ │ │
|
||||
└────────────────────────────────────────────┼────────────────────┘
|
||||
│
|
||||
┌───────────────────▼──────────────────┐
|
||||
│ JupyterLab / Hub │
|
||||
│ (kernel execution environment) │
|
||||
└───────────────────┬──────────────────┘
|
||||
│
|
||||
┌───────────────────▼──────────────────┐
|
||||
│ Git + Gitea │
|
||||
│ (nbstripout clean diffs, │
|
||||
│ nbdime semantic review, │
|
||||
│ PR workflow for notebook changes) │
|
||||
└──────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Notebooks become the primary artifact of complex tasks:** the agent generates or edits cells, Papermill executes them reproducibly, scrapbook extracts structured outputs for agent decision-making, and the resulting `.ipynb` is both proof-of-work and human-readable report. Skills remain for one-shot actions. Notebooks own multi-step workflows.
|
||||
|
||||
---
|
||||
|
||||
## 7. Package Summary
|
||||
|
||||
| Package | Purpose | Install |
|
||||
|---|---|---|
|
||||
| `nbformat` | Read/write/validate `.ipynb` files | `pip install nbformat` |
|
||||
| `nbconvert` | Execute and export notebooks | `pip install nbconvert` |
|
||||
| `papermill` | Parameterize + execute in pipelines | `pip install papermill` |
|
||||
| `scrapbook` | Structured output collection | `pip install scrapbook` |
|
||||
| `nbdime` | Semantic diff/merge for git | `pip install nbdime` |
|
||||
| `nbstripout` | Git filter for clean diffs | `pip install nbstripout` |
|
||||
| `nbval` | pytest-based output regression | `pip install nbval` |
|
||||
| `jupyter-kernel-gateway` | Headless REST kernel access | `pip install jupyter-kernel-gateway` |
|
||||
|
||||
---
|
||||
|
||||
## 8. References
|
||||
|
||||
- [Papermill GitHub (nteract/papermill)](https://github.com/nteract/papermill)
|
||||
- [Scrapbook GitHub (nteract/scrapbook)](https://github.com/nteract/scrapbook)
|
||||
- [nbformat format specification](https://nbformat.readthedocs.io/en/latest/format_description.html)
|
||||
- [nbdime documentation](https://nbdime.readthedocs.io/)
|
||||
- [nbdime diff format spec (JEP #8)](https://github.com/jupyter/enhancement-proposals/blob/master/08-notebook-diff/notebook-diff.md)
|
||||
- [nbconvert execute API](https://nbconvert.readthedocs.io/en/latest/execute_api.html)
|
||||
- [nbstripout README](https://github.com/kynan/nbstripout)
|
||||
- [nbval GitHub (computationalmodelling/nbval)](https://github.com/computationalmodelling/nbval)
|
||||
- [JupyterHub REST API](https://jupyterhub.readthedocs.io/en/stable/howto/rest.html)
|
||||
- [JupyterHub Technical Overview](https://jupyterhub.readthedocs.io/en/latest/reference/technical-overview.html)
|
||||
- [Jupyter Kernel Gateway](https://github.com/jupyter-server/kernel_gateway)
|
||||
@@ -1,490 +0,0 @@
|
||||
# Nexus Architect Tool
|
||||
|
||||
The **Nexus Architect Tool** enables Timmy (the Hermes Agent) to autonomously design and build 3D environments in the Three.js-based "Nexus" virtual world. It provides a structured interface for creating rooms, portals, lighting systems, and architectural features through LLM-generated Three.js code.
|
||||
|
||||
## Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Nexus Architect Tool │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │
|
||||
│ │ Room Design │ │ Portal Create│ │ Lighting System │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────────────┘ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │
|
||||
│ │ Architecture │ │ Code Validate│ │ Scene Export │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────────────┘ │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Scene Graph Store │
|
||||
│ (Rooms, Portals, Lights, Architecture) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### Core Components
|
||||
|
||||
1. **NexusArchitect Class**: Main orchestrator for all architectural operations
|
||||
2. **SceneGraph**: Dataclass storing the complete world state
|
||||
3. **Validation Engine**: Security and syntax validation for generated code
|
||||
4. **Prompt Generator**: Structured LLM prompts for Three.js code generation
|
||||
5. **Tool Registry Integration**: Registration with Hermes tool system
|
||||
|
||||
### Data Models
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class RoomConfig:
|
||||
name: str
|
||||
theme: RoomTheme # meditation, tech_lab, nature, crystal_cave, library, void
|
||||
dimensions: Dict[str, float] # {width, height, depth}
|
||||
features: List[str]
|
||||
lighting_profile: str
|
||||
fog_enabled: bool
|
||||
|
||||
@dataclass
|
||||
class PortalConfig:
|
||||
name: str
|
||||
source_room: str
|
||||
target_room: str
|
||||
position: Dict[str, float]
|
||||
style: PortalStyle # circular, rectangular, stargate, dissolve, glitch
|
||||
color: str
|
||||
one_way: bool
|
||||
|
||||
@dataclass
|
||||
class LightConfig:
|
||||
name: str
|
||||
type: LightType # ambient, directional, point, spot, hemisphere
|
||||
position: Dict[str, float]
|
||||
color: str
|
||||
intensity: float
|
||||
cast_shadow: bool
|
||||
```
|
||||
|
||||
## Available Tools
|
||||
|
||||
### 1. `nexus_design_room`
|
||||
|
||||
Design a new room in the Nexus.
|
||||
|
||||
**Parameters:**
|
||||
- `name` (string, required): Unique room identifier
|
||||
- `theme` (string, required): One of `meditation`, `tech_lab`, `nature`, `crystal_cave`, `library`, `void`, `custom`
|
||||
- `dimensions` (object): `{width, height, depth}` in meters (default: 10x5x10)
|
||||
- `features` (array): List of feature names (e.g., `water_feature`, `floating_lanterns`)
|
||||
- `lighting_profile` (string): Preset lighting configuration
|
||||
- `mental_state` (object): Optional context for design decisions
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"room_name": "meditation_chamber",
|
||||
"prompt": "... LLM prompt for Three.js generation ...",
|
||||
"config": { ... room configuration ... }
|
||||
}
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
nexus_design_room(
|
||||
name="zen_garden",
|
||||
theme="meditation",
|
||||
dimensions={"width": 20, "height": 10, "depth": 20},
|
||||
features=["water_feature", "bamboo_grove", "floating_lanterns"],
|
||||
mental_state={"mood": "calm", "energy": 0.3}
|
||||
)
|
||||
```
|
||||
|
||||
### 2. `nexus_create_portal`
|
||||
|
||||
Create a portal connecting two rooms.
|
||||
|
||||
**Parameters:**
|
||||
- `name` (string, required): Unique portal identifier
|
||||
- `source_room` (string, required): Source room name
|
||||
- `target_room` (string, required): Target room name
|
||||
- `position` (object): `{x, y, z}` coordinates in source room
|
||||
- `style` (string): Visual style (`circular`, `rectangular`, `stargate`, `dissolve`, `glitch`)
|
||||
- `color` (string): Hex color code (default: `#00ffff`)
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"portal_name": "portal_alpha",
|
||||
"source": "room_a",
|
||||
"target": "room_b",
|
||||
"prompt": "... LLM prompt for portal generation ..."
|
||||
}
|
||||
```
|
||||
|
||||
### 3. `nexus_add_lighting`
|
||||
|
||||
Add lighting elements to a room.
|
||||
|
||||
**Parameters:**
|
||||
- `room_name` (string, required): Target room
|
||||
- `lights` (array): List of light configurations
|
||||
- `name` (string): Light identifier
|
||||
- `type` (string): `ambient`, `directional`, `point`, `spot`, `hemisphere`
|
||||
- `position` (object): `{x, y, z}`
|
||||
- `color` (string): Hex color
|
||||
- `intensity` (number): Light intensity
|
||||
- `cast_shadow` (boolean): Enable shadows
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
nexus_add_lighting(
|
||||
room_name="meditation_chamber",
|
||||
lights=[
|
||||
{"name": "ambient", "type": "ambient", "intensity": 0.3},
|
||||
{"name": "main", "type": "point", "position": {"x": 0, "y": 5, "z": 0}}
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
### 4. `nexus_validate_scene`
|
||||
|
||||
Validate generated Three.js code for security and syntax.
|
||||
|
||||
**Parameters:**
|
||||
- `code` (string, required): JavaScript code to validate
|
||||
- `strict_mode` (boolean): Enable stricter validation (default: false)
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"is_valid": true,
|
||||
"errors": [],
|
||||
"warnings": [],
|
||||
"safety_score": 95,
|
||||
"extracted_code": "... cleaned code ..."
|
||||
}
|
||||
```
|
||||
|
||||
**Security Checks:**
|
||||
- Banned patterns: `eval()`, `Function()`, `setTimeout(string)`, `document.write`
|
||||
- Network blocking: `fetch()`, `WebSocket`, `XMLHttpRequest`
|
||||
- Storage blocking: `localStorage`, `sessionStorage`, `indexedDB`
|
||||
- Syntax validation: Balanced braces and parentheses
|
||||
|
||||
### 5. `nexus_export_scene`
|
||||
|
||||
Export the current scene configuration.
|
||||
|
||||
**Parameters:**
|
||||
- `format` (string): `json` or `js` (default: `json`)
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"format": "json",
|
||||
"data": "... exported scene data ...",
|
||||
"summary": {
|
||||
"rooms": 3,
|
||||
"portals": 2,
|
||||
"lights": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. `nexus_get_summary`
|
||||
|
||||
Get a summary of the current scene state.
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"rooms": [
|
||||
{"name": "room_a", "theme": "void", "connected_portals": ["p1"]}
|
||||
],
|
||||
"portal_network": [
|
||||
{"name": "p1", "source": "room_a", "target": "room_b"}
|
||||
],
|
||||
"total_lights": 5
|
||||
}
|
||||
```
|
||||
|
||||
## LLM Integration Flow
|
||||
|
||||
```
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ User Request │────▶│ Architect │────▶│ Prompt │
|
||||
│ ("Create a │ │ Tool │ │ Generator │
|
||||
│ zen room") │ └──────────────┘ └──────────────┘
|
||||
└──────────────┘ │
|
||||
▼
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ Nexus │◀────│ Validation │◀────│ LLM │
|
||||
│ Runtime │ │ Engine │ │ (generates │
|
||||
│ │ │ │ │ Three.js) │
|
||||
└──────────────┘ └──────────────┘ └──────────────┘
|
||||
```
|
||||
|
||||
1. **Request Parsing**: User request converted to structured configuration
|
||||
2. **Prompt Generation**: Architect generates structured LLM prompt
|
||||
3. **Code Generation**: LLM generates Three.js code based on prompt
|
||||
4. **Validation**: Code validated for security and syntax
|
||||
5. **Execution**: Validated code ready for Nexus runtime
|
||||
|
||||
## Code Validation
|
||||
|
||||
### Allowed Three.js APIs
|
||||
|
||||
The validation system maintains an allowlist of safe Three.js APIs:
|
||||
|
||||
**Core:**
|
||||
- `THREE.Scene`, `THREE.Group`, `THREE.Object3D`
|
||||
- `THREE.PerspectiveCamera`, `THREE.OrthographicCamera`
|
||||
|
||||
**Geometries:**
|
||||
- `THREE.BoxGeometry`, `THREE.SphereGeometry`, `THREE.PlaneGeometry`
|
||||
- `THREE.CylinderGeometry`, `THREE.ConeGeometry`, `THREE.TorusGeometry`
|
||||
- `THREE.BufferGeometry`, `THREE.BufferAttribute`
|
||||
|
||||
**Materials:**
|
||||
- `THREE.MeshBasicMaterial`, `THREE.MeshStandardMaterial`
|
||||
- `THREE.MeshPhongMaterial`, `THREE.MeshPhysicalMaterial`
|
||||
- `THREE.SpriteMaterial`, `THREE.PointsMaterial`
|
||||
|
||||
**Lights:**
|
||||
- `THREE.AmbientLight`, `THREE.DirectionalLight`, `THREE.PointLight`
|
||||
- `THREE.SpotLight`, `THREE.HemisphereLight`
|
||||
|
||||
**Math:**
|
||||
- `THREE.Vector3`, `THREE.Euler`, `THREE.Quaternion`, `THREE.Matrix4`
|
||||
- `THREE.Color`, `THREE.Raycaster`, `THREE.Clock`
|
||||
|
||||
### Banned Patterns
|
||||
|
||||
```python
|
||||
BANNED_JS_PATTERNS = [
|
||||
r"eval\s*\(", # Code injection
|
||||
r"Function\s*\(", # Dynamic function creation
|
||||
r"setTimeout\s*\(\s*['\"]", # Timers with strings
|
||||
r"document\.write", # DOM manipulation
|
||||
r"window\.location", # Navigation
|
||||
r"XMLHttpRequest", # Network requests
|
||||
r"fetch\s*\(", # Fetch API
|
||||
r"localStorage", # Storage access
|
||||
r"navigator", # Browser API access
|
||||
]
|
||||
```
|
||||
|
||||
## Scene Graph Format
|
||||
|
||||
### JSON Export Structure
|
||||
|
||||
```json
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"rooms": {
|
||||
"meditation_chamber": {
|
||||
"name": "meditation_chamber",
|
||||
"theme": "meditation",
|
||||
"dimensions": {"width": 20, "height": 10, "depth": 20},
|
||||
"features": ["water_feature", "floating_lanterns"],
|
||||
"fog_enabled": false
|
||||
}
|
||||
},
|
||||
"portals": {
|
||||
"portal_1": {
|
||||
"name": "portal_1",
|
||||
"source_room": "room_a",
|
||||
"target_room": "room_b",
|
||||
"position": {"x": 5, "y": 2, "z": 0},
|
||||
"style": "circular",
|
||||
"color": "#00ffff"
|
||||
}
|
||||
},
|
||||
"lights": {
|
||||
"ambient": {
|
||||
"name": "ambient",
|
||||
"type": "AmbientLight",
|
||||
"color": "#ffffff",
|
||||
"intensity": 0.3
|
||||
}
|
||||
},
|
||||
"global_settings": {
|
||||
"shadow_map_enabled": true,
|
||||
"antialias": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Creating a Meditation Space
|
||||
|
||||
```python
|
||||
# Step 1: Design the room
|
||||
room_result = nexus_design_room(
|
||||
name="zen_garden",
|
||||
theme="meditation",
|
||||
dimensions={"width": 25, "height": 12, "depth": 25},
|
||||
features=["water_feature", "bamboo_grove", "stone_path", "floating_lanterns"],
|
||||
mental_state={"mood": "peaceful", "energy": 0.2}
|
||||
)
|
||||
|
||||
# Step 2: Generate the Three.js code (send prompt to LLM)
|
||||
prompt = room_result["prompt"]
|
||||
# ... LLM generates code ...
|
||||
|
||||
# Step 3: Validate the generated code
|
||||
generated_code = """
|
||||
function createRoom() {
|
||||
const scene = new THREE.Scene();
|
||||
// ... room implementation ...
|
||||
return scene;
|
||||
}
|
||||
"""
|
||||
validation = nexus_validate_scene(code=generated_code)
|
||||
assert validation["is_valid"]
|
||||
|
||||
# Step 4: Add lighting
|
||||
nexus_add_lighting(
|
||||
room_name="zen_garden",
|
||||
lights=[
|
||||
{"name": "ambient", "type": "ambient", "intensity": 0.2, "color": "#ffe4b5"},
|
||||
{"name": "sun", "type": "directional", "position": {"x": 10, "y": 20, "z": 5}},
|
||||
{"name": "lantern_glow", "type": "point", "color": "#ffaa00", "intensity": 0.8}
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
### Creating a Portal Network
|
||||
|
||||
```python
|
||||
# Create hub room
|
||||
nexus_design_room(name="hub", theme="tech_lab", dimensions={"width": 30, "height": 15, "depth": 30})
|
||||
|
||||
# Create destination rooms
|
||||
nexus_design_room(name="library", theme="library")
|
||||
nexus_design_room(name="crystal_cave", theme="crystal_cave")
|
||||
nexus_design_room(name="nature", theme="nature")
|
||||
|
||||
# Create portals
|
||||
nexus_create_portal(name="to_library", source_room="hub", target_room="library", style="rectangular")
|
||||
nexus_create_portal(name="to_cave", source_room="hub", target_room="crystal_cave", style="stargate")
|
||||
nexus_create_portal(name="to_nature", source_room="hub", target_room="nature", style="circular", color="#00ff00")
|
||||
|
||||
# Export the scene
|
||||
export = nexus_export_scene(format="json")
|
||||
print(export["data"])
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Run the test suite:
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest tests/tools/test_nexus_architect.py -v
|
||||
|
||||
# Run specific test categories
|
||||
pytest tests/tools/test_nexus_architect.py::TestCodeValidation -v
|
||||
pytest tests/tools/test_nexus_architect.py::TestNexusArchitect -v
|
||||
pytest tests/tools/test_nexus_architect.py::TestSecurity -v
|
||||
|
||||
# Run with coverage
|
||||
pytest tests/tools/test_nexus_architect.py --cov=tools.nexus_architect --cov-report=html
|
||||
```
|
||||
|
||||
### Test Coverage
|
||||
|
||||
- **Unit Tests**: Data models, validation, prompt generation
|
||||
- **Integration Tests**: Complete workflows, scene export
|
||||
- **Security Tests**: XSS attempts, code injection, banned patterns
|
||||
- **Performance Tests**: Large scenes, complex portal networks
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
### Planned Features
|
||||
|
||||
1. **Asset Library Integration**
|
||||
- Pre-built furniture and decor objects
|
||||
- Material library (PBR textures)
|
||||
- Audio ambience presets
|
||||
|
||||
2. **Advanced Validation**
|
||||
- AST-based JavaScript parsing
|
||||
- Sandboxed code execution testing
|
||||
- Performance profiling (polygon count, draw calls)
|
||||
|
||||
3. **Multi-Agent Collaboration**
|
||||
- Room ownership and permissions
|
||||
- Concurrent editing with conflict resolution
|
||||
- Version control for scenes
|
||||
|
||||
4. **Runtime Integration**
|
||||
- Hot-reload for scene updates
|
||||
- Real-time collaboration protocol
|
||||
- Physics engine integration (Cannon.js, Ammo.js)
|
||||
|
||||
5. **AI-Assisted Design**
|
||||
- Automatic room layout optimization
|
||||
- Lighting analysis and recommendations
|
||||
- Accessibility compliance checking
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```bash
|
||||
# Enable debug logging
|
||||
NEXUS_ARCHITECT_DEBUG=1
|
||||
|
||||
# Set maximum scene complexity
|
||||
NEXUS_MAX_ROOMS=100
|
||||
NEXUS_MAX_PORTALS=500
|
||||
NEXUS_MAX_LIGHTS=1000
|
||||
|
||||
# Strict validation mode
|
||||
NEXUS_STRICT_VALIDATION=1
|
||||
```
|
||||
|
||||
### Toolset Registration
|
||||
|
||||
The tool automatically registers with the Hermes tool registry:
|
||||
|
||||
```python
|
||||
from tools.registry import registry
|
||||
|
||||
registry.register(
|
||||
name="nexus_design_room",
|
||||
toolset="nexus_architect",
|
||||
schema=NEXUS_ARCHITECT_SCHEMAS["nexus_design_room"],
|
||||
handler=...,
|
||||
emoji="🏛️",
|
||||
)
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**"Room already exists" error:**
|
||||
- Room names must be unique within a session
|
||||
- Use `nexus_get_summary()` to list existing rooms
|
||||
|
||||
**"Invalid theme" error:**
|
||||
- Check theme spelling against allowed values
|
||||
- Use lowercase theme names
|
||||
|
||||
**Code validation failures:**
|
||||
- Ensure no banned APIs are used
|
||||
- Check for balanced braces/parentheses
|
||||
- Try `strict_mode=false` for less strict validation
|
||||
|
||||
**Missing room errors:**
|
||||
- Rooms must be created before adding lights or portals
|
||||
- Verify room name spelling matches exactly
|
||||
|
||||
## References
|
||||
|
||||
- [Three.js Documentation](https://threejs.org/docs/)
|
||||
- [Hermes Agent Tools Guide](tools-reference.md)
|
||||
- [Nexus Runtime Specification](nexus-runtime.md) (TODO)
|
||||
@@ -1,138 +0,0 @@
|
||||
# Phase 31: Nexus Architect Tool — Implementation Summary
|
||||
|
||||
## Overview
|
||||
|
||||
Successfully designed and scaffolded the **Nexus Architect Tool** for autonomous 3D world generation in a Three.js-based virtual environment. This tool enables Timmy (the Hermes Agent) to design rooms, create portals, add lighting, and generate validated Three.js code.
|
||||
|
||||
## Files Created
|
||||
|
||||
### 1. `tools/nexus_architect.py` (42KB)
|
||||
Main tool implementation with:
|
||||
- **6 registered tools**: `nexus_design_room`, `nexus_create_portal`, `nexus_add_lighting`, `nexus_validate_scene`, `nexus_export_scene`, `nexus_get_summary`
|
||||
- **Data models**: RoomConfig, PortalConfig, LightConfig, ArchitectureConfig, SceneGraph
|
||||
- **LLM prompt generators**: Structured prompts for Three.js code generation
|
||||
- **Security validation**: Banned pattern detection, syntax checking, code sanitization
|
||||
- **Tool registry integration**: Automatic registration with Hermes tool system
|
||||
|
||||
### 2. `tests/tools/test_nexus_architect.py` (24KB)
|
||||
Comprehensive test suite with:
|
||||
- **48 test cases** covering all functionality
|
||||
- **7 test classes**: Data models, validation, prompt generation, core functionality, integration, security, performance
|
||||
- **100% test pass rate**
|
||||
|
||||
### 3. `docs/nexus_architect.md` (15KB)
|
||||
Complete documentation including:
|
||||
- Architecture overview with diagrams
|
||||
- Tool usage examples and API reference
|
||||
- Scene graph format specification
|
||||
- Security model and allowed/banned APIs
|
||||
- Troubleshooting guide
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### Architecture Research Findings
|
||||
Since no existing "the-nexus" repository was found in the codebase, the architecture was designed based on:
|
||||
- Common Three.js scene management patterns
|
||||
- Task requirements for rooms, portals, and lighting
|
||||
- Security best practices for LLM-generated code
|
||||
|
||||
### Data Model Design
|
||||
```
|
||||
Room: name, theme, dimensions, features, fog settings
|
||||
Portal: name, source/target rooms, position, style, color
|
||||
Light: name, type, position, color, intensity, shadows
|
||||
SceneGraph: versioned container for all world elements
|
||||
```
|
||||
|
||||
### Security Model
|
||||
**Banned Patterns** (detected and rejected):
|
||||
- `eval()`, `Function()`, dynamic code execution
|
||||
- `fetch()`, `WebSocket`, network requests
|
||||
- `localStorage`, `sessionStorage`, storage access
|
||||
- `document.write`, `window.location`, DOM manipulation
|
||||
|
||||
**Validation Features**:
|
||||
- Regex-based pattern detection
|
||||
- Syntax validation (balanced braces/parentheses)
|
||||
- Code sanitization (comment removal, debugger stripping)
|
||||
- Safety scoring (100 - errors*20 - warnings*5)
|
||||
|
||||
### LLM Integration Flow
|
||||
1. User request → structured configuration
|
||||
2. Configuration → LLM prompt (with context/mental state)
|
||||
3. LLM generates Three.js code
|
||||
4. Code validation (security + syntax)
|
||||
5. Validated code → Nexus runtime
|
||||
|
||||
## Tool Capabilities
|
||||
|
||||
### nexus_design_room
|
||||
- Creates room configuration with 7 themes (meditation, tech_lab, nature, crystal_cave, library, void, custom)
|
||||
- Generates structured LLM prompt for Three.js room code
|
||||
- Supports mental state context for adaptive design
|
||||
|
||||
### nexus_create_portal
|
||||
- Connects two rooms with visual portal
|
||||
- 5 portal styles (circular, rectangular, stargate, dissolve, glitch)
|
||||
- Generates portal animation and effect code prompts
|
||||
|
||||
### nexus_add_lighting
|
||||
- Adds 6 light types (ambient, directional, point, spot, hemisphere, rect_area)
|
||||
- Configurable shadows, colors, intensity
|
||||
- Generates lighting system code prompts
|
||||
|
||||
### nexus_validate_scene
|
||||
- Security validation against banned patterns
|
||||
- Syntax checking for JavaScript/Three.js
|
||||
- Extracts code from markdown blocks
|
||||
- Returns safety score (0-100)
|
||||
|
||||
### nexus_export_scene
|
||||
- Exports to JSON or JavaScript module format
|
||||
- Includes complete scene graph with rooms, portals, lights
|
||||
- Summary statistics for scene complexity
|
||||
|
||||
### nexus_get_summary
|
||||
- Returns current world state overview
|
||||
- Room connectivity via portal network
|
||||
- Light and architecture counts
|
||||
|
||||
## Testing Coverage
|
||||
|
||||
| Category | Tests | Status |
|
||||
|----------|-------|--------|
|
||||
| Data Models | 6 | ✅ Pass |
|
||||
| Code Validation | 7 | ✅ Pass |
|
||||
| Code Sanitization | 3 | ✅ Pass |
|
||||
| Prompt Generation | 4 | ✅ Pass |
|
||||
| Core Functionality | 13 | ✅ Pass |
|
||||
| Tool Entry Points | 5 | ✅ Pass |
|
||||
| Integration | 3 | ✅ Pass |
|
||||
| Security | 3 | ✅ Pass |
|
||||
| Performance | 2 | ✅ Pass |
|
||||
| **Total** | **48** | **✅ All Pass** |
|
||||
|
||||
## Future Work (Phase 2+)
|
||||
|
||||
1. **LLM Integration**: Connect to actual LLM API for code generation
|
||||
2. **Asset Library**: Pre-built 3D models and textures
|
||||
3. **Runtime Integration**: Hot-reload, physics engine (Cannon.js/Ammo.js)
|
||||
4. **Multi-Agent**: Room ownership, concurrent editing
|
||||
5. **Persistence**: Database storage for scenes
|
||||
6. **UI Components**: Visualization of scene graph
|
||||
|
||||
## Integration Notes
|
||||
|
||||
The tool is ready for integration with:
|
||||
- Hermes tool registry (auto-registers on import)
|
||||
- LLM providers (OpenAI, Anthropic, etc.)
|
||||
- Three.js runtime environments
|
||||
- Session management for persistent world state
|
||||
|
||||
## Code Quality
|
||||
|
||||
- **Type hints**: Full typing for all functions
|
||||
- **Docstrings**: Comprehensive documentation
|
||||
- **Error handling**: Graceful failure with informative messages
|
||||
- **Security**: Defense-in-depth for code generation
|
||||
- **Testing**: Comprehensive coverage across all categories
|
||||
4657
docs/ouroboros/artifacts/call_graph.json
Normal file
4657
docs/ouroboros/artifacts/call_graph.json
Normal file
File diff suppressed because it is too large
Load Diff
4291
docs/ouroboros/artifacts/core_analysis.json
Normal file
4291
docs/ouroboros/artifacts/core_analysis.json
Normal file
File diff suppressed because it is too large
Load Diff
39340
docs/ouroboros/artifacts/import_graph.json
Normal file
39340
docs/ouroboros/artifacts/import_graph.json
Normal file
File diff suppressed because it is too large
Load Diff
3397
docs/ouroboros/artifacts/module_inventory.json
Normal file
3397
docs/ouroboros/artifacts/module_inventory.json
Normal file
File diff suppressed because it is too large
Load Diff
74
docs/ouroboros/specs/AIAgent_DECOMPOSITION.md
Normal file
74
docs/ouroboros/specs/AIAgent_DECOMPOSITION.md
Normal file
@@ -0,0 +1,74 @@
|
||||
# AIAgent Decomposition Plan (EPIC-999 Phase II Prep)
|
||||
|
||||
## Current State
|
||||
`run_agent.py` contains `AIAgent` — a ~7,000-SLOC class that is the highest-blast-radius module in Hermes.
|
||||
|
||||
## Goal
|
||||
Decompose `AIAgent` into 5 focused classes with strict interfaces, enabling:
|
||||
- Parallel rewrites by competing sub-agents (Phase II)
|
||||
- Independent testing of loop semantics vs. model I/O vs. memory
|
||||
- Future runtime replacement (Hermes Ω) without touching tool infrastructure
|
||||
|
||||
## Proposed Decomposition
|
||||
|
||||
### 1. `ConversationLoop`
|
||||
**Responsibility:** Own the `while` loop invariant, iteration budget, and termination conditions.
|
||||
**Interface:**
|
||||
```python
|
||||
class ConversationLoop:
|
||||
def run(self, messages: list, tools: list, client) -> dict:
|
||||
...
|
||||
```
|
||||
**Invariant:** Must terminate before the API call count reaches `max_iterations` and before `iteration_budget.remaining <= 0`.
|
||||
|
||||
### 2. `ModelDispatcher`
|
||||
**Responsibility:** All interaction with `client.chat.completions.create`, including streaming, fallback activation, and response normalization.
|
||||
**Interface:**
|
||||
```python
|
||||
class ModelDispatcher:
|
||||
def call(self, model: str, messages: list, tools: list, **kwargs) -> ModelResponse:
|
||||
...
|
||||
```
|
||||
**Invariant:** Must always return a normalized object with `.content`, `.tool_calls`, `.reasoning`.
|
||||
|
||||
### 3. `ToolExecutor`
|
||||
**Responsibility:** Execute tool calls (sequential or concurrent), handle errors, and format results.
|
||||
**Interface:**
|
||||
```python
|
||||
class ToolExecutor:
|
||||
def execute(self, tool_calls: list, task_id: str = None) -> list[ToolResult]:
|
||||
...
|
||||
```
|
||||
**Invariant:** Every tool_call produces exactly one ToolResult, and errors are JSON-serializable.
|
||||
|
||||
### 4. `MemoryInterceptor`
|
||||
**Responsibility:** Intercept `memory` and `todo` tool calls before they reach the registry, plus flush memories on session end.
|
||||
**Interface:**
|
||||
```python
|
||||
class MemoryInterceptor:
|
||||
def intercept(self, tool_name: str, args: dict, task_id: str = None) -> str | None:
|
||||
... # returns result if intercepted, None if pass-through
|
||||
```
|
||||
**Invariant:** Must not mutate agent state except through explicit `flush()` calls.
|
||||
|
||||
### 5. `PromptBuilder`
|
||||
**Responsibility:** Assemble system prompt, inject skills, apply context compression, and manage prompt caching markers.
|
||||
**Interface:**
|
||||
```python
|
||||
class PromptBuilder:
|
||||
def build(self, user_message: str, conversation_history: list) -> list:
|
||||
...
|
||||
```
|
||||
**Invariant:** Output list must start with a system message (or equivalent provider parameter).
|
||||
|
||||
## Migration Path
|
||||
1. Create the 5 classes as thin facades that delegate back to `AIAgent` methods.
|
||||
2. Move logic incrementally from `AIAgent` into the new classes.
|
||||
3. Once `AIAgent` is a pure coordinator (~500 SLOC), freeze the interface.
|
||||
4. Phase II competing agents rewrite one class at a time.
|
||||
|
||||
## Acceptance Criteria
|
||||
- [ ] `AIAgent` reduced to < 1,000 SLOC
|
||||
- [ ] Each new class has > 80% test coverage
|
||||
- [ ] Full existing test suite still passes
|
||||
- [ ] No behavioral regressions in shadow mode
|
||||
263
docs/ouroboros/specs/SPEC.md
Normal file
263
docs/ouroboros/specs/SPEC.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# Hermes Ω Specification Draft (Ouroboros Phase I)
|
||||
|
||||
> Auto-generated by Ezra as part of EPIC-999. This document is a living artifact.
|
||||
|
||||
## Scope
|
||||
This specification covers the core runtime of Hermes agent v0.7.x as found in the `hermes-agent` codebase.
|
||||
|
||||
## High-Level Architecture
|
||||
|
||||
```
|
||||
User Message
|
||||
↓
|
||||
Gateway (gateway/run.py) — platform adapter (Telegram, Discord, CLI, etc.)
|
||||
↓
|
||||
HermesCLI (cli.py) or AIAgent.chat() (run_agent.py)
|
||||
↓
|
||||
ModelTools (model_tools.py) — tool discovery, schema assembly, dispatch
|
||||
↓
|
||||
Tool Registry (tools/registry.py) — handler lookup, availability checks
|
||||
↓
|
||||
Individual Tool Implementations (tools/*.py)
|
||||
↓
|
||||
Results returned up the stack
|
||||
```
|
||||
|
||||
## Module Specifications
|
||||
|
||||
### `run_agent.py`
|
||||
**Lines of Code:** 8948
|
||||
|
||||
**Classes:**
|
||||
- `_SafeWriter`
|
||||
- *Transparent stdio wrapper that catches OSError/ValueError from broken pipes.*
|
||||
- `__init__(self, inner)`
|
||||
- `write(self, data)`
|
||||
- `flush(self)`
|
||||
- `fileno(self)`
|
||||
- `isatty(self)`
|
||||
- ... and 1 more methods
|
||||
- `IterationBudget`
|
||||
- *Thread-safe iteration counter for an agent.*
|
||||
- `__init__(self, max_total)`
|
||||
- `consume(self)`
|
||||
- `refund(self)`
|
||||
- `used(self)`
|
||||
- `remaining(self)`
|
||||
- `AIAgent`
|
||||
- *AI Agent with tool calling capabilities.*
|
||||
- `base_url(self)`
|
||||
- `base_url(self, value)`
|
||||
- `__init__(self, base_url, api_key, provider, api_mode, acp_command, acp_args, command, args, model, max_iterations, tool_delay, enabled_toolsets, disabled_toolsets, save_trajectories, verbose_logging, quiet_mode, ephemeral_system_prompt, log_prefix_chars, log_prefix, providers_allowed, providers_ignored, providers_order, provider_sort, provider_require_parameters, provider_data_collection, session_id, tool_progress_callback, tool_start_callback, tool_complete_callback, thinking_callback, reasoning_callback, clarify_callback, step_callback, stream_delta_callback, tool_gen_callback, status_callback, max_tokens, reasoning_config, prefill_messages, platform, skip_context_files, skip_memory, session_db, iteration_budget, fallback_model, credential_pool, checkpoints_enabled, checkpoint_max_snapshots, pass_session_id, persist_session)`
|
||||
- `reset_session_state(self)`
|
||||
- `_safe_print(self)`
|
||||
- ... and 100 more methods
|
||||
|
||||
**Top-Level Functions:**
|
||||
- `_install_safe_stdio()`
|
||||
- `_is_destructive_command(cmd)`
|
||||
- `_should_parallelize_tool_batch(tool_calls)`
|
||||
- `_extract_parallel_scope_path(tool_name, function_args)`
|
||||
- `_paths_overlap(left, right)`
|
||||
- `_sanitize_surrogates(text)`
|
||||
- `_sanitize_messages_surrogates(messages)`
|
||||
- `_strip_budget_warnings_from_history(messages)`
|
||||
- `main(query, model, api_key, base_url, max_turns, enabled_toolsets, disabled_toolsets, list_tools, save_trajectories, save_sample, verbose, log_prefix_chars)`
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Persists state to SQLite database.
|
||||
- Performs file I/O.
|
||||
- Makes HTTP network calls.
|
||||
- Uses global mutable state (risk factor).
|
||||
|
||||
### `model_tools.py`
|
||||
**Lines of Code:** 466
|
||||
|
||||
**Top-Level Functions:**
|
||||
- `_get_tool_loop()`
|
||||
- `_get_worker_loop()`
|
||||
- `_run_async(coro)`
|
||||
- `_discover_tools()`
|
||||
- `get_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)`
|
||||
- `handle_function_call(function_name, function_args, task_id, user_task, enabled_tools)`
|
||||
- `get_all_tool_names()`
|
||||
- `get_toolset_for_tool(tool_name)`
|
||||
- `get_available_toolsets()`
|
||||
- `check_toolset_requirements()`
|
||||
- ... and 1 more functions
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Uses global mutable state (risk factor).
|
||||
- Primarily pure Python logic / orchestration.
|
||||
|
||||
### `cli.py`
|
||||
**Lines of Code:** 8280
|
||||
|
||||
**Classes:**
|
||||
- `ChatConsole`
|
||||
- *Rich Console adapter for prompt_toolkit's patch_stdout context.*
|
||||
- `__init__(self)`
|
||||
- `print(self)`
|
||||
- `HermesCLI`
|
||||
- *Interactive CLI for the Hermes Agent.*
|
||||
- `__init__(self, model, toolsets, provider, api_key, base_url, max_turns, verbose, compact, resume, checkpoints, pass_session_id)`
|
||||
- `_invalidate(self, min_interval)`
|
||||
- `_status_bar_context_style(self, percent_used)`
|
||||
- `_build_context_bar(self, percent_used, width)`
|
||||
- `_get_status_bar_snapshot(self)`
|
||||
- ... and 106 more methods
|
||||
|
||||
**Top-Level Functions:**
|
||||
- `_load_prefill_messages(file_path)`
|
||||
- `_parse_reasoning_config(effort)`
|
||||
- `load_cli_config()`
|
||||
- `_run_cleanup()`
|
||||
- `_git_repo_root()`
|
||||
- `_path_is_within_root(path, root)`
|
||||
- `_setup_worktree(repo_root)`
|
||||
- `_cleanup_worktree(info)`
|
||||
- `_prune_stale_worktrees(repo_root, max_age_hours)`
|
||||
- `_accent_hex()`
|
||||
- ... and 9 more functions
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Persists state to SQLite database.
|
||||
- Performs file I/O.
|
||||
- Spawns subprocesses / shell commands.
|
||||
- Uses global mutable state (risk factor).
|
||||
|
||||
### `tools/registry.py`
|
||||
**Lines of Code:** 275
|
||||
|
||||
**Classes:**
|
||||
- `ToolEntry`
|
||||
- *Metadata for a single registered tool.*
|
||||
- `__init__(self, name, toolset, schema, handler, check_fn, requires_env, is_async, description, emoji)`
|
||||
- `ToolRegistry`
|
||||
- *Singleton registry that collects tool schemas + handlers from tool files.*
|
||||
- `__init__(self)`
|
||||
- `register(self, name, toolset, schema, handler, check_fn, requires_env, is_async, description, emoji)`
|
||||
- `deregister(self, name)`
|
||||
- `get_definitions(self, tool_names, quiet)`
|
||||
- `dispatch(self, name, args)`
|
||||
- ... and 10 more methods
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Primarily pure Python logic / orchestration.
|
||||
|
||||
### `gateway/run.py`
|
||||
**Lines of Code:** 6657
|
||||
|
||||
**Classes:**
|
||||
- `GatewayRunner`
|
||||
- *Main gateway controller.*
|
||||
- `__init__(self, config)`
|
||||
- `_has_setup_skill(self)`
|
||||
- `_load_voice_modes(self)`
|
||||
- `_save_voice_modes(self)`
|
||||
- `_set_adapter_auto_tts_disabled(self, adapter, chat_id, disabled)`
|
||||
- ... and 78 more methods
|
||||
|
||||
**Top-Level Functions:**
|
||||
- `_ensure_ssl_certs()`
|
||||
- `_normalize_whatsapp_identifier(value)`
|
||||
- `_expand_whatsapp_auth_aliases(identifier)`
|
||||
- `_resolve_runtime_agent_kwargs()`
|
||||
- `_build_media_placeholder(event)`
|
||||
- `_dequeue_pending_text(adapter, session_key)`
|
||||
- `_check_unavailable_skill(command_name)`
|
||||
- `_platform_config_key(platform)`
|
||||
- `_load_gateway_config()`
|
||||
- `_resolve_gateway_model(config)`
|
||||
- ... and 4 more functions
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Persists state to SQLite database.
|
||||
- Performs file I/O.
|
||||
- Spawns subprocesses / shell commands.
|
||||
- Contains async code paths.
|
||||
- Uses global mutable state (risk factor).
|
||||
|
||||
### `hermes_state.py`
|
||||
**Lines of Code:** 1270
|
||||
|
||||
**Classes:**
|
||||
- `SessionDB`
|
||||
- *SQLite-backed session storage with FTS5 search.*
|
||||
- `__init__(self, db_path)`
|
||||
- `_execute_write(self, fn)`
|
||||
- `_try_wal_checkpoint(self)`
|
||||
- `close(self)`
|
||||
- `_init_schema(self)`
|
||||
- ... and 29 more methods
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Persists state to SQLite database.
|
||||
|
||||
### `agent/context_compressor.py`
|
||||
**Lines of Code:** 676
|
||||
|
||||
**Classes:**
|
||||
- `ContextCompressor`
|
||||
- *Compresses conversation context when approaching the model's context limit.*
|
||||
- `__init__(self, model, threshold_percent, protect_first_n, protect_last_n, summary_target_ratio, quiet_mode, summary_model_override, base_url, api_key, config_context_length, provider)`
|
||||
- `update_from_response(self, usage)`
|
||||
- `should_compress(self, prompt_tokens)`
|
||||
- `should_compress_preflight(self, messages)`
|
||||
- `get_status(self)`
|
||||
- ... and 11 more methods
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Primarily pure Python logic / orchestration.
|
||||
|
||||
### `agent/prompt_caching.py`
|
||||
**Lines of Code:** 72
|
||||
|
||||
**Top-Level Functions:**
|
||||
- `_apply_cache_marker(msg, cache_marker, native_anthropic)`
|
||||
- `apply_anthropic_cache_control(api_messages, cache_ttl, native_anthropic)`
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Primarily pure Python logic / orchestration.
|
||||
|
||||
### `agent/skill_commands.py`
|
||||
**Lines of Code:** 297
|
||||
|
||||
**Top-Level Functions:**
|
||||
- `build_plan_path(user_instruction)`
|
||||
- `_load_skill_payload(skill_identifier, task_id)`
|
||||
- `_build_skill_message(loaded_skill, skill_dir, activation_note, user_instruction, runtime_note)`
|
||||
- `scan_skill_commands()`
|
||||
- `get_skill_commands()`
|
||||
- `build_skill_invocation_message(cmd_key, user_instruction, task_id, runtime_note)`
|
||||
- `build_preloaded_skills_prompt(skill_identifiers, task_id)`
|
||||
|
||||
**Inferred Side Effects & Invariants:**
|
||||
- Uses global mutable state (risk factor).
|
||||
- Primarily pure Python logic / orchestration.
|
||||
|
||||
## Cross-Module Dependencies
|
||||
|
||||
Key data flow:
|
||||
1. `run_agent.py` defines `AIAgent` — the canonical conversation loop.
|
||||
2. `model_tools.py` assembles tool schemas and dispatches function calls.
|
||||
3. `tools/registry.py` maintains the central registry; all tool files import it.
|
||||
4. `gateway/run.py` adapts platform events into `AIAgent.run_conversation()` calls.
|
||||
5. `cli.py` (`HermesCLI`) provides the interactive shell and slash-command routing.
|
||||
|
||||
## Known Coupling Risks
|
||||
|
||||
- `run_agent.py` is ~7k SLOC and contains the core loop, todo/memory interception, context compression, and trajectory saving. High blast radius.
|
||||
- `cli.py` is ~6.5k SLOC and combines UI (Rich/prompt_toolkit), config loading, and command dispatch. Tightly coupled to display state.
|
||||
- `model_tools.py` holds a process-global `_last_resolved_tool_names`. Subagent execution saves/restores this global.
|
||||
- `tools/registry.py` is imported by ALL tool files; schema generation happens at import time.
|
||||
|
||||
## Next Actions (Phase II Prep)
|
||||
|
||||
1. Decompose `AIAgent` into: `ConversationLoop`, `ModelDispatcher`, `ToolExecutor`, `MemoryInterceptor`, `PromptBuilder` (see `AIAgent_DECOMPOSITION.md`).
|
||||
2. Extract CLI display logic from command dispatch.
|
||||
3. Define strict interfaces between gateway → agent → tools.
|
||||
4. Write property-based tests for the conversation loop invariant: *given the same message history and tool results, the agent must produce deterministic tool_call ordering*.
|
||||
|
||||
---
|
||||
Generated: 2026-04-05 by Ezra (Phase I)
|
||||
137
docs/ouroboros/specs/test_invariants_stubs.py
Normal file
137
docs/ouroboros/specs/test_invariants_stubs.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
Property-based test stubs for Hermes core invariants.
|
||||
Part of EPIC-999 Phase I — The Mirror.
|
||||
|
||||
These tests define behavioral contracts that ANY rewrite of the runtime
|
||||
must satisfy, including the Hermes Ω target.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Conversation Loop Invariants
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class TestConversationLoopInvariants:
|
||||
"""
|
||||
Invariants for AIAgent.run_conversation and its successors.
|
||||
"""
|
||||
|
||||
def test_deterministic_tool_ordering(self):
|
||||
"""
|
||||
Given the same message history and available tools,
|
||||
the agent must produce the same tool_call ordering.
|
||||
|
||||
(If non-determinism is introduced by temperature > 0,
|
||||
this becomes a statistical test.)
|
||||
"""
|
||||
pytest.skip("TODO: implement with seeded mock model responses")
|
||||
|
||||
def test_tool_result_always_appended_to_history(self):
|
||||
"""
|
||||
After any tool_call is executed, its result MUST appear
|
||||
in the conversation history before the next assistant turn.
|
||||
"""
|
||||
pytest.skip("TODO: mock model with forced tool_call and verify history")
|
||||
|
||||
def test_iteration_budget_never_exceeded(self):
|
||||
"""
|
||||
The loop must terminate before api_call_count >= max_iterations
|
||||
AND before iteration_budget.remaining <= 0.
|
||||
"""
|
||||
pytest.skip("TODO: mock model to always return tool_calls; verify termination")
|
||||
|
||||
def test_system_prompt_presence(self):
|
||||
"""
|
||||
Every API call must include a system message as the first message
|
||||
(or system parameter for providers that support it).
|
||||
"""
|
||||
pytest.skip("TODO: intercept all client.chat.completions.create calls")
|
||||
|
||||
def test_compression_preserves_last_n_messages(self):
|
||||
"""
|
||||
After context compression, the final N messages (configurable,
|
||||
default ~4) must remain uncompressed to preserve local context.
|
||||
"""
|
||||
pytest.skip("TODO: create history > threshold, compress, verify tail")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Tool Registry Invariants
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class TestToolRegistryInvariants:
|
||||
"""
|
||||
Invariants for tools.registry.ToolRegistry.
|
||||
"""
|
||||
|
||||
def test_register_then_list_contains_tool(self):
|
||||
"""
|
||||
After register() is called with a valid schema and handler,
|
||||
list_tools() must include the registered name.
|
||||
"""
|
||||
pytest.skip("TODO: instantiate fresh Registry, register, assert membership")
|
||||
|
||||
def test_dispatch_unknown_tool_returns_error_json(self):
|
||||
"""
|
||||
Calling dispatch() with an unregistered tool name must return
|
||||
a JSON string containing an error key, never raise raw.
|
||||
"""
|
||||
pytest.skip("TODO: call dispatch with 'nonexistent_tool', parse result")
|
||||
|
||||
def test_handler_receives_task_id_kwarg(self):
|
||||
"""
|
||||
Registered handlers that accept **kwargs must receive task_id
|
||||
when dispatch is called with one.
|
||||
"""
|
||||
pytest.skip("TODO: register mock handler, dispatch with task_id, verify")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# State Persistence Invariants
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class TestStatePersistenceInvariants:
|
||||
"""
|
||||
Invariants for hermes_state.SessionDB.
|
||||
"""
|
||||
|
||||
def test_saved_message_is_retrievable_by_session_id(self):
|
||||
"""
|
||||
After save_message(session_id, ...), get_messages(session_id)
|
||||
must return the message.
|
||||
"""
|
||||
pytest.skip("TODO: use temp SQLite DB, save, query, assert")
|
||||
|
||||
def test_fts_search_returns_relevant_messages(self):
|
||||
"""
|
||||
After indexing messages, FTS search for a unique keyword
|
||||
must return the message containing it.
|
||||
"""
|
||||
pytest.skip("TODO: seed DB with messages, search unique token")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Context Compressor Invariants
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class TestContextCompressorInvariants:
|
||||
"""
|
||||
Invariants for agent.context_compressor.ContextCompressor.
|
||||
"""
|
||||
|
||||
def test_compression_reduces_token_count(self):
|
||||
"""
|
||||
The output of compress_messages() must have fewer tokens than
|
||||
the uncompressed input (for any input > threshold).
|
||||
"""
|
||||
pytest.skip("TODO: mock tokenizer, provide long history, assert reduction")
|
||||
|
||||
def test_compression_never_drops_system_message(self):
|
||||
"""
|
||||
The system message must survive compression and remain
|
||||
at index 0 of the returned message list.
|
||||
"""
|
||||
pytest.skip("TODO: compress history with system msg, verify position")
|
||||
@@ -1,166 +0,0 @@
|
||||
# Research Acknowledgment: SSD — Simple Self-Distillation Improves Code Generation
|
||||
|
||||
**Issue:** #128
|
||||
**Paper:** [Embarrassingly Simple Self-Distillation Improves Code Generation](https://arxiv.org/abs/2604.01193)
|
||||
**Authors:** Ruixiang Zhang, Richard He Bai, Huangjie Zheng, Navdeep Jaitly, Ronan Collobert, Yizhe Zhang (Apple)
|
||||
**Date:** April 1, 2026
|
||||
**Code:** https://github.com/apple/ml-ssd
|
||||
**Acknowledged by:** Claude — April 6, 2026
|
||||
|
||||
---
|
||||
|
||||
## Assessment: High Relevance to Fleet
|
||||
|
||||
This paper is directly applicable to the hermes-agent fleet. The headline result — +7.5pp pass@1 on Qwen3-4B — is at exactly the scale at which we operate. The method requires no external infrastructure. Triage verdict: **P0 / Week-class work**.
|
||||
|
||||
---
|
||||
|
||||
## What SSD Actually Does
|
||||
|
||||
Three steps, nothing exotic:
|
||||
|
||||
1. **Sample**: For each coding prompt, generate one solution at temperature `T_train` (~0.9). Do NOT filter for correctness.
|
||||
2. **Fine-tune**: SFT on the resulting `(prompt, unverified_solution)` pairs. Standard cross-entropy loss. No RLHF, no GRPO, no DPO.
|
||||
3. **Evaluate**: Decode at `T_eval`, which must be **different** from `T_train`. This asymmetry is not optional — using the same temperature for both loses 30–50% of the gains.
|
||||
|
||||
The counterintuitive part: N=1 per problem, unverified. Prior self-improvement work uses N>>1 and filters by execution. SSD doesn't. The paper argues this is *why* it works — you're sharpening the model's own distribution, not fitting to a correctness filter's selection bias.
|
||||
|
||||
---
|
||||
|
||||
## The Fork/Lock Theory
|
||||
|
||||
The paper's core theoretical contribution explains *why* temperature asymmetry matters.
|
||||
|
||||
**Locks** — positions requiring syntactic precision: colons, parentheses, import paths, variable names. A mistake here is a hard error. Low temperature helps at Locks. But applying low temperature globally kills diversity everywhere.
|
||||
|
||||
**Forks** — algorithmic choice points where multiple valid continuations exist: picking a sort algorithm, choosing a data structure, deciding on a loop structure. High temperature helps at Forks. But applying high temperature globally introduces errors at Locks.
|
||||
|
||||
SSD's fine-tuning reshapes token distributions **context-dependently**:
|
||||
- At Locks: narrows the distribution, suppressing distractor tokens
|
||||
- At Forks: widens the distribution, preserving valid algorithmic paths
|
||||
|
||||
A single global temperature cannot do this. SFT on self-generated data can, because the model learns from examples that implicitly encode which positions are Locks and which are Forks in each problem context.
|
||||
|
||||
**Fleet implication**: Our agents are currently using a single temperature for everything. This is leaving performance on the table even without fine-tuning. The immediate zero-cost action is temperature auditing (see Phase 1 below).
|
||||
|
||||
---
|
||||
|
||||
## Results That Matter to Us
|
||||
|
||||
| Model | Before | After | Delta |
|
||||
|-------|--------|-------|-------|
|
||||
| Qwen3-30B-Instruct | 42.4% | 55.3% | +12.9pp (+30% rel) |
|
||||
| Qwen3-4B-Instruct | baseline | baseline+7.5pp | +7.5pp |
|
||||
| Llama-3.1-8B-Instruct | baseline | baseline+3.5pp | +3.5pp |
|
||||
|
||||
Gains concentrate on hard problems: +14.2pp medium, +15.3pp hard. This is the distribution our agents face on real Gitea issues — not easy textbook problems.
|
||||
|
||||
---
|
||||
|
||||
## Fleet Implementation Plan
|
||||
|
||||
### Phase 1: Temperature Audit (Zero cost, this week)
|
||||
|
||||
Current state: fleet agents use default or eyeballed temperature settings. The paper shows T_eval != T_train is critical even without fine-tuning.
|
||||
|
||||
Actions:
|
||||
1. Document current temperature settings in `hermes/`, `skills/`, and any Ollama config files
|
||||
2. Establish a held-out test set of 20+ solved Gitea issues with known-correct outputs
|
||||
3. Run A/B: current T_eval vs. T_eval=0.7 vs. T_eval=0.3 for code generation tasks
|
||||
4. Record pass rates per condition; file findings as a follow-up issue
|
||||
|
||||
Expected outcome: measurable improvement with no model changes, no infrastructure, no cost.
|
||||
|
||||
### Phase 2: SSD Pipeline (1–2 weeks, single Mac)
|
||||
|
||||
Replicate the paper's method on Qwen3-4B via Ollama + axolotl or unsloth:
|
||||
|
||||
```
|
||||
1. Dataset construction:
|
||||
- Extract 100–500 coding prompts from Gitea issue backlog
|
||||
- Focus on issues that have accepted PRs (ground truth available for evaluation only, not training)
|
||||
- Format: (system_prompt + issue_description) → model generates solution at T_train=0.9
|
||||
|
||||
2. Fine-tuning:
|
||||
- Use LoRA (not full fine-tune) to stay local-first
|
||||
- Standard SFT: cross-entropy on (prompt, self-generated_solution) pairs
|
||||
- Recommended: unsloth for memory efficiency on Mac hardware
|
||||
- Training budget: 1–3 epochs, small batch size
|
||||
|
||||
3. Evaluation:
|
||||
- Compare base model vs. SSD-tuned model at T_eval=0.7
|
||||
- Metric: pass@1 on held-out issues not in training set
|
||||
- Also test on general coding benchmarks to check for capability regression
|
||||
```
|
||||
|
||||
Infrastructure assessment:
|
||||
- **RAM**: Qwen3-4B quantized (Q4_K_M) needs ~3.5GB of memory for inference; LoRA fine-tuning needs ~8–12GB of unified memory (Mac M-series feasible)
|
||||
- **Storage**: Self-generated dataset is small; LoRA adapter is ~100–500MB
|
||||
- **Time**: 500 examples × 3 epochs ≈ 2–4 hours on M2/M3 Max
|
||||
- **Dependencies**: Ollama (inference), unsloth or axolotl (fine-tuning), datasets (HuggingFace), trl
|
||||
|
||||
No cloud required. No teacher model required. No code execution environment required.
|
||||
|
||||
### Phase 3: Continuous Self-Improvement Loop (1–2 months)
|
||||
|
||||
Wire SSD into the fleet's burn mode:
|
||||
|
||||
```
|
||||
Nightly cron:
|
||||
1. Collect agent solutions from the day's completed issues
|
||||
2. Filter: only solutions where the PR was merged (human-verified correct)
|
||||
3. Append to rolling training buffer (last 500 examples)
|
||||
4. Run SFT fine-tune on buffer → update LoRA adapter
|
||||
5. Swap adapter into Ollama deployment at dawn
|
||||
6. Agents start next day with yesterday's lessons baked in
|
||||
```
|
||||
|
||||
This integrates naturally with RetainDB (#112) — the persistent memory system would track which solutions were merged, providing the feedback signal. The continuous loop turns every merged PR into a training example.
|
||||
|
||||
### Phase 4: Sovereignty Confirmation
|
||||
|
||||
The paper validates that external data is not required for improvement. Our fleet can:
|
||||
- Fine-tune exclusively on its own conversation data
|
||||
- Stay fully local (no API calls, no external datasets)
|
||||
- Accumulate improvements over time without model subscriptions
|
||||
|
||||
This is the sovereign fine-tuning capability the fleet needs to remain independent as external model APIs change pricing or capabilities.
|
||||
|
||||
---
|
||||
|
||||
## Risks and Mitigations
|
||||
|
||||
| Risk | Assessment | Mitigation |
|
||||
|------|------------|------------|
|
||||
| SSD gains don't transfer from LiveCodeBench to Gitea issues | Medium — our domain is software engineering, not competitive programming | Test on actual Gitea issues from the backlog; don't assume benchmark numbers transfer |
|
||||
| Fine-tuning degrades non-code capabilities | Low-Medium | LoRA instead of full fine-tune; test on general tasks after SFT; retain base model checkpoint |
|
||||
| Small training set (<200 examples) insufficient | Medium | Paper shows gains at modest scale; supplement with open code datasets (The Stack, The Vault) if needed |
|
||||
| Qwen3 GGUF format incompatible with unsloth fine-tuning | Low | unsloth supports Qwen3; verify exact GGUF variant compatibility before starting |
|
||||
| Temperature asymmetry effect smaller on instruction-tuned variants | Low | Paper explicitly tests instruct variants and shows gains; Qwen3-4B-Instruct is in the paper's results |
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria Status
|
||||
|
||||
From the issue:
|
||||
|
||||
- [ ] **Temperature audit** — Document current T/top_p settings across fleet agents, compare with paper recommendations
|
||||
- [ ] **T_eval benchmark** — A/B test on 20+ solved Gitea issues; measure correctness
|
||||
- [ ] **SSD reproduction** — Replicate the pipeline on Qwen3-4B with 100 prompts; measure pass@1 change
|
||||
- [ ] **Infrastructure assessment** — Documented above (Phase 2 section); GPU/RAM/storage requirements are Mac-feasible
|
||||
- [ ] **Continuous loop design** — Architecture drafted above (Phase 3 section); integrates with RetainDB (#112)
|
||||
|
||||
The infrastructure assessment and continuous loop design are addressed in this document. The temperature audit, T_eval benchmark, and SSD reproduction still require execution and are covered by the follow-up issues recommended below.
|
||||
|
||||
---
|
||||
|
||||
## Recommended Follow-Up Issues
|
||||
|
||||
1. **Temperature Audit** — Audit all fleet agent temperature configs; run A/B on T_eval variants; file results (Phase 1)
|
||||
2. **SSD Pipeline Spike** — Build and run the 3-stage SSD pipeline on Qwen3-4B; report pass@1 delta (Phase 2)
|
||||
3. **Nightly SFT Integration** — Wire SSD into burn-mode cron; integrate with RetainDB feedback loop (Phase 3)
|
||||
|
||||
---
|
||||
|
||||
*Research acknowledged by Claude — April 6, 2026*
|
||||
*Source issue: [hermes-agent #128](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/128)*
|
||||
@@ -12,7 +12,6 @@ from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from utils import atomic_json_write
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -87,7 +86,9 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
try:
|
||||
atomic_json_write(DIRECTORY_PATH, directory)
|
||||
DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(DIRECTORY_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(directory, f, indent=2, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to write: %s", e)
|
||||
|
||||
|
||||
@@ -779,9 +779,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
config.platforms[Platform.MATRIX].extra["password"] = matrix_password
|
||||
matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes")
|
||||
config.platforms[Platform.MATRIX].extra["encryption"] = matrix_e2ee
|
||||
matrix_device_id = os.getenv("MATRIX_DEVICE_ID", "")
|
||||
if matrix_device_id:
|
||||
config.platforms[Platform.MATRIX].extra["device_id"] = matrix_device_id
|
||||
matrix_home = os.getenv("MATRIX_HOME_ROOM")
|
||||
if matrix_home and Platform.MATRIX in config.platforms:
|
||||
config.platforms[Platform.MATRIX].home_channel = HomeChannel(
|
||||
|
||||
@@ -21,8 +21,6 @@ Storage: ~/.hermes/pairing/
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
@@ -47,29 +45,13 @@ PAIRING_DIR = get_hermes_dir("platforms/pairing", "pairing")
|
||||
|
||||
|
||||
def _secure_write(path: Path, data: str) -> None:
|
||||
"""Write data to file with restrictive permissions (owner read/write only).
|
||||
|
||||
Uses a temp-file + atomic rename so readers always see either the old
|
||||
complete file or the new one — never a partial write.
|
||||
"""
|
||||
"""Write data to file with restrictive permissions (owner read/write only)."""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
|
||||
path.write_text(data, encoding="utf-8")
|
||||
try:
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as f:
|
||||
f.write(data)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, str(path))
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
pass # Windows doesn't support chmod the same way
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
pass # Windows doesn't support chmod the same way
|
||||
|
||||
|
||||
class PairingStore:
|
||||
@@ -84,9 +66,6 @@ class PairingStore:
|
||||
|
||||
def __init__(self):
|
||||
PAIRING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
# Protects all read-modify-write cycles. The gateway runs multiple
|
||||
# platform adapters concurrently in threads sharing one PairingStore.
|
||||
self._lock = threading.RLock()
|
||||
|
||||
def _pending_path(self, platform: str) -> Path:
|
||||
return PAIRING_DIR / f"{platform}-pending.json"
|
||||
@@ -126,7 +105,7 @@ class PairingStore:
|
||||
return results
|
||||
|
||||
def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None:
|
||||
"""Add a user to the approved list. Must be called under self._lock."""
|
||||
"""Add a user to the approved list."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
approved[user_id] = {
|
||||
"user_name": user_name,
|
||||
@@ -137,12 +116,11 @@ class PairingStore:
|
||||
def revoke(self, platform: str, user_id: str) -> bool:
|
||||
"""Remove a user from the approved list. Returns True if found."""
|
||||
path = self._approved_path(platform)
|
||||
with self._lock:
|
||||
approved = self._load_json(path)
|
||||
if user_id in approved:
|
||||
del approved[user_id]
|
||||
self._save_json(path, approved)
|
||||
return True
|
||||
approved = self._load_json(path)
|
||||
if user_id in approved:
|
||||
del approved[user_id]
|
||||
self._save_json(path, approved)
|
||||
return True
|
||||
return False
|
||||
|
||||
# ----- Pending codes -----
|
||||
@@ -158,37 +136,36 @@ class PairingStore:
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
self._cleanup_expired(platform)
|
||||
|
||||
# Check lockout
|
||||
if self._is_locked_out(platform):
|
||||
return None
|
||||
# Check lockout
|
||||
if self._is_locked_out(platform):
|
||||
return None
|
||||
|
||||
# Check rate limit for this specific user
|
||||
if self._is_rate_limited(platform, user_id):
|
||||
return None
|
||||
# Check rate limit for this specific user
|
||||
if self._is_rate_limited(platform, user_id):
|
||||
return None
|
||||
|
||||
# Check max pending
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if len(pending) >= MAX_PENDING_PER_PLATFORM:
|
||||
return None
|
||||
# Check max pending
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if len(pending) >= MAX_PENDING_PER_PLATFORM:
|
||||
return None
|
||||
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Record rate limit
|
||||
self._record_rate_limit(platform, user_id)
|
||||
# Record rate limit
|
||||
self._record_rate_limit(platform, user_id)
|
||||
|
||||
return code
|
||||
return code
|
||||
|
||||
def approve_code(self, platform: str, code: str) -> Optional[dict]:
|
||||
"""
|
||||
@@ -196,25 +173,24 @@ class PairingStore:
|
||||
|
||||
Returns {user_id, user_name} on success, None if code is invalid/expired.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
code = code.upper().strip()
|
||||
self._cleanup_expired(platform)
|
||||
code = code.upper().strip()
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if code not in pending:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if code not in pending:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
entry = pending.pop(code)
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
entry = pending.pop(code)
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
# Add to approved list
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
}
|
||||
return {
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
@@ -236,13 +212,12 @@ class PairingStore:
|
||||
|
||||
def clear_pending(self, platform: str = None) -> int:
|
||||
"""Clear all pending requests. Returns count removed."""
|
||||
with self._lock:
|
||||
count = 0
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
count += len(pending)
|
||||
self._save_json(self._pending_path(p), {})
|
||||
count = 0
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
count += len(pending)
|
||||
self._save_json(self._pending_path(p), {})
|
||||
return count
|
||||
|
||||
# ----- Rate limiting and lockout -----
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user