2026-02-21 22:31:43 -08:00
|
|
"""System prompt assembly -- identity, platform hints, skills index, context files.

All functions are stateless. AIAgent._build_system_prompt() calls these to
assemble pieces, then combines them with memory and ephemeral prompts.
"""
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Context file scanning — detect prompt injection in AGENTS.md, .cursorrules,
# SOUL.md before they get injected into the system prompt.
# ---------------------------------------------------------------------------

# (regex, finding id) pairs. Each regex is searched case-insensitively against
# the whole file; the finding id is what shows up in the log line and in the
# BLOCKED placeholder.
_CONTEXT_THREAT_PATTERNS = [
    (r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
    (r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
    (r'system\s+prompt\s+override', "sys_prompt_override"),
    (r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
    (r'act\s+as\s+(if|though)\s+you\s+(have\s+no|don\'t\s+have)\s+(restrictions|limits|rules)', "bypass_restrictions"),
    (r'<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->', "html_comment_injection"),
    (r'<\s*div\s+style\s*=\s*["\'].*display\s*:\s*none', "hidden_div"),
    (r'translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)', "translate_execute"),
    (r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
    (r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
]

# Zero-width / BOM characters (first row) and bidirectional embedding/override
# controls (second row). Any occurrence blocks the file.
_CONTEXT_INVISIBLE_CHARS = {
    '\u200b', '\u200c', '\u200d', '\u2060', '\ufeff',
    '\u202a', '\u202b', '\u202c', '\u202d', '\u202e',
}


def _scan_context_content(content: str, filename: str) -> str:
    """Scan context file content for prompt injection.

    Args:
        content: Full text of the context file.
        filename: Name used in the log message and in the placeholder text.

    Returns:
        ``content`` unchanged when nothing suspicious is found; otherwise a
        one-line ``[BLOCKED: ...]`` placeholder that names every finding. The
        original content is never partially passed through.
    """
    # Iterate the set in sorted order so the findings list (and therefore the
    # log line and the placeholder injected into the prompt) is deterministic
    # across runs instead of depending on string-hash randomisation.
    findings = [
        f"invisible unicode U+{ord(char):04X}"
        for char in sorted(_CONTEXT_INVISIBLE_CHARS)
        if char in content
    ]

    # Pattern findings follow in the declaration order of the pattern table.
    findings.extend(
        pid
        for pattern, pid in _CONTEXT_THREAT_PATTERNS
        if re.search(pattern, content, re.IGNORECASE)
    )

    if not findings:
        return content

    summary = ", ".join(findings)
    logger.warning("Context file %s blocked: %s", filename, summary)
    return f"[BLOCKED: {filename} contained potential prompt injection ({summary}). Content not loaded.]"
|
|
|
|
|
|
2026-03-17 04:16:32 -07:00
|
|
|
|
|
|
|
|
def _find_git_root(start: Path) -> Optional[Path]:
    """Walk *start* and its parents looking for a ``.git`` entry.

    *start* is resolved first, so the search runs over absolute paths.
    ``.git`` is tested with ``exists()``, so both a directory and a plain
    file of that name count as a match.

    Returns the directory containing ``.git``, or ``None`` if we hit the
    filesystem root without finding one.
    """
    resolved = start.resolve()
    for candidate in (resolved, *resolved.parents):
        if (candidate / ".git").exists():
            return candidate
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Candidate file names, in priority order within a single directory.
_HERMES_MD_NAMES = (".hermes.md", "HERMES.md")


def _find_hermes_md(cwd: Path) -> Optional[Path]:
    """Discover the nearest ``.hermes.md`` or ``HERMES.md``.

    Search order: *cwd* first, then each parent directory up to (and
    including) the git repository root. When *cwd* is not inside a git
    repository the walk continues to the filesystem root. Within one
    directory ``.hermes.md`` is preferred over ``HERMES.md``.

    Returns the first match, or ``None`` if nothing is found.
    """
    stop_at = _find_git_root(cwd)
    resolved = cwd.resolve()

    for directory in (resolved, *resolved.parents):
        for name in _HERMES_MD_NAMES:
            candidate = directory / name
            if candidate.is_file():
                return candidate
        # Stop walking at the git root (or filesystem root). The check comes
        # after the lookup so the git root itself is still searched.
        if stop_at and directory == stop_at:
            break
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _strip_yaml_frontmatter(content: str) -> str:
    """Remove optional YAML frontmatter (``---`` delimited) from *content*.

    The frontmatter may contain structured config (model overrides, tool
    settings) that will be handled separately in a future PR. For now we
    strip it so only the human-readable markdown body is injected into the
    system prompt.

    A frontmatter block is recognised only when the very first line is
    exactly ``---`` and a later line is exactly ``---`` (trailing spaces or
    tabs are tolerated on both, as are CRLF line endings). Lines such as
    ``----`` or ``---text`` are not treated as fences.

    Returns:
        The body after the closing fence with leading blank lines removed.
        *content* is returned unchanged when there is no complete
        frontmatter block, or when stripping would leave nothing.
    """
    fence = re.match(
        # opening fence line, optional frontmatter lines, closing fence line
        r"---[ \t]*\r?\n(?:.*?\r?\n)?---[ \t]*(?:\r?\n|\Z)",
        content,
        re.DOTALL,
    )
    if fence is None:
        return content

    # Drop blank lines between the closing fence and the body (LF or CRLF).
    body = content[fence.end():].lstrip("\r\n")
    return body if body else content
|
|
|
|
|
|
|
|
|
|
|
2026-02-21 22:31:43 -08:00
|
|
|
# =========================================================================
|
|
|
|
|
# Constants
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
# Built-in identity text for the agent.
DEFAULT_AGENT_IDENTITY = (
    "You are Hermes Agent, an intelligent AI assistant created by Nous Research. "
    "You are helpful, knowledgeable, and direct. You assist users with a wide "
    "range of tasks including answering questions, writing and editing code, "
    "analyzing information, creative work, and executing actions via your tools. "
    "You communicate clearly, admit uncertainty when appropriate, and prioritize "
    "being genuinely useful over being verbose unless otherwise directed below. "
    "Be targeted and efficient in your exploration and investigations."
)
|
|
|
|
|
|
2026-02-22 02:31:52 -08:00
|
|
|
# Prompt text describing what belongs in persistent memory and what does not.
MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
    "Memory is injected into every turn, so keep it compact and focused on facts that "
    "will still matter later.\n"
    "Prioritize what reduces future user steering — the most valuable memory is one "
    "that prevents the user from having to correct or remind you again. "
    "User preferences and recurring corrections matter more than procedural task details.\n"
    "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
    "state to memory; use session_search to recall those from past transcripts. "
    "If you've discovered a new way to do something, solved a problem that could be "
    "necessary later, save it as a skill with the skill tool."
)
|
|
|
|
|
|
|
|
|
|
# Prompt text telling the agent when to reach for the session_search tool.
SESSION_SEARCH_GUIDANCE = (
    "When the user references something from a past conversation or you suspect "
    "relevant cross-session context exists, use session_search to recall it before "
    "asking them to repeat themselves."
)
|
|
|
|
|
|
2026-02-22 13:28:13 -08:00
|
|
|
# Prompt text telling the agent when to create and when to patch skills.
SKILLS_GUIDANCE = (
    "After completing a complex task (5+ tool calls), fixing a tricky error, "
    "or discovering a non-trivial workflow, save the approach as a "
    "skill with skill_manage so you can reuse it next time.\n"
    "When using a skill and finding it outdated, incomplete, or wrong, "
    "patch it immediately with skill_manage(action='patch') — don't wait to be asked. "
    "Skills that aren't maintained become liabilities."
)
|
|
|
|
|
|
2026-02-21 22:31:43 -08:00
|
|
|
# Per-platform prompt hints keyed by platform name. Each value is plain prompt
# text describing the rendering and attachment behaviour the agent should
# assume on that platform.
PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. The file "
        "will be sent as a native WhatsApp attachment — images (.jpg, .png, "
        ".webp) appear as photos, videos (.mp4, .mov) play inline, and other "
        "files arrive as downloadable documents. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
        "bubbles, and videos (.mp4) play inline. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as native photos."
    ),
    "discord": (
        "You are in a Discord server or group chat communicating with your user. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.png, .jpg, .webp) are sent as photo "
        "attachments, audio as file attachments. You can also include image URLs "
        "in markdown format ![alt](url) and they will be sent as attachments."
    ),
    "slack": (
        "You are in a Slack workspace communicating with your user. "
        "You can send media files natively: include MEDIA:/absolute/path/to/file "
        "in your response. Images (.png, .jpg, .webp) are uploaded as photo "
        "attachments, audio as file attachments. You can also include image URLs "
        "in markdown format ![alt](url) and they will be uploaded as attachments."
    ),
    "signal": (
        "You are on a text messaging communication platform, Signal. "
        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
        "files arrive as downloadable documents. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
    "email": (
        "You are communicating via email. Write clear, well-structured responses "
        "suitable for email. Use plain text formatting (no markdown). "
        "Keep responses concise but complete. You can send file attachments — "
        "include MEDIA:/absolute/path/to/file in your response. The subject line "
        "is preserved for threading. Do not include greetings or sign-offs unless "
        "contextually appropriate."
    ),
    "cron": (
        "You are running as a scheduled cron job. There is no user present — you "
        "cannot ask questions, request clarification, or wait for follow-up. Execute "
        "the task fully and autonomously, making reasonable decisions where needed. "
        "Your final response is automatically delivered to the job's configured "
        "destination — put the primary content directly in your response."
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
        "renderable inside a terminal."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
        "only — no markdown, no formatting. SMS messages are limited to ~1600 "
        "characters, so be brief and direct."
    ),
}
|
|
|
|
|
|
|
|
|
|
# Context files longer than this many characters are head/tail truncated.
CONTEXT_FILE_MAX_CHARS = 20_000
# Fractions of the character budget kept from the start and from the end of
# an over-long file. They sum to 0.9; the remainder is presumably headroom
# for the truncation marker — TODO confirm.
CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Skills index
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
2026-03-13 03:14:04 -07:00
|
|
|
def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
    """Read a SKILL.md once and return platform compatibility, frontmatter, and description.

    Only the first 2000 characters of the file are read and handed to the
    frontmatter parser. The description is stripped of surrounding
    whitespace and quotes and shortened to at most 60 characters (57 plus
    ``...``).

    Returns (is_compatible, frontmatter, description). An incompatible
    skill yields ``(False, {}, "")``. On any error, returns
    (True, {}, "") to err on the side of showing the skill.
    """
    try:
        # Imported lazily, inside the try, so an import failure also takes
        # the "show the skill" fallback below.
        from tools.skills_tool import _parse_frontmatter, skill_matches_platform

        # Read just the head of the file instead of loading it whole and
        # slicing; the parser never sees more than 2000 characters anyway.
        with skill_file.open(encoding="utf-8") as handle:
            raw = handle.read(2000)
        frontmatter, _ = _parse_frontmatter(raw)

        if not skill_matches_platform(frontmatter):
            return False, {}, ""

        desc = ""
        raw_desc = frontmatter.get("description", "")
        if raw_desc:
            desc = str(raw_desc).strip().strip("'\"")
            if len(desc) > 60:
                desc = desc[:57] + "..."

        return True, frontmatter, desc
    except Exception as e:
        # Deliberately broad: an unreadable or malformed skill must not
        # break prompt assembly.
        logger.debug("Failed to parse skill file %s: %s", skill_file, e)
        return True, {}, ""
|
2026-03-07 00:47:54 -08:00
|
|
|
|
|
|
|
|
|
2026-03-09 23:13:39 +03:00
|
|
|
def _read_skill_conditions(skill_file: Path) -> dict:
    """Extract conditional activation fields from SKILL.md frontmatter.

    Reads the first 2000 characters of *skill_file*, parses the frontmatter
    and looks under ``metadata.hermes`` for the four condition keys.

    Returns:
        A dict with ``fallback_for_toolsets``, ``requires_toolsets``,
        ``fallback_for_tools`` and ``requires_tools``, each always a list
        (empty when the key is absent or null; a scalar is wrapped in a
        one-element list). Returns ``{}`` on any error.
    """

    def _as_list(value) -> list:
        # YAML may deliver null, a bare scalar, or a sequence for these keys.
        if not value:
            return []
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [value]

    try:
        from tools.skills_tool import _parse_frontmatter

        # Only the head of the file is needed for the frontmatter.
        with skill_file.open(encoding="utf-8") as handle:
            raw = handle.read(2000)
        frontmatter, _ = _parse_frontmatter(raw)
        # ``or {}`` covers keys that are present but null in the YAML.
        hermes = (frontmatter.get("metadata") or {}).get("hermes") or {}
        return {
            "fallback_for_toolsets": _as_list(hermes.get("fallback_for_toolsets")),
            "requires_toolsets": _as_list(hermes.get("requires_toolsets")),
            "fallback_for_tools": _as_list(hermes.get("fallback_for_tools")),
            "requires_tools": _as_list(hermes.get("requires_tools")),
        }
    except Exception as e:
        # Deliberately broad: a bad skill file means "no conditions".
        logger.debug("Failed to read skill conditions from %s: %s", skill_file, e)
        return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _condition_names(conditions: dict, key: str) -> tuple:
    """Return the names listed under *key* as a tuple, tolerating null and scalar values."""
    value = conditions.get(key)
    if not value:
        return ()
    if isinstance(value, str):
        # A bare string is one name, not a sequence of characters.
        return (value,)
    try:
        return tuple(value)
    except TypeError:
        return (value,)


def _skill_should_show(
    conditions: dict,
    available_tools: "set[str] | None",
    available_toolsets: "set[str] | None",
) -> bool:
    """Return False if the skill's conditional activation rules exclude it.

    Args:
        conditions: Mapping that may contain ``fallback_for_toolsets``,
            ``fallback_for_tools``, ``requires_toolsets`` and
            ``requires_tools``. Missing, null or empty entries impose no
            constraint; a bare string counts as a single name.
        available_tools: Names of the tools currently available, or None.
        available_toolsets: Names of the toolsets currently available, or None.

    Returns:
        True when both availability arguments are None (no filtering info),
        or when no rule excludes the skill.
    """
    if available_tools is None and available_toolsets is None:
        return True  # No filtering info — show everything (backward compat)

    tools = available_tools or set()
    toolsets = available_toolsets or set()

    # fallback_for: hide when the primary tool/toolset IS available
    if any(ts in toolsets for ts in _condition_names(conditions, "fallback_for_toolsets")):
        return False
    if any(t in tools for t in _condition_names(conditions, "fallback_for_tools")):
        return False

    # requires: hide when a required tool/toolset is NOT available
    if any(ts not in toolsets for ts in _condition_names(conditions, "requires_toolsets")):
        return False
    if any(t not in tools for t in _condition_names(conditions, "requires_tools")):
        return False

    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Keys read from ``metadata.hermes`` in a skill's frontmatter.
_SKILL_CONDITION_KEYS = (
    "fallback_for_toolsets",
    "requires_toolsets",
    "fallback_for_tools",
    "requires_tools",
)


def _conditions_from_frontmatter(frontmatter: dict) -> dict:
    """Pull the conditional-activation lists out of already-parsed frontmatter."""

    def _as_list(value) -> list:
        # YAML may deliver null, a bare scalar, or a sequence for these keys.
        if not value:
            return []
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [value]

    metadata = frontmatter.get("metadata")
    hermes = metadata.get("hermes") if isinstance(metadata, dict) else None
    if not isinstance(hermes, dict):
        return {}
    return {key: _as_list(hermes.get(key)) for key in _SKILL_CONDITION_KEYS}


def build_skills_system_prompt(
    available_tools: "set[str] | None" = None,
    available_toolsets: "set[str] | None" = None,
) -> str:
    """Build a compact skill index for the system prompt.

    Scans ``$HERMES_HOME/skills/`` (default ``~/.hermes/skills/``) for
    SKILL.md files grouped by category. Includes per-skill descriptions
    from frontmatter so the model can match skills by meaning, not just
    name. Skills are left out when they are incompatible with the current
    OS platform, disabled by the user, or excluded by their conditional
    activation rules.

    Args:
        available_tools: Tool names currently available, or None.
        available_toolsets: Toolset names currently available, or None.
            When both are None no conditional filtering is applied.

    Returns:
        The skills section of the system prompt, or ``""`` when the skills
        directory is missing or no skill qualifies.
    """
    hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
    skills_dir = hermes_home / "skills"

    if not skills_dir.exists():
        return ""

    # Load disabled skill names once for the entire scan
    try:
        from tools.skills_tool import _get_disabled_skill_names

        disabled = _get_disabled_skill_names()
    except Exception as e:
        # Best effort: without the config, treat nothing as disabled.
        logger.debug("Could not load disabled skill names: %s", e)
        disabled = set()

    # Collect skills with descriptions, grouped by category.
    # Each entry: (skill_name, description)
    # Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
    # -> category "mlops/training", skill "axolotl"
    skills_by_category: dict[str, list[tuple[str, str]]] = {}
    # Sorted so that, when two files map to the same category/skill name,
    # the entry that survives de-duplication below does not depend on
    # filesystem enumeration order.
    for skill_file in sorted(skills_dir.rglob("SKILL.md")):
        is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
        if not is_compatible:
            continue

        parts = skill_file.relative_to(skills_dir).parts
        if len(parts) >= 2:
            skill_name = parts[-2]
            # Flat layout (skills/<name>/SKILL.md): the skill directory
            # doubles as its own category.
            category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
        else:
            # SKILL.md sitting directly in the skills directory.
            category = "general"
            skill_name = skill_file.parent.name

        # Respect user's disabled skills config
        fm_name = frontmatter.get("name", skill_name)
        if fm_name in disabled or skill_name in disabled:
            continue

        # Skip skills whose conditional activation rules exclude them.
        # The frontmatter parsed above is reused rather than reading and
        # parsing the same file a second time.
        conditions = _conditions_from_frontmatter(frontmatter)
        if not _skill_should_show(conditions, available_tools, available_toolsets):
            continue

        skills_by_category.setdefault(category, []).append((skill_name, desc))

    if not skills_by_category:
        return ""

    # Read category-level descriptions from DESCRIPTION.md in the exact
    # category directory (parent directories are not consulted).
    category_descriptions = {}
    for category in skills_by_category:
        desc_file = skills_dir / Path(category) / "DESCRIPTION.md"
        if not desc_file.exists():
            continue
        try:
            content = desc_file.read_text(encoding="utf-8")
            match = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", content, re.MULTILINE | re.DOTALL)
            if match:
                category_descriptions[category] = match.group(1).strip()
        except Exception as e:
            logger.debug("Could not read skill description %s: %s", desc_file, e)

    index_lines = []
    for category in sorted(skills_by_category):
        cat_desc = category_descriptions.get(category, "")
        if cat_desc:
            index_lines.append(f" {category}: {cat_desc}")
        else:
            index_lines.append(f" {category}:")

        # Deduplicate and sort skills within each category
        seen = set()
        for name, desc in sorted(skills_by_category[category], key=lambda entry: entry[0]):
            if name in seen:
                continue
            seen.add(name)
            if desc:
                index_lines.append(f" - {name}: {desc}")
            else:
                index_lines.append(f" - {name}")

    return (
        "## Skills (mandatory)\n"
        "Before replying, scan the skills below. If one clearly matches your task, "
        "load it with skill_view(name) and follow its instructions. "
        "If a skill has issues, fix it with skill_manage(action='patch').\n"
        "After difficult/iterative tasks, offer to save as a skill. "
        "If a skill you loaded was missing steps, had wrong commands, or needed "
        "pitfalls you discovered, update it before finishing.\n"
        "\n"
        "<available_skills>\n"
        + "\n".join(index_lines) + "\n"
        "</available_skills>\n"
        "\n"
        "If none match, proceed normally without loading a skill."
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
    """Shorten oversized content by keeping its head and tail around a marker.

    Content whose length is at most *max_chars* is returned unchanged.
    Otherwise the first ``max_chars * CONTEXT_TRUNCATE_HEAD_RATIO`` characters
    and the last ``max_chars * CONTEXT_TRUNCATE_TAIL_RATIO`` characters are
    kept, joined by a marker that names *filename* and reports how much was
    kept. The marker text is added on top of the kept characters.

    Args:
        content: Text to truncate.
        filename: Name shown in the truncation marker.
        max_chars: Length threshold and base for the head/tail budgets.

    Returns:
        The original text, or ``head + marker + tail`` when it was too long.
    """
    if len(content) <= max_chars:
        return content

    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)

    head = content[:head_chars]
    # ``content[-0:]`` is the *entire* string, so a zero-sized tail budget
    # must be handled explicitly or the full content would be re-appended.
    tail = content[-tail_chars:] if tail_chars > 0 else ""

    marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
    return head + marker + tail
|
|
|
|
|
|
|
|
|
|
|
2026-03-18 04:11:20 -07:00
|
|
|
def load_soul_md() -> Optional[str]:
    """Return the scanned, truncated text of SOUL.md in HERMES_HOME, or None.

    The file is looked up under ``$HERMES_HOME`` (falling back to
    ``~/.hermes``). ``None`` is returned when the file is missing, empty
    after stripping, or cannot be read.

    Used as the agent identity (slot #1 in the system prompt). When this
    returns content, ``build_context_files_prompt`` should be called with
    ``skip_soul=True`` so SOUL.md isn't injected twice.
    """
    # Best effort: make sure the home directory exists; failure is non-fatal.
    try:
        from hermes_cli.config import ensure_hermes_home
        ensure_hermes_home()
    except Exception as exc:
        logger.debug("Could not ensure HERMES_HOME before loading SOUL.md: %s", exc)

    hermes_home = os.getenv("HERMES_HOME", Path.home() / ".hermes")
    soul_path = Path(hermes_home) / "SOUL.md"

    if soul_path.exists():
        try:
            text = soul_path.read_text(encoding="utf-8").strip()
            if text:
                scanned = _scan_context_content(text, "SOUL.md")
                return _truncate_content(scanned, "SOUL.md")
        except Exception as exc:
            logger.debug("Could not read SOUL.md from %s: %s", soul_path, exc)
    return None
|
|
|
|
|
|
|
|
|
|
|
feat: priority-based context file selection + CLAUDE.md support (#2301)
Previously, all project context files (AGENTS.md, .cursorrules, .hermes.md)
were loaded and concatenated into the system prompt. This bloated the prompt
with potentially redundant or conflicting instructions.
Now only ONE project context type is loaded, using priority order:
1. .hermes.md / HERMES.md (walk to git root)
2. AGENTS.md / agents.md (recursive directory walk)
3. CLAUDE.md / claude.md (cwd only, NEW)
4. .cursorrules / .cursor/rules/*.mdc (cwd only)
SOUL.md from HERMES_HOME remains independent and always loads.
Also adds CLAUDE.md as a recognized context file format, matching the
convention popularized by Claude Code.
Refactored the monolithic function into four focused helpers:
_load_hermes_md, _load_agents_md, _load_claude_md, _load_cursorrules.
Tests: replaced 1 coexistence test with 10 new tests covering priority
ordering, CLAUDE.md loading, case sensitivity, injection blocking.
2026-03-21 06:26:20 -07:00
|
|
|
def _load_hermes_md(cwd_path: Path) -> str:
    """Load the file located by ``_find_hermes_md`` as a context section.

    The text is stripped, passed through ``_strip_yaml_frontmatter`` and
    ``_scan_context_content``, prefixed with a ``## <path>`` heading and
    truncated. The heading uses the path relative to *cwd_path* when the file
    lives under it, otherwise just the file name.

    Returns:
        The formatted section, or ``""`` when no file is found, it is empty,
        or it cannot be read.
    """
    found = _find_hermes_md(cwd_path)
    if not found:
        return ""

    try:
        text = found.read_text(encoding="utf-8").strip()
        if not text:
            return ""
        text = _strip_yaml_frontmatter(text)

        # Prefer a cwd-relative label; fall back to the bare file name when
        # the file is not located under cwd_path.
        try:
            label = str(found.relative_to(cwd_path))
        except ValueError:
            label = found.name

        scanned = _scan_context_content(text, label)
        return _truncate_content(f"## {label}\n\n{scanned}", ".hermes.md")
    except Exception as exc:
        logger.debug("Could not read %s: %s", found, exc)
        return ""
|
2026-02-21 22:31:43 -08:00
|
|
|
|
|
|
|
|
|
feat: priority-based context file selection + CLAUDE.md support (#2301)
Previously, all project context files (AGENTS.md, .cursorrules, .hermes.md)
were loaded and concatenated into the system prompt. This bloated the prompt
with potentially redundant or conflicting instructions.
Now only ONE project context type is loaded, using priority order:
1. .hermes.md / HERMES.md (walk to git root)
2. AGENTS.md / agents.md (recursive directory walk)
3. CLAUDE.md / claude.md (cwd only, NEW)
4. .cursorrules / .cursor/rules/*.mdc (cwd only)
SOUL.md from HERMES_HOME remains independent and always loads.
Also adds CLAUDE.md as a recognized context file format, matching the
convention popularized by Claude Code.
Refactored the monolithic function into four focused helpers:
_load_hermes_md, _load_agents_md, _load_claude_md, _load_cursorrules.
Tests: replaced 1 coexistence test with 10 new tests covering priority
ordering, CLAUDE.md loading, case sensitivity, injection blocking.
2026-03-21 06:26:20 -07:00
|
|
|
def _load_agents_md(cwd_path: Path) -> str:
    """Collect every AGENTS.md under *cwd_path* into one context string.

    Nothing is loaded unless *cwd_path* itself contains ``AGENTS.md`` or
    ``agents.md``. When it does, the tree is walked (skipping hidden
    directories and a few dependency/cache directories) and every file whose
    name matches ``agents.md`` case-insensitively is read, scanned, and
    emitted as a ``## <relative path>`` section.

    Sections are ordered shallowest-first, with the path as a tie-breaker so
    the output does not depend on the filesystem's directory listing order.

    Returns:
        The concatenated, truncated sections, or ``""`` when there is no
        top-level file or no file had any content.
    """
    # Gate on a top-level file so unrelated directories are never walked.
    if not any((cwd_path / name).exists() for name in ("AGENTS.md", "agents.md")):
        return ""

    # Dot-prefixed directories (e.g. .git, .venv) are excluded separately.
    skipped_dirs = ("node_modules", "__pycache__", "venv")

    agents_files = []
    for root, dirs, files in os.walk(cwd_path):
        # Prune in place so os.walk does not descend into these directories.
        dirs[:] = [d for d in dirs if not d.startswith(".") and d not in skipped_dirs]
        agents_files.extend(Path(root) / f for f in files if f.lower() == "agents.md")

    # Depth first; the path string breaks ties because os.walk yields
    # same-depth entries in arbitrary order.
    agents_files.sort(key=lambda p: (len(p.parts), str(p)))

    sections = []
    for agents_path in agents_files:
        try:
            content = agents_path.read_text(encoding="utf-8").strip()
            if content:
                rel_path = agents_path.relative_to(cwd_path)
                content = _scan_context_content(content, str(rel_path))
                sections.append(f"## {rel_path}\n\n{content}\n\n")
        except Exception as e:
            # One unreadable file should not prevent the others from loading.
            logger.debug("Could not read %s: %s", agents_path, e)

    if not sections:
        return ""
    return _truncate_content("".join(sections), "AGENTS.md")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_claude_md(cwd_path: Path) -> str:
    """Load ``CLAUDE.md`` (or ``claude.md``) from *cwd_path* only.

    The candidates are tried in that order; the first one that exists and has
    non-empty content is scanned, given a ``## <name>`` heading, truncated and
    returned. A candidate that is empty or unreadable is skipped.

    Returns:
        The formatted section, or ``""`` when neither candidate yields content.
    """
    for filename in ("CLAUDE.md", "claude.md"):
        path = cwd_path / filename
        if not path.exists():
            continue
        try:
            text = path.read_text(encoding="utf-8").strip()
            if not text:
                continue
            scanned = _scan_context_content(text, filename)
            return _truncate_content(f"## {filename}\n\n{scanned}", "CLAUDE.md")
        except Exception as exc:
            logger.debug("Could not read %s: %s", path, exc)
    return ""
|
2026-02-21 22:31:43 -08:00
|
|
|
|
|
|
|
|
|
feat: priority-based context file selection + CLAUDE.md support (#2301)
Previously, all project context files (AGENTS.md, .cursorrules, .hermes.md)
were loaded and concatenated into the system prompt. This bloated the prompt
with potentially redundant or conflicting instructions.
Now only ONE project context type is loaded, using priority order:
1. .hermes.md / HERMES.md (walk to git root)
2. AGENTS.md / agents.md (recursive directory walk)
3. CLAUDE.md / claude.md (cwd only, NEW)
4. .cursorrules / .cursor/rules/*.mdc (cwd only)
SOUL.md from HERMES_HOME remains independent and always loads.
Also adds CLAUDE.md as a recognized context file format, matching the
convention popularized by Claude Code.
Refactored the monolithic function into four focused helpers:
_load_hermes_md, _load_agents_md, _load_claude_md, _load_cursorrules.
Tests: replaced 1 coexistence test with 10 new tests covering priority
ordering, CLAUDE.md loading, case sensitivity, injection blocking.
2026-03-21 06:26:20 -07:00
|
|
|
def _load_cursorrules(cwd_path: Path) -> str:
    """Load ``.cursorrules`` and ``.cursor/rules/*.mdc`` from *cwd_path* only.

    ``.cursorrules`` comes first, followed by the ``.mdc`` rule files in
    sorted order. Each non-empty file is scanned and emitted under its own
    ``## <path>`` heading; unreadable files are logged and skipped.

    Returns:
        The concatenated, truncated sections, or ``""`` when nothing had
        content.
    """
    chunks = []

    legacy_file = cwd_path / ".cursorrules"
    if legacy_file.exists():
        try:
            text = legacy_file.read_text(encoding="utf-8").strip()
            if text:
                text = _scan_context_content(text, ".cursorrules")
                chunks.append(f"## .cursorrules\n\n{text}\n\n")
        except Exception as exc:
            logger.debug("Could not read .cursorrules: %s", exc)

    rules_dir = cwd_path / ".cursor" / "rules"
    if rules_dir.exists() and rules_dir.is_dir():
        for mdc_file in sorted(rules_dir.glob("*.mdc")):
            try:
                text = mdc_file.read_text(encoding="utf-8").strip()
                if text:
                    text = _scan_context_content(text, f".cursor/rules/{mdc_file.name}")
                    chunks.append(f"## .cursor/rules/{mdc_file.name}\n\n{text}\n\n")
            except Exception as exc:
                logger.debug("Could not read %s: %s", mdc_file, exc)

    combined = "".join(chunks)
    if not combined:
        return ""
    return _truncate_content(combined, ".cursorrules")
|
2026-02-21 22:31:43 -08:00
|
|
|
|
2026-03-17 04:16:32 -07:00
|
|
|
|
feat: priority-based context file selection + CLAUDE.md support (#2301)
Previously, all project context files (AGENTS.md, .cursorrules, .hermes.md)
were loaded and concatenated into the system prompt. This bloated the prompt
with potentially redundant or conflicting instructions.
Now only ONE project context type is loaded, using priority order:
1. .hermes.md / HERMES.md (walk to git root)
2. AGENTS.md / agents.md (recursive directory walk)
3. CLAUDE.md / claude.md (cwd only, NEW)
4. .cursorrules / .cursor/rules/*.mdc (cwd only)
SOUL.md from HERMES_HOME remains independent and always loads.
Also adds CLAUDE.md as a recognized context file format, matching the
convention popularized by Claude Code.
Refactored the monolithic function into four focused helpers:
_load_hermes_md, _load_agents_md, _load_claude_md, _load_cursorrules.
Tests: replaced 1 coexistence test with 10 new tests covering priority
ordering, CLAUDE.md loading, case sensitivity, injection blocking.
2026-03-21 06:26:20 -07:00
|
|
|
def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = False) -> str:
    """Assemble the "Project Context" part of the system prompt.

    Exactly one project context type is used; the loaders are tried in this
    order and the first non-empty result wins:
      1. .hermes.md / HERMES.md  (walk to git root)
      2. AGENTS.md / agents.md   (recursive directory walk)
      3. CLAUDE.md / claude.md   (cwd only)
      4. .cursorrules / .cursor/rules/*.mdc  (cwd only)

    SOUL.md from HERMES_HOME is independent of that choice and is appended
    whenever it has content, unless *skip_soul* is True (it was already
    loaded via ``load_soul_md()`` for the identity slot).

    Args:
        cwd: Directory to search; defaults to the current working directory.
        skip_soul: When True, do not include SOUL.md here.

    Returns:
        The header plus the loaded sections, or ``""`` when nothing was found.
    """
    base_dir = Path(os.getcwd() if cwd is None else cwd).resolve()

    # Priority-based project context: stop at the first loader with content.
    project_context = ""
    for loader in (_load_hermes_md, _load_agents_md, _load_claude_md, _load_cursorrules):
        project_context = loader(base_dir)
        if project_context:
            break

    # SOUL.md from HERMES_HOME only — skipped when already used as identity.
    soul_content = None if skip_soul else load_soul_md()

    sections = [part for part in (project_context, soul_content) if part]
    if not sections:
        return ""

    header = "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n"
    return header + "\n".join(sections)
|