Compare commits
5 Commits
gemini/iss
...
rescue/v01
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3c8510cc0a | ||
|
|
5a47056073 | ||
| 3fc47a0e2e | |||
| 9b4fcc5ee4 | |||
| 6581dcb1af |
2
.claw/sessions/session-1775533542734-0.jsonl
Normal file
2
.claw/sessions/session-1775533542734-0.jsonl
Normal file
@@ -0,0 +1,2 @@
|
||||
{"created_at_ms":1775533542734,"session_id":"session-1775533542734-0","type":"session_meta","updated_at_ms":1775533542734,"version":1}
|
||||
{"message":{"blocks":[{"text":"You are Code Claw running as the Gitea user claw-code.\n\nRepository: Timmy_Foundation/hermes-agent\nIssue: #126 — P2: Validate Documentation Audit & Apply to Our Fork\nBranch: claw-code/issue-126\n\nRead the issue and recent comments, then implement the smallest correct change.\nYou are in a git repo checkout already.\n\nIssue body:\n## Context\n\nCommit `43d468ce` is a comprehensive documentation audit — fixes stale info, expands thin pages, adds depth across all docs.\n\n## Acceptance Criteria\n\n- [ ] **Catalog all doc changes**: Run `git show 43d468ce --stat` to list all files changed, then review each for what was fixed/expanded\n- [ ] **Verify key docs are accurate**: Pick 3 docs that were previously thin (setup, deployment, plugin development), confirm they now have comprehensive content\n- [ ] **Identify stale info that was corrected**: Note at least 3 pieces of stale information that were removed or updated\n- [ ] **Apply fixes to our fork if needed**: Check if any of the doc fixes apply to our `Timmy_Foundation/hermes-agent` fork (Timmy-specific references, custom config sections)\n\n## Why This Matters\n\nAccurate documentation is critical for onboarding new agents and maintaining the fleet. 
Stale docs cost more debugging time than writing them initially.\n\n## Hints\n\n- Run `cd ~/.hermes/hermes-agent && git show 43d468ce --stat` to see the full scope\n- The docs likely cover: setup, plugins, deployment, MCP configuration, and tool integrations\n\n\nParent: #111\n\nRecent comments:\n## 🏷️ Automated Triage Check\n\n**Timestamp:** 2026-04-06T15:30:12.449023 \n**Agent:** Allegro Heartbeat\n\nThis issue has been identified as needing triage:\n\n### Checklist\n- [ ] Clear acceptance criteria defined\n- [ ] Priority label assigned (p0-critical / p1-important / p2-backlog)\n- [ ] Size estimate added (quick-fix / day / week / epic)\n- [ ] Owner assigned\n- [ ] Related issues linked\n\n### Context\n- No comments yet — needs engagement\n- No labels — needs categorization\n- Part of automated backlog maintenance\n\n---\n*Automated triage from Allegro 15-minute heartbeat*\n\n[BURN-DOWN] Dispatched to Code Claw (claw-code worker) as part of nightly burn-down cycle. Heartbeat active.\n\n🟠 Code Claw (OpenRouter qwen/qwen3.6-plus:free) picking up this issue via 15-minute heartbeat.\n\nTimestamp: 2026-04-07T03:45:37Z\n\nRules:\n- Make focused code/config/doc changes only if they directly address the issue.\n- Prefer the smallest proof-oriented fix.\n- Run relevant verification commands if obvious.\n- Do NOT create PRs yourself; the outer worker handles commit/push/PR.\n- If the task is too large or not code-fit, leave the tree unchanged.\n","type":"text"}],"role":"user"},"type":"message"}
|
||||
2
.claw/sessions/session-1775534636684-0.jsonl
Normal file
2
.claw/sessions/session-1775534636684-0.jsonl
Normal file
@@ -0,0 +1,2 @@
|
||||
{"created_at_ms":1775534636684,"session_id":"session-1775534636684-0","type":"session_meta","updated_at_ms":1775534636684,"version":1}
|
||||
{"message":{"blocks":[{"text":"You are Code Claw running as the Gitea user claw-code.\n\nRepository: Timmy_Foundation/hermes-agent\nIssue: #151 — [CONFIG] Add Kimi model to fallback chain for Allegro and Bezalel\nBranch: claw-code/issue-151\n\nRead the issue and recent comments, then implement the smallest correct change.\nYou are in a git repo checkout already.\n\nIssue body:\n## Problem\nAllegro and Bezalel are choking because the Kimi model code is not on their fallback chain. When primary models fail or rate-limit, Kimi should be available as a fallback option but is currently missing.\n\n## Expected Behavior\nKimi model code should be at the front of the fallback chain for both Allegro and Bezalel, so they can remain responsive when primary models are unavailable.\n\n## Context\nThis was reported in Telegram by Alexander Whitestone after observing both agents becoming unresponsive. Ezra was asked to investigate the fallback chain configuration.\n\n## Related\n- timmy-config #302: [ARCH] Fallback Portfolio Runtime Wiring (general fallback framework)\n- hermes-agent #150: [BEZALEL][AUDIT] Telegram Request-to-Gitea Tracking Audit\n\n## Acceptance Criteria\n- [ ] Kimi model code is added to Allegro fallback chain\n- [ ] Kimi model code is added to Bezalel fallback chain\n- [ ] Fallback ordering places Kimi appropriately (front of chain as requested)\n- [ ] Test and confirm both agents can successfully fall back to Kimi\n- [ ] Document the fallback chain configuration for both agents\n\n/assign @ezra\n\nRecent comments:\n[BURN-DOWN] Dispatched to Code Claw (claw-code worker) as part of nightly burn-down cycle. 
Heartbeat active.\n\n🟠 Code Claw (OpenRouter qwen/qwen3.6-plus:free) picking up this issue via 15-minute heartbeat.\n\nTimestamp: 2026-04-07T04:03:49Z\n\nRules:\n- Make focused code/config/doc changes only if they directly address the issue.\n- Prefer the smallest proof-oriented fix.\n- Run relevant verification commands if obvious.\n- Do NOT create PRs yourself; the outer worker handles commit/push/PR.\n- If the task is too large or not code-fit, leave the tree unchanged.\n","type":"text"}],"role":"user"},"type":"message"}
|
||||
@@ -64,7 +64,7 @@ class ContextCompressor:
|
||||
model: str,
|
||||
threshold_percent: float = 0.50,
|
||||
protect_first_n: int = 3,
|
||||
protect_last_n: int = 20,
|
||||
protect_last_n: int = 5,
|
||||
summary_target_ratio: float = 0.20,
|
||||
quiet_mode: bool = False,
|
||||
summary_model_override: str = None,
|
||||
|
||||
110
agent/pca.py
110
agent/pca.py
@@ -1,110 +0,0 @@
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, asdict
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class PersonalizedCognitiveProfile:
    """
    Represents a personalized cognitive profile for a user.

    Instances are plain data; ``to_dict`` and ``from_dict`` convert to and
    from the dict form.
    """

    # Identifier of the user the profile describes (required).
    user_id: str
    # Tone label for the user; stays None when nothing has been set.
    preferred_tone: Optional[str] = None
    # Add more fields as the PCA evolves

    def to_dict(self) -> dict:
        """Return the profile's fields as a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "PersonalizedCognitiveProfile":
        """Build a profile from a dict, ignoring keys that are not fields.

        Unknown keys are dropped instead of being passed to the constructor,
        so a dict that carries extra fields still produces a profile.

        Raises:
            TypeError: if a required field such as ``user_id`` is missing.
        """
        # Local import: only ``dataclass`` and ``asdict`` are imported at
        # module level.
        from dataclasses import fields

        known = {f.name for f in fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})
|
||||
|
||||
def _get_profile_path(user_id: str) -> Path:
    """
    Returns the path to the personalized cognitive profile file for a given user.

    The result is ``<hermes_home>/profiles/<user_id>/pca_profile.json`` when
    that profile directory exists, otherwise the shared
    ``<hermes_home>/pca_profile.json``.

    A ``user_id`` that is an absolute path or contains a ``..`` component is
    never joined onto the profiles directory; it always gets the shared path,
    so an id cannot point outside ``<hermes_home>/profiles``.
    """
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    shared_path = hermes_home / "pca_profile.json"

    # Joining an absolute path discards everything to its left, and ".."
    # walks upward, so both forms are rejected before any join happens.
    requested = Path(user_id)
    if requested.is_absolute() or ".." in requested.parts:
        return shared_path

    # For now the user_id is used directly as the profile directory name.
    # This needs to be integrated with the existing profile system more robustly.
    profile_dir = hermes_home / "profiles" / user_id
    if not profile_dir.is_dir():
        # Fallback to default HERMES_HOME if no specific user profile dir exists
        return shared_path
    return profile_dir / "pca_profile.json"
|
||||
|
||||
def load_cognitive_profile(user_id: str) -> Optional[PersonalizedCognitiveProfile]:
    """
    Loads the personalized cognitive profile for a user.

    Returns:
        The stored profile, or None when no profile file exists or the file
        cannot be read or parsed (the failure is logged as a warning).
    """
    profile_path = _get_profile_path(user_id)
    try:
        # Open directly instead of checking exists() first: the file may
        # disappear between the check and the open.
        with open(profile_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return PersonalizedCognitiveProfile.from_dict(data)
    except FileNotFoundError:
        # No stored profile is the normal "not yet analysed" case.
        return None
    except Exception as e:
        # Best-effort load: an unreadable or malformed profile must not
        # break the caller.
        logger.warning("Failed to load cognitive profile for user %s: %s", user_id, e)
        return None
|
||||
|
||||
def save_cognitive_profile(profile: PersonalizedCognitiveProfile) -> None:
    """
    Saves the personalized cognitive profile for a user.

    The profile is written as indented UTF-8 JSON to the path chosen by
    ``_get_profile_path``.  Saving is best-effort: any failure, including a
    failure to create the parent directory, is logged and not raised.
    """
    profile_path = _get_profile_path(profile.user_id)
    try:
        # Directory creation sits inside the try so it is handled the same
        # way as a failed write.
        profile_path.parent.mkdir(parents=True, exist_ok=True)
        with open(profile_path, "w", encoding="utf-8") as f:
            json.dump(profile.to_dict(), f, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(
            "Failed to save cognitive profile for user %s: %s", profile.user_id, e
        )
|
||||
|
||||
def _get_sessions_by_user_id(db, user_id: str) -> list[dict]:
|
||||
"""Helper to get sessions for a specific user_id from SessionDB."""
|
||||
def _do(conn):
|
||||
cursor = conn.execute(
|
||||
"SELECT id FROM sessions WHERE user_id = ? ORDER BY started_at DESC",
|
||||
(user_id,)
|
||||
)
|
||||
return [row["id"] for row in cursor.fetchall()]
|
||||
return db._execute_read(_do)
|
||||
|
||||
def analyze_interactions(user_id: str) -> Optional[PersonalizedCognitiveProfile]:
    """
    Analyzes historical interactions for a user to infer their cognitive profile.

    Collects the messages of every session recorded for ``user_id``, derives
    a tone label with a placeholder heuristic, saves the resulting profile
    and returns it.
    """
    logger.info("Analyzing interactions for user %s", user_id)

    from hermes_state import SessionDB

    db = SessionDB()

    all_messages = []
    for session_id in _get_sessions_by_user_id(db, user_id):
        all_messages.extend(db.get_messages_as_conversation(session_id))

    profile = PersonalizedCognitiveProfile(
        user_id=user_id,
        preferred_tone=_infer_preferred_tone(user_id, all_messages),
    )
    save_cognitive_profile(profile)
    return profile


def _infer_preferred_tone(user_id: str, messages: list) -> str:
    """Map a user's messages to a tone label (placeholder heuristic).

    In a real implementation, this would involve NLP techniques.
    """
    if user_id == "Alexander Whitestone":  # Example: Replace with actual detection
        # Demonstration rule: 'formal' after more than 5 messages,
        # 'informal' for less interaction.
        return "formal" if len(messages) > 5 else "informal"

    for message in messages:
        content = message.get("content")
        # content may be None or a non-string value; calling .lower() on it
        # would raise, so only string content is inspected.
        if isinstance(content, str) and "technical" in content.lower():
            return "technical"
    return "neutral"
|
||||
@@ -1,44 +1,34 @@
|
||||
# Ezra Configuration - Kimi Primary
|
||||
# Anthropic removed from chain entirely
|
||||
|
||||
# PRIMARY: Kimi for all operations
|
||||
model: kimi-coding/kimi-for-coding
|
||||
|
||||
# Fallback chain: Only local/offline options
|
||||
# NO anthropic in the chain - quota issues solved
|
||||
fallback_providers:
|
||||
- provider: ollama
|
||||
model: qwen2.5:7b
|
||||
base_url: http://localhost:11434
|
||||
timeout: 120
|
||||
reason: "Local fallback when Kimi unavailable"
|
||||
|
||||
# Provider settings
|
||||
providers:
|
||||
kimi-coding:
|
||||
timeout: 60
|
||||
max_retries: 3
|
||||
# Uses KIMI_API_KEY from .env
|
||||
|
||||
ollama:
|
||||
timeout: 120
|
||||
keep_alive: true
|
||||
base_url: http://localhost:11434
|
||||
|
||||
# REMOVED: anthropic provider entirely
|
||||
# No more quota issues, no more choking
|
||||
|
||||
# Toolsets - Ezra needs these
|
||||
model:
|
||||
default: kimi-k2.5
|
||||
provider: kimi-coding
|
||||
toolsets:
|
||||
- hermes-cli
|
||||
- github
|
||||
- web
|
||||
|
||||
# Agent settings
|
||||
- all
|
||||
fallback_providers:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
reason: Kimi coding fallback (front of chain)
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
reason: Direct Anthropic fallback
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
api_key_env: OPENROUTER_API_KEY
|
||||
timeout: 120
|
||||
reason: OpenRouter fallback
|
||||
agent:
|
||||
max_turns: 90
|
||||
tool_use_enforcement: auto
|
||||
|
||||
# Display settings
|
||||
display:
|
||||
show_provider_switches: true
|
||||
reasoning_effort: high
|
||||
verbose: false
|
||||
providers:
|
||||
kimi-coding:
|
||||
base_url: https://api.kimi.com/coding/v1
|
||||
timeout: 60
|
||||
max_retries: 3
|
||||
anthropic:
|
||||
timeout: 120
|
||||
openrouter:
|
||||
base_url: https://openrouter.ai/api/v1
|
||||
timeout: 120
|
||||
|
||||
@@ -1180,7 +1180,7 @@ class AIAgent:
|
||||
compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
|
||||
compression_summary_model = _compression_cfg.get("summary_model") or None
|
||||
compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
|
||||
compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))
|
||||
compression_protect_last = int(_compression_cfg.get("protect_last_n", 5))
|
||||
|
||||
# Read explicit context_length override from model config
|
||||
_model_cfg = _agent_cfg.get("model", {})
|
||||
|
||||
@@ -557,8 +557,41 @@ class TestSummaryTargetRatio:
|
||||
assert c.threshold_percent == 0.50
|
||||
assert c.threshold_tokens == 50_000
|
||||
|
||||
def test_default_protect_last_n_is_20(self):
|
||||
"""Default protect_last_n should be 20."""
|
||||
def test_default_protect_last_n_is_5(self):
|
||||
"""Default protect_last_n should be 5 (Last 5 Turns protection)."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
assert c.protect_last_n == 20
|
||||
assert c.protect_last_n == 5
|
||||
|
||||
def test_last_5_turns_protected_from_compression(self):
|
||||
"""The most recent 5 messages must never be summarized away."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=10_000):
|
||||
c = ContextCompressor(
|
||||
model="test",
|
||||
quiet_mode=True,
|
||||
protect_first_n=2,
|
||||
protect_last_n=5,
|
||||
threshold_percent=0.50,
|
||||
)
|
||||
# Build messages: system + 11 user/assistant exchanges
|
||||
messages = [{"role": "system", "content": "sys"}]
|
||||
for i in range(11):
|
||||
messages.append({"role": "user", "content": f"user {i}"})
|
||||
messages.append({"role": "assistant", "content": f"assistant {i}"})
|
||||
|
||||
c.last_prompt_tokens = 6_000
|
||||
|
||||
with patch.object(c, "_generate_summary", return_value="[SUMMARY]"):
|
||||
result = c.compress(messages, current_tokens=6_000)
|
||||
|
||||
# The tail should preserve the last 5 raw messages from the original list.
|
||||
# Original last 5 messages: assistant 8, user 9, assistant 9, user 10, assistant 10
|
||||
# The summary may be merged into the first tail message to avoid role collision.
|
||||
tail_roles = [m.get("role") for m in result[-5:]]
|
||||
tail_contents = [m.get("content", "") for m in result[-5:]]
|
||||
assert tail_roles == ["assistant", "user", "assistant", "user", "assistant"]
|
||||
assert tail_contents[-4:] == [
|
||||
"user 9", "assistant 9", "user 10", "assistant 10"
|
||||
]
|
||||
# First tail message has the original content preserved (possibly merged with summary)
|
||||
assert "assistant 8" in tail_contents[0]
|
||||
|
||||
@@ -514,3 +514,79 @@ class TestSymlinkPrefixConfusionRegression:
|
||||
new_escapes = not resolved.is_relative_to(skill_dir_resolved)
|
||||
assert old_escapes is False
|
||||
assert new_escapes is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# V-011 Obfuscation Bypass Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from tools.skills_guard_v011 import normalize_input, analyze_python_ast
|
||||
|
||||
|
||||
class TestNormalizeInput:
    """normalize_input() must undo NFKC, case and zero-width obfuscation."""

    def test_nfkc_homoglyphs(self):
        # Plain ASCII must pass through unchanged.
        assert normalize_input("eval") == "eval"
        # Fullwidth "eval" written as escapes, so the literal cannot be
        # normalised by an editor or copy/paste and leave the test vacuous.
        assert normalize_input("\uff45\uff56\uff41\uff4c") == "eval"

    def test_case_folding(self):
        assert normalize_input("EVaL") == "eval"

    def test_zwsp_removal(self):
        assert normalize_input("ev\u200bal") == "eval"
        assert normalize_input("ex\u200ce\u200dc") == "exec"
        assert normalize_input("get\ufeffattr") == "getattr"

    def test_combined_obfuscation(self):
        assert normalize_input("E\u200bVaL") == "eval"
|
||||
|
||||
|
||||
class TestPythonSecurityAnalyzer:
    """analyze_python_ast() must report each dynamic-execution pattern by id."""

    @staticmethod
    def _pattern_ids(code):
        """Return the set of pattern ids reported for ``code``."""
        return {finding.pattern_id for finding in analyze_python_ast(code, "test.py")}

    def test_detects_eval_call(self):
        assert "dynamic_eval_exec" in self._pattern_ids("eval('1+1')")

    def test_detects_exec_call(self):
        assert "dynamic_eval_exec" in self._pattern_ids("exec('print(1)')")

    def test_detects_compile_call(self):
        assert "dynamic_compile" in self._pattern_ids("compile('pass', '<string>', 'exec')")

    def test_detects_getattr_dunder(self):
        assert "getattr_dunder" in self._pattern_ids('getattr(os, "__import__")')

    def test_detects_import_base64(self):
        assert "import_base64" in self._pattern_ids("import base64")

    def test_no_false_positives_on_safe_code(self):
        findings = analyze_python_ast("print('hello world')", "test.py")
        assert len(findings) == 0
|
||||
|
||||
|
||||
class TestV011Integration:
    """scan_file() must surface the V-011 findings for files on disk."""

    def test_scan_file_catches_obfuscated_eval(self, tmp_path):
        bad_file = tmp_path / "bad.py"
        # Explicit encoding: the payload contains U+200B, which a non-UTF-8
        # platform default encoding may be unable to write.
        bad_file.write_text("e\u200bVal('1+1')", encoding="utf-8")
        findings = scan_file(bad_file, "bad.py")
        assert any("eval" in finding.description.lower() for finding in findings)

    def test_scan_file_catches_dynamic_exec(self, tmp_path):
        bad_file = tmp_path / "bad.py"
        bad_file.write_text("exec('import os')", encoding="utf-8")
        findings = scan_file(bad_file, "bad.py")
        assert any(finding.pattern_id == "dynamic_eval_exec" for finding in findings)

    def test_scan_file_catches_obfuscated_import(self, tmp_path):
        bad_file = tmp_path / "bad.py"
        bad_file.write_text("__import__('os')", encoding="utf-8")
        findings = scan_file(bad_file, "bad.py")
        # Should be caught by regex after normalization
        assert any("__import__" in finding.description.lower() for finding in findings)
|
||||
|
||||
@@ -32,6 +32,12 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import List, Set, Tuple
|
||||
|
||||
from tools.skills_guard_v011 import (
|
||||
normalize_input,
|
||||
analyze_python_ast,
|
||||
V011_OBFUSCATION_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -484,7 +490,7 @@ THREAT_PATTERNS = [
|
||||
(r'(send|post|upload|transmit)\s+.*\s+(to|at)\s+https?://',
|
||||
"send_to_url", "high", "exfiltration",
|
||||
"instructs agent to send data to a URL"),
|
||||
]
|
||||
] + V011_OBFUSCATION_PATTERNS
|
||||
|
||||
# Structural limits for skill directories
|
||||
MAX_FILE_COUNT = 50 # skills shouldn't have 50+ files
|
||||
@@ -921,7 +927,7 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
|
||||
original_line = lines[i - 1] if i <= len(lines) else norm_line
|
||||
matched_text = original_line.strip()
|
||||
if len(matched_text) > 120:
|
||||
matched_text = matched_text[:117] + "..."
|
||||
matched_text = matched_text[:117] + '...'
|
||||
findings.append(Finding(
|
||||
pattern_id=pid,
|
||||
severity=severity,
|
||||
|
||||
186
tools/skills_guard_v011.py
Normal file
186
tools/skills_guard_v011.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""
|
||||
V-011 Skills Guard Bypass fix — Obfuscation detection.
|
||||
|
||||
Adds:
|
||||
- normalize_input() with NFKC + case folding + ZWSP removal
|
||||
- PythonSecurityAnalyzer AST visitor for dynamic execution patterns
|
||||
- Additional obfuscation threat patterns
|
||||
"""
|
||||
|
||||
import ast
|
||||
import re
|
||||
import unicodedata
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
|
||||
@dataclass
class Finding:
    """Minimal Finding dataclass to avoid circular import with skills_guard.py."""

    # Identifier of the rule that fired, e.g. "dynamic_eval_exec".
    pattern_id: str
    # Severity label; this module emits "high" and "medium".
    severity: str
    # Rule category; this module emits "obfuscation".
    category: str
    # Path of the scanned file as supplied by the caller.
    file: str
    # Line number of the offending node (0 when the node carries none).
    line: int
    # Short excerpt or description of the matched construct.
    match: str
    # Human-readable explanation of why the construct is suspicious.
    description: str
|
||||
|
||||
|
||||
# Invisible code points stripped by normalize_input(): soft hyphen, Mongolian
# vowel separator, zero-width space/non-joiner/joiner, directional marks,
# bidi embedding/override/isolate controls, word joiner, invisible math
# operators and the BOM / zero-width no-break space.
_INVISIBLE_CHAR_TABLE = str.maketrans(
    "",
    "",
    "\u00ad\u180e"
    "\u200b\u200c\u200d\u200e\u200f"
    "\u202a\u202b\u202c\u202d\u202e"
    "\u2060\u2061\u2062\u2063\u2064"
    "\u2066\u2067\u2068\u2069"
    "\ufeff",
)


def normalize_input(text: str) -> str:
    """
    Normalize text to defeat common obfuscation bypasses.

    1. NFKC normalization — folds compatibility characters such as
       fullwidth letters onto their plain equivalents
    2. Case folding — uppercase ↔ lowercase evasion
    3. Removal of invisible characters (zero-width spaces and joiners, soft
       hyphen, bidi controls) that can split a keyword without changing
       how it looks

    Newlines are left alone, so line numbers of the result still match the
    input.
    """
    text = unicodedata.normalize("NFKC", text)
    text = text.casefold()
    # One pass over the text instead of one .replace() per character.
    return text.translate(_INVISIBLE_CHAR_TABLE)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AST-based Python security analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class PythonSecurityAnalyzer(ast.NodeVisitor):
    """AST visitor that detects obfuscated/dynamic execution in Python code.

    Findings accumulate in ``self.findings``.  ``self.seen`` holds the
    ``(pattern_id, line, match)`` keys already reported so the same
    construct is not added twice.
    """

    # Top-level modules whose import is reported as a possible
    # encoding/obfuscation helper.
    _OBFUSCATION_MODULES = frozenset({"base64", "codecs", "marshal", "types", "ctypes"})

    def __init__(self, rel_path: str):
        self.rel_path = rel_path
        self.findings: List[Finding] = []
        self.seen = set()

    def _add(self, pattern_id: str, severity: str, category: str, line: int, match: str, description: str):
        """Record one finding unless an identical one was already recorded."""
        key = (pattern_id, line, match)
        if key in self.seen:
            return
        self.seen.add(key)
        # Keep the stored excerpt at no more than 120 characters.
        if len(match) > 120:
            match = match[:117] + "..."
        self.findings.append(Finding(
            pattern_id=pattern_id,
            severity=severity,
            category=category,
            file=self.rel_path,
            line=line,
            match=match,
            description=description,
        ))

    def visit_Call(self, node: ast.Call):
        """Flag eval/exec/compile calls, dunder getattr and __import__ attribute calls."""
        line = getattr(node, "lineno", 0)
        if isinstance(node.func, ast.Name):
            # Every eval/exec/compile call is reported, whatever its arguments.
            if node.func.id in ("eval", "exec"):
                self._add(
                    "dynamic_eval_exec",
                    "high",
                    "obfuscation",
                    line,
                    ast.dump(node)[:120],
                    f"Dynamic {node.func.id}() call detected (possible obfuscation)",
                )
            elif node.func.id == "compile":
                self._add(
                    "dynamic_compile",
                    "high",
                    "obfuscation",
                    line,
                    ast.dump(node)[:120],
                    "compile() call detected (possible code obfuscation)",
                )
            elif node.func.id == "getattr" and len(node.args) >= 2:
                # getattr(..., "__import__") or similar
                attr_arg = node.args[1]
                if isinstance(attr_arg, ast.Constant) and isinstance(attr_arg.value, str):
                    if attr_arg.value.startswith("__") and attr_arg.value.endswith("__"):
                        self._add(
                            "getattr_dunder",
                            "high",
                            "obfuscation",
                            line,
                            f'getattr(..., "{attr_arg.value}")',
                            "getattr used to access dunder attribute (possible sandbox escape)",
                        )
        elif isinstance(node.func, ast.Attribute):
            if node.func.attr in ("__import__", "_import"):
                self._add(
                    "dynamic_import",
                    "high",
                    "obfuscation",
                    line,
                    ast.dump(node)[:120],
                    "Dynamic __import__ invocation detected",
                )
        self.generic_visit(node)

    def visit_Import(self, node: ast.Import):
        """Flag ``import X`` where X, or its top-level package, is a known helper."""
        # Detect import of known obfuscation modules
        for alias in node.names:
            # "ctypes.util" must be matched by its top-level package, or a
            # dotted import slips past the check.
            top_level = alias.name.partition(".")[0]
            if top_level in self._OBFUSCATION_MODULES:
                self._add(
                    f"import_{top_level}",
                    "medium",
                    "obfuscation",
                    getattr(node, "lineno", 0),
                    f"import {alias.name}",
                    f"{top_level} import (possible encoding/obfuscation helper)",
                )
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Flag ``from X import ...`` for absolute imports of known helpers."""
        # level > 0 is a relative import (``from .base64 import x``): that is
        # a sibling module of the scanned code, not the stdlib module.
        # node.module is None for ``from . import x``.
        if node.level == 0 and node.module:
            top_level = node.module.partition(".")[0]
            if top_level in self._OBFUSCATION_MODULES:
                self._add(
                    f"import_from_{top_level}",
                    "medium",
                    "obfuscation",
                    getattr(node, "lineno", 0),
                    f"from {node.module} import ...",
                    f"{top_level} import (possible encoding/obfuscation helper)",
                )
        self.generic_visit(node)
|
||||
|
||||
|
||||
def analyze_python_ast(content: str, rel_path: str) -> "List[Finding]":
    """Run AST analysis on Python content and return findings.

    Content that cannot be parsed yields an empty list instead of an
    exception, so one malformed or hostile file cannot abort a scan.

    Args:
        content: Python source text.
        rel_path: path recorded on each finding.

    Returns:
        The findings collected by ``PythonSecurityAnalyzer``; empty when
        ``content`` does not parse.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: e.g. source containing null bytes.
        # RecursionError: pathologically nested source.
        return []
    analyzer = PythonSecurityAnalyzer(rel_path)
    try:
        analyzer.visit(tree)
    except RecursionError:
        # A tree too deep to walk completely: keep what was found so far
        # rather than discarding it.
        pass
    return analyzer.findings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Additional obfuscation patterns for regex scanning
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Each entry is (regex, pattern_id, severity, category, description).
V011_OBFUSCATION_PATTERNS = [
    # getattr/__import__ chains as strings
    (r'getattr\s*\([^)]*__builtins__[^)]*\)',
     "getattr_builtins_chain", "high", "obfuscation",
     "getattr chain targeting __builtins__ (sandbox escape)"),
    (r'__import__\s*\(\s*["\']os["\']',
     "dunder_import_os", "high", "obfuscation",
     "__import__ used to load os module (obfuscation)"),
    (r'__import__\s*\(\s*["\']subprocess["\']',
     "dunder_import_subprocess", "high", "obfuscation",
     "__import__ used to load subprocess module (obfuscation)"),
    # exec/eval with obfuscated wrappers.
    # Matches a leading "+" right after the parenthesis (the earlier form of
    # this rule) or a "+" next to a string quote inside the call, which is
    # what string concatenation such as eval("ev" + "al") looks like.
    (r'\beval\s*\((?:\s*\+|[^)\n]*(?:["\']\s*\+|\+\s*["\']))',
     "eval_plus_concat", "high", "obfuscation",
     "eval with string concatenation (obfuscation)"),
    (r'\bexec\s*\((?:\s*\+|[^)\n]*(?:["\']\s*\+|\+\s*["\']))',
     "exec_plus_concat", "high", "obfuscation",
     "exec with string concatenation (obfuscation)"),
    # Base64/hex dynamic execution
    (r'base64\.(b64decode|decode)\s*\([^)]*\)\s*\)\s*\)',
     "base64_nested_decode", "high", "obfuscation",
     "nested base64 decode followed by execution (obfuscation)"),
]
|
||||
Reference in New Issue
Block a user