Files
hermes-agent/tests/test_evidence_store.py

187 lines
6.2 KiB
Python
Raw Normal View History

feat: add OSS Security Forensics skill (Skills Hub) (#1482) * feat: add OSS Security Forensics skill (Skills Hub) Salvaged from PR #1066 by zagiscoming. Adds a 7-phase multi-agent investigation framework for GitHub supply chain attack forensics. Skill contents (optional-skills/security/oss-forensics/): - SKILL.md: 420-line investigation framework with 8 anti-hallucination guardrails, 5 specialist investigators, ethical use guidelines, and API rate limiting guidance - evidence-store.py: CLI evidence manager with add/list/verify/query/ export/summary + SHA-256 integrity + chain of custody - references/: evidence types, GH Archive BigQuery guide (expanded with 12 event types and 6 query templates), recovery techniques (4 methods), investigation templates (5 attack patterns) - templates/: forensic report template (151 lines), malicious package report template Changes from original PR: - Dropped unrelated core tool changes (delegate_tool.py role parameter, AGENTS.md, README.md modifications) - Removed duplicate skills/security/oss-forensics/ placement - Fixed github-archive-guide.md (missing from optional-skills/, expanded from 33 to 160+ lines with all 12 event types and query templates) - Added ethical use guidelines and API rate limiting sections - Rewrote tests to match the v2 evidence store API (12 tests, all pass) Closes #384 * fix: use python3 and SKILL_DIR paths throughout oss-forensics skill - Replace all 'python' invocations with 'python3' for portability (Ubuntu doesn't ship 'python' by default) - Replace relative '../scripts/' and '../templates/' paths with SKILL_DIR/scripts/ and SKILL_DIR/templates/ convention - Add path convention note before Phase 0 explaining SKILL_DIR - Fix double --- separator (cosmetic) - Applies to SKILL.md, evidence-store.py docstring, recovery-techniques.md, and forensic-report.md template --------- Co-authored-by: zagiscoming <zagiscoming@users.noreply.github.com>
2026-03-15 21:59:53 -07:00
import os
import json
import pytest
from pathlib import Path
import importlib.util
# The script's filename contains a hyphen, so a plain ``import`` statement
# cannot name it; build the module from its file path instead.
repo_root = Path(__file__).parent.parent
script_path = repo_root.joinpath(
    "optional-skills",
    "security",
    "oss-forensics",
    "scripts",
    "evidence-store.py",
)
spec = importlib.util.spec_from_file_location("evidence_store", str(script_path))
evidence_store = importlib.util.module_from_spec(spec)
# Execute the script body so its top-level definitions become attributes.
spec.loader.exec_module(evidence_store)
EvidenceStore = evidence_store.EvidenceStore
def test_evidence_store_init(tmp_path):
    """A store opened on a not-yet-existing path starts empty at version 2.0."""
    target = str(tmp_path / "test_evidence.json")
    fresh = EvidenceStore(target)

    # The store remembers the path it was given, as a string.
    assert fresh.filepath == target

    payload = fresh.data
    assert len(payload["evidence"]) == 0
    assert "metadata" in payload
    assert payload["metadata"]["version"] == "2.0"
    assert "chain_of_custody" in payload
def test_evidence_store_add(tmp_path):
    """Adding one item returns EV-0001 and records its fields plus a hash."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))

    new_id = ledger.add(
        source="test_source",
        content="test_content",
        evidence_type="git",
        actor="test_actor",
        notes="test_notes",
    )

    assert new_id == "EV-0001"
    assert len(ledger.data["evidence"]) == 1

    entry = ledger.data["evidence"][0]
    assert entry["content"] == "test_content"
    assert entry["id"] == "EV-0001"
    assert entry["actor"] == "test_actor"
    assert entry["notes"] == "test_notes"

    # A SHA-256 hex digest is 64 characters long.
    digest = entry["content_sha256"]
    assert digest is not None
    assert len(digest) == 64
def test_evidence_store_add_persists(tmp_path):
    """An item added through one store is visible to a second store on the same path."""
    location = str(tmp_path / "test_evidence.json")

    writer = EvidenceStore(location)
    writer.add(source="s1", content="c1", evidence_type="git")

    # A second instance on the same path must see what the first one added.
    reader = EvidenceStore(location)
    reloaded = reader.data["evidence"]
    assert len(reloaded) == 1
    assert reloaded[0]["id"] == "EV-0001"
def test_evidence_store_sequential_ids(tmp_path):
    """Successive adds hand out EV-0001, EV-0002, EV-0003 in order."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))

    first = ledger.add(source="s1", content="c1", evidence_type="git")
    second = ledger.add(source="s2", content="c2", evidence_type="gh_api")
    third = ledger.add(source="s3", content="c3", evidence_type="ioc")

    assert first == "EV-0001"
    assert second == "EV-0002"
    assert third == "EV-0003"
def test_evidence_store_list(tmp_path):
    """list_evidence returns everything, or a subset filtered by type or actor."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(source="s1", content="c1", evidence_type="git", actor="a1")
    ledger.add(source="s2", content="c2", evidence_type="gh_api", actor="a2")

    # No filter: both items come back.
    unfiltered = ledger.list_evidence()
    assert len(unfiltered) == 2

    # Filter by evidence type.
    only_git = ledger.list_evidence(filter_type="git")
    assert len(only_git) == 1
    assert only_git[0]["actor"] == "a1"

    # Filter by actor.
    only_a2 = ledger.list_evidence(filter_actor="a2")
    assert len(only_a2) == 1
    assert only_a2[0]["type"] == "gh_api"
def test_evidence_store_verify_integrity(tmp_path):
    """verify_integrity reports nothing for intact data and flags altered content."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(source="s1", content="c1", evidence_type="git")

    # Untouched store: no issues reported.
    assert len(ledger.verify_integrity()) == 0

    # Tamper with the in-memory content so it no longer matches its stored hash.
    ledger.data["evidence"][0]["content"] = "corrupted_content"

    problems = ledger.verify_integrity()
    assert len(problems) == 1
    assert problems[0]["id"] == "EV-0001"
def test_evidence_store_query(tmp_path):
    """query finds the matching item regardless of the search term's case."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(
        source="github_api",
        content="malicious activity detected",
        evidence_type="gh_api",
    )
    ledger.add(source="manual", content="clean observation", evidence_type="manual")

    lower_hits = ledger.query("malicious")
    assert len(lower_hits) == 1
    assert lower_hits[0]["source"] == "github_api"

    # The same term in upper case must still match exactly one item.
    upper_hits = ledger.query("MALICIOUS")
    assert len(upper_hits) == 1
def test_evidence_store_query_searches_multiple_fields(tmp_path):
    """query matches on source and actor, and returns nothing for unknown terms."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(
        source="git_fsck",
        content="dangling commit abc123",
        evidence_type="git",
        actor="attacker",
    )
    ledger.add(source="manual", content="clean", evidence_type="manual")

    # Term that appears only in the first item's source.
    source_hits = ledger.query("fsck")
    assert len(source_hits) == 1

    # Term that appears only in the first item's actor.
    actor_hits = ledger.query("attacker")
    assert len(actor_hits) == 1

    # Term that appears nowhere.
    no_hits = ledger.query("nonexistent")
    assert len(no_hits) == 0
def test_evidence_store_chain_of_custody(tmp_path):
    """Each add appends one chain-of-custody entry, in insertion order."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(source="s1", content="c1", evidence_type="git")
    ledger.add(source="s2", content="c2", evidence_type="gh_api")

    custody_log = ledger.data["chain_of_custody"]
    assert len(custody_log) == 2

    first_entry, second_entry = custody_log[0], custody_log[1]
    assert first_entry["evidence_id"] == "EV-0001"
    assert first_entry["action"] == "add"
    assert second_entry["evidence_id"] == "EV-0002"
def test_evidence_store_export_markdown(tmp_path):
    """The Markdown export contains the title, the item ID, custody section and actor."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(
        source="git_log",
        content="suspicious commit",
        evidence_type="git",
        actor="actor1",
    )

    rendered = ledger.export_markdown()

    # Checked in the same order as they are expected to matter to a reader.
    expected_fragments = (
        "# Evidence Registry",
        "EV-0001",
        "Chain of Custody",
        "actor1",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_evidence_store_summary(tmp_path):
    """summary reports the total, per-type counts and the actors seen."""
    ledger = EvidenceStore(str(tmp_path / "test_evidence.json"))
    ledger.add(source="s1", content="c1", evidence_type="git", actor="a1")
    ledger.add(source="s2", content="c2", evidence_type="git", actor="a2")
    ledger.add(source="s3", content="c3", evidence_type="gh_api", actor="a1")

    report = ledger.summary()

    assert report["total"] == 3

    per_type = report["by_type"]
    assert per_type["git"] == 2
    assert per_type["gh_api"] == 1

    actors = report["unique_actors"]
    assert "a1" in actors
    assert "a2" in actors
def test_evidence_store_corrupted_file(tmp_path):
    """Opening a store whose file is not valid JSON raises SystemExit."""
    broken = tmp_path / "test_evidence.json"
    broken.write_text("NOT VALID JSON {{{")

    location = str(broken)
    with pytest.raises(SystemExit):
        EvidenceStore(location)