# tests/test_evidence_store.py

import os
import json
import pytest
from pathlib import Path
import importlib.util

# "evidence-store.py" has a hyphen in its filename, so a plain ``import``
# statement cannot name it; build the module from its file path instead.
repo_root = Path(__file__).parent.parent
script_path = repo_root.joinpath(
    "optional-skills",
    "security",
    "oss-forensics",
    "scripts",
    "evidence-store.py",
)

# Create a spec for the file, materialise a module object from it, then run
# the script's top-level code inside that module.
spec = importlib.util.spec_from_file_location("evidence_store", str(script_path))
evidence_store = importlib.util.module_from_spec(spec)
spec.loader.exec_module(evidence_store)

# Class under test, re-exported at module level for the tests below.
EvidenceStore = evidence_store.EvidenceStore


def test_evidence_store_init(tmp_path):
    """Check the state of a store created on a path that does not exist yet.

    Expects the store to remember its path, hold no evidence, report
    metadata version "2.0" and carry a ``chain_of_custody`` entry.
    """
    target = str(tmp_path / "test_evidence.json")
    store = EvidenceStore(target)

    assert store.filepath == target

    data = store.data
    assert len(data["evidence"]) == 0
    assert "metadata" in data
    assert data["metadata"]["version"] == "2.0"
    assert "chain_of_custody" in data


def test_evidence_store_add(tmp_path):
    """Check that ``add`` returns "EV-0001" and records the supplied fields."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    fields = {
        "source": "test_source",
        "content": "test_content",
        "evidence_type": "git",
        "actor": "test_actor",
        "notes": "test_notes",
    }
    new_id = store.add(**fields)

    assert new_id == "EV-0001"

    records = store.data["evidence"]
    assert len(records) == 1

    record = records[0]
    assert record["content"] == "test_content"
    assert record["id"] == "EV-0001"
    assert record["actor"] == "test_actor"
    assert record["notes"] == "test_notes"

    # A hash must be present and be 64 characters long.
    digest = record["content_sha256"]
    assert digest is not None
    assert len(digest) == 64


def test_evidence_store_add_persists(tmp_path):
    """Check that an added record is visible to a second store on the same file."""
    path = str(tmp_path / "test_evidence.json")
    writer = EvidenceStore(path)
    writer.add(source="s1", content="c1", evidence_type="git")

    # A fresh instance pointed at the same path must see the record.
    reader = EvidenceStore(path)
    reloaded = reader.data["evidence"]
    assert len(reloaded) == 1
    assert reloaded[0]["id"] == "EV-0001"


def test_evidence_store_sequential_ids(tmp_path):
    """Check that three consecutive ``add`` calls yield EV-0001 through EV-0003."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    additions = [
        ("s1", "c1", "git"),
        ("s2", "c2", "gh_api"),
        ("s3", "c3", "ioc"),
    ]
    issued = [
        store.add(source=src, content=body, evidence_type=kind)
        for src, body, kind in additions
    ]

    assert issued == ["EV-0001", "EV-0002", "EV-0003"]


def test_evidence_store_list(tmp_path):
    """Check ``list_evidence`` unfiltered, filtered by type, and filtered by actor."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    store.add(source="s1", content="c1", evidence_type="git", actor="a1")
    store.add(source="s2", content="c2", evidence_type="gh_api", actor="a2")

    # No filter: both records come back.
    assert len(store.list_evidence()) == 2

    # Type filter keeps only the "git" record, which belongs to actor a1.
    by_type = store.list_evidence(filter_type="git")
    assert len(by_type) == 1
    assert by_type[0]["actor"] == "a1"

    # Actor filter keeps only a2's record, which is of type "gh_api".
    by_actor = store.list_evidence(filter_actor="a2")
    assert len(by_actor) == 1
    assert by_actor[0]["type"] == "gh_api"


def test_evidence_store_verify_integrity(tmp_path):
    """Check that ``verify_integrity`` is clean at first and flags edited content."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    store.add(source="s1", content="c1", evidence_type="git")
    assert len(store.verify_integrity()) == 0

    # Overwrite the stored content in memory so it no longer matches the
    # hash recorded when the evidence was added.
    tampered = store.data["evidence"][0]
    tampered["content"] = "corrupted_content"

    problems = store.verify_integrity()
    assert len(problems) == 1
    assert problems[0]["id"] == "EV-0001"


def test_evidence_store_query(tmp_path):
    """Check that ``query`` finds the matching record regardless of letter case."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    store.add(
        source="github_api",
        content="malicious activity detected",
        evidence_type="gh_api",
    )
    store.add(source="manual", content="clean observation", evidence_type="manual")

    lower_hits = store.query("malicious")
    assert len(lower_hits) == 1
    assert lower_hits[0]["source"] == "github_api"

    # The same term in upper case must still match exactly one record.
    upper_hits = store.query("MALICIOUS")
    assert len(upper_hits) == 1


def test_evidence_store_query_searches_multiple_fields(tmp_path):
    """Check that ``query`` matches on source and on actor, and misses unknown terms."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    store.add(
        source="git_fsck",
        content="dangling commit abc123",
        evidence_type="git",
        actor="attacker",
    )
    store.add(source="manual", content="clean", evidence_type="manual")

    expected_hits = {
        "fsck": 1,         # substring of the first record's source
        "attacker": 1,     # the first record's actor
        "nonexistent": 0,  # appears in neither record
    }
    for term, count in expected_hits.items():
        assert len(store.query(term)) == count


def test_evidence_store_chain_of_custody(tmp_path):
    """Check that each ``add`` appends one entry to ``chain_of_custody``."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    store.add(source="s1", content="c1", evidence_type="git")
    store.add(source="s2", content="c2", evidence_type="gh_api")

    custody_log = store.data["chain_of_custody"]
    assert len(custody_log) == 2

    first_entry = custody_log[0]
    second_entry = custody_log[1]
    assert first_entry["evidence_id"] == "EV-0001"
    assert first_entry["action"] == "add"
    assert second_entry["evidence_id"] == "EV-0002"


def test_evidence_store_export_markdown(tmp_path):
    """Check that the Markdown export contains the expected headings and values."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    store.add(
        source="git_log",
        content="suspicious commit",
        evidence_type="git",
        actor="actor1",
    )

    rendered = store.export_markdown()
    required_fragments = (
        "# Evidence Registry",
        "EV-0001",
        "Chain of Custody",
        "actor1",
    )
    for fragment in required_fragments:
        assert fragment in rendered


def test_evidence_store_summary(tmp_path):
    """Check the total, per-type counts and actor set reported by ``summary``."""
    store = EvidenceStore(str(tmp_path / "test_evidence.json"))

    rows = [
        ("s1", "c1", "git", "a1"),
        ("s2", "c2", "git", "a2"),
        ("s3", "c3", "gh_api", "a1"),
    ]
    for src, body, kind, who in rows:
        store.add(source=src, content=body, evidence_type=kind, actor=who)

    report = store.summary()
    assert report["total"] == 3

    type_counts = report["by_type"]
    assert type_counts["git"] == 2
    assert type_counts["gh_api"] == 1

    for who in ("a1", "a2"):
        assert who in report["unique_actors"]


def test_evidence_store_corrupted_file(tmp_path):
    """Check that opening a store on a non-JSON file raises ``SystemExit``."""
    broken_file = tmp_path / "test_evidence.json"
    broken_file.write_text("NOT VALID JSON {{{")
    broken_path = str(broken_file)

    # Constructing the store is the operation expected to exit.
    with pytest.raises(SystemExit):
        EvidenceStore(broken_path)
feat: add OSS Security Forensics skill (Skills Hub) (#1482)

* feat: add OSS Security Forensics skill (Skills Hub)

  Salvaged from PR #1066 by zagiscoming. Adds a 7-phase multi-agent investigation framework for GitHub supply chain attack forensics.

  Skill contents (optional-skills/security/oss-forensics/):
  - SKILL.md: 420-line investigation framework with 8 anti-hallucination guardrails, 5 specialist investigators, ethical use guidelines, and API rate limiting guidance
  - evidence-store.py: CLI evidence manager with add/list/verify/query/export/summary + SHA-256 integrity + chain of custody
  - references/: evidence types, GH Archive BigQuery guide (expanded with 12 event types and 6 query templates), recovery techniques (4 methods), investigation templates (5 attack patterns)
  - templates/: forensic report template (151 lines), malicious package report template

  Changes from original PR:
  - Dropped unrelated core tool changes (delegate_tool.py role parameter, AGENTS.md, README.md modifications)
  - Removed duplicate skills/security/oss-forensics/ placement
  - Fixed github-archive-guide.md (missing from optional-skills/, expanded from 33 to 160+ lines with all 12 event types and query templates)
  - Added ethical use guidelines and API rate limiting sections
  - Rewrote tests to match the v2 evidence store API (12 tests, all pass)

  Closes #384

* fix: use python3 and SKILL_DIR paths throughout oss-forensics skill

  - Replace all 'python' invocations with 'python3' for portability (Ubuntu doesn't ship 'python' by default)
  - Replace relative '../scripts/' and '../templates/' paths with SKILL_DIR/scripts/ and SKILL_DIR/templates/ convention
  - Add path convention note before Phase 0 explaining SKILL_DIR
  - Fix double --- separator (cosmetic)
  - Applies to SKILL.md, evidence-store.py docstring, recovery-techniques.md, and forensic-report.md template

---------

Co-authored-by: zagiscoming <zagiscoming@users.noreply.github.com>

2026-03-15 21:59:53 -07:00			`import os`
			`import json`
			`import pytest`
			`from pathlib import Path`
			`import importlib.util`

			`# Load the hyphenated script name dynamically`
			`repo_root = Path(__file__).parent.parent`
			`script_path = repo_root / "optional-skills" / "security" / "oss-forensics" / "scripts" / "evidence-store.py"`

			`spec = importlib.util.spec_from_file_location("evidence_store", str(script_path))`
			`evidence_store = importlib.util.module_from_spec(spec)`
			`spec.loader.exec_module(evidence_store)`
			`EvidenceStore = evidence_store.EvidenceStore`


			`def test_evidence_store_init(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`
			`assert store.filepath == str(store_file)`
			`assert len(store.data["evidence"]) == 0`
			`assert "metadata" in store.data`
			`assert store.data["metadata"]["version"] == "2.0"`
			`assert "chain_of_custody" in store.data`


			`def test_evidence_store_add(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`eid = store.add(`
			`source="test_source",`
			`content="test_content",`
			`evidence_type="git",`
			`actor="test_actor",`
			`notes="test_notes",`
			`)`

			`assert eid == "EV-0001"`
			`assert len(store.data["evidence"]) == 1`
			`assert store.data["evidence"][0]["content"] == "test_content"`
			`assert store.data["evidence"][0]["id"] == "EV-0001"`
			`assert store.data["evidence"][0]["actor"] == "test_actor"`
			`assert store.data["evidence"][0]["notes"] == "test_notes"`
			`# Verify SHA-256 was computed`
			`assert store.data["evidence"][0]["content_sha256"] is not None`
			`assert len(store.data["evidence"][0]["content_sha256"]) == 64`


			`def test_evidence_store_add_persists(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`
			`store.add(source="s1", content="c1", evidence_type="git")`

			`# Reload from disk`
			`store2 = EvidenceStore(str(store_file))`
			`assert len(store2.data["evidence"]) == 1`
			`assert store2.data["evidence"][0]["id"] == "EV-0001"`


			`def test_evidence_store_sequential_ids(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`eid1 = store.add(source="s1", content="c1", evidence_type="git")`
			`eid2 = store.add(source="s2", content="c2", evidence_type="gh_api")`
			`eid3 = store.add(source="s3", content="c3", evidence_type="ioc")`

			`assert eid1 == "EV-0001"`
			`assert eid2 == "EV-0002"`
			`assert eid3 == "EV-0003"`


			`def test_evidence_store_list(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="s1", content="c1", evidence_type="git", actor="a1")`
			`store.add(source="s2", content="c2", evidence_type="gh_api", actor="a2")`

			`all_evidence = store.list_evidence()`
			`assert len(all_evidence) == 2`

			`git_evidence = store.list_evidence(filter_type="git")`
			`assert len(git_evidence) == 1`
			`assert git_evidence[0]["actor"] == "a1"`

			`actor_evidence = store.list_evidence(filter_actor="a2")`
			`assert len(actor_evidence) == 1`
			`assert actor_evidence[0]["type"] == "gh_api"`


			`def test_evidence_store_verify_integrity(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="s1", content="c1", evidence_type="git")`
			`assert len(store.verify_integrity()) == 0`

			`# Manually corrupt the content to trigger a hash mismatch`
			`store.data["evidence"][0]["content"] = "corrupted_content"`
			`issues = store.verify_integrity()`
			`assert len(issues) == 1`
			`assert issues[0]["id"] == "EV-0001"`


			`def test_evidence_store_query(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="github_api", content="malicious activity detected", evidence_type="gh_api")`
			`store.add(source="manual", content="clean observation", evidence_type="manual")`

			`results = store.query("malicious")`
			`assert len(results) == 1`
			`assert results[0]["source"] == "github_api"`

			`# Query should be case-insensitive`
			`results = store.query("MALICIOUS")`
			`assert len(results) == 1`


			`def test_evidence_store_query_searches_multiple_fields(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="git_fsck", content="dangling commit abc123", evidence_type="git", actor="attacker")`
			`store.add(source="manual", content="clean", evidence_type="manual")`

			`# Search by source`
			`assert len(store.query("fsck")) == 1`
			`# Search by actor`
			`assert len(store.query("attacker")) == 1`
			`# Search returns nothing for non-matching`
			`assert len(store.query("nonexistent")) == 0`


			`def test_evidence_store_chain_of_custody(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="s1", content="c1", evidence_type="git")`
			`store.add(source="s2", content="c2", evidence_type="gh_api")`

			`chain = store.data["chain_of_custody"]`
			`assert len(chain) == 2`
			`assert chain[0]["evidence_id"] == "EV-0001"`
			`assert chain[0]["action"] == "add"`
			`assert chain[1]["evidence_id"] == "EV-0002"`


			`def test_evidence_store_export_markdown(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="git_log", content="suspicious commit", evidence_type="git", actor="actor1")`

			`md = store.export_markdown()`
			`assert "# Evidence Registry" in md`
			`assert "EV-0001" in md`
			`assert "Chain of Custody" in md`
			`assert "actor1" in md`


			`def test_evidence_store_summary(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store = EvidenceStore(str(store_file))`

			`store.add(source="s1", content="c1", evidence_type="git", actor="a1")`
			`store.add(source="s2", content="c2", evidence_type="git", actor="a2")`
			`store.add(source="s3", content="c3", evidence_type="gh_api", actor="a1")`

			`s = store.summary()`
			`assert s["total"] == 3`
			`assert s["by_type"]["git"] == 2`
			`assert s["by_type"]["gh_api"] == 1`
			`assert "a1" in s["unique_actors"]`
			`assert "a2" in s["unique_actors"]`


			`def test_evidence_store_corrupted_file(tmp_path):`
			`store_file = tmp_path / "test_evidence.json"`
			`store_file.write_text("NOT VALID JSON {{{")`

			`with pytest.raises(SystemExit):`
			`EvidenceStore(str(store_file))`