"""Tests for ``scripts.trajectory_sanitize``.

Covers the in-process ``sanitize_payload`` function and the module's
command-line entry point (``python -m scripts.trajectory_sanitize``).
"""

from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

from scripts.trajectory_sanitize import sanitize_payload


def test_sanitize_payload_redacts_secrets_and_normalizes_paths() -> None:
    """Secrets, emails, IPs and hosts are redacted; home paths become ``~/``."""
    payload = {
        "api_key": "sk-secret1234567890ABCDEF",
        "token": "ghp_12345ABCDEfghijk67890lmnoPQRST",
        "password": "supersecret",
        "notes": (
            "email user@example.com keep alexpaynex@gmail.com and "
            "alexander@alexanderwhitestone.com; "
            "ip 10.0.0.8 keep 143.198.27.163; "
            "host app.internal.local; "
            "path /Users/apayne/projects/timmy/file.txt and /home/runner/work/app.py"
        ),
        "messages": [{"content": "Contact admin@corp.com and token=abc123"}],
    }

    sanitized = sanitize_payload(payload)

    # Secret-bearing keys are replaced wholesale with a typed placeholder.
    assert sanitized["api_key"] == "[REDACTED_API_KEY]"
    assert sanitized["token"] == "[REDACTED_TOKEN]"
    assert sanitized["password"] == "[REDACTED_PASSWORD]"

    notes = sanitized["notes"]

    # Emails: arbitrary addresses are redacted (also inside nested lists of
    # dicts), while the two addresses marked "keep" must survive.
    # NOTE(review): presumably these are allow-listed by the sanitizer --
    # confirm against scripts.trajectory_sanitize.
    assert "user@example.com" not in notes
    assert "admin@corp.com" not in sanitized["messages"][0]["content"]
    assert "[REDACTED_EMAIL]" in notes
    assert "alexpaynex@gmail.com" in notes
    assert "alexander@alexanderwhitestone.com" in notes

    # IPs: 10.0.0.8 is redacted, 143.198.27.163 is expected to be preserved.
    assert "10.0.0.8" not in notes
    assert "143.198.27.163" in notes
    assert "[REDACTED_IP]" in notes

    # Internal hostnames are redacted.
    assert "app.internal.local" not in notes
    assert "[REDACTED_HOST]" in notes

    # Per-user home prefixes (/Users/<name>/, /home/<name>/) collapse to "~/".
    assert "~/projects/timmy/file.txt" in notes
    assert "~/work/app.py" in notes
    assert "/Users/apayne/" not in notes
    assert "/home/runner/" not in notes


def test_sanitize_payload_is_idempotent() -> None:
    """Sanitizing already-sanitized output must not change it further."""
    payload = {
        "api_key": "sk-secret1234567890ABCDEF",
        "notes": "email user@example.com host app.internal.local ip 10.0.0.8",
    }

    once = sanitize_payload(payload)
    twice = sanitize_payload(once)

    assert twice == once


def test_cli_sanitizes_json_and_jsonl_files(tmp_path: Path) -> None:
    """The CLI sanitizes every ``.json`` / ``.jsonl`` file from --input into --output."""
    input_dir = tmp_path / "input"
    output_dir = tmp_path / "output"
    input_dir.mkdir()

    # Explicit UTF-8 keeps the fixtures independent of the platform locale.
    session_json = input_dir / "session_a.json"
    session_json.write_text(
        json.dumps(
            {
                "email": "private@example.com",
                "path": "/Users/alice/project/file.txt",
                "host": "app.internal.local",
                "ip": "10.0.0.8",
            }
        ),
        encoding="utf-8",
    )
    session_jsonl = input_dir / "session_b.jsonl"
    session_jsonl.write_text(
        json.dumps({"token": "ghp_12345ABCDEfghijk67890lmnoPQRST"}) + "\n",
        encoding="utf-8",
    )

    # check=False plus an explicit assertion: CalledProcessError's message
    # omits the captured stderr, which makes CLI failures hard to diagnose.
    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "scripts.trajectory_sanitize",
            "--input",
            str(input_dir),
            "--output",
            str(output_dir),
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.returncode == 0, (
        f"sanitizer CLI exited with status {result.returncode}\n"
        f"stdout:\n{result.stdout}\n"
        f"stderr:\n{result.stderr}"
    )
    assert "Sanitized 2 file(s)" in result.stdout

    sanitized_json = json.loads(
        (output_dir / "session_a.json").read_text(encoding="utf-8")
    )
    assert sanitized_json["email"] == "[REDACTED_EMAIL]"
    assert sanitized_json["path"] == "~/project/file.txt"
    assert sanitized_json["host"] == "[REDACTED_HOST]"
    assert sanitized_json["ip"] == "[REDACTED_IP]"

    sanitized_jsonl = (
        (output_dir / "session_b.jsonl")
        .read_text(encoding="utf-8")
        .strip()
        .splitlines()
    )
    # Fail with a clear message instead of an IndexError on empty output.
    assert sanitized_jsonl, "session_b.jsonl was written empty"
    assert json.loads(sanitized_jsonl[0])["token"] == "[REDACTED_TOKEN]"