Files
timmy-home/tests/test_trajectory_sanitize.py

108 lines
3.4 KiB
Python

from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
from scripts.trajectory_sanitize import sanitize_payload
def test_sanitize_payload_redacts_secrets_and_normalizes_paths() -> None:
    """Check redaction placeholders, preserved values, and home-path rewriting.

    Feeds ``sanitize_payload`` a dict with secret-bearing fields, a free-text
    ``notes`` string, and a nested ``messages`` list, then asserts that:

    * ``api_key``, ``token`` and ``password`` are replaced by their
      ``[REDACTED_*]`` placeholders;
    * the sample e-mail, private IP, internal host and absolute home
      prefixes are gone from ``notes``;
    * two specific e-mail addresses and one public IP are still present
      (presumably allowlisted by the sanitizer -- confirm against its config);
    * home-directory paths appear in their ``~/``-relative form.
    """
    note_text = (
        "email user@example.com keep alexpaynex@gmail.com and "
        "alexander@alexanderwhitestone.com; "
        "ip 10.0.0.8 keep 143.198.27.163; "
        "host app.internal.local; "
        "path /Users/apayne/projects/timmy/file.txt and /home/runner/work/app.py"
    )
    raw = {
        "api_key": "sk-secret1234567890ABCDEF",
        "token": "ghp_12345ABCDEfghijk67890lmnoPQRST",
        "password": "supersecret",
        "notes": note_text,
        "messages": [{"content": "Contact admin@corp.com and token=abc123"}],
    }

    cleaned = sanitize_payload(raw)

    # Whole-field secrets must collapse to a fixed placeholder.
    expected_placeholders = {
        "api_key": "[REDACTED_API_KEY]",
        "token": "[REDACTED_TOKEN]",
        "password": "[REDACTED_PASSWORD]",
    }
    for field, placeholder in expected_placeholders.items():
        assert cleaned[field] == placeholder

    notes = cleaned["notes"]

    # Nested message content is sanitized as well as top-level strings.
    assert "admin@corp.com" not in cleaned["messages"][0]["content"]

    # Fragments that must no longer appear anywhere in the notes text.
    must_vanish = (
        "user@example.com",
        "10.0.0.8",
        "app.internal.local",
        "/Users/apayne/",
        "/home/runner/",
    )
    for fragment in must_vanish:
        assert fragment not in notes

    # Placeholders, preserved values, and rewritten paths that must be present.
    must_remain = (
        "[REDACTED_EMAIL]",
        "alexpaynex@gmail.com",
        "alexander@alexanderwhitestone.com",
        "143.198.27.163",
        "[REDACTED_IP]",
        "[REDACTED_HOST]",
        "~/projects/timmy/file.txt",
        "~/work/app.py",
    )
    for fragment in must_remain:
        assert fragment in notes
def test_sanitize_payload_is_idempotent() -> None:
    """Sanitizing an already-sanitized payload must give an equal result.

    Runs ``sanitize_payload`` on a small payload, feeds the output back in,
    and asserts that the second result compares equal to the first.
    """
    raw = {
        "api_key": "sk-secret1234567890ABCDEF",
        "notes": "email user@example.com host app.internal.local ip 10.0.0.8",
    }

    first_pass = sanitize_payload(raw)
    second_pass = sanitize_payload(first_pass)

    # Placeholders produced by the first pass must not be altered by the second.
    assert second_pass == first_pass
def test_cli_sanitizes_json_and_jsonl_files(tmp_path: Path) -> None:
    """Run the sanitizer CLI over a directory holding a .json and a .jsonl file.

    Writes two fixture files under ``tmp_path / "input"``, invokes
    ``python -m scripts.trajectory_sanitize --input ... --output ...`` in a
    subprocess, and asserts that the CLI reports two sanitized files and that
    both output files contain the expected redacted values.

    Args:
        tmp_path: pytest-provided temporary directory, unique to this test.
    """
    input_dir = tmp_path / "input"
    output_dir = tmp_path / "output"
    input_dir.mkdir()

    # Explicit UTF-8 keeps the fixtures independent of the platform's
    # locale-default encoding.
    session_json = input_dir / "session_a.json"
    session_json.write_text(
        json.dumps(
            {
                "email": "private@example.com",
                "path": "/Users/alice/project/file.txt",
                "host": "app.internal.local",
                "ip": "10.0.0.8",
            }
        ),
        encoding="utf-8",
    )
    session_jsonl = input_dir / "session_b.jsonl"
    session_jsonl.write_text(
        json.dumps({"token": "ghp_12345ABCDEfghijk67890lmnoPQRST"}) + "\n",
        encoding="utf-8",
    )

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "scripts.trajectory_sanitize",
            "--input",
            str(input_dir),
            "--output",
            str(output_dir),
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    # Assert on the exit status ourselves: CalledProcessError (check=True)
    # does not include the captured stderr in its message, which makes a
    # failing CLI run hard to diagnose from the pytest report.
    assert result.returncode == 0, result.stderr
    assert "Sanitized 2 file(s)" in result.stdout

    sanitized_json = json.loads(
        (output_dir / "session_a.json").read_text(encoding="utf-8")
    )
    assert sanitized_json["email"] == "[REDACTED_EMAIL]"
    assert sanitized_json["path"] == "~/project/file.txt"
    assert sanitized_json["host"] == "[REDACTED_HOST]"
    assert sanitized_json["ip"] == "[REDACTED_IP]"

    # The .jsonl output holds one JSON document per line; check the first.
    sanitized_jsonl = (
        (output_dir / "session_b.jsonl")
        .read_text(encoding="utf-8")
        .strip()
        .splitlines()
    )
    assert json.loads(sanitized_jsonl[0])["token"] == "[REDACTED_TOKEN]"