108 lines
3.4 KiB
Python
108 lines
3.4 KiB
Python
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import json
|
||
|
|
import subprocess
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from scripts.trajectory_sanitize import sanitize_payload
|
||
|
|
|
||
|
|
|
||
|
|
def test_sanitize_payload_redacts_secrets_and_normalizes_paths() -> None:
    """Check one mixed payload for redaction, retention and path rewriting.

    The payload carries secret-named keys, a free-text ``notes`` field and a
    nested ``messages`` list. The assertions below pin:

    * ``api_key`` / ``token`` / ``password`` values become typed placeholders;
    * some emails, IPs and hosts in ``notes`` are replaced by placeholders,
      while the two "keep" emails and the "keep" IP survive unchanged
      (presumably via an allowlist inside the sanitizer -- not visible here);
    * ``/Users/<name>/`` and ``/home/<name>/`` prefixes are rewritten to ``~/``;
    * the email inside the nested message content is removed as well.
    """
    notes_text = "; ".join(
        [
            "email user@example.com keep alexpaynex@gmail.com and "
            "alexander@alexanderwhitestone.com",
            "ip 10.0.0.8 keep 143.198.27.163",
            "host app.internal.local",
            "path /Users/apayne/projects/timmy/file.txt and /home/runner/work/app.py",
        ]
    )
    raw = {
        "api_key": "sk-secret1234567890ABCDEF",
        "token": "ghp_12345ABCDEfghijk67890lmnoPQRST",
        "password": "supersecret",
        "notes": notes_text,
        "messages": [{"content": "Contact admin@corp.com and token=abc123"}],
    }

    cleaned = sanitize_payload(raw)

    # Values stored under secret-named keys are swapped for a placeholder.
    expected_placeholders = {
        "api_key": "[REDACTED_API_KEY]",
        "token": "[REDACTED_TOKEN]",
        "password": "[REDACTED_PASSWORD]",
    }
    for key, placeholder in expected_placeholders.items():
        assert cleaned[key] == placeholder

    # Nested message content is scrubbed too. NOTE: the ``token=abc123``
    # fragment of that message is not asserted on by this test.
    message_text = cleaned["messages"][0]["content"]
    assert "admin@corp.com" not in message_text

    notes = cleaned["notes"]

    # Fragments that must no longer appear in the free-text notes.
    removed_fragments = (
        "user@example.com",
        "10.0.0.8",
        "app.internal.local",
        "/Users/apayne/",
        "/home/runner/",
    )
    for fragment in removed_fragments:
        assert fragment not in notes

    # Placeholders, retained values and home-relative paths that must appear.
    expected_fragments = (
        "[REDACTED_EMAIL]",
        "alexpaynex@gmail.com",
        "alexander@alexanderwhitestone.com",
        "143.198.27.163",
        "[REDACTED_IP]",
        "[REDACTED_HOST]",
        "~/projects/timmy/file.txt",
        "~/work/app.py",
    )
    for fragment in expected_fragments:
        assert fragment in notes
|
||
|
|
|
||
|
|
|
||
|
|
def test_sanitize_payload_is_idempotent() -> None:
    """Sanitizing already-sanitized output must not change it.

    The first result is deep-copied before it is fed back in. Without that
    snapshot, a sanitizer that mutates its argument and returns the same
    object would make ``twice`` an alias of ``once`` and the equality check
    could never fail, whatever the second pass did to the data.
    """
    import copy  # local import: only this test needs it

    payload = {
        "api_key": "sk-secret1234567890ABCDEF",
        "notes": "email user@example.com host app.internal.local ip 10.0.0.8",
    }

    once = sanitize_payload(payload)
    # Freeze the first-pass result so the comparison is independent of any
    # in-place mutation performed by the second call.
    first_pass = copy.deepcopy(once)
    twice = sanitize_payload(once)

    assert twice == first_pass
|
||
|
|
|
||
|
|
|
||
|
|
def test_cli_sanitizes_json_and_jsonl_files(tmp_path: Path) -> None:
    """Run the sanitizer CLI over a directory with one .json and one .jsonl file.

    The module is invoked as ``python -m scripts.trajectory_sanitize`` with
    ``--input`` / ``--output`` directories under ``tmp_path``. The test then
    checks the summary line on stdout and the contents of the two output
    files, which are expected to keep their input file names.

    NOTE(review): the ``-m`` invocation presumably requires the working
    directory (or ``PYTHONPATH``) to make the ``scripts`` package importable;
    confirm against how the suite is launched.

    Args:
        tmp_path: pytest-provided temporary directory unique to this test.
    """
    input_dir = tmp_path / "input"
    output_dir = tmp_path / "output"
    input_dir.mkdir()

    # Explicit UTF-8 keeps the fixtures independent of the locale encoding.
    session_json = input_dir / "session_a.json"
    session_json.write_text(
        json.dumps(
            {
                "email": "private@example.com",
                "path": "/Users/alice/project/file.txt",
                "host": "app.internal.local",
                "ip": "10.0.0.8",
            }
        ),
        encoding="utf-8",
    )

    session_jsonl = input_dir / "session_b.jsonl"
    session_jsonl.write_text(
        json.dumps({"token": "ghp_12345ABCDEfghijk67890lmnoPQRST"}) + "\n",
        encoding="utf-8",
    )

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "scripts.trajectory_sanitize",
            "--input",
            str(input_dir),
            "--output",
            str(output_dir),
        ],
        capture_output=True,
        text=True,
        check=False,
    )

    # With check=True a failure raises CalledProcessError, whose message does
    # not include the captured stderr; asserting here surfaces the CLI output.
    assert result.returncode == 0, (
        f"CLI exited with {result.returncode}\n"
        f"stdout:\n{result.stdout}\n"
        f"stderr:\n{result.stderr}"
    )
    assert "Sanitized 2 file(s)" in result.stdout

    sanitized_json = json.loads(
        (output_dir / "session_a.json").read_text(encoding="utf-8")
    )
    assert sanitized_json["email"] == "[REDACTED_EMAIL]"
    assert sanitized_json["path"] == "~/project/file.txt"
    assert sanitized_json["host"] == "[REDACTED_HOST]"
    assert sanitized_json["ip"] == "[REDACTED_IP]"

    # One JSON document per line; only the first record was written above.
    sanitized_jsonl = (
        (output_dir / "session_b.jsonl")
        .read_text(encoding="utf-8")
        .strip()
        .splitlines()
    )
    assert json.loads(sanitized_jsonl[0])["token"] == "[REDACTED_TOKEN]"
|