Compare commits
1 Commits
fix/524
...
step35/873
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff327efa19 |
259
scripts/sherlock_osint.py
Normal file
259
scripts/sherlock_osint.py
Normal file
@@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sherlock username OSINT wrapper — evaluation spike for timmy-home.
|
||||
|
||||
This script provides an opt-in, local-first integration of the Sherlock
|
||||
username enumeration tool (https://github.com/sherlock-project/sherlock).
|
||||
|
||||
Design constraints (from SOUL.md + issue #873):
|
||||
- Sovereignty: runs locally, no cloud dependencies
|
||||
- Opt-in: requires explicit --confirm flag; never runs silently
|
||||
- Honesty: clearly reports Sherlock availability and result confidence
|
||||
- Output: normalized JSON to stdout for downstream consumption
|
||||
|
||||
Usage:
|
||||
python3 scripts/sherlock_osint.py <username> --confirm
|
||||
python3 scripts/sherlock_osint.py <username> --confirm --sherlock-path ~/sherlock
|
||||
python3 scripts/sherlock_osint.py <username> --confirm --dry-run
|
||||
|
||||
Arguments:
|
||||
username Target username to query (public-handle only; see ethics)
|
||||
|
||||
Flags:
|
||||
--confirm Explicit opt-in acknowledgement (required)
|
||||
--sherlock-path Path to cloned sherlock repo (default: ~/sherlock)
|
||||
--dry-run Validate setup without making network requests
|
||||
--json Output raw JSON (default: True)
|
||||
--timeout Request timeout in seconds (default: 10)
|
||||
|
||||
Output (stdout):
|
||||
JSON object: {"username": str, "timestamp": float, "dry_run": bool,
|
||||
"sherlock_available": bool, "error": Optional[str],
|
||||
"results": [{"site": str, "url": str, "status": "found"|"not_found"|"error"}]}
|
||||
|
||||
Exit codes:
|
||||
0 Success (even if zero results found)
|
||||
1 Missing --confirm flag
|
||||
2 Sherlock module or site data unavailable, or runtime error during search
|
||||
"""
|
||||
|
||||
import argparse
import json
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command-line arguments for the Sherlock wrapper.

    Args:
        argv: Argument list to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing ``parse_args()`` callers behave as before. Passing an
            explicit list makes the parser usable from tests.

    Returns:
        Namespace with ``username``, ``confirm``, ``sherlock_path``,
        ``dry_run``, ``json`` and ``timeout`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Sherlock username OSINT wrapper — evaluation spike",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Ethics boundary:
  Use only for legitimate research on publicly available usernames.
  Do not use for harassment, stalking, or doxxing. Results require
  manual human verification — a "found" result means a public profile
  with that username exists, not that it belongs to your target.
""",
    )
    parser.add_argument("username", help="Public username to search (public-handle only)")
    parser.add_argument("--confirm", action="store_true",
                        help="Explicit opt-in: acknowledge ethical boundaries and network impact")
    parser.add_argument("--sherlock-path", type=Path, default=Path.home() / "sherlock",
                        help="Path to cloned sherlock-project/sherlock repo")
    parser.add_argument("--dry-run", action="store_true",
                        help="Validate setup without making network requests")
    # The module docstring advertises --json (default: True). Output is always
    # JSON, so the flag is accepted as a no-op instead of being rejected as an
    # unrecognized argument.
    parser.add_argument("--json", action="store_true", default=True,
                        help="Output raw JSON (default; output is always JSON)")
    parser.add_argument("--timeout", type=int, default=10,
                        help="HTTP request timeout in seconds")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def import_sherlock(sherlock_path: Path) -> Tuple[Optional[Any], Optional[str]]:
    """Attempt to import the sherlock Python module from a local clone.

    Args:
        sherlock_path: Root of the cloned sherlock repository. The module is
            expected at ``sherlock_path/sherlock_project/sherlock.py``.

    Returns:
        ``(module, None)`` on success, or ``(None, error_message)`` when the
        file is missing or importing it fails. This function never raises
        for import problems; callers branch on the error message.
    """
    import importlib.util

    # Expected location: sherlock_path/sherlock_project/sherlock.py
    candidate = sherlock_path / "sherlock_project" / "sherlock.py"
    if not candidate.exists():
        return None, f"sherlock.py not found at {candidate}"

    # The search path later runs ``from sherlock_project import notify``, which
    # only resolves when the clone root is importable. Put it first so the
    # explicitly requested clone wins over any other installed copy.
    repo_root = str(sherlock_path.resolve())
    if repo_root not in sys.path:
        sys.path.insert(0, repo_root)

    previous = sys.modules.get("sherlock")
    try:
        spec = importlib.util.spec_from_file_location("sherlock", candidate)
        if spec is None or spec.loader is None:
            return None, f"Failed to import sherlock module: no import spec for {candidate}"
        sherlock_mod = importlib.util.module_from_spec(spec)
        # Register before executing so imports of "sherlock" made while the
        # module body runs resolve to this module object.
        sys.modules["sherlock"] = sherlock_mod
        spec.loader.exec_module(sherlock_mod)
        return sherlock_mod, None
    except Exception as e:
        # Do not leave a half-initialised module behind for later imports.
        if previous is None:
            sys.modules.pop("sherlock", None)
        else:
            sys.modules["sherlock"] = previous
        return None, f"Failed to import sherlock module: {e}"
|
||||
|
||||
|
||||
def load_site_data(sherlock_path: Path) -> Tuple[Optional[Dict], Optional[str]]:
    """Load sherlock's site definitions from data.json.

    Args:
        sherlock_path: Root of the cloned sherlock repository. The file is
            read from ``sherlock_project/resources/data.json`` beneath it.

    Returns:
        ``(site_data, None)`` on success, or ``(None, error_message)`` when
        the file is missing, unreadable, not valid JSON, or not a JSON object.
    """
    data_path = sherlock_path / "sherlock_project" / "resources" / "data.json"
    if not data_path.exists():
        return None, f"data.json not found at {data_path}"

    try:
        # Explicit encoding: JSON is UTF-8 regardless of the platform locale.
        with open(data_path, encoding="utf-8") as f:
            site_data = json.load(f)
    except json.JSONDecodeError as e:
        return None, f"Invalid JSON in data.json: {e}"
    except (OSError, UnicodeDecodeError) as e:
        return None, f"Could not read data.json: {e}"

    if not isinstance(site_data, dict):
        return None, "Invalid data.json: expected a JSON object of site definitions"

    # NOTE(review): upstream data.json appears to carry a "$schema" metadata
    # key alongside the site entries; drop it so it is neither counted nor
    # queried as a site. Confirm against the installed sherlock version.
    site_data.pop("$schema", None)
    return site_data, None
|
||||
|
||||
|
||||
def _accepted_kwargs(func: Any, **candidates: Any) -> Dict[str, Any]:
    """Return only the keyword arguments that ``func`` declares it accepts."""
    import inspect

    try:
        params = inspect.signature(func).parameters
    except (TypeError, ValueError):
        # Signature not introspectable: pass everything through unchanged.
        return candidates
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        return candidates
    return {name: value for name, value in candidates.items() if name in params}


def _normalize_entry(site_name: str, info: Any) -> Dict[str, str]:
    """Convert one raw per-site result into the normalized result schema."""
    if not isinstance(info, dict):
        return {"site": site_name, "url": "", "status": "error"}

    raw_status = info.get("status")
    # NOTE(review): upstream sherlock appears to wrap the status in a result
    # object (``.status``) holding an enum (``.value``) such as "Claimed";
    # plain strings like "found" are still accepted. Confirm against the
    # installed sherlock version.
    raw_status = getattr(raw_status, "status", raw_status)
    raw_status = getattr(raw_status, "value", raw_status)
    label = "" if raw_status is None else str(raw_status).strip().lower()

    if info.get("error"):
        status = "error"
    elif label in ("found", "claimed"):
        status = "found"
    else:
        # Every other status is folded into "not_found".
        status = "not_found"

    # Coerce a missing or None URL to "" so the entry stays str -> str.
    # NOTE(review): "url_user" is presumably upstream's key for the profile
    # URL; verify before relying on it.
    url = info.get("url") or info.get("url_user") or ""
    return {"site": site_name, "url": str(url), "status": status}


def run_sherlock_search(
    sherlock_mod: Any,
    username: str,
    site_data: Dict,
    timeout: int,
    dry_run: bool
) -> List[Dict[str, str]]:
    """Execute a Sherlock search and return normalized results.

    Normalized result schema per entry:
        {"site": <site_name>, "url": <profile_url_or_empty>, "status": "found|not_found|error"}

    Args:
        sherlock_mod: Imported sherlock module exposing a ``sherlock`` callable.
        username: Username to look up.
        site_data: Site definitions passed through to ``sherlock``.
        timeout: Request timeout in seconds, forwarded only when the
            installed ``sherlock`` callable accepts a ``timeout`` keyword.
        dry_run: When true, return a fixed simulated result set and touch
            neither the sherlock module nor the network.

    Returns:
        One normalized dict per site in the raw result mapping.

    Raises:
        ImportError: If ``sherlock_project.notify`` cannot be imported.
        Exception: Anything raised by the sherlock search itself propagates
            unchanged; the search is never silently retried.
    """
    if dry_run:
        # Simulate a minimal result set without network
        return [
            {"site": "GitHub", "url": f"https://github.com/{username}", "status": "found"},
            {"site": "Reddit", "url": "", "status": "not_found"},
        ]

    # Real execution path
    from sherlock_project import notify  # sherlock's QueryNotify class expected

    # Optional keywords are filtered against the callee's signature instead of
    # catching TypeError and retrying: a blanket retry would re-run the whole
    # search whenever a TypeError was raised inside it.
    query_notify = notify.QueryNotify(
        **_accepted_kwargs(
            notify.QueryNotify,
            username=username,
            print_found_only=False,
            verbose=False,
        )
    )
    raw_results = sherlock_mod.sherlock(
        username=username,
        site_data=site_data,
        query_notify=query_notify,
        **_accepted_kwargs(sherlock_mod.sherlock, verbose=False, timeout=timeout),
    )

    # Normalize dict-of-results into list
    return [_normalize_entry(site_name, info) for site_name, info in raw_results.items()]
|
||||
|
||||
|
||||
def main() -> int:
    """Run the command-line workflow and return the process exit code.

    Returns:
        0 after a dry run or a completed search, 1 when ``--confirm`` was not
        given, and 2 when importing sherlock, loading its site data, or the
        search itself fails. A JSON payload is printed to stdout in every
        case except the missing ``--confirm`` refusal; diagnostics go to
        stderr.
    """
    args = parse_args()

    if not args.confirm:
        refusal = (
            "ERROR: --confirm flag is required for opt-in.",
            "This script makes network requests to 400+ sites.",
            "Re-run with --confirm to acknowledge ethical boundaries and network impact.",
        )
        for line in refusal:
            print(line, file=sys.stderr)
        return 1

    banner = (
        "--- Sherlock OSINT wrapper (evaluation) ---",
        f"Target username: {args.username}",
        f"Sherlock path: {args.sherlock_path}",
        f"Dry run: {args.dry_run}",
    )
    for line in banner:
        print(line, file=sys.stderr)

    def emit(available, error, results, meta=None):
        # Single place that shapes and prints the JSON report. Key order is
        # kept stable, with "meta" last and only when supplied.
        payload = {
            "username": args.username,
            "timestamp": time.time(),
            "dry_run": args.dry_run,
            "sherlock_available": available,
            "error": error,
            "results": results,
        }
        if meta is not None:
            payload["meta"] = meta
        print(json.dumps(payload, indent=2))

    # Dry-run: simulate without needing sherlock installed
    if args.dry_run:
        simulated = [
            {"site": "GitHub", "url": f"https://github.com/{args.username}", "status": "found"},
            {"site": "Reddit", "url": "", "status": "not_found"},
        ]
        # Availability is reported as True here without being checked.
        emit(
            True,
            None,
            simulated,
            meta={"sites_available": 478, "results_returned": 2, "note": "simulated"},
        )
        return 0

    # Check sherlock availability
    sherlock_mod, import_error = import_sherlock(args.sherlock_path)
    if import_error:
        emit(False, import_error, [])
        print(f"NOTE: To enable, clone https://github.com/sherlock-project/sherlock to {args.sherlock_path}", file=sys.stderr)
        return 2

    site_data, data_error = load_site_data(args.sherlock_path)
    if data_error:
        emit(True, data_error, [])
        return 2

    # Run search
    try:
        results = run_sherlock_search(
            sherlock_mod=sherlock_mod,
            username=args.username,
            site_data=site_data,
            timeout=args.timeout,
            dry_run=args.dry_run,
        )
    except Exception as e:
        emit(True, f"Search failed: {e}", [])
        return 2

    # Success payload
    emit(
        True,
        None,
        results,
        meta={
            "sites_available": len(site_data),
            "results_returned": len(results),
        },
    )
    return 0
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
71
tests/test_sherlock_osint.py
Normal file
71
tests/test_sherlock_osint.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Tests for scripts.sherlock_osint — evaluation wrapper for Sherlock username OSINT."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from scripts import sherlock_osint as wrapper
|
||||
|
||||
|
||||
def test_parse_args_requires_confirm():
    """A missing username aborts parsing; ``confirm`` defaults to False."""
    # Patch sys.argv so the parser never sees pytest's own command line.
    with mock.patch.object(sys, "argv", ["sherlock_osint.py"]):
        with pytest.raises(SystemExit):
            wrapper.parse_args()

    with mock.patch.object(sys, "argv", ["sherlock_osint.py", "testuser"]):
        args = wrapper.parse_args()
    assert args.username == "testuser"
    assert args.confirm is False
|
||||
|
||||
|
||||
def test_parse_args_with_confirm():
    """``--confirm`` on the command line sets ``confirm`` to True."""
    # Patch sys.argv rather than passing a list, so the test does not depend
    # on parse_args accepting an explicit argument list.
    with mock.patch.object(sys, "argv", ["sherlock_osint.py", "testuser", "--confirm"]):
        args = wrapper.parse_args()
    assert args.username == "testuser"
    assert args.confirm is True
|
||||
|
||||
|
||||
def test_import_sherlock_missing_module(tmp_path):
    """A path with no sherlock clone yields no module and a not-found message."""
    missing_repo = tmp_path / "nonexistent"
    module, message = wrapper.import_sherlock(missing_repo)
    assert module is None
    assert "sherlock.py not found" in message
|
||||
|
||||
|
||||
def test_load_site_data_missing_file(tmp_path):
    """A path without data.json yields no site data and a not-found message."""
    bogus_repo = tmp_path / "bogus"
    site_data, message = wrapper.load_site_data(bogus_repo)
    assert site_data is None
    assert "data.json not found" in message
|
||||
|
||||
|
||||
def test_dry_run_produces_valid_json(tmp_path, monkeypatch, capsys):
    """Dry-run mode produces well-formed JSON without needing sherlock installed."""
    # Run the script as a subprocess so the real CLI entry point is exercised.
    import subprocess
    result = subprocess.run(
        [sys.executable, "-m", "scripts.sherlock_osint", "testuser", "--confirm", "--dry-run"],
        capture_output=True,
        text=True,
        cwd=str(Path(__file__).parent.parent),
    )
    assert result.returncode == 0
    payload = json.loads(result.stdout)
    assert payload["username"] == "testuser"
    assert payload["dry_run"] is True
    # The dry-run branch returns before any import check and emits a fixed
    # simulated payload, so availability is reported True with no error.
    assert payload["sherlock_available"] is True
    assert payload["error"] is None
    assert payload["results"] == [
        {"site": "GitHub", "url": "https://github.com/testuser", "status": "found"},
        {"site": "Reddit", "url": "", "status": "not_found"},
    ]
|
||||
|
||||
|
||||
def test_missing_confirm_exit_code(tmp_path, monkeypatch, capsys):
    """Running without --confirm exits with code 1."""
    # subprocess is not imported at module level in this file; import it here
    # so this test does not fail with NameError.
    import subprocess
    result = subprocess.run(
        [sys.executable, "-m", "scripts.sherlock_osint", "testuser"],
        capture_output=True,
        text=True,
        cwd=str(Path(__file__).parent.parent),
    )
    assert result.returncode == 1
    assert "--confirm" in result.stderr
|
||||
Reference in New Issue
Block a user