#!/usr/bin/env python3
"""
Poka-yoke: Hardcoded path linter for hermes-agent.

Scans Python files for hardcoded home-directory paths that break
multi-user/multi-profile deployments. Catches:
  - Path.home() / ".hermes" without HERMES_HOME env var fallback
  - Hardcoded /Users/<name>/ paths
  - Hardcoded /home/<name>/ paths
  - os.path.expanduser("~/.hermes...") calls in code (not in comments)

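Usage:
    python3 scripts/lint_hardcoded_paths.py              # lint all .py files
    python3 scripts/lint_hardcoded_paths.py --fix        # suggest fixes
    python3 scripts/lint_hardcoded_paths.py --staged     # lint git staged files only
    python3 scripts/lint_hardcoded_paths.py --json       # print violations as JSON
    python3 scripts/lint_hardcoded_paths.py --root DIR   # scan DIR instead of "."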

Exit codes:
    0 = no violations
    1 = violations found
    2 = error
"""

import argparse
import os
import re
import subprocess
import sys
from pathlib import Path


# ── Patterns ──────────────────────────────────────────────────────

# Rule table consumed by scan_file(). Each rule is a dict with:
#   id           - identifier reported as the violation's "rule"
#   name         - short human-readable title
#   pattern      - regex searched against each source line
#   exclude_with - regex searched against the same whole line; when it
#                  matches, the line is not reported for this rule
#   message      - remediation advice printed with the rule
#
# NOTE(review): because exclude_with is applied to the whole line, the `#`
# alternative below suppresses a rule on any line containing a hash, even
# when the path literal sits in code before a trailing comment.
VIOLATIONS = [
    {
        "id": "direct-home-hermes",
        "name": "Direct Path.home()/.hermes",
        "pattern": r'Path\.home\(\)\s*/\s*["\']\.hermes["\']',
        "exclude_with": r'os\.getenv\(|os\.environ\.get\(|_get_profiles_root|profiles_parent|current_default|native_home',
        "message": "Use `Path(os.getenv('HERMES_HOME', Path.home() / '.hermes'))` instead of direct `Path.home() / '.hermes'`",
    },
    {
        "id": "hardcoded-user-path",
        "name": "Hardcoded /Users/<name>/",
        # Account names may contain dots and hyphens (e.g. john.doe, ci-bot).
        "pattern": r'["\']/Users/[a-zA-Z_][a-zA-Z0-9_.-]*/',
        "exclude_with": r'#|""".*"""\s*$',
        "message": "Use environment variables or relative paths instead of hardcoded /Users/<name>/",
    },
    {
        "id": "hardcoded-home-path",
        "name": "Hardcoded /home/<name>/",
        # Account names may contain dots and hyphens (e.g. john.doe, ci-bot).
        "pattern": r'["\']/home/[a-zA-Z_][a-zA-Z0-9_.-]*/',
        "exclude_with": r'#|""".*"""\s*$',
        "message": "Use environment variables or relative paths instead of hardcoded /home/<name>/",
    },
    {
        "id": "expanduser-hermes",
        "name": "os.path.expanduser ~/.hermes (non-fallback)",
        # The dot is escaped so only a literal "~/.hermes" matches.
        "pattern": r'os\.path\.expanduser\(["\']~/\.hermes',
        # Lines that already wrap the call in an env-var lookup are the
        # recommended fallback form and must not be reported.
        "exclude_with": r'#|os\.getenv\(|os\.environ\.get\(',
        "message": "Use `os.environ.get('HERMES_HOME', os.path.expanduser('~/.hermes'))` instead",
    },
]


# ── Exceptions ─────────────────────────────────────────────────────
# Files where hardcoded paths are acceptable (tests with mock data,
# migration scripts, docs generation)

# Path fragments exempt from linting. is_exception() treats a file as exempt
# when its path starts with an entry or contains "/" + entry, so the directory
# entries (trailing "/") apply at any depth and "mcp_serve.py" applies to a
# file of that name in any directory.
EXCEPTIONS = [
    "tests/",           # Test fixtures can use mock paths
    "scripts/",         # One-off scripts
    "optional-skills/", # Skills not in core
    "skills/",          # External skills
    "plugins/",         # Plugins
    "website/",         # Docs site
    "mcp_serve.py",     # Standalone MCP server
    "docs/",            # Documentation
]


# ── Scanner ────────────────────────────────────────────────────────

def is_exception(filepath: str) -> bool:
    """Return True when *filepath* is covered by an EXCEPTIONS entry.

    A path is exempt when it starts with an entry or contains the entry
    preceded by "/" (i.e. the entry appears at a nested level).

    Args:
        filepath: Path of the candidate file, usually relative to the
            scan root.

    Returns:
        True if the file should be skipped by the linter, False otherwise.
    """
    # EXCEPTIONS entries are written with "/", but os.path.relpath() yields
    # the platform separator (backslash on Windows). Normalise first so the
    # exemptions apply on every platform; this is a no-op where os.sep is "/".
    normalized = filepath.replace(os.sep, "/")
    return any(
        normalized.startswith(exc) or f"/{exc}" in normalized
        for exc in EXCEPTIONS
    )


def is_in_comment_or_docstring(line: str, lines: list, line_idx: int) -> bool:
    """Heuristically decide whether a match on *line* is outside real code.

    Args:
        line: The source line being inspected.
        lines: All lines of the file.
        line_idx: Zero-based index of *line* within *lines*.

    Returns:
        True when the line is a comment, when no rule pattern matches before
        the first ``#``, or when the triple-quote heuristic says the line is
        inside a docstring; False otherwise.
    """
    # Whole-line comment.
    if line.strip().startswith("#"):
        return True

    # Trailing comment: it is "in a comment" unless some rule matches the
    # text that precedes the first hash.
    hash_pos = line.find("#")
    if hash_pos != -1:
        before_hash = line[:hash_pos]
        return not any(re.search(rule["pattern"], before_hash) for rule in VIOLATIONS)

    # Docstring heuristic: an odd number of triple-quote delimiters in the
    # 20 preceding lines plus this one means a docstring is still open.
    window_start = max(0, line_idx - 20)
    delimiter_total = sum(
        lines[idx].count(delim)
        for idx in range(window_start, line_idx + 1)
        for delim in ('"""', "'''")
    )
    inside = delimiter_total % 2 == 1

    # A delimiter on the current line that appears before the match also
    # counts. Only the first rule's pattern is consulted here.
    if '"""' in line or "'''" in line:
        hit = re.search(VIOLATIONS[0]["pattern"], line)
        prefix = line[:line.find(hit.group())] if hit else ""
        if '"""' in prefix or "'''" in prefix:
            inside = True

    return inside


def scan_file(filepath: str) -> list:
    """Scan a single file for violations.

    Every line is tested against each rule in VIOLATIONS. A line is reported
    for a rule when the rule's pattern matches, its ``exclude_with`` regex
    (if any) does not match the line, the line is not a ``#`` comment line,
    and, if the line contains ``#``, the pattern also matches the text
    before the first ``#``.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list of dicts with keys ``file``, ``line`` (1-based), ``rule``,
        ``name``, ``message`` and ``text`` (the stripped line, truncated to
        120 characters). Unreadable or undecodable files yield an empty list.
    """
    try:
        # Python source is UTF-8 by default; decode explicitly so results do
        # not depend on the platform's locale encoding.
        with open(filepath, encoding="utf-8") as f:
            lines = f.read().split("\n")
    except (OSError, UnicodeDecodeError):
        # Best-effort: files that cannot be read are skipped silently.
        return []

    violations_found = []

    for i, line in enumerate(lines):
        stripped = line.strip()

        # A whole-line comment can never be a violation.
        if stripped.startswith("#"):
            continue

        # Text before the first hash, or None when the line has no hash.
        code_part = line[:line.index("#")] if "#" in line else None

        for v in VIOLATIONS:
            if not re.search(v["pattern"], line):
                continue

            # Excluded by context (e.g. it is part of a fallback pattern).
            exclude = v.get("exclude_with")
            if exclude and re.search(exclude, line):
                continue

            # The match only counts if it occurs before a trailing comment.
            if code_part is not None and not re.search(v["pattern"], code_part):
                continue

            violations_found.append({
                "file": filepath,
                "line": i + 1,
                "rule": v["id"],
                "name": v["name"],
                "message": v["message"],
                "text": stripped[:120],
            })

    return violations_found


def get_staged_files() -> list:
    """Get list of staged Python files from git.

    Runs ``git diff --cached --name-only --diff-filter=ACM`` and keeps the
    entries ending in ``.py``.

    Returns:
        Staged ``.py`` paths exactly as git prints them, or an empty list
        when git is not installed, times out, or prints nothing.
    """
    try:
        result = subprocess.run(
            # -z: NUL-separated, unquoted output. Without it git C-quotes
            # paths containing non-ASCII or special characters, and the
            # quoted form no longer ends in ".py", so such files were dropped.
            ["git", "diff", "--cached", "--name-only", "-z", "--diff-filter=ACM"],
            capture_output=True,
            text=True,
            # Raw path bytes must never make decoding fail.
            encoding="utf-8",
            errors="surrogateescape",
            timeout=10,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return []
    return [name for name in result.stdout.split("\0") if name.endswith(".py")]


def scan_all(root: str = ".") -> list:
    """Walk *root* and collect violations from every non-exempt ``.py`` file.

    Args:
        root: Directory to walk; defaults to the current directory.

    Returns:
        The concatenated scan_file() results for each file that
        is_exception() does not exempt.
    """
    skipped_dirs = (".git", "venv", "__pycache__", "node_modules")
    collected = []

    for current_dir, subdirs, names in os.walk(root):
        # Prune in place so os.walk never descends into skipped directories.
        subdirs[:] = [name for name in subdirs if name not in skipped_dirs]

        for name in names:
            if name.endswith(".py"):
                full_path = os.path.join(current_dir, name)
                # Exemptions are checked against the path relative to root.
                if not is_exception(os.path.relpath(full_path, root)):
                    collected.extend(scan_file(full_path))

    return collected


# ── Output ─────────────────────────────────────────────────────────

def print_violations(violations: list) -> None:
    """Write a report of *violations* to stdout, grouped by rule id.

    Prints a single PASS line when the list is empty. Otherwise prints a
    FAIL header, then for each rule (in sorted id order) its name, its
    message and one ``file:line: text`` entry per violation.
    """
    if not violations:
        print("PASS: No hardcoded path violations found")
        return

    print(f"FAIL: {len(violations)} hardcoded path violation(s) found\n")

    # Bucket the violations per rule id, keeping their original order.
    grouped = {}
    for entry in violations:
        rule_id = entry["rule"]
        if rule_id not in grouped:
            grouped[rule_id] = []
        grouped[rule_id].append(entry)

    for rule_id in sorted(grouped):
        members = grouped[rule_id]
        # Name and message are taken from the first violation of the rule.
        first = members[0]
        print(f"  [{rule_id}] {first['name']}")
        print(f"    {first['message']}")
        for member in members:
            print(f"    {member['file']}:{member['line']}: {member['text']}")
        print()


def print_fix_suggestions(violations: list) -> None:
    """Write a per-violation fix hint to stdout.

    Prints nothing for an empty list. For each violation it prints the
    location and the offending text, followed by a ``Fix:`` line when the
    violation's rule id is one of the known ones, then a blank line.
    """
    if not violations:
        return

    home_hint = "    Fix:     Use `os.environ.get('HOME')` or `Path.home()`"
    # Rule id -> ready-to-print hint line; unknown ids get no hint.
    hints = {
        "direct-home-hermes": "    Fix:     Use `Path(os.getenv('HERMES_HOME', Path.home() / '.hermes'))`",
        "hardcoded-user-path": home_hint,
        "hardcoded-home-path": home_hint,
        "expanduser-hermes": "    Fix:     Use `os.environ.get('HERMES_HOME', os.path.expanduser('~/.hermes'))`",
    }

    print("\n=== Fix Suggestions ===\n")

    for entry in violations:
        print(f"  {entry['file']}:{entry['line']}")
        print(f"    Current: {entry['text']}")
        hint = hints.get(entry["rule"])
        if hint is not None:
            print(hint)
        print()


# ── Main ───────────────────────────────────────────────────────────

def main():
    """Parse command-line options, run the scan, report, and exit.

    Exits with status 0 when no violations are found (or, with --staged,
    when no Python files are staged) and with status 1 when at least one
    violation is reported.
    """
    parser = argparse.ArgumentParser(description="Lint hardcoded paths in hermes-agent")
    parser.add_argument("--staged", action="store_true", help="Only scan git staged files")
    parser.add_argument("--fix", action="store_true", help="Show fix suggestions")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--root", default=".", help="Root directory to scan")
    args = parser.parse_args()

    if args.staged:
        # --root is not consulted here; staged paths are used as git prints them.
        files = get_staged_files()
        if not files:
            # Keep stdout parseable in --json mode: an empty result is an
            # empty JSON array, not a human-readable sentence.
            print("[]" if args.json else "No staged Python files")
            sys.exit(0)
        violations = []
        for f in files:
            if not is_exception(f):
                violations.extend(scan_file(f))
    else:
        violations = scan_all(args.root)

    if args.json:
        import json
        print(json.dumps(violations, indent=2))
    else:
        print_violations(violations)
        if args.fix:
            print_fix_suggestions(violations)

    sys.exit(1 if violations else 0)


# Run the linter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()