#!/usr/bin/env python3
"""Check local markdown links.

Scans markdown files for local links and fails on broken targets.

Ignores:
- external URLs (http/https)
- anchors (#section)
- mailto: and tel:
- links inside fenced code blocks
- generated/build directories
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path
from typing import Iterable

# Matched against a stripped line; a hit toggles "inside fenced code block".
CODE_FENCE_RE = re.compile(r"^```")

# NOTE(review): the original pattern text was lost from the file as received
# (only the leading ``(?`` survived). Reconstructed as "inline link that is not
# an image", capturing the parenthesised destination in group 1 -- confirm
# against version control.
LINK_RE = re.compile(r"(?<!!)\[[^\]]*\]\(([^)]+)\)")

# NOTE(review): the original contents of this set were lost together with the
# pattern above; these names are a conservative guess at "generated/build
# directories" -- confirm against version control.
DEFAULT_SKIP_DIRS: set[str] = {
    ".git",
    ".tox",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "venv",
}

# Destinations starting with one of these (case-insensitively) are not local.
_IGNORED_PREFIXES = ("http://", "https://", "mailto:", "tel:", "#")

# Characters that may open a link title following the destination.
_TITLE_OPENERS = ('"', "'", "(")


def should_ignore_target(target: str) -> bool:
    """Return True when *target* is not a local file link.

    Empty targets, http/https URLs, ``mailto:``/``tel:`` links and pure
    anchors are ignored. The scheme check is case-insensitive and also sees
    through a leading ``<`` so that ``<https://...>`` is not mistaken for a
    local path.
    """
    candidate = target.strip()
    if candidate.startswith("<"):
        candidate = candidate[1:].lstrip()
    if not candidate:
        return True
    return candidate.lower().startswith(_IGNORED_PREFIXES)


def normalize_target(target: str) -> str:
    """Reduce a raw link destination to the bare path to check on disk.

    Strips surrounding whitespace, optional ``<...>`` wrapping, a trailing
    link title (``path "Title"``) and any ``#fragment``. May return an empty
    string, e.g. for a target that is only a fragment.
    """
    text = target.strip()
    if text.startswith("<") and text.endswith(">"):
        text = text[1:-1].strip()
    elif text.startswith("<") and ">" in text:
        # ``<path with spaces.md> "Title"``: keep only the bracketed part.
        text = text[1 : text.index(">")].strip()
    else:
        # ``path "Title"``: drop the title, but leave unquoted spaces alone so
        # a path that merely contains a space is still checked as written.
        pieces = text.split(None, 1)
        if len(pieces) == 2 and pieces[1].startswith(_TITLE_OPENERS):
            text = pieces[0]
    return text.split("#", 1)[0]


def iter_markdown_files(root: Path, skip_dirs: set[str] | None = None) -> Iterable[Path]:
    """Yield every ``*.md`` file under *root*, in sorted order.

    Files located inside a directory whose name is in *skip_dirs* are
    omitted. ``None`` selects ``DEFAULT_SKIP_DIRS``; an explicitly empty
    collection disables skipping.
    """
    if skip_dirs is None:
        skip_dirs = DEFAULT_SKIP_DIRS
    # Sorted so that reports are stable across runs and filesystems.
    for path in sorted(root.rglob("*.md")):
        if not path.is_file():
            # A directory can be named ``something.md`` too.
            continue
        if any(part in skip_dirs for part in path.relative_to(root).parent.parts):
            continue
        yield path


def iter_links(path: Path) -> Iterable[tuple[int, str]]:
    """Yield ``(line_number, raw_target)`` for each link found in *path*.

    Line numbers are 1-based. Lines inside fenced code blocks, and the fence
    lines themselves, are skipped.
    """
    # Undecodable bytes are replaced rather than aborting the whole scan.
    text = path.read_text(encoding="utf-8", errors="replace")
    in_code_fence = False
    for line_no, line in enumerate(text.splitlines(), start=1):
        if CODE_FENCE_RE.match(line.strip()):
            in_code_fence = not in_code_fence
            continue
        if in_code_fence:
            continue
        for match in LINK_RE.finditer(line):
            yield line_no, match.group(1)


def resolve_target(source: Path, target: str, root: Path) -> Path:
    """Resolve *target* to an absolute path.

    Targets starting with ``/`` are taken relative to *root*; all others are
    relative to the directory containing *source*.
    """
    if target.startswith("/"):
        return (root / target.lstrip("/")).resolve()
    return (source.parent / target).resolve()


def find_broken_links(root: Path, skip_dirs: set[str] | None = None) -> list[dict]:
    """Return one record per local markdown link whose target does not exist.

    Each record is a dict with the keys ``source`` (path of the markdown
    file), ``line`` (1-based line number), ``target`` (normalized link
    target) and ``resolved`` (absolute path that was checked).
    """
    root = root.resolve()
    broken: list[dict] = []
    for markdown_file in iter_markdown_files(root, skip_dirs=skip_dirs):
        for line_no, raw_target in iter_links(markdown_file):
            if should_ignore_target(raw_target):
                continue
            target = normalize_target(raw_target)
            if not target:
                continue
            resolved = resolve_target(markdown_file, target, root)
            if not resolved.exists():
                broken.append(
                    {
                        "source": str(markdown_file),
                        "line": line_no,
                        "target": target,
                        "resolved": str(resolved),
                    }
                )
    return broken


def main(argv: list[str] | None = None) -> int:
    """Run the checker from the command line.

    *argv* defaults to ``sys.argv[1:]``. Returns 0 when no broken links are
    found, 1 when at least one is found, and 2 when the given root is not a
    directory.
    """
    parser = argparse.ArgumentParser(description="Fail on broken local markdown links.")
    parser.add_argument("root", nargs="?", default=".", help="Repo root to scan (default: .)")
    args = parser.parse_args(argv)

    root = Path(args.root).resolve()
    if not root.is_dir():
        # Scanning a missing directory would otherwise report a misleading PASS.
        print(f"ERROR: root is not a directory: {args.root}", file=sys.stderr)
        return 2

    broken = find_broken_links(root)
    if not broken:
        print("PASS: No broken local markdown links")
        return 0

    print("Broken local markdown links found:")
    for item in broken:
        source = Path(item["source"]).relative_to(root)
        print(f"{source}:{item['line']}: missing target -> {item['target']}")
    return 1


if __name__ == "__main__":
    sys.exit(main())