Compare commits
1 Commits
fix/138-qu
...
fix/48-mar
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cd18bd06be |
@@ -22,3 +22,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v .gitea | grep -v llama-cpp-fork; then exit 1; fi
|
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v .gitea | grep -v llama-cpp-fork; then exit 1; fi
|
||||||
echo "PASS: No secrets"
|
echo "PASS: No secrets"
|
||||||
|
- name: Markdown link check
|
||||||
|
run: |
|
||||||
|
python3 check_markdown_links.py
|
||||||
|
|||||||
124
check_markdown_links.py
Normal file
124
check_markdown_links.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Check local markdown links.
|
||||||
|
|
||||||
|
Scans markdown files for local links and fails on broken targets.
|
||||||
|
Ignores:
|
||||||
|
- external URLs (http/https)
|
||||||
|
- anchors (#section)
|
||||||
|
- mailto: and tel:
|
||||||
|
- links inside fenced code blocks
|
||||||
|
- generated/build directories
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
# Matches a line that begins with three backticks (a fenced-code delimiter).
CODE_FENCE_RE = re.compile(r"^```")

# Inline link ``[text](target)``. The negative lookbehind rejects image syntax
# (``![alt](src)``); group 1 is everything between the parentheses.
LINK_RE = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")

# Directory names that are never scanned for markdown files.
DEFAULT_SKIP_DIRS = {
    # version control / CI metadata
    ".git",
    ".gitea",
    # caches
    "__pycache__",
    ".pytest_cache",
    # generated or vendored trees
    "build",
    "dist",
    "llama-cpp-fork",
    "node_modules",
}
|
||||||
|
|
||||||
|
|
||||||
|
def should_ignore_target(target: str) -> bool:
    """Return True when *target* does not point at a local file.

    Ignored targets are: empty strings, pure anchors (``#section``),
    protocol-relative URLs (``//host/path``) and anything that starts with a
    URL scheme (``http:``, ``https:``, ``mailto:``, ``tel:``, ``ftp:`` ...),
    matched case-insensitively.

    Args:
        target: Raw link destination exactly as captured from the markdown,
            possibly wrapped in ``<...>`` and/or followed by a title.

    Returns:
        True if the link should be skipped by the local-file check.
    """
    target = target.strip()
    # Angle-bracket destinations (``<https://example.com>``) wrap the real
    # target; unwrap first so a wrapped URL is not mistaken for a local path.
    if target.startswith("<"):
        closing = target.find(">")
        if closing != -1:
            target = target[1:closing].strip()
    if not target or target.startswith(("#", "//")):
        return True
    # A scheme is a letter followed by letters/digits/"+.-" and a colon. At
    # least two characters are required so a Windows drive letter ("C:") is
    # still treated as a local path.
    return re.match(r"[A-Za-z][A-Za-z0-9+.\-]+:", target) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_target(target: str) -> str:
    """Reduce a raw link destination to the filesystem path it refers to.

    Steps, in order:
      1. strip surrounding whitespace;
      2. unwrap an angle-bracket destination (``<path with spaces.md>``),
         discarding anything after the closing ``>`` (e.g. a title);
      3. otherwise drop a trailing link title (``path "Title"``);
      4. drop the ``#fragment`` part;
      5. percent-decode the remainder (``my%20file.md`` -> ``my file.md``).

    Args:
        target: Raw text captured between the parentheses of a markdown link.

    Returns:
        The path portion of the link; an empty string when the link was only
        an anchor.
    """
    # Local import keeps this function self-contained; the module does not
    # import urllib at the top level.
    from urllib.parse import unquote

    target = target.strip()
    if target.startswith("<"):
        closing = target.find(">")
        if closing != -1:
            target = target[1:closing].strip()
    else:
        # Only strip the tail when it looks like a title, so unusual paths
        # containing whitespace keep their previous handling.
        pieces = target.split(None, 1)
        if len(pieces) == 2 and pieces[1].startswith(('"', "'", "(")):
            target = pieces[0]
    # Cut the fragment before decoding so an encoded "%23" stays in the name.
    target = target.split("#", 1)[0]
    return unquote(target)
|
||||||
|
|
||||||
|
|
||||||
|
def iter_markdown_files(root: Path, skip_dirs: set[str] | None = None) -> Iterable[Path]:
    """Yield every ``*.md`` file under *root* that is not in a skipped directory.

    Args:
        root: Directory to search recursively.
        skip_dirs: Path component names to exclude. ``None`` selects
            ``DEFAULT_SKIP_DIRS``; an explicit empty set disables skipping.

    Yields:
        Paths of markdown files, in sorted order so output is reproducible
        across filesystems.
    """
    # ``is None`` rather than ``or``: an empty set is a valid request to skip
    # nothing and must not fall back to the defaults.
    if skip_dirs is None:
        skip_dirs = DEFAULT_SKIP_DIRS
    for path in sorted(root.rglob("*.md")):
        if any(part in skip_dirs for part in path.relative_to(root).parts):
            continue
        # A directory may itself be named "something.md"; only real files can
        # be read by the link scanner.
        if not path.is_file():
            continue
        yield path
|
||||||
|
|
||||||
|
|
||||||
|
def iter_links(path: Path) -> Iterable[tuple[int, str]]:
    """Yield ``(line_number, raw_target)`` for each inline link in *path*.

    Lines inside fenced code blocks are skipped. Both backtick and tilde
    fences are recognised. A fence is closed only by a line consisting solely
    of the same fence character, at least as long as the opening run.

    Args:
        path: Markdown file to scan; read as UTF-8.

    Yields:
        1-based line number and the text captured by ``LINK_RE`` group 1.
    """
    open_fence = ""  # fence run that opened the current block, "" when outside
    lines = path.read_text(encoding="utf-8").splitlines()
    for line_no, line in enumerate(lines, start=1):
        stripped = line.strip()

        # Work out whether this line is a fence delimiter and, if so, its run.
        fence = ""
        if stripped.startswith(("```", "~~~")):
            fence_char = stripped[0]
            run_length = len(stripped) - len(stripped.lstrip(fence_char))
            fence = stripped[:run_length]
            # A backtick fence's info string may not contain backticks; such a
            # line is an inline code span (```code```), not a delimiter.
            if fence_char == "`" and "`" in stripped[run_length:]:
                fence = ""

        if open_fence:
            closes_block = (
                fence
                and fence[0] == open_fence[0]
                and len(fence) >= len(open_fence)
                and stripped == fence
            )
            if closes_block:
                open_fence = ""
            continue

        if fence:
            open_fence = fence
            continue

        for match in LINK_RE.finditer(line):
            yield line_no, match.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_target(source: Path, target: str, root: Path) -> Path:
    """Map a link *target* found in *source* to a resolved filesystem path.

    Targets beginning with ``/`` are anchored at *root* (leading slashes
    removed); every other target is anchored at the directory containing
    *source*.
    """
    if target.startswith("/"):
        base_dir, relative = root, target.lstrip("/")
    else:
        base_dir, relative = source.parent, target
    return (base_dir / relative).resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def find_broken_links(root: Path, skip_dirs: set[str] | None = None) -> list[dict]:
    """Collect every local markdown link under *root* whose target is missing.

    Args:
        root: Directory to scan; resolved before use.
        skip_dirs: Forwarded unchanged to ``iter_markdown_files``.

    Returns:
        One dict per broken link with the keys ``source``, ``line``,
        ``target`` and ``resolved``.
    """
    base = root.resolve()
    failures: list[dict] = []
    for md_path in iter_markdown_files(base, skip_dirs=skip_dirs):
        for lineno, raw in iter_links(md_path):
            if should_ignore_target(raw):
                continue
            cleaned = normalize_target(raw)
            if cleaned:
                destination = resolve_target(md_path, cleaned, base)
                if not destination.exists():
                    failures.append(
                        dict(
                            source=str(md_path),
                            line=lineno,
                            target=cleaned,
                            resolved=str(destination),
                        )
                    )
    return failures
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Command-line entry point.

    Parses an optional ``root`` argument (default ``.``), scans it with
    ``find_broken_links`` and prints the outcome.

    Returns:
        0 when no broken links were found, 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="Fail on broken local markdown links.")
    cli.add_argument("root", nargs="?", default=".", help="Repo root to scan (default: .)")
    scan_root = Path(cli.parse_args().root)

    failures = find_broken_links(scan_root)
    if failures:
        print("Broken local markdown links found:")
        # Report sources relative to the resolved root for shorter output.
        resolved_root = scan_root.resolve()
        for entry in failures:
            relative_source = Path(entry["source"]).relative_to(resolved_root)
            print(f"{relative_source}:{entry['line']}: missing target -> {entry['target']}")
        return 1

    print("PASS: No broken local markdown links")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
74
tests/test_markdown_link_check.py
Normal file
74
tests/test_markdown_link_check.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from check_markdown_links import find_broken_links
|
||||||
|
|
||||||
|
|
||||||
|
def write(path: Path, content: str) -> None:
    """Write *content* to *path*, creating parent directories as needed.

    The text is dedented and has leading whitespace removed first, so callers
    can pass indented triple-quoted literals.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    normalized = textwrap.dedent(content).lstrip()
    path.write_text(normalized, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def test_reports_missing_local_markdown_target_with_line_number(tmp_path: Path):
    """A link to a nonexistent file is reported with its source, line and target."""
    readme = tmp_path / "README.md"
    write(
        readme,
        """
        # Repo

        See [status](docs/status.md).
        """,
    )

    results = find_broken_links(tmp_path)

    assert len(results) == 1
    entry = results[0]
    assert entry["source"].endswith("README.md")
    assert entry["line"] == 3
    assert entry["target"] == "docs/status.md"
|
||||||
|
|
||||||
|
|
||||||
|
def test_allows_existing_relative_targets(tmp_path: Path):
    """A relative link whose target file exists produces no findings."""
    status_doc = tmp_path / "docs" / "status.md"
    readme = tmp_path / "README.md"
    write(status_doc, "# Status\n")
    write(
        readme,
        """
        # Repo

        See [status](docs/status.md).
        """,
    )

    results = find_broken_links(tmp_path)

    assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_ignores_external_anchor_mailto_and_tel_links(tmp_path: Path):
    """External URLs, anchors, mailto: and tel: links are never reported."""
    readme = tmp_path / "README.md"
    write(
        readme,
        """
        [external](https://example.com)
        [anchor](#section)
        [mail](mailto:test@example.com)
        [call](tel:988)
        """,
    )

    results = find_broken_links(tmp_path)

    assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_ignores_links_inside_fenced_code_blocks(tmp_path: Path):
    """A broken link that appears only inside a fenced code block is ignored."""
    readme = tmp_path / "README.md"
    write(
        readme,
        """
        ```md
        [broken](docs/missing.md)
        ```
        """,
    )

    results = find_broken_links(tmp_path)

    assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_skips_build_directories(tmp_path: Path):
    """Markdown files under a default-skipped directory (``build``) are not scanned."""
    generated_readme = tmp_path / "build" / "README.md"
    write(generated_readme, "[broken](missing.md)\n")

    results = find_broken_links(tmp_path)

    assert results == []
|
||||||
Reference in New Issue
Block a user