# the-testament/build/semantic_linker.py

import itertools
import json
import os
import re

def link_chapters(chapters_dir, output_path="build/cross_refs.json"):
    """Cross-reference Markdown chapters by the capitalized phrases they share.

    Every ``*.md`` file directly inside *chapters_dir* is scanned for runs of
    capitalized words (e.g. ``"Galilee"``, ``"John Smith"``). For each pair of
    chapters that share at least one such phrase, a record
    ``{"source": ..., "target": ..., "keywords": [...]}`` is produced, and the
    full list is written to *output_path* as indented JSON.

    Args:
        chapters_dir: Directory containing the chapter ``.md`` files.
        output_path: Destination of the JSON report. Defaults to
            ``build/cross_refs.json`` relative to the current working
            directory; missing parent directories are created.

    Returns:
        The list of cross-reference records that was written. An empty list
        is returned (and nothing is written) when *chapters_dir* is not an
        existing directory.

    Raises:
        UnicodeDecodeError: If a chapter file is not valid UTF-8.
        OSError: If a chapter cannot be read or the report cannot be written.
    """
    print("--- [Testament] Running Semantic Linker (GOFAI) ---")

    # isdir rather than exists: a plain file at this path would otherwise
    # slip through and make os.listdir raise.
    if not os.path.isdir(chapters_dir):
        print(f"Error: {chapters_dir} not found")
        return []

    # Built once instead of once per chapter / per chapter pair.
    keyword_re = re.compile(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b')
    # Common English words that are capitalized only because they start a
    # sentence; they would otherwise link almost every pair of chapters.
    stop_words = frozenset({
        "The", "A", "An", "In", "On", "At", "To", "From", "By",
        "He", "She", "It", "They",
    })

    # 1. Extract keywords from each chapter (sorted for a stable pair order).
    links = {}
    for filename in sorted(os.listdir(chapters_dir)):
        if not filename.endswith(".md"):
            continue

        path = os.path.join(chapters_dir, filename)
        # Explicit encoding: the platform default is locale-dependent and
        # mis-decodes non-ASCII text on some systems.
        with open(path, encoding="utf-8") as f:
            content = f.read()

        # Dropping stop words here is equivalent to dropping them from each
        # pairwise intersection, but is done once per chapter.
        links[filename] = set(keyword_re.findall(content)) - stop_words

    # 2. Find cross-references between every unordered pair of chapters.
    cross_refs = []
    for f1, f2 in itertools.combinations(links, 2):
        common = links[f1] & links[f2]
        if common:
            cross_refs.append({
                "source": f1,
                "target": f2,
                # Sorted so the report is reproducible across runs; set
                # iteration order varies with string hash randomization.
                "keywords": sorted(common),
            })

    # 3. Save the report.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(cross_refs, f, indent=2)

    print(f"Linked {len(cross_refs)} relationships across {len(links)} chapters.")
    return cross_refs

if __name__ == "__main__":
    # Script entry point: link the chapters found in ./chapters, resolved
    # against the current working directory.
    link_chapters(chapters_dir="chapters")