# Files
# the-testament/build/semantic_linker.py
# 2026-04-11 01:40:36 +00:00
#
# 52 lines
# 1.7 KiB
# Python

import itertools
import json
import os
import re
# Capitalized words or whitespace-separated runs of them, e.g. "Alice" or
# "Silver Lake". Compiled once instead of on every chapter.
_KEYWORD_PATTERN = re.compile(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b')

# Common English words that match the pattern only because they start a
# sentence; they are not meaningful links between chapters.
_STOP_WORDS = frozenset({
    "The", "A", "An", "In", "On", "At", "To", "From", "By",
    "He", "She", "It", "They",
})


def link_chapters(chapters_dir, output_path="build/cross_refs.json"):
    """Cross-reference Markdown chapters by the capitalized phrases they share.

    Every ``*.md`` file directly inside ``chapters_dir`` is scanned for
    capitalized words and phrases. Each pair of chapters that shares at
    least one such phrase (after stop-word filtering) yields one record
    ``{"source": ..., "target": ..., "keywords": [...]}``, where ``source``
    sorts before ``target`` by filename.

    Args:
        chapters_dir: Directory containing the chapter ``.md`` files.
        output_path: Where the JSON list of records is written. Its parent
            directory is created if needed.

    Returns:
        The list of cross-reference records that was written, or an empty
        list (and no file written) when ``chapters_dir`` is not a directory.
    """
    print("--- [Testament] Running Semantic Linker (GOFAI) ---")

    # isdir rather than exists: a plain file here would crash os.listdir.
    if not os.path.isdir(chapters_dir):
        print(f"Error: {chapters_dir} not found")
        return []

    # 1. Extract keywords from each chapter
    links = {}
    for filename in sorted(os.listdir(chapters_dir)):
        if not filename.endswith(".md"):
            continue
        path = os.path.join(chapters_dir, filename)
        # Explicit encoding so results do not depend on the platform locale.
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
        # Dropping stop words here is equivalent to dropping them from each
        # pairwise intersection, but is done once per chapter.
        links[filename] = set(_KEYWORD_PATTERN.findall(content)) - _STOP_WORDS

    # 2. Find cross-references
    cross_refs = []
    # combinations() yields pairs in the same (i < j) order as a nested
    # index loop over the sorted filenames.
    for (f1, kw1), (f2, kw2) in itertools.combinations(links.items(), 2):
        common = kw1 & kw2
        if common:
            cross_refs.append({
                "source": f1,
                "target": f2,
                # Sorted so the output file is stable across runs.
                "keywords": sorted(common),
            })

    # 3. Save the relationships as JSON
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(cross_refs, f, indent=2)

    print(f"Linked {len(cross_refs)} relationships across {len(links)} chapters.")
    return cross_refs
# Script entry point: when executed directly, link the chapters found in the
# "chapters" directory relative to the current working directory.
if __name__ == "__main__":
    link_chapters(chapters_dir="chapters")