# 52 lines | 1.7 KiB | Python
import os
|
|
import re
|
|
import json
|
|
|
|
# Capitalized words that are ordinary English rather than names; these are
# never reported as shared keywords.
_STOPWORDS = frozenset({
    "The", "A", "An", "In", "On", "At", "To", "From", "By",
    "He", "She", "It", "They",
})

# One or more whitespace-separated capitalized words ("Alice", "New York").
# NOTE: `\s+` also matches newlines, so a phrase may span a line break.
_KEYWORD_RE = re.compile(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b')


def link_chapters(chapters_dir):
    """Cross-reference Markdown chapters by the capitalized phrases they share.

    Every ``*.md`` file directly inside ``chapters_dir`` is scanned for
    capitalized words/phrases. Each pair of chapters that shares at least one
    such phrase (after stop-word removal) yields one record::

        {"source": <earlier file>, "target": <later file>, "keywords": [...]}

    Records are written as a JSON list to ``build/cross_refs.json``, relative
    to the current working directory (``build`` is created if needed).

    Args:
        chapters_dir: Path of the directory holding the chapter files.

    Returns:
        None. If ``chapters_dir`` is not a directory, an error line is
        printed and nothing is written.
    """
    print("--- [Testament] Running Semantic Linker (GOFAI) ---")

    # isdir rather than exists: a regular file at this path would otherwise
    # pass the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(chapters_dir):
        print(f"Error: {chapters_dir} not found")
        return

    # 1. Extract keywords from each chapter
    links = {}
    for filename in sorted(os.listdir(chapters_dir)):
        if not filename.endswith(".md"):
            continue

        path = os.path.join(chapters_dir, filename)
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        # Dropping stop words here is equivalent to filtering each pairwise
        # intersection later, and does the work once per chapter.
        links[filename] = set(_KEYWORD_RE.findall(content)) - _STOPWORDS

    # 2. Find cross-references (each unordered pair once, in filename order)
    cross_refs = []
    filenames = list(links)
    for i, f1 in enumerate(filenames):
        for f2 in filenames[i + 1:]:
            common = links[f1] & links[f2]
            if common:
                cross_refs.append({
                    "source": f1,
                    "target": f2,
                    # Sorted so the output file is identical from run to run;
                    # set iteration order varies with string hash seeding.
                    "keywords": sorted(common),
                })

    # 3. Save to build/cross_refs.json
    os.makedirs("build", exist_ok=True)
    with open("build/cross_refs.json", "w", encoding="utf-8") as f:
        json.dump(cross_refs, f, indent=2)

    print(f"Linked {len(cross_refs)} relationships across {len(filenames)} chapters.")
|
|
|
|
# Script entry point: when executed directly, link the chapters found in the
# "chapters" directory (resolved relative to the current working directory).
if __name__ == "__main__":
    link_chapters("chapters")
|