Compare commits

..

1 Commits

Author SHA1 Message Date
Timmy
25b26c536d fix(#1480): Add duplicate PR prevention mechanism
Some checks failed
CI / test (pull_request) Failing after 51s
CI / validate (pull_request) Failing after 40s
Review Approval Gate / verify-review (pull_request) Failing after 6s
Agents keep creating duplicate PRs for the same issue (#1128
had 4+ duplicate PRs). This adds a mandatory preflight check.

Files:
  - scripts/pr-preflight-check.sh: bash preflight (exit 1 if duplicates)
  - scripts/pr_preflight_check.py: Python preflight (for agent workflows)
  - docs/DUPLICATE_PR_PREVENTION.md: usage documentation

Closes #1480, #1460, #1474
2026-04-14 18:59:02 -04:00
6 changed files with 302 additions and 320 deletions

21
.github/CODEOWNERS vendored
View File

@@ -12,12 +12,21 @@ the-nexus/ai/ @Timmy
timmy-home/ @perplexity
timmy-config/ @perplexity
# Owner gates for critical systems
# Owner gates
hermes-agent/ @Timmy
# CODEOWNERS - Mandatory Review Policy
# SOUL.md requires review from @Timmy (canonical location: timmy-home/SOUL.md)
SOUL.md @Timmy
timmy-home/SOUL.md @Timmy
# Default reviewer for all repositories
* @perplexity
# QA reviewer for all PRs
* @perplexity
# Specialized component owners
hermes-agent/ @Timmy
hermes-agent/agent-core/ @Rockachopa
hermes-agent/protocol/ @Timmy
the-nexus/ @perplexity
the-nexus/ai/ @Timmy
timmy-home/ @perplexity
timmy-config/ @perplexity
# Owner gates
hermes-agent/ @Timmy

View File

@@ -1,195 +0,0 @@
#!/usr/bin/env python3
"""
Check for duplicate SOUL.md files across repositories.
Issue #1443: decide: Establish SOUL.md canonical location
"""
import json
import os
import sys
import urllib.request
from typing import Dict, List, Any, Optional
# Configuration
# Base URL prepended to every API endpoint requested by SoulChecker._api_request.
GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
# File the API token is read from; "~" is expanded to the current user's home directory.
TOKEN_PATH = os.path.expanduser("~/.config/gitea/token")
# Owner segment used when building /repos/{ORG}/{repo}/... endpoints.
ORG = "Timmy_Foundation"
class SoulChecker:
    """Find SOUL.md files across repositories through the Gitea contents API.

    Only ``timmy-home`` is treated as the canonical home of SOUL.md; a SOUL.md
    found in any other repository is reported as a duplicate.
    """

    def __init__(self):
        # Load the token eagerly so a missing credential aborts before any request.
        self.token = self._load_token()

    def _load_token(self) -> str:
        """Return the API token stored at TOKEN_PATH, stripped of whitespace.

        Prints an error to stderr and exits the process with status 1 when the
        token file does not exist.
        """
        try:
            with open(TOKEN_PATH, "r") as f:
                return f.read().strip()
        except FileNotFoundError:
            print(f"ERROR: Token not found at {TOKEN_PATH}", file=sys.stderr)
            sys.exit(1)

    def _api_request(self, endpoint: str) -> Any:
        """GET ``GITEA_BASE + endpoint`` with token auth and return the parsed JSON.

        Returns None for any HTTP error response. A 404 is silent; other status
        codes are reported on stderr so that stdout stays clean for ``--json``
        output. Non-HTTP failures (connection errors, timeouts) propagate.
        """
        url = f"{GITEA_BASE}{endpoint}"
        headers = {"Authorization": f"token {self.token}"}
        req = urllib.request.Request(url, headers=headers)
        try:
            # A timeout keeps an unresponsive server from hanging the check forever.
            with urllib.request.urlopen(req, timeout=10) as resp:
                return json.loads(resp.read())
        except urllib.error.HTTPError as e:
            if e.code == 404:
                return None
            # errors="replace": a badly encoded error body must not raise here.
            error_body = e.read().decode(errors="replace") if e.fp else "No error body"
            print(f"API Error {e.code}: {error_body}", file=sys.stderr)
            return None

    def check_soul_files(self, repos: List[str]) -> Dict[str, Any]:
        """Query each repository for a top-level SOUL.md.

        Returns a dict with a per-repository entry under ``"repos"`` and
        aggregate counters under ``"summary"``. A repository whose request
        returned nothing (including API errors) is recorded as having no
        SOUL.md.
        """
        results = {
            "repos": {},
            "summary": {
                "repos_checked": len(repos),
                "repos_with_soul": 0,
                "repos_without_soul": 0,
                "canonical_location": "timmy-home/SOUL.md"
            }
        }
        for repo in repos:
            endpoint = f"/repos/{ORG}/{repo}/contents/SOUL.md"
            soul_file = self._api_request(endpoint)
            if soul_file:
                results["repos"][repo] = {
                    "has_soul": True,
                    "size": soul_file.get("size", 0),
                    "path": soul_file.get("path", "SOUL.md"),
                    "html_url": soul_file.get("html_url", ""),
                    "is_canonical": repo == "timmy-home"
                }
                results["summary"]["repos_with_soul"] += 1
            else:
                results["repos"][repo] = {
                    "has_soul": False,
                    "is_canonical": False
                }
                results["summary"]["repos_without_soul"] += 1
        return results

    def generate_report(self, results: Dict[str, Any]) -> str:
        """Render the dict produced by check_soul_files as a Markdown report."""
        summary = results["summary"]
        parts = [
            "# SOUL.md Location Report\n\n",
            "## Summary\n",
            f"- **Repositories checked:** {summary['repos_checked']}\n",
            f"- **Repositories with SOUL.md:** {summary['repos_with_soul']}\n",
            f"- **Repositories without SOUL.md:** {summary['repos_without_soul']}\n",
            f"- **Canonical location:** {summary['canonical_location']}\n\n",
        ]

        # A duplicate is any SOUL.md living outside the canonical repository.
        duplicates = [
            repo for repo, data in results["repos"].items()
            if data["has_soul"] and not data["is_canonical"]
        ]
        if duplicates:
            parts.append("⚠️ **Duplicate SOUL.md files found:**\n\n")
            for repo in duplicates:
                data = results["repos"][repo]
                parts.append(f"- **{repo}**: {data['path']}\n")
                parts.append(f" - Size: {data['size']} bytes\n")
                parts.append(f" - URL: {data['html_url']}\n")
            # NOTE(review): indentation was lost in the reviewed source; this blank
            # line is assumed to close the whole list rather than each entry — confirm.
            parts.append("\n")
        else:
            parts.append("✅ **No duplicate SOUL.md files found.**\n\n")

        parts.append("## Repository Details\n\n")
        for repo, data in results["repos"].items():
            parts.append(f"### {repo}\n")
            if data["has_soul"]:
                if data["is_canonical"]:
                    parts.append("- ✅ **Canonical location**\n")
                else:
                    parts.append("- ⚠️ **Duplicate** (should be reference pointer)\n")
                parts.append(f"- Path: {data['path']}\n")
                parts.append(f"- Size: {data['size']} bytes\n")
                parts.append(f"- URL: {data['html_url']}\n")
            else:
                parts.append("- ✅ No SOUL.md file\n")
            parts.append("\n")
        # Joining once avoids re-copying the growing report on every append.
        return "".join(parts)

    def get_soul_content(self, repo: str) -> Optional[str]:
        """Return the decoded text of SOUL.md in *repo*, or None if the request returned nothing."""
        endpoint = f"/repos/{ORG}/{repo}/contents/SOUL.md"
        soul_file = self._api_request(endpoint)
        if not soul_file:
            return None
        # The "content" field of the response is base64-encoded.
        import base64
        content = base64.b64decode(soul_file["content"]).decode("utf-8")
        return content
def main():
"""Command-line entry point for the SOUL.md checker.

With --content, prints the SOUL.md text fetched from timmy-home.
Otherwise checks the repositories given by --repos and prints the result
as JSON (--json), as a Markdown report (--report), or as a short summary.
The process exits 1 when a SOUL.md is found outside the canonical
repository and 0 when none is found (see the review note below on which
output modes reach that check).
"""
# Imported here rather than at module level, as in the original.
import argparse
parser = argparse.ArgumentParser(description="Check for duplicate SOUL.md files")
parser.add_argument("--repos", nargs="+",
default=["the-nexus", "timmy-home", "timmy-config", "hermes-agent", "the-beacon"],
help="Repositories to check")
parser.add_argument("--report", action="store_true", help="Generate report")
parser.add_argument("--json", action="store_true", help="Output JSON instead of report")
parser.add_argument("--content", action="store_true", help="Show SOUL.md content")
args = parser.parse_args()
# Constructing the checker loads the API token (and exits if it is missing).
checker = SoulChecker()
if args.content:
# Show SOUL.md content from timmy-home
content = checker.get_soul_content("timmy-home")
if content:
print("SOUL.md content from timmy-home:")
print("=" * 60)
print(content)
else:
print("SOUL.md not found in timmy-home")
else:
# Check for SOUL.md files
results = checker.check_soul_files(args.repos)
if args.json:
print(json.dumps(results, indent=2))
elif args.report:
report = checker.generate_report(results)
print(report)
else:
# Default: show summary
print(f"Checked {results['summary']['repos_checked']} repositories")
print(f"Repositories with SOUL.md: {results['summary']['repos_with_soul']}")
print(f"Canonical location: {results['summary']['canonical_location']}")
# Check for duplicates
# NOTE(review): indentation was lost in this view, so it is unclear whether the
# duplicate check below runs only for the default summary or also after
# --json / --report — confirm against the original file.
# A duplicate is a repository that has SOUL.md but is not the canonical one.
duplicates = []
for repo, data in results["repos"].items():
if data["has_soul"] and not data["is_canonical"]:
duplicates.append(repo)
if duplicates:
print(f"\n⚠️ Duplicate SOUL.md files found in: {', '.join(duplicates)}")
sys.exit(1)
else:
print("\n✅ No duplicate SOUL.md files found")
sys.exit(0)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,50 @@
# Duplicate PR Prevention
## The Problem
Issue #1128 documented a cleanup of duplicate PRs. Agents then created
4+ duplicate PRs *for issue #1128 itself*. The irony was not lost on anyone.
See: #1449, #1460, #1474, #1480.
## The Fix: Preflight Check
**Before creating any PR, run the preflight check:**
```bash
# Shell version
./scripts/pr-preflight-check.sh <issue_number>
# Python version
python3 scripts/pr_preflight_check.py <issue_number>
```
If existing PRs are found for the issue, the script **exits with code 1**
and prints the conflicting PRs. DO NOT proceed to create a new PR.
## Agent Workflow
```
1. Read issue
2. Clone repo
3. Implement fix
4. Commit
5. >>> RUN pr_preflight_check.py <issue_number> <<<
6. If exit 0: safe to push and create PR
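7. If exit 1: STOP — review existing PRs first (any other non-zero exit means the check itself failed; fix the cause and re-run it before creating a PR)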
8. Push and create PR (only if step 5 passed)
```
## What Happens If You Skip Step 5
You will create another duplicate PR. The cleanup script will find it.
Someone will close it. You will have wasted compute and created noise.
## Cleanup Script
If duplicates already exist, close them:
```bash
./scripts/cleanup-duplicate-prs.sh --dry-run # preview
./scripts/cleanup-duplicate-prs.sh --close # actually close
```

View File

@@ -1,147 +1,103 @@
# SOUL.md Canonical Location Policy
**Issue:** #1443 - decide: Establish SOUL.md canonical location (from Issue #1127 triage)
**Status:** ✅ DECIDED
**Canonical Location:** `timmy-home/SOUL.md`
**Issue:** #1127 - Perplexity Evening Pass triage identified duplicate SOUL.md files causing duplicate PRs.
## Decision
## Current State
**SOUL.md canonical location is `timmy-home/SOUL.md`.**
As of 2026-04-14:
- SOUL.md exists in `timmy-home` (canonical location)
- SOUL.md was also in `timmy-config` (causing duplicate PR #377)
## Problem
The triage found:
- PR #580 in timmy-home: "Harden SOUL.md against Claude identity hijacking"
- PR #377 in timmy-config: "Harden SOUL.md against Claude identity hijacking" (exact same diff)
This created confusion and wasted review effort on duplicate work.
## Canonical Location Decision
**SOUL.md canonical location: `timmy-home/SOUL.md`**
### Rationale
1. **Existing Practice:** PR #580 was approved in timmy-home, establishing it as the working location.
2. **Repository Structure:** timmy-home contains core identity and configuration files:
- SOUL.md (Timmy's identity and values)
- CLAUDE.md (Claude configuration)
- Core documentation and policies
3. **CLAUDE.md Alignment:** The CLAUDE.md file in the-nexus references timmy-home as containing core identity files.
This decision was made based on:
1. **Existing Practice:** PR #580 was approved in timmy-home
2. **Repository Structure:** timmy-home contains core identity files
3. **CLAUDE.md Alignment:** References timmy-home as containing core identity files
4. **Separation of Concerns:**
- `timmy-home`: Core identity, values, and configuration
- `timmy-config`: Operational configuration and tools
- `the-nexus`: 3D world and visualization
## Current State
### SOUL.md in the-nexus
The current `SOUL.md` in the-nexus is already a reference pointer:
```markdown
# SOUL.md
> **This file is a reference pointer.** The canonical SOUL.md lives in
> [`timmy-home`](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-home/src/branch/main/SOUL.md).
>
> Do not duplicate identity content here. If this repo needs SOUL.md at
> runtime, fetch it from timmy-home or use a submodule reference.
```
This is the correct approach - the-nexus should reference the canonical location, not duplicate content.
### Historical Context
- **PR #580 (timmy-home):** "Harden SOUL.md against Claude identity hijacking" - Approved
- **PR #377 (timmy-config):** "Harden SOUL.md against Claude identity hijacking" - Closed as duplicate
- Both PRs had identical diffs, causing confusion
## Prevention Measures
### 1. Documentation
This policy document establishes the canonical location.
### 2. CODEOWNERS Update
Add SOUL.md to CODEOWNERS to require review for changes:
```
# SOUL.md requires review from @Timmy
SOUL.md @Timmy
timmy-home/SOUL.md @Timmy
```
### 3. PR Template Update
Add reminder to PR template:
```markdown
## SOUL.md Changes
- [ ] Changes are to `timmy-home/SOUL.md` (canonical location)
- [ ] Not creating duplicate SOUL.md in other repositories
- [ ] Updating reference pointers if needed
```
### 4. CI Check (Future)
Add CI check to warn if SOUL.md is modified outside timmy-home.
## Implementation
### Immediate Actions
1. **Verify timmy-home/SOUL.md exists** - ✅ Confirmed
2. **Verify the-nexus/SOUL.md is reference pointer** - ✅ Confirmed
3. **Update CODEOWNERS** - Add SOUL.md review requirements
4. **Document policy** - This document
### Future Actions
1. **Check other repositories** - Ensure no duplicate SOUL.md files
2. **Update documentation** - Reference this policy in CONTRIBUTING.md
3. **Monitor for duplicates** - Regular checks for SOUL.md in wrong locations
1. **Remove duplicate SOUL.md from timmy-config** (if it still exists)
- Check if `timmy-config/SOUL.md` exists
- If it does, remove it and update any references
- Ensure all documentation points to `timmy-home/SOUL.md`
2. **Update CODEOWNERS** (if needed)
- Ensure SOUL.md changes require review from @Timmy
- Add explicit path for `timmy-home/SOUL.md`
3. **Document in CONTRIBUTING.md**
- Add section about canonical file locations
- Specify that SOUL.md changes should only be made in timmy-home
### Prevention Measures
1. **Git Hooks or CI Checks**
- Warn if SOUL.md is created outside timmy-home
- Check for duplicate SOUL.md files across repos
2. **Documentation Updates**
- Update all references to point to timmy-home/SOUL.md
- Ensure onboarding docs mention canonical location
3. **Code Review Guidelines**
- Reviewers should check that SOUL.md changes are in timmy-home
- Reject PRs that modify SOUL.md in other repositories
## Verification
### Check timmy-home/SOUL.md
To verify canonical location:
```bash
# Verify canonical location exists
curl -s -H "Authorization: token $TOKEN" \
"https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home/contents/SOUL.md"
# Check if SOUL.md exists in timmy-home
curl -H "Authorization: token $TOKEN" \
https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home/contents/SOUL.md
# Check if SOUL.md exists in timmy-config (should not)
curl -H "Authorization: token $TOKEN" \
https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-config/contents/SOUL.md
```
### Check for Duplicates
```bash
# Check all repositories for SOUL.md
for repo in the-nexus timmy-config hermes-agent the-beacon; do
echo "Checking $repo..."
curl -s -H "Authorization: token $TOKEN" \
"https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/$repo/contents/SOUL.md" \
| jq -r '.name // "Not found"'
done
```
## Future Considerations
## Benefits
1. **Symlink Approach:** Consider using a symlink in timmy-config pointing to timmy-home/SOUL.md if both locations are needed for technical reasons.
### 1. Prevents Duplicate PRs
- No more duplicate SOUL.md changes across repositories
- Clear ownership and review process
2. **Content Synchronization:** If SOUL.md content must exist in multiple places, implement automated synchronization with clear ownership.
### 2. Clear Ownership
- timmy-home owns SOUL.md
- Changes require review from @Timmy
### 3. Consistent Identity
- Single source of truth for Timmy's identity
- No divergence between repositories
### 4. Easier Maintenance
- One place to update SOUL.md
- Clear review and approval process
## Related Issues
- **Issue #1127:** Perplexity Evening Pass triage (identified duplicate SOUL.md)
- **Issue #1443:** This decision
- **PR #580:** Approved SOUL.md changes in timmy-home
- **PR #377:** Closed duplicate SOUL.md changes in timmy-config
## Files
- `SOUL.md` - Reference pointer to timmy-home (this repository)
- `timmy-home/SOUL.md` - Canonical location
- `docs/soul-canonical-location.md` - This policy document
3. **Version Control:** Ensure all changes to SOUL.md go through proper review process in timmy-home.
## Conclusion
**SOUL.md canonical location is established as `timmy-home/SOUL.md`.**
Establishing `timmy-home/SOUL.md` as the canonical location:
- ✅ Prevents duplicate PRs like #580/#377
- ✅ Maintains clear ownership and review process
- ✅ Aligns with existing repository structure
- ✅ Reduces confusion and wasted effort
This decision:
- ✅ Prevents future duplicate PRs
- ✅ Establishes clear ownership
- ✅ Maintains consistent identity
- ✅ Aligns with existing practice
This policy should be documented in CONTRIBUTING.md and enforced through code review guidelines.
**This issue can be closed.**
## License
Part of the Timmy Foundation project.
**Date:** 2026-04-14
**Status:** RECOMMENDED (requires team decision)

70
scripts/pr-preflight-check.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# pr-preflight-check.sh — MUST run before creating any PR
#
# Checks for existing open PRs that reference the same issue.
# Refuses to proceed if duplicates exist.
#
# Usage:
#   ./scripts/pr-preflight-check.sh <issue_number>
#
# Environment:
#   GITEA_TOKEN  API token (required)
#   GITEA_URL    Base URL (default: https://forge.alexanderwhitestone.com)
#   REPO         owner/name (default: Timmy_Foundation/the-nexus)
#
# Exit codes:
#   0 — Safe to proceed (no existing PRs for this issue). Also returned,
#       with a warning, when the API answers with an empty body.
#   1 — BLOCKED (existing PRs found, do NOT create a new one)
#   2 — Error (missing or non-numeric issue number, missing token,
#       request failure, response that is not a JSON list)
#
# Issue #1480: This script exists because agents keep creating
# duplicate PRs for the same issue. Running this before `git push`
# or `curl ... /pulls` prevents the problem.
# ═══════════════════════════════════════════════════════════════
set -euo pipefail

ISSUE_NUM="${1:-}"
if [ -z "$ISSUE_NUM" ]; then
  echo "Usage: $0 <issue_number>"
  echo "Example: $0 1128"
  exit 2
fi
# Digits only: the value ends up inside a regular expression, so anything
# else could change what is matched.
if ! [[ "$ISSUE_NUM" =~ ^[0-9]+$ ]]; then
  echo "Error: issue number must be numeric, got '$ISSUE_NUM'" >&2
  exit 2
fi

GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
# Checked explicitly instead of with ${VAR:?...} so that a missing token
# exits with the documented error code 2 and cannot be mistaken for BLOCKED (1).
if [ -z "${GITEA_TOKEN:-}" ]; then
  echo "Set GITEA_TOKEN env var" >&2
  exit 2
fi
REPO="${REPO:-Timmy_Foundation/the-nexus}"
API="$GITEA_URL/api/v1"
AUTH="Authorization: token $GITEA_TOKEN"

echo "═══ PR Preflight Check for Issue #$ISSUE_NUM ═══"
echo ""

# Fetch open PRs. The assignment sits inside `if` so that a curl failure is
# reported here instead of `set -e` killing the script with curl's own status.
if ! OPEN_PRS=$(curl -s --max-time 10 -H "$AUTH" "$API/repos/$REPO/pulls?state=open&limit=100"); then
  echo "✗ Request to $API failed (curl error)" >&2
  exit 2
fi

if [ -z "$OPEN_PRS" ] || [ "$OPEN_PRS" = "null" ]; then
  echo "⚠ Could not fetch PRs (API error or empty response)"
  echo "Proceeding with caution."
  exit 0
fi

# An API error arrives as a JSON object, not a list; iterating it with .[]
# would make jq fail with an unrelated message further down.
if ! printf '%s' "$OPEN_PRS" | jq -e 'type == "array"' >/dev/null 2>&1; then
  echo "✗ Unexpected API response (expected a JSON list of PRs):" >&2
  printf '%.500s\n' "$OPEN_PRS" >&2
  exit 2
fi

# Find PRs referencing this issue.
#  - The issue number is passed with --arg instead of being spliced into the
#    jq program text.
#  - "([^0-9]|$)" stops "#112" from matching inside "#1128".
#  - Each side of `or` is parenthesised: `|` binds more loosely than `or` and
#    `//`, so without the parentheses .body would be applied to the title string.
if ! MATCHES=$(printf '%s' "$OPEN_PRS" | jq -r --arg n "$ISSUE_NUM" '
  ("#" + $n + "([^0-9]|$)") as $re
  | .[]
  | select(((.title // "") | test($re)) or ((.body // "") | test($re)))
  | " PR #\(.number): \(.title) [\(.head.ref)] (\(.created_at[:10]))"
'); then
  echo "✗ Could not parse the PR list returned by the API" >&2
  exit 2
fi

if [ -z "$MATCHES" ]; then
  echo "✓ No existing open PRs for issue #$ISSUE_NUM"
  echo "✓ Safe to proceed."
  exit 0
fi

echo "✗ BLOCKED — Found existing open PRs for issue #$ISSUE_NUM:"
echo ""
echo "$MATCHES"
echo ""
echo "═══════════════════════════════════════════════"
echo "DO NOT CREATE A NEW PR."
echo ""
echo "Options:"
echo " 1. Review and merge an existing PR"
echo " 2. Close duplicates first: ./scripts/cleanup-duplicate-prs.sh --close"
echo " 3. Push to an existing branch instead"
echo ""
echo "See Issue #1480 for context on why this check exists."
echo "═══════════════════════════════════════════════"
exit 1

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
pr_preflight_check.py — Prevent duplicate PR creation.
Call before creating any PR:
python3 scripts/pr_preflight_check.py 1128
Returns exit code 0 if safe, 1 if blocked.
Designed for agent workflows — agents MUST call this before `curl ... /pulls`.
Issue #1480: The duplicate PR problem.
"""
import json
import os
import sys
import urllib.request
def _fetch_open_prs(gitea_url, repo, token):
    """Return all open PRs of *repo*, requesting pages until an empty one comes back."""
    max_pages = 50  # hard stop so a misbehaving server cannot cause an endless loop
    collected = []
    for page in range(1, max_pages + 1):
        url = f"{gitea_url}/api/v1/repos/{repo}/pulls?state=open&limit=100&page={page}"
        req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            batch = json.loads(resp.read())
        if not isinstance(batch, list):
            # Error payloads are objects; iterating one would yield its keys.
            raise ValueError(f"unexpected response: {batch!r}")
        if not batch:
            break
        collected.extend(batch)
    return collected


def check_existing_prs(issue_num: int, repo: str = None, token: str = None,
                       open_prs: list = None) -> dict:
    """Check for existing open PRs referencing an issue.

    Args:
        issue_num: Issue number to look for (matched as ``#<number>``).
        repo: ``owner/name``; defaults to $REPO, then Timmy_Foundation/the-nexus.
        token: API token; defaults to $GITEA_TOKEN, then ~/.config/gitea/token.
        open_prs: Optional list of PR dicts that was already fetched. When
            given, no token is needed and no request is made.

    Returns dict with:
        safe (bool): True if no duplicates found
        matches (list): List of PR dicts that reference the issue
        message (str): Human-readable status

    The check is best-effort: when no token is available or the API call
    fails, ``safe`` is True and ``message`` explains why nothing was checked.
    """
    import re  # function-scope import: the module's import block is left untouched

    if open_prs is None:
        gitea_url = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
        token = token or os.environ.get("GITEA_TOKEN", "")
        repo = repo or os.environ.get("REPO", "Timmy_Foundation/the-nexus")
        if not token:
            token_path = os.path.expanduser("~/.config/gitea/token")
            if os.path.exists(token_path):
                with open(token_path) as fh:
                    token = fh.read().strip()
        if not token:
            return {"safe": True, "matches": [], "message": "No token — cannot check"}
        try:
            # The server may cap `limit` below 100, so a single request is not
            # guaranteed to return every open PR; the helper follows pagination.
            open_prs = _fetch_open_prs(gitea_url, repo, token)
        except Exception as e:
            # Deliberately broad: any failure talking to the API is reported in
            # the message instead of being raised.
            return {"safe": True, "matches": [], "message": f"API error: {e}"}

    # (?!\d) keeps "#112" from matching inside "#1128".
    issue_ref = re.compile("#" + re.escape(str(issue_num)) + r"(?!\d)")
    matches = []
    for pr in open_prs:
        # The API may send null for either field, hence `or ""`.
        title = pr.get("title") or ""
        body = pr.get("body") or ""
        if issue_ref.search(title) or issue_ref.search(body):
            matches.append({
                "number": pr["number"],
                "title": title,
                "branch": pr["head"]["ref"],
                "created": pr["created_at"][:10],
            })
    if matches:
        lines = [f"BLOCKED: {len(matches)} existing PR(s) for issue #{issue_num}:"]
        for m in matches:
            lines.append(f" PR #{m['number']}: {m['title']} [{m['branch']}] ({m['created']})")
        lines.append("")
        lines.append("DO NOT CREATE A NEW PR. Review existing ones first.")
        return {"safe": False, "matches": matches, "message": "\n".join(lines)}
    return {"safe": True, "matches": [], "message": f"✓ Safe: no open PRs for #{issue_num}"}
def main():
    """Command-line entry point.

    Usage: pr_preflight_check.py <issue_number> [repo]

    Exit codes: 0 when no open PR references the issue, 1 when at least one
    does, 2 when the issue number is missing or is not an integer.
    """
    if len(sys.argv) < 2:
        print("Usage: pr_preflight_check.py <issue_number> [repo]")
        print("Example: pr_preflight_check.py 1128")
        print(" pr_preflight_check.py 1339 Timmy_Foundation/the-nexus")
        sys.exit(2)
    # Accept "#1128" as well as "1128", and report bad input as a usage error
    # instead of an uncaught ValueError traceback.
    try:
        issue_num = int(sys.argv[1].lstrip("#"))
    except ValueError:
        print(f"Error: issue number must be an integer, got {sys.argv[1]!r}", file=sys.stderr)
        sys.exit(2)
    repo = sys.argv[2] if len(sys.argv) > 2 else None
    result = check_existing_prs(issue_num, repo)
    print(result["message"])
    if not result["safe"]:
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()