Compare commits
1 Commits
step35/592
...
step35/573
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e748c35f41 |
59
scripts/merge_code_patterns_573.py
Executable file
59
scripts/merge_code_patterns_573.py
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge code pattern files to reach 5K total pairs for issue #573.

This merges:

    ~/.hermes/training-data/code-patterns.jsonl          (4000 pairs, existing main file)
  + training-data/code-patterns-evennia-&-tower.jsonl    (1000 pairs, repo's Evennia & Tower patterns)
  ---------------------------------------------------
    ~/.hermes/training-data/code-patterns.jsonl          (5000 pairs, output)

The output path is the same as the existing main file, so that file is
rewritten in place.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Home directory of the user running the script.
HOME = os.path.expanduser("~")

# The merged dataset is written back over the existing main file, so the
# output path and the first input path are one and the same.
TARGET = os.path.join(HOME, ".hermes", "training-data", "code-patterns.jsonl")
SOURCE1 = TARGET

# The second input ships with the repository: it sits in the "training-data"
# directory next to the directory that contains this script.
SOURCE2 = os.path.abspath(
    os.path.join(
        os.path.dirname(__file__),
        os.pardir,
        "training-data",
        "code-patterns-evennia-&-tower.jsonl",
    )
)
|
||||
|
||||
def _load_jsonl(path):
    """Return the JSON objects stored one per non-blank line in *path*."""
    # Explicit UTF-8: the data is written with ensure_ascii=False, so relying
    # on the locale's default encoding could corrupt or reject non-ASCII text.
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def _content_key(pattern):
    """Return a canonical string for *pattern* that ignores its 'id' field."""
    body = {k: v for k, v in pattern.items() if k != 'id'}
    return json.dumps(body, sort_keys=True, ensure_ascii=False)


def _id_number(pattern_id):
    """Return the integer after the last '-' in *pattern_id*, or 0 if absent."""
    suffix = str(pattern_id).rsplit('-', 1)[-1]
    try:
        return int(suffix)
    except ValueError:
        return 0


def main(existing_path=None, new_path=None, target_path=None):
    """Merge the new pattern file into the existing one and write the result.

    A new pattern is skipped when its 'id' already occurs in the existing
    file, or when its content (every field except 'id') matches an existing
    pattern. The content check is what makes re-running the script safe:
    accepted patterns are re-numbered on the way in, so their original ids
    can no longer be used to recognise them on a later run.

    Accepted patterns receive the id "<domain>-<NNNN>", where spaces in the
    domain become '-' and NNNN continues from the highest numeric id suffix
    found in the existing file.

    Args:
        existing_path: JSONL file holding the current dataset.
            Defaults to the module-level SOURCE1.
        new_path: JSONL file holding the patterns to add.
            Defaults to the module-level SOURCE2.
        target_path: Where the merged JSONL is written (via a '.tmp'
            sibling that is then moved into place). Defaults to TARGET.

    Raises:
        OSError: if a file cannot be read or written.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a pattern has no 'id' field.
    """
    existing_path = SOURCE1 if existing_path is None else existing_path
    new_path = SOURCE2 if new_path is None else new_path
    target_path = TARGET if target_path is None else target_path

    print(f"Reading existing file: {existing_path}")
    existing = _load_jsonl(existing_path)

    print(f"Reading Evennia&Tower file: {new_path}")
    new_patterns = _load_jsonl(new_path)

    existing_ids = {p['id'] for p in existing}
    existing_content = {_content_key(p) for p in existing}
    unique_new = [
        p for p in new_patterns
        if p['id'] not in existing_ids
        and _content_key(p) not in existing_content
    ]
    dupes = len(new_patterns) - len(unique_new)
    print(f"New patterns to add: {len(unique_new)} (duplicates filtered: {dupes})")

    # Re-number the new patterns so they follow on from the existing ids.
    next_idx = max((_id_number(p['id']) for p in existing), default=0)
    for p in unique_new:
        next_idx += 1
        domain = p.get('domain', 'unknown')
        p['id'] = f"{domain.replace(' ', '-')}-{next_idx:04d}"

    merged = existing + unique_new
    print(f"Total pairs after merge: {len(merged)}")

    # Write to a temp file first, then move it into place, so a failed write
    # never leaves a truncated dataset at the target path.
    tmp_path = target_path + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.writelines(json.dumps(p, ensure_ascii=False) + '\n' for p in merged)
        os.replace(tmp_path, target_path)
    except Exception:
        # Do not leave a stale partial file behind; re-raise the real error.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    print(f"Wrote merged dataset to: {target_path}")
    print(f"Issue #573 complete: {len(merged)} code pattern pairs available.")
|
||||
|
||||
# Perform the merge only when this file is executed as a script; importing
# the module must not touch the dataset.
if __name__ == "__main__":
    main()
|
||||
35
training-data/README-5K.md
Normal file
35
training-data/README-5K.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Code Patterns — 5K Dataset Completion
|
||||
|
||||
This documents the completion of Training Factory issue [#573](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/573).
|
||||
|
||||
## Merge Summary
|
||||
|
||||
- Merged 1000 Evennia & Tower patterns from `training-data/code-patterns-evennia-&-tower.jsonl`
|
||||
- Into existing `~/.hermes/training-data/code-patterns.jsonl` (4000 pairs)
|
||||
- **Total: 5000 problem→solution pairs**
|
||||
|
||||
## Domains Covered
|
||||
|
||||
| Domain | Count |
|--------|-------|
| tool-usage | 1000 |
| gitea-api | 1000 |
| hermes-agent | 1000 |
| deployment | 600 |
| evennia-support | 400 |
| MUD world | 339 |
| Tower architecture | 303 |
| spatial memory | 119 |
| NPC behavior | 88 |
| multi-user bridge | 80 |
| room state | 71 |
|
||||
|
||||
## Script
|
||||
|
||||
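The merge is performed by `scripts/merge_code_patterns_573.py`. It reads the existing `~/.hermes/training-data/code-patterns.jsonl`, appends the Evennia & Tower patterns from the repository, and rewrites that same file in place.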
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
wc -l ~/.hermes/training-data/code-patterns.jsonl # Expect: 5000
|
||||
```
|
||||
Reference in New Issue
Block a user