Merge pull request 'feat: add research paper project scaffolder' (#308) from feat/research-paper-scaffolder into main
Some checks failed
Forge CI / smoke-and-build (push) Failing after 25s
Some checks failed
Forge CI / smoke-and-build (push) Failing after 25s
Merge PR #308: feat: add research paper project scaffolder
This commit was merged in pull request #308.
This commit is contained in:
@@ -121,6 +121,27 @@ workspace/
|
||||
human_eval/ # Human evaluation materials (if needed)
|
||||
```
|
||||
|
||||
### Quick Start with Scaffolder
|
||||
|
||||
Use the scaffolder to create a ready-to-write workspace in one command:
|
||||
|
||||
```bash
|
||||
python3 SKILL_DIR/scripts/scaffold_paper.py init my-paper --venue neurips2025 --title "My Paper Title"
|
||||
cd my-paper
|
||||
# Workspace is ready — fill in scope.md, then proceed to Phase 1
|
||||
```
|
||||
|
||||
This creates the full directory structure (paper/, experiments/, results/, etc.), copies the venue LaTeX template, generates starter files (scope.md, experiment_log.md, TODO.md, cost_tracker.py), initializes git, and makes the first commit.
|
||||
|
||||
```bash
|
||||
# List available venues and their requirements
|
||||
python3 SKILL_DIR/scripts/scaffold_paper.py venues
|
||||
|
||||
# Check project progress at any time
|
||||
python3 SKILL_DIR/scripts/scaffold_paper.py status my-paper
|
||||
```
|
||||
|
||||
|
||||
### Step 0.3: Set Up Version Control
|
||||
|
||||
```bash
|
||||
|
||||
675
skills/research/research-paper-writing/scripts/scaffold_paper.py
Normal file
675
skills/research/research-paper-writing/scripts/scaffold_paper.py
Normal file
@@ -0,0 +1,675 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Research Paper Project Scaffolder.
|
||||
|
||||
Creates a ready-to-write research paper workspace with directory structure,
|
||||
LaTeX templates, experiment tracking files, and git initialization.
|
||||
|
||||
Companion tool for the /research-paper-writing skill.
|
||||
|
||||
Usage:
|
||||
python3 SKILL_DIR/scripts/scaffold_paper.py init my-paper --venue neurips2025 --title "My Paper"
|
||||
python3 SKILL_DIR/scripts/scaffold_paper.py venues
|
||||
python3 SKILL_DIR/scripts/scaffold_paper.py status my-paper
|
||||
|
||||
Pure Python stdlib — no external dependencies.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ── Venue definitions ─────────────────────────────────────────────
|
||||
|
||||
# Conference venue registry, keyed by the CLI venue id accepted by `init --venue`.
# Each entry supplies the display columns for the `venues` listing and the
# `template_dir` name that cmd_init resolves under the skill's templates/ dir.
VENUES = {
    "neurips2025": {
        "name": "NeurIPS 2025",
        "pages": 9,
        "format": "Double-blind",
        "checklist": "Paper Checklist required (15 items)",
        "notes": "No Broader Impact section required since 2022. Supplementary unlimited.",
        "template_dir": "neurips2025",
    },
    "icml2026": {
        "name": "ICML 2026",
        "pages": 8,
        "format": "Double-blind",
        "checklist": "Broader Impact statement required",
        "notes": "Strict 8-page limit for main text. Appendix allowed.",
        "template_dir": "icml2026",
    },
    "iclr2026": {
        "name": "ICLR 2026",
        "pages": 9,
        "format": "Double-blind",
        "checklist": "LLM disclosure required, reciprocal reviewing agreement",
        "notes": "arXiv posting allowed before submission.",
        "template_dir": "iclr2026",
    },
    "acl": {
        "name": "ACL (long paper)",
        "pages": 8,
        "format": "Double-blind",
        "checklist": "Mandatory Limitations section, Responsible NLP checklist",
        "notes": "Short papers: 4 pages. Findings track available.",
        "template_dir": "acl",
    },
    "aaai2026": {
        "name": "AAAI 2026",
        "pages": 7,
        "format": "Double-blind",
        "checklist": "Strict style file - no modifications whatsoever",
        "notes": "7-page limit is tight. Plan content carefully.",
        "template_dir": "aaai2026",
    },
    "colm2025": {
        "name": "COLM 2025",
        "pages": 9,
        "format": "Double-blind",
        "checklist": "Frame contribution for language model community",
        "notes": "Newer venue focused on language models.",
        "template_dir": "colm2025",
    },
}
|
||||
|
||||
# ── Directory structure ───────────────────────────────────────────
|
||||
|
||||
# Subdirectories created inside every scaffolded project by cmd_init.
DIRS = [
    "paper",
    "experiments",
    "code",
    "results",
    "tasks",
    "human_eval",
    "context",
    "figures",
    "logs",
]
|
||||
|
||||
# ── Template content ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def scope_md(title: str, venue_key: str) -> str:
    """Render the scope.md starter template for *title* targeting *venue_key*.

    Unknown venue keys degrade gracefully: the key itself is used as the
    venue name and the page budget renders as "?".
    """
    venue = VENUES.get(venue_key, {})
    venue_name = venue.get("name", venue_key)
    page_limit = venue.get("pages", "?")
    return f"""# Research Paper Scope

## The What (one-sentence contribution)
[What is the single thing this paper contributes?]

## The Why (evidence)
[What evidence supports it?]

## The So What (significance)
[Why should readers care?]

## Title
{title}

## Target Venue
{venue_name} ({page_limit} pages, deadline: TBD)

## Paper Type
- [ ] Empirical (IMRaD)
- [ ] Survey / Literature Review
- [ ] Position Paper
- [ ] Technical Report
- [ ] Theory
- [ ] Benchmark

## Authors & Roles
| Author | Sections Owned | Role |
|--------|---------------|------|
| | | Lead |

## Notation Conventions
- Vectors: lowercase bold (\\mathbf{{x}})
- Matrices: uppercase bold (\\mathbf{{A}})
- Citation style: \\citet{{}} for narrative, \\citep{{}} for parenthetical
- British / American spelling: [choose one]
"""
|
||||
|
||||
|
||||
EXPERIMENT_LOG_MD = """# Experiment Log
|
||||
|
||||
## Contribution (one sentence)
|
||||
[The paper's main claim]
|
||||
|
||||
## Experiments Run
|
||||
|
||||
### Experiment 1: [Name]
|
||||
- **Claim tested**: [Which paper claim this supports]
|
||||
- **Setup**: [Model, dataset, config, number of runs]
|
||||
- **Key result**: [One sentence with the number]
|
||||
- **Result files**: results/exp1/
|
||||
- **Figures generated**: figures/exp1_*.pdf
|
||||
- **Surprising findings**: [Anything unexpected]
|
||||
|
||||
## Figures
|
||||
| Filename | Description | Section |
|
||||
|----------|-------------|---------|
|
||||
| | | |
|
||||
|
||||
## Failed Experiments (document for honesty)
|
||||
- [What was tried, why it failed, what it tells us]
|
||||
|
||||
## Open Questions
|
||||
- [Anything the results raised that the paper should address]
|
||||
"""
|
||||
|
||||
|
||||
TODO_MD = """# Research Paper TODO
|
||||
|
||||
## Phase 0: Setup
|
||||
- [x] Scaffold project workspace
|
||||
- [ ] Define one-sentence contribution (scope.md)
|
||||
- [ ] Agree on notation conventions
|
||||
|
||||
## Phase 1: Literature Review
|
||||
- [ ] Identify seed papers from existing references
|
||||
- [ ] Search for related work (3 rounds: breadth, depth, targeted)
|
||||
- [ ] Verify every citation programmatically
|
||||
- [ ] Organize related work by methodology
|
||||
|
||||
## Phase 2: Experiment Design
|
||||
- [ ] Map claims to experiments (claim-experiment table)
|
||||
- [ ] Design baselines (naive, strong, ablation, compute-matched)
|
||||
- [ ] Define evaluation protocol (metrics, aggregation, tests, sample sizes)
|
||||
- [ ] Estimate compute budget
|
||||
|
||||
## Phase 3: Execution
|
||||
- [ ] Write experiment scripts
|
||||
- [ ] Run experiments
|
||||
- [ ] Monitor and handle failures
|
||||
- [ ] Commit results with descriptive messages
|
||||
|
||||
## Phase 4: Analysis
|
||||
- [ ] Aggregate results
|
||||
- [ ] Compute statistical significance
|
||||
- [ ] Identify the story (main finding, surprises, failures)
|
||||
- [ ] Create figures and tables
|
||||
- [ ] Write experiment_log.md (bridge to writeup)
|
||||
|
||||
## Phase 5: Drafting
|
||||
- [ ] Write Abstract
|
||||
- [ ] Write Introduction
|
||||
- [ ] Write Related Work
|
||||
- [ ] Write Methods
|
||||
- [ ] Write Experiments / Results
|
||||
- [ ] Write Discussion
|
||||
- [ ] Write Conclusion
|
||||
|
||||
## Phase 6: Self-Review
|
||||
- [ ] Simulate 3 reviewers (skeptical, supportive, methods-focused)
|
||||
- [ ] Address major concerns
|
||||
- [ ] Revision pass
|
||||
|
||||
## Phase 7: Submission
|
||||
- [ ] Conference-specific formatting
|
||||
- [ ] Venue checklist complete
|
||||
- [ ] Final PDF compiles cleanly
|
||||
- [ ] Supplementary materials ready
|
||||
- [ ] Submit
|
||||
"""
|
||||
|
||||
|
||||
GITIGNORE = """# LaTeX auxiliary files
|
||||
*.aux
|
||||
*.bbl
|
||||
*.blg
|
||||
*.fdb_latexmk
|
||||
*.fls
|
||||
*.log
|
||||
*.out
|
||||
*.synctex.gz
|
||||
*.toc
|
||||
*.nav
|
||||
*.snm
|
||||
*.vrb
|
||||
*.lot
|
||||
*.lof
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Compiled output
|
||||
*.pdf
|
||||
!paper/*.pdf
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.venv/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Editor
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.idea/
|
||||
.vscode/
|
||||
|
||||
# Large data (download, don't commit)
|
||||
data/*.csv
|
||||
data/*.json
|
||||
data/*.parquet
|
||||
checkpoints/
|
||||
*.ckpt
|
||||
*.pt
|
||||
*.pth
|
||||
"""
|
||||
|
||||
|
||||
# ── Commands ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def cmd_init(args):
    """Initialize a new research paper project.

    Creates the directory skeleton, copies the venue's LaTeX template (when
    bundled with the skill), writes starter files (scope.md,
    experiment_log.md, TODO.md, cost_tracker.py, .gitignore, README.md), and
    makes an initial git commit.

    Args:
        args: argparse namespace with ``project_dir``, ``venue``, ``title``.

    Exits with status 1 if the target directory exists and is non-empty, or
    if the venue key is unknown.
    """
    project = Path(args.project_dir)

    if project.exists() and any(project.iterdir()):
        print(f"Error: {project} already exists and is not empty.", file=sys.stderr)
        sys.exit(1)

    venue_key = args.venue
    # argparse restricts --venue via `choices`, but guard anyway for callers
    # invoking cmd_init directly with a hand-built namespace.
    if venue_key and venue_key not in VENUES:
        print(f"Error: Unknown venue '{venue_key}'. Run 'venues' to see options.", file=sys.stderr)
        sys.exit(1)

    title = args.title or "Untitled Paper"
    venue_key = venue_key or "neurips2025"

    print(f"Scaffolding project: {project}")
    print(f" Venue: {VENUES[venue_key]['name']}")
    print(f" Title: {title}")
    print()

    # Create the directory skeleton.
    for d in DIRS:
        (project / d).mkdir(parents=True, exist_ok=True)
        print(f" mkdir {d}/")

    _copy_venue_template(project, venue_key)
    _write_starter_files(project, title, venue_key)
    _init_git(project, title)

    print()
    print(f"Project ready at: {project.resolve()}")
    print("Next: fill in scope.md, then start Phase 1 (Literature Review)")


def _copy_venue_template(project: Path, venue_key: str):
    """Copy the bundled LaTeX template for *venue_key* into project/paper/."""
    skill_dir = Path(__file__).resolve().parent.parent
    template_src = skill_dir / "templates" / VENUES[venue_key]["template_dir"]
    if not template_src.exists():
        print(f" (No template found at {template_src} — create paper/ files manually)")
        return

    template_dst = project / "paper"
    for item in template_src.iterdir():
        dst = template_dst / item.name
        if item.is_file():
            shutil.copy2(item, dst)
        elif item.is_dir():
            # BUG FIX: previously copytree(item, dst / item.name), which
            # duplicated the directory name and nested the copied tree one
            # level too deep (paper/<name>/<name>/...).
            shutil.copytree(item, dst, dirs_exist_ok=True)
    print(f" Copied {venue_key} template to paper/")


def _write_starter_files(project: Path, title: str, venue_key: str):
    """Write the starter files: scope, logs, TODO, README, cost tracker."""
    _write(project / "scope.md", scope_md(title, venue_key))
    _write(project / "experiment_log.md", EXPERIMENT_LOG_MD)
    _write(project / "TODO.md", TODO_MD)
    _write(project / "experiment_journal.jsonl", "")
    _write(project / "results" / "cost_log.jsonl", "")
    _write(project / ".gitignore", GITIGNORE)

    # Minimal README pointing at the phase workflow.
    readme = f"# {title}\n\nResearch paper project targeting {VENUES[venue_key]['name']}.\n\n"
    readme += "Scaffolded with `/research-paper-writing` skill.\n\n"
    readme += "## Quick Start\n\n"
    readme += "1. Fill in `scope.md` (contribution, evidence, significance)\n"
    readme += "2. Literature review (Phase 1)\n"
    readme += "3. Design experiments (Phase 2)\n"
    readme += "4. Run experiments, write results to `experiment_log.md` (Phases 3-4)\n"
    readme += "5. Draft paper in `paper/` (Phase 5)\n"
    readme += "6. Self-review and submit (Phases 6-7)\n\n"
    readme += "See `TODO.md` for detailed checklist.\n"
    _write(project / "README.md", readme)

    # Standalone helper module dropped into the project; it appends to
    # results/cost_log.jsonl, which cmd_status aggregates.
    cost_tracker = '''"""
Cost tracker for research experiments.

Usage:
    from cost_tracker import log_cost, summarize_costs
    log_cost("exp1", "claude-sonnet-4-20250514", 1000, 500, 0.015)
    summarize_costs()
"""

import json
import os
from datetime import datetime

COST_LOG = os.path.join(os.path.dirname(__file__), "results", "cost_log.jsonl")


def log_cost(experiment: str, model: str, input_tokens: int,
             output_tokens: int, cost_usd: float):
    """Append a cost entry to the cost log."""
    entry = {
        "timestamp": datetime.now().isoformat(),
        "experiment": experiment,
        "model": model,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost_usd": cost_usd,
    }
    with open(COST_LOG, "a") as f:
        f.write(json.dumps(entry) + "\\n")


def summarize_costs() -> dict:
    """Summarize total costs from the cost log."""
    if not os.path.exists(COST_LOG):
        return {"total_usd": 0, "entries": 0, "by_experiment": {}}

    total = 0.0
    by_exp = {}
    count = 0
    with open(COST_LOG) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            entry = json.loads(line)
            cost = entry.get("cost_usd", 0)
            total += cost
            exp = entry.get("experiment", "unknown")
            by_exp[exp] = by_exp.get(exp, 0) + cost
            count += 1

    return {"total_usd": round(total, 4), "entries": count, "by_experiment": by_exp}


if __name__ == "__main__":
    summary = summarize_costs()
    print(json.dumps(summary, indent=2))
'''
    _write(project / "cost_tracker.py", cost_tracker)


def _init_git(project: Path, title: str):
    """git init + add + first commit; degrades gracefully if git is unusable."""
    print()
    try:
        subprocess.run(["git", "init"], cwd=project, capture_output=True, check=True)
        subprocess.run(["git", "add", "-A"], cwd=project, capture_output=True, check=True)
        subprocess.run(
            ["git", "commit", "-m", f"Scaffold research paper project: {title}"],
            cwd=project,
            capture_output=True,
            check=True,
            # Pin an identity so the commit succeeds even with no global git config.
            env={**os.environ, "GIT_AUTHOR_NAME": "Hermes", "GIT_COMMITTER_NAME": "Hermes",
                 "GIT_AUTHOR_EMAIL": "hermes@local", "GIT_COMMITTER_EMAIL": "hermes@local"},
        )
        print(" git init + initial commit")
    except FileNotFoundError:
        print(" (git not found — skipping git init)")
    except subprocess.CalledProcessError as e:
        print(f" (git init failed: {e})")
|
||||
|
||||
|
||||
def cmd_venues(args):
    """Print the venue registry, either as JSON (--json) or an aligned table."""
    if args.json:
        print(json.dumps(VENUES, indent=2))
        return

    header = f" {'Venue':<20} {'Pages':<7} {'Format':<15} Notes"
    rule = f" {'─'*20} {'─'*7} {'─'*15} {'─'*40}"

    print("Available conference templates:")
    print()
    print(header)
    print(rule)
    for venue_id, info in VENUES.items():
        print(f" {venue_id:<20} {info['pages']:<7} {info['format']:<15} {info['checklist']}")
    print()
    print("Use: scaffold_paper.py init <project> --venue <venue>")
|
||||
|
||||
|
||||
def cmd_status(args):
    """Report project health: phase progress, TODO completion, costs, git state.

    With --json, dumps the raw status dictionary; otherwise prints a
    human-readable summary. Exits with status 1 if the project is missing.
    """
    project = Path(args.project_dir)

    if not project.exists():
        print(f"Error: {project} does not exist.", file=sys.stderr)
        sys.exit(1)

    status = _status_dict(project)

    if args.json:
        print(json.dumps(status, indent=2))
        return

    print(f"Project: {project.resolve()}")
    print()

    # Per-phase artifact checks.
    print("Phase Progress:")
    for phase_name, phase in status["phases"].items():
        marker = "\u2705" if phase["complete"] else "\u2b1c"
        print(f" {marker} {phase_name}: {phase['detail']}")

    # Checkbox completion from TODO.md.
    print()
    todo = status["todo"]
    print(f"TODO: {todo['done']}/{todo['total']} tasks complete ({todo['pct']}%)")

    # Spend summary, shown only once costs have been logged.
    costs = status["costs"]
    if costs["entries"] > 0:
        print(f"Costs: ${costs['total_usd']:.2f} across {costs['entries']} entries")
        for experiment, spent in costs["by_experiment"].items():
            print(f" {experiment}: ${spent:.4f}")

    # Repository state.
    print()
    git = status["git"]
    if git["initialized"]:
        tree_state = "CLEAN" if git["clean"] else "DIRTY (" + str(git["uncommitted"]) + " uncommitted)"
        print(f"Git: {git['commits']} commits, {tree_state}")
    else:
        print("Git: not initialized")

    # Result-file count, shown only when non-zero.
    results = status["results"]
    if results["count"] > 0:
        print(f"Results: {results['count']} result files found")

    print()
|
||||
|
||||
|
||||
def _status_dict(project: Path) -> dict:
|
||||
"""Build a status dictionary for the project."""
|
||||
phases = {}
|
||||
|
||||
# Phase 0: Setup
|
||||
scope_exists = (project / "scope.md").exists()
|
||||
scope_filled = False
|
||||
if scope_exists:
|
||||
content = (project / "scope.md").read_text()
|
||||
scope_filled = "[What is the single thing" not in content
|
||||
phases["Phase 0: Setup"] = {
|
||||
"complete": scope_filled,
|
||||
"detail": "scope.md filled" if scope_filled else ("scope.md exists (template)" if scope_exists else "scope.md missing"),
|
||||
}
|
||||
|
||||
# Phase 1: Literature
|
||||
bib_files = list(project.rglob("*.bib"))
|
||||
phases["Phase 1: Literature"] = {
|
||||
"complete": len(bib_files) > 0,
|
||||
"detail": f"{len(bib_files)} .bib file(s)" if bib_files else "No .bib files yet",
|
||||
}
|
||||
|
||||
# Phase 2-3: Experiments
|
||||
exp_scripts = list((project / "experiments").glob("*.py")) if (project / "experiments").exists() else []
|
||||
phases["Phase 2-3: Experiments"] = {
|
||||
"complete": len(exp_scripts) > 0,
|
||||
"detail": f"{len(exp_scripts)} experiment script(s)" if exp_scripts else "No experiment scripts",
|
||||
}
|
||||
|
||||
# Phase 4: Analysis
|
||||
exp_log = project / "experiment_log.md"
|
||||
log_filled = False
|
||||
if exp_log.exists():
|
||||
content = exp_log.read_text()
|
||||
log_filled = "Experiment 1: [Name]" not in content and len(content.strip()) > 200
|
||||
phases["Phase 4: Analysis"] = {
|
||||
"complete": log_filled,
|
||||
"detail": "experiment_log.md populated" if log_filled else "experiment_log.md is template",
|
||||
}
|
||||
|
||||
# Phase 5: Drafting
|
||||
tex_files = list((project / "paper").rglob("*.tex")) if (project / "paper").exists() else []
|
||||
has_content = False
|
||||
for tf in tex_files:
|
||||
content = tf.read_text()
|
||||
if "\\begin{abstract}" in content and len(content) > 2000:
|
||||
has_content = True
|
||||
break
|
||||
phases["Phase 5: Drafting"] = {
|
||||
"complete": has_content,
|
||||
"detail": f"{len(tex_files)} .tex file(s), {'content detected' if has_content else 'template only'}",
|
||||
}
|
||||
|
||||
# Phase 6-7: Review & Submit
|
||||
pdf_files = list((project / "paper").glob("*.pdf")) if (project / "paper").exists() else []
|
||||
phases["Phase 6-7: Review & Submit"] = {
|
||||
"complete": len(pdf_files) > 0,
|
||||
"detail": f"{len(pdf_files)} PDF(s) in paper/" if pdf_files else "No compiled PDFs",
|
||||
}
|
||||
|
||||
# TODO parsing
|
||||
todo_info = {"done": 0, "total": 0, "pct": 0}
|
||||
todo_file = project / "TODO.md"
|
||||
if todo_file.exists():
|
||||
content = todo_file.read_text()
|
||||
done = content.count("- [x]")
|
||||
undone = content.count("- [ ]")
|
||||
total = done + undone
|
||||
todo_info = {"done": done, "total": total, "pct": round(done / total * 100) if total > 0 else 0}
|
||||
|
||||
# Cost summary
|
||||
cost_info = {"total_usd": 0, "entries": 0, "by_experiment": {}}
|
||||
cost_file = project / "results" / "cost_log.jsonl"
|
||||
if cost_file.exists():
|
||||
total = 0.0
|
||||
by_exp = {}
|
||||
count = 0
|
||||
for line in cost_file.read_text().splitlines():
|
||||
if not line.strip():
|
||||
continue
|
||||
try:
|
||||
entry = json.loads(line)
|
||||
cost = entry.get("cost_usd", 0)
|
||||
total += cost
|
||||
exp = entry.get("experiment", "unknown")
|
||||
by_exp[exp] = by_exp.get(exp, 0) + cost
|
||||
count += 1
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
cost_info = {"total_usd": round(total, 4), "entries": count, "by_experiment": by_exp}
|
||||
|
||||
# Git
|
||||
git_info = {"initialized": False, "clean": True, "commits": 0, "uncommitted": 0}
|
||||
if (project / ".git").exists():
|
||||
git_info["initialized"] = True
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-list", "--count", "HEAD"],
|
||||
cwd=project, capture_output=True, text=True
|
||||
)
|
||||
if result.returncode == 0:
|
||||
git_info["commits"] = int(result.stdout.strip())
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "status", "--porcelain"],
|
||||
cwd=project, capture_output=True, text=True
|
||||
)
|
||||
if result.returncode == 0:
|
||||
changes = [l for l in result.stdout.splitlines() if l.strip()]
|
||||
git_info["uncommitted"] = len(changes)
|
||||
git_info["clean"] = len(changes) == 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Results
|
||||
result_files = list((project / "results").rglob("*.json")) if (project / "results").exists() else []
|
||||
# Exclude cost_log.jsonl
|
||||
result_files = [f for f in result_files if f.name != "cost_log.jsonl"]
|
||||
results_info = {"count": len(result_files)}
|
||||
|
||||
return {
|
||||
"project": str(project.resolve()),
|
||||
"phases": phases,
|
||||
"todo": todo_info,
|
||||
"costs": cost_info,
|
||||
"git": git_info,
|
||||
"results": results_info,
|
||||
}
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _write(path: Path, content: str):
|
||||
"""Write content to a file, creating parent dirs."""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(content)
|
||||
print(f" write {path.relative_to(path.parent.parent) if len(path.parts) > 2 else path.name}")
|
||||
|
||||
|
||||
# ── CLI ───────────────────────────────────────────────────────────
|
||||
|
||||
# Dispatch table mapping each CLI subcommand name to its handler; main()
# looks up args.command here after parsing.
COMMAND_MAP = {
    "init": cmd_init,
    "venues": cmd_venues,
    "status": cmd_status,
}
|
||||
|
||||
|
||||
def main():
    """CLI entry point: build the argument parser and dispatch the subcommand.

    Prints help and exits with status 1 when no subcommand is given.
    """
    parser = argparse.ArgumentParser(
        prog="scaffold_paper",
        description="Research paper project scaffolder. Companion to /research-paper-writing skill.",
    )
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    init_parser = subparsers.add_parser("init", help="Initialize a new paper project")
    init_parser.add_argument("project_dir", help="Directory to create")
    init_parser.add_argument(
        "--venue",
        default="neurips2025",
        choices=list(VENUES.keys()),
        help="Target conference (default: neurips2025)",
    )
    init_parser.add_argument("--title", default=None, help="Paper title")

    venues_parser = subparsers.add_parser("venues", help="List available conference templates")
    venues_parser.add_argument("--json", action="store_true", help="Output as JSON")

    status_parser = subparsers.add_parser("status", help="Check project status")
    status_parser.add_argument("project_dir", help="Project directory to check")
    status_parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    COMMAND_MAP[args.command](args)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user