Files
timmy-config/scripts/generate_code_patterns_deployment_infra.py
StepFun Step35 3c53a40636
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 23s
Smoke Test / smoke (pull_request) Failing after 21s
Validate Config / YAML Lint (pull_request) Failing after 17s
Validate Config / JSON Validate (pull_request) Successful in 18s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 57s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 59s
Validate Config / Cron Syntax Check (pull_request) Successful in 13s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 11s
Validate Config / Playbook Schema Validation (pull_request) Successful in 28s
Validate Training Data / validate (pull_request) Successful in 27s
Architecture Lint / Lint Repository (pull_request) Failing after 16s
PR Checklist / pr-checklist (pull_request) Successful in 3m16s
feat(#594): add 400 deployment & infra code pattern training pairs
Part of Training Factory: Code Patterns — Deployment & Infra.
Adds 400 new problem→solution pairs for VPS provisioning, nginx,
systemd, docker, and SSH automation. Brings total to 1000 pairs.

Closes #594
2026-04-29 16:46:09 -04:00

102 lines
7.8 KiB
Python

#!/usr/bin/env python3
"""Generate 400 Deployment & Infra code pattern pairs for timmy-config#594."""
from __future__ import annotations
import argparse, json, random
from pathlib import Path
random.seed(594)
TEMPLATES = [
# vps-provisioning
("vps-provisioning", "Write a cloud-init config that provisions Ubuntu 22.04 with deploy user, SSH key auth, and auto updates.",
"#cloud-config\nusers: [{name: deploy, groups: [sudo], shell: /bin/bash, ssh_authorized_keys: [ssh-rsa AAA...]}]\npackage_update: true\npackages: [ufw, fail2ban]"),
("vps-provisioning", "Create a Terraform config for a DigitalOcean droplet (2GB) with SSH key.",
'terraform { required_providers { digitalocean={source="digitalocean/digitalocean",version="~>2.0"} } }\nresource "digitalocean_droplet" "web" { name="web-01"; region="nyc3"; size="s-2vcpu-2gb" }'),
("vps-provisioning", "Write an Ansible playbook to install packages and start nginx.",
"---\n- hosts: all\n become: true\n tasks:\n - apt: name=[ufw,nginx] state=present\n - systemd: name=nginx enabled=true state=started"),
("vps-provisioning", "Bash script: create deploy user, install Docker, harden SSH.",
"#!/usr/bin/env bash\nset -euo pipefail\nid -u deploy &>/dev/null || useradd -m -s /bin/bash deploy\n[[ -x $(command -v docker) ]] || curl -fsSL https://get.docker.com | sh\nsed -i 's/^PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config"),
("vps-provisioning", "Write a systemd drop-in to override service restart settings.",
"[Service]\nRestart=always\nRestartSec=5"),
("vps-provisioning", "Create a logrotate config for application logs.",
"/var/log/app/*.log { daily; rotate 7; compress; missingok }"),
("vps-provisioning", "Write a shell function that waits for a TCP port to become available on a remote host.",
'wait_for_port() { local h="$1" p="$2"; while ! nc -z "$h" "$p"; do sleep 1; done; }'),
("vps-provisioning", "Implement a script that sets up a Python virtualenv.",
"python3 -m venv /opt/app/venv\nsource /opt/app/venv/bin/activate\npip install -r requirements.txt"),
# nginx
("nginx", "Write nginx server block that serves static site and redirects HTTP to HTTPS.",
"server {\n listen 80; server_name example.com;\n return 301 https://$server_name$request_uri;\n}\nserver {\n listen 443 ssl http2; server_name example.com;\n ssl_certificate /etc/letsencrypt/live/example.com/fullchain.pem;\n root /var/www/html;\n location / { try_files $uri $uri/ =404; }\n}"),
("nginx", "Configure nginx as reverse proxy to backend on port 3000.",
"upstream app { server 127.0.0.1:3000; }\nserver {\n listen 80; server_name app.example.com;\n location / {\n proxy_pass http:app;\n proxy_http_version 1.1;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n }\n}"),
("nginx", "Write nginx rate limiting configuration for /api/ endpoint.",
"limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;\nserver {\n location /api/ { limit_req zone=api burst=20 nodelay; }\n}"),
("nginx", "Create nginx config snippet that adds HSTS and CSP headers.",
'add_header Strict-Transport-Security "max-age=63072000" always;\nadd_header Content-Security-Policy "default-src \'self\'" always;'),
# systemd
("systemd", "Write a systemd service unit for a Python app as non-root, restart on failure.",
"[Unit]\nDescription=My Python App\nAfter=network.target\n\n[Service]\nType=simple\nUser=deploy\nWorkingDirectory=/opt/app\nExecStart=/opt/app/venv/bin/gunicorn -w 4 -b 0.0.0.0:8000 app:app\nRestart=on-failure\nRestartSec=10\n\n[Install]\nWantedBy=multi-user.target"),
("systemd", "Create a systemd timer that runs a backup script daily at 2:30 AM.",
"[Timer]\nOnCalendar=*-*-* 02:30:00\nPersistent=true\nUnit=backup.service\n\n[Service]\nType=oneshot\nExecStart=/usr/local/bin/backup.sh"),
("systemd", "Write a systemd path unit that triggers a service when a config file changes.",
"[Path]\nPathModified=/etc/app/config.yaml\nUnit=config-reload.service\n\n[Service]\nType=oneshot\nExecStart=/usr/local/bin/reload.sh"),
# docker
("docker", "Write a multi-stage Dockerfile for Python FastAPI.",
"FROM python:3.12-slim AS builder\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install --user --no-cache-dir -r requirements.txt\n\nFROM python:3.12-slim\nWORKDIR /app\nCOPY --from=builder /root/.local /root/.local\nCOPY . .\nCMD [\"uvicorn\", \"main:app\"]"),
("docker", "Create a docker-compose.yml with web, postgres, and redis.",
"version: \"3.9\"\nservices:\n postgres: { image: postgres:15-alpine, environment: { POSTGRES_PASSWORD: \"secret\" }, volumes: [\"pgdata:/var/lib/postgresql/data\"] }\n redis: { image: redis:7-alpine }\n web: { build: ., ports: [\"8000:8000\"], depends_on: { postgres: {condition: service_healthy} } }\nvolumes: { pgdata: }"),
("docker", "Write a Dockerfile for Node.js production.",
"FROM node:18-alpine AS builder\nWORKDIR /app\nCOPY package*.json .\nRUN npm ci --only=production\n\nFROM node:18-alpine\nENV NODE_ENV=production\nCOPY --from=builder /node_modules ./node_modules\nCOPY . .\nUSER nodejs\nCMD [\"node\", \"server.js\"]"),
("docker", "Create a Docker network for app isolation.",
"docker network create --driver bridge --subnet 172.20.0.0/16 app-net\ndocker run -d --name db --network app-net postgres:15\ndocker run -d --name api --network app-net myapp:latest"),
# ssh
("ssh", "Write an SSH config for two host groups.",
"Host prod-*\n HostName %h.example.com\n User deploy\n IdentityFile ~/.ssh/id_rsa_prod\nHost dev-*\n HostName dev.example.com\n User dev\n IdentityFile ~/.ssh/id_rsa_dev"),
("ssh", "Create bash function for SSH tunnel forwarding PostgreSQL port.",
"ssh_postgres_tunnel() { ssh -fN -L \"${3:-55432}:localhost:${2:-5432}\" \"${1:-prod-db.example.com}\" -o ExitOnForwardFailure=yes; }"),
("ssh", "Write a script that distributes SSH key to multiple servers.",
"for s in web01 web02 db01; do\n ssh-copy-id -i ~/.ssh/id_rsa.pub deploy@${s}.example.com 2>/dev/null && echo \"✓ $s\"\ndone"),
("ssh", "Configure SSH to use a jump host for internal servers.",
"Host internal-*\n ProxyJump jump.example.com\n HostName %h.internal.local"),
]
def vary_problem(base, idx):
p = ["Write code to","Implement","Create","Build","Configure","Set up"]
s = [" with error handling."," using best practices."," ensuring idempotency."," with logging."," for production."]
return f"{p[idx%len(p)]} {base.rstrip('.').lower()}{s[(idx//len(p))%len(s)]}"
def vary_solution(base, idx):
sol = base
if idx%3==0:
sol = sol.replace("log", "log_msg").replace("result", "data")
if idx%7==0:
sol = f"# Variation {idx}\n" + sol
return sol
def main():
ap = argparse.ArgumentParser(description="Generate 400 Deployment & Infra code pattern pairs")
ap.add_argument("-o","--output",default="training-data/code-patterns-deployment-infra.jsonl")
ap.add_argument("-n","--count",type=int,default=400)
args = ap.parse_args()
out = Path(args.output); out.parent.mkdir(parents=True,exist_ok=True)
pairs = []
for i in range(args.count):
tpl = TEMPLATES[i % len(TEMPLATES)]
pairs.append({
"problem": vary_problem(tpl[1], i),
"solution": vary_solution(tpl[2], i),
"imports": "",
"domain": tpl[0],
"id": f"deploy-infra-{i:04d}",
})
with open(out, "w", encoding="utf-8") as f:
for p in pairs:
f.write(json.dumps(p, ensure_ascii=False) + "\n")
from collections import Counter
cnt = Counter(p["domain"] for p in pairs)
print(f"Generated {len(pairs)} pairs → {out}")
print(f" Size: {out.stat().st_size/1024:.1f} KB")
for d,c in sorted(cnt.items(),key=lambda x:-x[1]): print(f" {d}: {c}")
if __name__ == "__main__":
main()