#!/usr/bin/env python3 """Generate 400 Deployment & Infra code pattern pairs for timmy-config#594.""" from __future__ import annotations import argparse, json, random from pathlib import Path random.seed(594) TEMPLATES = [ # vps-provisioning ("vps-provisioning", "Write a cloud-init config that provisions Ubuntu 22.04 with deploy user, SSH key auth, and auto updates.", "#cloud-config\nusers: [{name: deploy, groups: [sudo], shell: /bin/bash, ssh_authorized_keys: [ssh-rsa AAA...]}]\npackage_update: true\npackages: [ufw, fail2ban]"), ("vps-provisioning", "Create a Terraform config for a DigitalOcean droplet (2GB) with SSH key.", 'terraform { required_providers { digitalocean={source="digitalocean/digitalocean",version="~>2.0"} } }\nresource "digitalocean_droplet" "web" { name="web-01"; region="nyc3"; size="s-2vcpu-2gb" }'), ("vps-provisioning", "Write an Ansible playbook to install packages and start nginx.", "---\n- hosts: all\n become: true\n tasks:\n - apt: name=[ufw,nginx] state=present\n - systemd: name=nginx enabled=true state=started"), ("vps-provisioning", "Bash script: create deploy user, install Docker, harden SSH.", "#!/usr/bin/env bash\nset -euo pipefail\nid -u deploy &>/dev/null || useradd -m -s /bin/bash deploy\n[[ -x $(command -v docker) ]] || curl -fsSL https://get.docker.com | sh\nsed -i 's/^PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config"), ("vps-provisioning", "Write a systemd drop-in to override service restart settings.", "[Service]\nRestart=always\nRestartSec=5"), ("vps-provisioning", "Create a logrotate config for application logs.", "/var/log/app/*.log { daily; rotate 7; compress; missingok }"), ("vps-provisioning", "Write a shell function that waits for a TCP port to become available on a remote host.", 'wait_for_port() { local h="$1" p="$2"; while ! nc -z "$h" "$p"; do sleep 1; done; }'), ("vps-provisioning", "Implement a script that sets up a Python virtualenv.", "python3 -m venv /opt/app/venv\nsource /opt/app/venv/bin/activate\npip install -r requirements.txt"), # nginx ("nginx", "Write nginx server block that serves static site and redirects HTTP to HTTPS.", "server {\n listen 80; server_name example.com;\n return 301 https://$server_name$request_uri;\n}\nserver {\n listen 443 ssl http2; server_name example.com;\n ssl_certificate /etc/letsencrypt/live/example.com/fullchain.pem;\n root /var/www/html;\n location / { try_files $uri $uri/ =404; }\n}"), ("nginx", "Configure nginx as reverse proxy to backend on port 3000.", "upstream app { server 127.0.0.1:3000; }\nserver {\n listen 80; server_name app.example.com;\n location / {\n proxy_pass http:app;\n proxy_http_version 1.1;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n }\n}"), ("nginx", "Write nginx rate limiting configuration for /api/ endpoint.", "limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;\nserver {\n location /api/ { limit_req zone=api burst=20 nodelay; }\n}"), ("nginx", "Create nginx config snippet that adds HSTS and CSP headers.", 'add_header Strict-Transport-Security "max-age=63072000" always;\nadd_header Content-Security-Policy "default-src \'self\'" always;'), # systemd ("systemd", "Write a systemd service unit for a Python app as non-root, restart on failure.", "[Unit]\nDescription=My Python App\nAfter=network.target\n\n[Service]\nType=simple\nUser=deploy\nWorkingDirectory=/opt/app\nExecStart=/opt/app/venv/bin/gunicorn -w 4 -b 0.0.0.0:8000 app:app\nRestart=on-failure\nRestartSec=10\n\n[Install]\nWantedBy=multi-user.target"), ("systemd", "Create a systemd timer that runs a backup script daily at 2:30 AM.", "[Timer]\nOnCalendar=*-*-* 02:30:00\nPersistent=true\nUnit=backup.service\n\n[Service]\nType=oneshot\nExecStart=/usr/local/bin/backup.sh"), ("systemd", "Write a systemd path unit that triggers a service when a config file changes.", "[Path]\nPathModified=/etc/app/config.yaml\nUnit=config-reload.service\n\n[Service]\nType=oneshot\nExecStart=/usr/local/bin/reload.sh"), # docker ("docker", "Write a multi-stage Dockerfile for Python FastAPI.", "FROM python:3.12-slim AS builder\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install --user --no-cache-dir -r requirements.txt\n\nFROM python:3.12-slim\nWORKDIR /app\nCOPY --from=builder /root/.local /root/.local\nCOPY . .\nCMD [\"uvicorn\", \"main:app\"]"), ("docker", "Create a docker-compose.yml with web, postgres, and redis.", "version: \"3.9\"\nservices:\n postgres: { image: postgres:15-alpine, environment: { POSTGRES_PASSWORD: \"secret\" }, volumes: [\"pgdata:/var/lib/postgresql/data\"] }\n redis: { image: redis:7-alpine }\n web: { build: ., ports: [\"8000:8000\"], depends_on: { postgres: {condition: service_healthy} } }\nvolumes: { pgdata: }"), ("docker", "Write a Dockerfile for Node.js production.", "FROM node:18-alpine AS builder\nWORKDIR /app\nCOPY package*.json .\nRUN npm ci --only=production\n\nFROM node:18-alpine\nENV NODE_ENV=production\nCOPY --from=builder /node_modules ./node_modules\nCOPY . .\nUSER nodejs\nCMD [\"node\", \"server.js\"]"), ("docker", "Create a Docker network for app isolation.", "docker network create --driver bridge --subnet 172.20.0.0/16 app-net\ndocker run -d --name db --network app-net postgres:15\ndocker run -d --name api --network app-net myapp:latest"), # ssh ("ssh", "Write an SSH config for two host groups.", "Host prod-*\n HostName %h.example.com\n User deploy\n IdentityFile ~/.ssh/id_rsa_prod\nHost dev-*\n HostName dev.example.com\n User dev\n IdentityFile ~/.ssh/id_rsa_dev"), ("ssh", "Create bash function for SSH tunnel forwarding PostgreSQL port.", "ssh_postgres_tunnel() { ssh -fN -L \"${3:-55432}:localhost:${2:-5432}\" \"${1:-prod-db.example.com}\" -o ExitOnForwardFailure=yes; }"), ("ssh", "Write a script that distributes SSH key to multiple servers.", "for s in web01 web02 db01; do\n ssh-copy-id -i ~/.ssh/id_rsa.pub deploy@${s}.example.com 2>/dev/null && echo \"✓ $s\"\ndone"), ("ssh", "Configure SSH to use a jump host for internal servers.", "Host internal-*\n ProxyJump jump.example.com\n HostName %h.internal.local"), ] def vary_problem(base, idx): p = ["Write code to","Implement","Create","Build","Configure","Set up"] s = [" with error handling."," using best practices."," ensuring idempotency."," with logging."," for production."] return f"{p[idx%len(p)]} {base.rstrip('.').lower()}{s[(idx//len(p))%len(s)]}" def vary_solution(base, idx): sol = base if idx%3==0: sol = sol.replace("log", "log_msg").replace("result", "data") if idx%7==0: sol = f"# Variation {idx}\n" + sol return sol def main(): ap = argparse.ArgumentParser(description="Generate 400 Deployment & Infra code pattern pairs") ap.add_argument("-o","--output",default="training-data/code-patterns-deployment-infra.jsonl") ap.add_argument("-n","--count",type=int,default=400) args = ap.parse_args() out = Path(args.output); out.parent.mkdir(parents=True,exist_ok=True) pairs = [] for i in range(args.count): tpl = TEMPLATES[i % len(TEMPLATES)] pairs.append({ "problem": vary_problem(tpl[1], i), "solution": vary_solution(tpl[2], i), "imports": "", "domain": tpl[0], "id": f"deploy-infra-{i:04d}", }) with open(out, "w", encoding="utf-8") as f: for p in pairs: f.write(json.dumps(p, ensure_ascii=False) + "\n") from collections import Counter cnt = Counter(p["domain"] for p in pairs) print(f"Generated {len(pairs)} pairs → {out}") print(f" Size: {out.stat().st_size/1024:.1f} KB") for d,c in sorted(cnt.items(),key=lambda x:-x[1]): print(f" {d}: {c}") if __name__ == "__main__": main()