689 lines
24 KiB
Markdown
689 lines
24 KiB
Markdown
# Optimization Reference
|
|
|
|
> **See also:** architecture.md · composition.md · scenes.md · shaders.md · inputs.md · troubleshooting.md
|
|
|
|
## Hardware Detection
|
|
|
|
Detect the user's hardware at script startup and adapt rendering parameters automatically. Never hardcode worker counts or resolution.
|
|
|
|
### CPU and Memory Detection
|
|
|
|
```python
|
|
import multiprocessing
|
|
import platform
|
|
import shutil
|
|
import os
|
|
|
|
def detect_hardware():
|
|
"""Detect hardware capabilities and return render config."""
|
|
cpu_count = multiprocessing.cpu_count()
|
|
|
|
# Leave 1-2 cores free for OS + ffmpeg encoding
|
|
if cpu_count >= 16:
|
|
workers = cpu_count - 2
|
|
elif cpu_count >= 8:
|
|
workers = cpu_count - 1
|
|
elif cpu_count >= 4:
|
|
workers = cpu_count - 1
|
|
else:
|
|
workers = max(1, cpu_count)
|
|
|
|
# Memory detection (platform-specific)
|
|
try:
|
|
if platform.system() == "Darwin":
|
|
import subprocess
|
|
mem_bytes = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]).strip())
|
|
elif platform.system() == "Linux":
|
|
with open("/proc/meminfo") as f:
|
|
for line in f:
|
|
if line.startswith("MemTotal"):
|
|
mem_bytes = int(line.split()[1]) * 1024
|
|
break
|
|
else:
|
|
mem_bytes = 8 * 1024**3 # assume 8GB on unknown
|
|
except Exception:
|
|
mem_bytes = 8 * 1024**3
|
|
|
|
mem_gb = mem_bytes / (1024**3)
|
|
|
|
# Each worker uses ~50-150MB depending on grid sizes
|
|
# Cap workers if memory is tight
|
|
mem_per_worker_mb = 150
|
|
max_workers_by_mem = int(mem_gb * 1024 * 0.6 / mem_per_worker_mb) # use 60% of RAM
|
|
workers = min(workers, max_workers_by_mem)
|
|
|
|
# ffmpeg availability and codec support
|
|
has_ffmpeg = shutil.which("ffmpeg") is not None
|
|
|
|
return {
|
|
"cpu_count": cpu_count,
|
|
"workers": workers,
|
|
"mem_gb": mem_gb,
|
|
"platform": platform.system(),
|
|
"arch": platform.machine(),
|
|
"has_ffmpeg": has_ffmpeg,
|
|
}
|
|
```
|
|
|
|
### Adaptive Quality Profiles
|
|
|
|
Scale resolution, FPS, CRF, and grid density based on hardware:
|
|
|
|
```python
|
|
def quality_profile(hw, target_duration_s, user_preference="auto"):
    """Pick render settings for the given hardware and requested quality.

    Args:
        hw: dict providing at least "workers", "mem_gb" and "cpu_count".
        target_duration_s: length of the video in seconds; only used on the
            automatic path to estimate total render time.
        user_preference: "auto", "draft", "preview", "production", "max".
            "draft", "preview" and "max" are fixed presets; every other value
            (including "production") takes the automatic path.

    Returns:
        dict with keys "vw", "vh", "fps", "crf", "workers", "grid_scale",
        "shaders" and "particles_max".
    """
    available = hw["workers"]

    def settings(size, fps, crf, grid_scale, shaders, particles_max, workers=available):
        # Single place that fixes the key set of every returned profile.
        width, height = size
        return {"vw": width, "vh": height, "fps": fps, "crf": crf, "workers": workers,
                "grid_scale": grid_scale, "shaders": shaders, "particles_max": particles_max}

    # --- fixed presets -----------------------------------------------------
    if user_preference == "draft":
        # Draft is the only preset that limits workers (to at most 4).
        return settings((960, 540), 12, 28, 0.5, "minimal", 200, workers=min(4, available))
    if user_preference == "preview":
        return settings((1280, 720), 15, 25, 0.75, "standard", 500)
    if user_preference == "max":
        return settings((3840, 2160), 30, 15, 2.0, "full", 3000)

    # --- "production" or "auto" -------------------------------------------
    # Estimate the render time at 24 fps and ~180ms per 1080p frame, spread
    # over the available workers; downgrade if it would take too long.
    frame_total = int(target_duration_s * 24)
    seconds_per_frame = 0.18
    projected_s = frame_total * seconds_per_frame / max(1, available)

    low_end = hw["mem_gb"] < 4 or hw["cpu_count"] <= 2
    if low_end:
        # Low-end: 720p, 15fps
        return settings((1280, 720), 15, 23, 0.75, "standard", 500)

    if projected_s > 3600:
        # More than an hour projected: drop to 720p to speed up
        return settings((1280, 720), 24, 20, 0.75, "standard", 800)

    # Standard production: 1080p 24fps
    return settings((1920, 1080), 24, 20, 1.0, "full", 1200)
|
|
|
|
|
|
def apply_quality_profile(profile):
    """Publish the core render settings of ``profile`` as module globals.

    Reads the "vw", "vh", "fps" and "workers" entries and stores them in the
    module-level names VW, VH, FPS and N_WORKERS, in that order.
    """
    module_vars = globals()
    for global_name, profile_key in (
        ("VW", "vw"),
        ("VH", "vh"),
        ("FPS", "fps"),
        ("N_WORKERS", "workers"),
    ):
        module_vars[global_name] = profile[profile_key]
    # The remaining profile entries are not applied here:
    # - grid sizes scale with resolution
    # - CRF is passed to the ffmpeg encoder
    # - the shader set determines which post-processing is active
|
|
```
|
|
|
|
### CLI Integration
|
|
|
|
```python
|
|
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--quality", choices=["draft", "preview", "production", "max", "auto"],
                    default="auto", help="Render quality preset")
parser.add_argument("--aspect", choices=["landscape", "portrait", "square"],
                    default="landscape", help="Aspect ratio preset")
parser.add_argument("--workers", type=int, default=0, help="Override worker count (0=auto)")
parser.add_argument("--resolution", type=str, default="", help="Override resolution e.g. 1280x720")
args = parser.parse_args()

hw = detect_hardware()
if args.workers > 0:
    hw["workers"] = args.workers
profile = quality_profile(hw, target_duration, args.quality)

# Aspect ratio presets, expressed at the 1080-class reference size.
ASPECT_PRESETS = {
    "landscape": (1920, 1080),
    "portrait": (1080, 1920),
    "square": (1080, 1080),
}

# Apply aspect ratio preset (before manual resolution override).
# The preset only supplies the SHAPE: it is rescaled so its short side matches
# the short side chosen by the quality profile. Otherwise "--quality draft
# --aspect portrait" would silently render at full 1080x1920 and
# "--quality max --aspect portrait" would be stuck below 4K.
if args.aspect != "landscape" and not args.resolution:
    preset_w, preset_h = ASPECT_PRESETS[args.aspect]
    scale = min(profile["vw"], profile["vh"]) / min(preset_w, preset_h)
    profile["vw"] = int(round(preset_w * scale))
    profile["vh"] = int(round(preset_h * scale))

if args.resolution:
    # Validate here so a typo produces a normal argparse usage error instead of
    # an unhandled ValueError traceback.
    try:
        w, h = (int(part) for part in args.resolution.lower().split("x"))
    except ValueError:
        parser.error(f"--resolution must look like 1280x720, got {args.resolution!r}")
    if w <= 0 or h <= 0:
        parser.error(f"--resolution must be positive, got {args.resolution!r}")
    profile["vw"], profile["vh"] = w, h
apply_quality_profile(profile)

log(f"Hardware: {hw['cpu_count']} cores, {hw['mem_gb']:.1f}GB RAM, {hw['platform']}")
log(f"Render: {profile['vw']}x{profile['vh']} @{profile['fps']}fps, "
    f"CRF {profile['crf']}, {profile['workers']} workers")
|
|
```
|
|
|
|
### Portrait Mode Considerations
|
|
|
|
Portrait (1080x1920) has the same pixel count as landscape 1080p, so performance is equivalent. But composition patterns differ:
|
|
|
|
| Concern | Landscape | Portrait |
|
|
|---------|-----------|----------|
|
|
| Grid cols at `lg` | 160 | 90 |
|
|
| Grid rows at `lg` | 45 | 80 |
|
|
| Max text line chars | ~50 centered | ~25-30 centered |
|
|
| Vertical rain | Short travel | Long, dramatic travel |
|
|
| Horizontal spectrum | Full width | Needs rotation or compression |
|
|
| Radial effects | Natural circles | Tall ellipses (aspect correction handles this) |
|
|
| Particle explosions | Wide spread | Tall spread |
|
|
| Text stacking | 3-4 lines comfortable | 8-10 lines comfortable |
|
|
| Quote layout | 2-3 wide lines | 5-6 short lines |
|
|
|
|
**Portrait-optimized patterns:**
|
|
- Vertical rain/matrix effects are naturally enhanced — longer column travel
|
|
- Fire columns rise through more screen space
|
|
- Rising embers/particles have more vertical runway
|
|
- Text can be stacked more aggressively with more lines
|
|
- Radial effects work if aspect correction is applied (GridLayer handles this automatically)
|
|
- Spectrum bars can be rotated 90 degrees (vertical bars from bottom)
|
|
|
|
**Portrait text layout:**
|
|
```python
|
|
def layout_text_portrait(text, max_chars_per_line=25, grid=None):
    """Break text into short lines for portrait display.

    Words are split on whitespace and greedily packed so that each line is at
    most ``max_chars_per_line`` characters long, counting the single spaces
    between words. A word longer than the limit is never split; it is placed
    on a line of its own.

    Args:
        text: the text to lay out.
        max_chars_per_line: maximum line length in characters.
        grid: accepted for call compatibility; not used by this function.

    Returns:
        list of line strings (empty list for empty or whitespace-only text).
    """
    lines = []
    current_words = []
    current_len = 0  # length of " ".join(current_words)
    for word in text.split():
        # Length the line would have with this word appended.
        needed = current_len + 1 + len(word) if current_words else len(word)
        if current_words and needed > max_chars_per_line:
            lines.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            # Also taken for an over-long first word: never emit an empty line.
            current_words.append(word)
            current_len = needed
    if current_words:
        lines.append(" ".join(current_words))
    return lines
|
|
```
|
|
|
|
## Performance Budget
|
|
|
|
Target: 100-200ms per frame (5-10 fps single-threaded, 40-80 fps across 8 workers).
|
|
|
|
| Component | Time | Notes |
|
|
|-----------|------|-------|
|
|
| Feature extraction | 1-5ms | Pre-computed for all frames before render |
|
|
| Effect function | 2-15ms | Vectorized numpy, avoid Python loops |
|
|
| Character render | 80-150ms | **Bottleneck** -- per-cell Python loop |
|
|
| Shader pipeline | 5-25ms | Depends on active shaders |
|
|
| ffmpeg encode | ~5ms | Amortized by pipe buffering |
|
|
|
|
## Bitmap Pre-Rasterization
|
|
|
|
Rasterize every character at init, not per-frame:
|
|
|
|
```python
|
|
# At init time -- done once
|
|
for c in all_characters:
|
|
img = Image.new("L", (cell_w, cell_h), 0)
|
|
ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
|
|
bitmaps[c] = np.array(img, dtype=np.float32) / 255.0 # float32 for fast multiply
|
|
|
|
# At render time -- fast lookup
|
|
bitmap = bitmaps[char]
|
|
canvas[y:y+ch, x:x+cw] = np.maximum(canvas[y:y+ch, x:x+cw],
|
|
(bitmap[:,:,None] * color).astype(np.uint8))
|
|
```
|
|
|
|
Collect all characters from all palettes + overlay text into the init set. Lazy-init for any missed characters.
|
|
|
|
## Pre-Rendered Background Textures
|
|
|
|
Alternative to `_render_vf()` for backgrounds where characters don't need to change every frame. Pre-bake a static ASCII texture once at init, then multiply by a per-cell color field each frame. One matrix multiply vs thousands of bitmap blits.
|
|
|
|
Use when: background layer uses a fixed character palette and only color/brightness varies per frame. NOT suitable for layers where character selection depends on a changing value field.
|
|
|
|
### Init: Bake the Texture
|
|
|
|
```python
|
|
# In GridLayer.__init__:
|
|
self._bg_row_idx = np.clip(
|
|
(np.arange(VH) - self.oy) // self.ch, 0, self.rows - 1
|
|
)
|
|
self._bg_col_idx = np.clip(
|
|
(np.arange(VW) - self.ox) // self.cw, 0, self.cols - 1
|
|
)
|
|
self._bg_textures = {}
|
|
|
|
def make_bg_texture(self, palette):
    """Return the cached static ASCII texture for ``palette``, baking it on first use.

    The texture is a (VH, VW) float32 array. Every grid cell that fits fully
    inside the frame is filled with the bitmap of a glyph drawn from the
    palette by a fixed-seed RNG, so the same palette always yields the same
    texture. Spaces and glyphs without a bitmap in ``self.bm`` are skipped; if
    nothing usable remains, the first five keys of ``self.bm`` are used instead.
    """
    if palette in self._bg_textures:
        return self._bg_textures[palette]

    baked = np.zeros((VH, VW), dtype=np.float32)
    picker = random.Random(12345)  # fixed seed: identical texture on every run

    glyphs = [g for g in palette if g != " " and g in self.bm]
    if not glyphs:
        glyphs = list(self.bm.keys())[:5]

    for r in range(self.rows):
        top = self.oy + r * self.ch
        bottom = top + self.ch
        if bottom > VH:
            # This row (and every one below it) would spill past the frame.
            break
        for c in range(self.cols):
            left = self.ox + c * self.cw
            right = left + self.cw
            if right > VW:
                break
            baked[top:bottom, left:right] = self.bm[picker.choice(glyphs)]

    self._bg_textures[palette] = baked
    return baked
|
|
```
|
|
|
|
### Render: Color Field x Cached Texture
|
|
|
|
```python
|
|
def render_bg(self, color_field, palette=PAL_CIRCUIT):
    """Fast background: cached ASCII texture multiplied by a per-cell color field.

    Args:
        color_field: (rows, cols, 3) uint8 array holding one color per grid cell.
        palette: palette whose pre-baked texture is used.

    Returns:
        (VH, VW, 3) uint8 canvas.
    """
    glyph_mask = self.make_bg_texture(palette)

    # Look up, for every pixel, the color of the grid cell it belongs to,
    # using the index maps pre-computed at init.
    pixel_rows = self._bg_row_idx[:, None]
    pixel_cols = self._bg_col_idx[None, :]
    per_pixel_rgb = color_field[pixel_rows, pixel_cols].astype(np.float32)

    shaded = glyph_mask[:, :, None] * per_pixel_rgb
    return shaded.astype(np.uint8)
|
|
```
|
|
|
|
### Usage in a Scene
|
|
|
|
```python
|
|
# Build per-cell color from effect fields (cheap — rows*cols, not VH*VW)
|
|
hue = ((t * 0.05 + val * 0.2) % 1.0).astype(np.float32)
|
|
R, G, B = hsv2rgb(hue, np.full_like(val, 0.5), val)
|
|
color_field = mkc(R, G, B, g.rows, g.cols) # (rows, cols, 3) uint8
|
|
|
|
# Render background — single matrix multiply, no per-cell loop
|
|
canvas_bg = g.render_bg(color_field, PAL_DENSE)
|
|
```
|
|
|
|
The texture init loop runs once and is cached per palette. Per-frame cost is one fancy-index lookup + one broadcast multiply — orders of magnitude faster than the per-cell bitmap blit loop in `render()` for dense backgrounds.
|
|
|
|
## Coordinate Array Caching
|
|
|
|
Pre-compute all grid-relative coordinate arrays at init, not per-frame:
|
|
|
|
```python
|
|
# These are O(rows*cols) and used in every effect
|
|
self.rr = np.arange(rows)[:, None] # row indices
|
|
self.cc = np.arange(cols)[None, :] # col indices
|
|
self.dist = np.sqrt(dx**2 + dy**2) # distance from center
|
|
self.angle = np.arctan2(dy, dx) # angle from center
|
|
self.dist_n = ... # normalized distance
|
|
```
|
|
|
|
## Vectorized Effect Patterns
|
|
|
|
### Avoid Per-Cell Python Loops in Effects
|
|
|
|
The render loop (compositing bitmaps) is unavoidably per-cell. But effect functions must be fully vectorized numpy -- never iterate over rows/cols in Python.
|
|
|
|
Bad (O(rows*cols) Python loop):
|
|
```python
|
|
for r in range(rows):
|
|
for c in range(cols):
|
|
val[r, c] = math.sin(c * 0.1 + t) * math.cos(r * 0.1 - t)
|
|
```
|
|
|
|
Good (vectorized):
|
|
```python
|
|
val = np.sin(g.cc * 0.1 + t) * np.cos(g.rr * 0.1 - t)
|
|
```
|
|
|
|
### Vectorized Matrix Rain
|
|
|
|
The naive per-column per-trail-pixel loop is the second biggest bottleneck after the render loop. Use numpy fancy indexing:
|
|
|
|
```python
|
|
# Instead of nested Python loops over columns and trail pixels:
|
|
# Build row index arrays for all active trail pixels at once
|
|
all_rows = []
|
|
all_cols = []
|
|
all_fades = []
|
|
for c in range(cols):
|
|
head = int(S["ry"][c])
|
|
trail_len = S["rln"][c]
|
|
for i in range(trail_len):
|
|
row = head - i
|
|
if 0 <= row < rows:
|
|
all_rows.append(row)
|
|
all_cols.append(c)
|
|
all_fades.append(1.0 - i / trail_len)
|
|
|
|
# Vectorized assignment
|
|
ar = np.array(all_rows)
|
|
ac = np.array(all_cols)
|
|
af = np.array(all_fades, dtype=np.float32)
|
|
# Assign chars and colors in bulk using fancy indexing
|
|
ch[ar, ac] = ... # vectorized char assignment
|
|
co[ar, ac, 1] = (af * bri * 255).astype(np.uint8) # green channel
|
|
```
|
|
|
|
### Vectorized Fire Columns
|
|
|
|
Same pattern -- accumulate index arrays, assign in bulk:
|
|
|
|
```python
|
|
fire_val = np.zeros((rows, cols), dtype=np.float32)
|
|
for fi in range(n_cols):
|
|
fx_c = int((fi * cols / n_cols + np.sin(t * 2 + fi * 0.7) * 3) % cols)
|
|
height = int(energy * rows * 0.7)
|
|
dy = np.arange(min(height, rows))
|
|
fr = rows - 1 - dy
|
|
frac = dy / max(height, 1)
|
|
# Width spread: base columns wider at bottom
|
|
for dx in range(-1, 2): # 3-wide columns
|
|
c = fx_c + dx
|
|
if 0 <= c < cols:
|
|
fire_val[fr, c] = np.maximum(fire_val[fr, c],
|
|
(1 - frac * 0.6) * (0.5 + rms * 0.5))
|
|
# Now map fire_val to chars and colors in one vectorized pass
|
|
```
|
|
|
|
## PIL String Rendering for Text-Heavy Scenes
|
|
|
|
Alternative to per-cell bitmap blitting when rendering many long text strings (scrolling tickers, typewriter sequences, idea floods). Uses PIL's native `ImageDraw.text()` which renders an entire string in one C call, vs one Python-loop bitmap blit per character.
|
|
|
|
Typical win: a scene with 56 ticker rows renders 56 PIL `text()` calls instead of ~10K individual bitmap blits.
|
|
|
|
Use when: scene renders many rows of readable text strings. NOT suitable for sparse or spatially-scattered single characters (use normal `render()` for those).
|
|
|
|
```python
|
|
from PIL import Image, ImageDraw
|
|
|
|
def render_text_layer(grid, rows_data, font):
    """Render dense text rows via PIL instead of per-cell bitmap blitting.

    Args:
        grid: GridLayer instance (for oy, ch, ox, font metrics)
        rows_data: list of (row_index, text_string, rgb_tuple) — one per row;
            entries may be in any order
        font: PIL ImageFont instance (grid.font)

    Returns:
        uint8 array (VH, VW, 3) — canvas with rendered text
    """
    img = Image.new("RGB", (VW, VH), (0, 0, 0))
    draw = ImageDraw.Draw(img)
    for row_idx, text, color in rows_data:
        y = grid.oy + row_idx * grid.ch
        if y + grid.ch > VH:
            # Skip only THIS row. rows_data is not guaranteed to be sorted by
            # row index, so stopping here could drop later rows that fit.
            continue
        draw.text((grid.ox, y), text, fill=color, font=font)
    return np.array(img)
|
|
```
|
|
|
|
### Usage in a Ticker Scene
|
|
|
|
```python
|
|
# Build ticker data (text + color per row)
|
|
rows_data = []
|
|
for row in range(n_tickers):
|
|
text = build_ticker_text(row, t) # scrolling substring
|
|
color = hsv2rgb_scalar(hue, 0.85, bri) # (R, G, B) tuple
|
|
rows_data.append((row, text, color))
|
|
|
|
# One PIL pass instead of thousands of bitmap blits
|
|
canvas_tickers = render_text_layer(g_md, rows_data, g_md.font)
|
|
|
|
# Blend with other layers normally
|
|
result = blend_canvas(canvas_bg, canvas_tickers, "screen", 0.9)
|
|
```
|
|
|
|
This is purely a rendering optimization — same visual output, fewer draw calls. The grid's `render()` method is still needed for sparse character fields where characters are placed individually based on value fields.
|
|
|
|
## Bloom Optimization
|
|
|
|
**Do NOT use `scipy.ndimage.uniform_filter`** -- measured at 424ms/frame.
|
|
|
|
Use 4x downsample + manual box blur instead -- 84ms/frame (5x faster):
|
|
|
|
```python
|
|
sm = canvas[::4, ::4].astype(np.float32) # 4x downsample
|
|
br = np.where(sm > threshold, sm, 0)
|
|
for _ in range(3): # 3-pass manual box blur
|
|
p = np.pad(br, ((1,1),(1,1),(0,0)), mode='edge')
|
|
br = (p[:-2,:-2] + p[:-2,1:-1] + p[:-2,2:] +
|
|
p[1:-1,:-2] + p[1:-1,1:-1] + p[1:-1,2:] +
|
|
p[2:,:-2] + p[2:,1:-1] + p[2:,2:]) / 9.0
|
|
bl = np.repeat(np.repeat(br, 4, axis=0), 4, axis=1)[:H, :W]
|
|
```
|
|
|
|
## Vignette Caching
|
|
|
|
Distance field is resolution- and strength-dependent, never changes per frame:
|
|
|
|
```python
|
|
_vig_cache = {}
|
|
def sh_vignette(canvas, strength):
|
|
key = (canvas.shape[0], canvas.shape[1], round(strength, 2))
|
|
if key not in _vig_cache:
|
|
Y = np.linspace(-1, 1, H)[:, None]
|
|
X = np.linspace(-1, 1, W)[None, :]
|
|
_vig_cache[key] = np.clip(1.0 - np.sqrt(X**2+Y**2) * strength, 0.15, 1).astype(np.float32)
|
|
return np.clip(canvas * _vig_cache[key][:,:,None], 0, 255).astype(np.uint8)
|
|
```
|
|
|
|
Same pattern for CRT barrel distortion (cache remap coordinates).
|
|
|
|
## Film Grain Optimization
|
|
|
|
Generate noise at half resolution, tile up:
|
|
|
|
```python
|
|
# Generate at half resolution, rounding UP so that odd H or W are still fully
# covered after each noise cell is repeated 2x2; the final slice trims the
# one-pixel overshoot. (With H//2, an odd H would yield only H-1 rows.)
half_h, half_w = (H + 1) // 2, (W + 1) // 2
noise = np.random.randint(-amt, amt+1, (half_h, half_w, 1), dtype=np.int16)
noise = np.repeat(np.repeat(noise, 2, axis=0), 2, axis=1)[:H, :W]
|
|
```
|
|
|
|
2x blocky grain looks like film grain and costs 1/4 the random generation.
|
|
|
|
## Parallel Rendering
|
|
|
|
### Worker Architecture
|
|
|
|
```python
|
|
hw = detect_hardware()
|
|
N_WORKERS = hw["workers"]
|
|
|
|
# Batch splitting (for non-clip architectures)
|
|
batch_size = (n_frames + N_WORKERS - 1) // N_WORKERS
|
|
batches = [(i, i*batch_size, min((i+1)*batch_size, n_frames), features, seg_path) ...]
|
|
|
|
with multiprocessing.Pool(N_WORKERS) as pool:
|
|
segments = pool.starmap(render_batch, batches)
|
|
```
|
|
|
|
### Per-Clip Parallelism (Preferred for Segmented Videos)
|
|
|
|
```python
|
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
|
|
with ProcessPoolExecutor(max_workers=N_WORKERS) as pool:
|
|
futures = {pool.submit(render_clip, seg, features, path): seg["id"]
|
|
for seg, path in clip_args}
|
|
for fut in as_completed(futures):
|
|
clip_id = futures[fut]
|
|
try:
|
|
fut.result()
|
|
log(f" {clip_id} done")
|
|
except Exception as e:
|
|
log(f" {clip_id} FAILED: {e}")
|
|
```
|
|
|
|
### Worker Isolation
|
|
|
|
Each worker:
|
|
- Creates its own `Renderer` instance (with full grid + bitmap init)
|
|
- Opens its own ffmpeg subprocess
|
|
- Has independent random seed (`random.seed(batch_id * 10000)`)
|
|
- Writes to its own segment file and stderr log
|
|
|
|
### ffmpeg Pipe Safety
|
|
|
|
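**CRITICAL**: Never use `stderr=subprocess.PIPE` with a long-running ffmpeg unless something continuously drains that pipe. Once the OS pipe buffer (~64KB) fills, ffmpeg blocks writing to stderr while the parent blocks writing frames to stdin, and the two deadlock: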
|
|
|
|
```python
|
|
# WRONG -- will deadlock
|
|
pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
|
|
# RIGHT -- stderr to file
# The context manager closes the log file even if writing frames raises.
with open(err_path, "w") as stderr_fh:
    pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                            stdout=subprocess.DEVNULL, stderr=stderr_fh)
    try:
        ...  # write all frames to pipe.stdin here
    finally:
        # Always close stdin and wait for ffmpeg to exit.
        pipe.stdin.close()
        returncode = pipe.wait()
if returncode != 0:
    # A failed encode must not pass silently; details are in the stderr log.
    raise subprocess.CalledProcessError(returncode, cmd)
|
|
```
|
|
|
|
### Concatenation
|
|
|
|
```python
|
|
import os

with open(concat_file, "w", encoding="utf-8") as cf:
    for seg in segments:
        # The concat demuxer resolves relative paths against the directory of
        # the list file, not the current working directory, so write absolute
        # paths. A single quote inside a quoted path must be written as '\''.
        seg_path = os.path.abspath(seg).replace("'", "'\\''")
        cf.write(f"file '{seg_path}'\n")

# "-safe 0" is needed for the demuxer to accept absolute paths.
cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_file]
if audio_path:
    # Copy the video stream untouched, encode audio to AAC, and stop at the
    # shorter of the two inputs.
    cmd += ["-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-b:a", "192k", "-shortest"]
else:
    cmd += ["-c:v", "copy"]
cmd.append(output_path)
# check=True raises CalledProcessError if ffmpeg exits non-zero.
subprocess.run(cmd, capture_output=True, check=True)
|
|
```
|
|
|
|
## Particle System Performance
|
|
|
|
Cap particle counts based on quality profile:
|
|
|
|
| System | Low | Standard | High |
|
|
|--------|-----|----------|------|
|
|
| Explosion | 300 | 1000 | 2500 |
|
|
| Embers | 500 | 1500 | 3000 |
|
|
| Starfield | 300 | 800 | 1500 |
|
|
| Dissolve | 200 | 600 | 1200 |
|
|
|
|
Cull by truncating lists:
|
|
```python
|
|
MAX_PARTICLES = profile.get("particles_max", 1200)
|
|
if len(S["px"]) > MAX_PARTICLES:
|
|
for k in ("px", "py", "vx", "vy", "life", "char"):
|
|
S[k] = S[k][-MAX_PARTICLES:] # keep newest
|
|
```
|
|
|
|
## Memory Management
|
|
|
|
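- Feature arrays: pre-computed for all frames, shared across workers via fork semantics (COW). This holds only with the `fork` start method; with `spawn` or `forkserver` (always on Windows, the default on macOS) each worker receives its own pickled copy, so budget memory accordingly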
|
|
- Canvas: allocated once per worker, reused (`np.zeros(...)`)
|
|
- Character arrays: allocated per frame (cheap -- rows*cols U1 strings)
|
|
- Bitmap cache: ~500KB per grid size, initialized once per worker
|
|
|
|
Total memory per worker: ~50-150MB. Total: ~400-1200MB for 8 workers.
|
|
|
|
For low-memory systems (< 4GB), reduce worker count and use smaller grids.
|
|
|
|
## Brightness Verification
|
|
|
|
After render, spot-check brightness at sample timestamps:
|
|
|
|
```python
|
|
for t in [2, 30, 60, 120, 180]:
    # Decode a single raw RGB frame at timestamp t to stdout.
    cmd = ["ffmpeg", "-ss", str(t), "-i", output_path,
           "-frames:v", "1", "-f", "rawvideo", "-pix_fmt", "rgb24", "-"]
    r = subprocess.run(cmd, capture_output=True)
    arr = np.frombuffer(r.stdout, dtype=np.uint8)
    if r.returncode != 0 or arr.size == 0:
        # No pixels came back (e.g. ffmpeg failed or produced no frame for
        # this timestamp). arr.max() would raise on an empty array.
        print(f"t={t}s no frame decoded (ffmpeg exit code {r.returncode})")
        continue
    print(f"t={t}s mean={arr.mean():.1f} max={arr.max()}")
|
|
```
|
|
|
|
Target: mean > 5 for quiet sections, mean > 15 for active sections. If consistently below, increase brightness floor in effects and/or global boost multiplier.
|
|
|
|
## Render Time Estimates
|
|
|
|
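Scale with hardware. Baseline: 1080p, 24fps. Note that the figures in the table correspond to roughly 1.1 s of wall-clock time per frame per worker (e.g. 14,400 frames in ~65 min on 4 workers), which is well above the ~180ms/frame render budget quoted elsewhere in this document; re-measure on your own hardware before relying on either number.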
|
|
|
|
| Duration | Frames | 4 workers | 8 workers | 16 workers |
|
|
|----------|--------|-----------|-----------|------------|
|
|
| 30s | 720 | ~3 min | ~2 min | ~1 min |
|
|
| 2 min | 2,880 | ~13 min | ~7 min | ~4 min |
|
|
| 3.5 min | 5,040 | ~23 min | ~12 min | ~6 min |
|
|
| 5 min | 7,200 | ~33 min | ~17 min | ~9 min |
|
|
| 10 min | 14,400 | ~65 min | ~33 min | ~17 min |
|
|
|
|
At 720p: multiply times by ~0.5. At 4K: multiply by ~4.
|
|
|
|
Heavier effects (many particles, dense grids, extra shader passes) add ~20-50%.
|
|
|
|
---
|
|
|
|
## Temp File Cleanup
|
|
|
|
Rendering generates intermediate files that accumulate across runs. Clean up after the final concat/mux step.
|
|
|
|
### Files to Clean
|
|
|
|
| File type | Source | Location |
|
|
|-----------|--------|----------|
|
|
| WAV extracts | `ffmpeg -i input.mp3 ... tmp.wav` | `tempfile.mktemp()` or project dir |
|
|
| Segment clips | `render_clip()` output | `segments/seg_00.mp4` etc. |
|
|
| Concat list | ffmpeg concat demuxer input | `segments/concat.txt` |
|
|
| ffmpeg stderr logs | piped to file for debugging | `*.log` in project dir |
|
|
| Feature cache | pickled numpy arrays | `*.pkl` or `*.npz` |
|
|
|
|
### Cleanup Function
|
|
|
|
```python
|
|
import glob
import os
import shutil
import tempfile


def cleanup_render_artifacts(segments_dir="segments", keep_final=True):
    """Remove intermediate files after successful render.

    Call this AFTER verifying the final output exists and plays correctly.

    Removes, relative to the current working directory:
      1. the whole ``segments_dir`` directory (segment clips and concat list),
      2. ``*.wav`` files whose name starts with "tmp" or "extracted_",
      3. ``ffmpeg_*.log`` files.
    Feature caches are left in place.

    Nothing is removed when the environment variable KEEP_INTERMEDIATES is
    set to "1" (debugging aid).

    Args:
        segments_dir: directory containing segment clips and concat list
        keep_final: if True, only delete intermediates (not the final output).
            Currently has no effect: this function never touches the final
            output file.

    Returns:
        list of strings describing what was removed.
    """
    removed = []

    if os.environ.get("KEEP_INTERMEDIATES") == "1":
        print("KEEP_INTERMEDIATES=1 — skipping cleanup")
        return removed

    # 1. Segment clips
    if os.path.isdir(segments_dir):
        shutil.rmtree(segments_dir)
        removed.append(f"directory: {segments_dir}")

    # 2. Temporary WAV files
    for wav in glob.glob("*.wav"):
        if wav.startswith(("tmp", "extracted_")):
            os.remove(wav)
            removed.append(wav)

    # 3. ffmpeg stderr logs
    # (loop variable deliberately not called "log" so it cannot shadow a
    # logging helper of that name)
    for log_path in glob.glob("ffmpeg_*.log"):
        os.remove(log_path)
        removed.append(log_path)

    # 4. Feature cache (optional — useful to keep for re-renders)
    # for cache in glob.glob("features_*.npz"):
    #     os.remove(cache)
    #     removed.append(cache)

    print(f"Cleaned {len(removed)} artifacts: {removed}")
    return removed
|
|
```
|
|
|
|
### Integration with Render Pipeline
|
|
|
|
Call cleanup at the end of the main render script, after the final output is verified:
|
|
|
|
```python
|
|
# At end of main()
|
|
if os.path.exists(output_path) and os.path.getsize(output_path) > 1000:
|
|
cleanup_render_artifacts(segments_dir="segments")
|
|
print(f"Done. Output: {output_path}")
|
|
else:
|
|
print("WARNING: final output missing or empty — skipping cleanup")
|
|
```
|
|
|
|
### Temp File Best Practices
|
|
|
|
- Use `tempfile.mkdtemp()` for segment directories — avoids polluting the project dir
|
|
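- Create WAV extracts with `tempfile.mkstemp(suffix=".wav")` (close the returned file descriptor before handing the path to ffmpeg) so they live in the OS temp dir; avoid `tempfile.mktemp()`, which is deprecated because another process can claim the name before the file is created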
|
|
- For debugging, set `KEEP_INTERMEDIATES=1` env var to skip cleanup
|
|
- Feature caches (`.npz`) are cheap to store and expensive to recompute — default to keeping them
|