# Hyperparameter Tuning with PyTorch Lightning
|
|||
|
|
|
|||
|
|
## Integration with Tuning Frameworks
|
|||
|
|
|
|||
|
|
Lightning integrates seamlessly with popular hyperparameter tuning libraries.
|
|||
|
|
|
|||
|
|
### 1. Ray Tune Integration
|
|||
|
|
|
|||
|
|
**Installation**:
|
|||
|
|
```bash
|
|||
|
|
pip install ray[tune]
|
|||
|
|
pip install lightning
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Basic Ray Tune example**:
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import torch
import torch.nn as nn
import lightning as L
|
|||
|
|
from ray import tune
|
|||
|
|
from ray.tune.integration.pytorch_lightning import TuneReportCallback
|
|||
|
|
|
|||
|
|
class LitModel(L.LightningModule):
|
|||
|
|
def __init__(self, lr, batch_size):
|
|||
|
|
super().__init__()
|
|||
|
|
self.lr = lr
|
|||
|
|
self.batch_size = batch_size
|
|||
|
|
self.model = nn.Sequential(nn.Linear(10, 128), nn.ReLU(), nn.Linear(128, 1))
|
|||
|
|
|
|||
|
|
def training_step(self, batch, batch_idx):
|
|||
|
|
loss = self.model(batch).mean()
|
|||
|
|
self.log('train_loss', loss)
|
|||
|
|
return loss
|
|||
|
|
|
|||
|
|
def validation_step(self, batch, batch_idx):
|
|||
|
|
val_loss = self.model(batch).mean()
|
|||
|
|
self.log('val_loss', val_loss)
|
|||
|
|
|
|||
|
|
def configure_optimizers(self):
|
|||
|
|
return torch.optim.Adam(self.parameters(), lr=self.lr)
|
|||
|
|
|
|||
|
|
def train_fn(config):
|
|||
|
|
"""Training function for Ray Tune."""
|
|||
|
|
model = LitModel(lr=config["lr"], batch_size=config["batch_size"])
|
|||
|
|
|
|||
|
|
# Add callback to report metrics to Tune
|
|||
|
|
trainer = L.Trainer(
|
|||
|
|
max_epochs=10,
|
|||
|
|
callbacks=[TuneReportCallback({"loss": "val_loss"}, on="validation_end")]
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
trainer.fit(model, train_loader, val_loader)
|
|||
|
|
|
|||
|
|
# Define search space
|
|||
|
|
config = {
|
|||
|
|
"lr": tune.loguniform(1e-5, 1e-1),
|
|||
|
|
"batch_size": tune.choice([16, 32, 64, 128])
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# Run hyperparameter search
|
|||
|
|
analysis = tune.run(
|
|||
|
|
train_fn,
|
|||
|
|
config=config,
|
|||
|
|
num_samples=20, # 20 trials
|
|||
|
|
resources_per_trial={"gpu": 1}
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Best hyperparameters
|
|||
|
|
best_config = analysis.get_best_config(metric="loss", mode="min")
|
|||
|
|
print(f"Best config: {best_config}")
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Advanced: Population-Based Training (PBT)**:
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
from ray.tune.schedulers import PopulationBasedTraining
|
|||
|
|
|
|||
|
|
# PBT scheduler
|
|||
|
|
scheduler = PopulationBasedTraining(
|
|||
|
|
time_attr='training_iteration',
|
|||
|
|
metric='val_loss',
|
|||
|
|
mode='min',
|
|||
|
|
perturbation_interval=5, # Perturb every 5 epochs
|
|||
|
|
hyperparam_mutations={
|
|||
|
|
"lr": tune.loguniform(1e-5, 1e-1),
|
|||
|
|
"batch_size": [16, 32, 64, 128]
|
|||
|
|
}
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
analysis = tune.run(
|
|||
|
|
train_fn,
|
|||
|
|
config=config,
|
|||
|
|
num_samples=8, # Population size
|
|||
|
|
scheduler=scheduler,
|
|||
|
|
resources_per_trial={"gpu": 1}
|
|||
|
|
)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 2. Optuna Integration
|
|||
|
|
|
|||
|
|
**Installation**:
|
|||
|
|
```bash
|
|||
|
|
pip install optuna
|
|||
|
|
pip install optuna-integration
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Optuna example**:
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import optuna
|
|||
|
|
from optuna.integration import PyTorchLightningPruningCallback
|
|||
|
|
|
|||
|
|
def objective(trial):
|
|||
|
|
# Suggest hyperparameters
|
|||
|
|
lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
|
|||
|
|
batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
|
|||
|
|
n_layers = trial.suggest_int('n_layers', 1, 3)
|
|||
|
|
hidden_size = trial.suggest_int('hidden_size', 64, 512, step=64)
|
|||
|
|
|
|||
|
|
# Create model
|
|||
|
|
model = LitModel(lr=lr, n_layers=n_layers, hidden_size=hidden_size)
|
|||
|
|
|
|||
|
|
# Pruning callback (early stopping for bad trials)
|
|||
|
|
pruning_callback = PyTorchLightningPruningCallback(trial, monitor="val_loss")
|
|||
|
|
|
|||
|
|
trainer = L.Trainer(
|
|||
|
|
max_epochs=20,
|
|||
|
|
callbacks=[pruning_callback],
|
|||
|
|
enable_progress_bar=False,
|
|||
|
|
logger=False
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
trainer.fit(model, train_loader, val_loader)
|
|||
|
|
|
|||
|
|
return trainer.callback_metrics["val_loss"].item()
|
|||
|
|
|
|||
|
|
# Create study
|
|||
|
|
study = optuna.create_study(
|
|||
|
|
direction='minimize',
|
|||
|
|
pruner=optuna.pruners.MedianPruner() # Prune bad trials early
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Optimize
|
|||
|
|
study.optimize(objective, n_trials=50, timeout=3600)
|
|||
|
|
|
|||
|
|
# Best params
|
|||
|
|
print(f"Best trial: {study.best_trial.params}")
|
|||
|
|
print(f"Best value: {study.best_value}")
|
|||
|
|
|
|||
|
|
# Visualization
|
|||
|
|
optuna.visualization.plot_optimization_history(study).show()
|
|||
|
|
optuna.visualization.plot_param_importances(study).show()
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Optuna with distributed training**:
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import optuna
|
|||
|
|
|
|||
|
|
# Shared database for distributed optimization
|
|||
|
|
storage = optuna.storages.RDBStorage(
|
|||
|
|
url='postgresql://user:pass@localhost/optuna'
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
study = optuna.create_study(
|
|||
|
|
study_name='distributed_study',
|
|||
|
|
storage=storage,
|
|||
|
|
load_if_exists=True,
|
|||
|
|
direction='minimize'
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Run on multiple machines
|
|||
|
|
study.optimize(objective, n_trials=50)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 3. Weights & Biases (WandB) Sweeps
|
|||
|
|
|
|||
|
|
**Installation**:
|
|||
|
|
```bash
|
|||
|
|
pip install wandb
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**WandB sweep config** (`sweep.yaml`):
|
|||
|
|
```yaml
|
|||
|
|
program: train.py
|
|||
|
|
method: bayes
|
|||
|
|
metric:
|
|||
|
|
name: val_loss
|
|||
|
|
goal: minimize
|
|||
|
|
parameters:
|
|||
|
|
lr:
|
|||
|
|
distribution: log_uniform_values
|
|||
|
|
min: 0.00001
|
|||
|
|
max: 0.1
|
|||
|
|
batch_size:
|
|||
|
|
values: [16, 32, 64, 128]
|
|||
|
|
optimizer:
|
|||
|
|
values: ['adam', 'sgd', 'adamw']
|
|||
|
|
dropout:
|
|||
|
|
distribution: uniform
|
|||
|
|
min: 0.0
|
|||
|
|
max: 0.5
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Training script** (`train.py`):
|
|||
|
|
```python
|
|||
|
|
import wandb
|
|||
|
|
import lightning as L
|
|||
|
|
from lightning.pytorch.loggers import WandbLogger
|
|||
|
|
|
|||
|
|
def train():
|
|||
|
|
# Initialize wandb
|
|||
|
|
wandb.init()
|
|||
|
|
config = wandb.config
|
|||
|
|
|
|||
|
|
# Create model with sweep params
|
|||
|
|
model = LitModel(
|
|||
|
|
lr=config.lr,
|
|||
|
|
batch_size=config.batch_size,
|
|||
|
|
optimizer=config.optimizer,
|
|||
|
|
dropout=config.dropout
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# WandB logger
|
|||
|
|
wandb_logger = WandbLogger(project='hyperparameter-sweep')
|
|||
|
|
|
|||
|
|
trainer = L.Trainer(
|
|||
|
|
max_epochs=20,
|
|||
|
|
logger=wandb_logger
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
trainer.fit(model, train_loader, val_loader)
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
|
|||
|
|
train()
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Launch sweep**:
|
|||
|
|
```bash
|
|||
|
|
# Initialize sweep
|
|||
|
|
wandb sweep sweep.yaml
|
|||
|
|
# Output: wandb: Created sweep with ID: abc123
|
|||
|
|
|
|||
|
|
# Run agent (can run on multiple machines)
|
|||
|
|
wandb agent your-entity/your-project/abc123
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 4. Hyperopt Integration
|
|||
|
|
|
|||
|
|
**Installation**:
|
|||
|
|
```bash
|
|||
|
|
pip install hyperopt
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**Hyperopt example**:
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import numpy as np
from hyperopt import hp, fmin, tpe, Trials
|
|||
|
|
|
|||
|
|
def objective(params):
|
|||
|
|
model = LitModel(
|
|||
|
|
lr=params['lr'],
|
|||
|
|
batch_size=int(params['batch_size']),
|
|||
|
|
hidden_size=int(params['hidden_size'])
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
trainer = L.Trainer(
|
|||
|
|
max_epochs=10,
|
|||
|
|
enable_progress_bar=False,
|
|||
|
|
logger=False
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
trainer.fit(model, train_loader, val_loader)
|
|||
|
|
|
|||
|
|
# Return loss (minimize)
|
|||
|
|
return trainer.callback_metrics["val_loss"].item()
|
|||
|
|
|
|||
|
|
# Define search space
|
|||
|
|
space = {
|
|||
|
|
'lr': hp.loguniform('lr', np.log(1e-5), np.log(1e-1)),
|
|||
|
|
'batch_size': hp.quniform('batch_size', 16, 128, 16),
|
|||
|
|
'hidden_size': hp.quniform('hidden_size', 64, 512, 64)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# Optimize
|
|||
|
|
trials = Trials()
|
|||
|
|
best = fmin(
|
|||
|
|
fn=objective,
|
|||
|
|
space=space,
|
|||
|
|
algo=tpe.suggest, # Tree-structured Parzen Estimator
|
|||
|
|
max_evals=50,
|
|||
|
|
trials=trials
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
print(f"Best hyperparameters: {best}")
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## Built-In Lightning Tuning
|
|||
|
|
|
|||
|
|
### Auto Learning Rate Finder
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
class LitModel(L.LightningModule):
|
|||
|
|
def __init__(self, lr=1e-3):
|
|||
|
|
super().__init__()
|
|||
|
|
self.lr = lr
|
|||
|
|
self.model = nn.Linear(10, 1)
|
|||
|
|
|
|||
|
|
def configure_optimizers(self):
|
|||
|
|
return torch.optim.Adam(self.parameters(), lr=self.lr)
|
|||
|
|
|
|||
|
|
def training_step(self, batch, batch_idx):
|
|||
|
|
loss = self.model(batch).mean()
|
|||
|
|
return loss
|
|||
|
|
|
|||
|
|
# Find optimal learning rate.
# NOTE: Trainer(auto_lr_find=True) + trainer.tune(model, train_loader) were
# removed in Lightning 2.0; use the Tuner API instead:
model = LitModel()
trainer = L.Trainer()
|
|||
|
|
from lightning.pytorch.tuner import Tuner
|
|||
|
|
tuner = Tuner(trainer)
|
|||
|
|
lr_finder = tuner.lr_find(model, train_loader)
|
|||
|
|
|
|||
|
|
# Plot results
|
|||
|
|
fig = lr_finder.plot(suggest=True)
|
|||
|
|
fig.show()
|
|||
|
|
|
|||
|
|
# Get suggested LR
|
|||
|
|
suggested_lr = lr_finder.suggestion()
|
|||
|
|
print(f"Suggested LR: {suggested_lr}")
|
|||
|
|
|
|||
|
|
# Update model
|
|||
|
|
model.lr = suggested_lr
|
|||
|
|
|
|||
|
|
# Train with optimal LR
|
|||
|
|
trainer.fit(model, train_loader)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### Auto Batch Size Finder
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
class LitModel(L.LightningModule):
|
|||
|
|
def __init__(self, batch_size=32):
|
|||
|
|
super().__init__()
|
|||
|
|
self.batch_size = batch_size
|
|||
|
|
self.model = nn.Linear(10, 1)
|
|||
|
|
|
|||
|
|
def train_dataloader(self):
|
|||
|
|
return DataLoader(dataset, batch_size=self.batch_size)
|
|||
|
|
|
|||
|
|
model = LitModel()
trainer = L.Trainer()

# NOTE: Trainer(auto_scale_batch_size='binsearch') + trainer.tune(model) were
# removed in Lightning 2.0; use the Tuner API instead:
from lightning.pytorch.tuner import Tuner
tuner = Tuner(trainer)
tuner.scale_batch_size(model, mode='binsearch')
|
|||
|
|
|
|||
|
|
print(f"Optimal batch size: {model.batch_size}")
|
|||
|
|
|
|||
|
|
# Train with optimal batch size
|
|||
|
|
trainer.fit(model, train_loader)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## Advanced Tuning Strategies
|
|||
|
|
|
|||
|
|
### 1. Multi-Fidelity Optimization (Successive Halving)
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
from ray.tune.schedulers import ASHAScheduler
|
|||
|
|
|
|||
|
|
# ASHA: Asynchronous Successive Halving Algorithm
|
|||
|
|
scheduler = ASHAScheduler(
|
|||
|
|
max_t=100, # Max epochs
|
|||
|
|
grace_period=10, # Min epochs before stopping
|
|||
|
|
reduction_factor=2 # Halve resources each round
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
analysis = tune.run(
|
|||
|
|
train_fn,
|
|||
|
|
config=config,
|
|||
|
|
num_samples=64,
|
|||
|
|
scheduler=scheduler,
|
|||
|
|
resources_per_trial={"gpu": 1}
|
|||
|
|
)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**How it works**:
|
|||
|
|
- Start 64 trials
|
|||
|
|
- After 10 epochs, stop bottom 50% (32 trials remain)
|
|||
|
|
- After 20 epochs, stop bottom 50% (16 trials remain)
|
|||
|
|
- After 40 epochs, stop bottom 50% (8 trials remain)
|
|||
|
|
- After 80 epochs, stop bottom 50% (4 trials remain)
|
|||
|
|
- Run remaining 4 trials to completion (100 epochs)
|
|||
|
|
|
|||
|
|
### 2. Bayesian Optimization
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
from ray.tune.search.bayesopt import BayesOptSearch
|
|||
|
|
|
|||
|
|
search = BayesOptSearch(
|
|||
|
|
metric="val_loss",
|
|||
|
|
mode="min"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
analysis = tune.run(
|
|||
|
|
train_fn,
|
|||
|
|
config=config,
|
|||
|
|
num_samples=50,
|
|||
|
|
search_alg=search,
|
|||
|
|
resources_per_trial={"gpu": 1}
|
|||
|
|
)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 3. Grid Search
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
from ray import tune
|
|||
|
|
|
|||
|
|
# Exhaustive grid search
|
|||
|
|
config = {
|
|||
|
|
"lr": tune.grid_search([1e-5, 1e-4, 1e-3, 1e-2]),
|
|||
|
|
"batch_size": tune.grid_search([16, 32, 64, 128]),
|
|||
|
|
"optimizer": tune.grid_search(['adam', 'sgd', 'adamw'])
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# Total trials: 4 × 4 × 3 = 48
|
|||
|
|
analysis = tune.run(train_fn, config=config)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 4. Random Search
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
config = {
|
|||
|
|
"lr": tune.loguniform(1e-5, 1e-1),
|
|||
|
|
"batch_size": tune.choice([16, 32, 64, 128]),
|
|||
|
|
"dropout": tune.uniform(0.0, 0.5),
|
|||
|
|
"hidden_size": tune.randint(64, 512)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# Random sampling
|
|||
|
|
analysis = tune.run(
|
|||
|
|
train_fn,
|
|||
|
|
config=config,
|
|||
|
|
num_samples=100 # 100 random samples
|
|||
|
|
)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## Best Practices
|
|||
|
|
|
|||
|
|
### 1. Start Simple
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
# Phase 1: Coarse search (fast)
|
|||
|
|
coarse_config = {
|
|||
|
|
"lr": tune.loguniform(1e-5, 1e-1),
|
|||
|
|
"batch_size": tune.choice([32, 64])
|
|||
|
|
}
|
|||
|
|
coarse_analysis = tune.run(train_fn, config=coarse_config, num_samples=10, max_epochs=5)
|
|||
|
|
|
|||
|
|
# Phase 2: Fine-tune around best (slow)
|
|||
|
|
best_lr = coarse_analysis.best_config["lr"]
|
|||
|
|
fine_config = {
|
|||
|
|
"lr": tune.uniform(best_lr * 0.5, best_lr * 2),
|
|||
|
|
"batch_size": tune.choice([16, 32, 64, 128])
|
|||
|
|
}
|
|||
|
|
fine_analysis = tune.run(train_fn, config=fine_config, num_samples=20, max_epochs=20)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 2. Use Checkpointing
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import os
from ray.tune.integration.pytorch_lightning import TuneReportCheckpointCallback

def train_fn(config, checkpoint_dir=None):
|
|||
|
|
model = LitModel(lr=config["lr"])
|
|||
|
|
|
|||
|
|
trainer = L.Trainer(
|
|||
|
|
max_epochs=100,
|
|||
|
|
callbacks=[
|
|||
|
|
TuneReportCheckpointCallback(
|
|||
|
|
metrics={"loss": "val_loss"},
|
|||
|
|
filename="checkpoint",
|
|||
|
|
on="validation_end"
|
|||
|
|
)
|
|||
|
|
]
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Resume from checkpoint if exists
|
|||
|
|
ckpt_path = None
|
|||
|
|
if checkpoint_dir:
|
|||
|
|
ckpt_path = os.path.join(checkpoint_dir, "checkpoint")
|
|||
|
|
|
|||
|
|
trainer.fit(model, train_loader, val_loader, ckpt_path=ckpt_path)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 3. Monitor Resource Usage
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import GPUtil
|
|||
|
|
|
|||
|
|
def train_fn(config):
|
|||
|
|
# Before training
|
|||
|
|
GPUs = GPUtil.getGPUs()
|
|||
|
|
print(f"GPU memory before: {GPUs[0].memoryUsed} MB")
|
|||
|
|
|
|||
|
|
# Train
|
|||
|
|
model = LitModel(lr=config["lr"], batch_size=config["batch_size"])
|
|||
|
|
trainer.fit(model, train_loader)
|
|||
|
|
|
|||
|
|
# After training
|
|||
|
|
GPUs = GPUtil.getGPUs()
|
|||
|
|
print(f"GPU memory after: {GPUs[0].memoryUsed} MB")
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## Common Issues
|
|||
|
|
|
|||
|
|
### Issue: Trials Running Out of Memory
|
|||
|
|
|
|||
|
|
**Solution**: Reduce concurrent trials or batch size
|
|||
|
|
```python
|
|||
|
|
analysis = tune.run(
|
|||
|
|
train_fn,
|
|||
|
|
config=config,
|
|||
|
|
resources_per_trial={"gpu": 0.5}, # 2 trials per GPU
|
|||
|
|
max_concurrent_trials=2 # Limit concurrent trials
|
|||
|
|
)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### Issue: Slow Hyperparameter Search
|
|||
|
|
|
|||
|
|
**Solution**: Use early stopping scheduler
|
|||
|
|
```python
|
|||
|
|
from ray.tune.schedulers import ASHAScheduler
|
|||
|
|
|
|||
|
|
scheduler = ASHAScheduler(
|
|||
|
|
max_t=100,
|
|||
|
|
grace_period=5, # Stop bad trials after 5 epochs
|
|||
|
|
reduction_factor=3
|
|||
|
|
)
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### Issue: Can't Reproduce Best Trial
|
|||
|
|
|
|||
|
|
**Solution**: Set seeds in training function
|
|||
|
|
```python
|
|||
|
|
def train_fn(config):
|
|||
|
|
L.seed_everything(42, workers=True)
|
|||
|
|
# Rest of training...
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## Resources
|
|||
|
|
|
|||
|
|
- Ray Tune + Lightning: https://docs.ray.io/en/latest/tune/examples/tune-pytorch-lightning.html
|
|||
|
|
- Optuna: https://optuna.readthedocs.io/
|
|||
|
|
- WandB Sweeps: https://docs.wandb.ai/guides/sweeps
|
|||
|
|
- Lightning Tuner: https://lightning.ai/docs/pytorch/stable/tuning.html
|