BIBC2025 workshop - Hyperparameter tuning
RSFAS, ANU

Hyperparameter tuning in CNNs is crucial: with the wrong choices, such as a network that is too small or a learning rate that is too unstable, the model may not just perform poorly, it may fail to learn anything at all.
CNNs involve many hyperparameters, including the number of filters, the depth of the network (the number of convolutional blocks), the type of global pooling, the learning rate, and the number of training epochs.
keras_tuner is a hyperparameter tuning library built for Keras.

It offers predefined hyperparameter types (Boolean, Choice, Float, Int), built-in search algorithms (random search, grid search, Bayesian optimization, Hyperband), and a HyperModel API for tuning the training process.

Hyperparameters are defined on a HyperParameters object; defining one returns its default value:

library(reticulate)
keras_tuner <- import("keras_tuner",
                      convert = FALSE)
hp <- keras_tuner$HyperParameters()
hp$Boolean(name = "bool")

False

All the defined hyperparameters are recorded in the hyperparameter space. After analogous hp$Choice(), hp$Float(), and hp$Int() calls, hp$space contains:

[Boolean(name: "bool", default: False), Choice(name: 'class', values: ['a', 'b'], ordered: False, default: a), Float(name: 'f', min_value: 0.0, max_value: 1.0, step: 0.1, sampling: 'linear', default: 0.0), Int(name: 'i', min_value: 0, max_value: 10, step: 2, sampling: 'linear', default: 0)]
Consider a simple CNN model, where we want to tune the number of filters. Assuming keras is also loaded via reticulate:

keras <- import("keras", convert = FALSE)
input <- keras$layers$Input(tuple(32L, 32L, 3L))
x <- keras$layers$Conv2D(16L, tuple(3L, 3L), padding = "same", activation = "relu")(input)
x <- keras$layers$MaxPool2D(tuple(2L, 2L))(x)
x <- keras$layers$GlobalAveragePooling2D()(x)
output <- keras$layers$Dense(1L, activation = "sigmoid")(x)
model <- keras$Model(input, output)
model$compile(optimizer = "sgd",
              loss = "binary_crossentropy",
              metrics = list("accuracy"))

We can use the Int hyperparameter "filters" as a placeholder for the filters argument:
build_model <- function(hp) {
  filters <- hp$Int("filters", min_value = 8L, max_value = 64L)
  input <- keras$layers$Input(tuple(32L, 32L, 3L))
  x <- keras$layers$Conv2D(filters, tuple(3L, 3L), padding = "same", activation = "relu")(input)
  x <- keras$layers$MaxPool2D(tuple(2L, 2L))(x)
  x <- keras$layers$GlobalAveragePooling2D()(x)
  output <- keras$layers$Dense(1L, activation = "sigmoid")(x)
  model <- keras$Model(input, output)
  model$compile(optimizer = "sgd",
                loss = "binary_crossentropy",
                metrics = list("accuracy"))
  return(model)
}

Similarly, we can tune the learning rate using Float. With sampling = "log" and step = 2, the candidate values double at each step: 1e-05, 2e-05, 4e-05, and so on up to 1e-02.
build_model <- function(hp) {
  filters <- hp$Int("filters", min_value = 8L, max_value = 64L)
  lr <- hp$Float("lr", min_value = 1e-5, max_value = 1e-2,
                 step = 2L, sampling = "log")
  input <- keras$layers$Input(tuple(32L, 32L, 3L))
  x <- keras$layers$Conv2D(filters, tuple(3L, 3L), padding = "same", activation = "relu")(input)
  x <- keras$layers$MaxPool2D(tuple(2L, 2L))(x)
  x <- keras$layers$GlobalAveragePooling2D()(x)
  output <- keras$layers$Dense(1L, activation = "sigmoid")(x)
  model <- keras$Model(input, output)
  model$compile(optimizer = keras$optimizers$SGD(learning_rate = lr),
                loss = "binary_crossentropy",
                metrics = list("accuracy"))
  return(model)
}

Branching can be done via Choice or Boolean.
build_model <- function(hp) {
  filters <- hp$Int("filters", min_value = 8L, max_value = 64L)
  lr <- hp$Float("lr", min_value = 1e-5, max_value = 1e-2,
                 step = 2L, sampling = "log")
  gp <- hp$Choice("gp", values = c("max", "ave"))
  input <- keras$layers$Input(tuple(32L, 32L, 3L))
  x <- keras$layers$Conv2D(filters, tuple(3L, 3L), padding = "same", activation = "relu")(input)
  x <- keras$layers$MaxPool2D(tuple(2L, 2L))(x)
  if (py_to_r(gp) == "ave") {
    x <- keras$layers$GlobalAveragePooling2D()(x)
  } else {
    x <- keras$layers$GlobalMaxPool2D()(x)
  }
  output <- keras$layers$Dense(1L, activation = "sigmoid")(x)
  model <- keras$Model(input, output)
  model$compile(optimizer = keras$optimizers$SGD(learning_rate = lr),
                loss = "binary_crossentropy",
                metrics = list("accuracy"))
  return(model)
}

Combined with a for loop, Int can be used to stack convolutional blocks.
build_model <- function(hp) {
  filters <- hp$Int("filters", min_value = 8L, max_value = 64L)
  lr <- hp$Float("lr", min_value = 1e-5, max_value = 1e-2,
                 step = 2L, sampling = "log")
  gp <- hp$Choice("gp", values = c("max", "ave"))
  blocks <- hp$Int("blocks", min_value = 1L, max_value = 4L)
  input <- keras$layers$Input(tuple(32L, 32L, 3L))
  x <- input
  for (i in seq_len(py_to_r(blocks))) {
    x <- keras$layers$Conv2D(filters, tuple(3L, 3L), padding = "same", activation = "relu")(x)
    x <- keras$layers$MaxPool2D(tuple(2L, 2L))(x)
  }
  if (py_to_r(gp) == "ave") {
    x <- keras$layers$GlobalAveragePooling2D()(x)
  } else {
    x <- keras$layers$GlobalMaxPool2D()(x)
  }
  output <- keras$layers$Dense(1L, activation = "sigmoid")(x)
  model <- keras$Model(input, output)
  model$compile(optimizer = keras$optimizers$SGD(learning_rate = lr),
                loss = "binary_crossentropy",
                metrics = list("accuracy"))
  return(model)
}

keras_tuner provides several built-in tuners:
Random search (keras_tuner$RandomSearch)
Grid search (keras_tuner$GridSearch)
Bayesian optimization (keras_tuner$BayesianOptimization)
Hyperband (keras_tuner$Hyperband)
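Below is a sketch of how the tuner used in this section might be constructed; the objective, trial count, and directory/project name are assumptions inferred from the output that follows.

# Sketch: Bayesian optimization over build_model. All arguments here are
# inferred from the summaries below, not taken verbatim from the slides.
tuner <- keras_tuner$BayesianOptimization(
  build_model,
  objective = "val_accuracy",
  max_trials = 10L,
  directory = "keras_tuner",
  project_name = "cat_and_dog_bayesian"
)
tuner$search_space_summary()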
Reloading Tuner from keras_tuner/cat_and_dog_bayesian/tuner0.json
Search space summary
Default search space size: 4
filters (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 64, 'step': 1, 'sampling': 'linear'}
lr (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.01, 'step': 2, 'sampling': 'log'}
gp (Choice)
{'default': 'max', 'conditions': [], 'values': ['max', 'ave'], 'ordered': False}
blocks (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
The tuner's search method can be used in the same way as fit.
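For example (train_ds and val_ds are placeholders for the cat-and-dog training and validation datasets, and the epoch count is an assumption):

# Sketch: run the search, then print the ranked trials.
tuner$search(train_ds,
             validation_data = val_ds,
             epochs = 10L)
tuner$results_summary()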
Results summary
Results in keras_tuner/cat_and_dog_bayesian
Showing 10 best trials
Objective(name="val_accuracy", direction="max")
Trial 00 summary
Hyperparameters:
filters: 48
lr: 1e-05
gp: ave
blocks: 1
Score: 0.7127913236618042
Trial 04 summary
Hyperparameters:
filters: 62
lr: 2e-05
gp: max
blocks: 2
Score: 0.6926796436309814
Trial 02 summary
Hyperparameters:
filters: 33
lr: 0.00032
gp: ave
blocks: 3
Score: 0.6901034116744995
Trial 03 summary
Hyperparameters:
filters: 56
lr: 4e-05
gp: max
blocks: 3
Score: 0.6897758841514587
Trial 01 summary
Hyperparameters:
filters: 24
lr: 0.00128
gp: ave
blocks: 4
Score: 0.6739906668663025
Trial 05 summary
Hyperparameters:
filters: 8
lr: 0.00512
gp: max
blocks: 4
Score: 0.6547898054122925
Trial 06 summary
Hyperparameters:
filters: 8
lr: 0.00512
gp: max
blocks: 1
Score: 0.6524869203567505
Trial 07 summary
Hyperparameters:
filters: 42
lr: 0.00512
gp: max
blocks: 1
Score: 0.6261098980903625
Trial 09 summary
Hyperparameters:
filters: 64
lr: 0.00512
gp: max
blocks: 1
Score: 0.6083124876022339
Trial 08 summary
Hyperparameters:
filters: 64
lr: 0.00512
gp: max
blocks: 1
Score: 0.6056148409843445
To tune the model training process (for example, the number of training epochs), we need to subclass HyperModel.
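One way to do this from R is reticulate's PyClass(). The sketch below reuses build_model and tunes the number of epochs inside fit, matching the search space summary that follows; the tuner arguments are again assumptions inferred from the output.

# Sketch: a HyperModel subclass that delegates model building to
# build_model and tunes the number of training epochs in fit().
CNNHyperModel <- PyClass(
  "CNNHyperModel",
  inherit = keras_tuner$HyperModel,
  defs = list(
    build = function(self, hp) {
      build_model(hp)
    },
    fit = function(self, hp, model, ...) {
      epochs <- hp$Int("epochs", min_value = 10L, max_value = 100L, step = 10L)
      model$fit(..., epochs = epochs)
    }
  )
)

tuner_fit <- keras_tuner$BayesianOptimization(
  CNNHyperModel(),
  objective = "val_accuracy",
  max_trials = 10L,
  directory = "keras_tuner",
  project_name = "cat_and_dog_bayesian_fit"
)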
Reloading Tuner from keras_tuner/cat_and_dog_bayesian_fit/tuner0.json
Search space summary
Default search space size: 5
filters (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 64, 'step': 1, 'sampling': 'linear'}
lr (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.01, 'step': 2, 'sampling': 'log'}
gp (Choice)
{'default': 'max', 'conditions': [], 'values': ['max', 'ave'], 'ordered': False}
blocks (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
epochs (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 100, 'step': 10, 'sampling': 'linear'}
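Running the search as before; epochs is now tuned inside fit, so it is not passed to search (train_ds and val_ds are the same placeholder datasets):

tuner_fit$search(train_ds,
                 validation_data = val_ds)
tuner_fit$results_summary()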
Results summary
Results in keras_tuner/cat_and_dog_bayesian_fit
Showing 10 best trials
Objective(name="val_accuracy", direction="max")
Trial 07 summary
Hyperparameters:
filters: 50
lr: 0.00512
gp: max
blocks: 1
epochs: 100
Score: 0.6940000057220459
Trial 05 summary
Hyperparameters:
filters: 32
lr: 0.00512
gp: max
blocks: 1
epochs: 70
Score: 0.6909999847412109
Trial 09 summary
Hyperparameters:
filters: 64
lr: 0.00512
gp: max
blocks: 1
epochs: 60
Score: 0.6884999871253967
Trial 01 summary
Hyperparameters:
filters: 25
lr: 0.00512
gp: max
blocks: 1
epochs: 90
Score: 0.6884999871253967
Trial 08 summary
Hyperparameters:
filters: 39
lr: 0.00512
gp: max
blocks: 3
epochs: 100
Score: 0.6759999990463257
Trial 04 summary
Hyperparameters:
filters: 56
lr: 0.00128
gp: max
blocks: 2
epochs: 20
Score: 0.6464999914169312
Trial 06 summary
Hyperparameters:
filters: 8
lr: 0.00512
gp: max
blocks: 1
epochs: 30
Score: 0.6230000257492065
Trial 03 summary
Hyperparameters:
filters: 52
lr: 0.00032
gp: max
blocks: 3
epochs: 40
Score: 0.6175000071525574
Trial 00 summary
Hyperparameters:
filters: 61
lr: 0.00256
gp: ave
blocks: 2
epochs: 10
Score: 0.5684999823570251
Trial 02 summary
Hyperparameters:
filters: 55
lr: 0.00016
gp: ave
blocks: 1
epochs: 50
Score: 0.5460000038146973
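The best model can then be retrieved and inspected. A sketch, assuming the tuner_fit object from above; get_best_models returns a Python list, converted here with py_to_r:

# Sketch: retrieve and summarize the best model found by the search.
best_model <- py_to_r(tuner_fit$get_best_models(num_models = 1L))[[1]]
best_model$summary()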
Model: "functional"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer) │ (None, 32, 32, 3) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D) │ (None, 32, 32, 50) │ 1,400 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D) │ (None, 16, 16, 50) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_max_pooling2d │ (None, 50) │ 0 │
│ (GlobalMaxPooling2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense) │ (None, 1) │ 51 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 1,451 (5.67 KB)
Trainable params: 1,451 (5.67 KB)
Non-trainable params: 0 (0.00 B)
After selecting the optimal hyperparameters, we typically retrain the model using the full dataset.
This differs from the hyperparameter search stage, where a portion of the training data is usually held out for validation.
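A sketch of this retraining step, assuming full_ds is a placeholder for the combined training data and reusing the best hyperparameters found by the search:

# Sketch: rebuild the model with the best hyperparameters, then retrain
# on the full dataset. full_ds is a hypothetical combined dataset.
best_hp <- py_to_r(tuner_fit$get_best_hyperparameters(num_trials = 1L))[[1]]
final_model <- build_model(best_hp)
final_model$fit(full_ds, epochs = best_hp$get("epochs"))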

Slides URL: https://ibsar-cv-workshop.patrickli.org/