# Print the R version, platform, locale and the attached/loaded packages
# of the session that rendered this document.
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.5.2 magrittr_1.5 tools_3.5.2 htmltools_0.3.6
## [5] yaml_2.2.0 Rcpp_1.0.0 stringi_1.2.4 rmarkdown_1.11
## [9] knitr_1.21 stringr_1.4.0 xfun_0.4 digest_0.6.18
## [13] evaluate_0.13
Source: https://tensorflow.rstudio.com/keras/articles/examples/mnist_acgan.html
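In this example, we train an Auxiliary Classifier Generative Adversarial Network (AC-GAN) on the MNIST data set: the generator is conditioned on a digit label, and the discriminator predicts both whether an image is real or generated and which digit class it belongs to.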
The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits (\(28 \times 28\)) that is commonly used for training and testing machine learning algorithms.
It contains 60,000 training images and 10,000 test images.
library(keras)
library(progress)
library(abind)
# Use the channels-first layout: image tensors are (samples, channels, rows,
# cols), which is what the (..., 1, 28, 28) shapes below assume.
k_set_image_data_format("channels_first")

# Load the MNIST data and take the sample counts from the data itself
# instead of hard-coding them.
mnist <- dataset_mnist()
num_train <- dim(mnist$train$x)[1]
num_test <- dim(mnist$test$x)[1]

# Rescale the pixel values so that 0 maps to -1 and 255 maps to 1 (the
# generator ends in a tanh, whose range is [-1, 1]), then add the single
# channel axis to obtain shape (n, 1, 28, 28).
mnist$train$x <- array_reshape(
  (mnist$train$x - 127.5) / 127.5,
  c(num_train, 1, 28, 28)
)
mnist$test$x <- array_reshape(
  (mnist$test$x - 127.5) / 127.5,
  c(num_test, 1, 28, 28)
)
# Functions ---------------------------------------------------------------
# Build the class-conditional generator.
#
# Arguments:
#   latent_size: length of the latent (z) vector; also used as the width of
#                the class embedding so both can be multiplied elementwise.
#
# Returns a keras model taking list(latent vector, class label) and producing
# an image tensor of shape (..., 1, 28, 28) through a final tanh activation.
build_generator <- function(latent_size) {
  # Decoder: dense projection -> (128, 7, 7) feature map -> two rounds of
  # 2x upsampling + convolution -> single-channel image
  decoder <- keras_model_sequential()
  decoder %>%
    layer_dense(units = 1024, input_shape = latent_size, activation = "relu") %>%
    layer_dense(units = 128 * 7 * 7, activation = "relu") %>%
    layer_reshape(target_shape = c(128, 7, 7)) %>%
    # (..., 7, 7) -> (..., 14, 14)
    layer_upsampling_2d(size = c(2, 2)) %>%
    layer_conv_2d(
      filters = 256,
      kernel_size = c(5, 5),
      padding = "same",
      activation = "relu",
      kernel_initializer = "glorot_normal"
    ) %>%
    # (..., 14, 14) -> (..., 28, 28)
    layer_upsampling_2d(size = c(2, 2)) %>%
    layer_conv_2d(
      filters = 128,
      kernel_size = c(5, 5),
      padding = "same",
      activation = "tanh",
      kernel_initializer = "glorot_normal"
    ) %>%
    # Collapse the 128 feature channels into one image channel
    layer_conv_2d(
      filters = 1,
      kernel_size = c(2, 2),
      padding = "same",
      activation = "tanh",
      kernel_initializer = "glorot_normal"
    )

  # The z space commonly referred to in GAN papers
  latent <- layer_input(shape = list(latent_size))

  # The class label, fed as a length-one sequence to the embedding
  image_class <- layer_input(shape = list(1))

  # One embedding row per MNIST class (10 classes), flattened from
  # (..., 1, latent_size) to (..., latent_size)
  class_embedding <- layer_embedding(
    image_class,
    input_dim = 10,
    output_dim = latent_size,
    embeddings_initializer = "glorot_normal"
  )
  cls <- layer_flatten(class_embedding)

  # Condition the latent vector on the class: Hadamard product of z and the
  # class embedding
  h <- layer_multiply(list(latent, cls))

  keras_model(list(latent, image_class), decoder(h))
}
# Build the discriminator: a convolutional feature extractor with two heads.
#
# Returns a keras model mapping an image of shape (1, 28, 28) to
# list(generation, auxiliary), where
#   generation: sigmoid unit, whether the discriminator thinks the image is
#               fake or not
#   auxiliary:  10-way softmax over the class the image belongs to
build_discriminator <- function() {
  # Relatively standard conv net with LeakyReLUs, as suggested in the
  # reference paper. Sequential models are extended in place by the pipe.
  feature_extractor <- keras_model_sequential()

  # First stage declares the input shape
  feature_extractor %>%
    layer_conv_2d(
      filters = 32,
      kernel_size = c(3, 3),
      padding = "same",
      strides = c(2, 2),
      input_shape = c(1, 28, 28)
    ) %>%
    layer_activation_leaky_relu() %>%
    layer_dropout(rate = 0.3)

  # Remaining stages share one pattern: conv -> LeakyReLU -> dropout
  stage_filters <- c(64, 128, 256)
  stage_strides <- c(1, 2, 1)
  for (stage in seq_along(stage_filters)) {
    stride <- stage_strides[stage]
    feature_extractor %>%
      layer_conv_2d(
        filters = stage_filters[stage],
        kernel_size = c(3, 3),
        padding = "same",
        strides = c(stride, stride)
      ) %>%
      layer_activation_leaky_relu() %>%
      layer_dropout(rate = 0.3)
  }

  feature_extractor %>% layer_flatten()

  image <- layer_input(shape = c(1, 28, 28))
  features <- feature_extractor(image)

  # Head 1: real-vs-generated decision
  fake <- layer_dense(
    features,
    units = 1, activation = "sigmoid", name = "generation"
  )
  # Head 2: digit class
  aux <- layer_dense(
    features,
    units = 10, activation = "softmax", name = "auxiliary"
  )

  keras_model(image, list(fake, aux))
}
# Training hyperparameters (batch and latent size taken from the paper)
epochs <- 15
batch_size <- 100
latent_size <- 100

# Adam settings, attributed to https://arxiv.org/abs/1511.06434
# NOTE(review): confirm that this learning rate matches the value the cited
# paper recommends, or that the deviation is intentional.
adam_lr <- 0.00005
adam_beta_1 <- 0.5

# Build and compile the discriminator. The losses are listed in the order of
# the model outputs: binary cross-entropy for "generation", sparse
# categorical cross-entropy for "auxiliary".
discriminator <- build_discriminator()
compile(
  discriminator,
  optimizer = optimizer_adam(lr = adam_lr, beta_1 = adam_beta_1),
  loss = list("binary_crossentropy", "sparse_categorical_crossentropy")
)
discriminator
## Model
## ___________________________________________________________________________
## Layer (type) Output Shape Param # Connected to
## ===========================================================================
## input_1 (InputLayer) (None, 1, 28, 28 0
## ___________________________________________________________________________
## sequential (Sequential) (None, 12544) 387840 input_1[0][0]
## ___________________________________________________________________________
## generation (Dense) (None, 1) 12545 sequential[1][0]
## ___________________________________________________________________________
## auxiliary (Dense) (None, 10) 125450 sequential[1][0]
## ===========================================================================
## Total params: 525,835
## Trainable params: 525,835
## Non-trainable params: 0
## ___________________________________________________________________________
# Build and compile the stand-alone generator, using the same Adam settings
# as the discriminator.
generator <- build_generator(latent_size)
compile(
  generator,
  optimizer = optimizer_adam(lr = adam_lr, beta_1 = adam_beta_1),
  loss = "binary_crossentropy"
)
generator
## Model
## ___________________________________________________________________________
## Layer (type) Output Shape Param # Connected to
## ===========================================================================
## input_3 (InputLayer) (None, 1) 0
## ___________________________________________________________________________
## embedding (Embedding) (None, 1, 100) 1000 input_3[0][0]
## ___________________________________________________________________________
## input_2 (InputLayer) (None, 100) 0
## ___________________________________________________________________________
## flatten_1 (Flatten) (None, 100) 0 embedding[0][0]
## ___________________________________________________________________________
## multiply (Multiply) (None, 100) 0 input_2[0][0]
## flatten_1[0][0]
## ___________________________________________________________________________
## sequential_1 (Sequentia (None, 1, 28, 28 8171521 multiply[0][0]
## ===========================================================================
## Total params: 8,172,521
## Trainable params: 8,172,521
## Non-trainable params: 0
## ___________________________________________________________________________
# Combined model: (latent vector, class label) -> generator -> discriminator.
latent <- layer_input(shape = list(latent_size))
image_class <- layer_input(shape = list(1), dtype = "int32")
fake <- generator(list(latent, image_class))

# Freeze the discriminator before compiling the combined model, so that only
# the generator is trained through it.
freeze_weights(discriminator)
results <- discriminator(fake)

combined <- keras_model(list(latent, image_class), results)
compile(
  combined,
  optimizer = optimizer_adam(lr = adam_lr, beta_1 = adam_beta_1),
  loss = list("binary_crossentropy", "sparse_categorical_crossentropy")
)
combined
## Model
## ___________________________________________________________________________
## Layer (type) Output Shape Param # Connected to
## ===========================================================================
## input_4 (InputLayer) (None, 100) 0
## ___________________________________________________________________________
## input_5 (InputLayer) (None, 1) 0
## ___________________________________________________________________________
## model_1 (Model) (None, 1, 28, 28 8172521 input_4[0][0]
## input_5[0][0]
## ___________________________________________________________________________
## model (Model) [(None, 1), (Non 525835 model_1[1][0]
## ===========================================================================
## Total params: 8,698,356
## Trainable params: 8,172,521
## Non-trainable params: 525,835
## ___________________________________________________________________________
# Training loop ------------------------------------------------------------
#
# Each epoch:
#   1. for every mini-batch, train the discriminator on real + generated
#      images, then train the generator through the combined model;
#   2. evaluate both models on the test set;
#   3. print a loss report and plot one generated digit per class.

# Number of complete mini-batches per epoch. It does not change between
# epochs, so it is computed once.
num_batches <- num_train %/% batch_size
stopifnot(num_batches > 0)

# Row template shared by all lines of the per-epoch report. Each row prints
# three loss values (presumably the total loss followed by the "generation"
# and "auxiliary" losses -- confirm against the models' metric names).
row_fmt <- "\n%22s : loss %4.2f | %5.2f | %5.2f"
report_losses <- function(label, losses) {
  cat(sprintf(row_fmt, label, losses[1], losses[2], losses[3]))
}

for (epoch in seq_len(epochs)) {
  pb <- progress_bar$new(
    total = num_batches,
    format = sprintf("epoch %s/%s :elapsed [:bar] :percent :eta", epoch, epochs),
    clear = FALSE
  )

  # Per-batch losses are stored in preallocated lists and bound into a
  # matrix once per epoch, instead of growing a matrix with rbind().
  gen_losses <- vector("list", num_batches)
  disc_losses <- vector("list", num_batches)

  # One random permutation of the training set per epoch; consecutive slices
  # of it are disjoint random batches (sampling without replacement).
  shuffled_indexes <- sample.int(num_train)

  for (index in seq_len(num_batches)) {
    pb$tick()

    # Generate a new batch of noise
    noise <- runif(n = batch_size * latent_size, min = -1, max = 1) %>%
      matrix(nrow = batch_size, ncol = latent_size)

    # Get a batch of real images
    batch_start <- (index - 1) * batch_size + 1
    batch <- shuffled_indexes[batch_start:(batch_start + batch_size - 1)]
    image_batch <- mnist$train$x[batch, , , , drop = FALSE]
    label_batch <- mnist$train$y[batch]

    # Sample some labels from p_c, shaped (batch_size, 1) so they can be fed
    # to the embedding layer as length-one sequences
    sampled_labels <- sample(0:9, batch_size, replace = TRUE) %>%
      matrix(ncol = 1)

    # Generate a batch of fake images, using the sampled labels as a
    # conditioner
    generated_images <- predict(generator, list(noise, sampled_labels))

    # Real images first (target 1), generated images second (target 0)
    X <- abind(image_batch, generated_images, along = 1)
    y <- c(rep(1L, batch_size), rep(0L, batch_size)) %>% matrix(ncol = 1)
    aux_y <- c(label_batch, sampled_labels) %>% matrix(ncol = 1)

    # Train discriminator on 2 * batch size (real + fake) images
    disc_loss <- train_on_batch(
      discriminator,
      x = X,
      y = list(y, aux_y)
    )
    disc_losses[[index]] <- unlist(disc_loss)

    # Make new noise. Generate 2 * batch size here such that the generator
    # optimizes over an identical number of images as the discriminator
    noise <- runif(2 * batch_size * latent_size, min = -1, max = 1) %>%
      matrix(nrow = 2 * batch_size, ncol = latent_size)
    sampled_labels <- sample(0:9, size = 2 * batch_size, replace = TRUE) %>%
      matrix(ncol = 1)

    # Want to train the generator to trick the discriminator: for the
    # generator, all the {fake, not-fake} targets say not-fake
    trick <- rep(1, 2 * batch_size) %>% matrix(ncol = 1)

    combined_loss <- train_on_batch(
      combined,
      list(noise, sampled_labels),
      list(trick, sampled_labels)
    )
    gen_losses[[index]] <- unlist(combined_loss)
  }

  # One row per batch, one column per reported loss
  epoch_disc_loss <- do.call(rbind, disc_losses)
  epoch_gen_loss <- do.call(rbind, gen_losses)

  cat(sprintf("\nTesting for epoch %02d:", epoch))

  # Evaluate the testing loss here

  # Generate a new batch of noise
  noise <- runif(num_test * latent_size, min = -1, max = 1) %>%
    matrix(nrow = num_test, ncol = latent_size)

  # Sample some labels from p_c and generate images from them
  sampled_labels <- sample(0:9, size = num_test, replace = TRUE) %>%
    matrix(ncol = 1)
  generated_images <- predict(generator, list(noise, sampled_labels))

  X <- abind(mnist$test$x, generated_images, along = 1)
  y <- c(rep(1, num_test), rep(0, num_test)) %>% matrix(ncol = 1)
  aux_y <- c(mnist$test$y, sampled_labels) %>% matrix(ncol = 1)

  # See if the discriminator can figure itself out...
  discriminator_test_loss <- evaluate(
    discriminator, X, list(y, aux_y),
    verbose = FALSE
  ) %>% unlist()
  discriminator_train_loss <- colMeans(epoch_disc_loss)

  # Make new noise
  noise <- runif(2 * num_test * latent_size, min = -1, max = 1) %>%
    matrix(nrow = 2 * num_test, ncol = latent_size)
  sampled_labels <- sample(0:9, size = 2 * num_test, replace = TRUE) %>%
    matrix(ncol = 1)
  trick <- rep(1, 2 * num_test) %>% matrix(ncol = 1)

  # unlist() as for the discriminator above, so that indexing the result
  # yields plain numbers for sprintf() even if evaluate() returns a list
  generator_test_loss <- evaluate(
    combined,
    list(noise, sampled_labels),
    list(trick, sampled_labels),
    verbose = FALSE
  ) %>% unlist()
  generator_train_loss <- colMeans(epoch_gen_loss)

  # Generate an epoch report on performance
  report_losses("generator (train)", generator_train_loss)
  report_losses("generator (test)", generator_test_loss)
  report_losses("discriminator (train)", discriminator_train_loss)
  report_losses("discriminator (test)", discriminator_test_loss)
  cat("\n")

  # Generate some digits to display: one image per class 0..9
  noise <- runif(10 * latent_size, min = -1, max = 1) %>%
    matrix(nrow = 10, ncol = latent_size)
  sampled_labels <- 0:9 %>%
    matrix(ncol = 1)

  # Get a batch to display
  generated_images <- predict(
    generator,
    list(noise, sampled_labels)
  )

  # Place the ten 28 x 28 digits side by side in a single image
  digit_tiles <- lapply(
    seq_len(nrow(sampled_labels)),
    function(i) generated_images[i, , , ]
  )
  img <- do.call(cbind, digit_tiles)

  # Map the pixel values from [-1, 1] back to [0, 1] for plotting
  ((img + 1) / 2) %>%
    as.raster() %>%
    plot()
}
Training for 15 epochs took about 33 minutes on a single NVIDIA P100 GPU.
# Submit the training script as a CloudML job. master_type selects the
# machine type; "standard_p100" is presumably the single-P100 configuration
# mentioned above -- confirm against the cloudml documentation.
library(cloudml)
cloudml_train("mnist_acgan.R", master_type = "standard_p100")
Collect job output:
# Collect the output of the training job, identified by its job id.
library(cloudml)
job_collect("cloudml_2019_03_12_154649238")
View runs:
library(cloudml)
## Loading required package: tfruns
# View the collected run stored under the runs/ directory.
view_run("runs/cloudml_2019_03_12_154649238")
## starting httpd help server ...
## done