Convolutional Neural Network
The convolutional neural network (CNN) is one of the key building blocks of deep learning. Mathematically, a convolution is a linear operator whose action is "local", in the sense that each output depends only on a small window of inputs. The same kernel is applied at every location, and this weight sharing reduces the number of parameters significantly.
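To make the locality and weight sharing concrete, here is a minimal sketch of a one-dimensional convolution in plain Julia; the kernel w, the input u, and the "valid" windowing convention are illustrative choices, not part of the script below.

# Each output v[i] is a weighted sum of a small window u[i:i+2] (locality),
# and the same 3-parameter kernel w is reused at every position (weight sharing).
# As is common in deep learning, no kernel flip is applied (cross-correlation).
w = [1.0, -2.0, 1.0]
u = randn(10)
v = [sum(w .* u[i:i+2]) for i = 1:length(u)-2]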
One remarkable feature of CNNs is that they are massively parallelizable. This parallelism makes CNNs very efficient on GPUs, which excel at performing a large number of simple tasks at the same time.
For practical use of CNNs, we work with images, which are stored as four-dimensional tensors: batch size, height, width, and channel. A CNN transforms such images into new images with the same four dimensions, but possibly with different heights, widths, and numbers of channels. In the following script, we use CNNs instead of fully connected neural networks to train a variational autoencoder. Readers can compare the results with those in this article.
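To see the shape bookkeeping concretely, the following sketch applies a single transposed convolution layer to a dummy batch of images; the layer, its (filters, kernel_size, strides) arguments, and the input sizes are illustrative and simply mirror the Conv2DTranspose calls in the script below.

using ADCME
# A dummy batch of 8 images of size 16×16 with 3 channels, stored as (batch, height, width, channel).
img = constant(zeros(8, 16, 16, 3))
layer = Conv2DTranspose(4, 4, 2, use_bias=false)  # 4 output channels, 4×4 kernel, stride 2
out = layer(img)
@info size(out)  # still four-dimensional: the batch size is unchanged, while height, width, and channels differ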
You are also encouraged to run the same script on CPUs and on GPUs. You may be surprised by the huge performance gap between these two computing environments when training CNNs. We also observe some CNN artifacts (the dots in the images).
using ADCME
using PyPlot
using MLDatasets
using ProgressMeter
using Images
# The Generator maps a latent vector of length dim_z to a 32×32 image
# through a stack of transposed convolutions.
mutable struct Generator
    dim_z::Int64
    layers::Array
end
function Generator(dim_z::Int64 = 100, ngf::Int64 = 8)
    layers = [
        # (batch, 1, 1, dim_z) -> (batch, 4, 4, 32*ngf)
        Conv2DTranspose(ngf*32, 4, 1, use_bias=false)
        BatchNormalization()
        relu
        # (batch, 4, 4, 32*ngf) -> (batch, 4, 4, 16*ngf)
        Conv2DTranspose(ngf*16, 4, 1, padding="same", use_bias=false)
        BatchNormalization()
        relu
        # (batch, 4, 4, 16*ngf) -> (batch, 7, 7, 8*ngf)
        Conv2DTranspose(ngf*8, 4, 1, use_bias=false)
        # pad height and width from 7×7 to 8×8
        x -> pad(x, [
            0 0
            0 1
            0 1
            0 0
        ])
        BatchNormalization()
        relu
        # (batch, 8, 8, 8*ngf) -> (batch, 32, 32, 1)
        Conv2DTranspose(1, 4, 4, use_bias = false)
        BatchNormalization()
        sigmoid
    ]
    Generator(dim_z, layers)
end
function (g::Generator)(z)
    z = constant(z)
    # reshape the latent code to a 1×1 "image" with dim_z channels
    z = reshape(z, (-1, 1, 1, g.dim_z))
    @info size(z)
    for l in g.layers
        z = l(z)
        @info size(z)
    end
    return z
end
function encoder(x, n_hidden, n_output, rate)
    local μ, σ
    variable_scope("encoder") do
        y = dense(x, n_hidden, activation = "elu")
        y = dropout(y, rate, ADCME.options.training.training)
        y = dense(y, n_hidden, activation = "tanh")
        y = dropout(y, rate, ADCME.options.training.training)
        y = dense(y, 2n_output)
        # the first n_output entries are the mean, the rest parameterize the standard deviation
        μ = y[:, 1:n_output]
        σ = 1e-6 + softplus(y[:, n_output+1:end])
    end
    return μ, σ
end
function decoder(z, n_hidden, n_output, rate)
    # the CNN generator replaces the fully connected decoder;
    # n_hidden, n_output, and rate are kept only for interface compatibility
    Generator(dim_z)(z)
end
function autoencoder(xh, x, dim_img, dim_z, n_hidden, rate)
    μ, σ = encoder(xh, n_hidden, dim_z, rate)
    # reparameterization trick: z = μ + σ ⊙ ε with ε ~ N(0, I)
    z = μ + σ .* tf.random_normal(size(μ), 0, 1, dtype=tf.float64)
    y = decoder(z, n_hidden, dim_img, rate)
    y = clip(y, 1e-8, 1-1e-8)
    y = tf.reshape(y, (-1, 32^2))
    # Bernoulli log-likelihood of the reconstruction and KL divergence to the standard normal prior
    marginal_likelihood = sum(x .* log(y) + (1-x) .* log(1-y), dims=2)
    KL_divergence = 0.5 * sum(μ^2 + σ^2 - log(1e-8 + σ^2) - 1, dims=2)
    marginal_likelihood = mean(marginal_likelihood)
    KL_divergence = mean(KL_divergence)
    ELBO = marginal_likelihood - KL_divergence
    loss = -ELBO
    return y, loss, -marginal_likelihood, KL_divergence
end
function step(epoch)
    tx = train_x[1:batch_size, :]
    # one pass over the training set in mini-batches
    @showprogress for i = 1:div(60000, batch_size)
        idx = Array((i-1)*batch_size+1:i*batch_size)
        run(sess, opt, x=>train_x[idx,:])
    end
    # evaluate the losses and reconstructions in inference mode (dropout disabled)
    y_, loss_, ml_, kl_ = run(sess, [y, loss, ml, KL_divergence],
        feed_dict = Dict(
            ADCME.options.training.training=>false,
            x => tx
        ))
    println("epoch $epoch: L_tot = $(loss_), L_likelihood = $(ml_), L_KL = $(kl_)")
    # plot a 3×3 grid of reconstructed digits
    close("all")
    for i = 1:3
        for j = 1:3
            k = (i-1)*3 + j
            img = reshape(y_[k,:], 32, 32)'|>Array
            img = imresize(img, 28, 28)
            subplot(3,3,k)
            imshow(img)
        end
    end
    savefig("result$epoch.png")
end
# hyperparameters
n_hidden = 500
rate = 0.1          # dropout rate
dim_z = 100         # latent dimension
dim_img = 32^2      # flattened 32×32 image
batch_size = 32
# build the computational graph
ADCME.options.training.training = placeholder(true)
x = placeholder(Float64, shape = [32, 32^2])   # a batch of 32 flattened images
xh = x
y, loss, ml, KL_divergence = autoencoder(xh, x, dim_img, dim_z, n_hidden, rate)
opt = AdamOptimizer(1e-3).minimize(loss)
# load MNIST and resize the 28×28 digits to 32×32
train_x_ = MNIST.traintensor(Float64);
train_x = zeros(60000, 32^2)
for i = 1:60000
    train_x[i,:] = imresize(train_x_[:, :, i], 32, 32)[:]
end
sess = Session(); init(sess)
for i = 1:100
    @info i
    step(i)
end