Generative Adversarial Networks (GANs) are a class of machine learning frameworks designed by Ian Goodfellow and his colleagues in 2014. GANs consist of two neural networks—the Generator and the Discriminator—that compete against each other to produce realistic data samples. In this guide, we'll focus on building a GAN to generate handwritten digit images similar to those in the MNIST dataset.
The GAN architecture comprises two main components: the Generator, which maps random noise vectors from a latent space to synthetic images, and the Discriminator, a binary classifier that tries to distinguish real images from generated ones.
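For reference, the two networks play the minimax game introduced in the original 2014 paper:

min_G max_D V(D, G) = E_{x ~ p_data(x)}[log D(x)] + E_{z ~ p_z(z)}[log(1 - D(G(z)))]

In practice (and in the training loop below), the Generator is trained with the non-saturating variant: it maximizes log D(G(z)), which here takes the form of minimizing binary cross-entropy against "real" labels.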
Begin by importing the essential libraries required for building and training the GAN.
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
import os
Load the MNIST dataset and preprocess it to prepare for training the GAN.
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32')
x_train = (x_train - 127.5) / 127.5  # Normalize to [-1, 1] to match the Generator's tanh output
BUFFER_SIZE = 60000
BATCH_SIZE = 128
LATENT_DIM = 100
train_dataset = tf.data.Dataset.from_tensor_slices(x_train).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
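As an optional sanity check, you can confirm the shape of one batch coming out of the pipeline:

# Peek at a single batch; expect (128, 28, 28, 1)
for batch in train_dataset.take(1):
    print(batch.shape)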
The Generator takes random noise as input and produces synthetic images.
def build_generator(latent_dim):
    model = models.Sequential([
        layers.Dense(7 * 7 * 256, use_bias=False, input_shape=(latent_dim,)),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
        # Upsample: 7x7 -> 7x7 -> 14x14 -> 28x28
        layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        # tanh keeps outputs in [-1, 1], matching the normalized training data
        layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh')
    ])
    return model
generator = build_generator(LATENT_DIM)
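Before wiring up the full GAN, it helps to verify the Generator's output shape (a minimal check; the network is untrained at this point, so the image itself is noise):

# Generate one image from random noise; expect shape (1, 28, 28, 1) with values in [-1, 1]
sample_noise = tf.random.normal([1, LATENT_DIM])
sample_image = generator(sample_noise, training=False)
print(sample_image.shape)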
The Discriminator evaluates images and determines their authenticity.
def build_discriminator():
    model = models.Sequential([
        # Downsample: 28x28 -> 14x14 -> 7x7
        layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', input_shape=[28, 28, 1]),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Flatten(),
        # Sigmoid output: estimated probability that the input image is real
        layers.Dense(1, activation='sigmoid')
    ])
    return model
discriminator = build_discriminator()
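A matching check for the Discriminator: feeding it the sample_image from the check above should yield a single probability (typically somewhere around 0.5 before any training):

# Classify the sample image; expect shape (1, 1) with a value in (0, 1)
decision = discriminator(sample_image, training=False)
print(decision.shape, float(decision[0, 0]))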
Configure the Discriminator with an optimizer and loss function.
# Compiling attaches the Adam optimizer that the custom training loop below uses directly
discriminator.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
Combine the Generator and Discriminator into a single GAN model.
# Freeze the Discriminator while it is stacked inside the combined model, so that
# compiling `gan` treats only the Generator's weights as trainable
discriminator.trainable = False
gan_input = layers.Input(shape=(LATENT_DIM,))
generated_image = generator(gan_input)
gan_output = discriminator(generated_image)
gan = models.Model(gan_input, gan_output)
gan.compile(optimizer='adam', loss='binary_crossentropy')
# Unfreeze afterwards: the custom training loop below reads
# discriminator.trainable_variables directly, which would be empty if the flag stayed False
discriminator.trainable = True
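As a quick, optional smoke test, the stacked model should map noise straight to a single probability:

# Noise in, probability out; expect shape (1, 1)
print(gan(tf.random.normal([1, LATENT_DIM])).shape)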
Implement the training loop for the GAN, alternating between training the Discriminator and the Generator.
EPOCHS = 100
CHECKPOINT_DIR = './gan_checkpoints'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
checkpoint_prefix = os.path.join(CHECKPOINT_DIR, "ckpt")
checkpoint = tf.train.Checkpoint(generator=generator,
                                 discriminator=discriminator,
                                 gan=gan)
gen_losses = []
disc_losses = []
for epoch in range(EPOCHS):
    for real_images in train_dataset:
        batch_size = real_images.shape[0]
        noise = tf.random.normal([batch_size, LATENT_DIM])

        # Generate fake images
        generated_images = generator(noise, training=True)

        # Combine real and fake images
        combined_images = tf.concat([real_images, generated_images], axis=0)

        # Assign labels: real=1, fake=0
        labels = tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0)
        labels += 0.05 * tf.random.normal(labels.shape)  # Label noise, a common stabilization trick

        # Train the Discriminator on the combined batch
        with tf.GradientTape() as disc_tape:
            predictions = discriminator(combined_images, training=True)
            disc_loss = tf.keras.losses.BinaryCrossentropy()(labels, predictions)
        grads = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
        discriminator.optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

        # Train the Generator: fresh noise, labeled "real" so the Generator
        # is rewarded for fooling the Discriminator
        noise = tf.random.normal([batch_size, LATENT_DIM])
        misleading_labels = tf.ones((batch_size, 1))
        with tf.GradientTape() as gen_tape:
            generated_images = generator(noise, training=True)
            predictions = discriminator(generated_images, training=False)
            gen_loss = tf.keras.losses.BinaryCrossentropy()(misleading_labels, predictions)
        grads = gen_tape.gradient(gen_loss, generator.trainable_variables)
        gan.optimizer.apply_gradients(zip(grads, generator.trainable_variables))

    # Record the last batch's losses as this epoch's summary
    gen_losses.append(gen_loss.numpy())
    disc_losses.append(disc_loss.numpy())

    # Save checkpoints and report every 10 epochs
    if (epoch + 1) % 10 == 0:
        checkpoint.save(file_prefix=checkpoint_prefix)
        print(f'Epoch {epoch+1}, Generator Loss: {gen_loss.numpy():.4f}, Discriminator Loss: {disc_loss.numpy():.4f}')

        # Optionally, visualize progress with a 4x4 grid of samples
        noise = tf.random.normal([16, LATENT_DIM])
        generated_images = generator(noise, training=False)
        fig = plt.figure(figsize=(4, 4))
        for i in range(generated_images.shape[0]):
            plt.subplot(4, 4, i + 1)
            plt.imshow(generated_images[i, :, :, 0] * 127.5 + 127.5, cmap='gray')  # Rescale [-1, 1] -> [0, 255]
            plt.axis('off')
        plt.show()
Checkpointing is crucial for saving the state of your model during training; it lets you resume training later or use the trained model for inference. The checkpoint object and checkpoint_prefix were created before the training loop above, and a checkpoint is saved every 10 epochs.
To restore from the latest checkpoint:
checkpoint.restore(tf.train.latest_checkpoint(CHECKPOINT_DIR))
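If you would rather keep only the most recent checkpoints on disk, tf.train.CheckpointManager is a convenient alternative (a sketch; max_to_keep=3 is an illustrative choice):

# Optional: keep only the 3 most recent checkpoints
manager = tf.train.CheckpointManager(checkpoint, CHECKPOINT_DIR, max_to_keep=3)
# Inside the training loop, manager.save() would replace checkpoint.save(...);
# restoring then becomes:
checkpoint.restore(manager.latest_checkpoint)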
Monitoring the Generator and Discriminator losses helps in understanding the training dynamics and ensuring that neither model overwhelms the other.
plt.figure(figsize=(10,5))
plt.plot(gen_losses, label="Generator Loss")
plt.plot(disc_losses, label="Discriminator Loss")
plt.title("Training Losses")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()
Latent space visualization shows how varying the input noise vectors affects the generated images. Because the latent space here is 100-dimensional, the grid below varies only the first two dimensions and holds the rest at zero, which gives a partial but still instructive view of how the Generator maps latent variables to image features.
def plot_latent_space(generator, n=10, figsize=10):
    # Create an n x n grid over the first two latent dimensions,
    # holding the remaining LATENT_DIM - 2 dimensions at zero
    grid_x = np.linspace(-2, 2, n)
    grid_y = np.linspace(-2, 2, n)
    figure = np.zeros((28 * n, 28 * n))
    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            z = np.array([[xi, yi] + [0] * (LATENT_DIM - 2)], dtype='float32')
            generated_image = generator.predict(z, verbose=0)
            digit = generated_image[0].reshape(28, 28)
            figure[i * 28: (i + 1) * 28,
                   j * 28: (j + 1) * 28] = digit
    plt.figure(figsize=(figsize, figsize))
    plt.imshow(figure, cmap='gray')
    plt.axis('off')
    plt.show()

# Call the function
plot_latent_space(generator)
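Since the grid above explores only two of the 100 dimensions, a complementary and commonly used view is to interpolate between two random latent vectors and watch the images morph. This is a sketch under the same LATENT_DIM assumption; steps=8 is an arbitrary choice:

# Linearly interpolate between two random latent vectors and plot the results
z_start = tf.random.normal([1, LATENT_DIM])
z_end = tf.random.normal([1, LATENT_DIM])
steps = 8
plt.figure(figsize=(steps, 1))
for k in range(steps):
    alpha = k / (steps - 1)
    z = (1 - alpha) * z_start + alpha * z_end  # Blend the two latent vectors
    image = generator(z, training=False)
    plt.subplot(1, steps, k + 1)
    plt.imshow(image[0, :, :, 0] * 127.5 + 127.5, cmap='gray')
    plt.axis('off')
plt.show()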
After training the GAN, the next step is to use the generated images to train a CNN for image classification. This demonstrates the mechanics of using GAN output as training data; as discussed below, the quality of the labels attached to the synthetic images determines how useful that data actually is.
Use the trained Generator to produce synthetic MNIST-like images.
def generate_synthetic_images(generator, latent_dim, num_images, batch_size=256):
    # Generate in batches so all num_images don't need one giant forward pass
    images = []
    for start in range(0, num_images, batch_size):
        n = min(batch_size, num_images - start)
        noise = tf.random.normal([n, latent_dim])
        images.append(generator(noise, training=False).numpy())
    return np.concatenate(images, axis=0) * 127.5 + 127.5  # Rescale to [0, 255]

num_synthetic = 60000
synthetic_images = generate_synthetic_images(generator, LATENT_DIM, num_synthetic)
# An unconditional GAN yields no class labels; these random labels are placeholders,
# and a classifier trained on them can do no better than chance.
# See the pseudo-labeling sketch below for a more meaningful option.
synthetic_labels = np.random.randint(0, 10, num_synthetic)
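One pragmatic way to get meaningful labels from an unconditional GAN is pseudo-labeling: train a classifier on the real, labeled MNIST training set and use its predictions as labels for the synthetic images. The sketch below is an illustrative addition, not part of the original pipeline; the small labeler model and its 3 training epochs are arbitrary choices.

# Pseudo-labeling sketch: label synthetic images with a classifier
# trained on the real MNIST training set
(x_real, y_real), _ = tf.keras.datasets.mnist.load_data()
x_real = x_real.reshape(-1, 28, 28, 1).astype('float32') / 255.0

labeler = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(10, activation='softmax')
])
labeler.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
labeler.fit(x_real, y_real, epochs=3, batch_size=128, verbose=0)

# Replace the random placeholder labels with the labeler's predictions
pseudo_probs = labeler.predict(synthetic_images / 255.0, verbose=0)
synthetic_labels = np.argmax(pseudo_probs, axis=1)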
Create a CNN architecture suitable for classifying the generated images.
def build_cnn():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn = build_cnn()
Prepare the synthetic images and labels for training the CNN.
# Ensure shape (N, 28, 28, 1); this is a no-op if generate_synthetic_images was used
synthetic_images = synthetic_images.reshape(-1, 28, 28, 1).astype('float32')
# Normalize pixel values from [0, 255] to [0, 1]
synthetic_images = synthetic_images / 255.0
cnn.fit(synthetic_images, synthetic_labels, epochs=10, batch_size=64, validation_split=0.2)
After training, evaluate the CNN on the real MNIST test set. Note that with the random placeholder labels, accuracy should hover near chance (about 10%); meaningful labels, such as the pseudo-labels sketched earlier, are what make synthetic training data genuinely useful.
# Load real MNIST test data
(_, _), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0
# Evaluate the CNN
test_loss, test_acc = cnn.evaluate(x_test, y_test, verbose=2)
print(f'Test Accuracy: {test_acc*100:.2f}%')
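For context, you might compare against the same architecture trained on real labeled data; the gap between the two scores shows how much the synthetic labels cost. This is an optional baseline, reusing build_cnn and the test set already loaded above:

# Optional baseline: identical CNN trained on real labeled MNIST data
(x_real_train, y_real_train), _ = tf.keras.datasets.mnist.load_data()
x_real_train = x_real_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
baseline_cnn = build_cnn()
baseline_cnn.fit(x_real_train, y_real_train, epochs=10, batch_size=64, verbose=0)
_, baseline_acc = baseline_cnn.evaluate(x_test, y_test, verbose=2)
print(f'Baseline (real data) Accuracy: {baseline_acc*100:.2f}%')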
This guide has walked you through building a Generative Adversarial Network on the MNIST dataset, saving and managing model checkpoints, visualizing both training metrics and the latent space, and finally feeding GAN-generated images into a Convolutional Neural Network for image classification. Combining GANs and CNNs this way can support data augmentation strategies, provided the synthetic images carry meaningful labels, for example from a conditional GAN or the pseudo-labeling approach sketched above.