[Tensorflow&Keras] Keras Introduction

8 minute read

Keras

  • from https://keras.io/guides/functional_api/
import numpy as np
import tensorflow as tf
# `import keras` (the standalone package from keras.io) also works; here we use tf.keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input

Keras Introduction

  • Input() : used to instantiate a Keras tensor
    • Keras tensor: a symbolic tensor-like object, which we augment with certain attributes
# 784-dimensional vector input
inputs = Input(shape=(784,))   # 784-dimensional vector. The batch size is always omitted 
                               # since only the shape of each sample is specified.
# image input
img_inputs = Input(shape=(32, 32, 3))
inputs.shape, img_inputs.shape, inputs.dtype, img_inputs.dtype
(TensorShape([None, 784]),
 TensorShape([None, 32, 32, 3]),
 tf.float32,
 tf.float32)


# create a new node in the graph of layers
dense = layers.Dense(64, activation="relu")
x = dense(inputs)
# few more layers
x = layers.Dense(64, activation="relu")(x)
outputs = layers.Dense(10)(x)
# create Model
model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
model.summary()
Model: "mnist_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                650       
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


# must install pydot, graphviz
keras.utils.plot_model(model, "my_first_model.png", show_shapes=True)

(model graph plot saved as my_first_model.png)


Using Functional API

inputs = Input(shape=(784,))
x = Dense(64, activation="relu")(inputs)
x = Dense(64, activation="relu")(x)
outputs = Dense(10)(x)
# create a Model by specifying its inputs and outputs in the graph of layers
model = Model(inputs=inputs, outputs=outputs, name="mnist_model")
model.summary()
Model: "mnist_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_3 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_48 (Dense)             (None, 64)                50240     
_________________________________________________________________
dense_49 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_50 (Dense)             (None, 10)                650       
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________

Number of parameters to train (weights + biases per layer; verified in the sketch below):

  • 784 * 64 + 64 = 50240
  • 64 * 64 + 64 = 4160
  • 64 * 10 + 10 = 650
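A quick sketch verifying these counts (each Dense layer holds an inputs × units weight matrix plus a units-long bias vector):

# sketch: Dense layer parameter count = weight matrix + bias vector
def dense_params(n_in, n_out):
    return n_in * n_out + n_out

print(dense_params(784, 64), dense_params(64, 64), dense_params(64, 10))
# 50240 4160 650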


Using Sequential API

# another way to define the model
model = Sequential()
model.add(Dense(64, input_shape=(784,), activation='relu')) # specify input_shape in the first layer
model.add(Dense(64, activation='relu'))
model.add(Dense(10))
model.summary()
Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_54 (Dense)             (None, 64)                50240     
_________________________________________________________________
dense_55 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_56 (Dense)             (None, 10)                650       
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


Training, Evaluation, and Inference

  • train the Sequential model defined above on the MNIST dataset
from tensorflow.keras.datasets.mnist import load_data

(x_train, y_train), (x_test, y_test) = load_data()
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11493376/11490434 [==============================] - 0s 0us/step
11501568/11490434 [==============================] - 0s 0us/step
(60000, 28, 28) (10000, 28, 28) (60000,) (10000,)
x_train = x_train.reshape(60000, 784).astype("float32") / 255.
x_test = x_test.reshape(10000, 784).astype("float32") / 255.
# one-hot encoding
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
print(y_train.shape, y_test.shape)
(60000, 10) (10000, 10)
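As an aside, the one-hot step is optional: SparseCategoricalCrossentropy accepts integer labels directly and yields the same loss value. A quick check (sketch):

# sketch: sparse loss on an integer label == dense loss on the one-hot label
sparse_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
dense_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
logits = tf.constant([[2.0, 1.0, 0.1]])
print(sparse_loss(tf.constant([0]), logits).numpy())               # ~0.417
print(dense_loss(tf.constant([[1.0, 0.0, 0.0]]), logits).numpy())  # same value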
model.compile(
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
# from_logits=True: informs the loss function that the output values generated by the model
# are not normalized, a.k.a. logits (i.e., softmax has not been applied to them)
history = model.fit(x_train, y_train, batch_size=100, epochs=5, validation_split=0.2)

test_scores = model.evaluate(x_test, y_test, verbose=2)
print("Test loss:", test_scores[0])
print("Test accuracy:", test_scores[1])
Epoch 1/5
480/480 [==============================] - 3s 5ms/step - loss: 0.4050 - accuracy: 0.8880 - val_loss: 0.2059 - val_accuracy: 0.9383
Epoch 2/5
480/480 [==============================] - 2s 4ms/step - loss: 0.1835 - accuracy: 0.9467 - val_loss: 0.1538 - val_accuracy: 0.9558
Epoch 3/5
480/480 [==============================] - 2s 4ms/step - loss: 0.1368 - accuracy: 0.9596 - val_loss: 0.1383 - val_accuracy: 0.9581
Epoch 4/5
480/480 [==============================] - 2s 4ms/step - loss: 0.1084 - accuracy: 0.9674 - val_loss: 0.1105 - val_accuracy: 0.9683
Epoch 5/5
480/480 [==============================] - 2s 5ms/step - loss: 0.0875 - accuracy: 0.9737 - val_loss: 0.1087 - val_accuracy: 0.9669
313/313 - 1s - loss: 0.1050 - accuracy: 0.9671
Test loss: 0.10498897731304169
Test accuracy: 0.9671000242233276
  • with validation_split=0.2, size(train) = 60000 × 0.8 = 48000, i.e. 480 steps/epoch at batch_size=100
  • and size(val) = 60000 × 0.2 = 12000
history.history.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
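Because the model was compiled with from_logits=True, its outputs are raw logits; to get class probabilities at inference time, apply softmax explicitly. A minimal sketch:

# sketch: apply softmax to the logits, then take argmax for the predicted class
probs = tf.nn.softmax(model.predict(x_test[:3]))
print(tf.argmax(probs, axis=1))   # predicted digits for the first 3 test images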


Save the model

  • There are two formats for saving models:
    • TensorFlow SavedModel (recommended), which stores:
      • the model architecture
      • the model weight values (learned during training)
      • the model training config, if any (as passed to compile)
      • the optimizer and its state, if any (to restart training where you left off)
    • the older Keras H5 format (a simplified version)
    • for more information, see https://www.tensorflow.org/guide/keras/save_and_serialize?hl=ko
# method 1: savedmodel type
model.save("path_to_my_model")
del model
# Recreate the exact same model purely from the file:
model = keras.models.load_model("path_to_my_model")
INFO:tensorflow:Assets written to: path_to_my_model/assets
# method 2: h5 type
model.save("my_model.h5")
del model
model = keras.models.load_model("my_model.h5")



To define multiple models

  • a single graph of layers can be used to generate multiple models
encoder_input = Input(shape=(28, 28, 1), name="img")
x = layers.Conv2D(16, 3, activation="relu")(encoder_input)
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.MaxPooling2D(3)(x)
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.Conv2D(16, 3, activation="relu")(x)
encoder_output = layers.GlobalMaxPooling2D()(x) 
            # (batch_size, rows, columns, channels)->(batch_size, channels)

encoder = Model(encoder_input, encoder_output, name="encoder")
encoder.summary()

x = layers.Reshape((4, 4, 1))(encoder_output)
x = layers.Conv2DTranspose(16, 3, activation="relu")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu")(x)
x = layers.UpSampling2D(3)(x)
x = layers.Conv2DTranspose(16, 3, activation="relu")(x)
decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x)

autoencoder = Model(encoder_input, decoder_output, name="autoencoder")
autoencoder.summary()
Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_1 (Glob (None, 16)                0         
=================================================================
Total params: 18,672
Trainable params: 18,672
Non-trainable params: 0
_________________________________________________________________
Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_1 (Glob (None, 16)                0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 4, 4, 1)           0         
_________________________________________________________________
conv2d_transpose_4 (Conv2DTr (None, 6, 6, 16)          160       
_________________________________________________________________
conv2d_transpose_5 (Conv2DTr (None, 8, 8, 32)          4640      
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_transpose_6 (Conv2DTr (None, 26, 26, 16)        4624      
_________________________________________________________________
conv2d_transpose_7 (Conv2DTr (None, 28, 28, 1)         145       
=================================================================
Total params: 28,241
Trainable params: 28,241
Non-trainable params: 0
_________________________________________________________________
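The autoencoder can then be trained to reconstruct its own input. A minimal sketch (assumption: MNIST images reshaped to (28, 28, 1) and scaled to [0, 1]; the x_train used earlier was flattened to 784, so reload it here):

# sketch: train the autoencoder with the input as its own target
(x_img, _), _ = keras.datasets.mnist.load_data()
x_img = x_img.reshape(-1, 28, 28, 1).astype("float32") / 255.
autoencoder.compile(optimizer="adam", loss="mse")
autoencoder.fit(x_img, x_img, epochs=1, batch_size=128)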


More information about de-convolution


  • Convolutions
    • Convolutions
    • Dilated Convolutions (a.k.a. atrous convolutions)
    • Transposed Convolutions (a.k.a. deconvolutions or fractionally strided convolutions)
    • Separable Convolutions
    • from https://towardsdatascience.com/types-of-convolutions-in-deep-learning-717013397f4d
  • Difference between UpSampling2D and Conv2DTranspose (see the shape-check sketch below)

    • simple scaling up vs. trained
    • UpSampling2D is just a simple scaling up of the image using nearest-neighbor or bilinear upsampling, so nothing smart. The advantage is that it's cheap.
    • Conv2DTranspose is a convolution operation whose kernel is learned (just like a normal Conv2D operation) while training your model. Conv2DTranspose also upsamples its input, but the key difference is that the model learns the best upsampling for the job.
  • (figure: transposed convolution illustration)
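A shape check contrasting the two (sketch; both double a 4×4 feature map, but only Conv2DTranspose has trainable weights):

# sketch: UpSampling2D vs. Conv2DTranspose on the same input
x = tf.random.normal((1, 4, 4, 16))
up = layers.UpSampling2D(2)(x)                                      # fixed nearest-neighbor, 0 params
tconv = layers.Conv2DTranspose(16, 3, strides=2, padding="same")(x) # learned kernel
print(up.shape, tconv.shape)   # (1, 8, 8, 16) (1, 8, 8, 16)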



Simple Examples

  • https://machinelearningmastery.com/how-to-make-classification-and-regression-predictions-for-deep-learning-models-in-keras/

Classification

# example of training a final classification model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler
# generate 2d classification dataset
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
# define and fit the final model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(X, y, epochs=200, verbose=0)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 4)                 12        
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 20        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 5         
=================================================================
Total params: 37
Trainable params: 37
Non-trainable params: 0
_________________________________________________________________


  • Prediction comes in two types:
    • class prediction
    • probability prediction
# predict the result for a new data Xnew
Xnew, _ = make_blobs(n_samples=3, centers=2, n_features=2, random_state=1)
Xnew = scaler.transform(Xnew)

# make a prediction for probability
print('Probability...')
ynew = model.predict(Xnew)
for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
Probability...
X=[0.89337759 0.65864154], Predicted=[0.04033506]
X=[0.29097707 0.12978982], Predicted=[0.9291382]
X=[0.78082614 0.75391697], Predicted=[0.04962662]


# make a prediction for classes
ynew = (model.predict(Xnew) > 0.5).astype("int32")
# show the inputs and predicted outputs
print('Classes...')
for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
Classes...
X=[0.89337759 0.65864154], Predicted=[0]
X=[0.29097707 0.12978982], Predicted=[1]
X=[0.78082614 0.75391697], Predicted=[0]


Regression

  • Question: Is it necessary to scale the target values as well?
  • Answer: Yes. It helps the gradient descent algorithm converge, especially when the target values are spread over a large range.
  • A target variable with a large spread of values may produce large error gradients, causing weight values to change dramatically and making the learning process unstable.
# with scaling target values
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler

X, y = make_regression(n_samples=100, n_features=2, noise=0.05, random_state=1)
scalerX, scalerY = StandardScaler(), StandardScaler()
scalerX.fit(X)
scalerY.fit(y.reshape(100,1))   # (100,) -> (100,1)
X = scalerX.transform(X)
y = scalerY.transform(y.reshape(100,1))

# define and fit the final model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))
model.summary()
model.compile(loss='mse', optimizer='adam')
model.fit(X, y, epochs=1000, verbose=0)
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_45 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_46 (Dense)             (None, 4)                 20        
_________________________________________________________________
dense_47 (Dense)             (None, 1)                 5         
=================================================================
Total params: 37
Trainable params: 37
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.History at 0x7fde0646d850>
# new instances to predict
Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.05, random_state=7)
Xnew = scalerX.transform(Xnew)
ynew = model.predict(Xnew)

for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
X=[0.04887696 0.21052199], Predicted=[0.22459722]
X=[ 1.80651612 -0.80617796], Predicted=[-0.05703998]
X=[-0.82240444 -0.26142258], Predicted=[-0.58791125]
scalerY.inverse_transform(ynew), a
(array([[ 36.273132],
        [ 14.821406],
        [-25.613968]], dtype=float32),
 array([ 21.28207192,  22.13978868, -21.10578639]))


# without target scaling

X, y = make_regression(n_samples=100, n_features=2, noise=0.05, random_state=1)
scalerX = StandardScaler()
X = scalerX.fit_transform(X)

# define and fit the final model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(loss='mse', optimizer='adam')
model.fit(X, y, epochs=1000, verbose=0)

Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.05, random_state=7)
Xnew = scalerX.transform(Xnew)
# make a prediction
ynew = model.predict(Xnew)
# show the inputs and predicted outputs
for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
X=[0.04887696 0.21052199], Predicted=[40.85008]
X=[ 1.80651612 -0.80617796], Predicted=[11.68873]
X=[-0.82240444 -0.26142258], Predicted=[-31.26053]


# linear regression
from sklearn.linear_model import LinearRegression
X, y = make_regression(n_samples=100, n_features=2, noise=0.05, random_state=1)
scalerX = StandardScaler()
X = scalerX.fit_transform(X)

lin_model = LinearRegression()
lin_model.fit(X, y)

Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.05, random_state=7)
Xnew = scalerX.transform(Xnew)

lin_model.predict(Xnew), a, Xnew
(array([ 36.28583995,  14.27272315, -25.27107631]),
 array([ 21.28207192,  22.13978868, -21.10578639]),
 array([[ 0.04887696,  0.21052199],
        [ 1.80651612, -0.80617796],
        [-0.82240444, -0.26142258]]))
