[Tensorflow&Keras] Keras Introduction

8 minute read

Keras

  • from https://keras.io/guides/functional_api/
import numpy as np
import tensorflow as tf
# `import keras` (the standalone package from keras.io) also works; here we use tf.keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input

Keras Introduction

  • Input() : used to instantiate a Keras tensor
    • Keras tensor: a symbolic tensor-like object, which we augment with certain attributes
# 784-dimensional vector input
inputs = Input(shape=(784,))   # 784-dimensional vector. The batch size is always omitted 
                               # since only the shape of each sample is specified.
# image input
img_inputs = Input(shape=(32, 32, 3))
inputs.shape, img_inputs.shape, inputs.dtype, img_inputs.dtype
(TensorShape([None, 784]),
 TensorShape([None, 32, 32, 3]),
 tf.float32,
 tf.float32)


# create a new node in the graph of layers
dense = layers.Dense(64, activation="relu")
x = dense(inputs)
# few more layers
x = layers.Dense(64, activation="relu")(x)
outputs = layers.Dense(10)(x)
# create Model
model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
model.summary()
Model: "mnist_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                650       
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


# must install pydot, graphviz
keras.utils.plot_model(model, "my_first_model.png", show_shapes=True)

(model graph plot saved as my_first_model.png)


Using Functional API

inputs = Input(shape=(784,))
x = Dense(64, activation="relu")(inputs)
x = Dense(64, activation="relu")(x)
outputs = Dense(10)(x)
# create a Model by specifying its inputs and outputs in the graph of layers
model = Model(inputs=inputs, outputs=outputs, name="mnist_model")
model.summary()
Model: "mnist_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_3 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_48 (Dense)             (None, 64)                50240     
_________________________________________________________________
dense_49 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_50 (Dense)             (None, 10)                650       
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________

Number of parameters to train (weights + biases per layer; verified in the sketch below):

  • 784 * 64 + 64 = 50240
  • 64 * 64 + 64 = 4160
  • 64 * 10 + 10 = 650
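A quick sketch verifying these counts (each Dense layer holds an inputs × units weight matrix plus a units-long bias vector):

# sketch: Dense layer parameter count = weight matrix + bias vector
def dense_params(n_in, n_out):
    return n_in * n_out + n_out

print(dense_params(784, 64), dense_params(64, 64), dense_params(64, 10))
# 50240 4160 650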


Using Sequential API

# another way to define the model
model = Sequential()
model.add(Dense(64, input_shape=(784,), activation='relu')) # specify input_shape in the first layer
model.add(Dense(64, activation='relu'))
model.add(Dense(10))
model.summary()
Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_54 (Dense)             (None, 64)                50240     
_________________________________________________________________
dense_55 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_56 (Dense)             (None, 10)                650       
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


Training, Evaluation, and Inference

  • train the Sequential model defined above on the MNIST dataset
from tensorflow.keras.datasets.mnist import load_data

(x_train, y_train), (x_test, y_test) = load_data()
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11493376/11490434 [==============================] - 0s 0us/step
11501568/11490434 [==============================] - 0s 0us/step
(60000, 28, 28) (10000, 28, 28) (60000,) (10000,)
x_train = x_train.reshape(60000, 784).astype("float32") / 255.
x_test = x_test.reshape(10000, 784).astype("float32") / 255.
# one-hot encoding
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
print(y_train.shape, y_test.shape)
(60000, 10) (10000, 10)
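As an aside, the one-hot step is optional: SparseCategoricalCrossentropy accepts integer labels directly and yields the same loss value. A quick check (sketch):

# sketch: sparse loss on an integer label == dense loss on the one-hot label
sparse_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
dense_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
logits = tf.constant([[2.0, 1.0, 0.1]])
print(sparse_loss(tf.constant([0]), logits).numpy())               # ~0.417
print(dense_loss(tf.constant([[1.0, 0.0, 0.0]]), logits).numpy())  # same value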
model.compile(
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
# from_logits=True: informs the loss function that the output values generated by the model
# are not normalized, a.k.a. logits (i.e., softmax has not been applied to them)
history = model.fit(x_train, y_train, batch_size=100, epochs=5, validation_split=0.2)

test_scores = model.evaluate(x_test, y_test, verbose=2)
print("Test loss:", test_scores[0])
print("Test accuracy:", test_scores[1])
Epoch 1/5
480/480 [==============================] - 3s 5ms/step - loss: 0.4050 - accuracy: 0.8880 - val_loss: 0.2059 - val_accuracy: 0.9383
Epoch 2/5
480/480 [==============================] - 2s 4ms/step - loss: 0.1835 - accuracy: 0.9467 - val_loss: 0.1538 - val_accuracy: 0.9558
Epoch 3/5
480/480 [==============================] - 2s 4ms/step - loss: 0.1368 - accuracy: 0.9596 - val_loss: 0.1383 - val_accuracy: 0.9581
Epoch 4/5
480/480 [==============================] - 2s 4ms/step - loss: 0.1084 - accuracy: 0.9674 - val_loss: 0.1105 - val_accuracy: 0.9683
Epoch 5/5
480/480 [==============================] - 2s 5ms/step - loss: 0.0875 - accuracy: 0.9737 - val_loss: 0.1087 - val_accuracy: 0.9669
313/313 - 1s - loss: 0.1050 - accuracy: 0.9671
Test loss: 0.10498897731304169
Test accuracy: 0.9671000242233276
  • with validation_split=0.2, size(train) = 60000 × 0.8 = 48000, i.e. 480 steps/epoch at batch_size=100
  • and size(val) = 60000 × 0.2 = 12000
history.history.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
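Because the model was compiled with from_logits=True, its outputs are raw logits; to get class probabilities at inference time, apply softmax explicitly. A minimal sketch:

# sketch: apply softmax to the logits, then take argmax for the predicted class
probs = tf.nn.softmax(model.predict(x_test[:3]))
print(tf.argmax(probs, axis=1))   # predicted digits for the first 3 test images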


Save the model

  • There are two formats for saving models:
    • TensorFlow SavedModel (recommended), which stores:
      • the model architecture
      • the model weight values (learned during training)
      • the model training config, if any (as passed to compile)
      • the optimizer and its state, if any (to restart training where you left off)
    • the older Keras H5 format (a simplified version)
    • for more information, see https://www.tensorflow.org/guide/keras/save_and_serialize?hl=ko
# method 1: savedmodel type
model.save("path_to_my_model")
del model
# Recreate the exact same model purely from the file:
model = keras.models.load_model("path_to_my_model")
INFO:tensorflow:Assets written to: path_to_my_model/assets
# method 2: h5 type
model.save("my_model.h5")
del model
model = keras.models.load_model("my_model.h5")



To define multiple models

  • a single graph of layers can be used to generate multiple models
encoder_input = Input(shape=(28, 28, 1), name="img")
x = layers.Conv2D(16, 3, activation="relu")(encoder_input)
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.MaxPooling2D(3)(x)
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.Conv2D(16, 3, activation="relu")(x)
encoder_output = layers.GlobalMaxPooling2D()(x) 
            # (batch_size, rows, columns, channels)->(batch_size, channels)

encoder = Model(encoder_input, encoder_output, name="encoder")
encoder.summary()

x = layers.Reshape((4, 4, 1))(encoder_output)
x = layers.Conv2DTranspose(16, 3, activation="relu")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu")(x)
x = layers.UpSampling2D(3)(x)
x = layers.Conv2DTranspose(16, 3, activation="relu")(x)
decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x)

autoencoder = Model(encoder_input, decoder_output, name="autoencoder")
autoencoder.summary()
Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_1 (Glob (None, 16)                0         
=================================================================
Total params: 18,672
Trainable params: 18,672
Non-trainable params: 0
_________________________________________________________________
Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_1 (Glob (None, 16)                0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 4, 4, 1)           0         
_________________________________________________________________
conv2d_transpose_4 (Conv2DTr (None, 6, 6, 16)          160       
_________________________________________________________________
conv2d_transpose_5 (Conv2DTr (None, 8, 8, 32)          4640      
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_transpose_6 (Conv2DTr (None, 26, 26, 16)        4624      
_________________________________________________________________
conv2d_transpose_7 (Conv2DTr (None, 28, 28, 1)         145       
=================================================================
Total params: 28,241
Trainable params: 28,241
Non-trainable params: 0
_________________________________________________________________
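The autoencoder can then be trained to reconstruct its own input. A minimal sketch (assumption: MNIST images reshaped to (28, 28, 1) and scaled to [0, 1]; the x_train used earlier was flattened to 784, so reload it here):

# sketch: train the autoencoder with the input as its own target
(x_img, _), _ = keras.datasets.mnist.load_data()
x_img = x_img.reshape(-1, 28, 28, 1).astype("float32") / 255.
autoencoder.compile(optimizer="adam", loss="mse")
autoencoder.fit(x_img, x_img, epochs=1, batch_size=128)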


More information about de-convolution


  • Convolutions
    • Convolutions
    • Dilated Convolutions (a.k.a. atrous convolutions)
    • Transposed Convolutions (a.k.a. deconvolutions or fractionally strided convolutions)
    • Separable Convolutions
    • from https://towardsdatascience.com/types-of-convolutions-in-deep-learning-717013397f4d
  • Difference between UpSampling2D and Conv2DTranspose (see the shape-check sketch below)

    • simple scaling up vs. trained
    • UpSampling2D is just a simple scaling up of the image using nearest-neighbor or bilinear upsampling, so nothing smart. The advantage is that it's cheap.
    • Conv2DTranspose is a convolution operation whose kernel is learned (just like a normal Conv2D operation) while training your model. Conv2DTranspose also upsamples its input, but the key difference is that the model learns the best upsampling for the job.
  • (figure: transposed convolution illustration)
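A shape check contrasting the two (sketch; both double a 4×4 feature map, but only Conv2DTranspose has trainable weights):

# sketch: UpSampling2D vs. Conv2DTranspose on the same input
x = tf.random.normal((1, 4, 4, 16))
up = layers.UpSampling2D(2)(x)                                      # fixed nearest-neighbor, 0 params
tconv = layers.Conv2DTranspose(16, 3, strides=2, padding="same")(x) # learned kernel
print(up.shape, tconv.shape)   # (1, 8, 8, 16) (1, 8, 8, 16)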



Simple Examples

  • https://machinelearningmastery.com/how-to-make-classification-and-regression-predictions-for-deep-learning-models-in-keras/

Classification

# example of training a final classification model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler
# generate 2d classification dataset
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
# define and fit the final model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(X, y, epochs=200, verbose=0)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 4)                 12        
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 20        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 5         
=================================================================
Total params: 37
Trainable params: 37
Non-trainable params: 0
_________________________________________________________________


  • Prediction comes in two types:
    • class prediction
    • probability prediction
# predict the result for a new data Xnew
Xnew, _ = make_blobs(n_samples=3, centers=2, n_features=2, random_state=1)
Xnew = scaler.transform(Xnew)

# make a prediction for probability
print('Probability...')
ynew = model.predict(Xnew)
for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
Probability...
X=[0.89337759 0.65864154], Predicted=[0.04033506]
X=[0.29097707 0.12978982], Predicted=[0.9291382]
X=[0.78082614 0.75391697], Predicted=[0.04962662]


# make a prediction for classes
ynew = (model.predict(Xnew) > 0.5).astype("int32")
# show the inputs and predicted outputs
print('Classes...')
for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
Classes...
X=[0.89337759 0.65864154], Predicted=[0]
X=[0.29097707 0.12978982], Predicted=[1]
X=[0.78082614 0.75391697], Predicted=[0]


Regression

  • Question: Is it necessary to scale the target values as well?
  • Answer: Yes. It helps the gradient descent algorithm converge, especially when the target values are spread over a large range.
  • A target variable with a large spread of values may produce large error gradients, causing weight values to change dramatically and making the learning process unstable.
# with scaling target values
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler

X, y = make_regression(n_samples=100, n_features=2, noise=0.05, random_state=1)
scalerX, scalerY = StandardScaler(), StandardScaler()
scalerX.fit(X)
scalerY.fit(y.reshape(100,1))   # (100,) -> (100,1)
X = scalerX.transform(X)
y = scalerY.transform(y.reshape(100,1))

# define and fit the final model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))
model.summary()
model.compile(loss='mse', optimizer='adam')
model.fit(X, y, epochs=1000, verbose=0)
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_45 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_46 (Dense)             (None, 4)                 20        
_________________________________________________________________
dense_47 (Dense)             (None, 1)                 5         
=================================================================
Total params: 37
Trainable params: 37
Non-trainable params: 0
_________________________________________________________________


<keras.callbacks.History at 0x7fde0646d850>
# new instances to predict
Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.05, random_state=7)
Xnew = scalerX.transform(Xnew)
ynew = model.predict(Xnew)

for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
X=[0.04887696 0.21052199], Predicted=[0.22459722]
X=[ 1.80651612 -0.80617796], Predicted=[-0.05703998]
X=[-0.82240444 -0.26142258], Predicted=[-0.58791125]
scalerY.inverse_transform(ynew), a
(array([[ 36.273132],
        [ 14.821406],
        [-25.613968]], dtype=float32),
 array([ 21.28207192,  22.13978868, -21.10578639]))


# without target scaling

X, y = make_regression(n_samples=100, n_features=2, noise=0.05, random_state=1)
scalerX = StandardScaler()
X = scalerX.fit_transform(X)

# define and fit the final model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(loss='mse', optimizer='adam')
model.fit(X, y, epochs=1000, verbose=0)

Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.05, random_state=7)
Xnew = scalerX.transform(Xnew)
# make a prediction
ynew = model.predict(Xnew)
# show the inputs and predicted outputs
for i in range(len(Xnew)):
	print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))
X=[0.04887696 0.21052199], Predicted=[40.85008]
X=[ 1.80651612 -0.80617796], Predicted=[11.68873]
X=[-0.82240444 -0.26142258], Predicted=[-31.26053]


# linear regression
from sklearn.linear_model import LinearRegression
X, y = make_regression(n_samples=100, n_features=2, noise=0.05, random_state=1)
scalerX = StandardScaler()
X = scalerX.fit_transform(X)

lin_model = LinearRegression()
lin_model.fit(X, y)

Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.05, random_state=7)
Xnew = scalerX.transform(Xnew)

lin_model.predict(Xnew), a, Xnew
(array([ 36.28583995,  14.27272315, -25.27107631]),
 array([ 21.28207192,  22.13978868, -21.10578639]),
 array([[ 0.04887696,  0.21052199],
        [ 1.80651612, -0.80617796],
        [-0.82240444, -0.26142258]]))
