TensorFlow搭建模型方式总结

引言

TensorFlow提供了多种API，使得入门者和专家可以根据自己的需求选择不同的API搭建模型。

基于Keras Sequential API搭建模型

Sequential适用于线性堆叠的方式搭建模型，即每层只有一个输入和输出。

import tensorflow as tf

# Load the MNIST handwritten-digit dataset as (images, labels) pairs for
# the training split and the test split.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale both image arrays by dividing by 255
# (presumably maps 8-bit pixel values into [0, 1] -- confirm).
x_train = x_train / 255
x_test = x_test / 255

# Build the model with the Sequential API.
# Approach 1: hand the complete, ordered list of layers to the constructor.
model = tf.keras.Sequential([
    # 2D convolution: 3 filters, 3x3 kernel, stride 1, applied to a
    # 28x28x1 input image.
    tf.keras.layers.Conv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        input_shape=(28, 28, 1),
    ),
    # ReLU activation applied to the convolution output.
    tf.keras.layers.Activation(activation='relu'),
    # 2x2 max pooling with stride 2.
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Flatten the feature maps into one vector per sample.
    tf.keras.layers.Flatten(),
    # Fully connected layer with 128 units and ReLU activation.
    tf.keras.layers.Dense(units=128, activation='relu'),
    # Output layer: 10 units with softmax activation.
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

# Approach 2: start from an empty Sequential model and append the same
# layers one at a time with add().
model2 = tf.keras.Sequential()
model2.add(
    tf.keras.layers.Conv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        input_shape=(28, 28, 1),
    )
)
model2.add(tf.keras.layers.Activation(activation='relu'))
model2.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model2.add(tf.keras.layers.Flatten())
model2.add(tf.keras.layers.Dense(units=128, activation='relu'))
model2.add(tf.keras.layers.Dense(units=10, activation='softmax'))

# Print the layer-by-layer overview of `model` (the list-built variant).
model.summary()

"""
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 26, 26, 3)         30        

 activation (Activation)     (None, 26, 26, 3)         0         

 max_pooling2d (MaxPooling2D  (None, 13, 13, 3)        0         
 )                                                               

 flatten (Flatten)           (None, 507)               0         

 dense (Dense)               (None, 128)               65024     

 dense_1 (Dense)             (None, 10)                1290      

=================================================================
Total params: 66,344
Trainable params: 66,344
"""

# Compile: attach the optimizer, the loss function and the evaluation
# metric to the model.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train for 2 epochs with a batch size of 100.
model.fit(x=x_train, y=y_train, batch_size=100, epochs=2)

基于Keras 函数API搭建模型

Sequential是线性堆叠的，只有一个输入和一个输出。但是，当我们需要搭建多输入模型时（如同时输入图片、文本描述等），这几类信息可能需要分别使用CNN、RNN等模型提取信息，然后将信息汇总到最后的神经网络中预测输出；或者是多输出任务，如根据音乐预测音乐类型和发行时间；亦或是一些非线性拓扑结构的网络模型，如使用残差连接、Inception等。上述这些情况的网络都不是线性搭建的，要搭建如此复杂的网络，需要使用函数API来搭建。

简单实例

import tensorflow as tf

# Fetch the MNIST handwritten-digit dataset; load_data() returns the
# training pair and the test pair of (images, labels).
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide both image arrays by 255
# (presumably rescales 8-bit pixel values into [0, 1] -- confirm).
x_train = x_train / 255
x_test = x_test / 255

# Symbolic input describing one 28x28 image with a single channel.
input_tensor = tf.keras.Input(shape=(28, 28, 1))

# Each layer is called on the previous tensor, so the graph is wired up
# explicitly instead of being implied by list order.

# 2D convolution: 3 filters, 3x3 kernel, stride 1.
x = tf.keras.layers.Conv2D(filters=3, kernel_size=(3, 3), strides=(1, 1))(input_tensor)

# ReLU activation.
x = tf.keras.layers.Activation(activation='relu')(x)

# 2x2 max pooling with stride 2.
x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

# Flatten the feature maps into one vector per sample.
x = tf.keras.layers.Flatten()(x)

# Fully connected layer with 128 units and ReLU activation.
x = tf.keras.layers.Dense(units=128, activation='relu')(x)

# Output layer: 10 units with softmax activation.
output = tf.keras.layers.Dense(units=10, activation='softmax')(x)

# Tie the input tensor and the output tensor together into a model.
model = tf.keras.Model(inputs=input_tensor, outputs=output)

# Print the layer-by-layer overview of the model.
model.summary()

"""
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 3)         30        
                                                                 
 activation (Activation)     (None, 26, 26, 3)         0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 3)        0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 507)               0         
                                                                 
 dense (Dense)               (None, 128)               65024     
                                                                 
 dense_1 (Dense)             (None, 10)                1290      
                                                                 
=================================================================
Total params: 66,344
Trainable params: 66,344
Non-trainable params: 0
_________________________________________________________________

"""

# Compile: configure the optimizer, the loss function and the evaluation
# metric used during training.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train for 2 epochs with a batch size of 100.
model.fit(x=x_train, y=y_train, batch_size=100, epochs=2)

多输入实例

import tensorflow as tf

# Branch 1: its own 28-feature input followed by two Dense layers.
input_tensor1 = tf.keras.layers.Input(shape=(28,))
x1 = tf.keras.layers.Dense(16, activation='relu')(input_tensor1)
output1 = tf.keras.layers.Dense(32, activation='relu')(x1)

# Branch 2: a second, independent 28-feature input with the same layout.
input_tensor2 = tf.keras.layers.Input(shape=(28,))
x2 = tf.keras.layers.Dense(16, activation='relu')(input_tensor2)
output2 = tf.keras.layers.Dense(32, activation='relu')(x2)

# Merge the two branch outputs by concatenating them.
concat = tf.keras.layers.concatenate([output1, output2])

# Classification head on top of the merged features.
# The head must emit a probability distribution over the 10 classes:
# 'sparse_categorical_crossentropy' (used in compile() below) treats the
# model output as probabilities by default, so the activation is softmax
# rather than relu.
output = tf.keras.layers.Dense(10, activation='softmax')(concat)

# A model with several inputs takes them as a list.
model = tf.keras.models.Model([input_tensor1, input_tensor2], output)

# Compile: optimizer, loss function and evaluation metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

多输出实例

import tensorflow as tf

# Shared trunk: one 28-feature input followed by two Dense layers.
input_tensor = tf.keras.layers.Input(shape=(28,))
x = tf.keras.layers.Dense(16, activation='relu')(input_tensor)
output = tf.keras.layers.Dense(32, activation='relu')(x)


# Two output heads that both read the shared trunk.
# Head 1 is trained with 'sparse_categorical_crossentropy', which treats
# the model output as class probabilities by default, so it uses softmax
# rather than relu.
output1 = tf.keras.layers.Dense(10, activation='softmax')(output)
# Head 2 is trained with 'binary_crossentropy' and emits a single sigmoid
# value.
output2 = tf.keras.layers.Dense(1, activation='sigmoid')(output)

# A model with several outputs takes them as a list.
model = tf.keras.models.Model(input_tensor, [output1, output2])

# Compile: the losses are listed in the same order as the model outputs
# (first entry for output1, second for output2).
model.compile(
    optimizer='adam',
    loss=['sparse_categorical_crossentropy', 'binary_crossentropy'],
    metrics=['accuracy']
)

子类化API

相较于上述高阶API，使用子类化API的方式搭建模型，可以根据需求对模型中的任何一部分进行修改。

import tensorflow as tf

# Load the MNIST handwritten-digit dataset.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide both image arrays by 255
# (presumably rescales 8-bit pixel values into [0, 1] -- confirm).
x_train, x_test = x_train / 255, x_test / 255

# Wrap the arrays in tf.data pipelines that yield batches of 32.
# The shuffle buffer has to be large relative to the dataset for the
# sample order to be meaningfully randomized; a buffer of only 10 elements
# leaves the data almost in its original order, so 10000 is used instead.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=10000)
    .batch(32)
)
# The test split is only batched; it is not shuffled.
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


class MyModel(tf.keras.Model):
    """Subclassed Keras model: Flatten -> Dense(16, relu) -> Dense(10, softmax)."""

    def __init__(self):
        super().__init__()
        # Layers are created once here and reused on every forward pass.
        self.flatten = tf.keras.layers.Flatten()
        self.hidden_layer1 = tf.keras.layers.Dense(units=16, activation='relu')
        self.hidden_layer2 = tf.keras.layers.Dense(units=10, activation='softmax')

    def call(self, x):
        """Run the forward pass on `x` and return the softmax layer's output."""
        flat = self.flatten(x)
        hidden = self.hidden_layer1(flat)
        return self.hidden_layer2(hidden)


# Instantiate the subclassed model.
model = MyModel()

# Loss function and optimizer used by the custom training step.
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Evaluation metrics. Each one accumulates values across calls until it
# is reset.
train_loss = tf.keras.metrics.Mean()  # mean training loss over one epoch
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()  # training accuracy over one epoch

# Counterparts for the test data; these are updated by test_step().
test_loss = tf.keras.metrics.Mean()
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()


@tf.function
def train_step(x, y):
    """Run one optimization step on a single batch.

    Computes the predictions and the loss under a gradient tape, applies
    the resulting gradients to the model's trainable variables, and feeds
    the batch loss and predictions into the running training metrics.

    Args:
        x: batch of model inputs.
        y: batch of labels matching `x`.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        predictions = model(x)
        batch_loss = loss_function(y, predictions)

    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    # Accumulate this batch into the training metrics.
    train_loss(batch_loss)
    train_accuracy(y, predictions)


@tf.function
def test_step(x, y):
    """Evaluate a single batch without updating any weights.

    Computes the predictions and the loss for the batch and feeds them
    into the running test metrics.

    Args:
        x: batch of model inputs.
        y: batch of labels matching `x`.
    """
    predictions = model(x)
    batch_loss = loss_function(y, predictions)

    # Accumulate this batch into the test metrics.
    test_loss(batch_loss)
    test_accuracy(y, predictions)


# Number of passes over the training data.
epoch = 2

for i in range(epoch):

    # Reset the training metrics so each epoch reports its own values.
    # NOTE(review): newer Keras releases name this method reset_state() --
    # confirm against the installed TensorFlow version.
    train_loss.reset_states()
    train_accuracy.reset_states()

    # Run one training step per batch produced by train_data.
    for x, y in train_data:
        train_step(x, y)

    # NOTE(review): test_step() and the test metrics are not used in the
    # lines visible here -- confirm whether evaluation follows.
    print(f'epoch {i+1} train loss {train_loss.result()} train accuracy {train_accuracy.result()}')