引言
TensorFlow提供了多种API,使得入门者和专家可以根据自己的需求选择不同的API搭建模型。
基于Keras Sequential API搭建模型
Sequential适用于以线性堆叠的方式搭建模型,即每一层只有一个输入和一个输出。
import tensorflow as tf

# Load the MNIST handwritten-digit dataset.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255.
x_train, x_test = x_train / 255, x_test / 255

# Add an explicit trailing channel axis so each sample has the (28, 28, 1)
# shape declared by `input_shape` below, instead of relying on Keras to
# adjust the input rank implicitly.
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# Build the model with the Sequential API.
# Option 1: pass the full list of layers to the constructor.
model = tf.keras.models.Sequential([
    # 2D convolution: 3 filters, 3x3 kernel, stride 1, 28x28x1 input images.
    tf.keras.layers.Conv2D(3, kernel_size=3, strides=1, input_shape=(28, 28, 1)),
    # Activation function.
    tf.keras.layers.Activation('relu'),
    # 2x2 max pooling with stride 2.
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Flatten the feature maps into a vector.
    tf.keras.layers.Flatten(),
    # Fully connected layer with 128 units and ReLU activation.
    tf.keras.layers.Dense(128, activation='relu'),
    # Output layer: 10 units with softmax activation.
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Option 2: start from an empty model and add the same layers one by one.
model2 = tf.keras.models.Sequential()
model2.add(tf.keras.layers.Conv2D(3, kernel_size=3, strides=1, input_shape=(28, 28, 1)))
model2.add(tf.keras.layers.Activation('relu'))
model2.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model2.add(tf.keras.layers.Flatten())
model2.add(tf.keras.layers.Dense(128, activation='relu'))
model2.add(tf.keras.layers.Dense(10, activation='softmax'))

# Model overview.
model.summary()
# Output recorded in the original article:
#
# Model: "sequential"
# _________________________________________________________________
#  Layer (type)                  Output Shape              Param #
# =================================================================
#  conv2d (Conv2D)               (None, 26, 26, 3)         30
#  activation (Activation)       (None, 26, 26, 3)         0
#  max_pooling2d (MaxPooling2D)  (None, 13, 13, 3)         0
#  flatten (Flatten)             (None, 507)               0
#  dense (Dense)                 (None, 128)               65024
#  dense_1 (Dense)               (None, 10)                1290
# =================================================================
# Total params: 66,344
# Trainable params: 66,344

# Compile: attach the optimizer, loss function and evaluation metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 2 epochs with a batch size of 100.
model.fit(x_train, y_train, epochs=2, batch_size=100)
基于Keras 函数API搭建模型
Sequential只能线性堆叠,每一层只有一个输入和一个输出。但有些模型并非如此:多输入模型(如同时输入图片、文本描述等,这几类信息可能需要分别使用CNN、RNN模型提取信息,然后汇总到最后的神经网络中预测输出);多输出模型(如根据音乐同时预测音乐类型和发行时间);以及非线性拓扑结构的模型(如使用残差连接、Inception等)。上述这些网络都不是线性搭建的,要搭建如此复杂的网络,需要使用函数API。
简单实例
import tensorflow as tf

# Load the MNIST handwritten-digit dataset.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255.
x_train, x_test = x_train / 255, x_test / 255

# Add an explicit trailing channel axis so each sample has the (28, 28, 1)
# shape declared by the Input layer below, instead of relying on Keras to
# adjust the input rank implicitly.
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# Functional API: create an input tensor, then call each layer on the
# output of the previous one.
input_tensor = tf.keras.layers.Input(shape=(28, 28, 1))
# 2D convolution: 3 filters, 3x3 kernel, stride 1.
x = tf.keras.layers.Conv2D(3, kernel_size=3, strides=1)(input_tensor)
# Activation function.
x = tf.keras.layers.Activation('relu')(x)
# 2x2 max pooling with stride 2.
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)(x)
# Flatten the feature maps into a vector.
x = tf.keras.layers.Flatten()(x)
# Fully connected layer with 128 units and ReLU activation.
x = tf.keras.layers.Dense(128, activation='relu')(x)
# Output layer: 10 units with softmax activation.
output = tf.keras.layers.Dense(10, activation='softmax')(x)

# The model is defined by its input and output tensors.
model = tf.keras.models.Model(input_tensor, output)

# Model overview.
model.summary()
# Output recorded in the original article:
#
# Model: "model"
# _________________________________________________________________
#  Layer (type)                  Output Shape              Param #
# =================================================================
#  input_1 (InputLayer)          [(None, 28, 28, 1)]       0
#  conv2d (Conv2D)               (None, 26, 26, 3)         30
#  activation (Activation)       (None, 26, 26, 3)         0
#  max_pooling2d (MaxPooling2D)  (None, 13, 13, 3)         0
#  flatten (Flatten)             (None, 507)               0
#  dense (Dense)                 (None, 128)               65024
#  dense_1 (Dense)               (None, 10)                1290
# =================================================================
# Total params: 66,344
# Trainable params: 66,344
# Non-trainable params: 0
# _________________________________________________________________

# Compile: attach the optimizer, loss function and evaluation metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 2 epochs with a batch size of 100.
model.fit(x_train, y_train, epochs=2, batch_size=100)
多输入实例
import tensorflow as tf

# Branch 1: its own input tensor followed by two dense layers.
input_tensor1 = tf.keras.layers.Input(shape=(28,))
x1 = tf.keras.layers.Dense(16, activation='relu')(input_tensor1)
output1 = tf.keras.layers.Dense(32, activation='relu')(x1)

# Branch 2: a second, independent input with the same structure.
input_tensor2 = tf.keras.layers.Input(shape=(28,))
x2 = tf.keras.layers.Dense(16, activation='relu')(input_tensor2)
output2 = tf.keras.layers.Dense(32, activation='relu')(x2)

# Merge the features produced by the two branches.
concat = tf.keras.layers.concatenate([output1, output2])

# Top-level classifier. The head uses softmax (not relu) because the
# 'sparse_categorical_crossentropy' loss below expects class probabilities
# by default; unbounded relu activations would make the loss meaningless.
output = tf.keras.layers.Dense(10, activation='softmax')(concat)

# A multi-input model takes a list of input tensors.
model = tf.keras.models.Model([input_tensor1, input_tensor2], output)

# Compile: attach the optimizer, loss function and evaluation metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
多输出实例
import tensorflow as tf

# Shared trunk: one input followed by two dense layers.
input_tensor = tf.keras.layers.Input(shape=(28,))
x = tf.keras.layers.Dense(16, activation='relu')(input_tensor)
output = tf.keras.layers.Dense(32, activation='relu')(x)

# Two output heads on top of the shared features.
# Head 1 is a 10-way classifier. It uses softmax (not relu) because the
# 'sparse_categorical_crossentropy' loss below expects class probabilities
# by default.
output1 = tf.keras.layers.Dense(10, activation='softmax')(output)
# Head 2 is a single sigmoid unit, paired with 'binary_crossentropy'.
output2 = tf.keras.layers.Dense(1, activation='sigmoid')(output)

# A multi-output model takes a list of output tensors.
model = tf.keras.models.Model(input_tensor, [output1, output2])

# Compile: the losses are listed in the same order as the model outputs.
model.compile(
    optimizer='adam',
    loss=['sparse_categorical_crossentropy', 'binary_crossentropy'],
    metrics=['accuracy'],
)
子类化API
相较于上述使用高阶API,使用子类化API的方式来搭建模型,可以根据需求对模型中的任何一部分进行修改。
import tensorflow as tf

# Load the MNIST handwritten-digit dataset.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255. Casting to float32 first keeps the
# arrays (and the shuffle buffer below) at half the size of the float64
# arrays that a plain `x / 255` would produce.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Use a shuffle buffer that covers the whole training set; a buffer of only
# a few elements leaves the sample order almost unchanged.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train))
    .batch(32)
)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


class MyModel(tf.keras.Model):
    """Small fully connected classifier built by subclassing tf.keras.Model."""

    def __init__(self):
        super(MyModel, self).__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.hidden_layer1 = tf.keras.layers.Dense(16, activation='relu')
        self.hidden_layer2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        """Forward pass: flatten, 16-unit ReLU layer, 10-unit softmax layer."""
        h = self.flatten(x)
        h = self.hidden_layer1(h)
        y = self.hidden_layer2(h)
        return y


model = MyModel()

# Loss function and optimizer.
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Metrics accumulated over one epoch.
train_loss = tf.keras.metrics.Mean()  # mean training loss
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()  # training accuracy
test_loss = tf.keras.metrics.Mean()  # mean test loss
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()  # test accuracy


@tf.function
def train_step(x, y):
    """Run one optimization step on a batch and update the training metrics."""
    with tf.GradientTape() as tape:
        y_pre = model(x)
        loss = loss_function(y, y_pre)
    grad = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    train_loss(loss)
    train_accuracy(y, y_pre)


@tf.function
def test_step(x, y):
    """Evaluate one batch (no weight update) and update the test metrics."""
    y_pre = model(x)
    te_loss = loss_function(y, y_pre)
    test_loss(te_loss)
    test_accuracy(y, y_pre)


epoch = 2
for i in range(epoch):
    # Reset all metrics so each epoch is reported on its own.
    # NOTE(review): `reset_state()` is the current name of the deprecated
    # `reset_states()`; confirm the installed TensorFlow version provides it.
    train_loss.reset_state()
    train_accuracy.reset_state()
    test_loss.reset_state()
    test_accuracy.reset_state()

    # Train batch by batch.
    for x, y in train_data:
        train_step(x, y)

    # Evaluate on the test split, which was prepared above but previously
    # never used.
    for x, y in test_data:
        test_step(x, y)

    print(f'epoch {i+1} train loss {train_loss.result()} train accuracy {train_accuracy.result()}')
    print(f'epoch {i+1} test loss {test_loss.result()} test accuracy {test_accuracy.result()}')
参考
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:TensorFlow搭建模型方式总结 - Python技术站