在 subclassed_model.py 中,通过对 tf.keras.Model 进行子类化,设计了两个自定义模型。
"""Two small Keras subclassed models: an Encoder and a Decoder."""
import tensorflow as tf
tf.enable_eager_execution()


# parameters
UNITS = 8  # width of the Encoder output / Decoder input


class Encoder(tf.keras.Model):
    """Stack of two ReLU dense layers: UNITS * 2 units, then UNITS units."""

    def __init__(self):
        super(Encoder, self).__init__()
        # Layers are created in this order and under these attribute names;
        # both are kept as-is so existing checkpoints still map onto them.
        self.fc1 = tf.keras.layers.Dense(UNITS * 2, activation='relu')
        self.fc2 = tf.keras.layers.Dense(UNITS, activation='relu')

    def call(self, inputs):
        """Apply fc1 followed by fc2 to `inputs` and return the result."""
        return self.fc2(self.fc1(inputs))


class Decoder(tf.keras.Model):
    """Single dense layer with one output unit and no activation."""

    def __init__(self):
        super(Decoder, self).__init__()
        self.fc = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Apply the dense layer to `inputs` and return the result."""
        outputs = self.fc(inputs)
        return outputs
在 save_subclassed_model.py 中,创建了包含 5000 个样本的训练数据集,实例化 Encoder()、Decoder() 模型,优化器采用 tf.train.AdamOptimizer(),以均方误差(MSE)作为损失函数。训练过程中,每 5 个 epoch 保存一次 checkpoint。
"""Train the Encoder/Decoder pair on a noisy cubic and checkpoint every 5 epochs."""
import errno
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from subclassed_model import Decoder, Encoder

tf.enable_eager_execution()


# create training data: y = x^3 + 1 plus Gaussian noise (std 0.05)
X = np.linspace(-1, 1, 5000)
np.random.shuffle(X)
y = X ** 3 + 1 + np.random.normal(0, 0.05, (5000,))

# plot data
plt.scatter(X, y)
plt.show()

# training dataset
BATCH_SIZE = 16
BUFFER_SIZE = 128

# X was already shuffled element-wise above; shuffle() after batch() only
# reorders whole batches.
training_dataset = (tf.data.Dataset.from_tensor_slices((X, y))
                    .batch(BATCH_SIZE)
                    .shuffle(BUFFER_SIZE))

# initialize subclassed models
encoder = Encoder()
decoder = Decoder()

optimizer = tf.train.AdamOptimizer()


# loss function
def loss_function(real, pred):
    """Return the mean squared error between `real` and `pred`."""
    return tf.losses.mean_squared_error(labels=real, predictions=pred)


def ensure_dir(path):
    """Create `path` (and missing parents) unless it already is a directory."""
    # EAFP: attempt the creation and tolerate only "already exists as a
    # directory"; any other failure (permissions, a file in the way) is
    # re-raised.
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise


# training
EPOCHS = 15

# checkpoint
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)
ensure_dir(checkpoint_dir)

for epoch in range(EPOCHS):
    # Sum of squared-error contributions weighted by batch size, so that
    # dividing by the number of samples gives the true per-sample mean even
    # though the last batch is smaller than BATCH_SIZE.
    weighted_loss_sum = 0.0
    samples_seen = 0

    # The loop variables are named x_batch / y_batch so they do not overwrite
    # the module-level arrays X and y.
    for batch, (x_batch, y_batch) in enumerate(training_dataset):
        x_batch = tf.expand_dims(tf.cast(x_batch, tf.float32), axis=1)  # (batch, 1)
        y_batch = tf.expand_dims(tf.cast(y_batch, tf.float32), axis=1)  # (batch, 1)

        with tf.GradientTape() as tape:
            encoded = encoder(x_batch)
            prediction = decoder(encoded)
            batch_loss = loss_function(real=y_batch, pred=prediction)

        # The variable list is read after the forward pass because the layers
        # create their weights on first call.
        variables = encoder.variables + decoder.variables
        grads = tape.gradient(batch_loss, variables)
        optimizer.apply_gradients(zip(grads, variables),
                                  global_step=tf.train.get_or_create_global_step())

        batch_loss_value = float(batch_loss.numpy())
        batch_len = int(x_batch.shape[0])
        weighted_loss_sum += batch_loss_value * batch_len
        samples_seen += batch_len

        if (batch + 1) % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                         batch + 1,
                                                         batch_loss_value))

    # max(..., 1) guards against division by zero for an empty dataset.
    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                        weighted_loss_sum / max(samples_seen, 1)))

    if (epoch + 1) % 5 == 0:
        checkpoint.save(file_prefix=checkpoint_prefix)
运行 save_subclassed_model.py。
2019-06-27 12:57:14.253635: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 2019-06-27 12:57:15.660142: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: name: GeForce GTX 1060 major: 6 minor: 1 memoryClockRate(GHz): 1.6705 pciBusID: 0000:01:00.0 totalMemory: 6.00GiB freeMemory: 4.97GiB 2019-06-27 12:57:15.660397: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0 2019-06-27 12:57:16.488227: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix: 2019-06-27 12:57:16.488385: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0 2019-06-27 12:57:16.488476: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N 2019-06-27 12:57:16.488772: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 4722 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1060, pci bus id: 0000:01:00.0, compute capability: 6.1) Epoch 1 Batch 100 Loss 0.1120 Epoch 1 Batch 200 Loss 0.0179 Epoch 1 Batch 300 Loss 0.0347 Epoch 1 Loss 0.0111 Epoch 2 Batch 100 Loss 0.0144 Epoch 2 Batch 200 Loss 0.0097 Epoch 2 Batch 300 Loss 0.0141 Epoch 2 Loss 0.0012 Epoch 3 Batch 100 Loss 0.0060 Epoch 3 Batch 200 Loss 0.0037 Epoch 3 Batch 300 Loss 0.0054 Epoch 3 Loss 0.0007 Epoch 4 Batch 100 Loss 0.0088 Epoch 4 Batch 200 Loss 0.0038 Epoch 4 Batch 300 Loss 0.0093 Epoch 4 Loss 0.0004 Epoch 5 Batch 100 Loss 0.0039 Epoch 5 Batch 200 Loss 0.0044 Epoch 5 Batch 300 Loss 0.0031 Epoch 5 Loss 0.0003 Epoch 6 Batch 100 Loss 0.0025 Epoch 6 Batch 200 Loss 0.0038 Epoch 6 Batch 300 Loss 0.0027 Epoch 6 Loss 0.0002 Epoch 7 Batch 100 Loss 0.0026 Epoch 7 Batch 200 Loss 0.0032 Epoch 7 Batch 300 Loss 0.0041 Epoch 7 Loss 0.0002 Epoch 8 Batch 100 Loss 0.0022 Epoch 8 Batch 200 Loss 0.0031 Epoch 8 Batch 300 Loss 
0.0026 Epoch 8 Loss 0.0002 Epoch 9 Batch 100 Loss 0.0040 Epoch 9 Batch 200 Loss 0.0014 Epoch 9 Batch 300 Loss 0.0040 Epoch 9 Loss 0.0002 Epoch 10 Batch 100 Loss 0.0023 Epoch 10 Batch 200 Loss 0.0030 Epoch 10 Batch 300 Loss 0.0038 Epoch 10 Loss 0.0002 Epoch 11 Batch 100 Loss 0.0028 Epoch 11 Batch 200 Loss 0.0020 Epoch 11 Batch 300 Loss 0.0025 Epoch 11 Loss 0.0002 Epoch 12 Batch 100 Loss 0.0027 Epoch 12 Batch 200 Loss 0.0045 Epoch 12 Batch 300 Loss 0.0021 Epoch 12 Loss 0.0002 Epoch 13 Batch 100 Loss 0.0016 Epoch 13 Batch 200 Loss 0.0033 Epoch 13 Batch 300 Loss 0.0024 Epoch 13 Loss 0.0002 Epoch 14 Batch 100 Loss 0.0034 Epoch 14 Batch 200 Loss 0.0028 Epoch 14 Batch 300 Loss 0.0033 Epoch 14 Loss 0.0002 Epoch 15 Batch 100 Loss 0.0019 Epoch 15 Batch 200 Loss 0.0030 Epoch 15 Batch 300 Loss 0.0037 Epoch 15 Loss 0.0002 Process finished with exit code 0
查看 checkpoint_dir 目录下的文件。
在 load_subclassed_model.py 中,创建了 200 个测试样本,加载最新的 checkpoint(latest checkpoint)中保存的模型参数,并对模型进行了测试。
"""Restore the latest checkpoint and plot the model's predictions on test data."""
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from subclassed_model import UNITS, Decoder, Encoder

tf.enable_eager_execution()


# load model
encoder = Encoder()
decoder = Decoder()
optimizer = tf.train.AdamOptimizer()

checkpoint_dir = './training_checkpoints'

checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

# latest_checkpoint() returns None when the directory holds no checkpoint, and
# restore(None) would then continue silently with randomly initialised
# weights, so fail loudly instead.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise IOError('No checkpoint found in {!r}; '
                  'run save_subclassed_model.py first.'.format(checkpoint_dir))
# restore() is called before the layers have created their variables; the
# saved values are applied when the variables are created below.
checkpoint.restore(latest_checkpoint)

# build model
BATCH_SIZE = 16

encoder.build(input_shape=tf.TensorShape((BATCH_SIZE, 1)))
decoder.build(input_shape=tf.TensorShape((BATCH_SIZE, UNITS)))

encoder.summary()
decoder.summary()

# create validation data
X_test = np.linspace(-1, 1, 200)

# validation dataset (batched, so inference is not one forward pass per sample)
val_dataset = tf.data.Dataset.from_tensor_slices(X_test).batch(BATCH_SIZE)

# predict
results = []
for x in val_dataset:
    x = tf.expand_dims(tf.cast(x, tf.float32), axis=1)  # (batch, 1)
    prediction = decoder(encoder(x))                    # (batch, 1)
    results.extend(prediction.numpy()[:, 0])

# plot results
plt.scatter(X_test, results)
plt.show()
运行 load_subclassed_model.py。
2019-06-27 13:27:40.712260: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 2019-06-27 13:27:42.105938: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: name: GeForce GTX 1060 major: 6 minor: 1 memoryClockRate(GHz): 1.6705 pciBusID: 0000:01:00.0 totalMemory: 6.00GiB freeMemory: 4.97GiB 2019-06-27 13:27:42.106200: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0 2019-06-27 13:27:42.921364: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix: 2019-06-27 13:27:42.921510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0 2019-06-27 13:27:42.921594: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N 2019-06-27 13:27:42.921777: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 4722 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1060, pci bus id: 0000:01:00.0, compute capability: 6.1) _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) multiple 32 _________________________________________________________________ dense_1 (Dense) multiple 136 ================================================================= Total params: 168 Trainable params: 168 Non-trainable params: 0 _________________________________________________________________ _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_2 (Dense) multiple 9 ================================================================= Total params: 9 Trainable params: 9 Non-trainable params: 0 
_________________________________________________________________ Process finished with exit code 0
版权声明:本文为博主原创文章,欢迎转载,转载请注明作者及原文出处!
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:[Tensorflow] 使用 tf.train.Checkpoint() 保存 / 加载 keras subclassed model - Python技术站