1. 初探迁移学习
1.1 什么是迁移学习?
迁移学习(Transfer Learning)是指在训练新模型时,把已经训练稳定的预训练模型的部分或全部参数引入新模型,作为新模型的初始参数(或固定不变的特征提取层)来使用,从而加快模型的训练速度并提高模型的准确率。
1.2 迁移学习的优势
- 可以提高模型的准确率
- 可以加快模型的训练速度
- 可以减少数据量及时间投入
1.3 迁移学习的应用
2. 实现迁移学习
2.1 图像识别任务
2.1.1 安装TensorFlow
pip install tensorflow
2.1.2 下载预训练模型
- 下载Inception-v3模型:http://download.tensorflow.org/models/image/imagenet/inception-v3-2016-03-01.tar.gz
- 解压到指定目录下(假设目录为./models)。注意:后文代码读取的是图定义文件 ./models/inception-v3.pb,请确认解压后该文件存在;如果压缩包中只有检查点(checkpoint)文件,需要先将其导出为 .pb 文件。
2.1.3 准备数据集
- 准备训练集和测试集的图片文件列表 train_list 与 test_list(以及每张图片对应的类别标签),供 2.1.5 中的 read_file_list 读取。
2.1.4 构建网络
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
with tf.Session() as sess:
    # Location of the serialized GraphDef prepared in the download step.
    model_filename = './models/inception-v3.pb'
    with gfile.FastGFile(model_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        # A freshly constructed GraphDef is empty; it has to be filled from
        # the file's bytes before it is imported, otherwise no nodes are
        # added to the graph and the tensor lookup below fails.
        graph_def.ParseFromString(f.read())
    # name='' imports the nodes without a scope prefix, so tensors keep the
    # names stored in the file (e.g. 'softmax:0').
    tf.import_graph_def(graph_def, name='')
    # Fetch the output layer of the pre-trained network.
    softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
2.1.5 训练模型
# Convert the image files into the arrays that are fed to the network.
def read_file_list(file_list):
    """Load the images named in *file_list* as a normalised float32 batch.

    Every image is read with OpenCV, resized to 299x299, converted from
    BGR to RGB channel order and scaled by 1/255.

    Args:
        file_list: iterable whose entries are either an image path, or a
            ``(path, label)`` pair. NOTE(review): the original article
            never showed where labels come from; accepting pairs is the
            smallest extension that lets the returned label array be
            filled. Plain-path entries contribute no label.

    Returns:
        ``(images, labels)`` where ``images`` is a float32 array and
        ``labels`` is an int32 array holding the labels supplied through
        ``(path, label)`` entries (empty when none were supplied).

    Raises:
        ValueError: if an image file cannot be read.
    """
    images = []
    labels = []
    for entry in file_list:
        if isinstance(entry, (tuple, list)):
            path, label = entry
            labels.append(label)
        else:
            path = entry
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None;
            # fail here with the offending path instead of deep inside
            # cv2.resize.
            raise ValueError("cannot read image file: %r" % (path,))
        image = cv2.resize(image, (299, 299))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # The original computed the image and then dropped it, so the
        # function always returned empty arrays.
        images.append(image)
    return (
        np.asarray(images, dtype=np.float32) / 255.0,
        np.asarray(labels, dtype=np.int32),
    )
# Load the training and evaluation data.
# NOTE(review): `train_list` and `test_list` are not defined anywhere in the
# visible article; they have to be produced by the dataset-preparation step.
train_images, train_labels = read_file_list(train_list)
test_images, test_labels = read_file_list(test_list)

# Number of optimisation steps. The original snippet used `steps` without
# defining it; this value is only a starting point and should be tuned.
steps = 1000

# The pre-trained network was imported into the default graph, so its
# tensors can be looked up there without relying on an earlier session.
graph = tf.get_default_graph()
# NOTE(review): 'input:0' is the name used by the original snippet; confirm
# it against the tensor names of the graph file that is actually loaded.
train_image_tensor = graph.get_tensor_by_name('input:0')
# Labels are data we supply, not something the pre-trained graph computes,
# so they get their own placeholder instead of a tensor from the graph.
train_label_tensor = tf.placeholder(tf.int32, [None], name='labels')

# New trainable classification head on top of the pre-trained output.
# The original referenced an undefined `logits_tensor` and created nothing
# trainable for the optimiser to update.
# NOTE(review): this reuses 'softmax:0' because it is the only pre-trained
# output the article names; an earlier feature layer may work better, and
# the tensor must have a statically known last dimension for dense().
pretrained_output = graph.get_tensor_by_name('softmax:0')
num_classes = int(train_labels.max()) + 1
logits_tensor = tf.layers.dense(pretrained_output, num_classes, activation=None)

# Build the loss function and the optimiser.
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
    labels=train_label_tensor, logits=logits_tensor)
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# Fraction of examples whose highest-scoring class equals the label
# (the original used an undefined `accuracy_tensor`).
predicted_classes = tf.cast(tf.argmax(logits_tensor, 1), tf.int32)
accuracy_tensor = tf.reduce_mean(
    tf.cast(tf.equal(predicted_classes, train_label_tensor), tf.float32))

# Train the model.
with tf.Session() as sess:
    # Initialise global variables: the new dense layer and the optimiser's
    # internal variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())
    for i in range(steps):
        _, loss = sess.run(
            [train_step, cross_entropy],
            feed_dict={train_image_tensor: train_images,
                       train_label_tensor: train_labels})
        if i % 100 == 0:
            print("Step: ", i, " Loss: ", loss)
    # Evaluate inside the session, while the trained variables still exist.
    accuracy = sess.run(
        accuracy_tensor,
        feed_dict={train_image_tensor: test_images,
                   train_label_tensor: test_labels})
    print("Accuracy: ", accuracy)
2.2 自然语言处理任务
2.2.1 安装TensorFlow
pip install tensorflow
2.2.2 下载预训练模型
- 下载GloVe预训练模型:http://nlp.stanford.edu/data/glove.6B.zip
- 解压到指定目录下(假设目录为./models)
2.2.3 准备数据集
2.2.4 构建网络
import tensorflow as tf
import numpy as np
from tensorflow.contrib.rnn import BasicLSTMCell
# Build the model.
class LSTM_Model(object):
    """Bidirectional LSTM sequence classifier (TensorFlow 1.x graph mode).

    The constructor builds the whole graph: placeholders, a forward and a
    backward LSTM, a dense classification layer, the loss, an Adam training
    op and an accuracy metric.

    Attributes set by the constructor:
        labels: int32 placeholder, one class id per sequence.
        inputs: float32 placeholder of shape [batch, time, num_units];
            note that the per-step feature size is tied to ``num_units``.
        seq_lens: int32 placeholder, true length of every sequence.
        dropout_keep_prob: float32 placeholder, probability of KEEPING a
            unit (1.0 disables dropout).
        loss, train_op, prediction, accuracy: the corresponding graph ops.
    """

    def __init__(self, num_classes, num_units):
        """Build the graph.

        Args:
            num_classes: number of target classes.
            num_units: LSTM hidden size, also used as input feature size.
        """
        self.num_classes = num_classes
        self.num_units = num_units
        self.labels = tf.placeholder(tf.int32, [None])
        self.inputs = tf.placeholder(tf.float32, [None, None, self.num_units])
        self.seq_lens = tf.placeholder(tf.int32, [None])
        self.dropout_keep_prob = tf.placeholder(tf.float32)
        cell_fw = BasicLSTMCell(self.num_units)
        cell_bw = BasicLSTMCell(self.num_units)
        _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, self.inputs,
            sequence_length=self.seq_lens, dtype=tf.float32)
        # Summarise each sequence by the final hidden state of both
        # directions -> [batch, 2 * num_units]. The original classified the
        # per-timestep outputs ([batch, time, classes]), which does not
        # match the per-sequence labels ([batch]) and made argmax run over
        # the time axis.
        output = tf.concat([state_fw.h, state_bw.h], 1)
        # The placeholder carries a KEEP probability. tf.layers.dropout
        # expects the DROP rate and is inactive unless training=True, so
        # the original call never applied the intended dropout.
        output = tf.nn.dropout(output, keep_prob=self.dropout_keep_prob)
        output = tf.layers.dense(output, self.num_classes, activation=None)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labels, logits=output))
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
        self.prediction = tf.argmax(output, 1)
        correct_pred = tf.equal(tf.cast(self.prediction, tf.int32), self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def train(self, sess, x, y, seq_lens, dropout_keep_prob):
        """Run one optimisation step and return ``(loss, accuracy)``.

        Args:
            sess: session in which the graph's variables are initialised.
            x: value fed to ``inputs``.
            y: value fed to ``labels``.
            seq_lens: value fed to ``seq_lens``.
            dropout_keep_prob: keep probability used for this step.
        """
        feed_dict = {
            self.inputs: x,
            self.labels: y,
            self.seq_lens: seq_lens,
            self.dropout_keep_prob: dropout_keep_prob,
        }
        _, loss, accuracy = sess.run(
            [self.train_op, self.loss, self.accuracy], feed_dict=feed_dict)
        return loss, accuracy

    def predict(self, sess, x, seq_lens):
        """Return the predicted class id for every sequence in *x*.

        Dropout is disabled by feeding a keep probability of 1.0.
        """
        feed_dict = {
            self.inputs: x,
            self.seq_lens: seq_lens,
            self.dropout_keep_prob: 1.0,
        }
        pred = sess.run(self.prediction, feed_dict=feed_dict)
        return pred
2.2.5 训练模型
import numpy as np
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras.layers import Embedding
# Hyper-parameters. The original snippet used `steps` and `batch_size`
# without defining them; the values below are starting points to tune.
steps = 10          # passes over the (shuffled) training set
batch_size = 64
vocab_size = 10000  # value passed as num_words to imdb.load_data
max_len = 100       # padded sequence length
embedding_dim = 100  # matches the glove.6B.100d vectors

# Load the data.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size, maxlen=max_len)
x_train = sequence.pad_sequences(x_train, maxlen=max_len, padding='post', truncating='post')
x_test = sequence.pad_sequences(x_test, maxlen=max_len, padding='post', truncating='post')

# Load the pre-trained GloVe vectors.
glove_file = "./models/glove.6B.100d.txt"
word2idx = imdb.get_word_index()
# imdb.load_data shifts every word index by 3 (its default index_from) to
# make room for the reserved ids below, so id -> word uses the same offset.
# The original dict was named idx2word but actually mapped word -> id.
idx2word = {(v + 3): k for k, v in word2idx.items()}
idx2word[0] = "<PAD/>"
idx2word[1] = "<START/>"
idx2word[2] = "<UNK/>"
idx2word[3] = "<UNUSED/>"

# Row i holds the GloVe vector of token id i; ids without a GloVe entry
# (including the reserved ids) keep an all-zero row. The original parsed
# each vector and then discarded it, leaving the matrix empty.
embedding_matrix = np.zeros((vocab_size, embedding_dim), dtype=np.float32)
# The file is opened as UTF-8 explicitly so reading does not depend on the
# platform's default encoding.
with open(glove_file, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip().split()
        if not line:
            continue
        word = line[0]
        raw_index = word2idx.get(word)
        if raw_index is None:
            continue
        token_id = raw_index + 3
        if token_id < vocab_size:
            embedding_matrix[token_id] = np.asarray(line[1:], dtype=np.float32)

# input_dim is the vocabulary size (the original passed the number of
# training samples). This Keras layer is not consumed by LSTM_Model below,
# which receives already-embedded inputs.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len, weights=[embedding_matrix])

with tf.Session() as sess:
    # LSTM_Model uses num_units as its input feature size as well, so it
    # has to equal the embedding dimension.
    lstm_model = LSTM_Model(num_classes=2, num_units=embedding_dim)
    # Variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())
    for i in range(steps):
        idxs = np.random.permutation(np.arange(0, len(y_train)))
        batch_loss = []
        batch_acc = []
        for j in range(0, len(y_train), batch_size):
            batch_idx = idxs[j:j + batch_size]
            batch_ids = x_train[batch_idx]
            # Padding id is 0, so the count of non-zero ids is the length.
            batch_seq_lens = np.sum(np.sign(batch_ids), axis=1)
            # The model expects float vectors, not integer ids: look the
            # ids up in the GloVe matrix -> [batch, max_len, embedding_dim].
            batch_inputs = embedding_matrix[batch_ids]
            batch_loss_j, batch_acc_j = lstm_model.train(sess, batch_inputs, y_train[batch_idx], batch_seq_lens, dropout_keep_prob=0.7)
            # Record the batch metrics; the original never stored them and
            # therefore printed the mean of an empty list.
            batch_loss.append(batch_loss_j)
            batch_acc.append(batch_acc_j)
        if i % 10 == 0:
            print("Step: ", i, " Loss: ", np.mean(batch_loss), " Accuracy: ", np.mean(batch_acc))

    # Evaluate in batches so the embedded test set is never materialised
    # at once. predict() returns class ids, so accuracy is computed here;
    # the original printed the raw predictions as "Test Accuracy".
    test_seq_lens = np.sum(np.sign(x_test), axis=1)
    num_correct = 0
    for j in range(0, len(y_test), batch_size):
        batch_ids = x_test[j:j + batch_size]
        batch_pred = lstm_model.predict(sess, embedding_matrix[batch_ids], test_seq_lens[j:j + batch_size])
        num_correct += int(np.sum(batch_pred == y_test[j:j + batch_size]))
    test_accuracy = num_correct / float(len(y_test))
    print("Test Accuracy: ", test_accuracy)
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:详解tensorflow实现迁移学习实例 - Python技术站