1 Building a simple layer

We can implement a custom layer by subclassing tf.keras.layers.Layer.

In [1]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.keras.backend.clear_session()
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
 
In [2]:
# Defining a layer means: creating the layer's weights and the computation from inputs to outputs
class MyLayer(layers.Layer):
    def __init__(self, input_dim=32, unit=32):
        super(MyLayer, self).__init__()
        
        w_init = tf.random_normal_initializer()
        # weight variable
        self.weight = tf.Variable(initial_value=w_init(
            shape=(input_dim, unit), dtype=tf.float32), trainable=True)
        
        b_init = tf.zeros_initializer()
        # bias variable
        self.bias = tf.Variable(initial_value=b_init(
            shape=(unit,), dtype=tf.float32), trainable=True)
    
    def call(self, inputs):
        # fully connected layer: y = x·W + b
        return tf.matmul(inputs, self.weight) + self.bias
        
x = tf.ones((3,5))
my_layer = MyLayer(5, 4)
out = my_layer(x)
print(out)
 
tf.Tensor(
[[-0.07703826  0.02132557  0.06587847  0.11276232]
 [-0.07703826  0.02132557  0.06587847  0.11276232]
 [-0.07703826  0.02132557  0.06587847  0.11276232]], shape=(3, 4), dtype=float32)
 

When the layer is built as above, it automatically tracks the weights w and b. Alternatively, the weights can be created directly with the add_weight method.
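
For example (a quick check, assuming the my_layer instance from the cell above is still in scope), the tracked variables can be inspected directly:

print(len(my_layer.weights))                           # 2: the weight matrix and the bias
print([w.shape.as_list() for w in my_layer.weights])   # e.g. [[5, 4], [4]]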

In [3]:
class MyLayer(layers.Layer):
    def __init__(self, input_dim=32, unit=32):
        super(MyLayer, self).__init__()
        # use add_weight to create the variables so they are tracked by the layer
        self.weight = self.add_weight(shape=(input_dim, unit),
                                     initializer=keras.initializers.RandomNormal(),
                                     trainable=True)
        self.bias = self.add_weight(shape=(unit,),
                                   initializer=keras.initializers.Zeros(),
                                   trainable=True)
    
    def call(self, inputs):
        return tf.matmul(inputs, self.weight) + self.bias
        
x = tf.ones((3,5))
my_layer = MyLayer(5, 4)
out = my_layer(x)
print(out)
 
tf.Tensor(
[[ 0.05495948 -0.07037501  0.20973718 -0.08516831]
 [ 0.05495948 -0.07037501  0.20973718 -0.08516831]
 [ 0.05495948 -0.07037501  0.20973718 -0.08516831]], shape=(3, 4), dtype=float32)
 

Non-trainable weights can also be created.

In [4]:
class AddLayer(layers.Layer):
    def __init__(self, input_dim=32):
        super(AddLayer, self).__init__()
        # a variable that only stores state and is not trained
        self.sum = self.add_weight(shape=(input_dim,),
                                     initializer=keras.initializers.Zeros(),
                                     trainable=False)
       
    
    def call(self, inputs):
        self.sum.assign_add(tf.reduce_sum(inputs, axis=0))
        return self.sum
        
x = tf.ones((3,3))
my_layer = AddLayer(3)
out = my_layer(x)
print(out.numpy())
out = my_layer(x)
print(out.numpy())
print('weight:', my_layer.weights)
print('non-trainable weight:', my_layer.non_trainable_weights)
print('trainable weight:', my_layer.trainable_weights)
 
[3. 3. 3.]
[6. 6. 6.]
weight: [<tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([6., 6., 6.], dtype=float32)>]
non-trainable weight: [<tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([6., 6., 6.], dtype=float32)>]
trainable weight: []
 

When the input dimension is not known at the time the layer is defined, you can override the build() method and create the weights there from the input shape it receives.

In [5]:
class MyLayer(layers.Layer):
    def __init__(self, unit=32):
        super(MyLayer, self).__init__()
        self.unit = unit
        
    def build(self, input_shape):
        # input_shape is available in build(), so the weights can be created lazily
        self.weight = self.add_weight(shape=(input_shape[-1], self.unit),
                                     initializer=keras.initializers.RandomNormal(),
                                     trainable=True)
        self.bias = self.add_weight(shape=(self.unit,),
                                   initializer=keras.initializers.Zeros(),
                                   trainable=True)
    
    def call(self, inputs):
        return tf.matmul(inputs, self.weight) + self.bias
        

my_layer = MyLayer(3)
x = tf.ones((3,5))
out = my_layer(x)
print(out)
my_layer = MyLayer(3)

x = tf.ones((2,2))
out = my_layer(x)
print(out)
 
tf.Tensor(
[[-0.25201735  0.09862914  0.06587204]
 [-0.25201735  0.09862914  0.06587204]
 [-0.25201735  0.09862914  0.06587204]], shape=(3, 3), dtype=float32)
tf.Tensor(
[[-0.0270178  -0.03847811 -0.09622537]
 [-0.0270178  -0.03847811 -0.09622537]], shape=(2, 3), dtype=float32)
 

2 Building layers recursively from sublayers

A custom layer can call other custom layers as sublayers.

In [6]:
class MyBlock(layers.Layer):
    def __init__(self):
        super(MyBlock, self).__init__()
        # other custom layers used as sublayers
        self.layer1 = MyLayer(32)
        self.layer2 = MyLayer(16)
        self.layer3 = MyLayer(2)
    def call(self, inputs):
        h1 = self.layer1(inputs)
        h1 = tf.nn.relu(h1)
        h2 = self.layer2(h1)
        h2 = tf.nn.relu(h2)
        return self.layer3(h2)
    
my_block = MyBlock()
print('trainable weights:', len(my_block.trainable_weights))
y = my_block(tf.ones(shape=(3, 64)))
# the weights are created in build(), so they only exist after the block has been called once
print('trainable weights:', len(my_block.trainable_weights))
 
trainable weights: 0
trainable weights: 6
 

A layer can collect losses via add_loss(); losses from sublayers are gathered recursively.

In [7]:
class LossLayer(layers.Layer):

    def __init__(self, rate=1e-2):
        super(LossLayer, self).__init__()
        self.rate = rate

    def call(self, inputs):
        # record a loss proportional to the inputs
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs

class OutLayer(layers.Layer):
    def __init__(self):
        super(OutLayer, self).__init__()
        self.loss_fun=LossLayer(1e-2)
    def call(self, inputs):
        # just wraps the single loss layer
        return self.loss_fun(inputs)
    
my_layer = OutLayer()
print(len(my_layer.losses)) # not called yet
y = my_layer(tf.zeros((1, 1)))
print(len(my_layer.losses)) # after one call
y = my_layer(tf.zeros((1, 1)))
print(len(my_layer.losses)) # losses are reset at the start of each call, so the count stays 1
 
0
1
1
 

If a built-in Keras layer is used inside, its regularization losses are collected as well.

In [8]:
class OuterLayer(layers.Layer):

    def __init__(self):
        super(OuterLayer, self).__init__()
        # the regularization loss of the sublayer is added to this layer's losses
        self.dense = layers.Dense(32, kernel_regularizer=tf.keras.regularizers.l2(1e-3))
    
    def call(self, inputs):
        return self.dense(inputs)


my_layer = OuterLayer()
y = my_layer(tf.zeros((1,1)))
print(my_layer.losses) 
print(my_layer.weights)
 
[<tf.Tensor: id=266, shape=(), dtype=float32, numpy=0.0020732714>]
[<tf.Variable 'outer_layer/dense/kernel:0' shape=(1, 32) dtype=float32, numpy=
array([[ 0.18419057,  0.41972226,  0.06816366,  0.3822255 , -0.18456893,
        -0.25967044, -0.3724193 , -0.10103354,  0.28944224, -0.38763577,
         0.26489502,  0.40765905,  0.3051821 ,  0.3081022 , -0.02279559,
         0.16751188,  0.23520029,  0.28544015,  0.22495598,  0.21490109,
         0.28766167, -0.2586367 , -0.04058033, -0.22604927, -0.12887421,
         0.10930204,  0.41669363,  0.1015256 ,  0.0400646 ,  0.12960178,
        -0.04219085, -0.32611725]], dtype=float32)>, <tf.Variable 'outer_layer/dense/bias:0' shape=(32,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)>]
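
In a custom training loop, these collected losses are typically added to the main loss before computing gradients. The snippet below is a minimal sketch using the my_layer instance from the cell above; the optimizer, learning rate and target values are purely illustrative:

optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
x = tf.ones((4, 1))
y_true = tf.zeros((4, 32))

with tf.GradientTape() as tape:
    y_pred = my_layer(x)                       # the forward pass re-populates my_layer.losses
    main_loss = tf.reduce_mean(tf.square(y_pred - y_true))
    # add the regularization losses collected by the layer to the main loss
    loss = main_loss + tf.add_n(my_layer.losses)
grads = tape.gradient(loss, my_layer.trainable_weights)
optimizer.apply_gradients(zip(grads, my_layer.trainable_weights))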
 

3 Other layer configuration

3.1 Making your own layer serializable

In [9]:
class Linear(layers.Layer):

    def __init__(self, units=32, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True)
    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b
    
    def get_config(self):
        # return the layer configuration so the layer can be serialized
        config = super(Linear, self).get_config()
        config.update({'units':self.units})
        return config
    
    
layer = Linear(125)
config = layer.get_config()
print(config)
# rebuild the layer from its config (the layer class is known, the hyperparameters come from the config)
new_layer = Linear.from_config(config)
 
{'name': 'linear', 'trainable': True, 'dtype': 'float32', 'units': 125}
 

If more flexibility is needed during deserialization (rebuilding the layer from its config), the from_config method can be overridden. Its default implementation looks like this:

In [10]:
# the default implementation, defined inside the layer class as a classmethod
def from_config(cls, config):
    return cls(**config)
 

3.2 Configuring training-specific behavior

Some layers, such as BatchNormalization and Dropout, behave differently during training and inference. For such layers, a training argument in the call() method is used to control which behavior to apply.

In [11]:
class MyDropout(layers.Layer):
    def __init__(self, rate, **kwargs):
        super(MyDropout, self).__init__(**kwargs)
        self.rate = rate
    def call(self, inputs, training=None):
        # default to inference behavior when no training flag is passed
        if training is None:
            training = False
        return tf.cond(training,
                       lambda: tf.nn.dropout(inputs, rate=self.rate),
                       lambda: inputs)
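
As a quick check (a small sketch using the MyDropout layer above), the training flag can be passed explicitly when calling the layer; Keras passes training=True automatically inside model.fit():

x = tf.ones((2, 4))
drop = MyDropout(0.5)
print(drop(x, training=True))   # roughly half of the entries zeroed, the rest scaled by 1/(1 - rate)
print(drop(x, training=False))  # inputs returned unchanged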
 

4 Building your own model

Usually, we use the Layer class to define the inner computation blocks and the Model class to define the outer model, i.e. the object that will be trained.

Differences between the Model class and the Layer class (a short sketch follows this list):

  • It exposes built-in training, evaluation and prediction loops (model.fit(), model.evaluate(), model.predict()).
  • It exposes the list of its inner layers via the model.layers property.
  • It exposes saving and serialization APIs.
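
As a minimal illustration (a sketch only, separate from the VAE built in the next subsection), the Linear layer from section 3.1 can be wrapped in a small keras.Model subclass to get the built-in training loop, the layers property and the saving APIs; the data here is random and purely illustrative:

import numpy as np

class LinearModel(keras.Model):
    def __init__(self, units=1, **kwargs):
        super(LinearModel, self).__init__(**kwargs)
        self.linear = Linear(units)   # reuse the custom Linear layer defined above

    def call(self, inputs):
        return self.linear(inputs)

model = LinearModel(1)
model.compile(optimizer='sgd', loss='mse')
x_train = np.random.random((32, 4)).astype('float32')
y_train = np.random.random((32, 1)).astype('float32')
model.fit(x_train, y_train, epochs=1, verbose=0)   # built-in training loop
print(model.layers)                                # the inner Linear layer is exposed via model.layers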
 

4.1 A custom model

Below we build a variational autoencoder (VAE) to show how to construct your own model and train it with the built-in training functions.

In [12]:
# sampling layer: draws z from the latent distribution
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5*z_log_var) * epsilon
# encoder
class Encoder(layers.Layer):
    def __init__(self, latent_dim=32, 
                intermediate_dim=64, name='encoder', **kwargs):
        super(Encoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
        self.dense_mean = layers.Dense(latent_dim)
        self.dense_log_var = layers.Dense(latent_dim)
        self.sampling = Sampling()
        
    def call(self, inputs):
        h1 = self.dense_proj(inputs)
        # compute z_mean and z_log_var
        z_mean = self.dense_mean(h1)
        z_log_var = self.dense_log_var(h1)
        # sample z
        z = self.sampling((z_mean, z_log_var))
        return z_mean, z_log_var, z
        
# decoder
class Decoder(layers.Layer):
    def __init__(self, original_dim, 
                 intermediate_dim=64, name='decoder', **kwargs):
        super(Decoder, self).__init__(name=name,