如何用Python 实现全连接神经网络（Multi-layer Perceptron）

下面是Python实现全连接神经网络的攻略：

什么是全连接神经网络？

全连接神经网络(FCN)是深度学习中的一种基本架构，它是由多个全连接层（Fully Connected Layer）构成的深层神经网络，典型的形式是多层感知机（Multi-Layer Perceptron，简称 MLP），其可以应用于分类、回归等任务。在每个全连接层中，每一个神经元都与上下一层的每个神经元相连，所以也被称为密集连接层。

实现步骤

实现全连接神经网络的步骤如下：

定义网络模型的参数和超参数，包括学习率(lr)、隐藏层大小(hidden_size)、输出层大小(output_size)、权重初始值(weight_init)、偏移初始值(bias_init)等。

    import numpy as np

    class NeuralNet:
        def __init__(self, input_size, hidden_size, output_size, lr=0.01, weight_init='random', bias_init='zeros'):
            self.input_size = input_size
            self.hidden_size = hidden_size
            self.output_size = output_size
            self.lr = lr
            self.weight_init = weight_init
            self.bias_init = bias_init
            self.params = {}
            self.grads = {}
            self.loss_history = []
            self.accuracy_history = []
            self.epoches = 0
            self._init_weights()

        def _init_weights(self):
            if self.weight_init == 'normal':
                self.params['W1'] = np.random.normal(0, 1, size=(self.input_size, self.hidden_size))
                self.params['W2'] = np.random.normal(0, 1, size=(self.hidden_size, self.output_size))
            elif self.weight_init == 'random':
                self.params['W1'] = np.random.rand(self.input_size, self.hidden_size)
                self.params['W2'] = np.random.rand(self.hidden_size, self.output_size)
            else:
                raise ValueError('Invalid weight_init')
            self.params['b1'] = np.zeros(self.hidden_size)
            self.params['b2'] = np.zeros(self.output_size)

前向传播计算，计算每层神经元输出的数值，进行数据的处理。

        def _forward(self, X):
            h1 = np.dot(X, self.params['W1']) + self.params['b1']
            h1 = np.maximum(0, h1)  # ReLU
            y_pred = np.dot(h1, self.params['W2']) + self.params['b2']
            return y_pred, h1

计算损失函数，我们使用交叉熵损失函数（Cross-Entropy loss）。求出每个样本的损失值，最后使用所有样本损失的均值。

        def _cross_entropy_error(self, y_pred, y_true):
            if y_true.ndim == 1:
                y_true = y_true.reshape(1, y_true.size)
                y_pred = y_pred.reshape(1, y_pred.size)
            batch_size = y_pred.shape[0]
            return -np.sum(np.log(y_pred[np.arange(batch_size), y_true])) / batch_size

反向传播计算，计算梯度，并记录历史损失和准确率。

        def _backward(self, X, y_true, y_pred, h1):
            batch_size = y_pred.shape[0]
            dy = (y_pred - y_true) / batch_size
            self.grads['W2'] = np.dot(h1.T, dy)
            self.grads['b2'] = np.sum(dy, axis=0)
            dh1 = np.dot(dy, self.params['W2'].T)
            dh1[h1 <= 0] = 0  # ReLU
            self.grads['W1'] = np.dot(X.T, dh1)
            self.grads['b1'] = np.sum(dh1, axis=0)

更新模型中的权重和偏置值。

        def _update(self):
            for key in self.params.keys():
                self.params[key] -= self.lr * self.grads[key]

测试集准确率，计算模型的准确率。

        def accuracy(self, X, y_true):
            y_pred, _ = self._forward(X)
            if y_pred.ndim != 1:
                y_pred = np.argmax(y_pred, axis=1)
            return np.mean(y_pred == y_true)


        def train(self, X_train, y_train, epoches, X_test=None, y_test=None, verbose=False):
            for epoch in range(epoches):
                y_pred, h1 = self._forward(X_train)
                self.loss = self._cross_entropy_error(y_pred, y_train)
                self._backward(X_train, y_train, y_pred, h1)
                self._update()
                self.loss_history.append(self.loss)
                self.epoches += 1
                if (X_test is not None) and (y_test is not None):
                    acc = self.accuracy(X_test, y_test)
                    self.accuracy_history.append(acc)
                if verbose:
                    if self.epoches % 10 == 0:
                        print(f"epoch: {self.epoches}, loss: {self.loss}, accuracy: {acc}")

示例1：使用全连接神经网络进行MNIST手写数字图像分类

    from urllib import request
    import os
    import gzip
    import pickle

    def load_mnist():
        url_base = 'http://yann.lecun.com/exdb/mnist/'
        file_names = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz',
                      't10k-labels-idx1-ubyte.gz']

        dataset_dir = os.path.dirname(os.path.abspath(__file__))

        def download(file_name):
            file_path = dataset_dir + '/' + file_name
            if os.path.exists(file_path):
                return

            print('Downloading ' + file_name + ' ... ')
            request.urlretrieve(url_base + file_name, file_path)
            print(' Done')

        def load_image(file_name):
            file_path = dataset_dir + '/' + file_name
            print('Converting ' + file_name + ' to NumPy Array ...')
            with gzip.open(file_path, 'rb') as f:
                data = np.frombuffer(f.read(), np.uint8, offset=16)
            data = data.reshape(-1, 784)
            print(' Done')
            return data

        def load_label(file_name):
            file_path = dataset_dir + '/' + file_name
            print('Converting ' + file_name + ' to NumPy Array ...')
            with gzip.open(file_path, 'rb') as f:
                data = np.frombuffer(f.read(), np.uint8, offset=8)
            print(' Done')
            return data

        for file_name in file_names:
            download(file_name)

        x_train = load_image(file_names[0])
        t_train = load_label(file_names[1])
        x_test = load_image(file_names[2])
        t_test = load_label(file_names[3])

        return x_train, t_train, x_test, t_test

    x_train, t_train, x_test, t_test = load_mnist()

    net = NeuralNet(input_size=784, hidden_size=50, output_size=10, lr=0.1, weight_init='normal')
    net.train(x_train, t_train, epoches=500, X_test=x_test, y_test=t_test, verbose=True)

其中，MNIST数据集可以在 http://yann.lecun.com/exdb/mnist/ 上下载。

其中，我们创建了一个50层的隐藏层，使用500轮训练，学习率为0.1，权重初始化使用正态分布。

结果的预测准确率可以通过以下代码查看：

    print(f'Train accuracy: {net.accuracy(x_train, t_train)}')
    print(f'Test accuracy: {net.accuracy(x_test, t_test)}')

结果显示，在本次训练中，训练集和测试集的准确度分别为 0.9987333333333333 和 0.9711。

示例2：使用全连接神经网络进行回归问题预测

    n_sample = 100
    x = np.linspace(0, 1, n_sample)
    f = lambda x: x ** 2 + np.sin(x * 10)
    y = f(x) + np.random.rand(n_sample) * 0.3

    net = NeuralNet(input_size=1, hidden_size=10, output_size=1, lr=0.1, weight_init='random')
    epoches = 1000
    for epoch in range(epoches):
        y_pred, h1 = net._forward(x.reshape(-1, 1))
        net.loss = np.mean((y_pred - y.reshape(-1, 1)) ** 2)
        net._backward(x.reshape(-1, 1), y.reshape(-1, 1), y_pred, h1)
        net._update()
        net.loss_history.append(net.loss)
        if epoch % 100 == 0:
            print(f"epoch: {epoch}, loss: {net.loss}")

    import matplotlib.pyplot as plt

    plt.plot(x, f(x), label='Truth')
    plt.scatter(x, y, label='Data')
    plt.plot(x, y_pred, label='Prediction')
    plt.legend()
    plt.show()

其中，我们生成了一个数据集，y数据由函数x^2+sin(10*x)加上一些均值为0方差为0.3的噪声。我们使用10层的隐藏层，使用1000轮训练，学习率为0.1，权重初始化使用随机生成。

结果的拟合情况可以通过以下代码查看：

    plt.plot(x, f(x), label='Truth')
    plt.scatter(x, y, label='Data')
    plt.plot(x, y_pred, label='Prediction')
    plt.legend()
    plt.show()

结果显示，我们的模型成功预测出了函数的拟合曲线。

本站文章如无特殊说明，均为本站原创，如若转载，请注明出处：如何用Python 实现全连接神经网络（Multi-layer Perceptron） - Python技术站

如何用Python 实现全连接神经网络（Multi-layer Perceptron）

什么是全连接神经网络？

实现步骤

示例1：使用全连接神经网络进行MNIST手写数字图像分类

示例2：使用全连接神经网络进行回归问题预测

相关文章