以下是使用NumPy从头开始实现神经网络的完整攻略以及两个示例:
一、准备工作
- 安装NumPy库。
- 准备数据集。神经网络需要数据进行训练和测试,因此需要准备数据集。这里以鸢尾花数据集为例。
- 导入NumPy和数据集。
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
二、数据预处理
- 分离训练集和测试集。将数据集分为训练集和测试集,通常测试集占数据集的20-30%。
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.3)
- 归一化。由于特征值的取值范围不同,需要将特征值归一化,确保在同一范围内和同一数据格式内。
X_train = X_train / np.max(X_train)
X_test = X_test / np.max(X_test)
- One-hot编码。将标签值变成一个向量,向量的长度等于标签可选范围的个数,每个值对应一个可能的取值,与实际取值完全对应的位置上放置1,其余位置则为0。
def one_hot(y):
y_onehot = np.zeros((y.shape[0], np.unique(y).shape[0]))
for i in range(y.shape[0]):
y_onehot[i, y[i]] = 1
return y_onehot
y_train = one_hot(y_train)
y_test = one_hot(y_test)
三、构建神经网络
- 初始化权重。权重即神经网络中每个神经元与其他神经元相互接触的连接权重,需要初始化为随机值。
def init_params(layer_dimensions):
params = {}
L = len(layer_dimensions)
for l in range(1, L):
params['W' + str(l)] = np.random.randn(layer_dimensions[l], layer_dimensions[l-1]) * 0.01
params['b' + str(l)] = np.zeros((layer_dimensions[l], 1))
return params
params = init_params([4, 10, 3])
- 前向传播。由于神经网络的本质就是通过前向传播来预测输出结果,因此需要编写前向传播算法。
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def forward_propagation(X, params):
L = len(params) // 2
A = X
for l in range(1, L):
A_prev = A
Z = np.dot(params['W' + str(l)], A_prev) + params['b' + str(l)]
A = sigmoid(Z)
ZL = np.dot(params['W' + str(L)], A) + params['b' + str(L)]
AL = np.exp(ZL) / np.sum(np.exp(ZL), axis = 0)
return AL
AL = forward_propagation(X_train, params)
- 计算损失函数。定义损失函数是为了衡量预测结果和实际结果的差距。
def compute_cost(AL, Y):
m = Y.shape[1]
cost = (-1/m) * np.sum(np.dot(Y, np.log(AL).T) + np.dot(1-Y, np.log(1-AL).T))
cost = np.squeeze(cost)
return cost
cost = compute_cost(AL, y_train)
- 反向传播更新权重。定义反向传播函数进行权重的更新。
def sigmoid_backward(dA, Z):
A = sigmoid(Z)
return dA * A * (1 - A)
def backward_propagation(AL, Y, cache):
grads = {}
L = len(cache) // 3
m = Y.shape[1]
Y = Y.reshape(AL.shape)
dZL = AL - Y
dAL_prev = dZL
for l in reversed(range(1, L+1)):
A_prev = cache['A' + str(l-1)]
dZ = np.multiply(sigmoid_backward(dAL_prev, cache['Z' + str(l-1)]), dAL_prev)
grads['dW' + str(l)] = np.dot(dZ, A_prev.T) / m
grads['db' + str(l)] = np.sum(dZ, axis = 1, keepdims=True) / m
dAL_prev = np.dot(cache['W' + str(l)].T, dZ)
return grads
grads = backward_propagation(AL, y_train, cache)
- 使用梯度下降法更新参数。
def update_params(params, grads, learning_rate):
L = len(params) // 2
for l in range(1, L+1):
params['W' + str(l)] = params['W' + str(l)] - learning_rate * grads['dW' + str(l)]
params['b' + str(l)] = params['b' + str(l)] - learning_rate * grads['db' + str(l)]
return params
params = update_params(params, grads, learning_rate = 0.01)
四、模型训练和预测
- 定义模型训练函数,使用整个数据集进行多次迭代来训练神经网络模型。
def model(X_train, Y_train, layer_dimensions, learning_rate = 0.01, num_iterations = 10000, print_cost = False):
params = init_params(layer_dimensions)
for i in range(num_iterations):
AL = forward_propagation(X_train, params)
cost = compute_cost(AL, Y_train)
grads = backward_propagation(AL, Y_train, cache)
params = update_params(params, grads, learning_rate)
if print_cost and i % 1000 == 0:
print('Cost after iteration %i: %f' %(i, cost))
return params
params = model(X_train.T, y_train.T, [4, 10, 3])
- 定义模型预测函数。
def predict(X, params):
AL = forward_propagation(X, params)
predictions = np.argmax(AL, axis = 0)
return predictions
y_pred = predict(X_test.T, params)
五、代码示例
下面给出两个代码示例,分别使用神经网络解决手写数字识别和狗与猫分类问题。
- 手写数字识别
准备数据集。
from sklearn.datasets import load_digits
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size = 0.3)
X_train = X_train / np.max(X_train)
X_test = X_test / np.max(X_test)
y_train = one_hot(y_train)
y_test = one_hot(y_test)
定义模型。以多层神经网络为例。
params = model(X_train.T, y_train.T, [64, 128, 64, 10], learning_rate = 0.1, num_iterations = 3000, print_cost = True)
预测结果。
y_pred = predict(X_test.T, params)
- 狗与猫分类问题
准备数据集。
import os
path = 'train/'
def load_data(path):
X = []
y = []
for file in os.listdir(path):
if 'dog' in file:
y.append(1)
elif 'cat' in file:
y.append(0)
img = cv2.imread(os.path.join(path,file))
img = cv2.resize(img, (64, 64), interpolation = cv2.INTER_AREA)
X.append(img)
return np.array(X), np.array(y)
X_train, y_train = load_data(path)
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size = 0.3)
X_train = X_train / 255
X_test = X_test / 255
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))
y_train = one_hot(y_train)
y_test = one_hot(y_test)
定义模型。注意:下面的示例仍然是全连接神经网络(并非真正的卷积神经网络);若要处理图像,输入层维度应等于展平后的特征数(64×64×3 = 12288)。
params = model(X_train.T, y_train.T, [64, 128, 256, 512, 2], learning_rate = 0.001, num_iterations = 10, print_cost = True)
预测结果。
y_pred = predict(X_test.T, params)
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:使用NumPy从头开始实现神经网络 - Python技术站