参考大佬的博客:https://blog.csdn.net/u013733326/article/details/79639509

代码:

# coding=utf-8
# This is a sample Python script.

# Press ⌃R to execute it or replace it with your code.
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.


import numpy as np
import matplotlib.pyplot as plt
import h5py
from lr_utils import load_dataset


# Press the green button  in the gutter to run the script.
def load_dataset():
    """Load the cat / non-cat dataset from the two HDF5 files.

    Reads ``datasets/train_catvnoncat.h5`` and ``datasets/test_catvnoncat.h5``
    (paths are relative to the current working directory). Every dataset is
    copied into an in-memory NumPy array, so both files can be closed before
    the function returns.

    NOTE: this definition shadows the ``load_dataset`` name imported from
    ``lr_utils`` at the top of the file.

    Returns:
        A 5-tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. The two label arrays are reshaped to
        ``(1, number_of_labels)``; the other arrays keep the shape stored
        in the file.
    """
    # Context managers release the HDF5 handles even if a key is missing.
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn the 1-D label vectors into row vectors of shape (1, n).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*.

    ``z`` may be a scalar or a NumPy array; arrays are processed
    element-wise by ``np.exp``.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator


def init(dim):
    """Create zero-initialised parameters for logistic regression.

    Args:
        dim: Number of rows of the weight column vector.

    Returns:
        ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros and ``b``
        is the integer ``0``.
    """
    weights = np.zeros((dim, 1))
    bias = 0

    # Sanity checks on the freshly built parameters.
    assert weights.shape == (dim, 1)
    assert isinstance(bias, (int, float))
    return weights, bias


def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    Args:
        w: Weight vector; the gradient ``dw`` is asserted to have the
            same shape.
        b: Bias added to ``w.T @ X`` before the sigmoid.
        X: Input matrix; ``X.shape[1]`` is used as the number of examples.
        Y: Labels, combined element-wise with the activations.

    Returns:
        ``(grads, cost)`` where ``grads`` is a dict with keys ``"dw"`` and
        ``"db"`` and ``cost`` is the squeezed (0-d) cross-entropy cost.
    """
    sample_count = X.shape[1]

    # Forward pass: activations and the averaged cross-entropy cost.
    activations = sigmoid(np.dot(w.T, X) + b)
    log_loss_terms = Y * np.log(activations) + (1.0 - Y) * (np.log((1.0 - activations)))
    cost = (-1.0 / sample_count) * np.sum(log_loss_terms)

    # Backward pass: gradients of the cost with respect to w and b.
    error = activations - Y
    dw = (1.0 / sample_count) * np.dot(X, error.T)
    db = (1.0 / sample_count) * np.sum(error)

    assert dw.shape == w.shape
    assert db.dtype == float
    cost = np.squeeze(cost)
    assert cost.shape == ()

    return {"dw": dw, "db": db}, cost


def optimize(w, b, X, Y, num_iterations, learning_rate):
    """Fit ``w`` and ``b`` with plain batch gradient descent.

    Each iteration calls ``propagate`` on the full data set and moves the
    parameters one step against the gradient.

    Args:
        w: Initial weights.
        b: Initial bias.
        X: Input matrix passed through to ``propagate``.
        Y: Labels passed through to ``propagate``.
        num_iterations: Number of gradient-descent steps to take.
        learning_rate: Step size applied to both gradients.

    Returns:
        ``(params, grads, costs)``:

        * ``params`` - dict with the final ``"w"`` and ``"b"``.
        * ``grads`` - dict with the ``"dw"`` and ``"db"`` of the last
          iteration (both ``None`` when ``num_iterations`` is 0).
        * ``costs`` - the cost at iterations 0, 100, 200, ..., one entry
          per checkpoint.
    """
    costs = []
    # Defined up front so a zero-iteration call returns cleanly instead of
    # failing on unbound locals when the result dicts are built.
    dw = db = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record the cost exactly once per 100 iterations so the list maps
        # one-to-one onto checkpoints when plotted.
        if i % 100 == 0:
            costs.append(cost)

    params = {
        "w": w,
        "b": b,
    }

    grads = {
        "dw": dw,
        "db": db,
    }
    return params, grads, costs


def predict(w, b, X):
    """Predict 0/1 labels with learned logistic-regression parameters.

    Args:
        w: Weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Bias added to ``w.T @ X`` before the sigmoid.
        X: Input matrix with one example per column.

    Returns:
        A float array of shape ``(1, X.shape[1])`` holding ``1`` where the
        activation is strictly greater than 0.5 and ``0`` elsewhere.
    """
    m = X.shape[1]
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold the whole first row at once instead of looping element by
    # element; the boolean result is stored as 1.0 / 0.0 in the float array.
    Y_prediction = np.zeros((1, m))
    Y_prediction[0, :] = A[0, :] > 0.5
    return Y_prediction

def solve(X_train, Y_train, X_test, Y_test, num_iteration=2000, learning_rate=0.5):
    """Train a logistic-regression model and report its accuracy.

    Initialises the parameters with ``init``, fits them with ``optimize``,
    predicts on both data sets with ``predict`` and prints the learning
    rate followed by the train and test accuracy in percent.

    Args:
        X_train: Training inputs; ``X_train.shape[0]`` sets the weight size.
        Y_train: Training labels.
        X_test: Test inputs.
        Y_test: Test labels.
        num_iteration: Number of gradient-descent iterations.
        learning_rate: Gradient-descent step size.

    Returns:
        A dict with the recorded costs, the test and train predictions,
        the fitted ``w`` and ``b``, and the two hyper-parameters.
    """
    initial_w, initial_b = init(X_train.shape[0])
    fitted, _, cost_history = optimize(
        initial_w, initial_b, X_train, Y_train, num_iteration, learning_rate
    )
    w, b = fitted["w"], fitted["b"]

    test_predictions = predict(w, b, X_test)
    train_predictions = predict(w, b, X_train)

    print("learning_rate = ", learning_rate)
    # Accuracy = 100% minus the mean absolute prediction error in percent.
    train_accuracy = 100 - np.mean(abs(train_predictions - Y_train)) * 100
    print("训练集准确性:" + format(train_accuracy), "%")
    test_accuracy = 100 - np.mean(abs(test_predictions - Y_test)) * 100
    print("测试集准确性:" + format(test_accuracy), "%")

    return {
        "costs": cost_history,
        "Y_perdiction_test": test_predictions,
        "Y_perdiction_train": train_predictions,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iteration": num_iteration,
    }

if __name__ == '__main__':
    # Script entry point: load the data, train one model per learning rate
    # and plot the recorded cost curves together.
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

    # Flatten every example into one column: (n, ...) -> (features, n).
    train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

    # Scale the data into the 0-1 range.
    train_set_x_flatten = train_set_x_flatten / 255
    test_set_x_flatten = test_set_x_flatten / 255
    print(str(train_set_x_flatten.shape))

    learning_rates = [0.01, 0.001, 0.0001]
    models = {}
    for rate in learning_rates:
        models[str(rate)] = solve(
            train_set_x_flatten,
            train_set_y,
            test_set_x_flatten,
            test_set_y,
            num_iteration=2000,
            learning_rate=rate,
        )
    for rate in learning_rates:
        model = models[str(rate)]
        plt.plot(np.squeeze(model["costs"]), label=str(model["learning_rate"]))

    plt.ylabel('cost')
    plt.xlabel('iterations')

    # The labels passed to plot() only become visible once a legend is
    # drawn; without it the curves cannot be told apart.
    legend = plt.legend(loc='upper center', shadow=True)
    frame = legend.get_frame()
    frame.set_facecolor('0.90')
    plt.show()

# See PyCharm help at https://www.jetbrains.com/help/pycharm/