sigmoidGradient.m

% Element-wise gradient g = s .* (1 - s), where s = sigmoid(z).
% sigmoid is a helper defined outside this fragment (presumably the
% logistic function 1 ./ (1 + exp(-z)) -- confirm against its source).
% It is evaluated once and reused rather than being called twice on z.
sig_z = sigmoid(z);
g = sig_z .* (1 - sig_z);

randInitializeWeights.m

% Build a random L_out-by-(1 + L_in) weight matrix W.
% rand yields values in (0, 1); scaling by 2*epsilon_init and shifting by
% -epsilon_init maps them into (-epsilon_init, epsilon_init).
% The extra "1 +" column is presumably for the bias term -- confirm with caller.
epsilon_init = 0.12;

n_rows = L_out;
n_cols = 1 + L_in;

scaled = rand(n_rows, n_cols) * 2 * epsilon_init;
W = scaled - epsilon_init;

nnCostFunction.m

% Cost J and gradients (Theta1_grad, Theta2_grad) of a network with one
% hidden layer, computed for all m examples at once.
% Inputs read from the enclosing scope: X, y, Theta1, Theta2, num_labels,
% lambda and m (m is assumed to be the number of rows of X -- it is not
% defined in this fragment). sigmoid and sigmoidGradient are external helpers.

% --- Feedforward and unregularized cost ---
% One-hot encode the labels: row i of y becomes row y(i) of the identity
% matrix, so y is m-by-num_labels from here on.
label_basis = eye(num_labels);
y = label_basis(y,:);

a1 = [ones(m,1) X];            % prepend a column of ones to the inputs
z2 = a1*Theta1';
a2 = [ones(m,1) sigmoid(z2)];  % hidden activations with a leading ones column
a3 = sigmoid(a2*Theta2');      % output activations, one row per example

J = sum(sum(-y.*log(a3) - (1-y).*log(1-a3)))/m;

% --- Regularized cost ---
% The first column of each Theta is excluded from the penalty.
penalty = lambda/(2*m)*(sum(sum(Theta1(:,2:end).^2))+sum(sum(Theta2(:,2:end).^2)));
J = J + penalty;

% --- Backpropagation ---
delta3 = a3 - y;
% Propagate the output error back through Theta2 without its first column
% (the hidden layer's ones column has no z2 entry), then scale by the
% gradient of the hidden activation.
delta2 = (delta3*Theta2(:,2:end)) .* sigmoidGradient(z2);

% Unregularized gradients, averaged over the m examples.
Theta1_grad = (1/m)*(delta2'*a1);
Theta2_grad = (1/m)*(delta3'*a2);

% Regularize only columns 2:end. Adding the term to every column and then
% subtracting it from column 1 would leave rounding error in column 1.
Theta1_grad(:,2:end) = Theta1_grad(:,2:end) + (lambda/m)*Theta1(:,2:end);
Theta2_grad(:,2:end) = Theta2_grad(:,2:end) + (lambda/m)*Theta2(:,2:end);