lrCostFunction.m

% Regularized logistic-regression cost J and gradient grad.
% Reads X, y, theta, lambda and m from the enclosing scope.

% Evaluate the hypothesis once; it feeds both the cost and the gradient.
h = sigmoid(X*theta);

% Copy of theta with the first entry zeroed: the first parameter is
% excluded from the penalty. Zeroing it up front avoids the
% add-then-subtract pattern, which loses precision when theta(1) is large.
theta_reg = theta;
theta_reg(1) = 0;

% Cross-entropy cost plus the L2 penalty on the regularized parameters.
J = (1/m) * (-y'*log(h) - (1-y')*log(1-h)) ...
    + (lambda/(2*m)) * sum(theta_reg.^2);

% Parenthesized so the product X'*(h - y) is formed first and only the
% resulting vector is scaled, instead of rescaling the whole matrix X'.
grad = (1/m) * (X'*(h - y)) + (lambda/m) * theta_reg;

oneVsAll.m

% Fit one classifier per label; row c of all_theta stores the parameter
% vector obtained for label c.
options = optimset('GradObj', 'on', 'MaxIter', 50);

% Every fit starts from the same all-zero parameter vector.
initial_theta = zeros(n+1,1);

for c = 1:num_labels
    % Binary target for this label: true where y equals c.
    is_label_c = (y == c);

    % Objective as a function of the parameter vector only.
    cost_fn = @(t)(lrCostFunction(t, X, is_label_c, lambda));

    % NOTE(review): fmincg is defined elsewhere; its first output is
    % assumed to be the fitted (n+1)-by-1 parameter vector - confirm.
    theta = fmincg(cost_fn, initial_theta, options);

    all_theta(c,:) = theta';
end

predictOneVsAll.m

% Score every row of X against every row of all_theta, then take the
% row-wise maximum: c holds the winning score, p the column index of the
% winner (i.e. the row of all_theta that scored highest).
class_scores = sigmoid(X*all_theta');
[c,p] = max(class_scores, [], 2);

predict.m

% Forward pass through two sigmoid layers, followed by a row-wise argmax.

% Prepend a column of ones to the inputs (bias term).
X = [ones(m,1), X];

% First layer: affine map with Theta1, sigmoid, then a bias column of ones
% sized from the layer's own row count.
z2 = X*Theta1';
a2 = [ones(size(z2,1),1), sigmoid(z2)];

% Second layer: affine map with Theta2, then sigmoid.
z3 = a2*Theta2';
a3 = sigmoid(z3);

% c is the largest activation in each row; p is the column where it occurs.
[c, p] = max(a3, [], 2);