function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
%GRADIENTDESCENT Performs batch gradient descent to learn theta
%   [theta, J_history] = GRADIENTDESCENT(X, y, theta, alpha, num_iters)
%   updates theta by taking num_iters gradient steps with learning rate
%   alpha on the linear model X * theta.
%
%   Inputs:
%     X         - design matrix, one training example per row
%     y         - vector of target values, one entry per row of X
%     theta     - initial parameters as a column vector (one per column of X)
%     alpha     - learning rate (step size)
%     num_iters - number of gradient steps to take
%
%   Outputs:
%     theta     - parameters after num_iters updates
%     J_history - num_iters-by-1 vector; entry k holds the value returned by
%                 computeCost(X, y, theta) after the k-th update

% Initialize some useful values
y_col = y(:);          % force a column so X * theta - y_col never broadcasts
m = numel(y_col);      % number of training examples
J_history = zeros(num_iters, 1);

for iter = 1:num_iters
    % Vectorized gradient: the sum over all examples of
    % (prediction - target) * features, computed with one matrix product
    % instead of an explicit per-example loop.
    residuals = X * theta - y_col;
    gradient = (X' * residuals) / m;

    % Update every component of theta simultaneously.
    theta = theta - alpha * gradient;

    % Save the cost J in every iteration
    J_history(iter) = computeCost(X, y, theta);

    % Print the current cost so convergence can be watched while running.
    disp(J_history(iter))
end
end