"""Train and evaluate a two-layer sigmoid network on MNIST digits.

The network is input -> hidden (sigmoid) -> output (sigmoid), trained one
sample at a time with plain gradient steps.  Pixel values are divided by 255
before they enter the network.

matplotlib and the MNIST loader are imported where they are used, so the
numeric helpers in this module can be imported without either package.
"""
import random

import numpy as np

# Number of output units, one per digit class.
N_CLASSES = 10


def decision(x):
    """Return the index of the largest entry of ``x`` (the predicted class)."""
    return np.argmax(x)


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Evaluated as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so that
    large negative inputs do not overflow ``exp`` (the naive form emits a
    RuntimeWarning there).  Scalars, arrays and ``np.matrix`` inputs are
    accepted; the container type of the input is kept.
    """
    return np.exp(-np.logaddexp(0, -x))


def _forward(weights_1, weights_2, pixels):
    """Run one image through the network.

    Args:
        weights_1: input-to-hidden weights, shape (n_inputs, n_hidden).
        weights_2: hidden-to-output weights, shape (n_hidden, n_outputs).
        pixels: flat sequence of n_inputs raw pixel values.

    Returns:
        ``(x, hidden, output)`` as column ndarrays: the scaled input and the
        activations of the hidden and output layers.
    """
    x = np.asarray(pixels, dtype=float).reshape(-1, 1) / 255
    hidden = sigmoid(np.asarray(weights_1).T @ x)
    output = sigmoid(np.asarray(weights_2).T @ hidden)
    return x, hidden, output


def train_error(data, weights_1, weights_2):
    """Return the summed absolute output error over a random sample of ``data``.

    Up to 20 ``(pixels, label)`` pairs are drawn without replacement (fewer
    if ``data`` is smaller).  Each pair goes through the same forward pass
    that ``train`` and ``validate`` use, and the absolute differences between
    the one-hot target and the network output are accumulated.

    Args:
        data: sequence of ``(pixels, label)`` pairs.
        weights_1: input-to-hidden weights.
        weights_2: hidden-to-output weights; its column count fixes the
            length of the one-hot target.

    Returns:
        The accumulated error (``0.0`` when ``data`` is empty).
    """
    sample_size = min(20, len(data))
    n_classes = np.asarray(weights_2).shape[1]
    total_err = 0.0
    for pixels, label in random.sample(data, sample_size):
        target = np.zeros((n_classes, 1))
        target[label] = 1
        _, _, output = _forward(weights_1, weights_2, pixels)
        # Sum of magnitudes: signed errors must not cancel each other out.
        total_err += np.sum(np.abs(target - output))
    return total_err


def validate(weights_1, weights_2, mndata):
    """Print and return the misclassification rate on the test set.

    Args:
        weights_1: input-to-hidden weights.
        weights_2: hidden-to-output weights.
        mndata: object whose ``load_testing()`` returns ``(images, labels)``.

    Returns:
        Fraction of evaluated samples whose predicted class differs from the
        label; ``0.0`` when there are no samples.
    """
    images, labels = mndata.load_testing()
    miss = 0
    total = 0
    for pixels, label in zip(images, labels):
        _, _, output = _forward(weights_1, weights_2, pixels)
        if decision(output) != label:
            miss += 1
        total += 1
    # Divide by the number of samples actually evaluated, not a fixed count.
    error_rate = miss / total if total else 0.0
    print(error_rate)
    return error_rate


def _plot_error_history(err_hist):
    """Plot the training error averaged over consecutive 100-iteration windows."""
    from matplotlib import pyplot as plt

    # Index 0 holds the placeholder that seeds the stopping test; skip it.
    sampled_err = [
        np.mean(err_hist[start:start + 100])
        for start in range(1, len(err_hist), 100)
    ]
    # The label gives legend() an entry to show.
    plt.plot(sampled_err, label='Mean training error')
    plt.xlabel('Iteration number (* 100)')
    plt.ylabel('Error')
    plt.legend()
    plt.show()


def train(mndata, *, hidden_units=100, learning_rate=0.15,
          max_iterations=500000, tolerance=0.001, plot=True):
    """Train the network with per-sample gradient steps.

    Training stops when the mean of the last 20 recorded errors drops to
    ``tolerance`` or below, or after ``max_iterations`` steps.  The learning
    rate is multiplied by 0.98 every 10000 iterations and the data is
    reshuffled once per pass over the training set.

    Args:
        mndata: object whose ``load_training()`` returns ``(images, labels)``;
            each image is a flat pixel sequence, each label an integer class
            index below ``N_CLASSES``.
        hidden_units: number of hidden-layer units.
        learning_rate: initial step size.
        max_iterations: upper bound on the number of training steps.
        tolerance: error level at which training stops.
        plot: when true, show the error history with matplotlib at the end.

    Returns:
        ``(weights_1, weights_2)`` as ``np.matrix`` objects of shape
        ``(n_inputs, hidden_units)`` and ``(hidden_units, N_CLASSES)``.

    Raises:
        ValueError: if the training set is empty.
    """
    images, labels = mndata.load_training()
    data = list(zip(images, labels))
    if not data:
        raise ValueError('training set is empty')
    random.shuffle(data)
    print('Data loaded')

    n_inputs = len(data[0][0])
    # Small positive initial weights.
    weights_1 = np.random.rand(n_inputs, hidden_units) / n_inputs
    # NOTE(review): the divisor 25 is kept as found; it does not follow
    # hidden_units -- confirm the intended scale.
    weights_2 = np.random.rand(hidden_units, N_CLASSES) / 25

    # Seeded with 1 so the stopping test passes on the first iteration.
    err_hist = [1]
    i = 0
    while np.mean(err_hist[-20:]) > tolerance and i < max_iterations:
        pixels, label = data[i % len(data)]
        target = np.zeros((N_CLASSES, 1))
        target[label] = 1

        # Forward pass.
        x, hidden, output = _forward(weights_1, weights_2, pixels)

        # Error terms: sigmoid derivative times the (back-propagated) error.
        # delta_hidden must use weights_2 from before this step's update.
        delta_out = output * (1 - output) * (target - output)
        delta_hidden = hidden * (1 - hidden) * (weights_2 @ delta_out)

        # Weight updates.
        weights_2 += (learning_rate * hidden) @ delta_out.T
        weights_1 += (learning_rate * x) @ delta_hidden.T

        err_hist.append(np.sum(np.abs(delta_out)))
        if i % 100 == 0:
            print(f'Iter {i}, \nerror: {err_hist[-1]}')
        if i % 10000 == 0:
            learning_rate *= 0.98
        if i % len(data) == 0:
            print('Shuffling data')
            random.shuffle(data)
        i += 1

    if plot:
        _plot_error_history(err_hist)
    # Returned as np.matrix so callers that use ``*`` as a matrix product
    # on the weights keep working.
    return np.matrix(weights_1), np.matrix(weights_2)


def main():
    """Train on the MNIST files under ./data, then report the test error."""
    from mnist import MNIST

    mndata = MNIST('./data')
    # Presumably makes the loader read the gzip-compressed files -- confirm
    # against the mnist package documentation.
    mndata.gz = True
    weights_1, weights_2 = train(mndata)
    validate(weights_1, weights_2, mndata)


if __name__ == '__main__':
    main()