1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111 | import random
# from copy import copy
from matplotlib import pyplot as plt
from mnist import MNIST
import numpy as np
def decision(x):
    """Return the predicted class for a vector of output activations.

    The prediction is the position of the largest value in ``x``;
    ``np.argmax`` looks at the flattened input, so row vectors, column
    vectors and plain sequences all yield a single index. Ties resolve to
    the first maximal position.
    """
    best_index = np.argmax(x)
    return best_index
def sigmoid(x):
    """Logistic activation ``1 / (1 + exp(-x))``, evaluated element-wise.

    Accepts anything that supports unary minus and ``np.exp`` (scalars,
    arrays, matrices) and returns a value of the matching shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def train_error(data, weights_1, weights_2):
    """Estimate the network error on a random sample of ``data``.

    Up to 20 ``(pixels, label)`` pairs are drawn without replacement and
    pushed through the same forward pass that ``train`` and ``validate``
    use (pixels scaled by 1/255, sigmoid after each layer). The result is
    the sum, over the sampled items, of the absolute differences between
    the one-hot target and the network output.

    Args:
        data: Sequence of ``(pixels, label)`` pairs; ``pixels`` is a flat
            sequence of raw 0-255 intensities and ``label`` an int in 0..9.
        weights_1: Input-to-hidden weights as an ``np.matrix`` (``*`` must
            be a matrix product, as for the matrices built in ``train``).
        weights_2: Hidden-to-output weights as an ``np.matrix``.

    Returns:
        Total absolute output error over the sample; ``0.0`` when ``data``
        is empty.
    """
    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(20, len(data))
    total_err = 0.0
    for pixels, label in random.sample(data, sample_size):
        target = np.zeros(10)
        target[label] = 1
        img = np.matrix(pixels) / 255  # 1 x n_inputs, same scaling as train()
        pot_1 = sigmoid(weights_1.T * img.T)
        pot_2 = sigmoid(weights_2.T * pot_1)
        # Sum absolute per-class errors: summing signed errors first would
        # let over- and under-shooting outputs cancel each other out.
        total_err += np.sum(np.abs(target - pot_2.T))
    return total_err
def validate(weights_1, weights_2, mndata):
    """Measure the misclassification rate of the network on the test set.

    Every test image is scaled by 1/255, passed through both sigmoid
    layers, and the index of the strongest output is compared with the
    true label. The miss rate is printed and returned.

    Args:
        weights_1: Input-to-hidden weights as an ``np.matrix``.
        weights_2: Hidden-to-output weights as an ``np.matrix``.
        mndata: Loader whose ``load_testing()`` returns ``(images, labels)``.

    Returns:
        Fraction of test samples that were misclassified (``0.0`` if the
        test set is empty).
    """
    images, labels = mndata.load_testing()
    miss = 0
    total = 0
    for img, label in zip(images, labels):
        total += 1
        img = np.matrix(img) / 255  # 1 x n_inputs row vector
        pot_1 = sigmoid(weights_1.T * img.T)
        pot_2 = sigmoid(weights_2.T * pot_1)
        if decision(pot_2) != label:
            miss += 1
    # Divide by the number of samples actually evaluated rather than
    # assuming the test set always holds exactly 10000 images.
    miss_rate = miss / total if total else 0.0
    print(miss_rate)
    return miss_rate
def train(mndata):
    """Train a 784-100-10 sigmoid network with per-sample gradient updates.

    The training pairs are shuffled, then visited one at a time (wrapping
    around and reshuffling every ``len(data)`` iterations). After each
    sample both weight matrices are updated in place. Training stops when
    the mean of the last 20 recorded errors drops to 0.001 or below, or
    after 500000 iterations. The learning rate starts at 0.15 and is
    multiplied by 0.98 every 10000 iterations. Progress is printed every
    100 iterations and the error history is plotted at the end.

    Args:
        mndata: Loader whose ``load_training()`` returns ``(images,
            labels)``; each image must be a flat sequence of 784 raw
            intensities and each label an int in 0..9.

    Returns:
        ``(weights_1, weights_2)`` as ``np.matrix`` objects of shape
        784 x 100 and 100 x 10.

    Raises:
        ValueError: If the loader returns no training samples.
    """
    learning_rate = 0.15
    n_inputs, n_hidden, n_outputs = 28 * 28, 100, 10
    max_iterations = 500000

    images, labels = mndata.load_training()
    data = list(zip(images, labels))
    if not data:
        raise ValueError('training set is empty')
    random.shuffle(data)
    print('Data loaded')

    # np.matrix is kept on purpose: callers rely on ``*`` being a matrix
    # product when they use the returned weights.
    weights_1 = np.matrix(np.random.rand(n_inputs, n_hidden) / n_inputs)  # 784 x 100
    # NOTE(review): the divisor 25 does not match the 100 hidden units and
    # looks like a leftover from an earlier layer size -- confirm.
    weights_2 = np.matrix(np.random.rand(n_hidden, n_outputs) / 25)  # 100 x 10

    # Seeded with 1 so the stopping test passes before any error is recorded.
    err_hist = [1]
    i = 0
    while np.mean(err_hist[-20:]) > 0.001 and i < max_iterations:
        img, label = data[i % len(data)]
        img = np.matrix(img) / 255  # 1 x 784
        y = np.zeros(n_outputs)
        y[label] = 1

        # Forward pass
        pot_1 = sigmoid(weights_1.T * img.T)  # 100 x 1
        pot_2 = sigmoid(weights_2.T * pot_1)  # 10 x 1

        # Error terms: sigmoid derivative times the (back-propagated) error.
        delta_2 = np.multiply(
            np.multiply(pot_2.T, 1 - pot_2.T),
            y - pot_2.T,
        )  # 1 x 10
        # delta_1 must use weights_2 as it was during the forward pass,
        # so it is computed before either weight matrix is updated.
        delta_1 = np.multiply(
            np.multiply(pot_1, 1 - pot_1),
            weights_2 * delta_2.T,
        )  # 100 x 1

        # Weight updates
        weights_2 += learning_rate * pot_1 * delta_2
        weights_1 += learning_rate * img.T * delta_1.T

        err_hist.append(np.sum(np.abs(delta_2)))
        if i % 100 == 0:
            print(f'Iter {i}, error: {err_hist[-1]}')
        if i % 10000 == 0:
            learning_rate *= 0.98
        # Reshuffle once per pass over the data (60000 iterations for the
        # standard MNIST training set).
        if i % len(data) == 0:
            print('Shuffling data')
            random.shuffle(data)
        i += 1

    _plot_error_history(err_hist)
    return weights_1, weights_2


def _plot_error_history(err_hist):
    """Plot the recorded errors averaged over windows of 100 iterations."""
    # Index 0 holds the artificial seed value, so the windows start at 1.
    sampled_err = [
        np.mean(err_hist[start:start + 100])
        for start in range(1, len(err_hist), 100)
    ]
    # The line needs a label, otherwise plt.legend() has nothing to show
    # and only emits a warning.
    plt.plot(sampled_err, label='Mean error per 100 iterations')
    plt.xlabel('Iteration number (* 100)')
    plt.ylabel('Error')
    plt.legend()
    plt.show()
if __name__ == '__main__':
    # Script entry point: build the MNIST loader for ./data, train the
    # network, then report its miss rate on the test set.
    mndata = MNIST('./data')
    # NOTE(review): presumably makes the loader read the gzip-compressed
    # dataset files -- confirm against the python-mnist documentation.
    mndata.gz = True
    weights = train(mndata)
    validate(*weights, mndata)
|