import random
# from copy import copy

from matplotlib import pyplot as plt
from mnist import MNIST
import numpy as np


def decision(x):
    """Return the position of the largest entry of ``x``.

    ``x`` is passed to ``np.argmax`` unchanged, so the result is an index
    into the flattened input and ties resolve to the first maximum.
    """
    winner = np.argmax(x)
    return winner


def sigmoid(x):
    """Apply the logistic function ``1 / (1 + e**-x)`` to ``x``.

    Works element-wise on anything ``np.exp`` accepts (scalars, arrays,
    matrices) and returns a value of the corresponding shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def train_error(data, weights_1, weights_2):
    """Estimate the network's error on a small random sample of ``data``.

    Up to 20 samples are drawn without replacement.  Each one is pushed
    through the same forward pass that ``train`` and ``validate`` use
    (pixels divided by 255, sigmoid after each layer) and the absolute
    differences between the one-hot target and the output are summed.

    Args:
        data: Sequence of ``(pixels, label)`` pairs, where ``label`` indexes
            one of the 10 output units.
        weights_1: Input-to-hidden weight matrix (``np.matrix``).
        weights_2: Hidden-to-output weight matrix (``np.matrix``).

    Returns:
        The absolute output error summed over all sampled items
        (``0`` when ``data`` is empty).
    """
    # Never ask for more samples than exist; random.sample would raise.
    sample_size = min(20, len(data))
    total_err = 0
    for pixels, label in random.sample(data, sample_size):
        target = np.zeros(10)
        target[label] = 1
        img = np.matrix(pixels) / 255  # same input scaling as in training

        pot_1 = sigmoid(weights_1.T * img.T)
        pot_2 = sigmoid(weights_2.T * pot_1)
        # Sum absolute component errors so opposite signs cannot cancel out.
        total_err += np.sum(np.abs(target - pot_2.T))
    return total_err


def validate(weights_1, weights_2, mndata):
    """Measure the misclassification rate on the test set.

    Every test image is scaled by 1/255, run through the two sigmoid layers
    and classified with ``decision``; the share of wrong predictions is
    printed and returned.

    Args:
        weights_1: Input-to-hidden weight matrix (``np.matrix``).
        weights_2: Hidden-to-output weight matrix (``np.matrix``).
        mndata: Loader whose ``load_testing()`` returns ``(images, labels)``.

    Returns:
        Fraction of misclassified test samples (``0.0`` if there are none).
    """
    images, labels = mndata.load_testing()
    miss = 0
    total = 0
    for pixels, label in zip(images, labels):
        total += 1
        img = np.matrix(pixels) / 255

        pot_1 = sigmoid(weights_1.T * img.T)
        pot_2 = sigmoid(weights_2.T * pot_1)
        if decision(pot_2) != label:
            miss += 1
    # Divide by the number of samples actually evaluated rather than a
    # hard-coded test-set size.
    miss_rate = miss / total if total else 0.0
    print(miss_rate)
    return miss_rate


def _plot_error_history(err_hist):
    """Plot the recorded error averaged over windows of 100 iterations."""
    # Start at index 1 to skip the placeholder value that seeds the history.
    sampled_err = [
        np.mean(err_hist[start:start + 100])
        for start in range(1, len(err_hist), 100)
    ]
    # The line needs a label, otherwise plt.legend() has nothing to show.
    plt.plot(sampled_err, label='Mean error per 100 iterations')
    plt.xlabel('Iteration number (* 100)')
    plt.ylabel('Error')
    plt.legend()
    plt.show()


def train(mndata):
    """Train a 784-100-10 sigmoid network with per-sample backpropagation.

    The training set is loaded from ``mndata``, shuffled, and presented one
    sample at a time.  Training stops once the mean of the last 20 recorded
    errors drops to 0.001 or below, or after 500000 iterations.  Progress is
    printed every 100 iterations and the averaged error curve is shown with
    matplotlib before returning.

    Args:
        mndata: Loader whose ``load_training()`` returns ``(images, labels)``.

    Returns:
        Tuple ``(weights_1, weights_2)`` of the trained input-to-hidden
        (784 x 100) and hidden-to-output (100 x 10) weight matrices.
    """
    learning_rate = 0.15
    images, labels = mndata.load_training()
    data = list(zip(images, labels))
    random.shuffle(data)
    print('Data loaded')
    # Small positive random initial weights.
    weights_1 = np.matrix(np.random.rand(28 * 28, 100) / (28 * 28))  # 784 x 100
    weights_2 = np.matrix(np.random.rand(100, 10) / 25)  # 100 x 10
    # Seeded with 1 so the convergence check passes before any sample is seen.
    err_hist = [1]

    i = 0
    while np.mean(err_hist[-20:]) > 0.001 and i < 500000:
        pixels, label = data[i % len(data)]
        img = np.matrix(pixels) / 255  # 1 x 784
        y = np.zeros(10)  # one-hot target
        y[label] = 1

        # Forward prop
        pot_1 = sigmoid(weights_1.T * img.T)  # 100 x 1
        pot_2 = sigmoid(weights_2.T * pot_1)  # 10 x 1
        # Error terms: sigmoid derivative times the (back-propagated) error
        delta_2 = np.multiply(
            np.multiply(pot_2.T, 1 - pot_2.T),
            y - pot_2.T,
        )  # 1 x 10
        delta_1 = np.multiply(
            np.multiply(pot_1, 1 - pot_1),
            weights_2 * delta_2.T,
        )  # 100 x 1
        # Backprop: both updates are computed before either is applied
        delta_w_2 = learning_rate * pot_1 * delta_2  # 100 x 10
        delta_w_1 = learning_rate * img.T * delta_1.T  # 784 x 100
        weights_2 += delta_w_2
        weights_1 += delta_w_1

        err_hist.append(np.sum(np.abs(delta_2)))
        if i % 100 == 0:
            print(f'Iter {i}, error: {err_hist[-1]}')
        if i % 10000 == 0:
            learning_rate *= 0.98
        # Reshuffle once per pass over the data, whatever its actual size.
        if i % len(data) == 0:
            print('Shuffling data')
            random.shuffle(data)
        i += 1

    _plot_error_history(err_hist)
    return weights_1, weights_2


if __name__ == '__main__':
    # Script entry point: load MNIST from ./data, train, then evaluate.
    mndata = MNIST('./data')
    # NOTE(review): presumably makes the loader read the .gz archives --
    # confirm against the mnist package.
    mndata.gz = True
    trained_weights = train(mndata)
    validate(*trained_weights, mndata)