neural network doesn’t seem to be learning

Issue

This content is from Stack Overflow. Question asked by sta

I'm a beginner in this field and I've tried to make a neural network from scratch using NumPy after watching a few videos. When I run it, the accuracy seems to increase a bit and then consistently decrease. This changes to a very slow but consistent increase once I set a smaller value of alpha (about 0.0001). Have I gone wrong somewhere?
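
For reference, one way to see whether the updates are diverging (rather than only watching accuracy) is to also track the cross-entropy loss each iteration. A minimal sketch, assuming a 10 x N softmax output A2 and integer labels Y as in the code below; cross_entropy is a hypothetical helper, not part of the original code:

import numpy as np

def cross_entropy(A2, Y):
    # probability assigned to the correct class of each example
    probs = A2[Y, np.arange(Y.size)]
    # small epsilon guards against log(0)
    return -np.mean(np.log(probs + 1e-9))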

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os


# load the training data from the current working directory
currentdirectory = os.getcwd()
data = pd.read_csv(f'{currentdirectory}/train.csv')


data = np.array(data)
A, b = data.shape
np.random.shuffle(data)

# hold out the first 1000 shuffled rows as a test split; transpose so each column is one example
data_test = data[0:1000].T
Y_test = data_test[0]
X_test = data_test[1:b]
X_test = X_test / 255



# remaining rows form the training split
data_train = data[1000:A].T
Y_train = data_train[0]
X_train = data_train[1:b]
X_train = X_train / 255


def ReLU(inputs):
    return np.maximum(0, inputs)

def softmax(inputs):
    # subtract the max before exponentiating for numerical stability
    expvalues = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
    probabilities = expvalues / np.sum(expvalues, axis=1, keepdims=True)
    return probabilities


def one_hot(inputs):
    # build a (num_classes, num_examples) matrix with a 1 in each label's row
    onehot_inputs = np.zeros((inputs.max() + 1, inputs.size))
    onehot_inputs[inputs, np.arange(inputs.size)] = 1
    return onehot_inputs

def derivativeRELU(x):
    # boolean mask of where x > 0 (acts as 1/0 when multiplied)
    return x > 0


class neuralnetwork:

    def __init__(self, inputs, neurons):
        self.weights = np.random.randn(neurons, inputs) * np.sqrt(1./784)
        self.biases = np.random.randn(neurons, 1) * np.sqrt(1./10)

        self.weights2 = np.random.randn(10, 10) * np.sqrt(1./20)
        self.biases2 = np.random.randn(neurons, 1) * np.sqrt(1./784)

    def forwardprop(self, inputs):
        self.output1 = np.dot(self.weights, inputs) + self.biases
        self.activation1 = ReLU(self.output1)
        self.output2 = np.dot(self.weights2, self.activation1) + self.biases2
        self.activation2 = softmax(self.output2)
        return self.activation2



    def backprop(self, X, Y):
        onehotY = one_hot(Y)
        self.outputs2_D = 2 * (self.activation2 - onehotY)
        self.weights2_D = 1/A * self.outputs2_D.dot(self.activation1.T)
        self.biases2_D = 1/A * np.sum(self.outputs2_D, 1)

        self.outputs1_D = self.weights2.T.dot(self.outputs2_D) * derivativeRELU(self.output1)
        self.weights1_D = 1/A * self.outputs1_D.dot(X.T)
        self.biases1_D = 1/A * np.sum(self.outputs1_D, 1)

    def update(self, alpha):
        self.weights -= alpha * self.weights1_D
        self.biases -= alpha * np.reshape(self.biases1_D, (10, 1))
        self.weights2 -= alpha * self.weights2_D
        self.biases2 -= alpha * np.reshape(self.biases2_D, (10, 1))
    

def get_predictions(A2):
    # predicted class for each example: argmax over the class axis
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size


# full-batch gradient descent on the training split
layer = neuralnetwork(784, 10)
for i in range(0, 1000):
    A2 = layer.forwardprop(X_train)
    layer.backprop(X_train, Y_train)
    layer.update(0.003)
    if i % 10 == 0:
        print("Iteration: ", i)
        predictions = get_predictions(A2)
        print(predictions[10])
        print(get_accuracy(predictions, Y_train))
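
For completeness, the held-out split created above (X_test, Y_test) is never used. A short sketch of how it could be checked with the same helpers, assuming the training loop above has already run:

test_output = layer.forwardprop(X_test)
test_predictions = get_predictions(test_output)
print("test accuracy:", get_accuracy(test_predictions, Y_test))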



Solution

This question has not been answered yet.

