neural network doesn’t seem to be learning


This Content is from Stack Overflow. Question asked by sta

i’m a beginner in this field and i’ve tried to make a neural network from scratch using numpy after watching a few videos. when i run it the accuracy seems to increase a bit and then consistently decrease. this changes to a very slow but consistent increase once i set a smaller value of alpha (about 0.0001>). have i gone wrong somewhere?

from re import X
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

currentdirectory = os.getcwd()
data = pd.read_csv(f'{currentdirectory}/train.csv')

data = np.array(data)
A, b = data.shape 

data_test = data[0:1000].T

Y_test = data_test[0] 

X_test = data_test[1:b] 
X_test = X_test / 255

data_train = data[1000:A].T 
Y_train = data_train[0] 
X_train = data_train[1:b] 
X_train = X_train / 255

def ReLU(inputs):
    return np.maximum(0,inputs)

def softmax(inputs):
    expvalues = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
    probabilities = expvalues / np.sum(expvalues, axis=1, keepdims=True)
    return probabilities
def one_hot(inputs): 
    onehot_inputs = np.zeros((inputs.max()+1, inputs.size))
    onehot_inputs [inputs,np.arange(inputs.size)] = 1 
    return onehot_inputs

def derivativeRELU(x):
    return x>0

class neuralnetwork:
    def __init__(self, inputs, neurons): 
        self.weights = np.random.randn(neurons, inputs) * np.sqrt(1./(784)) 
        self.biases = np.random.randn(neurons, 1)* np.sqrt(1./10) 

        self.weights2 = np.random.randn(10, 10)* np.sqrt(1./20)
        self.biases2 = np.random.randn(neurons, 1)* np.sqrt(1./(784))

    def forwardprop(self, inputs):
        self.output1 =, inputs) +self.biases
        self.activation1 = ReLU(self.output1)
        self.output2 =, self.activation1) + self.biases2
        self.activation2 = softmax(self.output2)
        return self.activation2

    def backprop (self, X, Y):
        onehotY = one_hot(Y)
        self.outputs2_D = 2*(self.activation2 - onehotY)
        self.weights2_D = 1/A *
        self.biases2_D = 1/A * np.sum(self.outputs2_D,1)

        self.outputs1_D = * derivativeRELU(self.output1) 
        self.weights1_D = 1/A* 
        self.biases1_D = 1/A * np.sum(self.outputs1_D,1)

    def update(self, alpha):
        self.weights -= alpha * self.weights1_D
        self.biases -= alpha * np.reshape(self.biases1_D,(10,1))
        self.weights2 -= alpha * self.weights2_D
        self.biases2 -= alpha * np.reshape(self.biases2_D,(10,1))

def get_predictions(A2):
    return np.argmax(A2, 0)
def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

layer = neuralnetwork(784, 10)
for i in range(0,1000): 
    A2 = layer.forwardprop(X_train)
    layer.backprop(X_train, Y_train)
    if i % 10 == 0:
        print("Iteration: ", i)
        predictions = get_predictions(A2)
        print(get_accuracy(predictions, Y_train))


This question is not yet answered, be the first one who answer using the comment. Later the confirmed answer will be published as the solution.

This Question and Answer are collected from stackoverflow and tested by JTuto community, is licensed under the terms of CC BY-SA 2.5. - CC BY-SA 3.0. - CC BY-SA 4.0.

people found this article helpful. What about you?