import numpy as np
from random import random
class MLP:
    # Build the main architecture of the network
    def __init__(self, num_inputs=2, hidden_layers=[3, 2], num_outputs=1):
        self.num_inputs = num_inputs
        self.hidden_layers = hidden_layers
        self.num_outputs = num_outputs
        layers = [num_inputs] + hidden_layers + [num_outputs]
        # One weight matrix per pair of consecutive layers
        self.weights = [np.random.rand(layers[i], layers[i + 1]) for i in range(len(layers) - 1)]
        self.activations = [np.zeros(layers[i]) for i in range(len(layers))]
        self.derivatives = [np.zeros((layers[i], layers[i + 1])) for i in range(len(layers) - 1)]
        # Biases (umbrales), one vector per non-input layer
        self.umbral = [np.random.rand(layers[i]) for i in range(1, len(layers))]
        self.delta = [np.zeros(layers[i]) for i in range(1, len(layers))]
    # Forward propagation
    def forward_propagation(self, inputs):
        activations = inputs
        self.activations[0] = inputs
        for i, w in enumerate(self.weights):
            # Net input: weighted sum of the previous layer plus the bias
            net_inputs = np.dot(activations, w) + self.umbral[i]
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations
        return activations
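    # For each layer the pass computes a_next = sigmoid(a @ W + b); every
    # intermediate activation is cached in self.activations because
    # back_propagate needs those values to form the gradients.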
    # Backpropagation
    def back_propagate(self, error, verbose=False):
        for i in reversed(range(len(self.derivatives))):
            activations = self.activations[i + 1]
            # activations is already sigmoid(net), so the derivative helper
            # receives the activation value, not the raw net input
            delta = error * self._sigmoid_derivative(activations)
            self.delta[i] = delta
            delta_reshaped = delta.reshape(delta.shape[0], -1).T
            current_activations = self.activations[i]
            current_activations_reshaped = current_activations.reshape(current_activations.shape[0], -1)
            # Outer product: derivative of the error w.r.t. the weight matrix
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)
            # Propagate the error back to the previous layer
            error = np.dot(delta, self.weights[i].T)
            if verbose:
                print("The derivatives for the weights W{}: {}, biases: {}".format(i, self.derivatives[i], self.delta[i]))
        return error
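    # In equations, writing a for the current layer's activation and a_prev
    # for the one before it:
    #     delta     = error * a * (1 - a)      (sigmoid derivative term)
    #     dE/dW     = outer(a_prev, delta)
    #     new error = delta @ W.T              (pushed back one layer)
    # Keeping error as (target - output) gives these derivatives the sign
    # that lets gradient_descent below use += and still move downhill.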
    # Update weights and biases via gradient descent
    def gradient_descent(self, learning_rate):
        for i in range(len(self.weights)):
            # += is correct here because the error was taken as (target - output)
            self.weights[i] += self.derivatives[i] * learning_rate
            self.umbral[i] += self.delta[i] * learning_rate
    # Train the neural network
    def train(self, inputs, targets, epochs, learning_rate):
        for i in range(epochs):
            sum_error = 0
            for x, target in zip(inputs, targets):
                output = self.forward_propagation(x)
                error = target - output
                self.back_propagate(error)
                self.gradient_descent(learning_rate)
                sum_error += self._mse(target, output)
            print("The error is: {}, epoch: {}".format(sum_error / len(inputs), i + 1))
    # Mean squared error
    def _mse(self, target, output):
        return np.average((target - output) ** 2)
    # Sigmoid function
    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    # Sigmoid derivative; expects x to already be a sigmoid activation,
    # since s'(net) = s(net) * (1 - s(net)) = x * (1 - x)
    def _sigmoid_derivative(self, x):
        return x * (1 - x)
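# A minimal numerical sanity check for back_propagate (a sketch, not part of
# the original post): nudge a single weight by +/- eps and compare the
# finite-difference slope of the loss 0.5 * (target - output)**2 against the
# analytic derivative that backprop stores. The sign flip below is needed
# because the code stores derivatives built from (target - output), which
# point in the descent direction.
def gradient_check(mlp, x, target, layer=0, row=0, col=0, eps=1e-5):
    # Populate activations and derivatives with one forward/backward pass
    mlp.back_propagate(target - mlp.forward_propagation(x))
    analytic = -mlp.derivatives[layer][row, col]
    def loss():
        return 0.5 * np.sum((target - mlp.forward_propagation(x)) ** 2)
    # Central finite difference around the current weight value
    mlp.weights[layer][row, col] += eps
    loss_plus = loss()
    mlp.weights[layer][row, col] -= 2 * eps
    loss_minus = loss()
    mlp.weights[layer][row, col] += eps  # restore the original weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    return analytic, numeric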
if __name__ == "__main__":
    # Dataset: pairs of numbers in [0, 0.5) and their sums as targets
    inputs = np.array([[random() / 2 for _ in range(2)] for _ in range(500)])
    targets = np.array([[i[0] + i[1]] for i in inputs])
    mlp = MLP(2, [3, 2, 3], 1)
    mlp.train(inputs, targets, 500, 2.5)
    # Prediction on an unseen example
    sample = np.array([0.1, 0.3])
    target = np.array([sample[0] + sample[1]])
    output = mlp.forward_propagation(sample)
    print("The prediction is: {}, the real value is: {}".format(output[0], target[0]))
Result of predicting a single input, where the network has to predict the sum of two numbers (0 < R < 0.5):
https://i.imgur.com/Ky4bCWK.jpg
For a more detailed explanation of the algorithm, here is a YouTube playlist with a very clear walkthrough of the math behind the multilayer perceptron:
https://youtu.be/jaEIv_E29sk