Perceptron/XOR.java

190 lines
5.0 KiB
Java

import java.text.DecimalFormat;
/**
 * Small two-layer feed-forward network with sigmoid activations, trained on the
 * XOR truth table by batch gradient descent (backpropagation).
 *
 * <p>Usage: {@code java XOR [iterations]}. The iteration count defaults to
 * 1,000,000. After training, the network output for the four training rows and
 * the final squared error are printed to standard output.
 */
public class XOR {

    /**
     * Logistic transfer function, or its derivative.
     *
     * <p>("fdt" presumably abbreviates the Italian "funzione di trasferimento".)
     *
     * @param x     when {@code deriv} is false, the pre-activation value; when
     *              {@code deriv} is true, a value that is ALREADY a sigmoid output
     * @param deriv false to compute {@code 1/(1+e^-x)}; true to compute
     *              {@code x*(1-x)}, which equals the sigmoid derivative only when
     *              {@code x} is a sigmoid output
     * @return the sigmoid of {@code x}, or {@code x*(1-x)}
     */
    private static double fdt(double x, boolean deriv) {
        if (deriv) {
            return x * (1 - x);
        }
        return 1 / (1 + Math.exp(-x));
    }

    /**
     * Applies {@link #fdt(double, boolean)} to every cell of a matrix.
     *
     * <p>The matrix is modified IN PLACE and the same array is returned.
     *
     * @param x     matrix to transform; overwritten with the result
     * @param deriv forwarded to {@code fdt}
     * @return {@code x} itself, after the transformation
     */
    private static double[][] fdt_mat(double[][] x, boolean deriv) {
        for (double[] row : x) {
            for (int j = 0; j < row.length; j++) {
                row[j] = fdt(row[j], deriv);
            }
        }
        return x;
    }

    /**
     * Returns the transpose of a rectangular matrix as a new array.
     *
     * @param matrix matrix to transpose; not modified
     * @return a new matrix with rows and columns swapped (0x0 for an empty input)
     */
    private static double[][] translate(double[][] matrix) {
        if (matrix.length == 0) {
            return new double[0][0];
        }
        int rows = matrix.length;
        int cols = matrix[0].length;
        double[][] transposed = new double[cols][rows];
        for (int i = 0; i < cols; i++) {
            for (int j = 0; j < rows; j++) {
                transposed[i][j] = matrix[j][i];
            }
        }
        return transposed;
    }

    /**
     * Matrix product {@code firstMatrix x secondMatrix}.
     *
     * @param firstMatrix  left operand, r1 x c1
     * @param secondMatrix right operand, r2 x c2, with r2 == c1
     * @return a new r1 x c2 matrix
     * @throws IllegalArgumentException if either matrix is empty or the inner
     *                                  dimensions do not agree
     */
    private static double[][] dot_product(double[][] firstMatrix, double[][] secondMatrix) {
        if (firstMatrix.length == 0 || secondMatrix.length == 0) {
            throw new IllegalArgumentException("dot_product: matrices must not be empty");
        }
        int r1 = firstMatrix.length;
        int c1 = firstMatrix[0].length;
        int r2 = secondMatrix.length;
        int c2 = secondMatrix[0].length;
        // Without this check a larger second matrix would silently be truncated.
        if (c1 != r2) {
            throw new IllegalArgumentException(
                    "dot_product: cannot multiply " + r1 + "x" + c1 + " by " + r2 + "x" + c2);
        }
        double[][] product = new double[r1][c2];
        for (int i = 0; i < r1; i++) {
            for (int j = 0; j < c2; j++) {
                double sum = 0;
                for (int k = 0; k < c1; k++) {
                    sum += firstMatrix[i][k] * secondMatrix[k][j];
                }
                product[i][j] = sum;
            }
        }
        return product;
    }

    /**
     * Prints a matrix to standard output, one row per line followed by a blank
     * line, cells separated by tabs and formatted with the pattern {@code #.000}.
     *
     * @param matrix matrix to print
     */
    private static void stampa_matrice(double[][] matrix) {
        DecimalFormat numberFormat = new DecimalFormat("#.000");
        for (double[] row : matrix) {
            for (double cell : row) {
                System.out.print(numberFormat.format(cell) + "\t"); // 3 decimals
            }
            System.out.println("\n");
        }
    }

    /**
     * Trains the XOR network and prints the result.
     *
     * @param args optional; {@code args[0]} overrides the number of training
     *             iterations (parsed with {@link Integer#parseInt(String)})
     */
    public static void main(String[] args) {
        int iterazioni = 1000000;
        int hidden_layer = 5;
        double epsilon = 0.001; // learning rate
        if (args.length > 0) {
            iterazioni = Integer.parseInt(args[0]);
        }

        // Input matrix, 4x3: one row per example; the third column is constant 1
        // so that it can act as a bias input.
        double[][] X = {
            {0d, 0d, 1d},
            {0d, 1d, 1d},
            {1d, 0d, 1d},
            {1d, 1d, 1d}
        };
        // Desired outputs, 4x1: one row per example (XOR of the first two inputs).
        double[][] Y = {
            {0d},
            {1d},
            {1d},
            {0d}
        };

        // First set of synapses: inputs -> hidden layer (3 x hidden_layer).
        double[][] syn0 = randomize_matrix(new double[X[0].length][hidden_layer]);
        // Second set of synapses: hidden layer -> output (hidden_layer x 1).
        double[][] syn1 = randomize_matrix(new double[hidden_layer][1]);

        train(X, Y, syn0, syn1, iterazioni, epsilon);

        double[][] output = forward(X, syn0, syn1)[1];
        System.out.println("Output after " + iterazioni + " iterations:");
        stampa_matrice(output);
        System.out.println("Error: " + error_func(output, Y));
    }

    /**
     * Forward pass through both layers.
     *
     * @return a two-element array: index 0 holds the hidden activations, index 1
     *         the network output (both after the sigmoid)
     */
    private static double[][][] forward(double[][] input, double[][] syn0, double[][] syn1) {
        double[][] hidden = fdt_mat(dot_product(input, syn0), false);
        double[][] output = fdt_mat(dot_product(hidden, syn1), false);
        return new double[][][] {hidden, output};
    }

    /**
     * Runs batch gradient descent on the squared error, updating {@code syn0} and
     * {@code syn1} in place.
     *
     * @param input      training inputs, one row per example
     * @param target     desired outputs, one row per example
     * @param syn0       input-to-hidden weights; modified in place
     * @param syn1       hidden-to-output weights; modified in place
     * @param iterations number of full-batch updates; values &lt;= 0 do nothing
     * @param epsilon    learning rate
     */
    private static void train(double[][] input, double[][] target,
                              double[][] syn0, double[][] syn1,
                              int iterations, double epsilon) {
        double[][] inputT = translate(input); // loop-invariant
        for (int iter = 0; iter < iterations; iter++) {
            // The forward pass must be recomputed every iteration because the
            // weights changed in the previous one.
            double[][][] layers = forward(input, syn0, syn1);
            double[][] hidden = layers[0];
            double[][] output = layers[1];

            // Output delta: (target - output) * f'(output).
            double[][] deltaOut = scale_by_derivative(subtract_matrix_ebe(target, output), output);
            // Hidden delta: error propagated back through syn1 (BEFORE syn1 is
            // updated), times f'(hidden).
            double[][] deltaHidden =
                    scale_by_derivative(dot_product(deltaOut, translate(syn1)), hidden);

            add_scaled(syn1, dot_product(translate(hidden), deltaOut), epsilon);
            add_scaled(syn0, dot_product(inputT, deltaHidden), epsilon);
        }
    }

    /**
     * Multiplies each cell of {@code error} by the sigmoid derivative evaluated
     * from the matching activation. Modifies and returns {@code error}.
     */
    private static double[][] scale_by_derivative(double[][] error, double[][] activation) {
        for (int i = 0; i < error.length; i++) {
            for (int j = 0; j < error[i].length; j++) {
                error[i][j] *= fdt(activation[i][j], true);
            }
        }
        return error;
    }

    /** Adds {@code factor * update} to {@code weights}, cell by cell, in place. */
    private static void add_scaled(double[][] weights, double[][] update, double factor) {
        for (int i = 0; i < weights.length; i++) {
            for (int j = 0; j < weights[i].length; j++) {
                weights[i][j] += factor * update[i][j];
            }
        }
    }

    /**
     * Fills every cell of a matrix with a random value in {@code [0, 0.1)}.
     *
     * @param mat matrix to fill; modified in place
     * @return {@code mat} itself
     */
    private static double[][] randomize_matrix(double[][] mat) {
        for (double[] row : mat) {
            for (int j = 0; j < row.length; j++) {
                row[j] = 0.1 * Math.random();
            }
        }
        return mat;
    }

    /**
     * Element-by-element subtraction {@code mat1 - mat2}.
     *
     * @param mat1 minuend
     * @param mat2 subtrahend, same shape as {@code mat1}
     * @return a new matrix with the same shape as {@code mat1}
     * @throws IllegalArgumentException if the shapes differ
     */
    private static double[][] subtract_matrix_ebe(double[][] mat1, double[][] mat2) {
        if (mat1.length != mat2.length) {
            throw new IllegalArgumentException("subtract_matrix_ebe: row counts differ");
        }
        double[][] mat = new double[mat1.length][];
        for (int i = 0; i < mat1.length; i++) {
            if (mat1[i].length != mat2[i].length) {
                throw new IllegalArgumentException("subtract_matrix_ebe: row " + i + " lengths differ");
            }
            mat[i] = new double[mat1[i].length];
            for (int j = 0; j < mat[i].length; j++) {
                mat[i][j] = mat1[i][j] - mat2[i][j];
            }
        }
        return mat;
    }

    /**
     * Network output error: half the sum of squared differences between the
     * first column of {@code output} and the first column of {@code xi}.
     *
     * @param output network outputs, one row per example
     * @param xi     desired outputs, one row per example
     * @return {@code 0.5 * sum_i (output[i][0] - xi[i][0])^2}
     */
    private static double error_func(double[][] output, double[][] xi) {
        double e = 0;
        for (int i = 0; i < output.length; i++) {
            double diff = output[i][0] - xi[i][0];
            e += diff * diff;
        }
        return 0.5 * e;
    }
}