// Perceptron/XOR.java

import java.text.DecimalFormat;

/**
 * Two-layer perceptron (one hidden layer, sigmoid units) trained on the XOR
 * truth table with batch gradient descent on the squared error.
 *
 * <p>Usage: {@code java XOR [iterations]}. The optional argument overrides the
 * default number of training iterations. After training, the network output for
 * the four input patterns and the final error are printed to standard output.
 *
 * <p>Matrices are {@code double[][]} in row-major form: one row per training
 * example, one column per unit.
 */
public class XOR{

	/**
	 * Transfer function: the logistic sigmoid, or its derivative.
	 *
	 * @param x     when {@code deriv} is false, the pre-activation value; when
	 *              {@code deriv} is true, a value already produced by the sigmoid
	 * @param deriv false to compute {@code 1/(1+e^-x)}; true to compute
	 *              {@code x*(1-x)}, i.e. the sigmoid derivative written in terms
	 *              of the sigmoid's own output
	 * @return the sigmoid of {@code x}, or {@code x*(1-x)}
	 */
	private static double fdt(double x, boolean deriv){
		if (deriv) return x*(1-x);
		return 1/(1+Math.exp(-x));
	}

	/**
	 * Applies {@link #fdt(double, boolean)} to every cell of a matrix.
	 * The argument is left untouched; a new matrix is returned, so the caller
	 * can keep the activations and still ask for their derivative.
	 *
	 * @param x     input matrix
	 * @param deriv forwarded to {@code fdt}
	 * @return a new matrix with the same shape as {@code x}
	 */
	private static double[][] fdt_mat(double[][] x, boolean deriv){
		double[][] result = new double[x.length][];
		for(int i=0; i<x.length; i++){
			result[i] = new double[x[i].length];
			for(int j=0; j<x[i].length; j++){
				result[i][j] = fdt(x[i][j], deriv);
			}
		}
		return result;
	}

	/**
	 * Transposes a rectangular matrix.
	 *
	 * @param matrix matrix with r rows and c columns
	 * @return a new c-by-r matrix; an empty matrix yields a 0x0 result
	 */
	private static double[][] translate(double[][] matrix){
		if (matrix.length == 0) return new double[0][0];
		double[][] transposed = new double[matrix[0].length][matrix.length];
		for(int i=0; i<transposed.length; i++){
			for(int j=0; j<transposed[i].length; j++){
				transposed[i][j] = matrix[j][i];
			}
		}
		return transposed;
	}

	/**
	 * Matrix product {@code firstMatrix * secondMatrix}.
	 *
	 * @param firstMatrix  left operand, r1 x c1
	 * @param secondMatrix right operand, r2 x c2
	 * @return a new r1 x c2 matrix
	 * @throws IllegalArgumentException if c1 differs from r2
	 */
	private static double[][] dot_product(double[][] firstMatrix, double[][] secondMatrix) {
		int r1 = firstMatrix.length;
		int r2 = secondMatrix.length;
		int c2 = r2 == 0 ? 0 : secondMatrix[0].length;
		if (r1 == 0) return new double[0][c2];
		int c1 = firstMatrix[0].length;
		if (c1 != r2) {
			// Fail loudly: a mismatch would otherwise either run off the end of
			// secondMatrix or silently ignore part of it.
			throw new IllegalArgumentException(
				"Cannot multiply a " + r1 + "x" + c1 + " matrix by a " + r2 + "x" + c2 + " matrix");
		}
		double[][] product = new double[r1][c2];
		for(int i = 0; i < r1; i++) {
			for (int j = 0; j < c2; j++) {
				for (int k = 0; k < c1; k++) {
					product[i][j] += firstMatrix[i][k] * secondMatrix[k][j];
				}
			}
		}
		return product;
	}

	/**
	 * Prints a matrix to standard output, three decimals per cell, cells
	 * separated by tabs, and a blank line after every row.
	 *
	 * @param matrix matrix to print
	 */
	private static void stampa_matrice(double matrix[][]){
		DecimalFormat numberFormat = new DecimalFormat("#.000");
		for(int i=0; i<matrix.length; i++){
			for(int j=0; j<matrix[i].length; j++){
				System.out.print(numberFormat.format(matrix[i][j])+"\t"); // 3 decimals
			}
			System.out.println("\n");
		}
	}

	/**
	 * Forward pass through both layers.
	 *
	 * @param input one row per example
	 * @param syn0  input-to-hidden weights
	 * @param syn1  hidden-to-output weights
	 * @return the network output, one row per example
	 */
	private static double[][] forward(double[][] input, double[][] syn0, double[][] syn1){
		double[][] hidden = fdt_mat(dot_product(input, syn0), false);
		return fdt_mat(dot_product(hidden, syn1), false);
	}

	/**
	 * Performs one batch gradient-descent step, updating {@code syn0} and
	 * {@code syn1} in place.
	 *
	 * <p>Both weight gradients are computed from the weights as they were at
	 * the start of the step; only then are the two matrices updated.
	 *
	 * @param X       inputs, one row per example
	 * @param Y       desired outputs, one row per example
	 * @param syn0    input-to-hidden weights (modified)
	 * @param syn1    hidden-to-output weights (modified)
	 * @param epsilon learning rate
	 * @return the error (see {@link #error_func}) measured before the update
	 */
	private static double train_step(double[][] X, double[][] Y, double[][] syn0, double[][] syn1, double epsilon){
		// forward phase
		double[][] l1 = fdt_mat(dot_product(X, syn0), false);
		double[][] l2 = fdt_mat(dot_product(l1, syn1), false);
		double error = error_func(l2, Y);

		// output layer: (Y - l2) * f'(output), f' taken from the sigmoid output l2
		double[][] out_delta = subtract_matrix_ebe(Y, l2);
		double[][] l2_deriv = fdt_mat(l2, true);
		for(int nu=0; nu<out_delta.length; nu++){
			for(int k=0; k<out_delta[nu].length; k++){
				out_delta[nu][k] *= l2_deriv[nu][k];
			}
		}

		// hidden layer: output delta propagated back through syn1, times f'(hidden)
		double[][] hidden_delta = dot_product(out_delta, translate(syn1));
		double[][] l1_deriv = fdt_mat(l1, true);
		for(int nu=0; nu<hidden_delta.length; nu++){
			for(int j=0; j<hidden_delta[nu].length; j++){
				hidden_delta[nu][j] *= l1_deriv[nu][j];
			}
		}

		// sum over all examples (batch) happens inside the matrix products
		double[][] grad_syn1 = dot_product(translate(l1), out_delta);
		double[][] grad_syn0 = dot_product(translate(X), hidden_delta);

		add_scaled(syn1, grad_syn1, epsilon);
		add_scaled(syn0, grad_syn0, epsilon);
		return error;
	}

	/** Adds {@code factor * delta} to {@code target}, cell by cell, in place. */
	private static void add_scaled(double[][] target, double[][] delta, double factor){
		for(int i=0; i<target.length; i++){
			for(int j=0; j<target[i].length; j++){
				target[i][j] += factor*delta[i][j];
			}
		}
	}

	/**
	 * Trains the network on the XOR truth table and prints the result.
	 *
	 * @param args optional: {@code args[0]} is the number of training iterations
	 * @throws NumberFormatException if {@code args[0]} is not an integer
	 */
	public static void main(String args[]){
		int iterazioni = 1000000;
		int hidden_layer = 5;
		double epsilon = 0.001;
		if (args.length > 0) iterazioni=Integer.parseInt(args[0]);

		// Input matrix, 4x3: the two XOR operands plus a third column fixed at 1
		double X[][] = {
			{0d,0d,1d},
			{0d,1d,1d},
			{1d,0d,1d},
			{1d,1d,1d}
		};

		// Desired outputs, 4x1, one row per example
		double Y[][] = {
			{0d},
			{1d},
			{1d},
			{0d}
		};

		// synapses, first set: inputs x hidden_layer
		double[][] syn0 = randomize_matrix(new double[X[0].length][hidden_layer]);

		// synapses, second set: hidden_layer x 1
		double[][] syn1 = randomize_matrix(new double[hidden_layer][1]);

		// NOTE(review): iteration count and learning rate are kept as found;
		// whether they are enough for convergence has not been verified.
		for(int iter=0; iter<iterazioni; iter++){ // main cycle
			train_step(X, Y, syn0, syn1, epsilon);
		}

		double[][] output = forward(X, syn0, syn1);
		System.out.println("Network output after " + iterazioni + " iterations:");
		stampa_matrice(output);
		System.out.println("Error: " + error_func(output, Y));
	}

	/**
	 * Fills every cell of a matrix with {@code 0.1*Math.random()}.
	 *
	 * @param mat matrix to fill (modified in place)
	 * @return the same array instance, for call chaining
	 */
	private static double[][] randomize_matrix(double[][] mat){
		for(int i=0; i<mat.length; i++){
			for(int j=0; j<mat[i].length; j++){
				mat[i][j] = 0.1*Math.random();
			}
		}
		return mat;
	}

	/**
	 * Element-by-element subtraction {@code mat1 - mat2}.
	 *
	 * @param mat1 minuend; its shape determines the shape of the result
	 * @param mat2 subtrahend; must be at least as large as {@code mat1}
	 * @return a new matrix holding the differences
	 */
	private static double[][] subtract_matrix_ebe(double[][] mat1, double[][] mat2){
		double[][] mat = new double[mat1.length][];
		for(int i=0; i<mat1.length; i++){
			mat[i] = new double[mat1[i].length];
			for(int j=0; j<mat1[i].length; j++){
				mat[i][j] = mat1[i][j] - mat2[i][j];
			}
		}
		return mat;
	}

	/**
	 * Network output error: half the sum of squared differences between the
	 * first column of {@code output} and the first column of {@code xi}.
	 *
	 * @param output network outputs, one row per example
	 * @param xi     desired outputs, one row per example
	 * @return {@code 0.5 * sum_i (output[i][0] - xi[i][0])^2}
	 */
	private static double error_func(double[][] output, double[][] xi){
		double e=0;
		for(int i=0; i<output.length; i++){
			e += Math.pow((output[i][0] - xi[i][0]),2);
		}
		return 0.5*e;
	}
}