siamo a buon punto nella back-prop ma mannaggia a quanto sei tedesco java

2022-01-26 20:11:01 +01:00
parent 2d1d63345b
commit 907f19b076
1 changed files with 189 additions and 0 deletions
--- a/XOR.java
+++ b/XOR.java
@@ -0,0 +1,189 @@
 import java.text.DecimalFormat;
 /**
  * Minimal feed-forward neural network (input layer, one sigmoid hidden layer, one sigmoid
  * output unit) trained on the XOR truth table with batch gradient descent (back-propagation).
  *
  * Matrices are rectangular double[][] arrays with one row per training example.
  * The pure numeric helpers are package-private (not private) so that they can be
  * unit-tested from the same package; main is the only public entry point.
  */
 public class XOR{
 	/**
 	 * Logistic transfer function, or its derivative.
 	 *
 	 * @param x     when deriv is false: the weighted input of the neuron;
 	 *              when deriv is true: the neuron's OUTPUT (a value already passed through
 	 *              this function), because the derivative is evaluated as x*(1-x)
 	 * @param deriv true to get the derivative, false to get the activation
 	 * @return 1/(1+e^-x), or x*(1-x) when deriv is true
 	 */
 	static double fdt(double x,boolean deriv){
 		if (deriv)	return x*(1-x);
 		return 1/(1+Math.exp(-x));
 	}
 	/**
 	 * Applies fdt to every cell of a matrix.
 	 * The argument is left untouched: the result is a newly allocated matrix, so the
 	 * activations of a layer can still be used after their derivative has been computed.
 	 *
 	 * @param x     input matrix
 	 * @param deriv forwarded to fdt for every cell
 	 * @return a new matrix with the same shape as x
 	 */
 	static double[][] fdt_mat(double[][] x,boolean deriv){
 		double[][] out = new double[x.length][];
 		for(int i=0; i<x.length; i++){
 			out[i] = new double[x[i].length];
 			for(int j=0; j<x[i].length; j++){
 				out[i][j] = fdt(x[i][j],deriv);
 			}
 		}
 		return out;
 	}
 	/**
 	 * Transposes a rectangular matrix.
 	 *
 	 * @param matrix matrix with r rows and c columns
 	 * @return a new matrix with c rows and r columns; an empty matrix for an empty input
 	 */
 	static double[][] translate(double[][] matrix){
 		if (matrix.length == 0) return new double[0][0];
 		double[][] Tmatrix = new double[matrix[0].length][matrix.length];
 		for(int i=0; i<Tmatrix.length; i++){
 			for(int j=0; j<matrix.length; j++){
 				Tmatrix[i][j] = matrix[j][i];
 			}
 		}
 		return Tmatrix;
 	}
 	/**
 	 * Matrix product firstMatrix x secondMatrix.
 	 *
 	 * @param firstMatrix  matrix with r1 rows and c1 columns
 	 * @param secondMatrix matrix with c1 rows and c2 columns
 	 * @return a new matrix with r1 rows and c2 columns
 	 * @throws IllegalArgumentException if the column count of firstMatrix differs from the
 	 *         row count of secondMatrix
 	 */
 	static double[][] dot_product(double[][] firstMatrix, double[][] secondMatrix) {
 		int r1 = firstMatrix.length;
 		int r2 = secondMatrix.length;
 		int c2 = (r2 == 0) ? 0 : secondMatrix[0].length;
 		if (r1 == 0) return new double[0][c2];
 		int c1 = firstMatrix[0].length;
 		// Without this check a second matrix with too many rows would be silently truncated.
 		if (c1 != r2) {
 			throw new IllegalArgumentException(
 				"dot_product: cannot multiply a " + r1 + "x" + c1 + " matrix by a " + r2 + "x" + c2 + " matrix");
 		}
 		double[][] product = new double[r1][c2];
 		for(int i = 0; i < r1; i++) {
 			for (int j = 0; j < c2; j++) {
 				for (int k = 0; k < c1; k++) {
 					product[i][j] += firstMatrix[i][k] * secondMatrix[k][j];
 				}
 			}
 		}
 		return product;
 	}
 	/**
 	 * Prints a matrix to standard output: cells formatted with the pattern "#.000" and
 	 * followed by a tab, rows separated by an empty line.
 	 *
 	 * @param matrix matrix to print
 	 */
 	private static void stampa_matrice(double matrix[][]){
 		DecimalFormat numberFormat = new DecimalFormat("#.000");
 		for(int i=0; i<matrix.length; i++){
 			for(int j=0; j<matrix[i].length; j++){
 				System.out.print(numberFormat.format(matrix[i][j])+"\t"); // 3 decimals
 			}
 			System.out.println("\n");
 		}
 	}
 	/**
 	 * Trains the network on the XOR truth table and prints the output of the trained
 	 * network, one row per example.
 	 *
 	 * @param args optional; args[0] overrides the number of training iterations
 	 * @throws NumberFormatException if args[0] is present but is not an integer
 	 */
 	public static void main(String args[]){
 		int iterazioni = 1000000;
 		int hidden_layer = 5;
 		double epsilon = 0.001;
 		if (args.length > 0) iterazioni=Integer.parseInt(args[0]);
 		// Input matrix, 4x3: one row per example. The third column is always 1, so the
 		// weights attached to it act as a bias term for the hidden neurons.
 		double X[][] = {
 			{0d,0d,1d},
 			{0d,1d,1d},
 			{1d,0d,1d},
 			{1d,1d,1d}
 		};
 		// Desired outputs, 4x1: one row per example
 		double Y[][] = {
 			{0d},
 			{1d},
 			{1d},
 			{0d}
 		};
 		// first set of synapses: (number of inputs) x hidden_layer
 		double[][] syn0 = randomize_matrix(new double[X[0].length][hidden_layer]);
 		// second set of synapses: hidden_layer x 1
 		double[][] syn1 = randomize_matrix(new double[hidden_layer][1]);
 		double[][] output = train(X, Y, syn0, syn1, epsilon, iterazioni);
 		stampa_matrice(output);
 	}
 	/**
 	 * Runs batch gradient descent on the squared error of error_func, updating syn0 and
 	 * syn1 in place. Every iteration recomputes the forward pass with the current weights,
 	 * and both weight corrections are derived from the weights as they were before the step.
 	 *
 	 * @param X          inputs, one row per example
 	 * @param Y          desired outputs, one row per example, one column
 	 * @param syn0       input-to-hidden weights, (columns of X) x (hidden neurons); modified
 	 * @param syn1       hidden-to-output weights, (hidden neurons) x 1; modified
 	 * @param epsilon    learning rate
 	 * @param iterazioni number of iterations; zero or negative performs no update
 	 * @return the network output for X computed with the final weights
 	 */
 	static double[][] train(double[][] X, double[][] Y, double[][] syn0, double[][] syn1, double epsilon, int iterazioni){
 		double[][] X_transposed = translate(X); // loop invariant
 		for(int iter=0; iter<iterazioni; iter++){ //main cycle
 			// forward pass
 			double[][] l1 = fdt_mat(dot_product(X,syn0),false);
 			double[][] l2 = fdt_mat(dot_product(l1,syn1),false);
 			// output neuron: (desired - actual) * f'(output)
 			double[][] delta_out = multiply_matrix_ebe(subtract_matrix_ebe(Y,l2), fdt_mat(l2,true));
 			// hidden neurons: output delta propagated back through syn1, times f'(hidden output)
 			double[][] delta_hidden = multiply_matrix_ebe(dot_product(delta_out,translate(syn1)), fdt_mat(l1,true));
 			// summing over the examples is done by the matrix products
 			add_scaled(syn1, dot_product(translate(l1),delta_out), epsilon);
 			add_scaled(syn0, dot_product(X_transposed,delta_hidden), epsilon);
 		}
 		return fdt_mat(dot_product(fdt_mat(dot_product(X,syn0),false),syn1),false);
 	}
 	/**
 	 * Adds factor*delta to target, cell by cell, modifying target in place.
 	 *
 	 * @param target matrix to update
 	 * @param delta  matrix with the same shape as target
 	 * @param factor multiplier applied to every cell of delta
 	 */
 	private static void add_scaled(double[][] target, double[][] delta, double factor){
 		for(int i=0; i<target.length; i++){
 			for(int j=0; j<target[i].length; j++){
 				target[i][j] += factor*delta[i][j];
 			}
 		}
 	}
 	/**
 	 * Overwrites every cell of the matrix with 0.1*Math.random().
 	 *
 	 * @param mat matrix to fill; modified in place
 	 * @return the same array instance that was passed in
 	 */
 	private static double[][] randomize_matrix(double[][] mat){
 		for(int i=0; i<mat.length; i++){
 			for(int j=0; j<mat[i].length; j++){
 				mat[i][j] = 0.1*Math.random();
 			}
 		}
 		return mat;
 	}
 	/**
 	 * Element-by-element difference mat1 - mat2.
 	 *
 	 * @param mat1 minuend; its shape is the shape of the result
 	 * @param mat2 subtrahend, at least as large as mat1
 	 * @return a new matrix
 	 */
 	static double[][] subtract_matrix_ebe(double[][] mat1, double[][] mat2){
 		double[][] mat = new double[mat1.length][];
 		for(int i=0; i<mat1.length; i++){
 			mat[i] = new double[mat1[i].length];
 			for(int j=0; j<mat1[i].length; j++){
 				mat[i][j] = mat1[i][j] - mat2[i][j];
 			}
 		}
 		return mat;
 	}
 	/**
 	 * Element-by-element product of two matrices.
 	 *
 	 * @param mat1 first factor; its shape is the shape of the result
 	 * @param mat2 second factor, at least as large as mat1
 	 * @return a new matrix
 	 */
 	static double[][] multiply_matrix_ebe(double[][] mat1, double[][] mat2){
 		double[][] mat = new double[mat1.length][];
 		for(int i=0; i<mat1.length; i++){
 			mat[i] = new double[mat1[i].length];
 			for(int j=0; j<mat1[i].length; j++){
 				mat[i][j] = mat1[i][j] * mat2[i][j];
 			}
 		}
 		return mat;
 	}
 	/**
 	 * Network output error: half the sum of the squared differences between the first
 	 * column of output and the first column of xi.
 	 *
 	 * @param output actual outputs, one row per example
 	 * @param xi     desired outputs, one row per example
 	 * @return 0.5 * sum over the rows of (output - xi)^2
 	 */
 	static double error_func(double[][] output, double[][] xi){
 		double e=0;
 		for(int i=0; i<output.length; i++){
 			double diff = output[i][0] - xi[i][0];
 			e += diff*diff;
 		}
 		return 0.5*e;
 	}
 }