// Listing metadata from the code viewer: 190 lines, 5.0 KiB, Java.
import java.text.DecimalFormat;
|
|
|
|
public class XOR{
|
|
|
|
private static double fdt(double x,Boolean deriv){
|
|
if (deriv) return x*(1-x);
|
|
return 1/(1+Math.exp(-x));
|
|
}
|
|
|
|
// Applies fdt to every element of a matrix.
|
|
private static double[][] fdt_mat(double[][] x,Boolean deriv){
|
|
if (deriv) {
|
|
for(int i=0; i<x.length; i++){
|
|
for(int j=0; j<x[0].length; j++){
|
|
x[i][j] = x[i][j]*(1-x[i][j]);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
for(int i=0; i<x.length; i++){
|
|
for(int j=0; j<x[0].length; j++){
|
|
x[i][j] = 1/(1+Math.exp(-x[i][j]));
|
|
}
|
|
}
|
|
}
|
|
return x;
|
|
}
|
|
|
|
// Helper that transposes a matrix, so callers do not need an inline for{for{}} loop.
|
|
private static double[][] translate(double[][] matrix){
|
|
double[][] Tmatrix = new double[matrix[0].length][matrix.length];
|
|
for(int i=0; i<Tmatrix.length; i++){
|
|
for(int j=0; j<Tmatrix[0].length; j++){
|
|
Tmatrix[i][j] = matrix[j][i];
|
|
}
|
|
}
|
|
return Tmatrix;
|
|
}
|
|
|
|
private static double[][] dot_product(double[][] firstMatrix, double[][] secondMatrix) {
|
|
int r1 = firstMatrix.length;
|
|
int c1 = firstMatrix[0].length;
|
|
int r2 = secondMatrix.length;
|
|
int c2 = secondMatrix[0].length;
|
|
double[][] product = new double[r1][c2];
|
|
for(int i = 0; i < r1; i++) {
|
|
for (int j = 0; j < c2; j++) {
|
|
for (int k = 0; k < c1; k++) {
|
|
product[i][j] += firstMatrix[i][k] * secondMatrix[k][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
return product;
|
|
}
|
|
|
|
private static void stampa_matrice(double matrix[][]){
|
|
DecimalFormat numberFormat = new DecimalFormat("#.000");
|
|
for(int i=0; i<matrix.length; i++){
|
|
for(int j=0; j<matrix[0].length; j++){
|
|
System.out.print(numberFormat.format(matrix[i][j])+"\t"); //3 decimali
|
|
//System.out.print((matrix[i][j])+"\t"); //tutti i decimali
|
|
}
|
|
//System.out.print(" ");
|
|
System.out.println("\n");
|
|
}
|
|
}
|
|
|
|
public static void main(String args[]){
|
|
int iterazioni = 1000000;
|
|
int hidden_layer = 5;
|
|
double epsilon = 0.001;
|
|
if (args.length > 0) iterazioni=Integer.parseInt(args[0]);
|
|
//Matrice degli input, 4x3
|
|
double X[][] = {
|
|
{0d,0d,1d},
|
|
{0d,1d,1d},
|
|
{1d,0d,1d},
|
|
{1d,1d,1d}
|
|
};
|
|
|
|
//Matrice degli output desiderati \xi, 4x1, una riga per ogni esempio
|
|
double Y[][] = {
|
|
{0d},
|
|
{1d},
|
|
{1d},
|
|
{0d}
|
|
};
|
|
double Ytest[][] = {
|
|
{1d},
|
|
{0d},
|
|
{0d},
|
|
{1d}
|
|
};
|
|
|
|
//synapsi, primo set, 3x4
|
|
double[][] syn0 = new double[X[0].length][hidden_layer];
|
|
syn0 = randomize_matrix(syn0);
|
|
|
|
//synapsi, secondo set, 4x1
|
|
double[][] syn1 = new double[hidden_layer][1];
|
|
syn1 = randomize_matrix(syn1);
|
|
|
|
//fase forward iniziale
|
|
double[][] l0 = X;
|
|
double[][] l1 = dot_product(l0,syn0);
|
|
double[][] l2 = dot_product(l1,syn1);
|
|
|
|
for(int iter=0; iter<iterazioni; iter++){ //main cycle
|
|
double old_error = error_func(l2,Y);
|
|
|
|
double[][] delta_nu = subtract_matrix_ebe(Y , l2);
|
|
|
|
|
|
double delta_W = 0; //batch
|
|
for(int j=0;j<hidden_layer; j++){ //aggiornamento delle sinapsi del Perceptron di output
|
|
for(int nu=0;nu<X.length; nu++){
|
|
double [][] y_nu = new double[1][l1[nu].length];
|
|
for(int y=0;y<l1[nu].length; y++){ y_nu[0][y] = l1[nu][y]; }
|
|
delta_W += epsilon*delta_nu[nu][0]*fdt(dot_product(y_nu,syn1)[0][0],true)*l1[nu][j];
|
|
}
|
|
syn1[j][0] += delta_W;
|
|
}
|
|
|
|
double[][] syn0_transposed = new double[syn0[0].length][syn0.length];
|
|
for(int i=0;i<syn0_transposed.length;i++){
|
|
for(int j=0;j<syn0_transposed[0].length; j++){
|
|
syn0_transposed[i][j] = syn0[j][i];
|
|
}
|
|
|
|
}
|
|
|
|
|
|
double delta_wij = 0;
|
|
for(int i=0;i<X[0].length;i++){ //loop sugli input
|
|
for(int j=0;j<hidden_layer; j++){ //loop sui neuroni dello strato hidden
|
|
for(int nu=0;nu<X.length; nu++){
|
|
double [][] y_nu = new double[1][l1[nu].length];
|
|
for(int y=0;y<l1[nu].length; y++){ y_nu[0][y] = l1[nu][y]; }
|
|
double [][] X_row = new double[1][X[nu].length];
|
|
double [][] syn0_transposed_row = new double[1][X[nu].length];
|
|
for(int y=0;y<X[nu].length; y++){
|
|
syn0_transposed_row[0][y] = syn0_transposed[nu][y];
|
|
X_row[0][y] = X[nu][y];
|
|
|
|
}
|
|
//delta_wij += epsilon*delta_nu[nu][0]*fdt(dot_product(y_nu,syn1)[0][0],true)*syn1[j][0]*fdt(dot_product(X_row,syn0_transposed[j])[0][0],true)*X[nu][i];
|
|
//OUT OF BOUNDS PER QUALCHE MOTIVO
|
|
//stampa_matrice(dot_product(X_row,syn0_transposed_row));
|
|
}
|
|
syn1[j][0] += delta_wij;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
private static double[][] randomize_matrix(double[][] mat){ //prende matrice e mette numero randomico in celle
|
|
for(int i=0; i<mat.length; i++){
|
|
for(int j=0; j<mat[0].length; j++){
|
|
mat[i][j] = 0.1*Math.random();
|
|
}
|
|
}
|
|
return mat;
|
|
}
|
|
//ELEMENT BY ELEMENT
|
|
private static double[][] subtract_matrix_ebe(double[][] mat1, double[][] mat2){ //sottrazione di matrici termine a termine
|
|
double[][] mat = new double[mat1.length][mat1[0].length];
|
|
for(int i=0; i<mat.length; i++){
|
|
for(int j=0; j<mat[0].length; j++){
|
|
mat[i][j] = mat1[i][j] - mat2[i][j];
|
|
}
|
|
}
|
|
return mat;
|
|
}
|
|
|
|
private static double error_func(double[][] output, double[][] xi){ //calcolo di errore di output di rete
|
|
double e=0;
|
|
for(int i=0; i<output.length; i++){
|
|
e += Math.pow((output[i][0] - xi[i][0]),2);
|
|
}
|
|
return 0.5*e;
|
|
}
|
|
}
|
|
|