/*
 * 2lp.c
 *
 * This program is designed to train a two layer perceptron by means of
 * the back propagation algorithm.
 *
 * This program can train from a cold start (weights randomly chosen) or
 * it can train from a warm start (existing weights read in at the start
 * of the training procedure).
 *
 * The format of the command is the following:
 *
 *      2lp input
 *
 * Here input represents the first part of the name of the input file
 * containing the training data.  All training files must have the
 * extension .trn.  Thus if you wished to train using file xxx.trn as
 * input you would enter:
 *
 *      2lp xxx
 *
 * As part of the training 2lp will create a log file called (in this
 * case) xxx.log.  The final value of the weights will be placed in a
 * file called (in this case) xxx.wts.
 *
 * If you wish to do warm start training using the file xxx.wts, you
 * must enter the command:
 *
 *      2lp xxx xxx.wts
 *
 * The format of an input weights file is the same as that generated by
 * 2lp.  Thus no editing is required.
 * NOTE: In this case the existing version of xxx.wts will be
 * overwritten and lost when training completes.
 *
 * Sample of input data:
 *
 *      0.08 0.7 0.4
 *      4 2 1
 *      b05-A 0 0 2.836 1 0.800 0 -0.185 0.103 0.744 0.103
 *      b05-A 0 1 1.104 1 0.140 1 0.000 -0.205 0.026 0.410
 *
 * Input data format:
 *
 * The first line contains the maximum error value, the gain and the
 * momentum in floating point format.
 *
 * The second line contains the number of inputs, number of hidden
 * units and number of outputs.
 *
 * Each subsequent line contains a single training record.
 *
 * The first five fields are read using the formats: %s %d %d %f %d.
 * The data placed in these fields must be compatible with the formats.
 * It is, however, not used at all in the training procedure.  I
 * typically use it as documentation that enables me to correlate
 * quickly a training sample with the image/feature that it is
 * associated with.
 *
 * The next group of fields are the target output values.  In the
 * example the number of outputs is 1 so there is only a single target
 * output.  The output must be specified in floating point.
 *
 * The next field is an integer 0 or 1 called the select value.  This
 * training program is designed to work with fuzzy target scores.  A
 * "good" score is one near 0.0 and a "bad" score is one near 1.0.
 * Select is the hard logic value associated with the fuzzy logic
 * target.  For "historical" reasons a "good" select score is 1 and a
 * bad select score is 0.  In training the network, any errors that
 * occur on the hard logic side of the target fuzzy logic value are
 * ignored.  This approach has been shown to lead to significantly
 * faster training convergence and I believe that it also leads to a
 * "smoother" network with superior generalization properties.
 *
 * Thus if the target score = .30 and the computed score is .20 and
 * select is 1 the computation will be accepted as correct.  Similarly
 * if the target is .6, the computed value .8 and select = 0, no error
 * is recognized.  However if the computed score is not on the select
 * "side" of the target score, it must differ by less than the error
 * threshold specified on line 1 of the input or weight adjustment will
 * occur.  NOTE: the single select value is applicable to all outputs.
 * I typically use a single output so this is not a problem.  However,
 * if multiple outputs are being used all need to be defined so that
 * 0.0 is "good" and 1.0 is "bad".
 *
 * The next fields are the input values.  Each value should be
 * specified in floating point.  The number of values must equal that
 * specified on line 2 of the input.
 *
 * Limits on the number of inputs, outputs, hidden units, and training
 * samples are specified below in #define statements.
 */

#include <math.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEBUG        0
#define MAX_RECORDS  2000
#define MAX_INPUTS   32
#define MAX_LAYER1   32
#define MAX_OUTPUTS  8
#define MAX_PASSES   400000000
#define CONV_EPS     0.10
#define PRINT_INC    1000
#define RANDMAX      2147483647
#define ID_SIZE      16     /* Stored sample id, including the NUL    */
#define NAMEBUF_SIZE 256    /* Room for base name plus extension      */

#ifdef UNIX
#define BUF_SIZE 0x10000
#else
#define BUF_SIZE 0x3fff
#endif

static float *intab;                /* Base address of input table    */
static float *targtab;              /* Base address of target table   */
static float inbuf[BUF_SIZE];
static float targbuf[BUF_SIZE];
static float avgdiff;
static float maxdiff;

/* Data structures used to contain data documenting the input */
static char idtab[MAX_RECORDS][ID_SIZE];
static int  tptab[MAX_RECORDS];
static int  bptab[MAX_RECORDS];
static int  seltab[MAX_RECORDS];
static int  errortab[MAX_RECORDS];
static int  tpndx;
static int  bpndx;
static int  apscore;

/* The first index of each weight table runs from 0 to the unit count  */
/* INCLUSIVE: the extra row holds the weights of the constant-1 bias   */
/* unit, so one more row than the unit limit is required.              */
static float weights1[MAX_INPUTS + 1][MAX_LAYER1];
static float deltawt1[MAX_INPUTS + 1][MAX_LAYER1];
static float weights2[MAX_LAYER1 + 1][MAX_OUTPUTS];
static float deltawt2[MAX_LAYER1 + 1][MAX_OUTPUTS];
static float layer1[MAX_LAYER1 + 1];    /* [count1] is locked at 1.0   */
static float output[MAX_OUTPUTS];
static float outdel[MAX_OUTPUTS];
static float targout[MAX_OUTPUTS];

static int count0;                  /* Number of inputs in use        */
static int count1;                  /* Number of hidden units         */
static int count2;                  /* Number of outputs              */
static int incount;                 /* Number of training samples     */

static FILE *infile;
static FILE *netfile;
static FILE *nnlog;

static float gain = 0.3;
static float momentum = 0.7;
static float epsilon = CONV_EPS;
static float threshold1[MAX_LAYER1];
static float threshold2[MAX_OUTPUTS];

static int  attempts;
static int  successes;
static long iterations;

/* Set by the SIGINT handler; polled by the training loop in main().   */
static volatile sig_atomic_t interrupted = 0;

char namebuf[NAMEBUF_SIZE];
char *namebase;

void  sigproc(int sig);
long  random(void);
float getrand(void);
void  initwts(void);
void  getinput(void);
int   getrec(int id);
void  partord(void);
float sigmoid(float value);
float compute(int id, int prnflag);
float outdelta(int ndx, int id);
float hiddelta(int ndx);
void  adjust(int id);
void  printwts(void);
void  logwts(void);
void  loadwts(char *wfile);

/* SIGINT handler.. Only records that an interrupt arrived; the        */
/* training loop finishes its current pass and then stops, so that the */
/* final report and the weights file are still written.                */
void sigproc(int sig)
{
    (void)sig;
    interrupted = 1;
}

/* Build "<namebase><ext>" in namebuf.. Exits with a message when the  */
/* result would not fit, rather than overrunning the buffer.           */
static void makename(const char *ext)
{
    int n;

    n = snprintf(namebuf, sizeof namebuf, "%s%s", namebase, ext);
    if (n < 0 || (size_t)n >= sizeof namebuf) {
        printf("File name %s%s is too long \n", namebase, ext);
        exit(1);
    }
}

/* Program entry point.                                                */
/*   argv[1]  base name of the training file (".trn" is appended)      */
/*   argv[2]  optional weights file for a warm start                   */
/* Returns 0 on completion; exits with status 1 on usage, file or      */
/* input format errors.                                                */
int main(int argc, char **argv)
{
    int   prntime;
    int   prnflag = 0;
    int   adjflag;
    int   i;
    float diff;

    /* Set up a signal handler */
    signal(SIGINT, sigproc);

    /* Set up pointers to buffers to be used... Done so that it is    */
    /* easy to change to dynamically allocated buffers.               */
    intab = inbuf;
    targtab = targbuf;

    if (argc < 2) {
        printf("Usage is: 2lp input-data [weights-file] \n");
        exit(1);
    }

    /* Attempt to open input file */
    prntime = PRINT_INC;
    namebase = argv[1];
    makename(".trn");
    infile = fopen(namebuf, "r");
    if (infile == NULL) {
        printf("Couldn't open %s \n", namebuf);
        exit(1);
    }

    makename(".log");
    nnlog = fopen(namebuf, "w");
    if (nnlog == NULL) {
        printf("Couldn't open %s \n", namebuf);
        exit(1);
    }

    /* Get convergence threshold, gain and momentum */
    if (fscanf(infile, "%f %f %f", &epsilon, &gain, &momentum) != 3) {
        printf("Bad training parameters in %s.trn \n", namebase);
        exit(1);
    }

    /* Acquire counts of input, hidden, and output units.. They index */
    /* fixed size tables, so they must be range checked before use.   */
    if (fscanf(infile, "%2d %2d %2d", &count0, &count1, &count2) != 3
        || count0 < 1 || count0 > MAX_INPUTS
        || count1 < 1 || count1 > MAX_LAYER1
        || count2 < 1 || count2 > MAX_OUTPUTS) {
        printf("Bad unit counts in %s.trn (limits are %d %d %d) \n",
               namebase, MAX_INPUTS, MAX_LAYER1, MAX_OUTPUTS);
        exit(1);
    }
    layer1[count1] = 1.0;

    /* Load input and target data */
    getinput();
    fclose(infile);
    if (incount == 0) {
        printf("No training records found in %s.trn \n", namebase);
        exit(1);
    }

    /* Initialize the weights.. Weights may either come from an       */
    /* existing weight file or be randomly initialized.               */
    if (argc < 3)
        initwts();
    else
        loadwts(argv[2]);

    iterations = 0;
    /* partord() may be called here to report samples whose inputs    */
    /* and targets violate the expected partial order.                */
    while (iterations < MAX_PASSES && !interrupted) {
        attempts = 0;
        successes = 0;
        maxdiff = 0.0;
        avgdiff = 0.0;
        adjflag = 0;

        /* Make a pass over all of the input samples */
        for (i = 0; i < incount; i++) {
            diff = compute(i, prnflag);
            if (diff > epsilon) {
                adjflag = 1;
                adjust(i);
                /* NOTE(review): this logs every weight after every   */
                /* single adjustment, which makes the log very large; */
                /* it looks like leftover debugging -- confirm before */
                /* removing.                                          */
                printwts();
                errortab[i] += 1;
            }
        }

        prntime -= 1;
        if ((iterations % 100) == 0)
            printf("%ld %d %d %f %f \n", iterations, attempts,
                   successes, maxdiff, avgdiff / incount);
        if (prntime == 0) {
            fprintf(nnlog, "A: %4d S: %4d \n", attempts, successes);
            printwts();
            prntime = PRINT_INC;
        }
        iterations += 1;

        /* If the process has converged, quit now */
        if (!adjflag)
            break;
    }

    /* Make one last pass over the input printing computed values */
    attempts = 0;
    successes = 0;
    maxdiff = 0.0;
    avgdiff = 0.0;
    for (i = 0; i < incount; i++) {
        compute(i, 1);
    }
    printf("%6d %6d \n", attempts, successes);

    /* Log final weights to output file */
    logwts();
    fprintf(nnlog, "Training completed.. iterations = %ld \n", iterations);
    fprintf(nnlog, "Last pass attempts = %d.. successes = %d \n",
            attempts, successes);
    fclose(nnlog);
    return 0;
}

/* random.c */
/* Pseudo - random number generator proposed in Oct 88 issue of */
/* CACM */

/* Retained from the original source; no longer used in this file. */
#define begin {
#define end }

static int  a = 16807;
static long m = 2147483647;
static long q = 127773;     /* m div a */
static long r = 2836;       /* m mod a */
static long seed = 1;

/* Advance the generator and return the new seed value.. The product  */
/* is split into high and low parts so that it never overflows a      */
/* 32 bit long.                                                       */
long random(void)
{
    long lo, hi, test;

    hi = seed / q;
    lo = seed % q;
    test = a * lo - r * hi;
    if (test > 0)
        seed = test;
    else
        seed = test + m;
    return seed;
}

/* Get a small random number.. used to ensure that training       */
/* process doesn't get stuck at initial local minimum.            */
/* The result is random() scaled down by (8 * RANDMAX + 1).       */
float getrand(void)
{
    long  rval;
    float fval;

    rval = random();
    fval = (float)rval / (8.0 * (float)RANDMAX + 1.0);
    return fval;
}

/* Initialize weights.. Each weight is initially set to 1 / number */
/* of connections plus a random offset.  Momentum terms are reset. */
void initwts(void)
{
    int i;
    int j;

    /* Init weights connecting input layer to hidden layer */
    for (i = 0; i <= count0; i++) {
        for (j = 0; j < count1; j++) {
            weights1[i][j] = 1.0 / count0 + getrand();
            deltawt1[i][j] = 0.0;
        }
    }

    /* Init weights connecting hidden layer to output layer */
    for (i = 0; i <= count1; i++) {
        for (j = 0; j < count2; j++) {
            weights2[i][j] = 1.0 / count1 + getrand();
            deltawt2[i][j] = 0.0;
        }
    }

    /* Last hidden layer unit is locked at value 1 to provide an  */
    /* adjustable threshold for output layer computations.        */
    layer1[count1] = 1.0;
}

/* Get input data one sample at a time.. Reading stops at the first  */
/* record that cannot be read completely, or when another record     */
/* would not fit in the record tables or the input/target buffers.   */
void getinput(void)
{
    fprintf(nnlog, " I n p u t D a t a \n\n");
    for (;;) {
        /* Each sample uses count0 + 1 input slots (the extra one is  */
        /* the fixed bias input) and count2 target slots.             */
        if (incount >= MAX_RECORDS
            || (long)(incount + 1) * (count0 + 1) > (long)BUF_SIZE
            || (long)(incount + 1) * count2 > (long)BUF_SIZE) {
            printf("Input data overflow! \n");
            break;
        }
        if (getrec(incount) != 0)
            break;
        incount += 1;
    }
    fprintf(nnlog, "\n\n O u t p u t D a t a \n\n");
}

/* getrec.c */
/* Get a single input sample record.. Fields are read in this order:  */
/*   id, tpndx, bpndx, input_sum, ap_score, target_values, select,    */
/*   input_values                                                     */
/* id, tpndx, bpndx, input_sum and ap_score are not used in the       */
/* computation.. but exist in input in order to correlate problems    */
/* with output back to actual image data.                             */
/* The record is echoed to the log file as it is read.                */
/*   id       index of the table slot the record is stored in         */
/* Returns 0 when a complete record was read, 1 at end of input or    */
/* when any field could not be read.                                  */
int getrec(int id)
{
    int   i;
    char  imname[64];
    float sum;
    float *inloc;
    float *targloc;
    float decision;

    /* Get image ID value.. Stored truncated to ID_SIZE - 1 chars */
    if (fscanf(infile, "%63s", imname) != 1)
        return 1;
    snprintf(idtab[id], sizeof idtab[id], "%s", imname);
    fprintf(nnlog, "%s ", imname);

    /* Get 1st two dummy values */
    if (fscanf(infile, "%d %d", &tpndx, &bpndx) != 2)
        return 1;
    tptab[id] = tpndx;
    bptab[id] = bpndx;
    fprintf(nnlog, "%2d %2d - ", tpndx, bpndx);

    /* Read dummy values from input */
    if (fscanf(infile, "%f", &sum) != 1)
        return 1;
    if (fscanf(infile, "%d", &apscore) != 1)
        return 1;

    /* Finally acquire target values */
    targloc = targtab + id * count2;
    for (i = 0; i < count2; i++) {
        if (fscanf(infile, "%f", targloc) != 1)
            return 1;
        fprintf(nnlog, "%f ", *targloc);
        targloc += 1;
    }

    /* Select value is read as a float and stored as an integer */
    if (fscanf(infile, "%f", &decision) != 1)
        return 1;
    seltab[id] = (int)decision;
    fprintf(nnlog, "%2d ", seltab[id]);

    /* Set up buffer pointer to first element of current input sample */
    inloc = intab + (count0 + 1) * id;

    /* Read input values. */
    for (i = 0; i < count0; i++) {
        if (fscanf(infile, "%f", inloc) != 1)
            return 1;
        fprintf(nnlog, "%6.3f ", *inloc);
        inloc += 1;
    }

    /* Generate a fixed input to be used to drive a weight   */
    /* acting like a threshold on hidden layer               */
    *inloc = 1.0;

    fprintf(nnlog, "\n");
    return 0;
}

/* See if any partial order constraints are violated.. For every pair */
/* of samples (i, j) with j <= i: if no input of i is <= the matching */
/* input of j, the first target of i must not be below that of j; if  */
/* every input of i is <= that of j, the first target of i must not   */
/* be above that of j.  Violations are reported on standard output.   */
void partord(void)
{
    int   i, j, k;
    float *in1, *in2;
    int   order;

    for (i = 0; i < incount; i++) {
        for (j = 0; j <= i; j++) {
            in1 = intab + (count0 + 1) * i;
            in2 = intab + (count0 + 1) * j;
            order = 0;
            for (k = 0; k < count0; k++) {
                if (*in1++ <= *in2++)
                    order += 1;
            }
            if (order == 0) {
                if (*(targtab + i * count2) < *(targtab + j * count2)) {
                    printf("Order error.. Recs %d - %s and %d - %s \n",
                           i, idtab[i], j, idtab[j]);
                }
            }
            else if (order == count0) {
                if (*(targtab + i * count2) > *(targtab + j * count2)) {
                    printf("Order error.. Recs %d - %s and %d - %s \n",
                           i, idtab[i], j, idtab[j]);
                }
            }
        }
    }
}

/* Compute the value of the sigmoid limiter function: 1 / (1 + e^-v) */
float sigmoid(float value)
{
    float rval;

    rval = 1.0 / (1 + exp(-value));
    return rval;
}

/* Compute all outputs for a given input sample.                      */
/*   id       input sample identification                             */
/*   prnflag  when nonzero, one line per output is written to the log */
/* Updates layer1[], output[], attempts, successes, maxdiff and       */
/* avgdiff.  Returns the largest absolute error over the outputs of   */
/* this sample, where errors on the select side of the target count   */
/* as zero.                                                           */
float compute(int id, int prnflag)
{
    int   i, j;
    float sum;
    float *in;
    float *targ;
    float diff;
    float absdiff;
    float recdiff;          /* Maximum error for entire record */

    /* Compute output values for the hidden units         */
    /* One trip through outer loop for each hidden unit   */
    recdiff = 0.0;
    for (i = 0; i < count1; i++) {
        sum = 0;
        in = intab + (count0 + 1) * id;
        /* j == count0 picks up the fixed bias input */
        for (j = 0; j <= count0; j++) {
            sum += *in * weights1[j][i];
            in += 1;
#if DEBUG
            printf(" Sum 1: i = %d , j = %d, sum = %f \n", i, j, sum);
#endif
        }
        layer1[i] = sigmoid(sum);
    }

    /* Compute output values for the output units */
    targ = targtab + (count2) * id;
    for (i = 0; i < count2; i++) {
        sum = 0;
        /* j == count1 picks up the hidden unit locked at 1.0 */
        for (j = 0; j <= count1; j++) {
            sum += layer1[j] * weights2[j][i];
#if DEBUG
            printf(" Sum 2: i = %d , j = %d, sum = %f \n", i, j, sum);
#endif
        }
        output[i] = sigmoid(sum);
        attempts += 1;

        /* Errors on the hard logic side of the target are ignored:  */
        /* select 0 tolerates outputs above the target, select 1     */
        /* tolerates outputs below it.                               */
        diff = *(targ + i) - output[i];
        if ((seltab[id] == 0) && (diff < 0))
            diff = 0.0;
        else if ((seltab[id] == 1) && (diff > 0.0))
            diff = 0.0;

        absdiff = fabs(diff);
        if (absdiff > maxdiff)
            maxdiff = absdiff;
        if (absdiff > recdiff)
            recdiff = absdiff;
        avgdiff += absdiff;

        if (prnflag) {
            fprintf(nnlog, "%4s %3d %3d %3d -- ", idtab[id],
                    tptab[id], bptab[id], seltab[id]);
            fprintf(nnlog, "%6.3f - %6.3f - %6.3f - %6.3f ",
                    output[i], *(targ + i), diff, maxdiff);
            fprintf(nnlog, "- %7d ", errortab[id]);
            if (absdiff < epsilon)
                fprintf(nnlog, " S\n");
            else
                fprintf(nnlog, " F\n");
        }
    }

    if (recdiff < epsilon)
        successes += 1;
    return recdiff;
}

/* Compute error term for an output unit.                      */
/*   ndx  index of current output layer unit                   */
/*   id   id of the input sample being processed               */
/* The term is also saved in outdel[ndx] for use by hiddelta() */
float outdelta(int ndx, int id)
{
    float fval;
    float *targ;

    targ = targtab + id * count2 + ndx;
    fval = output[ndx] * (1 - output[ndx]) * (*targ - output[ndx]);
    outdel[ndx] = fval;
    return fval;
}

/* Compute error term for a hidden unit from the output error   */
/* terms saved in outdel[] by outdelta().                       */
/*   ndx  index of current hidden layer unit                    */
float hiddelta(int ndx)
{
    float fval;
    int   i;

    fval = 0.0;
    for (i = 0; i < count2; i++)
        fval += outdel[i] * weights2[ndx][i];
    fval *= layer1[ndx] * (1.0 - layer1[ndx]);
    return fval;
}

/* Adjust all weights based on error in sample just computed.      */
/*   id  index of input sample just computed                       */
/* Relies on layer1[] and output[] still holding the values that   */
/* compute(id, ...) produced.                                      */
void adjust(int id)
{
    int   i, j;
    float delta;
    float oldwt;
    float *in;

    /* Adjust weights connecting hidden layer to output layer */
    for (j = 0; j < count2; j++) {
        /* The error term does not depend on the weights being      */
        /* changed below, so it is computed once per output unit.   */
        delta = outdelta(j, id);
        for (i = 0; i <= count1; i++) {
            oldwt = weights2[i][j];
            weights2[i][j] += gain * layer1[i] * delta;
            weights2[i][j] += momentum * deltawt2[i][j];
            deltawt2[i][j] = weights2[i][j] - oldwt;
#if DEBUG
            printf("Wts 2: i = %d , j = %d, sum = %f \n",
                   i, j, weights2[i][j]);
#endif
        }
    }

    /* Adjust weights connecting input layer to hidden layer */
    for (j = 0; j < count1; j++) {
        in = intab + (count0 + 1) * id;
        /* Likewise independent of weights1, so computed once per   */
        /* hidden unit.  Note it uses the output layer weights as   */
        /* already adjusted above.                                  */
        delta = hiddelta(j);
        for (i = 0; i <= count0; i++) {
            oldwt = weights1[i][j];
            weights1[i][j] += gain * (*in) * delta;
            in += 1;
            weights1[i][j] += momentum * deltawt1[i][j];
            deltawt1[i][j] = weights1[i][j] - oldwt;
#if DEBUG
            printf(" Wts 1: i = %d , j = %d, sum = %f \n",
                   i, j, weights1[i][j]);
#endif
        }
    }
}

/* Print current weight values to the log file.. One line per      */
/* hidden unit, then one line per output unit; the last value on   */
/* each line is the bias weight.                                   */
void printwts(void)
{
    int i;
    int j;

    fprintf(nnlog, "\n Input - Hidden Weights \n");
    for (i = 0; i < count1; i++) {
        for (j = 0; j <= count0; j++) {
            fprintf(nnlog, "%6.3f ", weights1[j][i]);
        }
        fprintf(nnlog, "\n");
    }

    fprintf(nnlog, "\n Hidden - Output Weights \n");
    for (i = 0; i < count2; i++) {
        for (j = 0; j <= count1; j++) {
            fprintf(nnlog, "%6.3f ", weights2[j][i]);
        }
        fprintf(nnlog, "\n");
    }
}

/* Log final weight values to "<namebase>.wts", in the layout that */
/* loadwts() reads back.. A failure to open or write the file is   */
/* reported on standard output.                                    */
void logwts(void)
{
    int  i;
    int  j;
    FILE *wts;

    makename(".wts");
    wts = fopen(namebuf, "w");
    if (wts == NULL) {
        printf("Couldn't open %s \n", namebuf);
        return;
    }

    for (i = 0; i < count1; i++) {
        for (j = 0; j <= count0; j++) {
            fprintf(wts, "%8.3f ", weights1[j][i]);
        }
        fprintf(wts, "\n");
    }

    for (i = 0; i < count2; i++) {
        for (j = 0; j <= count1; j++) {
            fprintf(wts, "%8.3f ", weights2[j][i]);
        }
        fprintf(wts, "\n");
    }

    /* fclose flushes, so this is where a full disk shows up */
    if (fclose(wts) != 0)
        printf("Error writing %s \n", namebuf);
}

/* loadwts.c */
/* Load the connection weights from the specified file.. The file   */
/* must hold (count0 + 1) * count1 input-hidden weights followed by */
/* (count1 + 1) * count2 hidden-output weights.  Exits with a       */
/* message if the file cannot be opened or is short or malformed.   */
/* The loaded weights are echoed to the log file.                   */
/*   wfile  name of the weights file                                */
void loadwts(char *wfile)
{
    int  i;
    int  j;
    FILE *wts;

    wts = fopen(wfile, "r");
    if (wts == NULL) {
        printf("Couldn't open weight file %s \n", wfile);
        exit(1);
    }

    for (i = 0; i < count1; i++) {
        for (j = 0; j <= count0; j++) {
            if (fscanf(wts, "%f", &weights1[j][i]) != 1) {
                printf("Bad weight file %s \n", wfile);
                exit(1);
            }
        }
    }

    for (i = 0; i < count2; i++) {
        for (j = 0; j <= count1; j++) {
            if (fscanf(wts, "%f", &weights2[j][i]) != 1) {
                printf("Bad weight file %s \n", wfile);
                exit(1);
            }
        }
    }
    fclose(wts);
    printwts();
}