/* CPSC 330 - Fall 2008 - Mark Smotherman program to simulate all possible 4-state FSMs for branch prediction accuracy comparisons this program corresponds to a single, dedicated predictor for a given branch, so there is no branch address hash used as an index into a larger history table input to program is a file of 0s and 1s (0 = untaken, 1 = taken), terminated with a final -1 each possible fsm is encoded as a 20-bit string inputs and predictions: 0 = untaken, 1 = taken four states: 0,1,2,3 (encoded in two bits) .---------- prediction for state 0 (0 or 1) | .------- prediction for state 1 (0 or 1) | | .---- prediction for state 2 (0 or 1) | | | .- prediction for state 3 (0 or 1) | | | | p0 p1 p2 p3 n0[] n1[] n2[] n3[] where ni[] is a 4-bit string | | | | | | | `- next states for state 3 for inputs 0 and 1 | | `------ next states for state 2 for inputs 0 and 1 | `----------- next states for state 1 for inputs 0 and 1 `---------------- next states for state 0 for inputs 0 and 1 e.g., 0x3579b is 0011 0101 0111 1001 1011 --predictions- ------next states for 0 and 1 inputs------ p0 p1 p2 p3 state 0 state 1 state 2 state 3 in=0 in=1 in=0 in=1 in=0 in=1 in=0 in=1 = 0 0 1 1 01 01 01 11 10 01 10 11 = U U T T 1 1 1 3 2 1 2 3 0 1 0 .-. .-. .-. 0,1 \ / 1 \ / 0 \ / {0/0}----->{1/0}----->{3/1}----->{2/1} ^ / `---------------' 1 where {s/p} is s=state and p=prediction (i.e.,output) so the transition table is current state input | next state output ----------------------+-------------------- 0 0 | 1 0 0 1 | 1 0 1 0 | 1 0 1 1 | 3 0 2 0 | 2 1 2 1 | 1 1 3 0 | 2 1 3 1 | 3 1 w/o loss of generality, the state state is defined as 0 the program does not attempt to prune behavioral duplicates, e.g., two fsms with no difference in outputs the 20-bit encoding => 2^20 fsms a comparison is also made to a dedicated gshare with a 3-bit local BHSR and a PHT of eight 2-bit saturating counters, each initialized to 0; the leftmost bit of the selected 2bc is the prediction */ #include #define N 1024*1024 #define T 1000 #define START_STATE 0 int trace[T]; int main(){ int i, j; int hit_count, hits[N], max_hits, max_fsm, count_of_maxes; int state, prediction[4], next_state[4][2]; int bhsr = 0; int pht[8] = {0,0,0,0,0,0,0,0}; i = 0; scanf("%d",&trace[i]); while( trace[i] != -1 ){ if( ( trace[i] != 0 ) && ( trace[i] != 1 ) ){ printf("*** bad branch trace entry: trace[%d] = %d not in {0,1}\n", i,trace[i]); exit(0); } if( i == ( T-1 ) ){ printf("*** too many trace entries: limit is %d\n",T); exit(0); } i++; scanf("%d",&trace[i]); } printf("predictor analysis for trace with %d entries\n\n",i); /* each iteration value i encodes a fsm; process the trace using it */ for( i = 0; i < N ; i++ ){ prediction[0] = ( i & 0x80000 ) >> 19; prediction[1] = ( i & 0x40000 ) >> 18; prediction[2] = ( i & 0x20000 ) >> 17; prediction[3] = ( i & 0x10000 ) >> 16; next_state[0][0] = ( i & 0x0c000 ) >> 14; next_state[0][1] = ( i & 0x03000 ) >> 12; next_state[1][0] = ( i & 0x00c00 ) >> 10; next_state[1][1] = ( i & 0x00300 ) >> 8; next_state[2][0] = ( i & 0x000c0 ) >> 6; next_state[2][1] = ( i & 0x00030 ) >> 4; next_state[3][0] = ( i & 0x0000c ) >> 2; next_state[3][1] = i & 0x00003; hit_count = 0; state = START_STATE; j = 0; while( trace[j] != -1 ){ if( prediction[state] == trace[j] ) hit_count++; state = next_state[state][trace[j]]; j++; } hits[i] = hit_count; } max_hits = 0; max_fsm = 0; for( i=1; i max_hits ){ max_hits = hits[i]; max_fsm = i; } } printf("maxiumum hits = %d\n\n",max_hits); printf("hits for predict taken = %d\n",hits[0x80000]); printf("hits for predict untaken = %d\n",hits[0x00000]); printf("hits for one-bit, init. taken = %d\n",hits[0x84400]); printf("hits for one-bit, init. untaken = %d\n",hits[0x13003]); printf("hits for 2-bit sat. counter, init. taken = %d\n",hits[0x9c678]); printf("hits for 2-bit sat. counter, init. untaken = %d\n",hits[0x3127b]); count_of_maxes = 0; for( i=0; i> 1 ) == trace[i] ){ hit_count++; } /* update selected 2bc */ if( trace[i] ){ /* increment, but saturates at 3 */ if( pht[bhsr] < 3 ) pht[bhsr]++; }else{ /* decrement, but saturates at 0 */ if( pht[bhsr] > 0 ) pht[bhsr]--; } /* update bhsr */ bhsr = (trace[i]<<2) | (bhsr>>1); i++; } printf("gshare hits = %d\n",hit_count); printf("gshare final pht = %d %d %d %d %d %d %d %d\n\n", pht[0],pht[1],pht[2],pht[3],pht[4],pht[5],pht[6],pht[7]); return 0; }