Clemson University -- CPSC 231 -- Spring 2010 Shifts (p. 123) srl - shift right logical - 0 enters from left, bit drops off right end +-------+-------+ +-------+-------+ note: little-endian 0 --> b_31 -> b_30 -> ... -> b_1 --> b_0 --> bit notation +-------+-------+ +-------+-------+ msb lsb "b" for bit a f 5 0 8 9 1 6 10101111010100001000100100010110 gcc generates srl srl by 1 becomes for unsigned int 01010111101010000100010010001011 variable a in 5 7 a 8 4 4 8 b a = a >> 1; sra - shift right arithmetic - sign bit replicated on left, bit drops off right end .---. +-v---|-+-------+ +-------+-------+ | b_31 -> b_30 -> ... -> b_1 --> b_0 --> +-------+-------+ +-------+-------+ sign lsb a f 5 0 8 9 1 6 10101111010100001000100100010110 gcc generates sra sra by 1 becomes for int variable 11010111101010000100010010001011 a in d 7 a 8 4 4 8 b a = a >> 1; sll - shift left logical - 0 enters from right, bit drops off left end +-------+-------+ +-------+-------+ <-- b_31 <- b_30 <- ... <- b_1 <-- b_0 <-- 0 +-------+-------+ +-------+-------+ msb lsb a f 5 0 8 9 1 6 10101111010100001000100100010110 gcc generates sll sll by 1 becomes for a = a << 1; 01011110101000010001001000101100 5 e a 1 1 2 2 c the shift count is given as a 5-bit value (0-31) either as an immediate value or in a register; if you specify a larger value then the actual shift count is the number you specify modulo 32 a shift right has a choice of what to put in the most significant bit (i.e., the sign bit) from the left: either zeros or replicating the sign bit; that is why there are two shift right opcodes a shift left has no choice as to what will go into the sign bit - it has to be one of the bits already in the register, which is determined by the shift amount; zeros always come into the least significant bit on the right Multiplication by small constants (skip ahead to pp. 
187-188) 1) convert the constant into a sum of powers of two (can allow a subtract) 2) convert the multiplications by powers of two into left shifts x * 10 = x * (8 + 2) = (x * 8) + (x * 2) = (x << 3) + (x << 1) x * 15 = x * (16 - 1) = (x * 16) - x = (x << 4) - x Rotates rotate %o0 left by 1 bit addcc %o0, %o0, %o0 addx %o0, %g0, %o0 macro for rol(rs1,n,rd) .---------------. | +---+-----+ | `--| A | B |<-' rotate left by n define(rol,` +---+-----+ sll $1,$2,%o0 n 32-n srl $1,eval(32-$2),%o1 or %o0,%o1,$3 +-----+---+ final result is equiv. ') | B | A | to shifting A right by +-----+---+ 32-n and shifting B 32-n n left by n macro for ror(rs1,n,rd) .---------------. | +-----+---+ | `->| A | B |--' rotate right by n define(ror,` +-----+---+ srl $1,$2,%o0 32-n n sll $1,eval(32-$2),%o1 or %o0,%o1,$3 +---+-----+ final result is equiv. ') | B | A | to shifting A right by +---+-----+ n and shifting B left n 32-n by 32-n Field extraction with a bit mask - field already in rightmost bits bit mask - a word where bits are used to zero out or select portions of another word assume we are working with nibbles (4-bit fields) remove (clear) all other bits and work with only the low four bits (the mask will have 0s in all the bit positions we wish to clear and 1s in the bit positions we wish to select) xxxx xxxx xxxx xxxx xxxx xxxx xxxx zzzz %a_r and 0000 0000 0000 0000 0000 0000 0000 1111 and %a_r,0xf,%b_r ----------------------------------------- 0000 0000 0000 0000 0000 0000 0000 zzzz %b_r Field extraction with a bit mask - field not in rightmost bits if you want to work with the next nibble to the left, remove (clear) all other bits and work with only next to last four bits yyyy yyyy yyyy yyyy yyyy yyyy zzzz yyyy %a_r and 0000 0000 0000 0000 0000 0000 1111 0000 and %a_r,0xf0,%b_r ----------------------------------------- 0000 0000 0000 0000 0000 0000 zzzz 0000 %b_r it's usually easier to work with the field when it is shifted to the right 0000 0000 0000 0000 0000 0000 zzzz 0000 
%b_r srl %b_r,4,%c_r 0000 0000 0000 0000 0000 0000 0000 zzzz %c_r even better, you can shift and then mask yyyy yyyy yyyy yyyy yyyy yyyy zzzz yyyy %a_r srl %a_r,4,%b_r 0000 yyyy yyyy yyyy yyyy yyyy yyyy zzzz %b_r and 0000 0000 0000 0000 0000 0000 0000 1111 and %b_r,0xf,%c_r ----------------------------------------- 0000 0000 0000 0000 0000 0000 0000 zzzz %c_r Field extraction without a mask a left shift and then a right shift can isolate the desired field yyyy yyyy yyyy yyyy yyyy yyyy zzzz yyyy %a_r sll %a_r,24,%b_r zzzz yyyy 0000 0000 0000 0000 0000 0000 %b_r srl %b_r,28,%b_r 0000 0000 0000 0000 0000 0000 0000 zzzz %b_r use a logical right shift for an unsigned value or an arithmetic right shift for a signed value, which provides sign extension 0001 0010 0011 0100 0101 0110 1101 1000 %a_r sll %a_r,24,%b_r 1101 1000 0000 0000 0000 0000 0000 0000 %b_r sra %b_r,28,%b_r 1111 1111 1111 1111 1111 1111 1111 1101 %b_r Field insertion with a bit mask first, if necessary, clear out the field into which you wish to insert (the mask will have 0s in the bit positions we wish to clear and 1s in all other bit positions) xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx %a_r set 0xffffff0f,%mask_r 1111 1111 1111 1111 1111 1111 0000 1111 %mask_r and %a_r,%mask_r,%b_r xxxx xxxx xxxx xxxx xxxx xxxx 0000 xxxx %b_r alternatively, you can use the instruction andn or the instruction bclr with a selection mask of 0xf0, that is, with a mask that uses 1s to define the field that we will insert xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx %a_r andn %a_r,0xf0,%b_r xxxx xxxx xxxx xxxx xxxx xxxx 0000 xxxx %b_r if necessary, shift the new value to the correct field position before inserting 0000 0000 0000 0000 0000 0000 0000 zzzz %c_r sll %c_r,4,%d_r 0000 0000 0000 0000 0000 0000 zzzz 0000 %d_r then insert the new value into the field 0000 0000 0000 0000 0000 0000 zzzz 0000 %d_r xxxx xxxx xxxx xxxx xxxx xxxx 0000 xxxx %b_r or %d_r,%b_r,%e_r xxxx xxxx xxxx xxxx xxxx xxxx zzzz xxxx %e_r Example extract and 
insert macros for dealing with packed words ! positional parameters for extract() and insert() ! ! $1 - packed register ! $2 - shift amount ! (can be immediate or register) ! $3 - mask, positioned in least-significant bits ! (can be immediate or register) ! $4 - result register ! ! note that the insert macro also uses %o0 to hold shifted mask ! define(extract,` ifelse($2,0,` and $1, $3, $4',` srl $1, $2, $4 and $4, $3, $4') ') define(insert,` and $4, $3, $4 ifelse($2,0,` andn $1, $3, $1',` mov $3, %o0 sll %o0, $2, %o0 andn $1, %o0, $1 sll $4, $2, $4') or $1, $4, $1 ') ! example use extract(%packed_r,%shift_amt_r,%mask_r,%temp_r) inc %temp_r insert(%packed_r,%shift_amt_r,%mask_r,%temp_r) ! or extract(%packed_r,12,0x3f,%temp_r) dec %temp_r insert(%packed_r,12,0x3f,%temp_r) ! combining the two macros for updating a packed word ! $5 - inc or dec define(update_field,` extract($1,$2,$3,$4) $5 $4 insert($1,$2,$3,$4) ') ! example use update_field(%packed_r,%shift_amt_r,%mask_r,%temp_r,inc) written as cpp macros #define EXTRACT( pack, shift, mask, field ) \ (field) = ((pack) >> (shift)) & (mask) #define INSERT( pack, shift, mask, field ) \ (pack) = ((pack) & ~((mask) << (shift))) | (((field) & (mask)) << (shift)) alternate definitions of EXTRACT using field width to generate mask (~0 form suggested by Mitch Alsup) #define EXTRACT2( pack, shift, width, field ) \ (field) = (((pack) >> (shift)) & ((1 << width) - 1)) #define EXTRACT3( pack, shift, width, field ) \ (field) = (((pack) >> (shift)) & (~((~0) << width))) example #include <stdio.h> #define INSERT2( pack, shift, width, field ) \ (pack) = ((pack) & ~(((1 << width) - 1) << (shift))) | \ (((field) & ((1 << width) - 1)) << (shift)) #define EXTRACT2( pack, shift, width, field ) \ (field) = (((pack) >> (shift)) & ((1 << width) - 1)) int main(){ int a = 0x1234abcd; int b; printf("0x%x\n",a); EXTRACT2( a, 12, 8, b ); printf("0x%x\n",b); b = b - 0x4b; printf("0x%x\n",b); INSERT2( a, 12, 8, b ); printf("0x%x\n",a); return 0; } which has 
output 0x1234abcd 0x4a 0xffffffff 0x123ffbcd simpler macros for a prepositioned mask ! alternate extract/insert definitions if mask is ! pre-positioned at correct bit field (probably means ! that mask is register rather than an immediate value) ! ! same parameters ! define(extract_prepos_mask,` and $1, $3, $4 ifelse($2,0,,`srl $4, $2, $4') ') define(insert_prepos_mask,` andn $1, $3, $1 ifelse($2,0,,`sll $4, $2, $4') and $4, $3, $4 or $1, $4, $1 ') prepositioned-mask approach written as cpp macros #define EXTRACT_PPM( pack, shift, mask, field ) \ (field) = ((pack) & (mask)) >> (shift) #define INSERT_PPM( pack, shift, mask, field ) \ (pack) = ((pack) & ~(mask)) | (((field) << (shift)) & (mask)) /* atobcd() - convert ASCII digit string to packed BCD string * * input parameters: * * s - address of ASCII digit string * * assumptions about input string: * - should contain one ASCII digit per byte ('0'-'9') * - terminated by the null byte * - does not contain a +/- sign * * b - address of buffer area for packed BCD string * * assumptions about buffer area: * - large enough to hold result; floor(strlen(s)/2) + 1 * * return value: * * address of buffer area * * effect/output: * * ASCII digits are converted into two BCD nibbles per byte in the * result buffer in big-endian order. The resulting packed BCD * string is terminated by 0xf in either the left half or the right * half of the last byte of the string. 
* * typical calling sequence: * * bcd_string = atobcd( digit_string, buffer ); * * local variables: * * save - pointer variable to save the starting address of the buffer * b to use as the return value * * left_half - logical flag indicating which nibble to store * - true means that the next BCD value should be stored * in left-hand nibble of the current byte in buffer * - false means that the next BCD value should be stored * in right-hand nibble */ #include <stdio.h> char *atobcd( char *s, char *b ){ char *save = b; int left_half = 1; while( (*s >= '0') && (*s <= '9') ){ if( left_half ){ *b = (*s & 0xf) << 4; }else{ *b = *b | (*s & 0xf); b++; } left_half = 1 - left_half; s++; } if( left_half ){ *b = 0xf0; }else{ *b = *b | 0xf; } return save; } void prt_bytes( char *p, char type ){ if( type == 'b' ){ while( ((*p & 0xf0) != 0xf0) && ((*p & 0xf) != 0xf) ){ printf(" 0x%02x",(unsigned char)*p++); } printf(" 0x%02x\n",(unsigned char)*p); }else{ while( *p ){ printf(" 0x%02x",(unsigned char)*p++); } printf(" 0x%02x\n",(unsigned char)*p); } } int main(){ char test[2][10] = {"12345678","123456789"}; char result[2][10]; atobcd(test[0],result[0]); prt_bytes(test[0],'s'); prt_bytes(result[0],'b'); atobcd(test[1],result[1]); prt_bytes(test[1],'s'); prt_bytes(result[1],'b'); return 0; } output 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x00 0x12 0x34 0x56 0x78 0xf0 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x00 0x12 0x34 0x56 0x78 0x9f example conversion of ASCII decimal digits to a binary number scan of digits is left-to-right, that is, from most significant digit to least => use Horner's rule for calculating an n-term polynomial a_(n-1) * x^(n-1) + a_(n-2) * x^(n-2) + ... + a_0 = [ a_(n-1) * x + a_(n-2) ] * x + ... + a_0 or as might appear in a program value = 0; for( i=0; i<n; i++ ){ value = value * x; a = next coefficient, a_(n-1-i); value = value + a; } that is, initially: value = 0; after iteration 0: value = a_(n-1) after iteration 1: value = a_(n-1) * x + a_(n-2) after iteration 2: value = [ a_(n-1) * x + a_(n-2) ] * x + a_(n-3) ... 
For decimal, you multiply by 10 value = 0; for( i=0; i<n; i++ ){ value = 10*value; a = next digit (most significant first); value = value + a; } (or, better yet, use shifts and adds) value = 10*value; becomes value = (value << 3) + (value << 1);