Clemson University -- CPSC 231 -- Fall 2009 Shifts (p. 123) srl - shift right logical - 0 enters from left, bit drops off right end +-------+-------+ +-------+-------+ note: little-endian 0 --> b_31 -> b_30 -> ... -> b_1 --> b_0 --> bit notation +-------+-------+ +-------+-------+ msb lsb "b" for bit a f 5 0 8 9 1 6 10101111010100001000100100010110 gcc generates srl srl by 1 becomes for unsigned int 01010111101010000100010010001011 variable a in 5 7 a 8 4 4 8 b a = a >> 1; sra - shift right arithmetic - sign bit replicated on left, bit drops off right end .---. +-v---|-+-------+ +-------+-------+ | b_31 -> b_30 -> ... -> b_1 --> b_0 --> +-------+-------+ +-------+-------+ sign lsb a f 5 0 8 9 1 6 10101111010100001000100100010110 gcc generates sra sra by 1 becomes for int variable 11010111101010000100010010001011 a in d 7 a 8 4 4 8 b a = a >> 1; sll - shift left logical - 0 enters from right, bit drops off left end +-------+-------+ +-------+-------+ <-- b_31 <- b_30 <- ... <- b_1 <-- b_0 <-- 0 +-------+-------+ +-------+-------+ msb lsb a f 5 0 8 9 1 6 10101111010100001000100100010110 gcc generates sll sl1 by 1 becomes for a = a << 1; 01011110101000010001001000101100 5 e a 1 1 2 2 c the shift count is given as a 5-bit value (0-31) either as an immediate value or in a register; if you specify a larger value then the actual shift count is the number you specify modulo 32 Multiplication by small constants (skip ahead to pp. 187-188) 1) convert the constant into a sum of powers of two (can allow a subtract) 2) convert the multiplications by powers of two into left shifts x * 10 = x * (8 + 2) = (x * 8) + (x * 2) = (x << 3) + (x << 1) x * 15 = x * (16 - 1) = (x * 16) - x = (x << 4) - x Field extraction with a bit mask - field already in rightmost bits bit mask - a word where bits are used to zero out or select portions of another word assume we are working with nibbles (4-bit fields) remove (clear) all other bits and work with only the low four bits (the mask will have 0s in all the bit positions we wish to clear and 1s in the bit positions we wish to select) xxxx xxxx xxxx xxxx xxxx xxxx xxxx zzzz %a_r and 0000 0000 0000 0000 0000 0000 0000 1111 and %a_r,0xf,%b_r ----------------------------------------- 0000 0000 0000 0000 0000 0000 0000 zzzz %b_r Field extraction with a bit mask - field not in rightmost bits if you want to work with the next nibble to the left, remove (clear) all other bits and work with only next to last four bits yyyy yyyy yyyy yyyy yyyy yyyy zzzz yyyy %a_r and 0000 0000 0000 0000 0000 0000 1111 0000 and %a_r,0xf0,%b_r ----------------------------------------- 0000 0000 0000 0000 0000 0000 zzzz 0000 %b_r it's usually easier to work with the field when it is shifted to the right 0000 0000 0000 0000 0000 0000 zzzz 0000 %b_r srl %b_r,4,%c_r 0000 0000 0000 0000 0000 0000 0000 zzzz %c_r even better, you can shift then mask yyyy yyyy yyyy yyyy yyyy yyyy zzzz yyyy %a_r 0000 yyyy yyyy yyyy yyyy yyyy yyyy zzzz %b_r srl %a_r,4,%b_r and 0000 0000 0000 0000 0000 0000 0000 1111 and %b_r,0xf,%c_r ----------------------------------------- 0000 0000 0000 0000 0000 0000 0000 zzzz %c_r Field insertion with a bit mask first, if necessary, clear out the field into which you wish to insert (the mask will have 0s in the bit positions we wish to clear and 1s in all other bit positions) xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx %a_r set 0xffffff0f,%mask_r and 1111 1111 1111 1111 1111 1111 0000 1111 %mask_r and %a_r,%mask_r,%b_r ----------------------------------------- xxxx xxxx xxxx xxxx xxxx xxxx 0000 xxxx %b_r (alternatively, you can use the instruction andn or the instruction bclr with a selection mask of 0xf0, that is, with a mask that uses 1s to define the field that we will insert) if necessary, shift the new value to the correct field position before inserting 0000 0000 0000 0000 0000 0000 0000 zzzz %c_r sll %c_r,4,%d_r 0000 0000 0000 0000 0000 0000 zzzz 0000 %d_r then insert the new value into the field 0000 0000 0000 0000 0000 0000 zzzz 0000 %d_r or xxxx xxxx xxxx xxxx xxxx xxxx 0000 xxxx %b_r or %d_r,%b_r,%e_r ----------------------------------------- xxxx xxxx xxxx xxxx xxxx xxxx zzzz xxxx %e_r Example extract and insert macros for dealing with packed words ! positional parameters for extract() and insert() ! ! $1 - packed register ! $2 - shift amount ! (can be immediate or register) ! $3 - mask, positioned in least-significant bits ! (can be immediate or register) ! $4 - result register ! ! note that the insert macro also uses %o0 to hold shifted mask ! define(extract,` ifelse($2,0,,`srl $1, $2, $4') and $4, $3, $4 ') define(insert,` and $4, $3, $4 ifelse($2,0,` andn $1, $3, $1',` mov $3, %o0 sll %o0, $2, %o0 andn $1, %o0, $1 sll $4, $2, $4') or $1, $4, $1 ') ! example use extract(%packed_r,%shift_amt_r,%mask_r,%temp_r) inc %temp_r insert(%packed_r,%shift_amt_r,%mask_r,%temp_r) ! or extract(%packed_r,12,0x3f,%temp_r) dec %temp_r insert(%packed_r,12,0x3f,%temp_r) ! combining the two macros for updating a packed word ! $5 - inc or dec define(update_field,` extract($1,$2,$3,$4) $5 $4 insert($1,$2,$3,$4) ') ! example use update_field(%packed_r,%shift_amt_r,%mask_r,%temp_r,inc) written as cpp macros #define EXTRACT( pack, shift, mask, field ) \ (field) = ((pack) >> (shift)) & (mask) #define INSERT( pack, shift, mask, field ) \ (pack) = ((pack) & ~((mask) << (shift))) | (((field) & (mask)) << (shift)) simpler macros for a prepositioned mask ! alternate extract/insert definitions if mask is ! pre-positioned at correct bit field (probably means ! that mask is register rather than an immediate value) ! ! same parameters ! define(extract_w_fixed_mask,` and $1, $3, $4 ifelse($4,0,,`srl $4, $2, $4') ') define(insert_w_fixed_mask,` andn $1, $3, $1 ifelse($4,0,,`sll $4, $2, $4') and $4, $3, $4 or $1, $4, $1 ') prepositioned-mask approach written as cpp macros #define EXTRACT_WFM( pack, shift, mask, field ) \ (field) = ((pack) & (mask)) >> (shift) #define INSERT_WFM( pack, shift, mask, field ) \ (pack) = ((pack) & ~(mask)) | (((field) << (shift)) & (mask)) Example conversion program segment convert ASCII decimal digits to binary number scan of digits is left-to-right, that is, from most significant digit to least => use Horner's rule for calculating an n-term polynomial a_(n-1) * x^(n-1) + a_(n-2) * x^(n-2) + ... + a_0 = [ a_(n-1) * x + a_(n-2) ] * x + ... + a_0 or as might appear in a program value = 0; for( i=0; i> value = value + a; } that is, initially: value = 0; after iteration 0: value = a_(n-1) after iteration 1: value = a_(n-1) * x + a_(n-2) after iteration 2: value = [ a_(n-1) * x + a_(n-2) ] * x + a_(n-3) ... For decimal, you multiply by 10 value = 0; for( i=0; i> value = value + a; } (or, better yet, use shifts and adds) value = 10*value; becomes value = (value << 3) + (value << 1); ! assemble four ASCII characters (decimal digits) into a register, and ! then convert it into a binary number ! ref: exercise 3-1 on page 96 of text .global main main: save %sp, -64, %sp ! register assignments ! ! %o0 ASCII digit string ! %o1 single ASCII character ! ! algorithm ! ! string = 0; ! for( i=0; i<4; i++){ ! c = get_character(); ! string = (string<<8) | c; ! } part1: clr %o0 ! step 0 mov 0x31, %o1 ! '1' sll %o0, 8, %o0 ! or %o0, %o1, %o0 ! %o0 = 0x00000031 ! step 1 mov 0x33, %o1 ! '3' sll %o0, 8, %o0 ! or %o0, %o1, %o0 ! %o0 = 0x00003133 ! step 2 mov 0x36, %o1 ! '6' sll %o0, 8, %o0 ! or %o0, %o1, %o0 ! %o0 = 0x00313336 ! step 3 mov 0x35, %o1 ! '5' sll %o0, 8, %o0 ! or %o0, %o1, %o0 ! %o0 = 0x31333635 ! register assignments ! ! %o0 ASCII digit string ! %o1 sum = binary number ! %o2 ASCII digit ! %o3 8*sum ! %o4 2*sum ! ! algorithm ! ! sum = 0; ! for( i=0; i<4; i++ ){ ! sum = 10 * sum; ! d = (string>>24) & 0xf; ! sum = sum + d; ! string = string<<8; ! } ! ! the following code is unrolled, but a loop would work just as well part2: clr %o1 ! step 0 sll %o1, 3, %o3 ! 8 * sum sll %o1, 1, %o4 ! 2 * sum add %o3, %o4, %o1 ! sum = (8 + 2) * sum = 10 * sum mov %o0, %o2 ! digit is high 8 bits in %o0 srl %o2, 24, %o2 ! shift to low 8 bits in %o2 and %o2, 0xf, %o2 ! convert to binary value for digit add %o1, %o2, %o1 ! add to sum sll %o0, 8, %o0 ! shift to get next ASCII digit ! step 1 sll %o1, 3, %o3 ! 8 * sum sll %o1, 1, %o4 ! 2 * sum add %o3, %o4, %o1 ! sum = (8 + 2) * sum = 10 * sum mov %o0, %o2 ! digit is high 8 bits in %o0 srl %o2, 24, %o2 ! shift to low 8 bits in %o2 and %o2, 0xf, %o2 ! convert to binary value for digit add %o1, %o2, %o1 ! add to sum sll %o0, 8, %o0 ! shift to get next ASCII digit ! step 2 sll %o1, 3, %o3 ! 8 * sum sll %o1, 1, %o4 ! 2 * sum add %o3, %o4, %o1 ! sum = (8 + 2) * sum = 10 * sum mov %o0, %o2 ! digit is high 8 bits in %o0 srl %o2, 24, %o2 ! shift to low 8 bits in %o2 and %o2, 0xf, %o2 ! convert to binary value for digit add %o1, %o2, %o1 ! add to sum sll %o0, 8, %o0 ! shift to get next ASCII digit ! step 3 sll %o1, 3, %o3 ! 8 * sum sll %o1, 1, %o4 ! 2 * sum add %o3, %o4, %o1 ! sum = (8 + 2) * sum = 10 * sum mov %o0, %o2 ! digit is high 8 bits in %o0 srl %o2, 24, %o2 ! shift to low 8 bits in %o2 and %o2, 0xf, %o2 ! convert to binary value for digit add %o1, %o2, %o1 ! add to sum sll %o0, 8, %o0 ! shift to get next ASCII digit done: mov 1, %g1 ta 0 Rotates rotate %o0 left by 1 bit addcc %o0, %o0, %o0 addx %o0, %g0, %o0 macro for rol(rs1,n,rd) .---------------. | +---+-----+ | `--| A | B |<-' rotate left by n define(rol,` +---+-----+ sll $1,$2,%o0 n 32-n srl $1,eval(32-$2),%o1 or %o0,%o1,$3 +-----+---+ final result is equiv. ') | B | A | to shifting A right by +-----+---+ 32-n and shifting B 32-n n left by n macro for ror(rs1,n,rd) .---------------. | +-----+---+ | `->| A | B |--' rotate right by n define(ror,` +-----+---+ srl $1,$2,%o0 32-n n sll $1,eval(32-$2),%o1 or %o0,%o1,$3 +---+-----+ final result is equiv. ') | B | A | to shifting A right by +---+-----+ n and shifting B left n 32-n by 32-n