
/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
00030 00031 =============================================================================*/ 00032 00033 /*---------------------------------------------------------------------------- 00034 | Shifts `a' right by the number of bits given in `count'. If any nonzero 00035 | bits are shifted off, they are ``jammed'' into the least significant bit of 00036 | the result by setting the least significant bit to 1. The value of `count' 00037 | can be arbitrarily large; in particular, if `count' is greater than 32, the 00038 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 00039 | The result is stored in the location pointed to by `zPtr'. 00040 *----------------------------------------------------------------------------*/ 00041 00042 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 00043 { 00044 bits32 z; 00045 00046 if ( count == 0 ) { 00047 z = a; 00048 } 00049 else if ( count < 32 ) { 00050 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 00051 } 00052 else { 00053 z = ( a != 0 ); 00054 } 00055 *zPtr = z; 00056 00057 } 00058 00059 /*---------------------------------------------------------------------------- 00060 | Shifts `a' right by the number of bits given in `count'. If any nonzero 00061 | bits are shifted off, they are ``jammed'' into the least significant bit of 00062 | the result by setting the least significant bit to 1. The value of `count' 00063 | can be arbitrarily large; in particular, if `count' is greater than 64, the 00064 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 00065 | The result is stored in the location pointed to by `zPtr'. 
00066 *----------------------------------------------------------------------------*/ 00067 00068 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 00069 { 00070 bits64 z; 00071 00072 if ( count == 0 ) { 00073 z = a; 00074 } 00075 else if ( count < 64 ) { 00076 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 00077 } 00078 else { 00079 z = ( a != 0 ); 00080 } 00081 *zPtr = z; 00082 00083 } 00084 00085 /*---------------------------------------------------------------------------- 00086 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 00087 | _plus_ the number of bits given in `count'. The shifted result is at most 00088 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 00089 | bits shifted off form a second 64-bit result as follows: The _last_ bit 00090 | shifted off is the most-significant bit of the extra result, and the other 00091 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ 00092 | bits shifted off were all zero. This extra result is stored in the location 00093 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 00094 | (This routine makes more sense if `a0' and `a1' are considered to form 00095 | a fixed-point value with binary point between `a0' and `a1'. This fixed- 00096 | point value is shifted right by the number of bits given in `count', and 00097 | the integer part of the result is returned at the location pointed to by 00098 | `z0Ptr'. The fractional part of the result may be slightly corrupted as 00099 | described above, and is returned at the location pointed to by `z1Ptr'.) 
00100 *----------------------------------------------------------------------------*/ 00101 00102 INLINE void 00103 shift64ExtraRightJamming( 00104 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 00105 { 00106 bits64 z0, z1; 00107 int8 negCount = ( - count ) & 63; 00108 00109 if ( count == 0 ) { 00110 z1 = a1; 00111 z0 = a0; 00112 } 00113 else if ( count < 64 ) { 00114 z1 = ( a0<<negCount ) | ( a1 != 0 ); 00115 z0 = a0>>count; 00116 } 00117 else { 00118 if ( count == 64 ) { 00119 z1 = a0 | ( a1 != 0 ); 00120 } 00121 else { 00122 z1 = ( ( a0 | a1 ) != 0 ); 00123 } 00124 z0 = 0; 00125 } 00126 *z1Ptr = z1; 00127 *z0Ptr = z0; 00128 00129 } 00130 00131 /*---------------------------------------------------------------------------- 00132 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 00133 | number of bits given in `count'. Any bits shifted off are lost. The value 00134 | of `count' can be arbitrarily large; in particular, if `count' is greater 00135 | than 128, the result will be 0. The result is broken into two 64-bit pieces 00136 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 00137 *----------------------------------------------------------------------------*/ 00138 00139 INLINE void 00140 shift128Right( 00141 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 00142 { 00143 bits64 z0, z1; 00144 int8 negCount = ( - count ) & 63; 00145 00146 if ( count == 0 ) { 00147 z1 = a1; 00148 z0 = a0; 00149 } 00150 else if ( count < 64 ) { 00151 z1 = ( a0<<negCount ) | ( a1>>count ); 00152 z0 = a0>>count; 00153 } 00154 else { 00155 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 00156 z0 = 0; 00157 } 00158 *z1Ptr = z1; 00159 *z0Ptr = z0; 00160 00161 } 00162 00163 /*---------------------------------------------------------------------------- 00164 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 00165 | number of bits given in `count'. 
If any nonzero bits are shifted off, they 00166 | are ``jammed'' into the least significant bit of the result by setting the 00167 | least significant bit to 1. The value of `count' can be arbitrarily large; 00168 | in particular, if `count' is greater than 128, the result will be either 00169 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 00170 | nonzero. The result is broken into two 64-bit pieces which are stored at 00171 | the locations pointed to by `z0Ptr' and `z1Ptr'. 00172 *----------------------------------------------------------------------------*/ 00173 00174 INLINE void 00175 shift128RightJamming( 00176 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 00177 { 00178 bits64 z0, z1; 00179 int8 negCount = ( - count ) & 63; 00180 00181 if ( count == 0 ) { 00182 z1 = a1; 00183 z0 = a0; 00184 } 00185 else if ( count < 64 ) { 00186 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 00187 z0 = a0>>count; 00188 } 00189 else { 00190 if ( count == 64 ) { 00191 z1 = a0 | ( a1 != 0 ); 00192 } 00193 else if ( count < 128 ) { 00194 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 00195 } 00196 else { 00197 z1 = ( ( a0 | a1 ) != 0 ); 00198 } 00199 z0 = 0; 00200 } 00201 *z1Ptr = z1; 00202 *z0Ptr = z0; 00203 00204 } 00205 00206 /*---------------------------------------------------------------------------- 00207 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 00208 | by 64 _plus_ the number of bits given in `count'. The shifted result is 00209 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are 00210 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
The bits shifted 00211 | off form a third 64-bit result as follows: The _last_ bit shifted off is 00212 | the most-significant bit of the extra result, and the other 63 bits of the 00213 | extra result are all zero if and only if _all_but_the_last_ bits shifted off 00214 | were all zero. This extra result is stored in the location pointed to by 00215 | `z2Ptr'. The value of `count' can be arbitrarily large. 00216 | (This routine makes more sense if `a0', `a1', and `a2' are considered 00217 | to form a fixed-point value with binary point between `a1' and `a2'. This 00218 | fixed-point value is shifted right by the number of bits given in `count', 00219 | and the integer part of the result is returned at the locations pointed to 00220 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 00221 | corrupted as described above, and is returned at the location pointed to by 00222 | `z2Ptr'.) 00223 *----------------------------------------------------------------------------*/ 00224 00225 INLINE void 00226 shift128ExtraRightJamming( 00227 bits64 a0, 00228 bits64 a1, 00229 bits64 a2, 00230 int16 count, 00231 bits64 *z0Ptr, 00232 bits64 *z1Ptr, 00233 bits64 *z2Ptr 00234 ) 00235 { 00236 bits64 z0, z1, z2; 00237 int8 negCount = ( - count ) & 63; 00238 00239 if ( count == 0 ) { 00240 z2 = a2; 00241 z1 = a1; 00242 z0 = a0; 00243 } 00244 else { 00245 if ( count < 64 ) { 00246 z2 = a1<<negCount; 00247 z1 = ( a0<<negCount ) | ( a1>>count ); 00248 z0 = a0>>count; 00249 } 00250 else { 00251 if ( count == 64 ) { 00252 z2 = a1; 00253 z1 = a0; 00254 } 00255 else { 00256 a2 |= a1; 00257 if ( count < 128 ) { 00258 z2 = a0<<negCount; 00259 z1 = a0>>( count & 63 ); 00260 } 00261 else { 00262 z2 = ( count == 128 ) ? 
a0 : ( a0 != 0 ); 00263 z1 = 0; 00264 } 00265 } 00266 z0 = 0; 00267 } 00268 z2 |= ( a2 != 0 ); 00269 } 00270 *z2Ptr = z2; 00271 *z1Ptr = z1; 00272 *z0Ptr = z0; 00273 00274 } 00275 00276 /*---------------------------------------------------------------------------- 00277 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 00278 | number of bits given in `count'. Any bits shifted off are lost. The value 00279 | of `count' must be less than 64. The result is broken into two 64-bit 00280 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 00281 *----------------------------------------------------------------------------*/ 00282 00283 INLINE void 00284 shortShift128Left( 00285 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 00286 { 00287 00288 *z1Ptr = a1<<count; 00289 *z0Ptr = 00290 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 00291 00292 } 00293 00294 /*---------------------------------------------------------------------------- 00295 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 00296 | by the number of bits given in `count'. Any bits shifted off are lost. 00297 | The value of `count' must be less than 64. The result is broken into three 00298 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 00299 | `z1Ptr', and `z2Ptr'. 
00300 *----------------------------------------------------------------------------*/ 00301 00302 INLINE void 00303 shortShift192Left( 00304 bits64 a0, 00305 bits64 a1, 00306 bits64 a2, 00307 int16 count, 00308 bits64 *z0Ptr, 00309 bits64 *z1Ptr, 00310 bits64 *z2Ptr 00311 ) 00312 { 00313 bits64 z0, z1, z2; 00314 int8 negCount; 00315 00316 z2 = a2<<count; 00317 z1 = a1<<count; 00318 z0 = a0<<count; 00319 if ( 0 < count ) { 00320 negCount = ( ( - count ) & 63 ); 00321 z1 |= a2>>negCount; 00322 z0 |= a1>>negCount; 00323 } 00324 *z2Ptr = z2; 00325 *z1Ptr = z1; 00326 *z0Ptr = z0; 00327 00328 } 00329 00330 /*---------------------------------------------------------------------------- 00331 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 00332 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 00333 | any carry out is lost. The result is broken into two 64-bit pieces which 00334 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 00335 *----------------------------------------------------------------------------*/ 00336 00337 INLINE void 00338 add128( 00339 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 00340 { 00341 bits64 z1; 00342 00343 z1 = a1 + b1; 00344 *z1Ptr = z1; 00345 *z0Ptr = a0 + b0 + ( z1 < a1 ); 00346 00347 } 00348 00349 /*---------------------------------------------------------------------------- 00350 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 00351 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 00352 | modulo 2^192, so any carry out is lost. The result is broken into three 00353 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 00354 | `z1Ptr', and `z2Ptr'. 
00355 *----------------------------------------------------------------------------*/ 00356 00357 INLINE void 00358 add192( 00359 bits64 a0, 00360 bits64 a1, 00361 bits64 a2, 00362 bits64 b0, 00363 bits64 b1, 00364 bits64 b2, 00365 bits64 *z0Ptr, 00366 bits64 *z1Ptr, 00367 bits64 *z2Ptr 00368 ) 00369 { 00370 bits64 z0, z1, z2; 00371 int8 carry0, carry1; 00372 00373 z2 = a2 + b2; 00374 carry1 = ( z2 < a2 ); 00375 z1 = a1 + b1; 00376 carry0 = ( z1 < a1 ); 00377 z0 = a0 + b0; 00378 z1 += carry1; 00379 z0 += ( z1 < carry1 ); 00380 z0 += carry0; 00381 *z2Ptr = z2; 00382 *z1Ptr = z1; 00383 *z0Ptr = z0; 00384 00385 } 00386 00387 /*---------------------------------------------------------------------------- 00388 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 00389 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 00390 | 2^128, so any borrow out (carry out) is lost. The result is broken into two 00391 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 00392 | `z1Ptr'. 00393 *----------------------------------------------------------------------------*/ 00394 00395 INLINE void 00396 sub128( 00397 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 00398 { 00399 00400 *z1Ptr = a1 - b1; 00401 *z0Ptr = a0 - b0 - ( a1 < b1 ); 00402 00403 } 00404 00405 /*---------------------------------------------------------------------------- 00406 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 00407 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 00408 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 00409 | result is broken into three 64-bit pieces which are stored at the locations 00410 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 
00411 *----------------------------------------------------------------------------*/ 00412 00413 INLINE void 00414 sub192( 00415 bits64 a0, 00416 bits64 a1, 00417 bits64 a2, 00418 bits64 b0, 00419 bits64 b1, 00420 bits64 b2, 00421 bits64 *z0Ptr, 00422 bits64 *z1Ptr, 00423 bits64 *z2Ptr 00424 ) 00425 { 00426 bits64 z0, z1, z2; 00427 int8 borrow0, borrow1; 00428 00429 z2 = a2 - b2; 00430 borrow1 = ( a2 < b2 ); 00431 z1 = a1 - b1; 00432 borrow0 = ( a1 < b1 ); 00433 z0 = a0 - b0; 00434 z0 -= ( z1 < borrow1 ); 00435 z1 -= borrow1; 00436 z0 -= borrow0; 00437 *z2Ptr = z2; 00438 *z1Ptr = z1; 00439 *z0Ptr = z0; 00440 00441 } 00442 00443 /*---------------------------------------------------------------------------- 00444 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 00445 | into two 64-bit pieces which are stored at the locations pointed to by 00446 | `z0Ptr' and `z1Ptr'. 00447 *----------------------------------------------------------------------------*/ 00448 00449 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 00450 { 00451 bits32 aHigh, aLow, bHigh, bLow; 00452 bits64 z0, zMiddleA, zMiddleB, z1; 00453 00454 aLow = a; 00455 aHigh = a>>32; 00456 bLow = b; 00457 bHigh = b>>32; 00458 z1 = ( (bits64) aLow ) * bLow; 00459 zMiddleA = ( (bits64) aLow ) * bHigh; 00460 zMiddleB = ( (bits64) aHigh ) * bLow; 00461 z0 = ( (bits64) aHigh ) * bHigh; 00462 zMiddleA += zMiddleB; 00463 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 00464 zMiddleA <<= 32; 00465 z1 += zMiddleA; 00466 z0 += ( z1 < zMiddleA ); 00467 *z1Ptr = z1; 00468 *z0Ptr = z0; 00469 00470 } 00471 00472 /*---------------------------------------------------------------------------- 00473 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 00474 | `b' to obtain a 192-bit product. The product is broken into three 64-bit 00475 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 00476 | `z2Ptr'. 
00477 *----------------------------------------------------------------------------*/ 00478 00479 INLINE void 00480 mul128By64To192( 00481 bits64 a0, 00482 bits64 a1, 00483 bits64 b, 00484 bits64 *z0Ptr, 00485 bits64 *z1Ptr, 00486 bits64 *z2Ptr 00487 ) 00488 { 00489 bits64 z0, z1, z2, more1; 00490 00491 mul64To128( a1, b, &z1, &z2 ); 00492 mul64To128( a0, b, &z0, &more1 ); 00493 add128( z0, more1, 0, z1, &z0, &z1 ); 00494 *z2Ptr = z2; 00495 *z1Ptr = z1; 00496 *z0Ptr = z0; 00497 00498 } 00499 00500 /*---------------------------------------------------------------------------- 00501 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 00502 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 00503 | product. The product is broken into four 64-bit pieces which are stored at 00504 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 00505 *----------------------------------------------------------------------------*/ 00506 00507 INLINE void 00508 mul128To256( 00509 bits64 a0, 00510 bits64 a1, 00511 bits64 b0, 00512 bits64 b1, 00513 bits64 *z0Ptr, 00514 bits64 *z1Ptr, 00515 bits64 *z2Ptr, 00516 bits64 *z3Ptr 00517 ) 00518 { 00519 bits64 z0, z1, z2, z3; 00520 bits64 more1, more2; 00521 00522 mul64To128( a1, b1, &z2, &z3 ); 00523 mul64To128( a1, b0, &z1, &more2 ); 00524 add128( z1, more2, 0, z2, &z1, &z2 ); 00525 mul64To128( a0, b0, &z0, &more1 ); 00526 add128( z0, more1, 0, z1, &z0, &z1 ); 00527 mul64To128( a0, b1, &more1, &more2 ); 00528 add128( more1, more2, 0, z2, &more1, &z2 ); 00529 add128( z0, z1, 0, more1, &z0, &z1 ); 00530 *z3Ptr = z3; 00531 *z2Ptr = z2; 00532 *z1Ptr = z1; 00533 *z0Ptr = z0; 00534 00535 } 00536 00537 /*---------------------------------------------------------------------------- 00538 | Returns an approximation to the 64-bit integer quotient obtained by dividing 00539 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 00540 | divisor `b' must be at least 2^63. 
If q is the exact quotient truncated 00541 | toward zero, the approximation returned lies between q and q + 2 inclusive. 00542 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 00543 | unsigned integer is returned. 00544 *----------------------------------------------------------------------------*/ 00545 00546 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 00547 { 00548 bits64 b0, b1; 00549 bits64 rem0, rem1, term0, term1; 00550 bits64 z; 00551 00552 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 00553 b0 = b>>32; 00554 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 00555 mul64To128( b, z, &term0, &term1 ); 00556 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 00557 while ( ( (sbits64) rem0 ) < 0 ) { 00558 z -= LIT64( 0x100000000 ); 00559 b1 = b<<32; 00560 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 00561 } 00562 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 00563 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 00564 return z; 00565 00566 } 00567 00568 /*---------------------------------------------------------------------------- 00569 | Returns an approximation to the square root of the 32-bit significand given 00570 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 00571 | `aExp' (the least significant bit) is 1, the integer returned approximates 00572 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 00573 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 00574 | case, the approximation returned lies strictly within +/-2 of the exact 00575 | value. 
00576 *----------------------------------------------------------------------------*/ 00577 00578 static bits32 estimateSqrt32( int16 aExp, bits32 a ) 00579 { 00580 static const bits16 sqrtOddAdjustments[] = { 00581 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 00582 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 00583 }; 00584 static const bits16 sqrtEvenAdjustments[] = { 00585 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 00586 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 00587 }; 00588 int8 index; 00589 bits32 z; 00590 00591 index = ( a>>27 ) & 15; 00592 if ( aExp & 1 ) { 00593 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; 00594 z = ( ( a / z )<<14 ) + ( z<<15 ); 00595 a >>= 1; 00596 } 00597 else { 00598 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; 00599 z = a / z + z; 00600 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 00601 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 00602 } 00603 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); 00604 00605 } 00606 00607 /*---------------------------------------------------------------------------- 00608 | Returns the number of leading 0 bits before the most-significant 1 bit of 00609 | `a'. If `a' is zero, 32 is returned. 
00610 *----------------------------------------------------------------------------*/ 00611 00612 static int8 countLeadingZeros32( bits32 a ) 00613 { 00614 static const int8 countLeadingZerosHigh[] = { 00615 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 00616 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00617 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00618 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00619 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00620 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00621 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00624 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 00631 }; 00632 int8 shiftCount; 00633 00634 shiftCount = 0; 00635 if ( a < 0x10000 ) { 00636 shiftCount += 16; 00637 a <<= 16; 00638 } 00639 if ( a < 0x1000000 ) { 00640 shiftCount += 8; 00641 a <<= 8; 00642 } 00643 shiftCount += countLeadingZerosHigh[ a>>24 ]; 00644 return shiftCount; 00645 00646 } 00647 00648 /*---------------------------------------------------------------------------- 00649 | Returns the number of leading 0 bits before the most-significant 1 bit of 00650 | `a'. If `a' is zero, 64 is returned. 
00651 *----------------------------------------------------------------------------*/ 00652 00653 static int8 countLeadingZeros64( bits64 a ) 00654 { 00655 int8 shiftCount; 00656 00657 shiftCount = 0; 00658 if ( a < ( (bits64) 1 )<<32 ) { 00659 shiftCount += 32; 00660 } 00661 else { 00662 a >>= 32; 00663 } 00664 shiftCount += countLeadingZeros32( a ); 00665 return shiftCount; 00666 00667 } 00668 00669 /*---------------------------------------------------------------------------- 00670 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 00671 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 00672 | Otherwise, returns 0. 00673 *----------------------------------------------------------------------------*/ 00674 00675 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 00676 { 00677 00678 return ( a0 == b0 ) && ( a1 == b1 ); 00679 00680 } 00681 00682 /*---------------------------------------------------------------------------- 00683 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 00684 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 00685 | Otherwise, returns 0. 00686 *----------------------------------------------------------------------------*/ 00687 00688 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 00689 { 00690 00691 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 00692 00693 } 00694 00695 /*---------------------------------------------------------------------------- 00696 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 00697 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 00698 | returns 0. 
00699 *----------------------------------------------------------------------------*/ 00700 00701 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 00702 { 00703 00704 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 00705 00706 } 00707 00708 /*---------------------------------------------------------------------------- 00709 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 00710 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 00711 | Otherwise, returns 0. 00712 *----------------------------------------------------------------------------*/ 00713 00714 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 00715 { 00716 00717 return ( a0 != b0 ) || ( a1 != b1 ); 00718 00719 }