Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / sam / cpus / vonk / ss / lib / cpu / src / SS_Fpu.h
CommitLineData
920dae64
AT
1/*
2* ========== Copyright Header Begin ==========================================
3*
4* OpenSPARC T2 Processor File: SS_Fpu.h
5* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
6* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
7*
8* The above named program is free software; you can redistribute it and/or
9* modify it under the terms of the GNU General Public
10* License version 2 as published by the Free Software Foundation.
11*
12* The above named program is distributed in the hope that it will be
13* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15* General Public License for more details.
16*
17* You should have received a copy of the GNU General Public
18* License along with this work; if not, write to the Free Software
19* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20*
21* ========== Copyright Header End ============================================
22*/
23
24#ifndef __SS_Fpu_h__
25#define __SS_Fpu_h__
26
27#include "SS_Types.h"
28#include "SS_Strand.h"
29
30class SS_Fpu
31{
32 public:
// Global setting: is tininess (for underflow) detected after or before rounding.
enum Tininess
{
  TINY_AFTER_ROUNDING  = 0x0,
  TINY_BEFORE_ROUNDING = 0x1
};
// Rounding direction; the numeric values are the encodings used by the
// fsr.rd and gsr.irnd fields.
enum Rounding
{
  ROUND_NEAREST = 0x0,
  ROUND_TO_ZERO = 0x1,
  ROUND_UP      = 0x2,
  ROUND_DOWN    = 0x3
};
// Exception bits, one bit per condition, as laid out in the fsr.cexc,
// fsr.aexc and fsr.tem fields.
enum Exception
{
  EXC_NONE      = 0,
  EXC_INEXACT   = 1 << 0,  // nx
  EXC_DIVBYZERO = 1 << 1,  // dz
  EXC_UNDERFLOW = 1 << 2,  // uf
  EXC_OVERFLOW  = 1 << 3,  // of
  EXC_INVALID   = 1 << 4   // nv
};
// Floating point trap type; the numeric values are the encodings of fsr.ftt.
enum FloatTrapType
{
  FTT_NOTRAP              = 0x0,
  FTT_IEEE_754_EXCEPTION  = 0x1,
  FTT_UNFINISHED_FPOP     = 0x2,
  FTT_UNIMPLEMENTED_FPOP  = 0x3,  // Reserved, used in V9
  FTT_SEQUENCE_ERROR      = 0x4,  // Reserved, used in V9
  FTT_HARDWARE_ERROR      = 0x5,  // Reserved, used in V9
  FTT_INVALID_FP_REGISTER = 0x6,  // Only used in quad precision, i.e. never
  FTT_RESERVED            = 0x7   // Reserved
};
65
66 friend Exception operator|( Exception a, Exception b ) { return Exception(int(a)|int(b)); }
67 friend Exception operator&( Exception a, Exception b ) { return Exception(int(a)&int(b)); }
68
// Comparison outcome; the numeric values are the encodings stored in the
// fsr.fcc0 .. fsr.fcc3 fields.
enum ConditionCode
{
  EQ = 0x0,
  LT = 0x1,
  GT = 0x2,
  UN = 0x3
};
// Selects one of the four condition code fields in fsr.
enum ConditionField
{
  FCC0 = 0x0,
  FCC1 = 0x1,
  FCC2 = 0x2,
  FCC3 = 0x3
};
83
84 SS_Fpu();
85
86 void set_fcc( SS_Fsr& fsr, ConditionField cr, ConditionCode cc )
87 {
88 switch (cr)
89 {
90 case FCC0 : fsr.fcc0(cc); break;
91 case FCC1 : fsr.fcc1(cc); break;
92 case FCC2 : fsr.fcc2(cc); break;
93 case FCC3 : fsr.fcc3(cc); break;
94 default: assert(0);
95 }
96 }
97
98 SS_Vaddr exe_end( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i, FloatTrapType ftt, int exc )
99 {
100 switch (ftt)
101 {
102 case FTT_UNFINISHED_FPOP:
103 s->fsr.ftt(FTT_UNFINISHED_FPOP);
104 s->set_fsr();
105 return (s->trap)(pc,npc,s,i,SS_Trap::FP_EXCEPTION_OTHER);
106
107 case FTT_IEEE_754_EXCEPTION:
108 s->fsr.cexc(exc).ftt(ftt);
109 s->set_fsr();
110 return (s->trap)(pc,npc,s,i,SS_Trap::FP_EXCEPTION_IEEE_754);
111
112 case FTT_NOTRAP:
113 s->fsr.aexc(s->fsr.aexc()|exc).cexc(exc).ftt(FTT_NOTRAP);
114 s->set_fsr();
115 s->npc = npc + 4;
116 return npc;
117
118 default:
119 assert(0);
120 return -1;
121 }
122 }
123
124 // Conversion methods
125
126 uint32_t int32_to_float32( int32_t );
127 uint64_t int32_to_float64( int32_t );
128
129 uint32_t int64_to_float32( int64_t );
130 uint64_t int64_to_float64( int64_t );
131
132 int32_t float32_to_int32( uint32_t );
133 int64_t float32_to_int64( uint32_t );
134 uint64_t float32_to_float64( uint32_t );
135
136 int32_t float64_to_int32( uint64_t );
137 int64_t float64_to_int64( uint64_t );
138 uint32_t float64_to_float32( uint64_t );
139
140 // Computation methods
141
142 uint32_t float32_add( uint32_t, uint32_t, int, int half=0 );
143 uint32_t float32_sub( uint32_t, uint32_t, int, int half=0 );
144 uint32_t float32_mul( uint32_t, uint32_t, int );
145 uint32_t float32_div( uint32_t, uint32_t );
146 uint32_t float32_sqrt( uint32_t );
147 uint32_t float32_rsqrt( uint32_t );
148 uint32_t float32_madd( uint32_t, uint32_t, uint32_t, int, int);
149
150 uint64_t float64_add( uint64_t, uint64_t, int, int half=0 );
151 uint64_t float64_sub( uint64_t, uint64_t, int, int half=0 );
152 uint64_t float64_mul( uint64_t, uint64_t, int );
153 uint64_t float64_div( uint64_t, uint64_t );
154 uint64_t float64_sqrt( uint64_t );
155 uint64_t float64_rsqrt( uint64_t );
156 uint64_t float64_madd( uint64_t, uint64_t, uint64_t, int, int );
157
158 // Comparisons
159
160 int float32_eq( uint32_t, uint32_t );
161 int float32_lt( uint32_t, uint32_t );
162 int float32_eq_signaling( uint32_t, uint32_t );
163 int float32_lt_quiet( uint32_t, uint32_t );
164
165 int float64_eq( uint64_t, uint64_t );
166 int float64_lt( uint64_t, uint64_t );
167 int float64_eq_signaling( uint64_t, uint64_t );
168 int float64_lt_quiet( uint64_t, uint64_t );
169
170 // Parameters and results
171
172 Rounding float_rounding_mode;
173 int float_exception_flags;
174 int float_partial_exception_flags;
175 int float_round_needed;
176 int float_unfinished_op;
177 Tininess float_detect_tininess;
178
// Field extraction for single precision values:
//   to_sgn() returns the sign in bit 0,
//   to_exp() returns the 8 bit exponent in the low bits,
//   to_mnt() returns the 23 bit mantissa.

uint32_t to_sgn( uint32_t f )
{
  return (f & 0x80000000u) >> 31;
}
uint32_t to_exp( uint32_t f )
{
  return (f >> 23) & 0xffu;
}
uint32_t to_mnt( uint32_t f )
{
  return f & 0x007fffffu;
}
186
// Double precision field extraction: sign in bit 0, 11 bit exponent in the
// low bits, and the 52 bit mantissa.
uint64_t to_sgn( uint64_t f )
{
  return (f >> 63) & 1;
}
uint64_t to_exp( uint64_t f )
{
  return (f >> 52) & 0x7ff;
}
uint64_t to_mnt( uint64_t f )
{
  return f & 0x000fffffffffffff;
}
190
// Assemble a single precision value from sign, exponent and mantissa. The
// fields are added (not OR-ed), so a mantissa overflowing into bit 23 carries
// into the exponent.
uint32_t to_num( uint32_t s, uint32_t e, uint32_t m )
{
  const uint32_t sign_field = s << 31;
  const uint32_t exp_field = e << 23;
  return sign_field + exp_field + m;
}
// Assemble a double precision value from sign, exponent and mantissa. The
// fields are added (not OR-ed), so a mantissa overflowing into bit 52 carries
// into the exponent.
uint64_t to_num( uint64_t s, uint64_t e, uint64_t m )
{
  const uint64_t sign_field = s << 63;
  const uint64_t exp_field = e << 52;
  return sign_field + exp_field + m;
}
193
// Zero carrying the sign of f: everything but the sign bit is cleared.
uint32_t to_nil( uint32_t f )
{
  return f & 0x80000000u;
}
// Zero carrying the sign of f: everything but the sign bit is cleared.
uint64_t to_nil( uint64_t f )
{
  return f & ( ( (uint64_t) 1 ) << 63 );
}
196
// Absolute value: clear the sign bit.
uint32_t to_abs( uint32_t f )
{
  return f & 0x7fffffffu;
}
// Absolute value: clear the sign bit.
uint64_t to_abs( uint64_t f )
{
  return f & ~( ( (uint64_t) 1 ) << 63 );
}
199
// Negate: flip the sign bit.
uint32_t to_neg( uint32_t f )
{
  return f ^ 0x80000000u;
}
// Negate: flip the sign bit.
uint64_t to_neg( uint64_t f )
{
  const uint64_t sign_bit = ( (uint64_t) 1 ) << 63;
  return f ^ sign_bit;
}
202
203 uint32_t to_nans( uint64_t f )
204 {
205 uint32_t s = to_sgn(f);
206 uint32_t e = 0x0ff;
207 uint64_t m = to_mnt(f);
208 return to_num(s,e,uint32_t(m >> (52 - 23)));
209 }
210 uint64_t to_nand( uint32_t f )
211 {
212 uint64_t s = to_sgn(f);
213 uint64_t e = 0x7ff;
214 uint64_t m = to_mnt(f);
215 return to_num(s,e,m << (52 - 23));
216 }
217
// to_qnan() sets and to_snan() clears the quiet bit (bit 22, the most
// significant mantissa bit) of a single precision value.
uint32_t to_qnan( uint32_t f )
{
  return f | 0x00400000u;
}
uint32_t to_snan( uint32_t f )
{
  return f & 0xffbfffffu;
}
// to_qnan() sets and to_snan() clears the quiet bit (bit 51, the most
// significant mantissa bit) of a double precision value.
uint64_t to_qnan( uint64_t f )
{
  const uint64_t quiet_bit = ( (uint64_t) 1 ) << 51;
  return f | quiet_bit;
}
uint64_t to_snan( uint64_t f )
{
  const uint64_t quiet_bit = ( (uint64_t) 1 ) << 51;
  return f & ~quiet_bit;
}
222
223 // s11111111qmmmmmmmmmmmmmmmmmmmmmm nan q=1 ? qnan : snan
224 // s1111111100000000000000000000000 inf
225 // seeeeeeeemmmmmmmmmmmmmmmmmmmmmmm num 0 < e < 0xff
226 // s00000000mmmmmmmmmmmmmmmmmmmmmmm sub
227 // s0000000000000000000000000000000 nil
228
// Exponent-only classification of a single precision value:
// all ones means nan or infinity, all zeroes means subnormal or zero.
int is_nan_or_inf( uint32_t f )
{
  return (f & 0x7f800000u) == 0x7f800000u;
}
int is_sub_or_nil( uint32_t f )
{
  return (f & 0x7f800000u) == 0;
}
231
// Exponent-only classification of a double precision value:
// all ones means nan or infinity, all zeroes means subnormal or zero.
int is_nan_or_inf( uint64_t f )
{
  return (f & 0x7ff0000000000000) == 0x7ff0000000000000;
}
int is_sub_or_nil( uint64_t f )
{
  return (f & 0x7ff0000000000000) == 0;
}
234
// Single precision classification (the sign bit is ignored throughout):
//   is_nil()  tests for zero,
//   is_sub()  tests for subnormals,
//   is_inf()  tests for infinity,
//   is_nan()  tests for any nan (not-a-number),
//   is_qnan() tests for a quiet nan,
//   is_snan() tests for a signalling nan.

int is_nil( uint32_t f )
{
  return (f & 0x7fffffffu) == 0;
}
int is_sub( uint32_t f )
{
  return ((f & 0x7f800000u) == 0) && ((f & 0x007fffffu) != 0);
}
int is_inf( uint32_t f )
{
  return (f & 0x7fffffffu) == 0x7f800000u;
}
int is_nan( uint32_t f )
{
  return (f & 0x7fffffffu) > 0x7f800000u;
}
int is_qnan( uint32_t f )
{
  // Exponent all ones and the quiet bit set; the mantissa is then nonzero.
  return (f & 0x7fc00000u) == 0x7fc00000u;
}
int is_snan( uint32_t f )
{
  // Exponent all ones, quiet bit clear, and some other mantissa bit set.
  return ((f & 0x7fc00000u) == 0x7f800000u) && ((f & 0x003fffffu) != 0);
}
248
// Double precision classification (the sign bit is ignored throughout):
// zero, subnormal, infinity, any nan, quiet nan and signalling nan.
int is_nil( uint64_t f )
{
  return (f & 0x7fffffffffffffff) == 0;
}
int is_sub( uint64_t f )
{
  return ((f & 0x7ff0000000000000) == 0) && ((f & 0x000fffffffffffff) != 0);
}
int is_inf( uint64_t f )
{
  return (f & 0x7fffffffffffffff) == 0x7ff0000000000000;
}
int is_nan( uint64_t f )
{
  return (f & 0x7fffffffffffffff) > 0x7ff0000000000000;
}
int is_qnan( uint64_t f )
{
  // Exponent all ones and the quiet bit set; the mantissa is then nonzero.
  return (f & 0x7ff8000000000000) == 0x7ff8000000000000;
}
int is_snan( uint64_t f )
{
  // Exponent all ones, quiet bit clear, and some other mantissa bit set.
  return ((f & 0x7ff8000000000000) == 0x7ff0000000000000) && ((f & 0x0007ffffffffffff) != 0);
}
255
256
257 protected:
258 void float_raise( Exception flags ) { float_exception_flags = float_exception_flags | flags; }
259
260 int32_t roundAndPackInt32( int zSign, uint64_t absZ );
261 int64_t roundAndPackInt64( int zSign, uint64_t absZ0, uint64_t absZ1 );
262 uint32_t roundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig );
263 uint64_t roundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig );
264 uint32_t normalizeRoundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig );
265 uint64_t normalizeRoundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig );
266 void normalizeFloat32Subnormal( uint32_t aSig, int16_t *zExpPtr, uint32_t *zSigPtr );
267 void normalizeFloat64Subnormal( uint64_t aSig, int16_t *zExpPtr, uint64_t *zSigPtr );
268
269 uint32_t addFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half );
270 uint32_t subFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half );
271 uint64_t addFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half );
272 uint64_t subFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half );
273
274 static const uint32_t QNAN_32 = 0x7FFFFFFF;
275 static const uint64_t QNAN_64 = 0x7FFFFFFFFFFFFFFF;
276#if 0
277
278 // is_nil() returns true when both f and g are zero ignoring the sign.
279 // Note that +0.0 and -0.0 compare equal. This is a quick test for that.
280
281 static int is_nil( uint32_t f, uint32_t g ) { return ((f | g) << 1) == 0; }
282 static int is_nil( uint64_t f, uint64_t g ) { return ((f | g) << 1) == 0; }
283
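// cmp() orders two values from their raw bit patterns, treating +0.0 and
// -0.0 as equal; it makes no NaN check. (abs() and neg() are not part of
// this disabled section.)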
285
286 static CondCode cmp( uint32_t f, uint32_t g )
287 {
288 if ((f == g) || is_nil(f,g))
289 return EQ;
290 else
291 return (f < g) ? (sgn(g) ? GT : LT) : (sgn(f) ? LT : GT);
292 }
293
294 static CondCode cmp( uint64_t f, uint64_t g )
295 {
296 if ((f == g) || is_nil(f,g))
297 return EQ;
298 else
299 return (f < g) ? (sgn(g) ? GT : LT) : (sgn(f) ? LT : GT);
300 }
301#endif
302
303
// Right shift of `a' by `count' bits with a sticky bit: when any nonzero bit
// is shifted out, bit 0 of the result is forced to 1 ("jamming"). `count'
// may be arbitrarily large; from 32 upwards the result is simply 1 for a
// nonzero `a' and 0 otherwise. The result is written through `z'.

void shift32RightJamming( uint32_t a, int16_t count, uint32_t *z )
{
  if ( count == 0 )
  {
    *z = a;
    return;
  }
  if ( count >= 32 )
  {
    *z = ( a != 0 );
    return;
  }

  const uint32_t kept = a>>count;
  const uint32_t lost = a<<( ( - count ) & 31 );  // exactly the bits shifted out
  *z = kept | ( lost != 0 );
}
320
// Right shift of `a' by `count' bits with a sticky bit: when any nonzero bit
// is shifted out, bit 0 of the result is forced to 1 ("jamming"). `count'
// may be arbitrarily large; from 64 upwards the result is simply 1 for a
// nonzero `a' and 0 otherwise. The result is written through `z'.

void shift64RightJamming( uint64_t a, int16_t count, uint64_t *z )
{
  if ( count == 0 )
  {
    *z = a;
    return;
  }
  if ( count >= 64 )
  {
    *z = ( a != 0 );
    return;
  }

  const uint64_t kept = a>>count;
  const uint64_t lost = a<<( ( - count ) & 63 );  // exactly the bits shifted out
  *z = kept | ( lost != 0 );
}
337
// Treats `a0'.`a1' as a fixed-point number with the binary point between the
// two words and shifts it right by `count' bits. The integer part of the
// result (at most 64 bits) is stored through `z0Ptr'. The fraction stored
// through `z1Ptr' is approximate: its most significant bit is the last bit
// shifted out of `a0', and its remaining bits are nonzero exactly when some
// earlier shifted-out bit (including any bit of `a1') was nonzero. Seen as a
// 128-bit operation this is a right shift of `a0':`a1' by 64 _plus_ `count'
// bits. `count' may be arbitrarily large.

void shift64ExtraRightJamming( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
  const int wrap = ( - count ) & 63;
  uint64_t whole = 0;
  uint64_t extra;

  if ( count == 0 )
  {
    whole = a0;
    extra = a1;
  }
  else if ( count < 64 )
  {
    whole = a0>>count;
    extra = ( a0<<wrap ) | ( a1 != 0 );
  }
  else if ( count == 64 )
  {
    extra = a0 | ( a1 != 0 );
  }
  else
  {
    extra = ( ( a0 | a1 ) != 0 );
  }

  *z1Ptr = extra;
  *z0Ptr = whole;
}
379
// Right shift of the 128-bit value `a0':`a1' by `count' bits with a sticky
// bit: when any nonzero bit is shifted out, bit 0 of the result is forced
// to 1. `count' may be arbitrarily large; from 128 upwards the result is 1
// for a nonzero input and 0 otherwise. The high word of the result goes to
// `z0Ptr', the low word to `z1Ptr'.

void shift128RightJamming( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
  const int wrap = ( - count ) & 63;
  uint64_t high = 0;
  uint64_t low;

  if ( count == 0 )
  {
    high = a0;
    low = a1;
  }
  else if ( count < 64 )
  {
    high = a0>>count;
    low = ( a0<<wrap ) | ( a1>>count ) | ( ( a1<<wrap ) != 0 );
  }
  else if ( count == 64 )
  {
    low = a0 | ( a1 != 0 );
  }
  else if ( count < 128 )
  {
    low = ( a0>>( count & 63 ) ) | ( ( ( a0<<wrap ) | a1 ) != 0 );
  }
  else
  {
    low = ( ( a0 | a1 ) != 0 );
  }

  *z1Ptr = low;
  *z0Ptr = high;
}
417
// Left shift of the 128-bit value `a0':`a1' by `count' bits, which must be
// less than 64. Bits shifted out at the top are lost. The high word of the
// result goes to `z0Ptr', the low word to `z1Ptr'.

void shortShift128Left( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
  *z1Ptr = a1<<count;
  if ( count == 0 )
  {
    *z0Ptr = a0;
  }
  else
  {
    // Bits leaving the top of the low word enter the bottom of the high word.
    const uint64_t carried = a1>>( ( - count ) & 63 );
    *z0Ptr = ( a0<<count ) | carried;
  }
}
428
// 128-bit addition, modulo 2^128: `a0':`a1' plus `b0':`b1'. A carry out of
// the high word is lost. The high word of the sum goes to `z0Ptr', the low
// word to `z1Ptr'.

void add128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
  const uint64_t low_sum = a1 + b1;
  const uint64_t carry = ( low_sum < a1 );  // the low word wrapped around

  *z1Ptr = low_sum;
  *z0Ptr = a0 + b0 + carry;
}
442
// 128-bit subtraction, modulo 2^128: `a0':`a1' minus `b0':`b1'. A borrow out
// of the high word is lost. The high word of the difference goes to `z0Ptr',
// the low word to `z1Ptr'.

void sub128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
  const uint64_t borrow = ( a1 < b1 );  // the low word needs to borrow

  *z1Ptr = a1 - b1;
  *z0Ptr = a0 - b0 - borrow;
}
454
// Full 64 x 64 -> 128 bit multiplication of `a' and `b', built from four
// 32 x 32 -> 64 bit partial products. The high word of the product goes to
// `z0Ptr', the low word to `z1Ptr'.

void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
  const uint64_t a_lo = (uint32_t) a;
  const uint64_t a_hi = a>>32;
  const uint64_t b_lo = (uint32_t) b;
  const uint64_t b_hi = b>>32;

  uint64_t low = a_lo * b_lo;
  uint64_t high = a_hi * b_hi;
  uint64_t cross = a_lo * b_hi;
  const uint64_t cross_other = a_hi * b_lo;

  // Sum of the two cross products; a wrap here is worth 2^32 in the high word.
  cross += cross_other;
  high += ( ( (uint64_t) ( cross < cross_other ) )<<32 ) + ( cross>>32 );

  // The low half of the cross sum lands in the upper half of the low word.
  cross <<= 32;
  low += cross;
  high += ( low < cross );

  *z1Ptr = low;
  *z0Ptr = high;
}
480
481 // Returns an approximation to the 64-bit integer quotient obtained by dividing
482 // `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
483 // divisor `b' must be at least 2^63. If q is the exact quotient truncated
484 // toward zero, the approximation returned lies between q and q + 2 inclusive.
485 // If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
486 // unsigned integer is returned.
487
488 uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
489 {
490 uint64_t b0, b1;
491 uint64_t rem0, rem1, term0, term1;
492 uint64_t z;
493
494 if ( b <= a0 ) return 0xFFFFFFFFFFFFFFFF ;
495 b0 = b>>32;
496 z = ( b0<<32 <= a0 ) ? 0xFFFFFFFF00000000 : ( a0 / b0 )<<32;
497 mul64To128( b, z, &term0, &term1 );
498 sub128( a0, a1, term0, term1, &rem0, &rem1 );
499 while ( ( (int64_t) rem0 ) < 0 ) {
500 z -= 0x100000000 ;
501 b1 = b<<32;
502 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
503 }
504 rem0 = ( rem0<<32 ) | ( rem1>>32 );
505 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
506 return z;
507 }
508
// Estimates the square root of the 32-bit significand `a', which as an
// integer must be at least 2^31. For odd `aExp' (bit 0 set) the result
// approximates 2^31*sqrt(`a'/2^31); for even `aExp' it approximates
// 2^31*sqrt(`a'/2^30). Either way the estimate lies strictly within +/-2 of
// the exact value.

uint32_t estimateSqrt32( int16_t aExp, uint32_t a )
{
  static const uint16_t oddAdjust[16] = {
    0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
  };
  static const uint16_t evenAdjust[16] = {
    0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
  };
  const int8_t slot = ( a>>27 ) & 15;  // table index from bits 30..27
  uint32_t guess;

  if ( ( aExp & 1 ) == 0 )
  {
    guess = 0x8000 + ( a>>17 ) - evenAdjust[ slot ];
    guess = a / guess + guess;
    if ( 0x20000 <= guess )
      guess = 0xFFFF8000;
    else
      guess = guess<<15;
    if ( guess <= a )
      return (uint32_t) ( ( (int32_t) a )>>1 );
  }
  else
  {
    guess = 0x4000 + ( a>>17 ) - oddAdjust[ slot ];
    guess = ( ( a / guess )<<14 ) + ( guess<<15 );
    a >>= 1;
  }

  // One more refinement step on the widened operand.
  return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / guess ) ) + ( guess>>1 );
}
544
// Counts the 0 bits above the most significant 1 bit of `a'.
// Returns 32 when `a' is zero.

int8_t countLeadingZeros32( uint32_t a )
{
  int8_t zeros = 0;

  if ( a == 0 )
    return 32;

  // Binary narrowing: whenever the top part is empty, count it and move the
  // remaining bits up.
  if ( ( a & 0xFFFF0000 ) == 0 ) { zeros += 16; a <<= 16; }
  if ( ( a & 0xFF000000 ) == 0 ) { zeros += 8;  a <<= 8; }
  if ( ( a & 0xF0000000 ) == 0 ) { zeros += 4;  a <<= 4; }
  if ( ( a & 0xC0000000 ) == 0 ) { zeros += 2;  a <<= 2; }
  if ( ( a & 0x80000000 ) == 0 ) { zeros += 1; }
  return zeros;
}
583
584 // Returns the number of leading 0 bits before the most-significant 1 bit of
585 // `a'. If `a' is zero, 64 is returned.
586
587 int8_t countLeadingZeros64( uint64_t a )
588 {
589 int8_t shiftCount;
590
591 shiftCount = 0;
592 if ( a < ( (uint64_t) 1 )<<32 ) {
593 shiftCount += 32;
594 }
595 else {
596 a >>= 32;
597 }
598 shiftCount += countLeadingZeros32( a );
599 return shiftCount;
600 }
601
// Unsigned 128-bit comparison: returns 1 when `a0':`a1' is less than
// `b0':`b1', and 0 otherwise.

int lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{
  if ( a0 != b0 )
    return a0 < b0;
  return a1 < b1;
}
610
611 // Functions and definitions to determine: (1) whether tininess for underflow
612 // is detected before or after rounding by default, (2) what (if anything)
613 // happens when exceptions are raised, (3) how signaling NaNs are distinguished
614 // from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
615 // are propagated from function inputs to output. These details are target-
616 // specific.
617
// Nonzero when `a' is any single precision nan: ignoring the sign, the
// encoding lies above that of infinity.
int float32_is_nan( uint32_t a )
{
  const uint32_t magnitude = a & 0x7FFFFFFF;
  return magnitude > 0x7F800000;
}
// Nonzero when `a' is a single precision signaling nan: exponent all ones,
// quiet bit (bit 22) clear and at least one lower mantissa bit set.
int float32_is_signaling_nan( uint32_t a )
{
  if ( ( a & 0x7FC00000 ) != 0x7F800000 )
    return 0;
  return ( a & 0x003FFFFF ) != 0;
}
// Nonzero when `a' is any double precision nan: ignoring the sign, the
// encoding lies above that of infinity.
int float64_is_nan( uint64_t a )
{
  const uint64_t magnitude = a & 0x7FFFFFFFFFFFFFFF;
  return magnitude > 0x7FF0000000000000;
}
// Nonzero when `a' is a double precision signaling nan: exponent all ones,
// quiet bit (bit 51) clear and at least one lower mantissa bit set.
int float64_is_signaling_nan( uint64_t a )
{
  if ( ( a & 0x7FF8000000000000 ) != 0x7FF0000000000000 )
    return 0;
  return ( a & 0x0007FFFFFFFFFFFF ) != 0;
}
634
635 uint32_t propagateFloat32NaN( uint32_t a, uint32_t b )
636 {
637 int aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
638
639 aIsSignalingNaN = float32_is_signaling_nan( a );
640 bIsNaN = float32_is_nan( b );
641 bIsSignalingNaN = float32_is_signaling_nan( b );
642 a |= 0x00400000;
643 b |= 0x00400000;
644 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( EXC_INVALID );
645 return bIsSignalingNaN ? b : aIsSignalingNaN ? a : bIsNaN ? b : a;
646 }
647 uint32_t propagate3Float32NaN( uint32_t a, uint32_t b, uint32_t c )
648 {
649 int aIsNaN, bIsNaN, cIsNaN;
650 int aIsSignalingNaN, bIsSignalingNaN, cIsSignalingNaN;
651 aIsNaN = float32_is_nan( a );
652 aIsSignalingNaN = float32_is_signaling_nan( a );
653 bIsNaN = float32_is_nan( b );
654 bIsSignalingNaN = float32_is_signaling_nan( b );
655 cIsNaN = float32_is_nan( c );
656 cIsSignalingNaN = float32_is_signaling_nan( c );
657 a |= 0x00400000;
658 b |= 0x00400000;
659 c |= 0x00400000;
660 if ( aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN )
661 float_raise( EXC_INVALID );
662 return cIsSignalingNaN ? c : bIsSignalingNaN ? b : aIsSignalingNaN ? a :
663 cIsNaN ? c : bIsNaN ? b : a;
664 }
665
666 uint64_t propagateFloat64NaN( uint64_t a, uint64_t b )
667 {
668 int aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
669
670 aIsSignalingNaN = float64_is_signaling_nan( a );
671 bIsNaN = float64_is_nan( b );
672 bIsSignalingNaN = float64_is_signaling_nan( b );
673 a |= 0x0008000000000000 ;
674 b |= 0x0008000000000000 ;
675 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( EXC_INVALID );
676 return bIsSignalingNaN ? b : aIsSignalingNaN ? a : bIsNaN ? b : a;
677 }
678 uint64_t propagate3Float64NaN( uint64_t a, uint64_t b, uint64_t c )
679 {
680 int aIsNaN, bIsNaN, cIsNaN;
681 int aIsSignalingNaN, bIsSignalingNaN, cIsSignalingNaN;
682 aIsNaN = float64_is_nan( a );
683 aIsSignalingNaN = float64_is_signaling_nan( a );
684 bIsNaN = float64_is_nan( b );
685 bIsSignalingNaN = float64_is_signaling_nan( b );
686 cIsNaN = float64_is_nan( c );
687 cIsSignalingNaN = float64_is_signaling_nan( c );
688 a |= 0x0008000000000000 ;
689 b |= 0x0008000000000000 ;
690 c |= 0x0008000000000000 ;
691 if ( aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN )
692 float_raise( EXC_INVALID );
693 return cIsSignalingNaN ? c : bIsSignalingNaN ? b : aIsSignalingNaN ? a :
694 cIsNaN ? c : bIsNaN ? b : a;
695 }
696
// Single precision field access: the 23 bit fraction, the 8 bit biased
// exponent and the sign bit.
uint32_t extractFloat32Frac( uint32_t a )
{
  return ( a<<9 )>>9;
}
int16_t extractFloat32Exp( uint32_t a )
{
  return ( a<<1 )>>24;
}
int extractFloat32Sign( uint32_t a )
{
  return ( a & 0x80000000 ) != 0;
}
700
// Builds a single precision encoding from its parts. The three fields are
// added rather than OR-ed, so a significand reaching into bit 23 carries
// into the exponent field.
uint32_t packFloat32( int zSign, int16_t zExp, uint32_t zSig )
{
  const uint32_t sign_field = ( (uint32_t) zSign )<<31;
  const uint32_t exp_field = ( (uint32_t) zExp )<<23;
  return sign_field + exp_field + zSig;
}
705
// Double precision field access: the 52 bit fraction, the 11 bit biased
// exponent and the sign bit.
uint64_t extractFloat64Frac( uint64_t a )
{
  return ( a<<12 )>>12;
}
int16_t extractFloat64Exp( uint64_t a )
{
  return ( a<<1 )>>53;
}
int extractFloat64Sign( uint64_t a )
{
  return ( a & 0x8000000000000000 ) != 0;
}
709
// Builds a double precision encoding from its parts. The three fields are
// added rather than OR-ed, so a significand reaching into bit 52 carries
// into the exponent field.
uint64_t packFloat64( int zSign, int16_t zExp, uint64_t zSig )
{
  const uint64_t sign_field = ( (uint64_t) zSign )<<63;
  const uint64_t exp_field = ( (uint64_t) zExp )<<52;
  return sign_field + exp_field + zSig;
}
714
715
716
717};
718
719#endif
720