# usr/src/lib/libm/tahoe/cbrt.s

#
# Copyright (c) 1987 Regents of the University of California.
#
# Use and reproduction of this software are granted  in  accordance  with
# the terms and conditions specified in  the  Berkeley  Software  License
# Agreement (in particular, this entails acknowledgement of the programs'
# source, and inclusion of this notice) with the additional understanding
# that  all  recipients  should regard themselves as participants  in  an
# ongoing  research  project and hence should  feel  obligated  to report
# their  experiences (good or bad) with these elementary function  codes,
# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
#
        .data
        .align  2
# Version identification string for this source file, stored in the data
# section under the label _sccsid.  Nothing in the code shown in this file
# reads it.
_sccsid:
.asciz  "@(#)cbrt.s     1.2     (ucb.elefunt) %G%"

# double cbrt(double arg)
# W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry
# Re-coded in tahoe assembly language by Z. Alex Liu (7/13/87)
# Max error less than 0.667 ULPs _if_ +,-,*,/ were all correctly rounded...
#
# Entry points:
#   _cbrt, _d_cbrt  argument passed by value: the two longwords of x are
#                   read from 4(fp) and 8(fp).
#   _dcbrt_         argument passed by reference: 4(fp) holds the address
#                   of x.  (Presumably the Fortran-callable entry --
#                   TODO confirm against callers.)
# Result: cbrt(x) in r0:r1.  If the exponent field of x is zero (x is 0 or
# a reserved operand) x itself is returned unchanged.
#
# Method, working on |x| with the sign re-attached at the end:
#   1. Integer-divide the bit pattern of the high longword of |x| by 3 and
#      add the constant B to get a first guess q (about 5 bits).
#   2. With s = C + q*q*q/|x|, improve q in single precision:
#         q = q * (G + F/(s + E + D/s))              (about 23 bits)
#   3. One double-precision correction step:
#         t      = |x|/(q*q)
#         result = q + q*(t - q)/(2*q + t)
#      which is algebraically q*(q^3 + 2|x|)/(2*q^3 + |x|).
#
# Register use:
#   r0:r1   x, then |x|, then t = |x|/(q*q), finally the result
#   r2:r3   q (r2 alone while q is single precision); r3 also holds the
#           intermediates s and s+E+D/s during step 2
#   r4:r5   q*q, then 2*q, then 2*q + t
#   r6:r7   t - q
#   r8      address of x (in _dcbrt_ only), then the sign bit of x
# The constants B, C, D, E, F and G live in the data section that follows
# this code.
        .globl  _cbrt
        .globl  _d_cbrt
        .globl  _dcbrt_
        .text
        .align  2
_cbrt:
_d_cbrt:
        .word   0x01fc          # entry mask: save r2-r8
        movl    4(fp),r0        # r0:r1 = x (r0 = sign/exponent longword)
        movl    8(fp),r1
        brb     1f              # join the code shared with _dcbrt_
_dcbrt_:
        .word   0x01fc          # entry mask: save r2-r8
        movl    4(fp),r8        # r8 = address of x
        movl    (r8),r0
        movl    4(r8),r1        # r0:r1 = x

1:      andl3   $0x7f800000,r0,r2       # isolate the biased exponent of x
        beql    return          # exponent 0: x is 0 or a reserved operand; return x as is
        andl3   $0x80000000,r0,r8       # r8 = sign bit of x (r8 is free to reuse from here on)
        xorl2   r8,r0           # clear the sign: r0:r1 = |x|
        movl    r0,r2           # r2 = high longword of |x|
        divl2   $3,r2           # integer /3 of the bit pattern: rough cbrt, bias also /3
        addl2   B,r2            # restore bias, diminish fraction
        ldf     r2              # acc = |q|=|dcbrt| to 5 bits
        mulf    r2              # acc = qq
        divf    r0              # acc = qq/|x| (|x| taken as single precision from r0)
        mulf    r2              # acc = qqq/|x|
        addf    C               # acc = C+qqq/|x|
        stf     r3              # r3 = s = C+qqq/|x|
        ldf     D               # acc = D
        divf    r3              # acc = D/s
        addf    E               # acc = E+D/s
        addf    r3              # acc = s+E+D/s
        stf     r3              # r3 = s+E+D/s
        ldf     F               # acc = F
        divf    r3              # acc = F/(s+E+D/s)
        addf    G               # acc = G+F/(s+E+D/s)
        mulf    r2              # acc = q*(G+F/(s+E+D/s)) = new q to 23 bits
        stf     r2              # r2 = q*(G+F/(s+E+D/s)) = new q to 23 bits
        clrl    r3              # zero low longword: r2:r3 = q as double float
        ldd     r2              # acc = q as double float
        muld    r2              # acc = qq exactly (presumably exact because q carries only single-precision bits)
        std     r4              # r4:r5 = qq exactly
        ldd     r0              # acc = |x|
        divd    r4              # acc = |x|/(q*q) rounded
        std     r0              # r0:r1 = |x|/(q*q) rounded
        subd    r2              # acc = |x|/(q*q)-q exactly
        std     r6              # r6:r7 = |x|/(q*q)-q exactly
        movl    r2,r4
        clrl    r5              # r4:r5 = q as double float
        addl2   $0x800000,r4    # add 1 to the exponent field: r4:r5 = 2*q
        ldd     r4              # acc = 2*q
        addd    r0              # acc = 2*q+|x|/(q*q)
        std     r4              # r4:r5 = 2*q+|x|/(q*q)
        ldd     r6              # acc = |x|/(q*q)-q
        divd    r4              # acc = (|x|/(q*q)-q)/(2*q+|x|/(q*q))
        muld    r2              # acc = q*(|x|/(q*q)-q)/(2*q+|x|/(q*q))
        addd    r2              # acc = q+q*(|x|/(q*q)-q)/(2*q+|x|/(q*q))
        std     r0              # r0:r1 = |result|
        orl2    r8,r0           # put back the sign of x: cbrt(-x) = -cbrt(x)
return: ret                     # result in r0:r1; error less than 0.667ULPs?

        .data
        .align  2
# Single-precision constants used by the cube-root code above, stored as
# raw longwords.  Each entry is exactly one 4-byte longword, so the single
# alignment directive above keeps every one of them aligned.
#
# B is an integer added to the bit pattern of |x| after it has been divided
# by 3; C..G are floating-point bit patterns, with the intended value given
# as a .float literal and as an exact fraction.
B:      .long   0x2afbc49d      # 721142941 = (86-0.03306235651)*(2^23)
C:      .long   0x400af8b0      # .float 0f0.5428571429  =  19/35
D:      .long   0xc0348ef1      # .float 0f-0.7053061224 = -864/1225
E:      .long   0x40b50750      # .float 0f1.414285714   =  99/70
F:      .long   0x40cdb6db      # .float 0f1.607142857   =  45/28
G:      .long   0x3fb6db6e      # .float 0f0.3571428571  =  5/14