+#
+# Copyright (c) 1987 Regents of the University of California.
+#
+# Use and reproduction of this software are granted in accordance with
+# the terms and conditions specified in the Berkeley Software License
+# Agreement (in particular, this entails acknowledgement of the programs'
+# source, and inclusion of this notice) with the additional understanding
+# that all recipients should regard themselves as participants in an
+# ongoing research project and hence should feel obligated to report
+# their experiences (good or bad) with these elementary function codes,
+# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
+#
+ .data
+ .align 2
+_sccsid:
+.asciz "@(#)cbrt.s 1.1 (ucb.elefunt) %G%"
+
+# double cbrt(double arg)
+# W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry
+# Revised and tested by Z. Alex Liu (7/11/87)
+# Max error less than 0.667 ULPs _if_ +,-,*,/ were all correctly rounded...
+ .globl _cbrt
+ .globl _d_cbrt
+ .globl _dcbrt_
+ .text
+ .align 1
+_cbrt:
+_d_cbrt:
+ .word 0x01fc # save r2 to r8
+ movl 4(fp),r0 # r0:r1 = x
+ movl 8(fp),r1
+ brb 1f
+_dcbrt_:
+ .word 0x01fc # save r2 to r8
+ movl 4(fp),r8
+ movl (r8),r0
+ movl 4(r8),r1 # r0:r1 = x
+
+1: andl3 $0x7f800000,r0,r2 # biased exponent of x
+ beql return # dcbrt(0)=0 dcbrt(res)=res. operand
+ andl3 $0x80000000,r0,r8 # r8 has sign(x)
+ xorl2 r8,r0 # r0 is abs(x)
+ movl r0,r2 # r2 has abs(x)
+ divl2 $3,r2 # rough dcbrt with bias/3
+ addl2 B,r2 # restore bias, diminish fraction
+ ldf r2 # acc = |q|=|dcbrt| to 5 bits
+ mulf r2 # acc = qq
+ divf r0 # acc = qq/|x|
+ mulf r2 # acc = qqq/|x|
+ addf C # acc = C+qqq/|x|
+ stf r3 # r3 = s = C+qqq/|x|
+ ldf D # acc = D
+ divf r3 # acc = D/s
+ addf E # acc = E+D/s
+ addf r3 # acc = s+E+D/s
+ stf r3 # r3 = s+E+D/s
+ ldf F # acc = F
+ divf r3 # acc = F/(s+E+D/s)
+ addf G # acc = G+F/(s+E+D/s)
+ mulf r2 # acc = q*(G+F/(s+E+D/s)) = new q to 23 bits
+ stf r2 # r2 = q*(G+F/(s+E+D/s)) = new q to 23 bits
+ clrl r3 # r2:r3 = q as double float
+ ldd r2 # acc = q as double float
+ muld r2 # acc = qq exactly
+ std r4 # r4:r5 = qq exactly
+ ldd r0 # acc = |x|
+ divd r4 # acc = |x|/(q*q) rounded
+ std r0 # r0:r1 = |x|/(q*q) rounded
+ subd r2 # acc = |x|/(q*q)-q exactly
+ std r6 # r6:r7 = |x|/(q*q)-q exactly
+ movl r2,r4
+ clrl r5 # r4:r5 = q as double float
+ addl2 $0x800000,r4 # r4:r5 = 2*q
+ ldd r4 # acc = 2*q
+ addd r0 # acc = 2*q+|x|/(q*q)
+ std r4 # r4:r5 = 2*q+|x|/(q*q)
+ ldd r6 # acc = |x|/(q*q)-q
+ divd r4 # acc = (|x|/(q*q)-q)/(2*q+|x|/(q*q))
+ muld r2 # acc = q*(|x|/(q*q)-q)/(2*q+|x|/(q*q))
+ addd r2 # acc = q+q*(|x|/(q*q)-q)/(2*q+|x|/(q*q))
+ std r0 # r0:r1 = |result|
+ orl2 r8,r0 # restore the sign bit
+return: ret # error less than 0.667ULPs?
+
+ .data
+ .align 2
+B : .long 721142941 #(86-0.03306235651)*(2^23)
+ .align 2
+C: .long 0x400af8b0 #.float 0f0.5428571429 # 19/35
+ .align 2
+D: .long 0xc0348ef1 #.float 0f-0.7053061224 # -864/1225
+ .align 2
+E: .long 0x40b50750 #.float 0f1.414285714 # 99/70
+ .align 2
+F: .long 0x40cdb6db #.float 0f1.607142857 # 45/28
+ .align 2
+G: .long 0x3fb6db6e #.float 0f0.3571428571 # 5/14