usr/src/lib/libm/tahoe/cbrt.s

#
# Copyright (c) 1987 Regents of the University of California.
#
# Use and reproduction of this software are granted  in  accordance  with
# the terms and conditions specified in  the  Berkeley  Software  License
# Agreement (in particular, this entails acknowledgement of the programs'
# source, and inclusion of this notice) with the additional understanding
# that  all  recipients  should regard themselves as participants  in  an
# ongoing  research  project and hence should  feel  obligated  to report
# their  experiences (good or bad) with these elementary function  codes,
# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
#
	.data
	.align	2
# Version identification string for this file, kept in the data section
# under the label _sccsid.  It is not referenced by the code below.
_sccsid:
.asciz	"@(#)cbrt.s	1.1	(ucb.elefunt) %G%"

# double cbrt(double arg)
# W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry
# Revised and tested by Z. Alex Liu (7/11/87)
# Max error less than 0.667 ULPs _if_ +,-,*,/ were all correctly rounded...
#
# Entry points
#   _cbrt, _d_cbrt : argument by value; the two words of x are read from
#                    4(fp) and 8(fp).
#   _dcbrt_        : argument by reference; 4(fp) holds a pointer to x
#                    (presumably the Fortran-callable form -- confirm
#                    against callers).
#   All three join at local label 1 with r0:r1 = x.  The result is left
#   in r0:r1.
#
# Outline
#   1. If the exponent field of x (mask 0x7f800000 of the high word) is
#      zero, x is returned unchanged (zero or reserved operand).
#   2. The sign bit is moved to r8 and the work is done on |x|.
#   3. First guess q: the high word of |x| is divided by 3 as an integer
#      and the constant B is added.
#   4. Single-precision refinement:
#         s = C + q*q*q/|x|
#         q = q * (G + F/(s + E + D/s))
#   5. Double-precision refinement, with t = |x|/(q*q):
#         result = q + q*(t - q)/(2*q + t)
#      (algebraically q + q*(|x| - q^3)/(2*q^3 + |x|)).
#   6. The saved sign is OR-ed back into the high word of the result.
#
# Register use
#   r0:r1  x on entry, |x|, then t = |x|/(q*q), finally the result
#   r2     q as a single; r2:r3 q as a double (r3 cleared)
#   r3     scratch for s and s+E+D/s during step 4
#   r4:r5  q*q, then 2*q, then 2*q + t
#   r6:r7  t - q
#   r8     argument pointer (_dcbrt_ only), then sign(x)
#   The floating accumulator ("acc" in the comments) is overwritten.
	.globl	_cbrt
	.globl	_d_cbrt
	.globl	_dcbrt_
	.text
	.align	1
_cbrt:
_d_cbrt:
	.word	0x01fc		# save r2 to r8
	movl	4(fp),r0	# r0:r1 = x
	movl	8(fp),r1
	brb	1f		# join the common code
_dcbrt_:
	.word	0x01fc		# save r2 to r8
	movl	4(fp),r8	# r8 = address of x
	movl	(r8),r0
	movl	4(r8),r1	# r0:r1 = x

# r2 is written here only to set the condition codes; it is reloaded below.
1:	andl3	$0x7f800000,r0,r2	# biased exponent of x
	beql	return		# dcbrt(0)=0  dcbrt(res)=res. operand
	andl3	$0x80000000,r0,r8	# r8 has sign(x)
	xorl2	r8,r0		# r0 is abs(x)
	movl	r0,r2		# r2 has abs(x)
	divl2	$3,r2		# rough dcbrt with bias/3
	addl2	B,r2		# restore bias, diminish fraction
	ldf	r2		# acc = |q|=|dcbrt| to 5 bits
	mulf	r2		# acc = qq
	divf	r0		# acc = qq/|x|
	mulf	r2		# acc = qqq/|x|
	addf	C		# acc = C+qqq/|x|
	stf	r3		# r3 = s = C+qqq/|x|
	ldf	D		# acc = D
	divf	r3		# acc = D/s
	addf	E		# acc = E+D/s
	addf	r3		# acc = s+E+D/s
	stf	r3		# r3 = s+E+D/s
	ldf	F		# acc = F
	divf	r3		# acc = F/(s+E+D/s)
	addf	G		# acc = G+F/(s+E+D/s)
	mulf	r2		# acc = q*(G+F/(s+E+D/s)) = new q to 23 bits
	stf	r2		# r2 = q*(G+F/(s+E+D/s)) = new q to 23 bits
# From here on the arithmetic is double precision.  q keeps only its
# single-precision bits (low word zero).
	clrl	r3		# r2:r3 = q as double float
	ldd	r2		# acc = q as double float
	muld	r2		# acc = qq exactly
	std	r4		# r4:r5 = qq exactly
	ldd	r0		# acc = |x|
	divd	r4		# acc = |x|/(q*q) rounded
	std	r0		# r0:r1 = |x|/(q*q) rounded
	subd	r2		# acc = |x|/(q*q)-q exactly
	std	r6		# r6:r7 = |x|/(q*q)-q exactly
	movl    r2,r4		# r4 = high word of q
	clrl	r5		# r4:r5 = q as double float
# Adding 0x800000 bumps the exponent field (bits 30..23) by one.
	addl2	$0x800000,r4	# r4:r5 = 2*q
	ldd	r4		# acc = 2*q
	addd	r0		# acc = 2*q+|x|/(q*q)
	std	r4		# r4:r5 = 2*q+|x|/(q*q)
	ldd	r6		# acc = |x|/(q*q)-q
	divd	r4		# acc = (|x|/(q*q)-q)/(2*q+|x|/(q*q))
	muld	r2		# acc = q*(|x|/(q*q)-q)/(2*q+|x|/(q*q))
	addd	r2		# acc = q+q*(|x|/(q*q)-q)/(2*q+|x|/(q*q))
	std	r0		# r0:r1 = |result|
	orl2	r8,r0		# restore the sign bit
# The early exit above also lands here, with r0:r1 still holding x.
return: ret			# error less than 0.667ULPs?

	.data
# Constants used by the cbrt code above.
#   B     integer added (addl2) to the high word of |x| after the integer
#         divide by 3, to form the first guess.
#   C..G  single-precision operands of addf/ldf in the rational
#         correction; each is stored as a raw 32-bit pattern, with the
#         intended decimal value and exact fraction given in its comment.
	.align	2
B :	.long	721142941	#(86-0.03306235651)*(2^23)
	.align	2
C:	.long	0x400af8b0	#.float	0f0.5428571429	# 19/35
	.align	2
D:	.long	0xc0348ef1	#.float	0f-0.7053061224	# -864/1225
	.align	2
E:	.long	0x40b50750	#.float	0f1.414285714	# 99/70
	.align	2
F:	.long	0x40cdb6db	#.float	0f1.607142857	# 45/28
	.align	2
G:	.long	0x3fb6db6e	#.float	0f0.3571428571	# 5/14
Commit	Line	Data
	1	#
	2	# Copyright (c) 1987 Regents of the University of California.
	3	#
	4	# Use and reproduction of this software are granted in accordance with
	5	# the terms and conditions specified in the Berkeley Software License
	6	# Agreement (in particular, this entails acknowledgement of the programs'
	7	# source, and inclusion of this notice) with the additional understanding
	8	# that all recipients should regard themselves as participants in an
	9	# ongoing research project and hence should feel obligated to report
	10	# their experiences (good or bad) with these elementary function codes,
	11	# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
	12	#
	13	.data
	14	.align 2
	15	_sccsid:
	16	.asciz "@(#)cbrt.s 1.1 (ucb.elefunt) %G%"
	17
	18	# double cbrt(double arg)
	19	# W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry
	20	# Revised and tested by Z. Alex Liu (7/11/87)
	21	# Max error less than 0.667 ULPs _if_ +,-,*,/ were all correctly rounded...
	22	.globl _cbrt
	23	.globl _d_cbrt
	24	.globl _dcbrt_
	25	.text
	26	.align 1
	27	_cbrt:
	28	_d_cbrt:
	29	.word 0x01fc # save r2 to r8
	30	movl 4(fp),r0 # r0:r1 = x
	31	movl 8(fp),r1
	32	brb 1f
	33	_dcbrt_:
	34	.word 0x01fc # save r2 to r8
	35	movl 4(fp),r8
	36	movl (r8),r0
	37	movl 4(r8),r1 # r0:r1 = x
	38
	39	1: andl3 $0x7f800000,r0,r2 # biased exponent of x
	40	beql return # dcbrt(0)=0 dcbrt(res)=res. operand
	41	andl3 $0x80000000,r0,r8 # r8 has sign(x)
	42	xorl2 r8,r0 # r0 is abs(x)
	43	movl r0,r2 # r2 has abs(x)
	44	divl2 $3,r2 # rough dcbrt with bias/3
	45	addl2 B,r2 # restore bias, diminish fraction
	46	ldf r2 # acc = \|q\|=\|dcbrt\| to 5 bits
	47	mulf r2 # acc = qq
	48	divf r0 # acc = qq/\|x\|
	49	mulf r2 # acc = qqq/\|x\|
	50	addf C # acc = C+qqq/\|x\|
	51	stf r3 # r3 = s = C+qqq/\|x\|
	52	ldf D # acc = D
	53	divf r3 # acc = D/s
	54	addf E # acc = E+D/s
	55	addf r3 # acc = s+E+D/s
	56	stf r3 # r3 = s+E+D/s
	57	ldf F # acc = F
	58	divf r3 # acc = F/(s+E+D/s)
	59	addf G # acc = G+F/(s+E+D/s)
	60	mulf r2 # acc = q*(G+F/(s+E+D/s)) = new q to 23 bits
	61	stf r2 # r2 = q*(G+F/(s+E+D/s)) = new q to 23 bits
	62	clrl r3 # r2:r3 = q as double float
	63	ldd r2 # acc = q as double float
	64	muld r2 # acc = qq exactly
	65	std r4 # r4:r5 = qq exactly
	66	ldd r0 # acc = \|x\|
	67	divd r4 # acc = \|x\|/(q*q) rounded
	68	std r0 # r0:r1 = \|x\|/(q*q) rounded
	69	subd r2 # acc = \|x\|/(q*q)-q exactly
	70	std r6 # r6:r7 = \|x\|/(q*q)-q exactly
	71	movl r2,r4
	72	clrl r5 # r4:r5 = q as double float
	73	addl2 $0x800000,r4 # r4:r5 = 2*q
	74	ldd r4 # acc = 2*q
	75	addd r0 # acc = 2q+\|x\|/(qq)
	76	std r4 # r4:r5 = 2q+\|x\|/(qq)
	77	ldd r6 # acc = \|x\|/(q*q)-q
	78	divd r4 # acc = (\|x\|/(qq)-q)/(2q+\|x\|/(q*q))
	79	muld r2 # acc = q(\|x\|/(qq)-q)/(2q+\|x\|/(qq))
	80	addd r2 # acc = q+q(\|x\|/(qq)-q)/(2q+\|x\|/(qq))
	81	std r0 # r0:r1 = \|result\|
	82	orl2 r8,r0 # restore the sign bit
	83	return: ret # error less than 0.667ULPs?
	84
	85	.data
	86	.align 2
	87	B : .long 721142941 #(86-0.03306235651)*(2^23)
	88	.align 2
	89	C: .long 0x400af8b0 #.float 0f0.5428571429 # 19/35
	90	.align 2
	91	D: .long 0xc0348ef1 #.float 0f-0.7053061224 # -864/1225
	92	.align 2
	93	E: .long 0x40b50750 #.float 0f1.414285714 # 99/70
	94	.align 2
	95	F: .long 0x40cdb6db #.float 0f1.607142857 # 45/28
	96	.align 2
	97	G: .long 0x3fb6db6e #.float 0f0.3571428571 # 5/14