Initial commit of OpenSPARC T2 architecture model.
OpenSPARC-T2-SAM: sam-t2/sam/cpus/vonk/ss/lib/cpu/src/SS_Ld128Atomic.s
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_Ld128Atomic.s
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#if defined(ARCH_X64)

        .text
        .align  16
        .globl  ss_ld128atomic
        .type   ss_ld128atomic, @function
ss_ld128atomic:
        movdqa  (%rdi),%xmm0            /* 128-bit load (atomic); addr must be 16-byte aligned */
        movhlps %xmm0,%xmm1             /* copy upper 64 bits of %xmm0 into %xmm1 */
        movq    %xmm0,%r8               /* lower 64 bits: the word at addr+0 */
        bswapq  %r8                     /* byte swap (little-endian host, big-endian SPARC data) */
        movq    %r8,(%rsi)              /* data[0] */
        movq    %xmm1,%r8               /* upper 64 bits: the word at addr+8 */
        bswapq  %r8                     /* byte swap */
        movq    %r8,8(%rsi)             /* data[1] */
        ret
        .size   ss_ld128atomic, .-ss_ld128atomic

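/*
 * For reference, a rough C equivalent of the x86-64 path above, written with
 * SSE2 intrinsics. This is only an illustrative sketch: the function name
 * ss_ld128atomic_sketch is hypothetical, and the hand-written assembly above
 * is what actually gets assembled.
 *
 *   #include <emmintrin.h>   // SSE2 intrinsics
 *   #include <stdint.h>
 *
 *   void ss_ld128atomic_sketch( const void* addr, uint64_t data[2] )
 *   {
 *       __m128i  v  = _mm_load_si128((const __m128i*)addr);   // one 16-byte load
 *       uint64_t lo = (uint64_t)_mm_cvtsi128_si64(v);                        // bytes addr+0..7
 *       uint64_t hi = (uint64_t)_mm_cvtsi128_si64(_mm_unpackhi_epi64(v, v)); // bytes addr+8..15
 *       data[0] = __builtin_bswap64(lo);   // byte swap each half, as the asm above does
 *       data[1] = __builtin_bswap64(hi);   // (__builtin_bswap64 is a GCC builtin)
 *   }
 */
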
#else

.section ".text"

/*============================================================================*\
 * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
 *----------------------------------------------------------------------------
 * This routine emulates the quad load atomic instruction. It should only be
 * used with a memory implementation that uses the native SPARC instructions,
 * including the atomics, to implement simulated memory. This algorithm allows
 * us to implement memory without locks around quad loads and stores.
 *
 * The abstract algorithm is
 *
 *      do
 *          ld ref @ ofs + 0
 *          ld low @ ofs + 8
 *          ld cmp @ ofs + 0
 *      while ref != cmp
 *
 * The ref and cmp loads are from the same address. These two loads form a time
 * window. In this time window we check that the memory location @ ofs + 0
 * did not change: ref == cmp. If the value did not change, then the low value,
 * which must be loaded in the time window, is a correct lower part of the quad.
 * The ref (or cmp) value is the correct upper part of the quad load. If the
 * value @ ofs + 0 did change, then we try again.
 *
 * For RMO we insert membars between the loads to guarantee ordering of the
 * loads.
\*============================================================================*/
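
/*
 * A rough C rendering of the abstract algorithm above, for illustration only.
 * The helper name and the use of GCC's __atomic_thread_fence builtin are
 * assumptions of this sketch, not part of the model; the SPARC code below is
 * the real implementation.
 *
 *   #include <stdint.h>
 *
 *   static void ld128atomic_sketch( const volatile uint64_t* ofs, uint64_t data[2] )
 *   {
 *       uint64_t ref, low, cmp;
 *       do {
 *           ref = ofs[0];                               // ld ref @ ofs + 0
 *           __atomic_thread_fence(__ATOMIC_ACQUIRE);    // membar #LoadLoad (RMO)
 *           low = ofs[1];                               // ld low @ ofs + 8
 *           __atomic_thread_fence(__ATOMIC_ACQUIRE);    // membar #LoadLoad (RMO)
 *           cmp = ofs[0];                               // ld cmp @ ofs + 0
 *       } while (ref != cmp);                           // a store slipped in between: retry
 *       data[0] = ref;                                  // upper part of the quad
 *       data[1] = low;                                  // lower part of the quad
 *   }
 */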

.global ss_ld128atomic
.type ss_ld128atomic, #function

ss_ld128atomic:
        ldx     [%o0 + 0],%o2   ! load reference high
1:
        membar  #LoadLoad       ! RMO: order against the next load
        ldx     [%o0 + 8],%o3   ! load reference low
        membar  #LoadLoad       ! RMO: order against the next load
        ldx     [%o0 + 0],%o4   ! load compare high
        cmp     %o2,%o4         ! check that the high values loaded are the same,
        bne,a   %xcc,1b         ! which means no store happened in between
        ldx     [%o0 + 0],%o2   ! (annulled delay slot) reload reference high and retry
        stx     %o2,[%o1 + 0]   ! store high
        retl
        stx     %o3,[%o1 + 8]   ! store low (delay slot)


#endif
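
/*
 * Usage sketch (illustrative only): a caller emulating a quad load atomic
 * passes the 16-byte aligned address of the location in simulated memory and
 * a two-word result buffer. The declaration matches the prototype in the
 * comment block above; the surrounding caller code is hypothetical.
 *
 *   extern void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
 *
 *   uint64_t quad[2];
 *   ss_ld128atomic(addr, quad);   // addr must be 16-byte aligned
 *   // quad[0] now holds the 64-bit word at addr+0, quad[1] the word at addr+8
 */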