Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / sam / cpus / vonk / ss / lib / cpu / src / SS_Ld128Atomic.s
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_Ld128Atomic.s
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#if defined(ARCH_X64)
.text
	.align	16
/*
 * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
 *
 * ABI:   SysV AMD64
 * In:    rdi = source address (must be 16-byte aligned: movdqa faults otherwise;
 *              the simulated quad-load requires 16-byte alignment anyway)
 * Out:   data[0] = byte-swapped quadword from addr+0 (simulated big-endian high)
 *        data[1] = byte-swapped quadword from addr+8 (simulated big-endian low)
 * Clobb: r8, xmm0, xmm1, flags
 *
 * A single 128-bit aligned SSE load gives us both halves in one access; each
 * half is then bswap'd to convert the little-endian host quadwords into the
 * big-endian byte order the simulated SPARC memory image uses.
 */
	.globl	ss_ld128atomic
	.type	ss_ld128atomic, @function
ss_ld128atomic:
	movdqa	(%rdi),%xmm0		/* one 128-bit load of both halves */
	movhlps	%xmm0,%xmm1		/* xmm1[63:0] = upper quadword (addr+8) */
	movq	%xmm0,%r8		/* lower quadword (addr+0) -> r8 */
	bswapq	%r8			/* host little-endian -> target big-endian */
	movq	%r8,(%rsi)		/* data[0] = high half */
	movq	%xmm1,%r8		/* upper quadword -> r8 */
	bswapq	%r8
	movq	%r8,8(%rsi)		/* data[1] = low half */
	ret
	.size	ss_ld128atomic, .-ss_ld128atomic
#else
.section ".text"
/*============================================================================*\
* void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
*----------------------------------------------------------------------------
 * This routine emulates the quad load atomic instruction. It should only be
* used with a memory implementation that uses the native sparc instructions,
* including the atomics, to implement simulated memory. This algorithm allows
 * us to implement memory without locks around quad loads and stores.
*
* The abstract algorithm is
*
* do
* ld ref @ ofs + 0
* ld low @ ofs + 8
* ld cmp @ ofs + 0
* while ref != cmp
*
* The ref and cmp load from the same address. These two loads form a time
* window. In this time window we check that the memory location @ ofs + 0
* did not change: ref == cmp. If the value did not change then the low value,
* which must be loaded in the time window, is a correct lower part of the quad.
 * The ref (or cmp) value is the correct upper part of the quad load. Now if the
* value @ ofs + 0 did change then we try again.
*
 * For RMO we insert membars between the loads to guarantee ordering of the
* loads.
\*============================================================================*/
! void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] )
!   %o0 = source address, %o1 = destination buffer.
! Lock-free quad load: re-read the high word and retry until it is unchanged,
! which proves the low word was read inside a store-free window (see the
! algorithm description above).
.global ss_ld128atomic
.type ss_ld128atomic, #function
ss_ld128atomic:
ldx [%o0 + 0],%o2 ! load reference high
1:
membar #LoadLoad ! RMO: order reference-high load before low load
ldx [%o0 + 8],%o3 ! load low inside the observation window
membar #LoadLoad ! RMO: order low load before compare-high load
ldx [%o0 + 0],%o4 ! load compare high (closes the window)
cmp %o2,%o4 ! equal => no store hit the high word in the window
bne,a %xcc,1b ! changed => a store intervened, retry
ldx [%o0 + 0],%o2 ! annulled delay slot: only on retry, refresh reference
stx %o2,[%o1 + 0] ! store high
retl
stx %o3,[%o1 + 8] ! delay slot of retl: store low
#endif