* ========== Copyright Header Begin ==========================================
* OpenSPARC T2 Processor File: SS_Ld128Atomic.s
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* ========== Copyright Header End ============================================
/*
 * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );   (x86-64 variant)
 *
 * Copies the 16 bytes at addr into data[0..1] using a single 128-bit load.
 *
 * ABI:      System V AMD64 argument registers
 * In:       %rdi = addr; must be 16-byte aligned (movdqa faults otherwise)
 *           %rsi = data; no alignment assumed (written with movdqu)
 * Out:      data[0] = the 8 bytes at addr+0, data[1] = the 8 bytes at addr+8;
 *           no byte swapping is performed here
 * Clobbers: %xmm0
 *
 * NOTE(review): single-copy atomicity of an aligned 16-byte load is a
 * property of the processor, not of the instruction encoding -- confirm it
 * for every CPU this is expected to run on.
 * NOTE(review): this file also carries a SPARC body for the same symbol;
 * only one of the two may be assembled for a given target.
 */
	.globl	ss_ld128atomic
	.type	ss_ld128atomic, @function
ss_ld128atomic:
	movdqa	(%rdi),%xmm0		/* 128-bit load (atomic) */
	movdqu	%xmm0,(%rsi)		/* hand both 64-bit halves back through data[] */
	ret
	.size	ss_ld128atomic, .-ss_ld128atomic
/*============================================================================*\
* void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
*----------------------------------------------------------------------------
* This routine emulates the quad load atomic instruction. It should only be
* used with a memory implementation that uses the native sparc instructions,
* including the atomics, to implement simulated memory. This algorithm allows
* us to implement memory without locks around loadquads and stores.
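* The abstract algorithm is:
*     do {
*         ref = load64( ofs + 0 );
*         low = load64( ofs + 8 );
*         cmp = load64( ofs + 0 );
*     } while (ref != cmp);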
* The ref and cmp load from the same address. These two loads form a time
* window. In this time window we check that the memory location @ ofs + 0
* did not change: ref == cmp. If the value did not change then the low value,
* which must be loaded in the time window, is a correct lower part of the quad.
* The ref (or cmp) value is the correct upper part of the quad load. Now if the
* value @ ofs + 0 did change then we try again.
* For RMO we insert membars between the loads to guarantee the ordering of the
* loads.
\*============================================================================*/
! void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );   (SPARC V9 variant)
!
! In:       %o0 = addr, %o1 = data
! Out:      data[0] = doubleword at addr+0 (high)
!           data[1] = doubleword at addr+8 (low)
! Clobbers: %o2, %o3, %o4, integer condition codes
! Leaf routine: returns with retl, touches only %o registers.
!
! The high doubleword is read twice, once before and once after the low
! doubleword. The pair is accepted only when both reads of the high
! doubleword agree; otherwise all three loads are redone.
!
! NOTE(review): this file also carries an x86-64 body for the same symbol;
! only one of the two may be assembled for a given target.
	.global	ss_ld128atomic
	.type	ss_ld128atomic, #function
ss_ld128atomic:
1:
	ldx	[%o0 + 0],%o2		! load reference high
	membar	#LoadLoad		! RMO: keep the reference load ahead of the low load
	ldx	[%o0 + 8],%o3		! load reference low
	membar	#LoadLoad		! RMO: keep the low load ahead of the compare load
	ldx	[%o0 + 0],%o4		! load compare high
	cmp	%o2,%o4			! check high values loaded are the same
	bne,pn	%xcc,1b			! differ: a store landed in between, retry
	nop				! delay slot kept empty so no store runs on the retry path
	stx	%o2,[%o1 + 0]		! store high
	retl
	stx	%o3,[%o1 + 8]		! store low (executes in the retl delay slot)
	.size	ss_ld128atomic, .-ss_ld128atomic