/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_Ld128Atomic.s
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#if defined(ARCH_X64)
/*----------------------------------------------------------------------------
 * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );    (x64 variant)
 *
 * Syntax: AT&T.  ABI: System V AMD64.
 * In:     %rdi = addr  (must be 16-byte aligned -- movdqa faults otherwise)
 *         %rsi = data
 * Out:    data[0] = byte-swapped bytes addr[0..7]
 *         data[1] = byte-swapped bytes addr[8..15]
 * Clobb:  %r8, %xmm0, %xmm1
 *
 * The aligned 128-bit movdqa load provides the single-access atomicity the
 * quad-load emulation needs; each 64-bit half is then bswap'ed to convert
 * between the little-endian host layout and the big-endian value layout the
 * simulated SPARC memory uses.
 *
 * Fixes vs. previous revision:
 *  - "movdq" is not a valid mnemonic; the xmm -> r64 move is "movq".
 *  - ".size sym, [.-sym]": brackets are not valid x86 GAS expression
 *    syntax; the size expression is written ".-sym".
 *--------------------------------------------------------------------------*/
	.text
	.align	16
	.globl	ss_ld128atomic
	.type	ss_ld128atomic, @function
ss_ld128atomic:
	movdqa	(%rdi),%xmm0		/* 128-bit load (atomic) */
	movhlps	%xmm0,%xmm1		/* %xmm1 = high quadword of %xmm0 */
	movq	%xmm0,%r8		/* low quadword (addr[0..7]) -> %r8 */
	bswapq	%r8			/* host little -> simulated big endian */
	movq	%r8,(%rsi)		/* data[0] */
	movq	%xmm1,%r8		/* high quadword (addr[8..15]) -> %r8 */
	bswapq	%r8
	movq	%r8,8(%rsi)		/* data[1] */
	ret
	.size	ss_ld128atomic, .-ss_ld128atomic
#else
	.section ".text"
/*============================================================================*\
 * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
 *----------------------------------------------------------------------------
 * This routine emulates the quad load atomic instruction. It should only be
 * used with a memory implementation that uses the native sparc instructions,
 * including the atomics, to implement simulated memory. This algorithm allows
 * us to implement memory without locks around loadquads and stores.
 *
 * The abstract algorithm is
 *
 *	do
 *	    ld ref @ ofs + 0
 *	    ld low @ ofs + 8
 *	    ld cmp @ ofs + 0
 *	while ref != cmp
 *
 * The ref and cmp load from the same address.
 * These two loads form a time window. In this time window we check that the
 * memory location @ ofs + 0 did not change: ref == cmp. If the value did not
 * change then the low value, which must be loaded in the time window, is a
 * correct lower part of the quad. The ref (or cmp) value is the correct
 * upper part of the quad load. Now if the value @ ofs + 0 did change then we
 * try again.
 *
 * For RMO we insert membars between the loads to guarantee ordering of the
 * loads.
\*============================================================================*/
	.global	ss_ld128atomic
	.type	ss_ld128atomic, #function

	! In:  %o0 = addr of the 16-byte quad (assumed naturally aligned for
	!            the 8-byte ldx accesses -- caller's responsibility)
	!      %o1 = data, receives the two 64-bit halves
	! Out: data[0] = upper 8 bytes, data[1] = lower 8 bytes
	! Clobbers: %o2 (ref/high), %o3 (low), %o4 (cmp), condition codes
ss_ld128atomic:
	ldx	[%o0 + 0],%o2		! %o2 = ref: reference copy of high half
1:	membar	#LoadLoad		! RMO: ref load ordered before low load
	ldx	[%o0 + 8],%o3		! %o3 = low half, inside the time window
	membar	#LoadLoad		! RMO: low load ordered before cmp load
	ldx	[%o0 + 0],%o4		! %o4 = cmp: re-read the high half
	cmp	%o2,%o4			! ref == cmp => no store hit the quad
	bne,a	%xcc,1b			! changed: retry the window; ",a" annuls
	ldx	[%o0 + 0],%o2		!   delay slot, runs only on the retry
					!   path: pick up a fresh ref value
	stx	%o2,[%o1 + 0]		! data[0] = high half
	retl
	stx	%o3,[%o1 + 8]		! delay slot: data[1] = low half
#endif