Initial commit of OpenSPARC T2 architecture model.
OpenSPARC-T2-SAM: sam-t2/sam/cpus/vonk/ss/lib/cpu/src/SS_Ld128Atomic.s
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_Ld128Atomic.s
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#if defined(ARCH_X64)

        .text
        .align  16
        .globl  ss_ld128atomic
        .type   ss_ld128atomic, @function
ss_ld128atomic:
        movdqa  (%rdi),%xmm0            /* 128-bit load (atomic); addr must be 16-byte aligned */
        movhlps %xmm0,%xmm1             /* copy upper 64 bits of %xmm0 into %xmm1 */
        movq    %xmm0,%r8               /* lower 64 bits: the word at addr+0 */
        bswapq  %r8                     /* byte swap (little-endian host, big-endian SPARC data) */
        movq    %r8,(%rsi)              /* data[0] */
        movq    %xmm1,%r8               /* upper 64 bits: the word at addr+8 */
        bswapq  %r8                     /* byte swap */
        movq    %r8,8(%rsi)             /* data[1] */
        ret
        .size   ss_ld128atomic, .-ss_ld128atomic

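/*
 * For reference, a rough C equivalent of the x86-64 path above, written with
 * SSE2 intrinsics. This is only an illustrative sketch: the function name
 * ss_ld128atomic_sketch is hypothetical, and the hand-written assembly above
 * is what actually gets assembled.
 *
 *   #include <emmintrin.h>   // SSE2 intrinsics
 *   #include <stdint.h>
 *
 *   void ss_ld128atomic_sketch( const void* addr, uint64_t data[2] )
 *   {
 *       __m128i  v  = _mm_load_si128((const __m128i*)addr);   // one 16-byte load
 *       uint64_t lo = (uint64_t)_mm_cvtsi128_si64(v);                        // bytes addr+0..7
 *       uint64_t hi = (uint64_t)_mm_cvtsi128_si64(_mm_unpackhi_epi64(v, v)); // bytes addr+8..15
 *       data[0] = __builtin_bswap64(lo);   // byte swap each half, as the asm above does
 *       data[1] = __builtin_bswap64(hi);   // (__builtin_bswap64 is a GCC builtin)
 *   }
 */
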
#else

.section ".text"

/*============================================================================*\
 * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
 *----------------------------------------------------------------------------
 * This routine emulates the quad load atomic instruction. It should only be
 * used with a memory implementation that uses the native SPARC instructions,
 * including the atomics, to implement simulated memory. This algorithm allows
 * us to implement memory without locks around quad loads and stores.
 *
 * The abstract algorithm is
 *
 *      do
 *          ld ref @ ofs + 0
 *          ld low @ ofs + 8
 *          ld cmp @ ofs + 0
 *      while ref != cmp
 *
 * The ref and cmp loads are from the same address. These two loads form a time
 * window. In this time window we check that the memory location @ ofs + 0
 * did not change: ref == cmp. If the value did not change, then the low value,
 * which must be loaded in the time window, is a correct lower part of the quad.
 * The ref (or cmp) value is the correct upper part of the quad load. If the
 * value @ ofs + 0 did change, then we try again.
 *
 * For RMO we insert membars between the loads to guarantee ordering of the
 * loads.
\*============================================================================*/
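
/*
 * A rough C rendering of the abstract algorithm above, for illustration only.
 * The helper name and the use of GCC's __atomic_thread_fence builtin are
 * assumptions of this sketch, not part of the model; the SPARC code below is
 * the real implementation.
 *
 *   #include <stdint.h>
 *
 *   static void ld128atomic_sketch( const volatile uint64_t* ofs, uint64_t data[2] )
 *   {
 *       uint64_t ref, low, cmp;
 *       do {
 *           ref = ofs[0];                               // ld ref @ ofs + 0
 *           __atomic_thread_fence(__ATOMIC_ACQUIRE);    // membar #LoadLoad (RMO)
 *           low = ofs[1];                               // ld low @ ofs + 8
 *           __atomic_thread_fence(__ATOMIC_ACQUIRE);    // membar #LoadLoad (RMO)
 *           cmp = ofs[0];                               // ld cmp @ ofs + 0
 *       } while (ref != cmp);                           // a store slipped in between: retry
 *       data[0] = ref;                                  // upper part of the quad
 *       data[1] = low;                                  // lower part of the quad
 *   }
 */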

.global ss_ld128atomic
.type ss_ld128atomic, #function

ss_ld128atomic:
        ldx     [%o0 + 0],%o2   ! load reference high
1:
        membar  #LoadLoad       ! RMO: order against the next load
        ldx     [%o0 + 8],%o3   ! load reference low
        membar  #LoadLoad       ! RMO: order against the next load
        ldx     [%o0 + 0],%o4   ! load compare high
        cmp     %o2,%o4         ! check that the high values loaded are the same,
        bne,a   %xcc,1b         ! which means no store happened in between
        ldx     [%o0 + 0],%o2   ! (annulled delay slot) reload reference high and retry
        stx     %o2,[%o1 + 0]   ! store high
        retl
        stx     %o3,[%o1 + 8]   ! store low (delay slot)


#endif
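
/*
 * Usage sketch (illustrative only): a caller emulating a quad load atomic
 * passes the 16-byte aligned address of the location in simulated memory and
 * a two-word result buffer. The declaration matches the prototype in the
 * comment block above; the surrounding caller code is hypothetical.
 *
 *   extern void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] );
 *
 *   uint64_t quad[2];
 *   ss_ld128atomic(addr, quad);   // addr must be 16-byte aligned
 *   // quad[0] now holds the 64-bit word at addr+0, quad[1] the word at addr+8
 */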