Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: SS_Ld128Atomic.s | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | #if defined(ARCH_X64) | |
24 | ||
25 | .text | |
26 | .align 16 /* align the entry point to 16 */ | |
27 | .globl ss_ld128atomic | |
28 | .type ss_ld128atomic, @function | |
29 | ss_ld128atomic: /* x86-64 variant. in: %rdi = addr (16 source bytes), %rsi = data[2] (destination) - assumes SysV AMD64 argument order, TODO confirm. scratch: %xmm0, %xmm1, %r8 */ | |
30 | movdqa (%rdi),%xmm0 /* 128-bit load (atomic); movdqa requires a 16-byte aligned address. NOTE(review): atomicity of a 16-byte SSE load is CPU dependent - confirm for the supported hosts */ | |
31 | movhlps %xmm0,%xmm1 /* xmm1[63:0] = xmm0[127:64], i.e. the bytes loaded from addr+8 */ | |
32 | movdq %xmm0,%r8 /* r8 = xmm0[63:0], the bytes loaded from addr+0 (movdq is presumably this assembler's spelling of the 64-bit xmm-to-gpr move - verify) */ | |
33 | bswapq %r8 /* reverse the byte order; presumably the simulated memory is big-endian - confirm */ | |
34 | movq %r8,(%rsi) /* data[0] = byte-swapped 64-bit word from addr+0 */ | |
35 | movdq %xmm1,%r8 /* r8 = second 64-bit word, taken from the same single load */ | |
36 | bswapq %r8 /* same byte reversal for the second word */ | |
37 | movq %r8,8(%rsi) /* data[1] = byte-swapped 64-bit word from addr+8 */ | |
38 | ret | |
39 | .size ss_ld128atomic, [.-ss_ld128atomic] /* symbol size = distance from the label to this point */ | |
40 | ||
41 | #else | |
42 | ||
43 | .section ".text" /* code for the build where ARCH_X64 is not defined */ | |
44 | ||
45 | /*============================================================================*\ | |
46 | * void ss_ld128atomic( SS_Paddr addr, uint64_t data[2] ); | |
47 | *---------------------------------------------------------------------------- | |
48 | * This routine emulates the quad load atomic instruction. It should only be | |
49 | * used with a memory implementation that uses the native sparc instructions, | |
50 | * including the atomics, to implement simulated memory. This algorithm allows | |
51 | * us to implement memory without locks around loadquads and stores. | |
52 | * | |
53 | * The abstract algorithm is | |
54 | * | |
55 | * do | |
56 | * ld ref @ ofs + 0 | |
57 | * ld low @ ofs + 8 | |
58 | * ld cmp @ ofs + 0 | |
59 | * while ref != cmp | |
60 | * | |
61 | * The ref and cmp load from the same address. These two loads form a time | |
62 | * window. In this time window we check that the memory location @ ofs + 0 | |
63 | * did not change: ref == cmp. If the value did not change then the low value, | |
64 | * which must be loaded in the time window, is a correct lower part of the quad. | |
65 | * The ref (or cmp) value is the correct upper part of the quad load. Now if the | |
66 | * value @ ofs + 0 did change then we try again. | |
67 | * | |
68 | * For RMO we insert membars between the loads to guarantee ordering of the | |
69 | * loads. | |
70 | \*============================================================================*/ | |
71 | ||
72 | .global ss_ld128atomic | |
73 | .type ss_ld128atomic, #function | |
74 | ||
75 | ss_ld128atomic: ! in: %o0 = addr, %o1 = data[2]; leaf routine, uses %o2-%o4 and the condition codes | |
76 | ldx [%o0 + 0],%o2 ! load reference high (first copy of the 64 bits at addr+0) | |
77 | 1: ! retry point; %o2 holds the current reference high word | |
78 | membar #LoadLoad ! RMO: keep the reference load ahead of the low load | |
79 | ldx [%o0 + 8],%o3 ! load reference low, inside the ref/cmp window | |
80 | membar #LoadLoad ! RMO: keep the low load ahead of the compare load | |
81 | ldx [%o0 + 0],%o4 ! load compare high (second read of addr+0) | |
82 | cmp %o2,%o4 ! check high values loaded are the same | |
83 | bne,a %xcc,1b ! differ: a store changed the high word in between, so retry | |
84 | ldx [%o0 + 0],%o2 ! delay slot, annulled unless the branch is taken: fetch a fresh reference high | |
85 | stx %o2,[%o1 + 0] ! store high: data[0] = 64 bits from addr+0 | |
86 | retl ! leaf return; the next instruction runs in its delay slot | |
87 | stx %o3,[%o1 + 8] ! store low: data[1] = 64 bits from addr+8 | |
88 | .size ss_ld128atomic, .-ss_ld128atomic ! record the symbol size, as the ARCH_X64 variant does | |
89 | ||
90 | #endif |