Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * Hypervisor Software File: __align_cpy_4.s | |
5 | * | |
6 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
7 | * | |
8 | * - Do no alter or remove copyright notices | |
9 | * | |
10 | * - Redistribution and use of this software in source and binary forms, with | |
11 | * or without modification, are permitted provided that the following | |
12 | * conditions are met: | |
13 | * | |
14 | * - Redistribution of source code must retain the above copyright notice, | |
15 | * this list of conditions and the following disclaimer. | |
16 | * | |
17 | * - Redistribution in binary form must reproduce the above copyright notice, | |
18 | * this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | |
20 | * | |
21 | * Neither the name of Sun Microsystems, Inc. or the names of contributors | |
22 | * may be used to endorse or promote products derived from this software | |
23 | * without specific prior written permission. | |
24 | * | |
25 | * This software is provided "AS IS," without a warranty of any kind. | |
26 | * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, | |
27 | * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A | |
28 | * PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN | |
29 | * MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR | |
30 | * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR | |
31 | * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN | |
32 | * OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR | |
33 | * FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE | |
34 | * DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, | |
35 | * ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF | |
36 | * SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. | |
37 | * | |
38 | * You acknowledge that this software is not designed, licensed or | |
39 | * intended for use in the design, construction, operation or maintenance of | |
40 | * any nuclear facility. | |
41 | * | |
42 | * ========== Copyright Header End ============================================ | |
43 | */ | |
44 | /* | |
45 | * Copyright (c) 1997, Sun Microsystems, Inc. | |
46 | * All rights reserved. | |
47 | */ | |
48 | ||
49 | .ident "@(#)__align_cpy_4.s 1.1 97/02/10 SMI" | |
50 | ||
51 | .file "__align_cpy_4.s" | |
52 | ||
53 | /* __align_cpy_4(s1, s2, n) | |
54 | * | |
55 | * Copy 4-byte aligned source to 4-byte aligned target in multiples of 4 bytes. | |
56 | * | |
57 | * Input: | |
58 | * o0 address of target | |
59 | * o1 address of source | |
60 | * o2 number of bytes to copy (must be a multiple of 4) | |
61 | * Output: | |
62 | * o0 address of target | |
63 | * Caller's registers that have been changed by this function: | |
64 | * o1-o5, g1, g5 | |
65 | * | |
66 | * Note: | |
67 | * This helper routine will not be used by any 32-bit compilations. | |
68 | * To do so would break binary compatibility with previous versions of | |
69 | * Solaris. | |
70 | * | |
71 | * Assumptions: | |
72 | * Source and target addresses are 4-byte aligned. | |
73 | * Bytes to be copied are non-overlapping or _exactly_ overlapping. | |
74 | * The number of bytes to be copied is a multiple of 4. | |
75 | * Call will usually be made with a byte count of more than 4*4 and | |
76 | * less than a few hundred bytes. Legal values are 0 to MAX_SIZE_T. | |
77 | * | |
78 | * Optimization attempt: | |
79 | * Reasonable speed for a generic v9. | |
80 | */ | |
81 | ||
82 | #include <sys/asm_linkage.h> | |
83 | ||
84 | !#include "synonyms.h" | |
85 | ||
86 | ENTRY(__align_cpy_4) | |
87 | brz,pn %o2, .done ! Byte count is zero: nothing to copy (the cmp below sits in the delay slot). | |
88 | cmp %o0, %o1 | |
89 | be,pn %xcc, .done ! Target == source (exact overlap): nothing to copy. | |
90 | and %o0, 7, %o3 ! o3 = target address mod 8 (zero when 8-byte aligned). | |
91 | and %o1, 7, %o4 ! o4 = source address mod 8 (zero when 8-byte aligned). | |
92 | cmp %o3, %o4 | |
93 | bne,pt %icc, .noton8 ! Offsets differ, so exactly one of source and target is 8-byte aligned. | |
94 | mov %o0, %g1 ! Delay slot (always executed): g1 = working target pointer; o0 is never written, so it stays valid as the return value. | |
95 | brz,pt %o3, .both8 ! Offsets are equal and zero: both are 8-byte aligned. | |
96 | nop | |
97 | ||
98 | ld [%o1], %o3 ! Neither is 8-byte aligned (equal nonzero offsets), so copy one word; | |
99 | subcc %o2, 4, %o2 ! then both pointers will be 8-byte aligned. Z is set if that word was the whole copy. | |
100 | st %o3, [%g1] | |
101 | bz,pn %xcc, .done | |
102 | add %g1, 4, %g1 | |
103 | b .both8 | |
104 | add %o1, 4, %o1 | |
105 | ||
106 | ! Case 1: source and target are both 8-byte aligned. Copy 16 bytes per pass | |
107 | ! with ldx/stx (.loop16a), then finish any 8- and/or 4-byte tail (.chkwd, .wrword). | |
108 | ||
109 | .align 32 | |
110 | .both8: ! Here o1 = source, g1 = target, o2 = bytes left (nonzero on every path that reaches this label). | |
111 | cmp %o2, 16 | |
112 | bl,a,pn %xcc, .chkwd | |
113 | cmp %o2, 8 | |
114 | ||
115 | sub %o2, 12, %o2 | |
116 | .loop16a: ! Copy 16 bytes per pass. o2 was biased by -12 above, so after the subcc it is > 0 only if at least 16 bytes will remain. | |
117 | ldx [%o1], %o3 | |
118 | ldx [%o1+8], %o4 | |
119 | subcc %o2, 16, %o2 | |
120 | stx %o3, [%g1] | |
121 | stx %o4, [%g1+8] | |
122 | add %o1, 16, %o1 | |
123 | bg,pt %xcc, .loop16a ! Have at least 16 bytes left. NOTE(review): bg/bl in this routine are signed tests, so a count >= 2^63 is not handled although the header lists 0..MAX_SIZE_T as legal -- confirm this is acceptable. | |
124 | add %g1, 16, %g1 | |
125 | ||
126 | addcc %o2, 12, %o2 | |
127 | bg,a,pt %xcc, .chkwd ! The addcc removed the bias: o2 = bytes still to copy (fewer than 16). Nonzero: go finish the tail. | |
128 | cmp %o2, 8 | |
129 | retl | |
130 | nop | |
131 | ||
132 | .chkwd: | |
133 | bl,a,pn %xcc, .wrword ! Fewer than 8 left (flags come from the cmp %o2, 8 in the delay slot of the branch that got here): only 4 bytes left; the annulled delay slot loads that word. | |
134 | ld [%o1], %o3 | |
135 | ||
136 | ldx [%o1], %o3 ! Have 8 or 12, so do 8. | |
137 | stx %o3, [%g1] | |
138 | add %o1, 8, %o1 | |
139 | add %g1, 8, %g1 | |
140 | subcc %o2, 8, %o2 | |
141 | bg,a,pn %xcc, .wrword ! Still have four to do; the annulled delay slot loads the word only when the branch is taken. | |
142 | ld [%o1], %o3 | |
143 | ||
144 | retl | |
145 | nop | |
146 | ||
147 | .wrword: ! Store the final word, already loaded into o3 by the delay slot of the branch that got here. | |
148 | st %o3, [%g1] | |
149 | ||
150 | .done: | |
151 | retl | |
152 | nop | |
153 | ||
154 | ! Case 2: exactly one of source and target is 8-byte aligned. Use 4-byte | |
155 | ! lduw and st instructions rather than trying to shuffle data around in | |
156 | ! registers: copy the leading (count mod 16) bytes first, then 16 bytes per pass until o1 reaches g5. | |
157 | ||
158 | .align 32 ! Ultra cache line boundary. | |
159 | .noton8: | |
160 | add %o1, %o2, %g5 ! g5 = ending address of source (o1 + byte count); the copy stops when o1 reaches it. | |
161 | andcc %o2, 15, %o3 ! o3 = number of bytes to copy mod 16. | |
162 | bz,pn %xcc, .loop16 ! Already a multiple of 16: go straight to the loop. Otherwise copy the leftover 4, 8 or 12 bytes first. | |
163 | cmp %o3, 4 | |
164 | bz,pn %xcc, .mod4 | |
165 | cmp %o3, 8 | |
166 | bz,pn %xcc, .mod8 | |
167 | cmp %o3, 12 | |
168 | bz,pt %xcc, .mod12 | |
169 | nop | |
170 | illtrap 0 ! Size not valid: count mod 16 is not 0, 4, 8 or 12, i.e. the count is not a multiple of 4. | |
171 | ||
172 | .mod4: ! Do first 4 bytes, then do multiples of 16. From here on o2 is reused as a data register; g5 marks the end. | |
173 | lduw [%o1], %o2 | |
174 | add %o1, 4, %o1 | |
175 | st %o2, [%g1] | |
176 | cmp %o1, %g5 | |
177 | bl,a,pt %xcc, .loop16 | |
178 | add %g1, 4, %g1 | |
179 | retl | |
180 | nop | |
181 | .mod8: ! Do first 8 bytes (two words), then do multiples of 16. | |
182 | lduw [%o1], %o2 | |
183 | lduw [%o1+4], %o3 | |
184 | add %o1, 8, %o1 | |
185 | st %o2, [%g1] | |
186 | st %o3, [%g1+4] | |
187 | cmp %o1, %g5 | |
188 | bl,a,pt %xcc, .loop16 | |
189 | add %g1, 8, %g1 | |
190 | retl | |
191 | nop | |
192 | .mod12: ! Do first 12 bytes (three words), then do multiples of 16. | |
193 | lduw [%o1], %o2 | |
194 | lduw [%o1+4], %o3 | |
195 | lduw [%o1+8], %o4 | |
196 | add %o1, 12, %o1 | |
197 | st %o2, [%g1] | |
198 | st %o3, [%g1+4] | |
199 | st %o4, [%g1+8] | |
200 | cmp %o1, %g5 | |
201 | bl,a,pt %xcc, .loop16 | |
202 | add %g1, 12, %g1 | |
203 | retl | |
204 | nop | |
205 | .align 32 ! Ultra cache line boundary. | |
206 | .loop16: ! Do multiples of 16 bytes: four word loads and stores per pass while o1 is below g5; g1 is advanced in the annulled delay slot, i.e. only when looping again. | |
207 | lduw [%o1], %o2 | |
208 | lduw [%o1+4], %o3 | |
209 | lduw [%o1+8], %o4 | |
210 | lduw [%o1+12], %o5 | |
211 | add %o1, 16, %o1 | |
212 | st %o2, [%g1] | |
213 | st %o3, [%g1+4] | |
214 | cmp %o1, %g5 | |
215 | st %o4, [%g1+8] | |
216 | st %o5, [%g1+12] | |
217 | bl,a,pt %xcc, .loop16 | |
218 | add %g1, 16,%g1 | |
219 | retl ! Target address is already in o0 (all pointer arithmetic was done on g1). | |
220 | nop | |
221 | ||
222 | SET_SIZE(__align_cpy_4) |