Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / hypervisor / src / sample / lib / __align_cpy_4.s
CommitLineData
920dae64
AT
1/*
2* ========== Copyright Header Begin ==========================================
3*
4* Hypervisor Software File: __align_cpy_4.s
5*
6* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
7*
8* - Do no alter or remove copyright notices
9*
10* - Redistribution and use of this software in source and binary forms, with
11* or without modification, are permitted provided that the following
12* conditions are met:
13*
14* - Redistribution of source code must retain the above copyright notice,
15* this list of conditions and the following disclaimer.
16*
17* - Redistribution in binary form must reproduce the above copyright notice,
18* this list of conditions and the following disclaimer in the
19* documentation and/or other materials provided with the distribution.
20*
21* Neither the name of Sun Microsystems, Inc. or the names of contributors
22* may be used to endorse or promote products derived from this software
23* without specific prior written permission.
24*
25* This software is provided "AS IS," without a warranty of any kind.
26* ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
27* INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
28* PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
29* MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
30* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
31* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
32* OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
33* FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
34* DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
35* ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
36* SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
37*
38* You acknowledge that this software is not designed, licensed or
39* intended for use in the design, construction, operation or maintenance of
40* any nuclear facility.
41*
42* ========== Copyright Header End ============================================
43*/
44/*
45 * Copyright (c) 1997, Sun Microsystems, Inc.
46 * All rights reserved.
47 */
48
49.ident "@(#)__align_cpy_4.s 1.1 97/02/10 SMI"
50
51 .file "__align_cpy_4.s"
52
53/* __align_cpy_4(s1, s2, n)
54 *
55 * Copy 4-byte aligned source to 4-byte aligned target in multiples of 4 bytes.
56 *
57 * Input:
58 * o0 address of target
59 * o1 address of source
60 * o2 number of bytes to copy (must be a multiple of 4)
61 * Output:
62 * o0 address of target
63 * Caller's registers that have been changed by this function:
64 * o1-o5, g1, g5
65 *
66 * Note:
67 * This helper routine will not be used by any 32-bit compilations.
68 * To do so would break binary compatibility with previous versions of
69 * Solaris.
70 *
71 * Assumptions:
72 * Source and target addresses are 4-byte aligned.
73 * Bytes to be copied are non-overlapping or _exactly_ overlapping.
74 * The number of bytes to be copied is a multiple of 4.
75 * Call will usually be made with a byte count of more than 4*4 and
76 * less than a few hundred bytes. Legal values are 0 to MAX_SIZE_T.
77 *
78 * Optimization attempt:
79 * Reasonable speed for a generic v9.
80 */
81
82#include <sys/asm_linkage.h>
83
84!#include "synonyms.h"
85
86 ENTRY(__align_cpy_4)
! Copies %o2 bytes from [%o1] to [%o0], one 32-bit word at a time or, when
! source and target share the same 8-byte alignment, with 64-bit ldx/stx.
!
! Register use inside this routine:
!   %o0      target address; never written here, so it is the return value
!   %o1      running source pointer
!   %o2      bytes remaining (8-byte path); data temporary (.noton8 path)
!   %o3-%o5  alignment scratch, then data temporaries
!   %g1      running target pointer (starts as a copy of %o0)
!   %g5      end address of the source (.noton8 path only)
!
! The instruction following each branch is its delay slot. For a
! conditional branch with the ,a (annul) bit the delay slot executes only
! when the branch is taken; without ,a it always executes.
87 brz,pn %o2, .done ! Skip out if no bytes to copy.
88 cmp %o0, %o1 ! (delay slot) Compare target with source.
89 be,pn %xcc, .done ! Addresses are identical--done.
90 and %o0, 7, %o3 ! (delay slot) Is target 8-byte aligned?
91 and %o1, 7, %o4 ! Is source 8-byte aligned?
92 cmp %o3, %o4 ! Same offset within an 8-byte unit?
93 bne,pt %icc, .noton8 ! No: exactly one of source and target is
94 mov %o0, %g1 ! 8-byte aligned. (delay slot) %g1 = target.
95 brz,pt %o3, .both8 ! Both are 8-byte aligned.
96 nop ! (delay slot)
97
98 ld [%o1], %o3 ! Neither is aligned, so do 4 bytes;
99 subcc %o2, 4, %o2 ! then both will be aligned.
100 st %o3, [%g1]
101 bz,pn %xcc, .done ! That word was the whole copy.
102 add %g1, 4, %g1 ! (delay slot) Advance target.
103 b .both8
104 add %o1, 4, %o1 ! (delay slot) Advance source.
105
106! Section of code dealing with case where source and target are both 8-byte
107! aligned. Get and store 16 bytes at a time using ldx and stx.
108
109 .align 32
110.both8: ! Both source and target are aligned.
111 cmp %o2, 16 ! Fewer than 16 bytes to copy?
112 bl,a,pn %xcc, .chkwd ! Yes: skip the 16-byte loop.
113 cmp %o2, 8 ! (annulled delay slot) Set cc for .chkwd.
114
115 sub %o2, 12, %o2 ! Bias count by -12 so that, for multiples
! of 4, biased count > 0 means at least 16 bytes remain.
116.loop16a: ! Load and store 16 bytes at a time.
117 ldx [%o1], %o3
118 ldx [%o1+8], %o4
119 subcc %o2, 16, %o2 ! Account for these 16 bytes; sets cc.
120 stx %o3, [%g1]
121 stx %o4, [%g1+8]
122 add %o1, 16, %o1 ! Advance source.
123 bg,pt %xcc, .loop16a ! Have at least 16 bytes left.
124 add %g1, 16, %g1 ! (delay slot) Advance target.
125
126 addcc %o2, 12, %o2 ! Remove the bias: %o2 = bytes still to do.
127 bg,a,pt %xcc, .chkwd ! Have some remaining bytes.
128 cmp %o2, 8 ! (annulled delay slot) Set cc for .chkwd.
129 retl ! Nothing left; %o0 still holds the target.
130 nop ! (delay slot)
131
132.chkwd: ! Entered with cc set by cmp %o2, 8.
133 bl,a,pn %xcc, .wrword ! Only 4 bytes left.
134 ld [%o1], %o3 ! (annulled delay slot) Fetch the last word.
135
136 ldx [%o1], %o3 ! Have 8 or 12, so do 8.
137 stx %o3, [%g1]
138 add %o1, 8, %o1
139 add %g1, 8, %g1
140 subcc %o2, 8, %o2 ! Anything left after these 8 bytes?
141 bg,a,pn %xcc, .wrword ! Still have four to do.
142 ld [%o1], %o3 ! (annulled delay slot) Fetch the last word.
143
144 retl ! Count was exactly 8; all done.
145 nop ! (delay slot)
146
147.wrword: ! Copy final word.
148 st %o3, [%g1] ! Store the word loaded in the delay slot;
! then fall through into .done.
149
150.done:
151 retl ! Return; %o0 still holds the target address.
152 nop ! (delay slot)
153
154! Section of code where either source or target, but not both, is 8-byte
155! aligned. So, use ld and st instructions rather than trying to copy stuff
156! around in registers. The leftover (count mod 16) bytes are copied first,
! then the rest in 16-byte groups until the source pointer reaches %g5.
157
158 .align 32 ! Ultra cache line boundary.
159.noton8:
160 add %o1, %o2, %g5 ! Ending address of source.
161 andcc %o2, 15, %o3 ! Mod 16 of number of bytes to copy.
162 bz,pn %xcc, .loop16 ! Copy odd amounts first, then multiples of 16.
163 cmp %o3, 4 ! (delay slot) Is the remainder 4?
164 bz,pn %xcc, .mod4
165 cmp %o3, 8 ! (delay slot) Is the remainder 8?
166 bz,pn %xcc, .mod8
167 cmp %o3, 12 ! (delay slot) Is the remainder 12?
168 bz,pt %xcc, .mod12
169 nop ! (delay slot)
170 illtrap 0 ! Size not valid.
171
172.mod4: ! Do first 4 bytes, then do multiples of 16.
173 lduw [%o1], %o2 ! %o2 is free: the count now lives in %g5.
174 add %o1, 4, %o1
175 st %o2, [%g1]
176 cmp %o1, %g5 ! More source left?
177 bl,a,pt %xcc, .loop16 ! Yes: do the rest 16 bytes at a time.
178 add %g1, 4, %g1 ! (annulled delay slot) Advance target.
179 retl ! The 4 bytes were the whole copy.
180 nop ! (delay slot)
181.mod8: ! Do first 8 bytes, then do multiples of 16.
182 lduw [%o1], %o2
183 lduw [%o1+4], %o3
184 add %o1, 8, %o1
185 st %o2, [%g1]
186 st %o3, [%g1+4]
187 cmp %o1, %g5 ! More source left?
188 bl,a,pt %xcc, .loop16 ! Yes: do the rest 16 bytes at a time.
189 add %g1, 8, %g1 ! (annulled delay slot) Advance target.
190 retl ! The 8 bytes were the whole copy.
191 nop ! (delay slot)
192.mod12: ! Do first 12 bytes, then do multiples of 16.
193 lduw [%o1], %o2
194 lduw [%o1+4], %o3
195 lduw [%o1+8], %o4
196 add %o1, 12, %o1
197 st %o2, [%g1]
198 st %o3, [%g1+4]
199 st %o4, [%g1+8]
200 cmp %o1, %g5 ! More source left?
201 bl,a,pt %xcc, .loop16 ! Yes: do the rest 16 bytes at a time.
202 add %g1, 12, %g1 ! (annulled delay slot) Advance target.
203 retl ! The 12 bytes were the whole copy.
204 nop ! (delay slot)
205 .align 32 ! Ultra cache line boundary.
206.loop16: ! Do multiples of 16 bytes.
207 lduw [%o1], %o2
208 lduw [%o1+4], %o3
209 lduw [%o1+8], %o4
210 lduw [%o1+12], %o5
211 add %o1, 16, %o1 ! Advance source before the end test.
212 st %o2, [%g1]
213 st %o3, [%g1+4]
214 cmp %o1, %g5 ! Source pointer still below the end address?
215 st %o4, [%g1+8]
216 st %o5, [%g1+12]
217 bl,a,pt %xcc, .loop16 ! Yes: go around again.
218 add %g1, 16,%g1 ! (annulled delay slot) Advance target.
219 retl ! Target address is already in o0.
220 nop ! (delay slot)
221
222 SET_SIZE(__align_cpy_4)