Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: tso_n1_binit3.s | |
5 | * Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved | |
6 | * 4150 Network Circle, Santa Clara, California 95054, U.S.A. | |
7 | * | |
8 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License as published by | |
12 | * the Free Software Foundation; version 2 of the License. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | * GNU General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU General Public License | |
20 | * along with this program; if not, write to the Free Software | |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 | * | |
23 | * For the avoidance of doubt, and except that if any non-GPL license | |
24 | * choice is available it will apply instead, Sun elects to use only | |
25 | * the General Public License version 2 (GPLv2) at this time for any | |
26 | * software where a choice of GPL license versions is made | |
27 | * available with the language indicating that GPLv2 or any later version | |
28 | * may be used, or where a choice of which version of the GPL is applied is | |
29 | * otherwise unspecified. | |
30 | * | |
31 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
32 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
33 | * have any questions. | |
34 | * | |
35 | * | |
36 | * ========== Copyright Header End ============================================ | |
37 | */ | |
38 | #define srcaddr %i0 /* register alias: read pointer used by every th_main_N routine */ | |
39 | #define dstaddr %i1 /* register alias: write pointer used by the copying routines */ | |
40 | #define count %i2 /* register alias: loop counter (iterations; bytes in th_main_1) */ | |
41 | ||
42 | #define ASI_BLK_P 0xf0 /* ASI number used by the stda block stores in th_main_1 */ | |
43 | ||
44 | #include "hboot.s" | |
45 | ||
46 | .global main | |
47 | ||
48 | .text | |
49 | main: /* entry point: writes 0x4 to %fprs, issues the T_CHANGE_PRIV trap, then invokes th_fork */ | |
50 | wr %g0, 0x4, %fprs /* make sure fef is 1 */ | |
51 | ta T_CHANGE_PRIV /* trap number is defined outside this file; by its name it changes privilege level - TODO confirm */ | |
52 | ||
53 | th_fork(th_main,%l0) /* macro is not defined in this file; presumably sends each hardware thread to its own th_main_N label using %l0 as scratch - confirm against hboot.s */ | |
54 | ||
55 | th_main_0: /* thread 0: copy 0x100 blocks of 0x40 bytes from in_stream to out_stream, every access through ASI 0x22 */ | |
56 | ||
57 | setx in_stream, %g2, srcaddr /* srcaddr = address of in_stream; %g2 is the setx scratch register */ | |
58 | setx out_stream, %g2, dstaddr /* dstaddr = address of out_stream */ | |
59 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
60 | ||
61 | timing_loop0: /* per iteration: four ldda fill %l0-%l7, then eight stxa write them out */ | |
62 | ldda [srcaddr] 0x22, %l0 /* fills the %l0/%l1 pair; NOTE(review): ASI 0x22 is presumably the 16-byte twin-load ASI, matching the 0x10 pointer step - confirm in the T2 PRM */ | |
63 | add srcaddr, 0x10, srcaddr | |
64 | ldda [srcaddr] 0x22, %l2 | |
65 | add srcaddr, 0x10, srcaddr | |
66 | ldda [srcaddr] 0x22, %l4 | |
67 | add srcaddr, 0x10, srcaddr | |
68 | ldda [srcaddr] 0x22, %l6 | |
69 | add srcaddr, 0x10, srcaddr | |
70 | stxa %l0, [dstaddr] 0x22 /* 8-byte store through the same ASI; NOTE(review): presumably a block-init store, going by the file name - confirm */ | |
71 | add dstaddr, 0x8, dstaddr | |
72 | stxa %l1, [dstaddr] 0x22 | |
73 | add dstaddr, 0x8, dstaddr | |
74 | stxa %l2, [dstaddr] 0x22 | |
75 | add dstaddr, 0x8, dstaddr | |
76 | stxa %l3, [dstaddr] 0x22 | |
77 | add dstaddr, 0x8, dstaddr | |
78 | stxa %l4, [dstaddr] 0x22 | |
79 | add dstaddr, 0x8, dstaddr | |
80 | stxa %l5, [dstaddr] 0x22 | |
81 | add dstaddr, 0x8, dstaddr | |
82 | stxa %l6, [dstaddr] 0x22 | |
83 | add dstaddr, 0x8, dstaddr | |
84 | stxa %l7, [dstaddr] 0x22 | |
85 | add dstaddr, 0x8, dstaddr | |
86 | ||
87 | deccc count /* count -= 1, setting the condition codes */ | |
88 | bg,pt %xcc,timing_loop0 /* repeat while count is still greater than zero */ | |
89 | nop /* branch delay slot */ | |
90 | ||
91 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
92 | ||
93 | th_main_1: /* thread 1: copy 128*64 bytes from in_stream1 to out_stream1 using ldd + faligndata and 64-byte stda block stores */ | |
94 | setx in_stream1, %g2, srcaddr /* srcaddr = address of in_stream1 */ | |
95 | setx out_stream1, %g2, dstaddr /* dstaddr = address of out_stream1 */ | |
96 | alignaddr srcaddr, %g0, srcaddr /* round srcaddr down to an 8-byte boundary and record its low bits as the faligndata offset */ | |
97 | ||
98 | ! COUNT bytes to copy | |
99 | set 128, count | |
100 | mulx count, 8*8, count /* count = 128 * 64 = 8192 bytes */ | |
101 | ||
102 | ||
103 | prefetch [srcaddr + 0*64], 1 /* prefetch (function code 1) the first four 64-byte source blocks */ | |
104 | prefetch [srcaddr + 1*64], 1 | |
105 | prefetch [srcaddr + 2*64], 1 | |
106 | prefetch [srcaddr + 3*64], 1 | |
107 | ldd [srcaddr + 0*8], %f0 /* prologue: load nine source doublewords into %f0-%f16 ... */ | |
108 | prefetch [srcaddr + 4*64], 1 | |
109 | ldd [srcaddr + 1*8], %f2 | |
110 | ldd [srcaddr + 2*8], %f4 | |
111 | faligndata %f0, %f2, %f32 /* ... and build the first six output doublewords in %f32-%f42 */ | |
112 | ldd [srcaddr + 3*8], %f6 | |
113 | faligndata %f2, %f4, %f34 | |
114 | ldd [srcaddr + 4*8], %f8 | |
115 | faligndata %f4, %f6, %f36 | |
116 | ldd [srcaddr + 5*8], %f10 | |
117 | faligndata %f6, %f8, %f38 | |
118 | ldd [srcaddr + 6*8], %f12 | |
119 | faligndata %f8, %f10, %f40 | |
120 | ldd [srcaddr + 7*8], %f14 | |
121 | faligndata %f10, %f12, %f42 | |
122 | ldd [srcaddr + 8*8], %f16 /* ninth doubleword: becomes the first input of the next block */ | |
123 | subcc count, 64, count /* one block is now pending; no branch tests these condition codes before the subcc inside the loop */ | |
124 | add srcaddr, 64, srcaddr | |
125 | ||
126 | timing_loop1: /* each pass completes the pending block, stores it, and starts building the next one */ | |
127 | fmovd %f16, %f0 /* carry the ninth doubleword of the previous group over as the first of this one */ | |
128 | ldd [srcaddr + 1*8], %f2 | |
129 | faligndata %f12, %f14, %f44 /* seventh output doubleword of the pending block */ | |
130 | ldd [srcaddr + 2*8], %f4 | |
131 | faligndata %f14, %f0, %f46 /* eighth output doubleword of the pending block */ | |
132 | stda %f32, [dstaddr]ASI_BLK_P /* write %f32-%f46 to dstaddr as one block store; dstaddr advances by 64 below */ | |
133 | ldd [srcaddr + 3*8], %f6 | |
134 | faligndata %f0, %f2, %f32 /* start refilling %f32-%f42 for the next block */ | |
135 | ldd [srcaddr + 4*8], %f8 | |
136 | faligndata %f2, %f4, %f34 | |
137 | ldd [srcaddr + 5*8], %f10 | |
138 | faligndata %f4, %f6, %f36 | |
139 | ldd [srcaddr + 6*8], %f12 | |
140 | faligndata %f6, %f8, %f38 | |
141 | ldd [srcaddr + 7*8], %f14 | |
142 | faligndata %f8, %f10, %f40 | |
143 | ldd [srcaddr + 8*8], %f16 | |
144 | prefetch [srcaddr + 64], 1 | |
145 | faligndata %f10, %f12, %f42 | |
146 | subcc count, 64, count /* 64 fewer bytes remaining; sets the condition codes for the branch */ | |
147 | add dstaddr, 64, dstaddr | |
148 | bg,pt %xcc,timing_loop1 /* repeat while count is still greater than zero */ | |
149 | add srcaddr, 64, srcaddr /* branch delay slot: advance the source pointer */ | |
150 | tidy_up: ! should handle the remaining partial block here | |
151 | fmovd %f16, %f0 /* finish the block that was still pending when the loop exited */ | |
152 | faligndata %f12, %f14, %f44 | |
153 | faligndata %f14, %f0, %f46 | |
154 | stda %f32, [dstaddr]ASI_BLK_P /* final block store */ | |
155 | membar #Sync /* barrier between the last block store and the trap */ | |
156 | ||
157 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
158 | ||
159 | th_main_2: /* thread 2: copy 0x100 blocks of 0x40 bytes from in_stream2 to out_stream2; loads use ASI 0x22, stores are plain stx */ | |
160 | ||
161 | setx in_stream2, %g2, srcaddr /* srcaddr = address of in_stream2; %g2 is the setx scratch register */ | |
162 | setx out_stream2, %g2, dstaddr /* dstaddr = address of out_stream2 */ | |
163 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
164 | ||
165 | timing_loop2: /* per iteration: four ldda fill %l0-%l7, then eight stx write them to dstaddr+0x00..0x38 */ | |
166 | ldda [srcaddr] 0x22, %l0 /* fills the %l0/%l1 pair; NOTE(review): ASI 0x22 is presumably the 16-byte twin-load ASI, matching the 0x10 pointer step - confirm in the T2 PRM */ | |
167 | add srcaddr, 0x10, srcaddr | |
168 | ldda [srcaddr] 0x22, %l2 | |
169 | add srcaddr, 0x10, srcaddr | |
170 | ldda [srcaddr] 0x22, %l4 | |
171 | add srcaddr, 0x10, srcaddr | |
172 | ldda [srcaddr] 0x22, %l6 | |
173 | add srcaddr, 0x10, srcaddr | |
174 | stx %l0, [dstaddr] | |
175 | stx %l1, [dstaddr + 0x8] | |
176 | stx %l2, [dstaddr + 0x10] /* fixed: this row stored %l4, so %l2 was never written and %l4 was written twice */ | |
177 | stx %l3, [dstaddr + 0x18] | |
178 | stx %l4, [dstaddr + 0x20] | |
179 | stx %l5, [dstaddr + 0x28] | |
180 | stx %l6, [dstaddr + 0x30] | |
181 | stx %l7, [dstaddr + 0x38] | |
182 | ||
183 | deccc count /* count -= 1, setting the condition codes */ | |
184 | bg,pt %xcc,timing_loop2 /* repeat while count is still greater than zero */ | |
185 | add dstaddr, 0x40, dstaddr /* branch delay slot: advance to the next 0x40-byte destination block */ | |
186 | ||
187 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
188 | ||
189 | ||
190 | th_main_3: /* thread 3: copy 0x100 blocks of 0x40 bytes from in_stream3 to out_stream3, every access through ASI 0x22 */ | |
191 | ||
192 | setx in_stream3, %g2, srcaddr /* srcaddr = address of in_stream3; %g2 is the setx scratch register */ | |
193 | setx out_stream3, %g2, dstaddr /* dstaddr = address of out_stream3 */ | |
194 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
195 | ||
196 | timing_loop3: /* per iteration: four ldda fill %l0-%l7, then eight stxa write them out */ | |
197 | ldda [srcaddr] 0x22, %l0 /* fills the %l0/%l1 pair; NOTE(review): ASI 0x22 is presumably the 16-byte twin-load ASI, matching the 0x10 pointer step - confirm in the T2 PRM */ | |
198 | add srcaddr, 0x10, srcaddr | |
199 | ldda [srcaddr] 0x22, %l2 | |
200 | add srcaddr, 0x10, srcaddr | |
201 | ldda [srcaddr] 0x22, %l4 | |
202 | add srcaddr, 0x10, srcaddr | |
203 | ldda [srcaddr] 0x22, %l6 | |
204 | add srcaddr, 0x10, srcaddr | |
205 | stxa %l0, [dstaddr] 0x22 /* 8-byte store through the same ASI; NOTE(review): presumably a block-init store, going by the file name - confirm */ | |
206 | add dstaddr, 0x8, dstaddr | |
207 | stxa %l1, [dstaddr] 0x22 | |
208 | add dstaddr, 0x8, dstaddr | |
209 | stxa %l2, [dstaddr] 0x22 | |
210 | add dstaddr, 0x8, dstaddr | |
211 | stxa %l3, [dstaddr] 0x22 | |
212 | add dstaddr, 0x8, dstaddr | |
213 | stxa %l4, [dstaddr] 0x22 | |
214 | add dstaddr, 0x8, dstaddr | |
215 | stxa %l5, [dstaddr] 0x22 | |
216 | add dstaddr, 0x8, dstaddr | |
217 | stxa %l6, [dstaddr] 0x22 | |
218 | add dstaddr, 0x8, dstaddr | |
219 | stxa %l7, [dstaddr] 0x22 | |
220 | add dstaddr, 0x8, dstaddr | |
221 | ||
222 | deccc count /* count -= 1, setting the condition codes */ | |
223 | bg,pt %xcc,timing_loop3 /* repeat while count is still greater than zero */ | |
224 | nop /* branch delay slot */ | |
225 | ||
226 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
227 | ||
228 | th_main_4: /* thread 4: copy 0x100 blocks of 0x40 bytes from in_stream4 to out_stream4, every access through ASI 0x22 */ | |
229 | ||
230 | setx in_stream4, %g2, srcaddr /* srcaddr = address of in_stream4; %g2 is the setx scratch register */ | |
231 | setx out_stream4, %g2, dstaddr /* dstaddr = address of out_stream4 */ | |
232 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
233 | ||
234 | ||
235 | timing_loop4: /* per iteration: four ldda fill %l0-%l7, then eight stxa write them out */ | |
236 | ldda [srcaddr] 0x22, %l0 /* fills the %l0/%l1 pair; NOTE(review): ASI 0x22 is presumably the 16-byte twin-load ASI, matching the 0x10 pointer step - confirm in the T2 PRM */ | |
237 | add srcaddr, 0x10, srcaddr | |
238 | ldda [srcaddr] 0x22, %l2 | |
239 | add srcaddr, 0x10, srcaddr | |
240 | ldda [srcaddr] 0x22, %l4 | |
241 | add srcaddr, 0x10, srcaddr | |
242 | ldda [srcaddr] 0x22, %l6 | |
243 | add srcaddr, 0x10, srcaddr | |
244 | stxa %l0, [dstaddr] 0x22 /* 8-byte store through the same ASI; NOTE(review): presumably a block-init store, going by the file name - confirm */ | |
245 | add dstaddr, 0x8, dstaddr | |
246 | stxa %l1, [dstaddr] 0x22 | |
247 | add dstaddr, 0x8, dstaddr | |
248 | stxa %l2, [dstaddr] 0x22 | |
249 | add dstaddr, 0x8, dstaddr | |
250 | stxa %l3, [dstaddr] 0x22 | |
251 | add dstaddr, 0x8, dstaddr | |
252 | stxa %l4, [dstaddr] 0x22 | |
253 | add dstaddr, 0x8, dstaddr | |
254 | stxa %l5, [dstaddr] 0x22 | |
255 | add dstaddr, 0x8, dstaddr | |
256 | stxa %l6, [dstaddr] 0x22 | |
257 | add dstaddr, 0x8, dstaddr | |
258 | stxa %l7, [dstaddr] 0x22 | |
259 | add dstaddr, 0x8, dstaddr | |
260 | ||
261 | deccc count /* count -= 1, setting the condition codes */ | |
262 | bg,pt %xcc,timing_loop4 /* repeat while count is still greater than zero */ | |
263 | nop /* branch delay slot */ | |
264 | ||
265 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
266 | ||
267 | th_main_5: /* thread 5: read out_stream from high to low addresses with ldda through ASI 0x2a; nothing is stored */ | |
268 | ||
269 | setx out_stream, %g2, srcaddr /* srcaddr = address of out_stream, the buffer th_main_0 writes */ | |
270 | setx 0xff * 0x40, %g2, %g3 /* %g3 = byte offset 0x3fc0 */ | |
271 | add srcaddr, %g3, srcaddr /* start reading at out_stream + 0x3fc0 */ | |
272 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
273 | ||
274 | timing_loop5: /* four loads per iteration, stepping down by 0x10; NOTE(review): 0x400 loads from +0x3fc0 end at out_stream - 0x30, below the buffer - confirm this is intended */ | |
275 | ldda [srcaddr] 0x2a, %l0 /* NOTE(review): ASI 0x2a is presumably the little-endian counterpart of 0x22 - confirm in the T2 PRM */ | |
276 | sub srcaddr, 0x10, srcaddr | |
277 | ldda [srcaddr] 0x2a, %l2 | |
278 | sub srcaddr, 0x10, srcaddr | |
279 | ldda [srcaddr] 0x2a, %l4 | |
280 | sub srcaddr, 0x10, srcaddr | |
281 | ldda [srcaddr] 0x2a, %l6 | |
282 | sub srcaddr, 0x10, srcaddr | |
283 | ||
284 | deccc count /* count -= 1, setting the condition codes */ | |
285 | bg,pt %xcc,timing_loop5 /* repeat while count is still greater than zero */ | |
286 | nop /* branch delay slot */ | |
287 | ||
288 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
289 | ||
290 | th_main_6: /* thread 6: read out_stream from high to low addresses with plain ldx; nothing is stored */ | |
291 | ||
292 | setx out_stream, %g2, srcaddr /* srcaddr = address of out_stream, the buffer th_main_0 writes */ | |
293 | setx 0xff * 0x40, %g2, %g3 /* %g3 = byte offset 0x3fc0 */ | |
294 | add srcaddr, %g3, srcaddr /* start reading at out_stream + 0x3fc0 */ | |
295 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
296 | ||
297 | timing_loop6: /* four 8-byte loads per iteration, one every 0x10 bytes going down; NOTE(review): the last load is at out_stream - 0x30, below the buffer - confirm this is intended */ | |
298 | ldx [srcaddr], %l0 | |
299 | sub srcaddr, 0x10, srcaddr | |
300 | ldx [srcaddr], %l2 | |
301 | sub srcaddr, 0x10, srcaddr | |
302 | ldx [srcaddr], %l4 | |
303 | sub srcaddr, 0x10, srcaddr | |
304 | ldx [srcaddr], %l6 | |
305 | sub srcaddr, 0x10, srcaddr | |
306 | ||
307 | deccc count /* count -= 1, setting the condition codes */ | |
308 | bg,pt %xcc,timing_loop6 /* repeat while count is still greater than zero */ | |
309 | nop /* branch delay slot; NOTE(review): unlike the other routines no ta GOOD_TRAP follows, so execution falls through into th_main_7 - confirm this is intended */ | |
310 | ||
311 | th_main_7: /* thread 7 (also reached by fall-through from th_main_6): read out_stream2 from high to low addresses with plain ldx */ | |
312 | ||
313 | setx out_stream2, %g2, srcaddr /* srcaddr = address of out_stream2, the buffer th_main_2 writes */ | |
314 | setx 0xff * 0x40, %g2, %g3 /* %g3 = byte offset 0x3fc0 */ | |
315 | add srcaddr, %g3, srcaddr /* start reading at out_stream2 + 0x3fc0 */ | |
316 | setx 0x100, %g2, count /* 0x100 loop iterations */ | |
317 | ||
318 | timing_loop7: /* four 8-byte loads per iteration, one every 0x10 bytes going down; NOTE(review): the last load is at out_stream2 - 0x30, below the buffer - confirm this is intended */ | |
319 | ldx [srcaddr], %l0 | |
320 | sub srcaddr, 0x10, srcaddr | |
321 | ldx [srcaddr], %l2 | |
322 | sub srcaddr, 0x10, srcaddr | |
323 | ldx [srcaddr], %l4 | |
324 | sub srcaddr, 0x10, srcaddr | |
325 | ldx [srcaddr], %l6 | |
326 | sub srcaddr, 0x10, srcaddr | |
327 | ||
328 | deccc count /* count -= 1, setting the condition codes */ | |
329 | bg,pt %xcc,timing_loop7 /* repeat while count is still greater than zero */ | |
330 | nop /* branch delay slot */ | |
331 | ta GOOD_TRAP /* trap number is defined outside this file; by its name it reports a passing run */ | |
332 | ||
333 | user_text_end: | |
334 | ||
335 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
336 | ||
337 | .global in_stream /* export all ten stream buffer labels */ | |
338 | .global out_stream | |
339 | .global in_stream1 | |
340 | .global out_stream1 | |
341 | .global in_stream2 | |
342 | .global out_stream2 | |
343 | .global in_stream3 | |
344 | .global out_stream3 | |
345 | .global in_stream4 | |
346 | .global out_stream4 | |
347 | .data | |
348 | .align 0x1000 /* every buffer below starts on a 0x1000-byte boundary */ | |
349 | user_data_start: | |
350 | in_stream: /* source buffer read by th_main_0: 15 pattern words (60 bytes) followed by 16000 reserved bytes */ | |
351 | .word 0xb1bababa | |
352 | .word 0xb2bababa | |
353 | .word 0xb3bababa | |
354 | .word 0xb4bababa | |
355 | .word 0xb5bababa | |
356 | .word 0xb6bababa | |
357 | .word 0xb7bababa | |
358 | .word 0xb8bababa | |
359 | .word 0xb9bababa | |
360 | .word 0xbabababa | |
361 | .word 0xbbbababa | |
362 | .word 0xbcbababa | |
363 | .word 0xbdbababa | |
364 | .word 0xbebababa | |
365 | .word 0xbfbababa | |
366 | .skip 16000 | |
367 | ||
368 | ! offset the out_stream block | |
369 | .align 0x1000 | |
370 | out_stream: /* written by th_main_0; read back by th_main_5 and th_main_6 */ | |
371 | .skip 16000 /* NOTE(review): th_main_0 stores 0x100 * 0x40 = 0x4000 bytes here, more than 16000; the excess lands in the padding created by the next .align 0x1000 */ | |
372 | ||
373 | .align 0x1000 | |
374 | in_stream1: /* source buffer read by th_main_1: 15 pattern words (60 bytes) followed by 16000 reserved bytes */ | |
375 | .word 0xb1bababa | |
376 | .word 0xb2bababa | |
377 | .word 0xb3bababa | |
378 | .word 0xb4bababa | |
379 | .word 0xb5bababa | |
380 | .word 0xb6bababa | |
381 | .word 0xb7bababa | |
382 | .word 0xb8bababa | |
383 | .word 0xb9bababa | |
384 | .word 0xbabababa | |
385 | .word 0xbbbababa | |
386 | .word 0xbcbababa | |
387 | .word 0xbdbababa | |
388 | .word 0xbebababa | |
389 | .word 0xbfbababa | |
390 | .skip 16000 | |
391 | ||
392 | ! offset the out_stream block | |
393 | .align 0x1000 | |
394 | out_stream1: /* destination of the block stores in th_main_1, which writes 128 * 64 = 8192 bytes */ | |
395 | .skip 16000 | |
396 | .align 0x1000 | |
397 | in_stream2: /* source buffer read by th_main_2: 15 pattern words (60 bytes) followed by 16000 reserved bytes */ | |
398 | .word 0xb1bababa | |
399 | .word 0xb2bababa | |
400 | .word 0xb3bababa | |
401 | .word 0xb4bababa | |
402 | .word 0xb5bababa | |
403 | .word 0xb6bababa | |
404 | .word 0xb7bababa | |
405 | .word 0xb8bababa | |
406 | .word 0xb9bababa | |
407 | .word 0xbabababa | |
408 | .word 0xbbbababa | |
409 | .word 0xbcbababa | |
410 | .word 0xbdbababa | |
411 | .word 0xbebababa | |
412 | .word 0xbfbababa | |
413 | .skip 16000 | |
414 | ||
415 | ! offset the out_stream block | |
416 | .align 0x1000 | |
417 | out_stream2: /* written by th_main_2; read back by th_main_7 */ | |
418 | .skip 16000 /* NOTE(review): th_main_2 stores 0x100 * 0x40 = 0x4000 bytes here, more than 16000; the excess lands in the padding created by the next .align 0x1000 */ | |
419 | ||
420 | .align 0x1000 | |
421 | in_stream3: /* source buffer read by th_main_3: 15 pattern words (60 bytes) followed by 16000 reserved bytes */ | |
422 | .word 0xb1bababa | |
423 | .word 0xb2bababa | |
424 | .word 0xb3bababa | |
425 | .word 0xb4bababa | |
426 | .word 0xb5bababa | |
427 | .word 0xb6bababa | |
428 | .word 0xb7bababa | |
429 | .word 0xb8bababa | |
430 | .word 0xb9bababa | |
431 | .word 0xbabababa | |
432 | .word 0xbbbababa | |
433 | .word 0xbcbababa | |
434 | .word 0xbdbababa | |
435 | .word 0xbebababa | |
436 | .word 0xbfbababa | |
437 | .skip 16000 | |
438 | ||
439 | ! offset the out_stream block | |
440 | .align 0x1000 | |
441 | out_stream3: /* written by th_main_3 */ | |
442 | .skip 16000 /* NOTE(review): th_main_3 stores 0x100 * 0x40 = 0x4000 bytes here, more than 16000; the excess lands in the padding created by the next .align 0x1000 */ | |
443 | ||
444 | .align 0x1000 | |
445 | in_stream4: /* source buffer read by th_main_4: 15 pattern words (60 bytes) followed by 16000 reserved bytes */ | |
446 | .word 0xb1bababa | |
447 | .word 0xb2bababa | |
448 | .word 0xb3bababa | |
449 | .word 0xb4bababa | |
450 | .word 0xb5bababa | |
451 | .word 0xb6bababa | |
452 | .word 0xb7bababa | |
453 | .word 0xb8bababa | |
454 | .word 0xb9bababa | |
455 | .word 0xbabababa | |
456 | .word 0xbbbababa | |
457 | .word 0xbcbababa | |
458 | .word 0xbdbababa | |
459 | .word 0xbebababa | |
460 | .word 0xbfbababa | |
461 | .skip 16000 | |
462 | ||
463 | ! offset the out_stream block | |
464 | .align 0x1000 | |
465 | out_stream4: /* written by th_main_4 */ | |
466 | .skip 0x4000 /* was 16000: th_main_4 stores 0x100 * 0x40 = 0x4000 bytes and no .align follows this last buffer, so the stores ran 384 bytes past user_data_end */ | |
467 | ||
468 | user_data_end: |