Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: n2_mcu_0_all_bcopy_all_banks.s | |
5 | * Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved | |
6 | * 4150 Network Circle, Santa Clara, California 95054, U.S.A. | |
7 | * | |
8 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License as published by | |
12 | * the Free Software Foundation; version 2 of the License. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | * GNU General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU General Public License | |
20 | * along with this program; if not, write to the Free Software | |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 | * | |
23 | * For the avoidance of doubt, and except that if any non-GPL license | |
24 | * choice is available it will apply instead, Sun elects to use only | |
25 | * the General Public License version 2 (GPLv2) at this time for any | |
26 | * software where a choice of GPL license versions is made | |
27 | * available with the language indicating that GPLv2 or any later version | |
28 | * may be used, or where a choice of which version of the GPL is applied is | |
29 | * otherwise unspecified. | |
30 | * | |
31 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
32 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
33 | * have any questions. | |
34 | * | |
35 | * | |
36 | * ========== Copyright Header End ============================================ | |
37 | */ | |
38 | #define srcaddr %i0 | |
39 | #define dstaddr %i1 | |
40 | #define count %i2 | |
41 | #define thread_offset %i3 | |
42 | #define ASI_BLK_P 0xf0 | |
43 | ||
44 | ! Nominal number of 64 byte subblocks to copy. NOTE: main loads this value unscaled into the count register (the mulx by 8*8 is commented out) and do_bcopy subtracts 64 per block, so each call moves about SBLKS/64 blocks (2 with SBLKS = 128). | |
45 | #define SBLKS 128 | |
46 | ||
47 | ! Distance of the prefetch stream (in 64 byte subblocks); selects the optional preamble prefetches and the loop prefetch offset in the FPU_COPY variant of do_bcopy | |
48 | #define FETCH_AHEAD 8 | |
49 | ||
50 | #include "hboot.s" | |
51 | ||
52 | .global main | |
53 | ||
54 | .text | |
55 | main: /* entry point: build a per-thread address offset, then run six chained copies through do_bcopy */ | |
56 | wr %g0, 0x4, %fprs /* write 0x4 to FPRS so the FEF (FPU enable) bit is 1 */ | |
57 | ta %icc, T_RD_THID /* trap to read the thread id; the code below reads %o1, presumably the id returned by the trap */ | |
58 | ! thread-within-core bits (%o1 bits 2:0) move to address bits 36:34 (original note: they become fbd dimm addr bits) | |
59 | and %o1, 7, %g2 | |
60 | sllx %g2, 34, thread_offset | |
61 | ! core id bits (%o1 bits 5:3) are shifted left by 22 to address bits 27:25 and merged in. NOTE(review): that is a 32MB step per core (1 << 25) although the original note says 16MB offset within the fbd dimm - confirm | |
62 | and %o1, 0x38, %g2 | |
63 | sllx %g2, 22, %g2 | |
64 | or %g2, thread_offset, thread_offset | |
65 | setup_addresses: /* first copy: in_stream to 0x100000000 + thread_offset */ | |
66 | setx in_stream, %g2, srcaddr | |
67 | !setx out_stream, %g2, dstaddr | |
68 | setx 0x100000000, %g2, dstaddr | |
69 | add dstaddr, thread_offset, dstaddr | |
70 | alignaddr srcaddr, %g0, srcaddr /* VIS alignaddr: srcaddr is rounded down to a multiple of 8 and the dropped low 3 bits go to GSR.align for faligndata */ | |
71 | ||
72 | ! amount to copy: SBLKS is loaded unscaled and do_bcopy subtracts 64 per 64 byte block, so the value acts as a byte total | |
73 | !-------------------- | |
74 | set SBLKS, count | |
75 | ! mulx count, 8*8, count | |
76 | setx do_bcopy, %g2, %g3 | |
77 | jmpl %g3, %g0 /* jump to do_bcopy; the link value is discarded into %g0 */ | |
78 | rd %pc, %g7 /* delay slot: %g7 = address of this instruction; do_bcopy returns with jmp %g7, so this rd runs once more and execution continues below */ | |
79 | ||
80 | /*{{{ 4->5: copy from 0x100000000 (destination of the first copy) to 0x100500000, both offset by thread_offset */ | |
81 | setx 0x100000000, %g2, srcaddr | |
82 | add srcaddr, thread_offset, srcaddr | |
83 | alignaddr srcaddr, %g0, srcaddr /* round srcaddr down to 8 bytes; the low 3 bits go to GSR.align */ | |
84 | ||
85 | setx 0x100500000, %g2, dstaddr | |
86 | add dstaddr, thread_offset, dstaddr | |
87 | ||
88 | set SBLKS, count /* do_bcopy subtracts 64 from count per 64-byte block */ | |
89 | ! mulx count, 8*8, count | |
90 | ||
91 | setx do_bcopy, %g2, %g3 | |
92 | jmpl %g3, %g0 /* jump to do_bcopy; the link value is discarded into %g0 */ | |
93 | rd %pc, %g7 /* delay slot: %g7 = address of this instruction, which do_bcopy jumps back to */ | |
94 | /*}}} */ | |
95 | /*{{{ 5->5.8: copy from 0x100500000 (destination of the previous copy) to 0x100580000, both offset by thread_offset */ | |
96 | setx 0x100500000, %g2, srcaddr | |
97 | add srcaddr, thread_offset, srcaddr | |
98 | alignaddr srcaddr, %g0, srcaddr /* round srcaddr down to 8 bytes; the low 3 bits go to GSR.align */ | |
99 | ||
100 | setx 0x100580000, %g2, dstaddr | |
101 | add dstaddr, thread_offset, dstaddr | |
102 | ||
103 | set SBLKS, count /* do_bcopy subtracts 64 from count per 64-byte block */ | |
104 | ! mulx count, 8*8, count | |
105 | ||
106 | setx do_bcopy, %g2, %g3 | |
107 | jmpl %g3, %g0 /* jump to do_bcopy; the link value is discarded into %g0 */ | |
108 | rd %pc, %g7 /* delay slot: %g7 = address of this instruction, which do_bcopy jumps back to */ | |
109 | /*}}} */ | |
110 | /*{{{ 5.8->6: copy from 0x100580000 (destination of the previous copy) to 0x100600000, both offset by thread_offset */ | |
111 | setx 0x100580000, %g2, srcaddr | |
112 | add srcaddr, thread_offset, srcaddr | |
113 | alignaddr srcaddr, %g0, srcaddr /* round srcaddr down to 8 bytes; the low 3 bits go to GSR.align */ | |
114 | ||
115 | setx 0x100600000, %g2, dstaddr | |
116 | add dstaddr, thread_offset, dstaddr | |
117 | ||
118 | set SBLKS, count /* do_bcopy subtracts 64 from count per 64-byte block */ | |
119 | ! mulx count, 8*8, count | |
120 | ||
121 | setx do_bcopy, %g2, %g3 | |
122 | jmpl %g3, %g0 /* jump to do_bcopy; the link value is discarded into %g0 */ | |
123 | rd %pc, %g7 /* delay slot: %g7 = address of this instruction, which do_bcopy jumps back to */ | |
124 | /*}}} */ | |
125 | /*{{{ 6->6.8: copy from 0x100600000 (destination of the previous copy) to 0x100680000, both offset by thread_offset */ | |
126 | setx 0x100600000, %g2, srcaddr | |
127 | add srcaddr, thread_offset, srcaddr | |
128 | alignaddr srcaddr, %g0, srcaddr /* round srcaddr down to 8 bytes; the low 3 bits go to GSR.align */ | |
129 | ||
130 | setx 0x100680000, %g2, dstaddr | |
131 | add dstaddr, thread_offset, dstaddr | |
132 | ||
133 | set SBLKS, count /* do_bcopy subtracts 64 from count per 64-byte block */ | |
134 | ! mulx count, 8*8, count | |
135 | ||
136 | setx do_bcopy, %g2, %g3 | |
137 | jmpl %g3, %g0 /* jump to do_bcopy; the link value is discarded into %g0 */ | |
138 | rd %pc, %g7 /* delay slot: %g7 = address of this instruction, which do_bcopy jumps back to */ | |
139 | /*}}} */ | |
140 | /*{{{ 6.8->7: copy from 0x100680000 (destination of the previous copy) to 0x100700000, both offset by thread_offset */ | |
141 | setx 0x100680000, %g2, srcaddr | |
142 | add srcaddr, thread_offset, srcaddr | |
143 | alignaddr srcaddr, %g0, srcaddr /* round srcaddr down to 8 bytes; the low 3 bits go to GSR.align */ | |
144 | ||
145 | setx 0x100700000, %g2, dstaddr | |
146 | add dstaddr, thread_offset, dstaddr | |
147 | ||
148 | set SBLKS, count /* do_bcopy subtracts 64 from count per 64-byte block */ | |
149 | ! mulx count, 8*8, count | |
150 | ||
151 | setx do_bcopy, %g2, %g3 | |
152 | jmpl %g3, %g0 /* jump to do_bcopy; the link value is discarded into %g0 */ | |
153 | rd %pc, %g7 /* delay slot: %g7 = address of this instruction, which do_bcopy jumps back to */ | |
154 | /*}}} */ | |
155 | ||
156 | trap: /* reached by fall-through after the last copy returns */ | |
157 | ||
158 | ta GOOD_TRAP /* GOOD_TRAP is defined outside this file, presumably the pass indication for the diag - confirm; do_bcopy follows directly, so this trap is assumed not to return here */ | |
159 | ||
160 | do_bcopy: /* copy routine used by every call site in main. In: srcaddr, dstaddr, count (reduced by 64 per 64-byte block). Modifies srcaddr, dstaddr and count. Returns with jmp %g7. FPU_COPY selects the floating-point variant, otherwise the integer variant is assembled. */ | |
161 | #ifdef FPU_COPY | |
162 | /*{{{ FPU_COPY variant: 8-byte ldd loads, faligndata realignment into %f32-%f46, block stores through ASI_BLK_P */ | |
163 | ! preamble: start the prefetch stream and load/align the first source block; %f44 and %f46 are completed later, at the top of timing_loop or in tidy_up | |
164 | !-------------------------------------- | |
165 | prefetch [srcaddr + 0*64], 1 | |
166 | prefetch [srcaddr + 1*64], 1 | |
167 | prefetch [srcaddr + 2*64], 1 | |
168 | prefetch [srcaddr + 3*64], 1 | |
169 | ldd [srcaddr + 0*8], %f0 | |
170 | prefetch [srcaddr + 4*64], 1 | |
171 | #if FETCH_AHEAD>=4 | |
172 | prefetch [srcaddr + (4+1)*64], 1 | |
173 | #endif | |
174 | #if FETCH_AHEAD>=5 | |
175 | prefetch [srcaddr + (5+1)*64], 1 | |
176 | #endif | |
177 | #if FETCH_AHEAD>=6 | |
178 | prefetch [srcaddr + (6+1)*64], 1 | |
179 | #endif | |
180 | #if FETCH_AHEAD>=7 | |
181 | prefetch [srcaddr + (7+1)*64], 1 | |
182 | #endif | |
183 | ldd [srcaddr + 1*8], %f2 | |
184 | ldd [srcaddr + 2*8], %f4 | |
185 | faligndata %f0, %f2, %f32 /* each faligndata extracts 8 bytes from a pair of adjacent source doubles using GSR.align */ | |
186 | ldd [srcaddr + 3*8], %f6 | |
187 | faligndata %f2, %f4, %f34 | |
188 | ldd [srcaddr + 4*8], %f8 | |
189 | faligndata %f4, %f6, %f36 | |
190 | ldd [srcaddr + 5*8], %f10 | |
191 | faligndata %f6, %f8, %f38 | |
192 | ldd [srcaddr + 6*8], %f12 | |
193 | faligndata %f8, %f10, %f40 | |
194 | ldd [srcaddr + 7*8], %f14 | |
195 | faligndata %f10, %f12, %f42 | |
196 | ldd [srcaddr + 8*8], %f16 /* first double of the next block; becomes %f0 after the fmovd below */ | |
197 | #if FETCH_AHEAD>=8 | |
198 | prefetch [srcaddr + (8+1)*64], 1 | |
199 | #endif | |
200 | subcc count, 64, count /* account for the block loaded above */ | |
201 | be,pn %xcc,tidy_up /* count reached exactly zero: single block, skip the loop */ | |
202 | add srcaddr, 64, srcaddr /* delay slot (not annulled): advance the source pointer on both paths */ | |
203 | ||
204 | ! main loop: each pass finishes aligning the previous block, block-stores it, and loads/aligns the next one | |
205 | !-------------------------------- | |
206 | timing_loop: | |
207 | fmovd %f16, %f0 | |
208 | ldd [srcaddr + 1*8], %f2 | |
209 | faligndata %f12, %f14, %f44 | |
210 | ldd [srcaddr + 2*8], %f4 | |
211 | faligndata %f14, %f0, %f46 | |
212 | stda %f32, [dstaddr]ASI_BLK_P /* block store of the doubles starting at %f32 (%f32-%f46) to dstaddr */ | |
213 | ldd [srcaddr + 3*8], %f6 | |
214 | faligndata %f0, %f2, %f32 | |
215 | ldd [srcaddr + 4*8], %f8 | |
216 | faligndata %f2, %f4, %f34 | |
217 | ldd [srcaddr + 5*8], %f10 | |
218 | faligndata %f4, %f6, %f36 | |
219 | ldd [srcaddr + 6*8], %f12 | |
220 | faligndata %f6, %f8, %f38 | |
221 | ldd [srcaddr + 7*8], %f14 | |
222 | faligndata %f8, %f10, %f40 | |
223 | ldd [srcaddr + 8*8], %f16 | |
224 | prefetch [srcaddr + (FETCH_AHEAD+1)*64], 1 /* extend the prefetch stream by one more 64-byte block */ | |
225 | faligndata %f10, %f12, %f42 | |
226 | subcc count, 64, count | |
227 | add dstaddr, 64, dstaddr | |
228 | bg,pt %xcc,timing_loop /* repeat while count is still positive */ | |
229 | add srcaddr, 64, srcaddr /* delay slot: advance the source pointer */ | |
230 | ||
231 | ! tail: finish aligning the last loaded block and store it | |
232 | ! TODO (from the original author): a remaining partial block should be handled here; it currently is not | |
233 | !----------------------------------------------- | |
234 | tidy_up: | |
235 | fmovd %f16, %f0 | |
236 | faligndata %f12, %f14, %f44 | |
237 | faligndata %f14, %f0, %f46 | |
238 | stda %f32, [dstaddr]ASI_BLK_P | |
239 | membar #Sync /* wait for the preceding memory operations, including the block stores, before returning */ | |
240 | /*}}} */ | |
241 | #else | |
242 | /*{{{ default variant (FPU_COPY not defined): integer registers %l0-%l7 with ASI 0xe2 loads and stores */ | |
243 | timing_loop0: /* each pass moves 64 bytes: four ldda that each advance srcaddr by 0x10, then eight stxa that each advance dstaddr by 0x8 */ | |
244 | ldda [srcaddr] 0xe2, %l0 /* NOTE(review): 0xe2 is presumably the N2 twin extended-word load / block-init store ASI - confirm against the PRM */ | |
245 | add srcaddr, 0x10, srcaddr | |
246 | ldda [srcaddr] 0xe2, %l2 | |
247 | add srcaddr, 0x10, srcaddr | |
248 | ldda [srcaddr] 0xe2, %l4 | |
249 | add srcaddr, 0x10, srcaddr | |
250 | ldda [srcaddr] 0xe2, %l6 | |
251 | add srcaddr, 0x10, srcaddr | |
252 | stxa %l0, [dstaddr] 0xe2 | |
253 | add dstaddr, 0x8, dstaddr | |
254 | stxa %l1, [dstaddr] 0xe2 | |
255 | add dstaddr, 0x8, dstaddr | |
256 | stxa %l2, [dstaddr] 0xe2 | |
257 | add dstaddr, 0x8, dstaddr | |
258 | stxa %l3, [dstaddr] 0xe2 | |
259 | add dstaddr, 0x8, dstaddr | |
260 | stxa %l4, [dstaddr] 0xe2 | |
261 | add dstaddr, 0x8, dstaddr | |
262 | stxa %l5, [dstaddr] 0xe2 | |
263 | add dstaddr, 0x8, dstaddr | |
264 | stxa %l6, [dstaddr] 0xe2 | |
265 | add dstaddr, 0x8, dstaddr | |
266 | stxa %l7, [dstaddr] 0xe2 | |
267 | add dstaddr, 0x8, dstaddr | |
268 | ||
269 | subcc count, 64, count | |
270 | bg,pt %xcc,timing_loop0 /* repeat while count is still positive */ | |
271 | nop | |
272 | /*}}} */ | |
273 | #endif | |
274 | ||
275 | jmp %g7 /* return: %g7 holds the address of the caller's rd %pc delay-slot instruction, which runs again before execution continues after it */ | |
276 | nop | |
277 | ||
278 | user_text_end: | |
279 | ||
280 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
281 | ||
282 | .global in_stream | |
283 | .global out_stream | |
284 | ||
285 | .data | |
286 | .align 0x40 | |
287 | user_data_start: | |
288 | ||
289 | .skip 0x40 | |
290 | in_stream: /* source buffer of the first copy in main, 0x40 bytes past the 0x40-aligned user_data_start; contents come from the init_mem macro, which is defined outside this file */ | |
291 | init_mem(0x20100000, SBLKS*16, 4, +, 0, +, 0x01010001) | |
292 | ||
293 | ! offset the out_stream block: it starts 1024 + 192 = 0x4c0 bytes past a 0x1000-aligned boundary | |
294 | .align 0x1000 | |
295 | .skip 1024 | |
296 | .skip 192 | |
297 | out_stream: /* only referenced by a commented-out setx in main; the active code writes to the data_page sections instead */ | |
298 | init_mem(0x11111111, SBLKS*16, 4, +, 0, +, 0) | |
299 | ||
300 | user_data_end: | |
301 | ||
302 | ||
303 | ||
304 | /*{{{ 0: page at 0x100000000 = 0x100000000 + (0 << 34), the copy region main addresses for thread-in-core 0; VA, RA and the ra2pa argument are all the same value */ | |
305 | SECTION data_page0 DATA_VA=0x100000000 | |
306 | attr_data { | |
307 | Name = data_page0, | |
308 | VA=0x100000000, | |
309 | RA=0x100000000, | |
310 | PA=ra2pa(0x100000000,0), | |
311 | part_0_ctx_nonzero_tsb_config_0, | |
312 | TTE_G=1, | |
313 | TTE_Context=PCONTEXT, | |
314 | TTE_V=1, | |
315 | TTE_Size=5, | |
316 | TTE_SIZE_PTR=0, | |
317 | TTE_NFO=0, | |
318 | TTE_IE=0, | |
319 | TTE_Soft2=0, | |
320 | TTE_Diag=0, | |
321 | TTE_Soft=0, | |
322 | TTE_L=0, | |
323 | TTE_CP=1, | |
324 | TTE_CV=1, | |
325 | TTE_E=0, | |
326 | TTE_P=0, | |
327 | TTE_W=1 | |
328 | } | |
329 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
330 | .skip 1024 | |
331 | .word 0x0 | |
332 | .skip 1024 | |
333 | /*}}} */ | |
334 | /*{{{ 1: page at 0x500000000 = 0x100000000 + (1 << 34), the copy region main addresses for thread-in-core 1; VA, RA and the ra2pa argument are all the same value */ | |
335 | SECTION data_page1 DATA_VA=0x500000000 | |
336 | attr_data { | |
337 | Name = data_page1, | |
338 | VA=0x500000000, | |
339 | RA=0x500000000, | |
340 | PA=ra2pa(0x500000000,0), | |
341 | part_0_ctx_nonzero_tsb_config_0, | |
342 | TTE_G=1, | |
343 | TTE_Context=PCONTEXT, | |
344 | TTE_V=1, | |
345 | TTE_Size=5, | |
346 | TTE_SIZE_PTR=0, | |
347 | TTE_NFO=0, | |
348 | TTE_IE=0, | |
349 | TTE_Soft2=0, | |
350 | TTE_Diag=0, | |
351 | TTE_Soft=0, | |
352 | TTE_L=0, | |
353 | TTE_CP=1, | |
354 | TTE_CV=1, | |
355 | TTE_E=0, | |
356 | TTE_P=0, | |
357 | TTE_W=1 | |
358 | } | |
359 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
360 | .skip 1024 | |
361 | .word 0x0 | |
362 | .skip 1024 | |
363 | /*}}} */ | |
364 | /*{{{ 2: page at 0x900000000 = 0x100000000 + (2 << 34), the copy region main addresses for thread-in-core 2; VA, RA and the ra2pa argument are all the same value */ | |
365 | SECTION data_page2 DATA_VA=0x900000000 | |
366 | attr_data { | |
367 | Name = data_page2, | |
368 | VA=0x900000000, | |
369 | RA=0x900000000, | |
370 | PA=ra2pa(0x900000000,0), | |
371 | part_0_ctx_nonzero_tsb_config_0, | |
372 | TTE_G=1, | |
373 | TTE_Context=PCONTEXT, | |
374 | TTE_V=1, | |
375 | TTE_Size=5, | |
376 | TTE_SIZE_PTR=0, | |
377 | TTE_NFO=0, | |
378 | TTE_IE=0, | |
379 | TTE_Soft2=0, | |
380 | TTE_Diag=0, | |
381 | TTE_Soft=0, | |
382 | TTE_L=0, | |
383 | TTE_CP=1, | |
384 | TTE_CV=1, | |
385 | TTE_E=0, | |
386 | TTE_P=0, | |
387 | TTE_W=1 | |
388 | } | |
389 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
390 | .skip 1024 | |
391 | .word 0x0 | |
392 | .skip 1024 | |
393 | /*}}} */ | |
394 | /*{{{ 3: page at 0xd00000000 = 0x100000000 + (3 << 34), the copy region main addresses for thread-in-core 3; VA, RA and the ra2pa argument are all the same value */ | |
395 | SECTION data_page3 DATA_VA=0xd00000000 | |
396 | attr_data { | |
397 | Name = data_page3, | |
398 | VA=0xd00000000, | |
399 | RA=0xd00000000, | |
400 | PA=ra2pa(0xd00000000,0), | |
401 | part_0_ctx_nonzero_tsb_config_0, | |
402 | TTE_G=1, | |
403 | TTE_Context=PCONTEXT, | |
404 | TTE_V=1, | |
405 | TTE_Size=5, | |
406 | TTE_SIZE_PTR=0, | |
407 | TTE_NFO=0, | |
408 | TTE_IE=0, | |
409 | TTE_Soft2=0, | |
410 | TTE_Diag=0, | |
411 | TTE_Soft=0, | |
412 | TTE_L=0, | |
413 | TTE_CP=1, | |
414 | TTE_CV=1, | |
415 | TTE_E=0, | |
416 | TTE_P=0, | |
417 | TTE_W=1 | |
418 | } | |
419 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
420 | .skip 1024 | |
421 | .word 0x0 | |
422 | .skip 1024 | |
423 | /*}}} */ | |
424 | /*{{{ 4: page at 0x1100000000 = 0x100000000 + (4 << 34), the copy region main addresses for thread-in-core 4; VA, RA and the ra2pa argument are all the same value */ | |
425 | SECTION data_page4 DATA_VA=0x1100000000 | |
426 | attr_data { | |
427 | Name = data_page4, | |
428 | VA=0x1100000000, | |
429 | RA=0x1100000000, | |
430 | PA=ra2pa(0x1100000000,0), | |
431 | part_0_ctx_nonzero_tsb_config_0, | |
432 | TTE_G=1, | |
433 | TTE_Context=PCONTEXT, | |
434 | TTE_V=1, | |
435 | TTE_Size=5, | |
436 | TTE_SIZE_PTR=0, | |
437 | TTE_NFO=0, | |
438 | TTE_IE=0, | |
439 | TTE_Soft2=0, | |
440 | TTE_Diag=0, | |
441 | TTE_Soft=0, | |
442 | TTE_L=0, | |
443 | TTE_CP=1, | |
444 | TTE_CV=1, | |
445 | TTE_E=0, | |
446 | TTE_P=0, | |
447 | TTE_W=1 | |
448 | } | |
449 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
450 | .skip 1024 | |
451 | .word 0x0 | |
452 | .skip 1024 | |
453 | /*}}} */ | |
454 | /*{{{ 5: page at 0x1500000000 = 0x100000000 + (5 << 34), the copy region main addresses for thread-in-core 5; VA, RA and the ra2pa argument are all the same value */ | |
455 | SECTION data_page5 DATA_VA=0x1500000000 | |
456 | attr_data { | |
457 | Name = data_page5, | |
458 | VA=0x1500000000, | |
459 | RA=0x1500000000, | |
460 | PA=ra2pa(0x1500000000,0), | |
461 | part_0_ctx_nonzero_tsb_config_0, | |
462 | TTE_G=1, | |
463 | TTE_Context=PCONTEXT, | |
464 | TTE_V=1, | |
465 | TTE_Size=5, | |
466 | TTE_SIZE_PTR=0, | |
467 | TTE_NFO=0, | |
468 | TTE_IE=0, | |
469 | TTE_Soft2=0, | |
470 | TTE_Diag=0, | |
471 | TTE_Soft=0, | |
472 | TTE_L=0, | |
473 | TTE_CP=1, | |
474 | TTE_CV=1, | |
475 | TTE_E=0, | |
476 | TTE_P=0, | |
477 | TTE_W=1 | |
478 | } | |
479 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
480 | .skip 1024 | |
481 | .word 0x0 | |
482 | .skip 1024 | |
483 | /*}}} */ | |
484 | /*{{{ 6: page at 0x1900000000 = 0x100000000 + (6 << 34), the copy region main addresses for thread-in-core 6; VA, RA and the ra2pa argument are all the same value */ | |
485 | SECTION data_page6 DATA_VA=0x1900000000 | |
486 | attr_data { | |
487 | Name = data_page6, | |
488 | VA=0x1900000000, | |
489 | RA=0x1900000000, | |
490 | PA=ra2pa(0x1900000000,0), | |
491 | part_0_ctx_nonzero_tsb_config_0, | |
492 | TTE_G=1, | |
493 | TTE_Context=PCONTEXT, | |
494 | TTE_V=1, | |
495 | TTE_Size=5, | |
496 | TTE_SIZE_PTR=0, | |
497 | TTE_NFO=0, | |
498 | TTE_IE=0, | |
499 | TTE_Soft2=0, | |
500 | TTE_Diag=0, | |
501 | TTE_Soft=0, | |
502 | TTE_L=0, | |
503 | TTE_CP=1, | |
504 | TTE_CV=1, | |
505 | TTE_E=0, | |
506 | TTE_P=0, | |
507 | TTE_W=1 | |
508 | } | |
509 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
510 | .skip 1024 | |
511 | .word 0x0 | |
512 | .skip 1024 | |
513 | /*}}} */ | |
514 | /*{{{ 7: page at 0x1d00000000 = 0x100000000 + (7 << 34), the copy region main addresses for thread-in-core 7; VA, RA and the ra2pa argument are all the same value */ | |
515 | SECTION data_page7 DATA_VA=0x1d00000000 | |
516 | attr_data { | |
517 | Name = data_page7, | |
518 | VA=0x1d00000000, | |
519 | RA=0x1d00000000, | |
520 | PA=ra2pa(0x1d00000000,0), | |
521 | part_0_ctx_nonzero_tsb_config_0, | |
522 | TTE_G=1, | |
523 | TTE_Context=PCONTEXT, | |
524 | TTE_V=1, | |
525 | TTE_Size=5, | |
526 | TTE_SIZE_PTR=0, | |
527 | TTE_NFO=0, | |
528 | TTE_IE=0, | |
529 | TTE_Soft2=0, | |
530 | TTE_Diag=0, | |
531 | TTE_Soft=0, | |
532 | TTE_L=0, | |
533 | TTE_CP=1, | |
534 | TTE_CV=1, | |
535 | TTE_E=0, | |
536 | TTE_P=0, | |
537 | TTE_W=1 | |
538 | } | |
539 | .data /* placeholder contents: 1024 skipped bytes, one zero word, 1024 skipped bytes */ | |
540 | .skip 1024 | |
541 | .word 0x0 | |
542 | .skip 1024 | |
543 | /*}}} */ | |
544 |