Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: tso_n1_bcopy.s | |
5 | * Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved | |
6 | * 4150 Network Circle, Santa Clara, California 95054, U.S.A. | |
7 | * | |
8 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License as published by | |
12 | * the Free Software Foundation; version 2 of the License. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | * GNU General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU General Public License | |
20 | * along with this program; if not, write to the Free Software | |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 | * | |
23 | * For the avoidance of doubt, and except that if any non-GPL license | |
24 | * choice is available it will apply instead, Sun elects to use only | |
25 | * the General Public License version 2 (GPLv2) at this time for any | |
26 | * software where a choice of GPL license versions is made | |
27 | * available with the language indicating that GPLv2 or any later version | |
28 | * may be used, or where a choice of which version of the GPL is applied is | |
29 | * otherwise unspecified. | |
30 | * | |
31 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
32 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
33 | * have any questions. | |
34 | * | |
35 | * | |
36 | * ========== Copyright Header End ============================================ | |
37 | */ | |
38 | #define srcaddr %l0 /* read pointer, initialised to in_stream */ | |
39 | #define dstaddr %l1 /* write pointer, initialised to out_stream */ | |
40 | #define count %l2 /* byte counter, decremented by 64 per sub-block */ | |
41 | #define ASI_BLK_P 0xf0 /* ASI operand used by the stda block stores */ | |
42 | ||
43 | ! Define number of 64 byte subblocks to copy | |
44 | #define SBLKS 128 /* multiplied by 8*8 at run time to get the byte count */ | |
45 | ||
46 | ! How far ahead is the prefetch stream (in subblocks) | |
47 | #define FETCH_AHEAD 8 /* also gates the optional preamble prefetches */ | |
48 | ||
49 | #include "hboot.s" | |
50 | ||
51 | .global main | |
52 | ||
53 | .text | |
54 | main: /* entry point: set up FP state, pointers and byte count for the copy */ | |
55 | wr %g0, 0x4, %fprs /* make sure fef is 1 */ | |
56 | ||
57 | setup_addresses: | |
58 | setx in_stream, %g2, srcaddr /* srcaddr = address of in_stream; %g2 is the setx scratch register */ | |
59 | setx out_stream, %g2, dstaddr /* dstaddr = address of out_stream */ | |
60 | alignaddr srcaddr, %g0, srcaddr /* 8-byte-align srcaddr; the dropped low bits become the GSR offset that faligndata uses */ | |
61 | ||
62 | ! COUNT bytes to copy | |
63 | !-------------------- | |
64 | set SBLKS, count /* count = number of sub-blocks */ | |
65 | mulx count, 8*8, count /* count = SBLKS * 64 bytes */ | |
66 | ||
67 | ||
68 | ! the initial part (preamble) of bcopy. | |
69 | !-------------------------------------- | |
70 | prefetch [srcaddr + 0*64], 1 /* prime the prefetch stream: sub-blocks 0 to 4 are always requested */ | |
71 | prefetch [srcaddr + 1*64], 1 | |
72 | prefetch [srcaddr + 2*64], 1 | |
73 | prefetch [srcaddr + 3*64], 1 | |
74 | ldd [srcaddr + 0*8], %f0 /* source doubleword 0 of the first sub-block */ | |
75 | prefetch [srcaddr + 4*64], 1 | |
76 | #if FETCH_AHEAD>=4 | |
77 | prefetch [srcaddr + (4+1)*64], 1 /* deeper prefetches are issued only as far as FETCH_AHEAD asks for */ | |
78 | #endif | |
79 | #if FETCH_AHEAD>=5 | |
80 | prefetch [srcaddr + (5+1)*64], 1 | |
81 | #endif | |
82 | #if FETCH_AHEAD>=6 | |
83 | prefetch [srcaddr + (6+1)*64], 1 | |
84 | #endif | |
85 | #if FETCH_AHEAD>=7 | |
86 | prefetch [srcaddr + (7+1)*64], 1 | |
87 | #endif | |
88 | ldd [srcaddr + 1*8], %f2 | |
89 | ldd [srcaddr + 2*8], %f4 | |
90 | faligndata %f0, %f2, %f32 /* output doubleword 0; the outputs go to %f32 upward, the register the stda stores name */ | |
91 | ldd [srcaddr + 3*8], %f6 | |
92 | faligndata %f2, %f4, %f34 | |
93 | ldd [srcaddr + 4*8], %f8 | |
94 | faligndata %f4, %f6, %f36 | |
95 | ldd [srcaddr + 5*8], %f10 | |
96 | faligndata %f6, %f8, %f38 | |
97 | ldd [srcaddr + 6*8], %f12 | |
98 | faligndata %f8, %f10, %f40 | |
99 | ldd [srcaddr + 7*8], %f14 | |
100 | faligndata %f10, %f12, %f42 /* output doubleword 5; 6 and 7 are finished in timing_loop or tidy_up */ | |
101 | ldd [srcaddr + 8*8], %f16 /* doubleword 0 of the next sub-block, needed to finish output doubleword 7 */ | |
102 | #if FETCH_AHEAD>=8 | |
103 | prefetch [srcaddr + (8+1)*64], 1 | |
104 | #endif | |
105 | subcc count, 64, count /* account for the sub-block just loaded */ | |
106 | be,pn %xcc,tidy_up /* count reached zero: no loop passes are needed */ | |
107 | add srcaddr, 64, srcaddr /* branch delay slot: srcaddr advances on both paths */ | |
108 | ||
109 | ! the loop (the essence of bcopy) | |
110 | !-------------------------------- | |
111 | timing_loop: /* each pass stores one 64-byte block and loads the next 64 source bytes */ | |
112 | fmovd %f16, %f0 /* the doubleword loaded ahead becomes doubleword 0 of this pass */ | |
113 | ldd [srcaddr + 1*8], %f2 | |
114 | faligndata %f12, %f14, %f44 /* finish output doubleword 6 of the pending block */ | |
115 | ldd [srcaddr + 2*8], %f4 | |
116 | faligndata %f14, %f0, %f46 /* finish output doubleword 7 of the pending block */ | |
117 | stda %f32, [dstaddr]ASI_BLK_P /* block store of the pending block, starting at %f32, to dstaddr */ | |
118 | ldd [srcaddr + 3*8], %f6 | |
119 | faligndata %f0, %f2, %f32 /* start assembling the next block in the same registers */ | |
120 | ldd [srcaddr + 4*8], %f8 | |
121 | faligndata %f2, %f4, %f34 | |
122 | ldd [srcaddr + 5*8], %f10 | |
123 | faligndata %f4, %f6, %f36 | |
124 | ldd [srcaddr + 6*8], %f12 | |
125 | faligndata %f6, %f8, %f38 | |
126 | ldd [srcaddr + 7*8], %f14 | |
127 | faligndata %f8, %f10, %f40 | |
128 | ldd [srcaddr + 8*8], %f16 /* look-ahead doubleword for the next pass or for tidy_up */ | |
129 | prefetch [srcaddr + (FETCH_AHEAD+1)*64], 1 /* keep the prefetch stream ahead of the loads */ | |
130 | faligndata %f10, %f12, %f42 | |
131 | subcc count, 64, count /* one more sub-block consumed; sets the condition codes for bg */ | |
132 | add dstaddr, 64, dstaddr /* next output block */ | |
133 | bg,pt %xcc,timing_loop /* repeat while count is still positive */ | |
134 | add srcaddr, 64, srcaddr /* branch delay slot: advance the source pointer */ | |
135 | ||
136 | ! the last part of bcopy | |
137 | ! should handle the remaining partial block here | |
138 | !----------------------------------------------- | |
139 | tidy_up: /* reached from the preamble branch or by falling out of timing_loop */ | |
140 | fmovd %f16, %f0 /* no partial-block code exists here; count starts as SBLKS*64, a multiple of 64 */ | |
141 | faligndata %f12, %f14, %f44 /* finish output doubleword 6 of the last block */ | |
142 | faligndata %f14, %f0, %f46 /* finish output doubleword 7 of the last block */ | |
143 | stda %f32, [dstaddr]ASI_BLK_P /* store the final 64-byte block */ | |
144 | membar #Sync /* NOTE(review): presumably drains the block store before the test ends - confirm */ | |
145 | trap: | |
146 | ta GOOD_TRAP /* GOOD_TRAP is not defined in this file; presumably supplied via hboot.s - confirm */ | |
147 | user_text_end: | |
148 | ||
149 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
150 | ||
151 | .global in_stream | |
152 | .global out_stream | |
153 | ||
154 | .data | |
155 | .align 0x40 /* 64-byte alignment for the start of the data area */ | |
156 | user_data_start: | |
157 | ||
158 | .skip 0x40 /* 64 bytes of padding ahead of the source buffer */ | |
159 | in_stream: /* source buffer read through srcaddr; init_mem is defined outside this file, presumably SBLKS*16 items of 4 bytes - TODO confirm */ | |
160 | init_mem(0x20100000, SBLKS*16, 4, +, 0, +, 0x01010001) | |
161 | ||
162 | ! offset the out_stream block | |
163 | .align 0x1000 | |
164 | .skip 1024 | |
165 | .skip 192 /* 1024 + 192 = 1216 = 19*64, so out_stream stays 64-byte aligned */ | |
166 | out_stream: /* destination buffer written through dstaddr by the block stores */ | |
167 | init_mem(0x11111111, SBLKS*16, 4, +, 0, +, 0) | |
168 | ||
169 | user_data_end: | |