Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: SS_FastMemory.h | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | #ifndef __SS_FastMemory_h__ | |
24 | #define __SS_FastMemory_h__ | |
25 | ||
26 | #ifdef COMPILE_FOR_SAM | |
27 | ||
28 | // See elsewhere | |
29 | ||
30 | #else // Vonk's own memory | |
31 | ||
32 | #include "SS_Types.h" | |
33 | ||
// Low-level memory primitives with C linkage. They are only declared here;
// the definitions live outside this header. SS_FastMemory forwards its
// ldstub(), swap(), cas(), casx() and st64partial() operations to them,
// passing the host pointer for the target address as 'base'.
extern "C"
{
  uint8_t  ss_ldstub( void* base, uint64_t ofs );
  uint32_t ss_swap  ( uint32_t rd, void* base, uint64_t ofs );
  uint32_t ss_cas   ( uint32_t rd, void* base, uint32_t rs2 );
  uint64_t ss_casx  ( uint64_t rd, void* base, uint64_t rs2 );
  void     ss_stp8  ( double rd, void* base, uint64_t mask );
}
39 | ||
40 | #include "utils.h" | |
41 | ||
42 | #include "SS_Ld128Atomic.h" | |
43 | ||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "BL_Mutex.h"
#include "BL_Memory.h"
48 | ||
49 | class SS_FastMemory : public BL_Memory | |
50 | { | |
51 | public: | |
52 | SS_FastMemory(); | |
53 | ~SS_FastMemory(); | |
54 | ||
55 | void allocate( uint64_t _ram_size, uint64_t _rom_size, uint_t pa_bits ); | |
56 | ||
57 | void load( const char* filename ); | |
58 | void load( const char* filename, uint64_t addr ); | |
59 | void save( const char* filename, uint64_t addr, uint64_t size ); | |
60 | ||
61 | // Supported User Interface Operations | |
62 | ||
63 | void poke8( uint64_t addr, uint8_t data ) { SS_FastMemory::st8(addr,data); } | |
64 | void poke16( uint64_t addr, uint16_t data ) { SS_FastMemory::st16(addr,data); } | |
65 | void poke32( uint64_t addr, uint32_t data ) { SS_FastMemory::st32(addr,data); } | |
66 | void poke64( uint64_t addr, uint64_t data ) { SS_FastMemory::st64(addr,data); } | |
67 | uint8_t peek8u( uint64_t addr ) { return SS_FastMemory::ld8u(addr); } | |
68 | int8_t peek8s( uint64_t addr ) { return SS_FastMemory::ld8s(addr); } | |
69 | uint16_t peek16u( uint64_t addr ) { return SS_FastMemory::ld16u(addr); } | |
70 | int16_t peek16s( uint64_t addr ) { return SS_FastMemory::ld16s(addr); } | |
71 | uint32_t peek32u( uint64_t addr ) { return SS_FastMemory::ld32u(addr); } | |
72 | int32_t peek32s( uint64_t addr ) { return SS_FastMemory::ld32s(addr); } | |
73 | uint64_t peek64( uint64_t addr ) { return SS_FastMemory::ld64(addr); } | |
74 | ||
75 | void peek128( uint64_t addr ,uint64_t data[2] ) | |
76 | { | |
77 | uint8_t* ptr = get_ptr(addr); | |
78 | data[0] = *(uint64_t*)(ptr + 0); | |
79 | data[1] = *(uint64_t*)(ptr + 8); | |
80 | #if defined(ARCH_X64) | |
81 | data[0] = ss_byteswap64(data[0]); | |
82 | data[1] = ss_byteswap64(data[1]); | |
83 | #endif | |
84 | } | |
85 | ||
86 | void peek256( uint64_t addr ,uint64_t data[4] ) | |
87 | { | |
88 | uint8_t* ptr = get_ptr(addr); | |
89 | data[0] = *(uint64_t*)(ptr + 0); | |
90 | data[1] = *(uint64_t*)(ptr + 8); | |
91 | data[2] = *(uint64_t*)(ptr + 16); | |
92 | data[3] = *(uint64_t*)(ptr + 24); | |
93 | #if defined(ARCH_X64) | |
94 | data[0] = ss_byteswap64(data[0]); | |
95 | data[1] = ss_byteswap64(data[1]); | |
96 | data[2] = ss_byteswap64(data[2]); | |
97 | data[3] = ss_byteswap64(data[3]); | |
98 | #endif | |
99 | } | |
100 | ||
101 | void peek512( uint64_t addr ,uint64_t data[8] ) | |
102 | { | |
103 | uint8_t* ptr = get_ptr(addr); | |
104 | data[0] = *(uint64_t*)(ptr + 0); | |
105 | data[1] = *(uint64_t*)(ptr + 8); | |
106 | data[2] = *(uint64_t*)(ptr + 16); | |
107 | data[3] = *(uint64_t*)(ptr + 24); | |
108 | data[4] = *(uint64_t*)(ptr + 32); | |
109 | data[5] = *(uint64_t*)(ptr + 40); | |
110 | data[6] = *(uint64_t*)(ptr + 48); | |
111 | data[7] = *(uint64_t*)(ptr + 56); | |
112 | #if defined(ARCH_X64) | |
113 | data[0] = ss_byteswap64(data[0]); | |
114 | data[1] = ss_byteswap64(data[1]); | |
115 | data[2] = ss_byteswap64(data[2]); | |
116 | data[3] = ss_byteswap64(data[3]); | |
117 | data[4] = ss_byteswap64(data[4]); | |
118 | data[5] = ss_byteswap64(data[5]); | |
119 | data[6] = ss_byteswap64(data[6]); | |
120 | data[7] = ss_byteswap64(data[7]); | |
121 | #endif | |
122 | } | |
123 | ||
124 | void poke128( uint64_t addr, uint64_t data[2] ) | |
125 | { | |
126 | #if defined(ARCH_X64) | |
127 | data[0] = ss_byteswap64(data[0]); | |
128 | data[1] = ss_byteswap64(data[1]); | |
129 | #endif | |
130 | uint8_t* ptr = get_ptr(addr); | |
131 | *(uint64_t*)(ptr + 0) = data[0]; | |
132 | *(uint64_t*)(ptr + 8) = data[1]; | |
133 | } | |
134 | ||
135 | void poke512( uint64_t addr, uint64_t data[8] ) | |
136 | { | |
137 | #if defined(ARCH_X64) | |
138 | data[0] = ss_byteswap64(data[0]); | |
139 | data[1] = ss_byteswap64(data[1]); | |
140 | data[2] = ss_byteswap64(data[2]); | |
141 | data[3] = ss_byteswap64(data[3]); | |
142 | data[4] = ss_byteswap64(data[4]); | |
143 | data[5] = ss_byteswap64(data[5]); | |
144 | data[6] = ss_byteswap64(data[6]); | |
145 | data[7] = ss_byteswap64(data[7]); | |
146 | #endif | |
147 | uint8_t* ptr = get_ptr(addr); | |
148 | *(uint64_t*)(ptr + 0) = data[0]; | |
149 | *(uint64_t*)(ptr + 8) = data[1]; | |
150 | *(uint64_t*)(ptr + 16) = data[2]; | |
151 | *(uint64_t*)(ptr + 24) = data[3]; | |
152 | *(uint64_t*)(ptr + 32) = data[4]; | |
153 | *(uint64_t*)(ptr + 40) = data[5]; | |
154 | *(uint64_t*)(ptr + 48) = data[6]; | |
155 | *(uint64_t*)(ptr + 56) = data[7]; | |
156 | } | |
157 | ||
158 | ||
159 | // Supported Fetch Operation (instruction fetch) | |
160 | ||
161 | virtual uint32_t fetch32( uint64_t addr ) { return SS_FastMemory::ld32u(addr); } | |
162 | virtual void fetch256( uint64_t addr, uint64_t data[4] ) { SS_FastMemory::ld256(addr,data); } | |
163 | virtual void fetch512( uint64_t addr, uint64_t data[8] ) { SS_FastMemory::ld512(addr,data); } | |
164 | ||
165 | // Supported Store Operations. st8(), st16(), st32() and st64() are gueranteed to be atomic. | |
166 | // st128() and st512() are atomic per 64bit quantity. | |
167 | ||
168 | virtual void st8( uint64_t addr, uint8_t data ) | |
169 | { | |
170 | *(uint8_t*)(get_ptr(addr)) = data; | |
171 | } | |
172 | virtual void st16( uint64_t addr, uint16_t data ) | |
173 | { | |
174 | #if defined(ARCH_X64) | |
175 | data = ss_byteswap16(data); | |
176 | #endif | |
177 | *(uint16_t*)(get_ptr(addr)) = data; | |
178 | } | |
179 | virtual void st32( uint64_t addr, uint32_t data ) | |
180 | { | |
181 | #if defined(ARCH_X64) | |
182 | data = ss_byteswap32(data); | |
183 | #endif | |
184 | *(uint32_t*)(get_ptr(addr)) = data; | |
185 | } | |
186 | virtual void st64( uint64_t addr, uint64_t data ) | |
187 | { | |
188 | #if defined(ARCH_X64) | |
189 | data = ss_byteswap64(data); | |
190 | #endif | |
191 | *(uint64_t*)(get_ptr(addr)) = data; | |
192 | } | |
193 | virtual void st128( uint64_t addr, uint64_t data[2] ) | |
194 | { | |
195 | #if defined(ARCH_X64) | |
196 | data[0] = ss_byteswap64(data[0]); | |
197 | data[1] = ss_byteswap64(data[1]); | |
198 | #endif | |
199 | uint8_t* ptr = get_ptr(addr); | |
200 | *(uint64_t*)(ptr + 0) = data[0]; | |
201 | *(uint64_t*)(ptr + 8) = data[1]; | |
202 | } | |
203 | virtual void st512( uint64_t addr, uint64_t data[8] ) | |
204 | { | |
205 | #if defined(ARCH_X64) | |
206 | data[0] = ss_byteswap64(data[0]); | |
207 | data[1] = ss_byteswap64(data[1]); | |
208 | data[2] = ss_byteswap64(data[2]); | |
209 | data[3] = ss_byteswap64(data[3]); | |
210 | data[4] = ss_byteswap64(data[4]); | |
211 | data[5] = ss_byteswap64(data[5]); | |
212 | data[6] = ss_byteswap64(data[6]); | |
213 | data[7] = ss_byteswap64(data[7]); | |
214 | #endif | |
215 | uint8_t* ptr = get_ptr(addr); | |
216 | *(uint64_t*)(ptr + 0) = data[0]; | |
217 | *(uint64_t*)(ptr + 8) = data[1]; | |
218 | *(uint64_t*)(ptr + 16) = data[2]; | |
219 | *(uint64_t*)(ptr + 24) = data[3]; | |
220 | *(uint64_t*)(ptr + 32) = data[4]; | |
221 | *(uint64_t*)(ptr + 40) = data[5]; | |
222 | *(uint64_t*)(ptr + 48) = data[6]; | |
223 | *(uint64_t*)(ptr + 56) = data[7]; | |
224 | } | |
225 | ||
226 | // Supported Load Operations. ld8[su]() to ld64() are quaranteed to be atomic. ld128() and | |
227 | // above are atomic at the 64 bit granularity. | |
228 | ||
229 | virtual uint8_t ld8u ( uint64_t addr ) | |
230 | { | |
231 | return *(uint8_t *)(get_ptr(addr)); | |
232 | } | |
233 | virtual int8_t ld8s( uint64_t addr ) | |
234 | { | |
235 | return *( int8_t *)(get_ptr(addr)); | |
236 | } | |
237 | virtual uint16_t ld16u( uint64_t addr ) | |
238 | { | |
239 | #if defined(ARCH_X64) | |
240 | uint16_t data = *(uint16_t*)(get_ptr(addr)); | |
241 | return ss_byteswap16(data); | |
242 | #else | |
243 | return *(uint16_t*)(get_ptr(addr)); | |
244 | #endif | |
245 | } | |
246 | virtual int16_t ld16s( uint64_t addr ) | |
247 | { | |
248 | #if defined(ARCH_X64) | |
249 | int16_t data = *( int16_t*)(get_ptr(addr)); | |
250 | return ss_byteswap16(data); | |
251 | #else | |
252 | return *( int16_t*)(get_ptr(addr)); | |
253 | #endif | |
254 | } | |
255 | virtual uint32_t ld32u( uint64_t addr ) | |
256 | { | |
257 | #if defined(ARCH_X64) | |
258 | uint32_t data = *(uint32_t*)(get_ptr(addr)); | |
259 | return ss_byteswap32(data); | |
260 | #else | |
261 | return *(uint32_t*)(get_ptr(addr)); | |
262 | #endif | |
263 | } | |
264 | virtual int32_t ld32s( uint64_t addr ) | |
265 | { | |
266 | #if defined(ARCH_X64) | |
267 | int32_t data = *( int32_t*)(get_ptr(addr)); | |
268 | return ss_byteswap32(data); | |
269 | #else | |
270 | return *( int32_t*)(get_ptr(addr)); | |
271 | #endif | |
272 | } | |
273 | virtual uint64_t ld64( uint64_t addr ) | |
274 | { | |
275 | #if defined(ARCH_X64) | |
276 | uint64_t data = *(uint64_t*)(get_ptr(addr)); | |
277 | return ss_byteswap64(data); | |
278 | #else | |
279 | return *(uint64_t*)(get_ptr(addr)); | |
280 | #endif | |
281 | } | |
282 | virtual void ld128( uint64_t addr, uint64_t data[2] ) | |
283 | { | |
284 | uint8_t* ptr = get_ptr(addr); | |
285 | data[0] = *(uint64_t*)(ptr + 0); | |
286 | data[1] = *(uint64_t*)(ptr + 8); | |
287 | #if defined(ARCH_X64) | |
288 | data[0] = ss_byteswap64(data[0]); | |
289 | data[1] = ss_byteswap64(data[1]); | |
290 | #endif | |
291 | } | |
292 | virtual void ld512( uint64_t addr, uint64_t data[8] ) | |
293 | { | |
294 | uint8_t* ptr = get_ptr(addr); | |
295 | data[0] = *(uint64_t*)(ptr + 0); | |
296 | data[1] = *(uint64_t*)(ptr + 8); | |
297 | data[2] = *(uint64_t*)(ptr + 16); | |
298 | data[3] = *(uint64_t*)(ptr + 24); | |
299 | data[4] = *(uint64_t*)(ptr + 32); | |
300 | data[5] = *(uint64_t*)(ptr + 40); | |
301 | data[6] = *(uint64_t*)(ptr + 48); | |
302 | data[7] = *(uint64_t*)(ptr + 56); | |
303 | #if defined(ARCH_X64) | |
304 | data[0] = ss_byteswap64(data[0]); | |
305 | data[1] = ss_byteswap64(data[1]); | |
306 | data[2] = ss_byteswap64(data[2]); | |
307 | data[3] = ss_byteswap64(data[3]); | |
308 | data[4] = ss_byteswap64(data[4]); | |
309 | data[5] = ss_byteswap64(data[5]); | |
310 | data[6] = ss_byteswap64(data[6]); | |
311 | data[7] = ss_byteswap64(data[7]); | |
312 | #endif | |
313 | } | |
314 | virtual void ld256( uint64_t addr, uint64_t data[4] ) | |
315 | { | |
316 | uint8_t* ptr = get_ptr(addr); | |
317 | data[0] = *(uint64_t*)(ptr + 0); | |
318 | data[1] = *(uint64_t*)(ptr + 8); | |
319 | data[2] = *(uint64_t*)(ptr + 16); | |
320 | data[3] = *(uint64_t*)(ptr + 24); | |
321 | #if defined(ARCH_X64) | |
322 | data[0] = ss_byteswap64(data[0]); | |
323 | data[1] = ss_byteswap64(data[1]); | |
324 | data[2] = ss_byteswap64(data[2]); | |
325 | data[3] = ss_byteswap64(data[3]); | |
326 | #endif | |
327 | } | |
328 | ||
329 | // st64partial() performs 8 byte partial store. The bytes to store are specified by mask. A 1 in bit N of | |
330 | // mask denotes that byte (data >> (8*N)) & 0xff should be written to memory | |
331 | ||
332 | virtual void st64partial( uint64_t addr, uint64_t data, uint64_t mask ) | |
333 | { | |
334 | ss_stp8(*(double*)&data,get_ptr(addr),mask); | |
335 | } | |
336 | ||
337 | // ld128atomic() (aka load twin double, load quad atomic) atomically loads two | |
338 | // 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1] | |
339 | // is the value at addr + 8. Note ld128 does() not guarantee atomicity. | |
340 | ||
341 | virtual void ld128atomic( uint64_t addr, uint64_t data[2] ) | |
342 | { | |
343 | ss_ld128atomic(get_ptr(addr),data); | |
344 | } | |
345 | ||
346 | // ldstub() return a byte from memory at addr, and set the byte at addr | |
347 | // to 0xff. The ldstub() operation is atomic. | |
348 | ||
349 | virtual uint8_t ldstub( uint64_t addr ) | |
350 | { | |
351 | return ss_ldstub(get_ptr(addr),0); | |
352 | } | |
353 | ||
354 | // swap() stores the 32bit value rd with the 32bit value at addr. | |
355 | // The old 32bit value at addr is returned. The operation is atomic. | |
356 | ||
357 | virtual uint32_t swap( uint64_t addr, uint32_t rd ) | |
358 | { | |
359 | return ss_swap(rd,get_ptr(addr),0); | |
360 | } | |
361 | ||
362 | // casx() compares the 64bit value rs2 with the 64bit value at addr. | |
363 | // If the two values are equal, the value rd is stored in the | |
364 | // 64bit value at addr. In both cases the old 64bit value at addr is | |
365 | // returned, that is the value at addr before the storei happened. | |
366 | // The casx() operation is atomic. | |
367 | ||
368 | virtual uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 ) | |
369 | { | |
370 | return ss_casx(rd,get_ptr(addr),rs2); | |
371 | } | |
372 | ||
373 | // cas() is as casx, but for 32bit. | |
374 | ||
375 | virtual uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 ) | |
376 | { | |
377 | return ss_cas(rd,get_ptr(addr),rs2); | |
378 | } | |
379 | ||
380 | // prefetch() prefetches data from memory into the cache hierarchy. | |
381 | ||
382 | void prefetch( uint64_t addr, uint_t size ) {} | |
383 | ||
384 | // flush() writes dirty data in the cache back to memory. | |
385 | ||
386 | void flush( uint64_t addr, uint_t size ) {} // process does not provide data. | |
387 | ||
388 | static SS_FastMemory memory; | |
389 | private: | |
390 | enum | |
391 | { | |
392 | // Configure the 55 bits of physical address space which is the | |
393 | // current SunSparc limit. Note, there is no processor has | |
394 | // implemented this many address lines yet. Currently the max | |
395 | // is 48. To avoid complains about memory size from people we | |
396 | // should provide 55 bits always. To make a choose, a toplevel | |
397 | // of 2^23 entries of 4GB pages is nice. In v8plus mode however | |
398 | // we use much less ... just enough to make it work. | |
399 | ||
400 | #if defined(ARCH_V9) || defined(ARCH_X64) | |
401 | L1BITS = 23, // 2^L1BITS of | |
402 | L2BITS = 32, // 4GB of NORESERVED mmapped space | |
403 | #elif defined(ARCH_V8) | |
404 | L1BITS = 29, // In v8 mode we have less va space ... | |
405 | L2BITS = 19, // 1MB of NORESERVED mmapped chunks | |
406 | #else | |
407 | #error "Oops" | |
408 | #endif | |
409 | L1SIZE = 1 << L1BITS, | |
410 | L1MASK = L1SIZE - 1 | |
411 | }; | |
412 | ||
413 | uint8_t* get_ptr( uint64_t addr ) | |
414 | { | |
415 | uint8_t** o1 = l1 + ((addr >> L2BITS) & L1MASK); | |
416 | uint8_t* l2 = *o1; | |
417 | if (l2 == 0) | |
418 | { | |
419 | l2lock.lock(); | |
420 | if (*o1 == 0) | |
421 | { | |
422 | l2 = *o1 = (uint8_t*)mmap((char*)0,1ull << L2BITS,PROT_READ|PROT_WRITE, | |
423 | MAP_NORESERVE|MAP_PRIVATE|MAP_ANON|MAP_ALIGN,-1,0); | |
424 | } | |
425 | l2lock.unlock(); | |
426 | } | |
427 | return l2 + (addr & uint64_t((1ull << L2BITS) - 1)); | |
428 | } | |
429 | ||
430 | uint64_t ram_size; | |
431 | uint64_t rom_size; | |
432 | uint64_t page_size; | |
433 | uint64_t page_mask; | |
434 | ||
435 | uint8_t** l1; | |
436 | BL_Mutex l2lock; | |
437 | }; | |
438 | ||
439 | #endif /* COMPILE_FOR_SAM */ | |
440 | #endif /* __SS_FastMemory_h__ */ |