Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: Memory.h | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /* | |
24 | * Copyright (C) 2005 Sun Microsystems, Inc. | |
25 | * All rights reserved. | |
26 | */ | |
27 | ||
28 | #ifndef __SAM_Memory_h__ | |
29 | #define __SAM_Memory_h__ | |
30 | ||
31 | #include <synch.h> | |
32 | #include <sys/types.h> | |
33 | #include <string.h> | |
34 | #include <stdlib.h> | |
35 | #include <sys/mman.h> | |
36 | #include <assert.h> | |
37 | #include "BL_Memory.h" | |
38 | ||
39 | extern "C" uint8_t ss_ldstub( void* base, uint64_t ofs ); | |
40 | extern "C" uint32_t ss_swap ( uint32_t rd, void* base, uint64_t ofs ); | |
41 | extern "C" uint32_t ss_cas ( uint32_t rd, void* base, uint32_t rs2 ); | |
42 | extern "C" uint64_t ss_casx ( uint64_t rd, void* base, uint64_t rs2 ); | |
43 | extern "C" void ss_stp8 ( double rd, void* base, uint64_t mask ); | |
44 | ||
45 | #include "utils.h" | |
46 | ||
// build flag to control mem model option:
// should be defined MEMORY_SPARSE or MEMORY_FLAT


// Number of striped mutexes used to make 64-bit (and larger atomic)
// accesses exclusive; addresses hash onto this array (see lock()/unlock()
// in SMemory below), so it must be a power of 2 for the mask to work.
const int SAM_NMEM_LOCKS = 1<<8; //256, should be power of 2


// Version tag written into / checked against memory dump (checkpoint) files.
const uint64_t SAM_MEM_DUMP_VERSION = 5<<8;
55 | ||
56 | #if defined(MEMORY_FLAT) | |
57 | ||
58 | // flat memory model | |
59 | ||
60 | ||
// Flat memory model: all of guest physical memory is backed by one
// contiguous mmap'ed image ("mem"); a guest physical address is used
// directly as a byte offset into that image (see ofs()).  Memory is
// kept in SPARC (big-endian) byte order, hence the ss_byteswap* calls
// on little-endian (ARCH_X64) hosts.
class SMemory : public BL_Memory
{
public:
    SMemory( uint64_t ram_size, uint_t pa_bits=43);
    ~SMemory();

    // Create/attach the mmap'ed backing store.  nm names the mem-mapped
    // file (a temp file when NULL); is_cp marks it as a checkpoint image.
    int init(char *nm=NULL, int is_cp = 0);

    uint64_t get_size() { return size; }


    // get_base() is a private method and should not be used
    // outside this module
    uint8_t* get_base() { return mem; }

    // Supported User Interface Operations
    int load( const char* mem_image_filename );
    int load( const char *file, uint64_t addr) { return load_bin(file, addr); }
    int load_bin( const char *file, uint64_t addr);
    void save( const char* filename, uint64_t addr, uint64_t size );

    /* NOTE:
       SMemory:: qualifications needed to force inlining of the routines so
       that performance degradation due to virtualization is avoided
     */

    // poke*/peek* are debugger-style accessors; they simply forward to
    // the regular store/load routines below.
    void     poke8 ( uint64_t addr, uint8_t  data ) { SMemory::st8(addr,data); }
    void     poke16( uint64_t addr, uint16_t data ) { SMemory::st16(addr,data); }
    void     poke32( uint64_t addr, uint32_t data ) { SMemory::st32(addr,data); }
    void     poke64( uint64_t addr, uint64_t data ) { SMemory::st64(addr,data); }
    uint8_t  peek8u( uint64_t addr )  { return SMemory::ld8u(addr); }
    int8_t   peek8s( uint64_t addr )  { return SMemory::ld8s(addr); }
    uint16_t peek16u( uint64_t addr ) { return SMemory::ld16u(addr); }
    int16_t  peek16s( uint64_t addr ) { return SMemory::ld16s(addr); }
    uint32_t peek32u( uint64_t addr ) { return SMemory::ld32u(addr); }
    int32_t  peek32s( uint64_t addr ) { return SMemory::ld32s(addr); }
    uint64_t peek64( uint64_t addr )  { return SMemory::ld64(addr); }

    // instruction fetch (one 32-bit SPARC instruction word)
    uint32_t fetch32( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(mem + ofs(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(mem + ofs(addr));
#endif
    }

    void fetch256( uint64_t addr, uint64_t data[4] ) { SMemory::ld256(addr,data); }
    void fetch512( uint64_t addr, uint64_t data[8] ) { SMemory::ld512(addr,data); }

    // Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
    // st128() and st512() are atomic per 64bit quantity.

    void st8( uint64_t addr, uint8_t data )
    {
        *(uint8_t *)(mem + ofs(addr)) = data;
    }
    void st16( uint64_t addr, uint16_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap16(data);
#endif
        *(uint16_t*)(mem + ofs(addr)) = data;
    }
    void st32( uint64_t addr, uint32_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap32(data);
#endif
        *(uint32_t*)(mem + ofs(addr)) = data;
    }
    // st64_nl(): 64-bit store without taking the striped lock ("nl" = no
    // lock); used when the caller already holds the lock or does not need
    // atomicity with respect to ld128atomic().
    void st64_nl( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap64(data);
#endif
        *(uint64_t*)(mem + ofs(addr)) = data;
    }
    void st64 ( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap64(data);
#endif
        // addr is reused as the image offset from here on; the striped
        // lock makes this store atomic w.r.t. ld128atomic() on the line.
        addr = ofs(addr);
        lock(addr);
        *(uint64_t*)(mem + addr) = data;
        unlock(addr);
    }

    // NOTE(review): on ARCH_X64 hosts st128()/st512() byteswap the
    // caller's data[] array IN PLACE — callers must not reuse the buffer
    // contents after the call.  Confirm all call sites tolerate this.
    void st128( uint64_t addr, uint64_t data[2] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
        *(uint64_t*)(mem + ofs(addr) + 0) = data[0];
        *(uint64_t*)(mem + ofs(addr) + 8) = data[1];
    }
    void st512( uint64_t addr, uint64_t data[8] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
        *(uint64_t*)(mem + ofs(addr) +  0) = data[0];
        *(uint64_t*)(mem + ofs(addr) +  8) = data[1];
        *(uint64_t*)(mem + ofs(addr) + 16) = data[2];
        *(uint64_t*)(mem + ofs(addr) + 24) = data[3];
        *(uint64_t*)(mem + ofs(addr) + 32) = data[4];
        *(uint64_t*)(mem + ofs(addr) + 40) = data[5];
        *(uint64_t*)(mem + ofs(addr) + 48) = data[6];
        *(uint64_t*)(mem + ofs(addr) + 56) = data[7];
    }

    // Supported Load Operations. ld8[su]() to ld64() are guaranteed to be atomic. ld128() and
    // above are atomic at the 64 bit granularity.

    uint8_t ld8u ( uint64_t addr )
    {
        return *(uint8_t *)(mem + ofs(addr));
    }
    int8_t ld8s( uint64_t addr )
    {
        return *( int8_t *)(mem + ofs(addr));
    }
    uint16_t ld16u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint16_t data = *(uint16_t*)(mem + ofs(addr));
        return ss_byteswap16(data);
#else
        return *(uint16_t*)(mem + ofs(addr));
#endif
    }
    int16_t ld16s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int16_t data = *( int16_t*)(mem + ofs(addr));
        return ss_byteswap16(data);
#else
        return *( int16_t*)(mem + ofs(addr));
#endif
    }
    uint32_t ld32u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(mem + ofs(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(mem + ofs(addr));
#endif
    }
    int32_t ld32s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int32_t data = *( int32_t*)(mem + ofs(addr));
        return ss_byteswap32(data);
#else
        return *( int32_t*)(mem + ofs(addr));
#endif
    }
    uint64_t ld64( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint64_t data = *(uint64_t*)(mem + ofs(addr));
        return ss_byteswap64(data);
#else
        return *(uint64_t*)(mem + ofs(addr));
#endif
    }
    void ld128( uint64_t addr, uint64_t data[2] )
    {
        data[0] = *(uint64_t*)(mem + ofs(addr) + 0);
        data[1] = *(uint64_t*)(mem + ofs(addr) + 8);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
    }
    void ld512( uint64_t addr, uint64_t data[8] )
    {
        data[0] = *(uint64_t*)(mem + ofs(addr) +  0);
        data[1] = *(uint64_t*)(mem + ofs(addr) +  8);
        data[2] = *(uint64_t*)(mem + ofs(addr) + 16);
        data[3] = *(uint64_t*)(mem + ofs(addr) + 24);
        data[4] = *(uint64_t*)(mem + ofs(addr) + 32);
        data[5] = *(uint64_t*)(mem + ofs(addr) + 40);
        data[6] = *(uint64_t*)(mem + ofs(addr) + 48);
        data[7] = *(uint64_t*)(mem + ofs(addr) + 56);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
    }
    void ld256( uint64_t addr, uint64_t data[4] )
    {
        data[0] = *(uint64_t*)(mem + ofs(addr) +  0);
        data[1] = *(uint64_t*)(mem + ofs(addr) +  8);
        data[2] = *(uint64_t*)(mem + ofs(addr) + 16);
        data[3] = *(uint64_t*)(mem + ofs(addr) + 24);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
#endif
    }

    // st64partial() performs 8 byte partial store. The bytes to store are specified by mask. A 1 in bit N of
    // mask denotes that byte (data >> (8*N)) & 0xff should be written to memory

    void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
    {
        // Delegated to the ss_stp8 assembly helper; data is type-punned
        // through double because of that helper's calling convention.
        ss_stp8(*(double*)&data,mem + ofs(addr),mask);
    }

    // ld128atomic() (aka load twin double, load quad atomic) atomically loads two
    // 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
    // is the value at addr + 8. Note ld128() does not guarantee atomicity.

    void ld128atomic( uint64_t addr, uint64_t data[2] )
    {
        addr = ofs(addr);
        lock(addr);
        data[0] = *(uint64_t*)(mem + addr + 0);
        data[1] = *(uint64_t*)(mem + addr + 8);
        unlock(addr);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
    }

    // ldstub() returns a byte from memory at addr, and sets the byte at addr
    // to 0xff. The ldstub() operation is atomic (performed by the ss_ldstub
    // assembly helper).

    uint8_t ldstub( uint64_t addr )
    {
        uint8_t _rd = ss_ldstub(mem,ofs(addr));
        return _rd;
    }

    // swap() stores the 32bit value rd with the 32bit value at addr.
    // The old 32bit value at addr is returned. The operation is atomic.

    uint32_t swap( uint64_t addr, uint32_t rd )
    {
        uint32_t _rd = ss_swap(rd,mem,ofs(addr));
        return _rd;
    }

    // casx() compares the 64bit value rs2 with the 64bit value at addr.
    // If the two values are equal, the value rd is stored in the
    // 64bit value at addr. In both cases the old 64bit value at addr is
    // returned, that is the value at addr before the store happened.
    // The casx() operation is atomic.

    uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
    {
        uint64_t _rd = ss_casx(rd,mem + ofs(addr),rs2);
        return _rd;
    }

    // cas() is as casx, but for 32bit.

    uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
    {
        uint32_t _rd = ss_cas(rd,mem + ofs(addr),rs2);
        return _rd;
    }

    // prefetch() prefetches data from memory into the cache hierarchy.
    //void prefetch( uint64_t addr, uint_t _size ) {}

    // flush() writes dirty data in the cache back to memory.
    //void flush( uint64_t addr, uint_t _size ) {} // process does not provide data.

    // Bulk copies between the memory image and a host buffer; always
    // return 0 (no partial-copy reporting).
    int block_read(uint64_t addr, uint8_t *tgt, int _size)
    {
        memcpy(tgt, mem + ofs(addr), _size);
        return 0;
    }
    int block_write(uint64_t addr, const uint8_t *src, int _size)
    {
        memcpy(mem + ofs(addr), src, _size);
        return 0;
    }

    // Checkpoint support: dump the image to / restore it from a directory.
    int dump    ( char *dir_name, char *file_name );
    int restore ( char *dir_name );



private:

    // Striped locks: addresses hash (by 16-byte line) onto one of
    // SAM_NMEM_LOCKS mutexes; SAM_NMEM_LOCKS must be a power of 2.
    void lock   ( uint64_t addr ) { mutex_lock   ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
    void unlock ( uint64_t addr ) { mutex_unlock ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }

    void handle_out_of_range ( uint64_t addr );

    // Translate a guest physical address into an offset into the flat
    // image (identity mapping; optional bounds check).
    // NOTE(review): the range check uses '>' so addr == size slips
    // through — looks like it should be '>=' — confirm intent.
    uint64_t ofs( uint64_t addr )
    {
#ifdef SAM_RAM_RANGE_CHECK
        if (addr > size)
            this->handle_out_of_range ( addr );
#endif

        return addr;
    }




private:

    char *mem_file;   // name of mem mapped file,
                      // could be a temp file or a checkpoint file
    int mfile;        // file descriptor,
                      // if > 0 delete the file when done

    uint8_t* mem;     // mmap'ed image
    uint64_t size;    // image size in bytes
    uint64_t pa_mask; // mask derived from pa_bits

    mutex_t locks[SAM_NMEM_LOCKS]; // striped locks for atomic 64-bit ops
};
423 | ||
424 | #elif defined(MEMORY_SPARSE) | |
425 | ||
// io bit defines mem/io access, bit 39 for Ni, bit 47 for
// the larger address space parts (original comment truncated here).
const uint64_t IO_BIT = uint64_t(1)<<39;
// Returns non-zero when pa falls in one of the hard-coded physical
// address windows claimed for ram-mapped ("dumbserial") consoles.
//
// We really need to get the address space layout controlled.
// This special case range is crazy. Next we'll need to add
// yet again another 8K to this. Where will it end? However,
// SAM does not have a solution for this problem yet; one
// was proposed but no action has been taken. Until then just
// claim the ranges below for ram mapped consoles.
inline int is_dumbserial_addr(uint64_t pa)
{
    if (pa >= 0x1f10000000 && pa <= 0x1f10003fff)
        return 1;
    // Note this is not a ROM address on. (original comment truncated)
    return (pa >= 0xfff0c2c000) && (pa <= 0xfff0c2cfff);
}
440 | ||
441 | ||
442 | ||
443 | extern "C" int SYSTEM_physio_access(uint32_t cpu_id, void* obj, uint64_t paddr, | |
444 | int wr, uint32_t size, uint64_t* buf, uint8_t bytemask); | |
445 | ||
446 | ||
447 | // Entry record for each mmaped file, | |
448 | // used to accelerate mem load - mmap each file image; | |
449 | class MappedFileEntry | |
450 | { | |
451 | public: | |
452 | ||
453 | MappedFileEntry(const char *file_name, uint64_t addr=0); | |
454 | ~MappedFileEntry(); | |
455 | ||
456 | char *name; // name of mem mapped bin or a checkpoint file | |
457 | int mfile; // file descriptor, if > 0 delete the file when done | |
458 | ||
459 | uint8_t* mem; // mmap'ed image | |
460 | ||
461 | uint64_t addr; // starting address | |
462 | uint64_t size; // file size | |
463 | ||
464 | MappedFileEntry *next; // next entry in the list | |
465 | ||
466 | is_valid() { return name !=0 && mem != NULL && mem != MAP_FAILED && mfile >= 0; } | |
467 | }; | |
468 | ||
469 | // sparse memory model | |
// Sparse memory model: guest physical memory is a 3-level page table
// (l1 -> l2 -> l3 data pages) populated lazily on first store.  Loads
// from never-written locations read from uninit_page instead of
// allocating.  Data is kept in SPARC (big-endian) byte order, hence
// the ss_byteswap* calls on little-endian (ARCH_X64) hosts.
class SMemory : public BL_Memory
{
public:
    SMemory( uint64_t ram_size=0, uint_t pa_bits=48, uint_t _l1bits=18, uint_t _l2bits=10, uint_t _l3bits=20 );
    ~SMemory();

    // Nothing to set up beyond the constructor in the sparse model.
    int init(char *nm=NULL, int is_cp=0) { return 1; }

    uint64_t get_size() { return size; }


    // get_base() is a private method and should not be used
    // outside this module (no flat base exists in the sparse model).
    uint8_t* get_base() { return NULL; }

    // Supported User Interface Operations
    int load( const char* mem_image_filename );
    int load( const char *file, uint64_t addr) { return load_bin(file, addr); }
    int load_bin( const char *file, uint64_t addr);
    void save( const char* filename, uint64_t addr, uint64_t size );


    /* NOTE:
       SMemory:: qualifications needed to force inlining of the routines so
       that performance degradation due to virtualization is avoided
     */

    // poke*/peek* are debugger-style accessors; they simply forward to
    // the regular store/load routines below.
    void     poke8 ( uint64_t addr, uint8_t  data ) { SMemory::st8(addr,data); }
    void     poke16( uint64_t addr, uint16_t data ) { SMemory::st16(addr,data); }
    void     poke32( uint64_t addr, uint32_t data ) { SMemory::st32(addr,data); }
    void     poke64( uint64_t addr, uint64_t data ) { SMemory::st64(addr,data); }
    uint8_t  peek8u( uint64_t addr )  { return SMemory::ld8u(addr); }
    int8_t   peek8s( uint64_t addr )  { return SMemory::ld8s(addr); }
    uint16_t peek16u( uint64_t addr ) { return SMemory::ld16u(addr); }
    int16_t  peek16s( uint64_t addr ) { return SMemory::ld16s(addr); }
    uint32_t peek32u( uint64_t addr ) { return SMemory::ld32u(addr); }
    int32_t  peek32s( uint64_t addr ) { return SMemory::ld32s(addr); }
    uint64_t peek64( uint64_t addr )  { return SMemory::ld64(addr); }

    // instruction fetch (one 32-bit SPARC instruction word)
    uint32_t fetch32( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(get_ld_ptr(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(get_ld_ptr(addr));
#endif
    }

    void fetch256( uint64_t addr, uint64_t data[4] ) { SMemory::ld256(addr,data); }
    void fetch512( uint64_t addr, uint64_t data[8] ) { SMemory::ld512(addr,data); }




    // Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
    // st128() and st512() are atomic per 64bit quantity.

    // st8() special-cases the ram-mapped console windows: those bytes
    // are routed to the IO side via SYSTEM_physio_access() (with the
    // IO_BIT set) instead of being written to memory.
    void st8( uint64_t addr, uint8_t data )
    {
        if(is_dumbserial_addr(addr))
        {
            // console access
            uint64_t v = uint64_t(data);
            addr |= IO_BIT;
            SYSTEM_physio_access(0, 0, addr, 1, 1, &v, ~0);
        }
        else // mem access
        {
            *(uint8_t*)(get_st_ptr(addr)) = data;
        }
    }

    void st16( uint64_t addr, uint16_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap16(data);
#endif
        *(uint16_t*)(get_st_ptr(addr)) = data;
    }
    void st32( uint64_t addr, uint32_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap32(data);
#endif
        *(uint32_t*)(get_st_ptr(addr)) = data;
    }
    // st64_nl(): 64-bit store without taking the striped lock ("nl" =
    // no lock); st64() wraps it with the lock for atomicity w.r.t.
    // ld128atomic().
    void st64_nl( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap64(data);
#endif
        *(uint64_t*)(get_st_ptr(addr)) = data;
    }
    void st64 ( uint64_t addr, uint64_t data )
    {
        lock(addr);
        st64_nl(addr,data);
        unlock(addr);
    }

    // NOTE(review): on ARCH_X64 hosts st128()/st512() byteswap the
    // caller's data[] array IN PLACE — callers must not reuse the
    // buffer contents after the call.
    // NOTE(review): st128 masks addr to a 32-byte boundary (~0x1f) for
    // a 16-byte store, and ld128 below does not mask at all — confirm
    // this asymmetry is intended.
    void st128( uint64_t addr, uint64_t data[2] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
        uint8_t* ptr = get_st_ptr(addr & ~uint64_t(0x1f));
        *(uint64_t*)(ptr + 0) = data[0];
        *(uint64_t*)(ptr + 8) = data[1];
    }
    void st512( uint64_t addr, uint64_t data[8] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
        uint8_t* ptr = get_st_ptr(addr & ~uint64_t(0x3f));
        *(uint64_t*)(ptr +  0) = data[0];
        *(uint64_t*)(ptr +  8) = data[1];
        *(uint64_t*)(ptr + 16) = data[2];
        *(uint64_t*)(ptr + 24) = data[3];
        *(uint64_t*)(ptr + 32) = data[4];
        *(uint64_t*)(ptr + 40) = data[5];
        *(uint64_t*)(ptr + 48) = data[6];
        *(uint64_t*)(ptr + 56) = data[7];
    }

    // Supported Load Operations. ld8[su]() to ld64() are guaranteed to be atomic. ld128() and
    // above are atomic at the 64 bit granularity.

    // ld8u() special-cases the ram-mapped console windows, routing the
    // read to the IO side via SYSTEM_physio_access() (mirrors st8()).
    uint8_t ld8u ( uint64_t addr )
    {
        if(is_dumbserial_addr(addr))
        {
            // console access
            uint64_t v = 0;
            addr |= IO_BIT;
            SYSTEM_physio_access(0, 0, addr, 0, 1, &v, ~0);
            return uint8_t(v);

        }
        else // mem access
        {
            return *(uint8_t *)(get_ld_ptr(addr));
        }
    }

    int8_t ld8s( uint64_t addr )
    {
        return *( int8_t *)(get_ld_ptr(addr));
    }
    uint16_t ld16u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint16_t data = *(uint16_t*)(get_ld_ptr(addr));
        return ss_byteswap16(data);
#else
        return *(uint16_t*)(get_ld_ptr(addr));
#endif
    }
    int16_t ld16s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int16_t data = *( int16_t*)(get_ld_ptr(addr));
        return ss_byteswap16(data);
#else
        return *( int16_t*)(get_ld_ptr(addr));
#endif
    }
    uint32_t ld32u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(get_ld_ptr(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(get_ld_ptr(addr));
#endif
    }
    int32_t ld32s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int32_t data = *( int32_t*)(get_ld_ptr(addr));
        return ss_byteswap32(data);
#else
        return *( int32_t*)(get_ld_ptr(addr));
#endif
    }
    uint64_t ld64( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint64_t data = *(uint64_t*)(get_ld_ptr(addr));
        return ss_byteswap64(data);
#else
        return *(uint64_t*)(get_ld_ptr(addr));
#endif
    }
    void ld128( uint64_t addr, uint64_t data[2] )
    {
        uint8_t* ptr = get_ld_ptr(addr);
        data[0] = *(uint64_t*)(ptr + 0);
        data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
    }
    void ld512( uint64_t addr, uint64_t data[8] )
    {
        uint8_t* ptr = get_ld_ptr(addr & ~uint64_t(0x3f));
        data[0] = *(uint64_t*)(ptr +  0);
        data[1] = *(uint64_t*)(ptr +  8);
        data[2] = *(uint64_t*)(ptr + 16);
        data[3] = *(uint64_t*)(ptr + 24);
        data[4] = *(uint64_t*)(ptr + 32);
        data[5] = *(uint64_t*)(ptr + 40);
        data[6] = *(uint64_t*)(ptr + 48);
        data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
    }
    void ld256( uint64_t addr, uint64_t data[4] )
    {
        uint8_t* ptr = get_ld_ptr(addr & ~uint64_t(0x1f));
        data[0] = *(uint64_t*)(ptr +  0);
        data[1] = *(uint64_t*)(ptr +  8);
        data[2] = *(uint64_t*)(ptr + 16);
        data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
#endif
    }


    // st64partial() performs 8 byte partial store. The bytes to store are specified by mask. A 1 in bit N of
    // mask denotes that byte (data >> (8*N)) & 0xff should be written to memory

    void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
    {
        // data is type-punned through double because of the ss_stp8
        // assembly helper's calling convention.
        ss_stp8(*(double*)&data,get_st_ptr(addr),mask);
    }

    // ld128atomic() (aka load twin double, load quad atomic) atomically loads two
    // 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
    // is the value at addr + 8. Note ld128() does not guarantee atomicity on its own.

    void ld128atomic( uint64_t addr, uint64_t data[2] )
    {
        lock(addr);
        ld128(addr,data);
        unlock(addr);
    }

    // ldstub() returns a byte from memory at addr, and sets the byte at addr
    // to 0xff. The ldstub() operation is atomic (performed by ss_ldstub;
    // the page pointer is resolved first, so offset 0 is passed).

    uint8_t ldstub( uint64_t addr )
    {
        uint8_t _rd = ss_ldstub(get_st_ptr(addr),0);
        return _rd;
    }

    // swap() stores the 32bit value rd with the 32bit value at addr.
    // The old 32bit value at addr is returned. The operation is atomic.

    uint32_t swap( uint64_t addr, uint32_t rd )
    {
        uint32_t _rd = ss_swap(rd,get_st_ptr(addr),0);
        return _rd;
    }

    // casx() compares the 64bit value rs2 with the 64bit value at addr.
    // If the two values are equal, the value rd is stored in the
    // 64bit value at addr. In both cases the old 64bit value at addr is
    // returned, that is the value at addr before the store happened.
    // The casx() operation is atomic.

    uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
    {
        uint64_t _rd = ss_casx(rd,get_st_ptr(addr),rs2);
        return _rd;
    }

    // cas() is as casx, but for 32bit.

    uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
    {
        uint32_t _rd = ss_cas(rd,get_st_ptr(addr),rs2);
        return _rd;
    }

    // prefetch() prefetches data from memory into the cache hierarchy.
    //void prefetch( uint64_t addr, uint_t _size ) {}

    // flush() writes dirty data in the cache back to memory.
    //void flush( uint64_t addr, uint_t _size ) {} // process does not provide data.


    // Bulk copies between guest memory and a host buffer (defined
    // out of line; must walk the sparse page tables).
    int block_read(uint64_t addr, uint8_t *tgt, int _size);
    int block_write(uint64_t addr, const uint8_t *src, int _size);

    // Checkpoint support: dump allocated (dirty) pages / restore them.
    int dump    ( char *dir_name, char *file_name );
    int restore ( char *dir_name );

    // no mem page allocation on load accesses;
    // if load goes to uninit location - return unknown value
    // (a pointer into uninit_page, so up to a cache line can be read).
    // NOTE(review): the tables are indexed with byte offsets through
    // (char*) casts, so l1mask/l2mask appear to be pre-scaled by
    // sizeof(pointer) — confirm against the constructor.
    uint8_t* get_ld_ptr ( uint64_t addr )
    {
        uint8_t*** o1 = (uint8_t***)((char*)l1 + ((addr >> l1shft) & l1mask));
        uint8_t**  l2 = *o1;

        if (l2 == 0)
            return uninit_page + ( addr & 0x7 );

        uint8_t** o2 = (uint8_t**)((char*)l2 + ((addr >> l2shft) & l2mask));
        uint8_t*  l3 = *o2;
        if (l3 == 0)
            return uninit_page + ( addr & 0x7 );

        // l3 carries the dirty flag in its low bit; strip it.
        return mask_dirty(l3) + (addr & l3mask);
    }

    // allocate mem page if store goes to uninit location;
    // acquire a lock to prevent multiple writers on MP run
    // (double-checked under l2_lock/l3_lock).
    // NOTE(review): the calloc element size is sizeof(uint8_t), which
    // is only correct if l2size/l3size are byte counts (consistent with
    // the byte-offset indexing above) — confirm in the constructor.
    uint8_t* get_st_ptr( uint64_t addr )
    {

        uint8_t*** o1 = (uint8_t***)((char*)l1 + ((addr >> l1shft) & l1mask) );
        uint8_t**  l2 = *o1;

        if (l2 == 0)
        {
            mutex_lock(&l2_lock);

            // check again if level 2 table is already allocated
            l2 = *o1 ;
            if(l2 == 0)
                l2 = *o1 = (uint8_t**)calloc(l2size,sizeof(uint8_t));

            mutex_unlock(&l2_lock);
        }

        if (l2)
        {
            uint8_t** o2 = (uint8_t**)((char*)l2 + ((addr >> l2shft) & l2mask));
            uint8_t*  l3 = *o2;

            if (l3 == 0)
            {
                mutex_lock(&l3_lock);

                // check again if level 3 page is already allocated
                l3 = *o2;
                if(l3 == 0)
                    l3 = *o2 = (uint8_t*)calloc(l3size,sizeof(uint8_t));

                mutex_unlock ( &l3_lock );
            }

            if (l3) {
                // mark this line dirty. The dirty flag is ONLY read and cleared
                // at dump time (with SAM stopped) so there is no locking needed.
                l3 = *o2 = set_dirty(l3);
                return mask_dirty(l3) + (addr & l3mask);
            }
        }

        // calloc failed at either level: host is out of memory.
        fprintf(stderr, "\nMEM: Run out of memory, exit...\n");
        exit(1);
    }



    uint64_t get_l1size() { return l1size; }
    uint64_t get_l2size() { return l2size; }
    uint64_t get_l3size() { return l3size; }

    // The mlist points to the list of mapped file entries that
    // will need to be unmapped in the destructor
    void link(MappedFileEntry *e) {
        assert(e->next == NULL); e->next = mlist; mlist = e;
    }

    // Splice a mapped file (or one page of it) into the page tables.
    int map_page (uint64_t addr, uint8_t *maddr);
    int map (MappedFileEntry *e);

private:

    // Striped locks: addresses hash (by 16-byte line) onto one of
    // SAM_NMEM_LOCKS mutexes; SAM_NMEM_LOCKS must be a power of 2.
    void lock   ( uint64_t addr ) { mutex_lock   ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
    void unlock ( uint64_t addr ) { mutex_unlock ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }

private:

    uint8_t*** l1;       // root of the 3-level page table

    uint_t   l1bits;     // index width of each level (from ctor args)
    uint_t   l2bits;
    uint_t   l3bits;
    uint_t   l1shft;     // address shift for each level's index
    uint_t   l2shft;
    uint64_t l1size;     // table/page sizes (byte counts, see notes above)
    uint64_t l2size;
    uint64_t l3size;
    uint64_t l1mask;     // index masks (l1/l2 byte-scaled, l3 in bytes)
    uint64_t l2mask;
    uint64_t l3mask;

    uint64_t size;       // nominal ram size
    uint64_t pa_mask;    // mask derived from pa_bits

    uint8_t uninit_page[512]; // backing for loads from unallocated memory

    mutex_t locks [SAM_NMEM_LOCKS ]; // striped locks for atomic 64-bit ops
    mutex_t l2_lock;     // serializes level-2 table allocation
    mutex_t l3_lock;     // serializes level-3 page allocation

    MappedFileEntry *mlist; // mem mapped file list

    // The dirty flag is stashed in the low bit of the l3 page pointer
    // (pages are calloc'ed, so at least 2-byte aligned in practice).
    static const uint64_t dirtyflag = 1ull;

    bool is_dirty(uint8_t * l3) {
        return (dirtyflag & (uint64_t) l3);
    }

    uint8_t * set_dirty(uint8_t* l3) {
        return (uint8_t *) (dirtyflag | (uint64_t) l3);
    }

    uint8_t * mask_dirty(uint8_t * l3) {
        return (uint8_t *) (~dirtyflag & (uint64_t) l3);
    }
};
930 | ||
931 | #elif defined(MEMORY_EXTERNAL) | |
932 | ||
933 | #include "SS_ExternalMemory.h" | |
934 | ||
935 | typedef SS_ExternalMemory SMemory; | |
936 | ||
937 | #define st64_nl poke64 | |
938 | ||
939 | #else | |
940 | ||
941 | #pragma "You should define a memory to use though some -D flag" | |
942 | ||
943 | #endif // MEMORY_XX | |
944 | ||
945 | ||
946 | ||
947 | ||
948 | ||
949 | ||
950 | #endif //__SAM_Memory_h__ |