/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: Memory.h
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
 * Copyright (C) 2005 Sun Microsystems, Inc.
 * All rights reserved.
 */

#ifndef __SAM_Memory_h__
#define __SAM_Memory_h__

#include <synch.h>
#include <sys/types.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>      // for fprintf() used by the sparse model below
#include <sys/mman.h>
#include <assert.h>
#include "BL_Memory.h"

extern "C" uint8_t  ss_ldstub( void* base, uint64_t ofs );
extern "C" uint32_t ss_swap  ( uint32_t rd, void* base, uint64_t ofs );
extern "C" uint32_t ss_cas   ( uint32_t rd, void* base, uint32_t rs2 );
extern "C" uint64_t ss_casx  ( uint64_t rd, void* base, uint64_t rs2 );
extern "C" void     ss_stp8  ( double rd, void* base, uint64_t mask );

#include "utils.h"

// Build flag to control the memory model option:
// either MEMORY_SPARSE or MEMORY_FLAT should be defined.

const int SAM_NMEM_LOCKS = 1<<8; // 256, must be a power of 2


const uint64_t SAM_MEM_DUMP_VERSION = 5<<8;

#if defined(MEMORY_FLAT)

// flat memory model


class SMemory : public BL_Memory
{
public:
    SMemory( uint64_t ram_size, uint_t pa_bits=43 );
    ~SMemory();

    int init(char *nm=NULL, int is_cp = 0);

    uint64_t get_size() { return size; }


    // get_base() is effectively a private method and should not be used
    // outside this module
    uint8_t* get_base() { return mem; }

    // Supported User Interface Operations
    int  load( const char* mem_image_filename );
    int  load( const char *file, uint64_t addr) { return load_bin(file, addr); }
    int  load_bin( const char *file, uint64_t addr);
    void save( const char* filename, uint64_t addr, uint64_t size );

    /* NOTE:
       The SMemory:: qualification is needed to force inlining of these
       routines, so that the performance degradation caused by virtual
       dispatch is avoided.
    */

    void     poke8 ( uint64_t addr, uint8_t  data ) { SMemory::st8(addr,data); }
    void     poke16( uint64_t addr, uint16_t data ) { SMemory::st16(addr,data); }
    void     poke32( uint64_t addr, uint32_t data ) { SMemory::st32(addr,data); }
    void     poke64( uint64_t addr, uint64_t data ) { SMemory::st64(addr,data); }
    uint8_t  peek8u ( uint64_t addr ) { return SMemory::ld8u(addr); }
    int8_t   peek8s ( uint64_t addr ) { return SMemory::ld8s(addr); }
    uint16_t peek16u( uint64_t addr ) { return SMemory::ld16u(addr); }
    int16_t  peek16s( uint64_t addr ) { return SMemory::ld16s(addr); }
    uint32_t peek32u( uint64_t addr ) { return SMemory::ld32u(addr); }
    int32_t  peek32s( uint64_t addr ) { return SMemory::ld32s(addr); }
    uint64_t peek64 ( uint64_t addr ) { return SMemory::ld64(addr); }

    // instruction fetch
    uint32_t fetch32( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(mem + ofs(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(mem + ofs(addr));
#endif
    }

    void fetch256( uint64_t addr, uint64_t data[4] ) { SMemory::ld256(addr,data); }
    void fetch512( uint64_t addr, uint64_t data[8] ) { SMemory::ld512(addr,data); }
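
    // Usage sketch (illustrative only; the "ram" instance and the 1GB size
    // are hypothetical, not part of this header):
    //
    //   SMemory ram( uint64_t(1) << 30 );          // 1GB of simulated RAM
    //   ram.init();
    //   ram.poke32( 0x4000, 0x01020304 );          // debugger-style write
    //   uint32_t insn  = ram.fetch32( 0x4000 );    // instruction fetch
    //   uint64_t dword = ram.peek64 ( 0x4000 );    // debugger-style read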

    // Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
    // st128() and st512() are atomic per 64bit quantity.

    void st8( uint64_t addr, uint8_t data )
    {
        *(uint8_t *)(mem + ofs(addr)) = data;
    }
    void st16( uint64_t addr, uint16_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap16(data);
#endif
        *(uint16_t*)(mem + ofs(addr)) = data;
    }
    void st32( uint64_t addr, uint32_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap32(data);
#endif
        *(uint32_t*)(mem + ofs(addr)) = data;
    }
    void st64_nl( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap64(data);
#endif
        *(uint64_t*)(mem + ofs(addr)) = data;
    }
    void st64( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap64(data);
#endif
        addr = ofs(addr);
        lock(addr);
        *(uint64_t*)(mem + addr) = data;
        unlock(addr);
    }

    void st128( uint64_t addr, uint64_t data[2] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
        *(uint64_t*)(mem + ofs(addr) + 0) = data[0];
        *(uint64_t*)(mem + ofs(addr) + 8) = data[1];
    }
    void st512( uint64_t addr, uint64_t data[8] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
        *(uint64_t*)(mem + ofs(addr) +  0) = data[0];
        *(uint64_t*)(mem + ofs(addr) +  8) = data[1];
        *(uint64_t*)(mem + ofs(addr) + 16) = data[2];
        *(uint64_t*)(mem + ofs(addr) + 24) = data[3];
        *(uint64_t*)(mem + ofs(addr) + 32) = data[4];
        *(uint64_t*)(mem + ofs(addr) + 40) = data[5];
        *(uint64_t*)(mem + ofs(addr) + 48) = data[6];
        *(uint64_t*)(mem + ofs(addr) + 56) = data[7];
    }

    // Supported Load Operations. ld8[su]() through ld64() are guaranteed to be atomic. ld128() and
    // above are atomic at the 64 bit granularity.

    uint8_t ld8u( uint64_t addr )
    {
        return *(uint8_t *)(mem + ofs(addr));
    }
    int8_t ld8s( uint64_t addr )
    {
        return *( int8_t *)(mem + ofs(addr));
    }
    uint16_t ld16u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint16_t data = *(uint16_t*)(mem + ofs(addr));
        return ss_byteswap16(data);
#else
        return *(uint16_t*)(mem + ofs(addr));
#endif
    }
    int16_t ld16s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int16_t data = *( int16_t*)(mem + ofs(addr));
        return ss_byteswap16(data);
#else
        return *( int16_t*)(mem + ofs(addr));
#endif
    }
    uint32_t ld32u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(mem + ofs(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(mem + ofs(addr));
#endif
    }
    int32_t ld32s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int32_t data = *( int32_t*)(mem + ofs(addr));
        return ss_byteswap32(data);
#else
        return *( int32_t*)(mem + ofs(addr));
#endif
    }
    uint64_t ld64( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint64_t data = *(uint64_t*)(mem + ofs(addr));
        return ss_byteswap64(data);
#else
        return *(uint64_t*)(mem + ofs(addr));
#endif
    }
    void ld128( uint64_t addr, uint64_t data[2] )
    {
        data[0] = *(uint64_t*)(mem + ofs(addr) + 0);
        data[1] = *(uint64_t*)(mem + ofs(addr) + 8);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
    }
    void ld512( uint64_t addr, uint64_t data[8] )
    {
        data[0] = *(uint64_t*)(mem + ofs(addr) +  0);
        data[1] = *(uint64_t*)(mem + ofs(addr) +  8);
        data[2] = *(uint64_t*)(mem + ofs(addr) + 16);
        data[3] = *(uint64_t*)(mem + ofs(addr) + 24);
        data[4] = *(uint64_t*)(mem + ofs(addr) + 32);
        data[5] = *(uint64_t*)(mem + ofs(addr) + 40);
        data[6] = *(uint64_t*)(mem + ofs(addr) + 48);
        data[7] = *(uint64_t*)(mem + ofs(addr) + 56);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
    }
    void ld256( uint64_t addr, uint64_t data[4] )
    {
        data[0] = *(uint64_t*)(mem + ofs(addr) +  0);
        data[1] = *(uint64_t*)(mem + ofs(addr) +  8);
        data[2] = *(uint64_t*)(mem + ofs(addr) + 16);
        data[3] = *(uint64_t*)(mem + ofs(addr) + 24);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
#endif
    }

    // st64partial() performs an 8 byte partial store. The bytes to store are specified by mask.
    // A 1 in bit N of mask denotes that byte (data >> (8*N)) & 0xff should be written to memory.

    void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
    {
        //uint64_t* pntr = (uint64_t*)(mem + ofs(addr));
        //*pntr = (data & mask) | (*pntr &~ mask);
        ss_stp8(*(double*)&data,mem + ofs(addr),mask);
    }
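
    // Worked example of the mask (illustrative only): with mask = 0x0f, only
    // bytes 0..3 of data, i.e. (data >> 0) & 0xff through (data >> 24) & 0xff,
    // are written; the other four bytes of the 64-bit location at addr keep
    // their old values.
    //
    //   ram.st64partial( addr, 0x00000000deadbeefULL, 0x0f );  // "ram" is hypothetical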

    // ld128atomic() (aka load twin double, load quad atomic) atomically loads two
    // 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
    // is the value at addr + 8. Note that ld128() does not guarantee atomicity.

    void ld128atomic( uint64_t addr, uint64_t data[2] )
    {
        addr = ofs(addr);
        lock(addr);
        data[0] = *(uint64_t*)(mem + addr + 0);
        data[1] = *(uint64_t*)(mem + addr + 8);
        unlock(addr);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
    }

    // ldstub() returns a byte from memory at addr, and sets the byte at addr
    // to 0xff. The ldstub() operation is atomic.

    uint8_t ldstub( uint64_t addr )
    {
        //uint8_t* pntr = (uint8_t*)(mem + ofs(addr));
        //uint8_t temp = *pntr;
        //*pntr = 0xff;
        //return temp;
        uint8_t _rd = ss_ldstub(mem,ofs(addr));
        return _rd;
    }
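
    // Illustrative sketch of how guest code typically uses this primitive
    // ("ram" and lock_addr are hypothetical): a test-and-set spinlock.
    //
    //   while ( ram.ldstub( lock_addr ) != 0 )   // 0x00 means the lock was free
    //       ;                                    // spin until we acquire it
    //   /* ... critical section ... */
    //   ram.st8( lock_addr, 0 );                 // release the lock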

    // swap() exchanges the 32bit value rd with the 32bit value at addr.
    // The old 32bit value at addr is returned. The operation is atomic.

    uint32_t swap( uint64_t addr, uint32_t rd )
    {
        //uint32_t* pntr = (uint32_t*)(mem + ofs(addr));
        //uint32_t temp = *pntr;
        //*pntr = rd;
        //return temp;
        uint32_t _rd = ss_swap(rd,mem,ofs(addr));
        return _rd;
    }

    // casx() compares the 64bit value rs2 with the 64bit value at addr.
    // If the two values are equal, the value rd is stored in the
    // 64bit value at addr. In both cases the old 64bit value at addr is
    // returned, that is, the value at addr before the store happened.
    // The casx() operation is atomic.

    uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
    {
        //uint64_t* pntr = (uint64_t*)(mem + ofs(addr));
        //uint64_t temp = *pntr;
        //if (temp == rs2)
        //    *pntr = rd;
        //return temp;
        uint64_t _rd = ss_casx(rd,mem + ofs(addr),rs2);
        return _rd;
    }
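
    // Illustrative retry loop built on the semantics above ("ram" is a
    // hypothetical instance): atomically increment the 64-bit word at addr.
    //
    //   uint64_t old, seen;
    //   do {
    //       old  = ram.ld64( addr );
    //       seen = ram.casx( addr, old + 1, old );  // store old+1 iff still == old
    //   } while ( seen != old );                    // someone raced us, try again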

    // cas() is the same as casx(), but for 32bit values.

    uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
    {
        //uint32_t* pntr = (uint32_t*)(mem + ofs(addr));
        //uint32_t temp = *pntr;
        //if (temp == rs2)
        //    *pntr = rd;
        //return temp;
        uint32_t _rd = ss_cas(rd,mem + ofs(addr),rs2);
        return _rd;
    }

    // prefetch() prefetches data from memory into the cache hierarchy.
    //void prefetch( uint64_t addr, uint_t _size ) {}

    // flush() writes dirty data in the cache back to memory.
    //void flush( uint64_t addr, uint_t _size ) {} // process does not provide data.

    int block_read(uint64_t addr, uint8_t *tgt, int _size)
    {
        memcpy(tgt, mem + ofs(addr), _size);
        return 0;
    }
    int block_write(uint64_t addr, const uint8_t *src, int _size)
    {
        memcpy(mem + ofs(addr), src, _size);
        return 0;
    }

    int dump   ( char *dir_name, char *file_name );
    int restore( char *dir_name );



private:


    void lock  ( uint64_t addr ) { mutex_lock  ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
    void unlock( uint64_t addr ) { mutex_unlock( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }

    void handle_out_of_range( uint64_t addr );

    uint64_t ofs( uint64_t addr )
    {
#ifdef SAM_RAM_RANGE_CHECK
        if (addr >= size)
            this->handle_out_of_range ( addr );
#endif

        return addr;
    }




private:

    char *mem_file;   // name of the mem mapped file,
                      // could be a temp file or a checkpoint file
    int   mfile;      // file descriptor,
                      // if > 0 delete the file when done

    uint8_t* mem;     // mmap'ed image
    uint64_t size;
    uint64_t pa_mask;

    mutex_t locks[SAM_NMEM_LOCKS];
};

#elif defined(MEMORY_SPARSE)

// The IO bit defines a mem/io access: bit 39 for Ni, bit 47 for
const uint64_t IO_BIT = uint64_t(1)<<39;
inline int is_dumbserial_addr(uint64_t pa)
{
    // We really need to get the address space layout controlled.
    // This special case range is crazy. Next we'll need to add
    // yet another 8K to this. Where will it end? However,
    // SAM does not have a solution for this problem yet; one
    // was proposed but no action has been taken. Until then just
    // claim the range below for RAM mapped consoles.

    return ((pa >= 0x1f10000000) && (pa <= 0x1f10003fff))
        || ((pa >= 0xfff0c2c000) && (pa <= 0xfff0c2cfff)); // Note this is not a ROM address on.
}
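
// Illustrative routing example (addresses from the range above): a one-byte
// store to pa = 0x1f10000000 is treated as a console access by st8() below,
// rather than as simulated RAM:
//
//   is_dumbserial_addr(pa)  != 0
//   pa |= IO_BIT            // 0x801f10000000 (bit 39 set)
//   SYSTEM_physio_access(0, 0, pa, /*wr*/1, /*size*/1, &v, /*bytemask*/~0)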



extern "C" int SYSTEM_physio_access(uint32_t cpu_id, void* obj, uint64_t paddr,
                                    int wr, uint32_t size, uint64_t* buf, uint8_t bytemask);


// Entry record for each mmapped file;
// used to accelerate memory loads by mmapping each file image.
class MappedFileEntry
{
public:

    MappedFileEntry(const char *file_name, uint64_t addr=0);
    ~MappedFileEntry();

    char *name;      // name of the mem mapped bin or checkpoint file
    int   mfile;     // file descriptor, if > 0 delete the file when done

    uint8_t* mem;    // mmap'ed image

    uint64_t addr;   // starting address
    uint64_t size;   // file size

    MappedFileEntry *next;   // next entry in the list

    int is_valid() { return name != 0 && mem != NULL && mem != MAP_FAILED && mfile >= 0; }
};

// sparse memory model
class SMemory : public BL_Memory
{
  public:
    SMemory( uint64_t ram_size=0, uint_t pa_bits=48, uint_t _l1bits=18, uint_t _l2bits=10, uint_t _l3bits=20 );
    ~SMemory();

    int init(char *nm=NULL, int is_cp=0) { return 1; }

    uint64_t get_size() { return size; }


    // get_base() is effectively a private method and should not be used
    // outside this module
    uint8_t* get_base() { return NULL; }

    // Supported User Interface Operations
    int  load( const char* mem_image_filename );
    int  load( const char *file, uint64_t addr) { return load_bin(file, addr); }
    int  load_bin( const char *file, uint64_t addr);
    void save( const char* filename, uint64_t addr, uint64_t size );


    /* NOTE:
       The SMemory:: qualification is needed to force inlining of these
       routines, so that the performance degradation caused by virtual
       dispatch is avoided.
    */

    void     poke8 ( uint64_t addr, uint8_t  data ) { SMemory::st8(addr,data); }
    void     poke16( uint64_t addr, uint16_t data ) { SMemory::st16(addr,data); }
    void     poke32( uint64_t addr, uint32_t data ) { SMemory::st32(addr,data); }
    void     poke64( uint64_t addr, uint64_t data ) { SMemory::st64(addr,data); }
    uint8_t  peek8u ( uint64_t addr ) { return SMemory::ld8u(addr); }
    int8_t   peek8s ( uint64_t addr ) { return SMemory::ld8s(addr); }
    uint16_t peek16u( uint64_t addr ) { return SMemory::ld16u(addr); }
    int16_t  peek16s( uint64_t addr ) { return SMemory::ld16s(addr); }
    uint32_t peek32u( uint64_t addr ) { return SMemory::ld32u(addr); }
    int32_t  peek32s( uint64_t addr ) { return SMemory::ld32s(addr); }
    uint64_t peek64 ( uint64_t addr ) { return SMemory::ld64(addr); }

    // instruction fetch
    uint32_t fetch32( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(get_ld_ptr(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(get_ld_ptr(addr));
#endif
    }

    void fetch256( uint64_t addr, uint64_t data[4] ) { SMemory::ld256(addr,data); }
    void fetch512( uint64_t addr, uint64_t data[8] ) { SMemory::ld512(addr,data); }




    // Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
    // st128() and st512() are atomic per 64bit quantity.
    /*
    void st8( uint64_t addr, uint8_t data )
    {
        *(uint8_t*)(get_st_ptr(addr)) = data;
    }
    */
    void st8( uint64_t addr, uint8_t data )
    {
        if(is_dumbserial_addr(addr))
        {
            // console access
            uint64_t v = uint64_t(data);
            addr |= IO_BIT;
            SYSTEM_physio_access(0, 0, addr, 1, 1, &v, ~0);
        }
        else // mem access
        {
            *(uint8_t*)(get_st_ptr(addr)) = data;
        }
    }

    void st16( uint64_t addr, uint16_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap16(data);
#endif
        *(uint16_t*)(get_st_ptr(addr)) = data;
    }
    void st32( uint64_t addr, uint32_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap32(data);
#endif
        *(uint32_t*)(get_st_ptr(addr)) = data;
    }
    void st64_nl( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
        data = ss_byteswap64(data);
#endif
        *(uint64_t*)(get_st_ptr(addr)) = data;
    }
    void st64( uint64_t addr, uint64_t data )
    {
        lock(addr);
        st64_nl(addr,data);
        unlock(addr);
    }

    void st128( uint64_t addr, uint64_t data[2] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
        uint8_t* ptr = get_st_ptr(addr & ~uint64_t(0x1f));
        *(uint64_t*)(ptr + 0) = data[0];
        *(uint64_t*)(ptr + 8) = data[1];
    }
    void st512( uint64_t addr, uint64_t data[8] )
    {
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
        uint8_t* ptr = get_st_ptr(addr & ~uint64_t(0x3f));
        *(uint64_t*)(ptr +  0) = data[0];
        *(uint64_t*)(ptr +  8) = data[1];
        *(uint64_t*)(ptr + 16) = data[2];
        *(uint64_t*)(ptr + 24) = data[3];
        *(uint64_t*)(ptr + 32) = data[4];
        *(uint64_t*)(ptr + 40) = data[5];
        *(uint64_t*)(ptr + 48) = data[6];
        *(uint64_t*)(ptr + 56) = data[7];
    }

    // Supported Load Operations. ld8[su]() through ld64() are guaranteed to be atomic. ld128() and
    // above are atomic at the 64 bit granularity.
    /*
    uint8_t ld8u ( uint64_t addr )
    {
        return *(uint8_t *)(get_ld_ptr(addr));
    }
    */
    uint8_t ld8u( uint64_t addr )
    {
        if(is_dumbserial_addr(addr))
        {
            // console access
            uint64_t v = 0;
            addr |= IO_BIT;
            SYSTEM_physio_access(0, 0, addr, 0, 1, &v, ~0);
            return uint8_t(v);

        }
        else // mem access
        {
            return *(uint8_t *)(get_ld_ptr(addr));
        }
    }

    int8_t ld8s( uint64_t addr )
    {
        return *( int8_t *)(get_ld_ptr(addr));
    }
    uint16_t ld16u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint16_t data = *(uint16_t*)(get_ld_ptr(addr));
        return ss_byteswap16(data);
#else
        return *(uint16_t*)(get_ld_ptr(addr));
#endif
    }
    int16_t ld16s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int16_t data = *( int16_t*)(get_ld_ptr(addr));
        return ss_byteswap16(data);
#else
        return *( int16_t*)(get_ld_ptr(addr));
#endif
    }
    uint32_t ld32u( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint32_t data = *(uint32_t*)(get_ld_ptr(addr));
        return ss_byteswap32(data);
#else
        return *(uint32_t*)(get_ld_ptr(addr));
#endif
    }
    int32_t ld32s( uint64_t addr )
    {
#if defined(ARCH_X64)
        int32_t data = *( int32_t*)(get_ld_ptr(addr));
        return ss_byteswap32(data);
#else
        return *( int32_t*)(get_ld_ptr(addr));
#endif
    }
    uint64_t ld64( uint64_t addr )
    {
#if defined(ARCH_X64)
        uint64_t data = *(uint64_t*)(get_ld_ptr(addr));
        return ss_byteswap64(data);
#else
        return *(uint64_t*)(get_ld_ptr(addr));
#endif
    }
    void ld128( uint64_t addr, uint64_t data[2] )
    {
        uint8_t* ptr = get_ld_ptr(addr);
        data[0] = *(uint64_t*)(ptr + 0);
        data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
#endif
    }
    void ld512( uint64_t addr, uint64_t data[8] )
    {
        uint8_t* ptr = get_ld_ptr(addr & ~uint64_t(0x3f));
        data[0] = *(uint64_t*)(ptr +  0);
        data[1] = *(uint64_t*)(ptr +  8);
        data[2] = *(uint64_t*)(ptr + 16);
        data[3] = *(uint64_t*)(ptr + 24);
        data[4] = *(uint64_t*)(ptr + 32);
        data[5] = *(uint64_t*)(ptr + 40);
        data[6] = *(uint64_t*)(ptr + 48);
        data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
        data[4] = ss_byteswap64(data[4]);
        data[5] = ss_byteswap64(data[5]);
        data[6] = ss_byteswap64(data[6]);
        data[7] = ss_byteswap64(data[7]);
#endif
    }
    void ld256( uint64_t addr, uint64_t data[4] )
    {
        uint8_t* ptr = get_ld_ptr(addr & ~uint64_t(0x1f));
        data[0] = *(uint64_t*)(ptr +  0);
        data[1] = *(uint64_t*)(ptr +  8);
        data[2] = *(uint64_t*)(ptr + 16);
        data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
        data[0] = ss_byteswap64(data[0]);
        data[1] = ss_byteswap64(data[1]);
        data[2] = ss_byteswap64(data[2]);
        data[3] = ss_byteswap64(data[3]);
#endif
    }


    // st64partial() performs an 8 byte partial store. The bytes to store are specified by mask.
    // A 1 in bit N of mask denotes that byte (data >> (8*N)) & 0xff should be written to memory.

    void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
    {
        ss_stp8(*(double*)&data,get_st_ptr(addr),mask);
    }

    // ld128atomic() (aka load twin double, load quad atomic) atomically loads two
    // 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
    // is the value at addr + 8. Note that ld128() does not guarantee atomicity.

    void ld128atomic( uint64_t addr, uint64_t data[2] )
    {
        lock(addr);
        ld128(addr,data);
        unlock(addr);
    }

    // ldstub() returns a byte from memory at addr, and sets the byte at addr
    // to 0xff. The ldstub() operation is atomic.

    uint8_t ldstub( uint64_t addr )
    {
        uint8_t _rd = ss_ldstub(get_st_ptr(addr),0);
        return _rd;
    }

    // swap() exchanges the 32bit value rd with the 32bit value at addr.
    // The old 32bit value at addr is returned. The operation is atomic.

    uint32_t swap( uint64_t addr, uint32_t rd )
    {
        uint32_t _rd = ss_swap(rd,get_st_ptr(addr),0);
        return _rd;
    }

    // casx() compares the 64bit value rs2 with the 64bit value at addr.
    // If the two values are equal, the value rd is stored in the
    // 64bit value at addr. In both cases the old 64bit value at addr is
    // returned, that is, the value at addr before the store happened.
    // The casx() operation is atomic.

    uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
    {
        uint64_t _rd = ss_casx(rd,get_st_ptr(addr),rs2);
        return _rd;
    }

    // cas() is the same as casx(), but for 32bit values.

    uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
    {
        uint32_t _rd = ss_cas(rd,get_st_ptr(addr),rs2);
        return _rd;
    }

    // prefetch() prefetches data from memory into the cache hierarchy.
    //void prefetch( uint64_t addr, uint_t _size ) {}

    // flush() writes dirty data in the cache back to memory.
    //void flush( uint64_t addr, uint_t _size ) {} // process does not provide data.


    int block_read (uint64_t addr, uint8_t *tgt, int _size);
    int block_write(uint64_t addr, const uint8_t *src, int _size);

    int dump   ( char *dir_name, char *file_name );
    int restore( char *dir_name );

    // No mem page allocation on load accesses;
    // if a load goes to an uninitialized location, an unknown value is returned.
    uint8_t* get_ld_ptr( uint64_t addr )
    {
        uint8_t*** o1 = (uint8_t***)((char*)l1 + ((addr >> l1shft) & l1mask));
        uint8_t**  l2 = *o1;

        if (l2 == 0)
            return uninit_page + ( addr & 0x7 );

        uint8_t** o2 = (uint8_t**)((char*)l2 + ((addr >> l2shft) & l2mask));
        uint8_t*  l3 = *o2;
        if (l3 == 0)
            return uninit_page + ( addr & 0x7 );

        return mask_dirty(l3) + (addr & l3mask);
    }
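
    // Conceptual address split for the default constructor parameters
    // (pa_bits=48, _l1bits=18, _l2bits=10, _l3bits=20). This is an
    // illustration only; the actual l1shft/l1mask etc. values computed by the
    // constructor also fold in the pointer-size scaling used above.
    //
    //   level-1 index : addr >> 30            (top 18 bits of a 48-bit PA)
    //   level-2 index : (addr >> 20) & 0x3ff  (next 10 bits)
    //   page offset   : addr & 0xfffff        (low 20 bits, 1MB level-3 pages)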

    // Allocate a mem page if a store goes to an uninitialized location;
    // acquire a lock to prevent multiple writers on an MP run.
    uint8_t* get_st_ptr( uint64_t addr )
    {

        uint8_t*** o1 = (uint8_t***)((char*)l1 + ((addr >> l1shft) & l1mask) );
        uint8_t**  l2 = *o1;

        if (l2 == 0)
        {
            mutex_lock(&l2_lock);

            // check again if the level 2 table is already allocated
            l2 = *o1;
            if(l2 == 0)
                l2 = *o1 = (uint8_t**)calloc(l2size,sizeof(uint8_t));

            mutex_unlock(&l2_lock);
        }

        if (l2)
        {
            uint8_t** o2 = (uint8_t**)((char*)l2 + ((addr >> l2shft) & l2mask));
            uint8_t*  l3 = *o2;

            if (l3 == 0)
            {
                mutex_lock(&l3_lock);

                // check again if the level 3 page is already allocated
                l3 = *o2;
                if(l3 == 0)
                    l3 = *o2 = (uint8_t*)calloc(l3size,sizeof(uint8_t));

                mutex_unlock ( &l3_lock );
            }

            if (l3) {
                // mark this line dirty. The dirty flag is ONLY read and cleared
                // at dump time (with SAM stopped) so there is no locking needed.
                l3 = *o2 = set_dirty(l3);
                return mask_dirty(l3) + (addr & l3mask);
            }
        }

        fprintf(stderr, "\nMEM: Ran out of memory, exiting...\n");
        exit(1);
    }



    uint64_t get_l1size() { return l1size; }
    uint64_t get_l2size() { return l2size; }
    uint64_t get_l3size() { return l3size; }

    // The mlist points to the list of mapped file entries that
    // will need to be unmapped in the destructor
    void link(MappedFileEntry *e) {
        assert(e->next == NULL); e->next = mlist; mlist = e;
    }

    int map_page(uint64_t addr, uint8_t *maddr);
    int map     (MappedFileEntry *e);

  private:

    void lock  ( uint64_t addr ) { mutex_lock  ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
    void unlock( uint64_t addr ) { mutex_unlock( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }

private:

    uint8_t*** l1;

    uint_t   l1bits;
    uint_t   l2bits;
    uint_t   l3bits;
    uint_t   l1shft;
    uint_t   l2shft;
    uint64_t l1size;
    uint64_t l2size;
    uint64_t l3size;
    uint64_t l1mask;
    uint64_t l2mask;
    uint64_t l3mask;

    uint64_t size;
    uint64_t pa_mask;

    uint8_t uninit_page[512];

    mutex_t locks[SAM_NMEM_LOCKS];
    mutex_t l2_lock;
    mutex_t l3_lock;

    MappedFileEntry *mlist;   // mem mapped file list

    static const uint64_t dirtyflag = 1ull;

    bool is_dirty(uint8_t * l3) {
        return (dirtyflag & (uint64_t) l3);
    }

    uint8_t * set_dirty(uint8_t* l3) {
        return (uint8_t *) (dirtyflag | (uint64_t) l3);
    }

    uint8_t * mask_dirty(uint8_t * l3) {
        return (uint8_t *) (~dirtyflag & (uint64_t) l3);
    }
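
    // Pointer-tagging sketch: a level-3 page pointer returned by calloc() is
    // always at least 8-byte aligned, so bit 0 is free to carry the dirty
    // flag without losing any address bits.
    //
    //   uint8_t* p  = ...;            // page base, low bit clear
    //   uint8_t* pd = set_dirty(p);   // pd == p with bit 0 set
    //   is_dirty(pd);                 // true
    //   mask_dirty(pd);               // == p, usable as a real pointer again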
};

#elif defined(MEMORY_EXTERNAL)

#include "SS_ExternalMemory.h"

typedef SS_ExternalMemory SMemory;

#define st64_nl poke64

#else

#error "You should define a memory model to use through some -D flag"

#endif // MEMORY_XX






#endif //__SAM_Memory_h__