/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: Memory.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
* Copyright (C) 2005 Sun Microsystems, Inc.
* All rights reserved.
*/
#ifndef __SAM_Memory_h__
#define __SAM_Memory_h__
#include <synch.h>
#include <sys/types.h>
#include <string.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <assert.h>
#include "BL_Memory.h"
extern "C" uint8_t ss_ldstub( void* base, uint64_t ofs );
extern "C" uint32_t ss_swap ( uint32_t rd, void* base, uint64_t ofs );
extern "C" uint32_t ss_cas ( uint32_t rd, void* base, uint32_t rs2 );
extern "C" uint64_t ss_casx ( uint64_t rd, void* base, uint64_t rs2 );
extern "C" void ss_stp8 ( double rd, void* base, uint64_t mask );
#include "utils.h"
// Build flag controlling the memory model: define MEMORY_FLAT or
// MEMORY_SPARSE (or MEMORY_EXTERNAL) via a -D compiler flag; see the
// #if chain below.
const int SAM_NMEM_LOCKS = 1<<8; // 256; must be a power of 2
const uint64_t SAM_MEM_DUMP_VERSION = 5<<8;
#if defined(MEMORY_FLAT)
// flat memory model
class SMemory : public BL_Memory
{
public:
SMemory( uint64_t ram_size, uint_t pa_bits=43);
~SMemory();
int init(char *nm=NULL, int is_cp = 0);
uint64_t get_size() { return size; }
// get_base() is for internal use only and should not be called
// outside this module
uint8_t* get_base() { return mem; }
// Supported User Interface Operations
int load( const char* mem_image_filename );
int load( const char *file, uint64_t addr) { return load_bin(file, addr); }
int load_bin( const char *file, uint64_t addr);
void save( const char* filename, uint64_t addr, uint64_t size );
/* NOTE:
The explicit SMemory:: qualifications force these calls to be inlined,
avoiding the performance degradation of virtual dispatch.
*/
void poke8( uint64_t addr, uint8_t data ) { SMemory::st8(addr,data); }
void poke16( uint64_t addr, uint16_t data ) { SMemory::st16(addr,data); }
void poke32( uint64_t addr, uint32_t data ) { SMemory::st32(addr,data); }
void poke64( uint64_t addr, uint64_t data ) { SMemory::st64(addr,data); }
uint8_t peek8u( uint64_t addr ) { return SMemory::ld8u(addr); }
int8_t peek8s( uint64_t addr ) { return SMemory::ld8s(addr); }
uint16_t peek16u( uint64_t addr ) { return SMemory::ld16u(addr); }
int16_t peek16s( uint64_t addr ) { return SMemory::ld16s(addr); }
uint32_t peek32u( uint64_t addr ) { return SMemory::ld32u(addr); }
int32_t peek32s( uint64_t addr ) { return SMemory::ld32s(addr); }
uint64_t peek64( uint64_t addr ) { return SMemory::ld64(addr); }
// instruction fetch
uint32_t fetch32( uint64_t addr )
{
#if defined(ARCH_X64)
uint32_t data = *(uint32_t*)(mem + ofs(addr));
return ss_byteswap32(data);
#else
return *(uint32_t*)(mem + ofs(addr));
#endif
}
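// NOTE (illustrative): the ARCH_X64 paths here and below assume the memory
// image is kept in SPARC (big-endian) byte order, so a little-endian host
// must byte-swap on every access. A portable sketch of the semantics
// assumed for ss_byteswap32(), which is defined elsewhere in SAM:
//
//   static inline uint32_t byteswap32_ref( uint32_t x )
//   {
//       return (x << 24) | ((x << 8) & 0x00ff0000)
//            | ((x >> 8) & 0x0000ff00) | (x >> 24);
//   }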
void fetch256( uint64_t addr, uint64_t data[4] ) { SMemory::ld256(addr,data); }
void fetch512( uint64_t addr, uint64_t data[8] ) { SMemory::ld512(addr,data); }
// Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
// st128() and st512() are atomic per 64-bit quantity.
void st8( uint64_t addr, uint8_t data )
{
*(uint8_t *)(mem + ofs(addr)) = data;
}
void st16( uint64_t addr, uint16_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap16(data);
#endif
*(uint16_t*)(mem + ofs(addr)) = data;
}
void st32( uint64_t addr, uint32_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap32(data);
#endif
*(uint32_t*)(mem + ofs(addr)) = data;
}
void st64_nl( uint64_t addr, uint64_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap64(data);
#endif
*(uint64_t*)(mem + ofs(addr)) = data;
}
void st64 ( uint64_t addr, uint64_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap64(data);
#endif
addr = ofs(addr);
lock(addr);
*(uint64_t*)(mem + addr) = data;
unlock(addr);
}
void st128( uint64_t addr, uint64_t data[2] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
*(uint64_t*)(mem + ofs(addr) + 0) = data[0];
*(uint64_t*)(mem + ofs(addr) + 8) = data[1];
}
void st512( uint64_t addr, uint64_t data[8] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
*(uint64_t*)(mem + ofs(addr) + 0) = data[0];
*(uint64_t*)(mem + ofs(addr) + 8) = data[1];
*(uint64_t*)(mem + ofs(addr) + 16) = data[2];
*(uint64_t*)(mem + ofs(addr) + 24) = data[3];
*(uint64_t*)(mem + ofs(addr) + 32) = data[4];
*(uint64_t*)(mem + ofs(addr) + 40) = data[5];
*(uint64_t*)(mem + ofs(addr) + 48) = data[6];
*(uint64_t*)(mem + ofs(addr) + 56) = data[7];
}
// Supported Load Operations. ld8[su]() through ld64() are guaranteed to be atomic. ld128() and
// above are atomic at the 64-bit granularity.
uint8_t ld8u ( uint64_t addr )
{
return *(uint8_t *)(mem + ofs(addr));
}
int8_t ld8s( uint64_t addr )
{
return *( int8_t *)(mem + ofs(addr));
}
uint16_t ld16u( uint64_t addr )
{
#if defined(ARCH_X64)
uint16_t data = *(uint16_t*)(mem + ofs(addr));
return ss_byteswap16(data);
#else
return *(uint16_t*)(mem + ofs(addr));
#endif
}
int16_t ld16s( uint64_t addr )
{
#if defined(ARCH_X64)
int16_t data = *( int16_t*)(mem + ofs(addr));
return ss_byteswap16(data);
#else
return *( int16_t*)(mem + ofs(addr));
#endif
}
uint32_t ld32u( uint64_t addr )
{
#if defined(ARCH_X64)
uint32_t data = *(uint32_t*)(mem + ofs(addr));
return ss_byteswap32(data);
#else
return *(uint32_t*)(mem + ofs(addr));
#endif
}
int32_t ld32s( uint64_t addr )
{
#if defined(ARCH_X64)
int32_t data = *( int32_t*)(mem + ofs(addr));
return ss_byteswap32(data);
#else
return *( int32_t*)(mem + ofs(addr));
#endif
}
uint64_t ld64( uint64_t addr )
{
#if defined(ARCH_X64)
uint64_t data = *(uint64_t*)(mem + ofs(addr));
return ss_byteswap64(data);
#else
return *(uint64_t*)(mem + ofs(addr));
#endif
}
void ld128( uint64_t addr, uint64_t data[2] )
{
data[0] = *(uint64_t*)(mem + ofs(addr) + 0);
data[1] = *(uint64_t*)(mem + ofs(addr) + 8);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
}
void ld512( uint64_t addr, uint64_t data[8] )
{
data[0] = *(uint64_t*)(mem + ofs(addr) + 0);
data[1] = *(uint64_t*)(mem + ofs(addr) + 8);
data[2] = *(uint64_t*)(mem + ofs(addr) + 16);
data[3] = *(uint64_t*)(mem + ofs(addr) + 24);
data[4] = *(uint64_t*)(mem + ofs(addr) + 32);
data[5] = *(uint64_t*)(mem + ofs(addr) + 40);
data[6] = *(uint64_t*)(mem + ofs(addr) + 48);
data[7] = *(uint64_t*)(mem + ofs(addr) + 56);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
}
void ld256( uint64_t addr, uint64_t data[4] )
{
data[0] = *(uint64_t*)(mem + ofs(addr) + 0);
data[1] = *(uint64_t*)(mem + ofs(addr) + 8);
data[2] = *(uint64_t*)(mem + ofs(addr) + 16);
data[3] = *(uint64_t*)(mem + ofs(addr) + 24);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
#endif
}
// st64partial() performs an 8-byte partial store. The bytes to store are specified by mask:
// a 1 in bit N of mask denotes that byte (data >> (8*N)) & 0xff should be written to memory.
void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
{
//uint64_t* pntr = (uint64_t*)(mem + ofs(addr));
//*pntr = (data & mask) | (*pntr &~ mask);
ss_stp8(*(double*)&data,mem + ofs(addr),mask);
}
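// Illustrative only: a portable (non-atomic) expansion of the byte mask
// that ss_stp8() is assumed to apply, per the comment above. The helper
// below is hypothetical, not part of this class:
//
//   static inline uint64_t expand_bytemask( uint64_t mask )
//   {
//       uint64_t m = 0;
//       for ( int i = 0; i < 8; i++ )
//           if ( mask & (uint64_t(1) << i) )
//               m |= uint64_t(0xff) << (8 * i);
//       return m;
//   }
//   // then, value-wise: *p = (data & expand_bytemask(mask)) | (*p & ~expand_bytemask(mask));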
// ld128atomic() (aka load twin double, load quad atomic) atomically loads two
// 64-bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
// is the value at addr + 8. Note that ld128() does not guarantee atomicity.
void ld128atomic( uint64_t addr, uint64_t data[2] )
{
addr = ofs(addr);
lock(addr);
data[0] = *(uint64_t*)(mem + addr + 0);
data[1] = *(uint64_t*)(mem + addr + 8);
unlock(addr);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
}
// ldstub() returns the byte from memory at addr and sets the byte at addr
// to 0xff. The ldstub() operation is atomic.
uint8_t ldstub( uint64_t addr )
{
//uint8_t* pntr = (uint8_t*)(mem + ofs(addr));
//uint8_t temp = *pntr;
//*pntr = 0xff;
//return temp;
uint8_t _rd = ss_ldstub(mem,ofs(addr));
return _rd;
}
// swap() exchanges the 32-bit value rd with the 32-bit value at addr.
// The old 32-bit value at addr is returned. The operation is atomic.
uint32_t swap( uint64_t addr, uint32_t rd )
{
//uint32_t* pntr = (uint32_t*)(mem + ofs(addr));
//uint32_t temp = *pntr;
//*pntr = rd;
//return temp;
uint32_t _rd = ss_swap(rd,mem,ofs(addr));
return _rd;
}
// casx() compares the 64-bit value rs2 with the 64-bit value at addr.
// If the two values are equal, the value rd is stored in the
// 64-bit location at addr. In both cases the old 64-bit value at addr is
// returned, that is, the value at addr before the store happened.
// The casx() operation is atomic.
uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
{
//uint64_t* pntr = (uint64_t*)(mem + ofs(addr));
//uint64_t temp = *pntr;
//if (temp == rs2)
//*pntr = rd;
//return temp;
uint64_t _rd = ss_casx(rd,mem + ofs(addr),rs2);
return _rd;
}
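// Usage sketch (hypothetical caller code): a fetch-and-add loop built on
// casx(). Since casx() returns the previous value, the update succeeded
// exactly when the returned value equals the expected one:
//
//   uint64_t expected = m.ld64( a );
//   for (;;)
//   {
//       uint64_t prev = m.casx( a, expected + 1, expected );
//       if ( prev == expected )
//           break;            // stored expected+1 atomically
//       expected = prev;      // lost the race; retry with the fresh value
//   }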
// cas() is the same as casx(), but for 32-bit values.
uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
{
//uint32_t* pntr = (uint32_t*)(mem + ofs(addr));
//uint32_t temp = *pntr;
//if (temp == rs2)
//*pntr = rd;
//return temp;
uint32_t _rd = ss_cas(rd,mem + ofs(addr),rs2);
return _rd;
}
// prefetch() prefetches data from memory into the cache hierarchy.
//void prefetch( uint64_t addr, uint_t _size ) {}
// flush() writes dirty data in the cache back to memory.
//void flush( uint64_t addr, uint_t _size ) {} // process does not provide data.
int block_read(uint64_t addr, uint8_t *tgt, int _size)
{
memcpy(tgt, mem + ofs(addr), _size);
return 0;
}
int block_write(uint64_t addr, const uint8_t *src, int _size)
{
memcpy(mem + ofs(addr), src, _size);
return 0;
}
int dump ( char *dir_name, char *file_name );
int restore ( char *dir_name );
private:
void lock ( uint64_t addr ) { mutex_lock ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
void unlock ( uint64_t addr ) { mutex_unlock ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
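// The two methods above stripe addresses across the 256 mutexes: bits
// [11:4] of the offset select the lock, so all accesses within the same
// 16-byte granule serialize on the same mutex.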
void handle_out_of_range ( uint64_t addr );
uint64_t ofs( uint64_t addr )
{
#ifdef SAM_RAM_RANGE_CHECK
if (addr >= size)
this->handle_out_of_range ( addr );
#endif
return addr;
}
private:
char *mem_file; // name of mem mapped file,
// could be a temp file or a checkpoint file
int mfile; // file descriptor,
// if > 0 delete the file when done
uint8_t* mem; // mmap'ed image
uint64_t size;
uint64_t pa_mask;
mutex_t locks[SAM_NMEM_LOCKS];
};
#elif defined(MEMORY_SPARSE)
// The io bit defines mem/io access: bit 39 for Ni, bit 47 for
const uint64_t IO_BIT = uint64_t(1)<<39;
inline int is_dumbserial_addr(uint64_t pa)
{
// We really need to get the address space layout controlled.
// This special case range is crazy. Next we'll need to add
// yet again another 8K to this. Where will it end? However,
// SAM does not have a solution for this problem yet; one
// was proposed but no action has been taken. Until then just
// claim the range below for ram mapped consoles.
return ((pa >= 0x1f10000000) && (pa <= 0x1f10003fff))
|| ((pa >= 0xfff0c2c000) && (pa <= 0xfff0c2cfff)); // Note this is not a ROM address.
}
extern "C" int SYSTEM_physio_access(uint32_t cpu_id, void* obj, uint64_t paddr,
int wr, uint32_t size, uint64_t* buf, uint8_t bytemask);
// Entry record for each mmap'ed file; used to accelerate memory
// loading by mmap'ing each file image.
class MappedFileEntry
{
public:
MappedFileEntry(const char *file_name, uint64_t addr=0);
~MappedFileEntry();
char *name; // name of mem mapped bin or a checkpoint file
int mfile; // file descriptor, if > 0 delete the file when done
uint8_t* mem; // mmap'ed image
uint64_t addr; // starting address
uint64_t size; // file size
MappedFileEntry *next; // next entry in the list
int is_valid() { return name != 0 && mem != NULL && mem != MAP_FAILED && mfile >= 0; }
};
// sparse memory model
class SMemory : public BL_Memory
{
public:
SMemory( uint64_t ram_size=0, uint_t pa_bits=48, uint_t _l1bits=18, uint_t _l2bits=10, uint_t _l3bits=20 );
~SMemory();
int init(char *nm=NULL, int is_cp=0) { return 1; }
uint64_t get_size() { return size; }
// get_base() is for internal use only and should not be called
// outside this module
uint8_t* get_base() { return NULL; }
// Supported User Interface Operations
int load( const char* mem_image_filename );
int load( const char *file, uint64_t addr) { return load_bin(file, addr); }
int load_bin( const char *file, uint64_t addr);
void save( const char* filename, uint64_t addr, uint64_t size );
/* NOTE:
The explicit SMemory:: qualifications force these calls to be inlined,
avoiding the performance degradation of virtual dispatch.
*/
void poke8( uint64_t addr, uint8_t data ) { SMemory::st8(addr,data); }
void poke16( uint64_t addr, uint16_t data ) { SMemory::st16(addr,data); }
void poke32( uint64_t addr, uint32_t data ) { SMemory::st32(addr,data); }
void poke64( uint64_t addr, uint64_t data ) { SMemory::st64(addr,data); }
uint8_t peek8u( uint64_t addr ) { return SMemory::ld8u(addr); }
int8_t peek8s( uint64_t addr ) { return SMemory::ld8s(addr); }
uint16_t peek16u( uint64_t addr ) { return SMemory::ld16u(addr); }
int16_t peek16s( uint64_t addr ) { return SMemory::ld16s(addr); }
uint32_t peek32u( uint64_t addr ) { return SMemory::ld32u(addr); }
int32_t peek32s( uint64_t addr ) { return SMemory::ld32s(addr); }
uint64_t peek64( uint64_t addr ) { return SMemory::ld64(addr); }
// instruction fetch
uint32_t fetch32( uint64_t addr )
{
#if defined(ARCH_X64)
uint32_t data = *(uint32_t*)(get_ld_ptr(addr));
return ss_byteswap32(data);
#else
return *(uint32_t*)(get_ld_ptr(addr));
#endif
}
void fetch256( uint64_t addr, uint64_t data[4] ) { SMemory::ld256(addr,data); }
void fetch512( uint64_t addr, uint64_t data[8] ) { SMemory::ld512(addr,data); }
// Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
// st128() and st512() are atomic per 64-bit quantity.
/*
void st8( uint64_t addr, uint8_t data )
{
*(uint8_t*)(get_st_ptr(addr)) = data;
}
*/
void st8( uint64_t addr, uint8_t data )
{
if(is_dumbserial_addr(addr))
{
// console access
uint64_t v = uint64_t(data);
addr |= IO_BIT;
SYSTEM_physio_access(0, 0, addr, 1, 1, &v, ~0);
}
else // mem access
{
*(uint8_t*)(get_st_ptr(addr)) = data;
}
}
void st16( uint64_t addr, uint16_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap16(data);
#endif
*(uint16_t*)(get_st_ptr(addr)) = data;
}
void st32( uint64_t addr, uint32_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap32(data);
#endif
*(uint32_t*)(get_st_ptr(addr)) = data;
}
void st64_nl( uint64_t addr, uint64_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap64(data);
#endif
*(uint64_t*)(get_st_ptr(addr)) = data;
}
void st64 ( uint64_t addr, uint64_t data )
{
lock(addr);
st64_nl(addr,data);
unlock(addr);
}
void st128( uint64_t addr, uint64_t data[2] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
uint8_t* ptr = get_st_ptr(addr & ~uint64_t(0x1f));
*(uint64_t*)(ptr + 0) = data[0];
*(uint64_t*)(ptr + 8) = data[1];
}
void st512( uint64_t addr, uint64_t data[8] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
uint8_t* ptr = get_st_ptr(addr & ~uint64_t(0x3f));
*(uint64_t*)(ptr + 0) = data[0];
*(uint64_t*)(ptr + 8) = data[1];
*(uint64_t*)(ptr + 16) = data[2];
*(uint64_t*)(ptr + 24) = data[3];
*(uint64_t*)(ptr + 32) = data[4];
*(uint64_t*)(ptr + 40) = data[5];
*(uint64_t*)(ptr + 48) = data[6];
*(uint64_t*)(ptr + 56) = data[7];
}
// Supported Load Operations. ld8[su]() through ld64() are guaranteed to be atomic. ld128() and
// above are atomic at the 64-bit granularity.
/*
uint8_t ld8u ( uint64_t addr )
{
return *(uint8_t *)(get_ld_ptr(addr));
}
*/
uint8_t ld8u ( uint64_t addr )
{
if(is_dumbserial_addr(addr))
{
// console access
uint64_t v = 0;
addr |= IO_BIT;
SYSTEM_physio_access(0, 0, addr, 0, 1, &v, ~0);
return uint8_t(v);
}
else // mem access
{
return *(uint8_t *)(get_ld_ptr(addr));
}
}
int8_t ld8s( uint64_t addr )
{
return *( int8_t *)(get_ld_ptr(addr));
}
uint16_t ld16u( uint64_t addr )
{
#if defined(ARCH_X64)
uint16_t data = *(uint16_t*)(get_ld_ptr(addr));
return ss_byteswap16(data);
#else
return *(uint16_t*)(get_ld_ptr(addr));
#endif
}
int16_t ld16s( uint64_t addr )
{
#if defined(ARCH_X64)
int16_t data = *( int16_t*)(get_ld_ptr(addr));
return ss_byteswap16(data);
#else
return *( int16_t*)(get_ld_ptr(addr));
#endif
}
uint32_t ld32u( uint64_t addr )
{
#if defined(ARCH_X64)
uint32_t data = *(uint32_t*)(get_ld_ptr(addr));
return ss_byteswap32(data);
#else
return *(uint32_t*)(get_ld_ptr(addr));
#endif
}
int32_t ld32s( uint64_t addr )
{
#if defined(ARCH_X64)
int32_t data = *( int32_t*)(get_ld_ptr(addr));
return ss_byteswap32(data);
#else
return *( int32_t*)(get_ld_ptr(addr));
#endif
}
uint64_t ld64( uint64_t addr )
{
#if defined(ARCH_X64)
uint64_t data = *(uint64_t*)(get_ld_ptr(addr));
return ss_byteswap64(data);
#else
return *(uint64_t*)(get_ld_ptr(addr));
#endif
}
void ld128( uint64_t addr, uint64_t data[2] )
{
uint8_t* ptr = get_ld_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
}
void ld512( uint64_t addr, uint64_t data[8] )
{
uint8_t* ptr = get_ld_ptr(addr & ~uint64_t(0x3f));
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
data[2] = *(uint64_t*)(ptr + 16);
data[3] = *(uint64_t*)(ptr + 24);
data[4] = *(uint64_t*)(ptr + 32);
data[5] = *(uint64_t*)(ptr + 40);
data[6] = *(uint64_t*)(ptr + 48);
data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
}
void ld256( uint64_t addr, uint64_t data[4] )
{
uint8_t* ptr = get_ld_ptr(addr & ~uint64_t(0x1f));
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
data[2] = *(uint64_t*)(ptr + 16);
data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
#endif
}
// st64partial() performs an 8-byte partial store. The bytes to store are specified by mask:
// a 1 in bit N of mask denotes that byte (data >> (8*N)) & 0xff should be written to memory.
void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
{
ss_stp8(*(double*)&data,get_st_ptr(addr),mask);
}
// ld128atomic() (aka load twin double, load quad atomic) atomically loads two
// 64-bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
// is the value at addr + 8. Note that ld128() does not guarantee atomicity.
void ld128atomic( uint64_t addr, uint64_t data[2] )
{
lock(addr);
ld128(addr,data);
unlock(addr);
}
// ldstub() returns the byte from memory at addr and sets the byte at addr
// to 0xff. The ldstub() operation is atomic.
uint8_t ldstub( uint64_t addr )
{
uint8_t _rd = ss_ldstub(get_st_ptr(addr),0);
return _rd;
}
// swap() exchanges the 32-bit value rd with the 32-bit value at addr.
// The old 32-bit value at addr is returned. The operation is atomic.
uint32_t swap( uint64_t addr, uint32_t rd )
{
uint32_t _rd = ss_swap(rd,get_st_ptr(addr),0);
return _rd;
}
// casx() compares the 64-bit value rs2 with the 64-bit value at addr.
// If the two values are equal, the value rd is stored in the
// 64-bit location at addr. In both cases the old 64-bit value at addr is
// returned, that is, the value at addr before the store happened.
// The casx() operation is atomic.
uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
{
uint64_t _rd = ss_casx(rd,get_st_ptr(addr),rs2);
return _rd;
}
// cas() is the same as casx(), but for 32-bit values.
uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
{
uint32_t _rd = ss_cas(rd,get_st_ptr(addr),rs2);
return _rd;
}
// prefetch() prefetches data from memory into the cache hierarchy.
//void prefetch( uint64_t addr, uint_t _size ) {}
// flush() writes dirty data in the cache back to memory.
//void flush( uint64_t addr, uint_t _size ) {} // process does not provide data.
int block_read(uint64_t addr, uint8_t *tgt, int _size);
int block_write(uint64_t addr, const uint8_t *src, int _size);
int dump ( char *dir_name, char *file_name );
int restore ( char *dir_name );
// No memory page is allocated on load accesses; a load from an
// uninitialized location returns an unknown value.
uint8_t* get_ld_ptr ( uint64_t addr )
{
uint8_t*** o1 = (uint8_t***)((char*)l1 + ((addr >> l1shft) & l1mask));
uint8_t** l2 = *o1;
if (l2 == 0)
return uninit_page + ( addr & 0x7 );
uint8_t** o2 = (uint8_t**)((char*)l2 + ((addr >> l2shft) & l2mask));
uint8_t* l3 = *o2;
if (l3 == 0)
return uninit_page + ( addr & 0x7 );
return mask_dirty(l3) + (addr & l3mask);
}
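// A sketch of the address decomposition implied above, assuming the
// default geometry from the constructor (pa_bits=48: l1bits=18,
// l2bits=10, l3bits=20) and ignoring the byte-offset scaling that the
// constructor folds into l1shft/l2shft and the masks:
//
//   static inline void split_pa48( uint64_t addr, uint64_t& i1,
//                                  uint64_t& i2, uint64_t& off )
//   {
//       i1  = (addr >> 30) & ((uint64_t(1) << 18) - 1); // level-1 table index
//       i2  = (addr >> 20) & ((uint64_t(1) << 10) - 1); // level-2 table index
//       off =  addr        & ((uint64_t(1) << 20) - 1); // offset in a 1MB page
//   }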
// Allocate a memory page when a store hits an uninitialized location;
// acquire a lock to prevent racing writers in an MP run.
uint8_t* get_st_ptr( uint64_t addr )
{
uint8_t*** o1 = (uint8_t***)((char*)l1 + ((addr >> l1shft) & l1mask) );
uint8_t** l2 = *o1;
if (l2 == 0)
{
mutex_lock(&l2_lock);
// check again if level 2 table is already allocated
l2 = *o1 ;
if(l2 == 0)
l2 = *o1 = (uint8_t**)calloc(l2size,sizeof(uint8_t));
mutex_unlock(&l2_lock);
}
if (l2)
{
uint8_t** o2 = (uint8_t**)((char*)l2 + ((addr >> l2shft) & l2mask));
uint8_t* l3 = *o2;
if (l3 == 0)
{
mutex_lock(&l3_lock);
// check again if level 3 page is already allocated
l3 = *o2;
if(l3 == 0)
l3 = *o2 = (uint8_t*)calloc(l3size,sizeof(uint8_t));
mutex_unlock ( &l3_lock );
}
if (l3) {
// mark this line dirty. The dirty flag is ONLY read and cleared
// at dump time (with SAM stopped) so there is no locking needed.
l3 = *o2 = set_dirty(l3);
return mask_dirty(l3) + (addr & l3mask);
}
}
fprintf(stderr, "\nMEM: Out of memory, exiting...\n");
exit(1);
}
uint64_t get_l1size() { return l1size; }
uint64_t get_l2size() { return l2size; }
uint64_t get_l3size() { return l3size; }
// The mlist points to the list of mapped file entries that
// will need to be unmapped in the destructor
void link(MappedFileEntry *e) {
assert(e->next == NULL); e->next = mlist; mlist = e;
}
int map_page (uint64_t addr, uint8_t *maddr);
int map (MappedFileEntry *e);
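// Usage sketch (hypothetical caller; map() is assumed to install the
// mmap'ed image into the sparse page tables):
//
//   MappedFileEntry* e = new MappedFileEntry( "image.bin", 0x1000000 );
//   if ( e->is_valid() )
//   {
//       memory->map( e );    // install pages (error handling omitted)
//       memory->link( e );   // unmapped later in the destructor
//   }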
private:
void lock ( uint64_t addr ) { mutex_lock ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
void unlock ( uint64_t addr ) { mutex_unlock ( &locks[(addr >> 4) & (SAM_NMEM_LOCKS -1)] ); }
private:
uint8_t*** l1;
uint_t l1bits;
uint_t l2bits;
uint_t l3bits;
uint_t l1shft;
uint_t l2shft;
uint64_t l1size;
uint64_t l2size;
uint64_t l3size;
uint64_t l1mask;
uint64_t l2mask;
uint64_t l3mask;
uint64_t size;
uint64_t pa_mask;
uint8_t uninit_page[512];
mutex_t locks [SAM_NMEM_LOCKS ];
mutex_t l2_lock;
mutex_t l3_lock;
MappedFileEntry *mlist; // mem mapped file list
static const uint64_t dirtyflag = 1ull;
bool is_dirty(uint8_t * l3) {
return (dirtyflag & (uint64_t) l3);
}
uint8_t * set_dirty(uint8_t* l3) {
return (uint8_t *) (dirtyflag | (uint64_t) l3);
}
uint8_t * mask_dirty(uint8_t * l3) {
return (uint8_t *) (~dirtyflag & (uint64_t) l3);
}
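// Tag round-trip (illustrative): page pointers from calloc() are at
// least word aligned, so bit 0 is free to carry the dirty flag:
//
//   uint8_t* p = (uint8_t*)calloc( 64, 1 );
//   uint8_t* t = set_dirty( p );     // is_dirty(t) is now true
//   assert( mask_dirty( t ) == p );  // original pointer recovered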
};
#elif defined(MEMORY_EXTERNAL)
#include "SS_ExternalMemory.h"
typedef SS_ExternalMemory SMemory;
#define st64_nl poke64
#else
#pragma "You should define a memory to use though some -D flag"
#endif // MEMORY_XX
#endif //__SAM_Memory_h__