Initial commit of OpenSPARC T2 architecture model.
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_FastMemory.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#ifndef __SS_FastMemory_h__
#define __SS_FastMemory_h__
#ifdef COMPILE_FOR_SAM
// SAM supplies its own memory implementation; see elsewhere.
#else // Vonk's own memory
#include "SS_Types.h"
extern "C" uint8_t ss_ldstub( void* base, uint64_t ofs );
extern "C" uint32_t ss_swap ( uint32_t rd, void* base, uint64_t ofs );
extern "C" uint32_t ss_cas ( uint32_t rd, void* base, uint32_t rs2 );
extern "C" uint64_t ss_casx ( uint64_t rd, void* base, uint64_t rs2 );
extern "C" void ss_stp8 ( double rd, void* base, uint64_t mask );
#include "utils.h"
#include "SS_Ld128Atomic.h"
#include <string.h>
#include <sys/mman.h>
#include "BL_Mutex.h"
#include "BL_Memory.h"
class SS_FastMemory : public BL_Memory
{
public:
SS_FastMemory();
~SS_FastMemory();
void allocate( uint64_t _ram_size, uint64_t _rom_size, uint_t pa_bits );
void load( const char* filename );
void load( const char* filename, uint64_t addr );
void save( const char* filename, uint64_t addr, uint64_t size );
// Supported User Interface Operations
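// Note: memory contents are kept in target (SPARC, big endian) byte
// order; on little endian x64 hosts every multi-byte access byteswaps,
// which is what the ARCH_X64 conditional blocks below implement.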
void poke8( uint64_t addr, uint8_t data ) { SS_FastMemory::st8(addr,data); }
void poke16( uint64_t addr, uint16_t data ) { SS_FastMemory::st16(addr,data); }
void poke32( uint64_t addr, uint32_t data ) { SS_FastMemory::st32(addr,data); }
void poke64( uint64_t addr, uint64_t data ) { SS_FastMemory::st64(addr,data); }
uint8_t peek8u( uint64_t addr ) { return SS_FastMemory::ld8u(addr); }
int8_t peek8s( uint64_t addr ) { return SS_FastMemory::ld8s(addr); }
uint16_t peek16u( uint64_t addr ) { return SS_FastMemory::ld16u(addr); }
int16_t peek16s( uint64_t addr ) { return SS_FastMemory::ld16s(addr); }
uint32_t peek32u( uint64_t addr ) { return SS_FastMemory::ld32u(addr); }
int32_t peek32s( uint64_t addr ) { return SS_FastMemory::ld32s(addr); }
uint64_t peek64( uint64_t addr ) { return SS_FastMemory::ld64(addr); }
void peek128( uint64_t addr, uint64_t data[2] )
{
uint8_t* ptr = get_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
}
void peek256( uint64_t addr, uint64_t data[4] )
{
uint8_t* ptr = get_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
data[2] = *(uint64_t*)(ptr + 16);
data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
#endif
}
void peek512( uint64_t addr, uint64_t data[8] )
{
uint8_t* ptr = get_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
data[2] = *(uint64_t*)(ptr + 16);
data[3] = *(uint64_t*)(ptr + 24);
data[4] = *(uint64_t*)(ptr + 32);
data[5] = *(uint64_t*)(ptr + 40);
data[6] = *(uint64_t*)(ptr + 48);
data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
}
void poke128( uint64_t addr, uint64_t data[2] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
uint8_t* ptr = get_ptr(addr);
*(uint64_t*)(ptr + 0) = data[0];
*(uint64_t*)(ptr + 8) = data[1];
}
void poke512( uint64_t addr, uint64_t data[8] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
uint8_t* ptr = get_ptr(addr);
*(uint64_t*)(ptr + 0) = data[0];
*(uint64_t*)(ptr + 8) = data[1];
*(uint64_t*)(ptr + 16) = data[2];
*(uint64_t*)(ptr + 24) = data[3];
*(uint64_t*)(ptr + 32) = data[4];
*(uint64_t*)(ptr + 40) = data[5];
*(uint64_t*)(ptr + 48) = data[6];
*(uint64_t*)(ptr + 56) = data[7];
}
// Supported Fetch Operation (instruction fetch)
virtual uint32_t fetch32( uint64_t addr ) { return SS_FastMemory::ld32u(addr); }
virtual void fetch256( uint64_t addr, uint64_t data[4] ) { SS_FastMemory::ld256(addr,data); }
virtual void fetch512( uint64_t addr, uint64_t data[8] ) { SS_FastMemory::ld512(addr,data); }
// Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
// st128() and st512() are atomic per 64bit quantity. Note that on ARCH_X64 builds the
// data[] argument of st128() and st512() is byteswapped in place, so the caller's buffer is modified.
virtual void st8( uint64_t addr, uint8_t data )
{
*(uint8_t*)(get_ptr(addr)) = data;
}
virtual void st16( uint64_t addr, uint16_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap16(data);
#endif
*(uint16_t*)(get_ptr(addr)) = data;
}
virtual void st32( uint64_t addr, uint32_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap32(data);
#endif
*(uint32_t*)(get_ptr(addr)) = data;
}
virtual void st64( uint64_t addr, uint64_t data )
{
#if defined(ARCH_X64)
data = ss_byteswap64(data);
#endif
*(uint64_t*)(get_ptr(addr)) = data;
}
virtual void st128( uint64_t addr, uint64_t data[2] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
uint8_t* ptr = get_ptr(addr);
*(uint64_t*)(ptr + 0) = data[0];
*(uint64_t*)(ptr + 8) = data[1];
}
virtual void st512( uint64_t addr, uint64_t data[8] )
{
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
uint8_t* ptr = get_ptr(addr);
*(uint64_t*)(ptr + 0) = data[0];
*(uint64_t*)(ptr + 8) = data[1];
*(uint64_t*)(ptr + 16) = data[2];
*(uint64_t*)(ptr + 24) = data[3];
*(uint64_t*)(ptr + 32) = data[4];
*(uint64_t*)(ptr + 40) = data[5];
*(uint64_t*)(ptr + 48) = data[6];
*(uint64_t*)(ptr + 56) = data[7];
}
// Supported Load Operations. ld8[su]() to ld64() are guaranteed to be atomic. ld128() and
// above are atomic at the 64 bit granularity.
virtual uint8_t ld8u ( uint64_t addr )
{
return *(uint8_t *)(get_ptr(addr));
}
virtual int8_t ld8s( uint64_t addr )
{
return *( int8_t *)(get_ptr(addr));
}
virtual uint16_t ld16u( uint64_t addr )
{
#if defined(ARCH_X64)
uint16_t data = *(uint16_t*)(get_ptr(addr));
return ss_byteswap16(data);
#else
return *(uint16_t*)(get_ptr(addr));
#endif
}
virtual int16_t ld16s( uint64_t addr )
{
#if defined(ARCH_X64)
int16_t data = *( int16_t*)(get_ptr(addr));
return ss_byteswap16(data);
#else
return *( int16_t*)(get_ptr(addr));
#endif
}
virtual uint32_t ld32u( uint64_t addr )
{
#if defined(ARCH_X64)
uint32_t data = *(uint32_t*)(get_ptr(addr));
return ss_byteswap32(data);
#else
return *(uint32_t*)(get_ptr(addr));
#endif
}
virtual int32_t ld32s( uint64_t addr )
{
#if defined(ARCH_X64)
int32_t data = *( int32_t*)(get_ptr(addr));
return ss_byteswap32(data);
#else
return *( int32_t*)(get_ptr(addr));
#endif
}
virtual uint64_t ld64( uint64_t addr )
{
#if defined(ARCH_X64)
uint64_t data = *(uint64_t*)(get_ptr(addr));
return ss_byteswap64(data);
#else
return *(uint64_t*)(get_ptr(addr));
#endif
}
virtual void ld128( uint64_t addr, uint64_t data[2] )
{
uint8_t* ptr = get_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
#endif
}
virtual void ld512( uint64_t addr, uint64_t data[8] )
{
uint8_t* ptr = get_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
data[2] = *(uint64_t*)(ptr + 16);
data[3] = *(uint64_t*)(ptr + 24);
data[4] = *(uint64_t*)(ptr + 32);
data[5] = *(uint64_t*)(ptr + 40);
data[6] = *(uint64_t*)(ptr + 48);
data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
data[4] = ss_byteswap64(data[4]);
data[5] = ss_byteswap64(data[5]);
data[6] = ss_byteswap64(data[6]);
data[7] = ss_byteswap64(data[7]);
#endif
}
virtual void ld256( uint64_t addr, uint64_t data[4] )
{
uint8_t* ptr = get_ptr(addr);
data[0] = *(uint64_t*)(ptr + 0);
data[1] = *(uint64_t*)(ptr + 8);
data[2] = *(uint64_t*)(ptr + 16);
data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
data[0] = ss_byteswap64(data[0]);
data[1] = ss_byteswap64(data[1]);
data[2] = ss_byteswap64(data[2]);
data[3] = ss_byteswap64(data[3]);
#endif
}
// st64partial() performs an 8 byte partial store. The bytes to store are specified by mask:
// a 1 in bit N of mask denotes that byte (data >> (8*N)) & 0xff should be written to memory.
virtual void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
{
ss_stp8(*(double*)&data,get_ptr(addr),mask);
}
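// Worked example (illustrative): st64partial(addr, 0x1122334455667788, 0x81)
// stores byte 0 (0x88) and byte 7 (0x11) of data; the six bytes in between
// are left untouched in memory.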
// ld128atomic() (aka load twin double, load quad atomic) atomically loads two
// 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
// is the value at addr + 8. Note that ld128() does not guarantee atomicity.
virtual void ld128atomic( uint64_t addr, uint64_t data[2] )
{
ss_ld128atomic(get_ptr(addr),data);
}
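// Usage sketch (illustrative):
//   uint64_t pair[2];
//   SS_FastMemory::memory.ld128atomic(addr, pair); // pair[0] = [addr], pair[1] = [addr+8], one atomic access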
// ldstub() returns the byte from memory at addr and sets that byte
// to 0xff. The ldstub() operation is atomic.
virtual uint8_t ldstub( uint64_t addr )
{
return ss_ldstub(get_ptr(addr),0);
}
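// Usage sketch (illustrative): ldstub() is the classic SPARC spinlock
// primitive; a guest lock acquire loop maps onto
//   while (SS_FastMemory::memory.ldstub(lock_addr) != 0)
//     ; // spin until the previous byte value was 0 (lock was free)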
// swap() exchanges the 32bit value rd with the 32bit value at addr.
// The old 32bit value at addr is returned. The operation is atomic.
virtual uint32_t swap( uint64_t addr, uint32_t rd )
{
return ss_swap(rd,get_ptr(addr),0);
}
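// Example (illustrative):
//   uint32_t old = SS_FastMemory::memory.swap(addr, 1); // [addr] becomes 1, old holds the prior value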
// casx() compares the 64bit value rs2 with the 64bit value at addr.
// If the two values are equal, the value rd is stored in the
// 64bit value at addr. In both cases the old 64bit value at addr is
// returned, that is, the value at addr before the store happened.
// The casx() operation is atomic.
virtual uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
{
return ss_casx(rd,get_ptr(addr),rs2);
}
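// Usage sketch (illustrative): a lock free 64bit increment built on casx(),
//   uint64_t seen = SS_FastMemory::memory.ld64(addr), old;
//   do {
//     old  = seen;
//     seen = SS_FastMemory::memory.casx(addr, old + 1, old); // store old+1 iff [addr] == old
//   } while (seen != old); // retry until the compare-and-swap succeeds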
// cas() is the same as casx(), but for 32bit values.
virtual uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
{
return ss_cas(rd,get_ptr(addr),rs2);
}
// prefetch() prefetches data from memory into the cache hierarchy.
void prefetch( uint64_t addr, uint_t size ) {}
// flush() writes dirty data in the cache back to memory.
void flush( uint64_t addr, uint_t size ) {} // no-op: the memory model keeps no cache data to write back.
static SS_FastMemory memory;
private:
enum
{
// Configure 55 bits of physical address space, which is the
// current Sun SPARC limit. Note that no processor has
// implemented this many address lines yet; the current max
// is 48. To avoid complaints about memory size we always
// provide the full 55 bits. A toplevel of 2^23 entries of
// 4GB pages is a convenient split. In v8plus mode, however,
// we use much less ... just enough to make it work.
#if defined(ARCH_V9) || defined(ARCH_X64)
L1BITS = 23, // 2^L1BITS toplevel entries, each covering
L2BITS = 32, // 4GB of NORESERVE mmapped space
#elif defined(ARCH_V8)
L1BITS = 29, // In v8 mode we have less va space ...
L2BITS = 19, // ... so use 512KB NORESERVE mmapped chunks
#else
#error "Oops"
#endif
L1SIZE = 1 << L1BITS,
L1MASK = L1SIZE - 1
};
uint8_t* get_ptr( uint64_t addr )
{
uint8_t** o1 = l1 + ((addr >> L2BITS) & L1MASK);
uint8_t* l2 = *o1;
if (l2 == 0)
{
l2lock.lock();
if (*o1 == 0)
{
// Lazily mmap a zero filled chunk on first touch; MAP_NORESERVE
// avoids reserving swap for the whole chunk up front.
l2 = *o1 = (uint8_t*)mmap((char*)0,1ull << L2BITS,PROT_READ|PROT_WRITE,
MAP_NORESERVE|MAP_PRIVATE|MAP_ANON|MAP_ALIGN,-1,0);
}
else
{
// Another thread allocated this chunk between our unlocked check
// and taking the lock; reload the pointer instead of using the stale 0.
l2 = *o1;
}
l2lock.unlock();
}
return l2 + (addr & uint64_t((1ull << L2BITS) - 1));
}
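// Illustrative decomposition (V9/X64 build, L1BITS=23, L2BITS=32): for
// addr = 0x123456789a the toplevel index is (addr >> 32) & L1MASK = 0x12
// and the chunk offset is addr & 0xffffffff = 0x3456789a. Chunks are
// allocated lazily on first touch; l2lock makes concurrent first touches
// allocate each 4GB chunk exactly once (double checked locking).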
uint64_t ram_size;
uint64_t rom_size;
uint64_t page_size;
uint64_t page_mask;
uint8_t** l1;
BL_Mutex l2lock;
};
#endif /* COMPILE_FOR_SAM */
#endif /* __SS_FastMemory_h__ */