/*
* ========== Copyright Header Begin ==========================================
* OpenSPARC T2 Processor File: SS_FastMemory.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* ========== Copyright Header End ============================================
*/
#ifndef __SS_FastMemory_h__
#define __SS_FastMemory_h__
#else // Vonk's own memory
// Atomic memory primitives implemented outside this header (assembly/C),
// hence the C linkage. Their contracts are documented at the call sites in
// SS_FastMemory below (ldstub/swap/cas/casx/st64partial).

// ss_ldstub: atomically return the byte at base+ofs and set it to 0xff.
extern "C" uint8_t ss_ldstub( void* base
, uint64_t ofs
);
// ss_swap: atomically store the 32bit value rd at base+ofs and return the
// old 32bit value that was there.
extern "C" uint32_t ss_swap ( uint32_t rd
, void* base
, uint64_t ofs
);
// ss_cas: atomic 32bit compare-and-swap: if *base == rs2 then *base = rd;
// always returns the old value at base.
extern "C" uint32_t ss_cas ( uint32_t rd
, void* base
, uint32_t rs2
);
// ss_casx: atomic 64bit compare-and-swap, as ss_cas but for 64bit values.
extern "C" uint64_t ss_casx ( uint64_t rd
, void* base
, uint64_t rs2
);
// ss_stp8: 8 byte partial store; a 1 in bit N of mask stores byte
// (data >> (8*N)) & 0xff. data is passed in a double so it travels in a
// floating point register (see st64partial below).
extern "C" void ss_stp8 ( double rd
, void* base
, uint64_t mask
);
#include "SS_Ld128Atomic.h"
class SS_FastMemory
: public BL_Memory
void allocate( uint64_t _ram_size
, uint64_t _rom_size
, uint_t pa_bits
);
void load( const char* filename
);
void load( const char* filename
, uint64_t addr
);
void save( const char* filename
, uint64_t addr
, uint64_t size
);
// Supported User Interface Operations
void poke8( uint64_t addr
, uint8_t data
) { SS_FastMemory::st8(addr
,data
); }
void poke16( uint64_t addr
, uint16_t data
) { SS_FastMemory::st16(addr
,data
); }
void poke32( uint64_t addr
, uint32_t data
) { SS_FastMemory::st32(addr
,data
); }
void poke64( uint64_t addr
, uint64_t data
) { SS_FastMemory::st64(addr
,data
); }
uint8_t peek8u( uint64_t addr
) { return SS_FastMemory::ld8u(addr
); }
int8_t peek8s( uint64_t addr
) { return SS_FastMemory::ld8s(addr
); }
uint16_t peek16u( uint64_t addr
) { return SS_FastMemory::ld16u(addr
); }
int16_t peek16s( uint64_t addr
) { return SS_FastMemory::ld16s(addr
); }
uint32_t peek32u( uint64_t addr
) { return SS_FastMemory::ld32u(addr
); }
int32_t peek32s( uint64_t addr
) { return SS_FastMemory::ld32s(addr
); }
uint64_t peek64( uint64_t addr
) { return SS_FastMemory::ld64(addr
); }
void peek128( uint64_t addr
,uint64_t data
[2] )
uint8_t* ptr
= get_ptr(addr
);
data
[0] = *(uint64_t*)(ptr
+ 0);
data
[1] = *(uint64_t*)(ptr
+ 8);
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
void peek256( uint64_t addr
,uint64_t data
[4] )
uint8_t* ptr
= get_ptr(addr
);
data
[0] = *(uint64_t*)(ptr
+ 0);
data
[1] = *(uint64_t*)(ptr
+ 8);
data
[2] = *(uint64_t*)(ptr
+ 16);
data
[3] = *(uint64_t*)(ptr
+ 24);
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
data
[2] = ss_byteswap64(data
[2]);
data
[3] = ss_byteswap64(data
[3]);
void peek512( uint64_t addr
,uint64_t data
[8] )
uint8_t* ptr
= get_ptr(addr
);
data
[0] = *(uint64_t*)(ptr
+ 0);
data
[1] = *(uint64_t*)(ptr
+ 8);
data
[2] = *(uint64_t*)(ptr
+ 16);
data
[3] = *(uint64_t*)(ptr
+ 24);
data
[4] = *(uint64_t*)(ptr
+ 32);
data
[5] = *(uint64_t*)(ptr
+ 40);
data
[6] = *(uint64_t*)(ptr
+ 48);
data
[7] = *(uint64_t*)(ptr
+ 56);
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
data
[2] = ss_byteswap64(data
[2]);
data
[3] = ss_byteswap64(data
[3]);
data
[4] = ss_byteswap64(data
[4]);
data
[5] = ss_byteswap64(data
[5]);
data
[6] = ss_byteswap64(data
[6]);
data
[7] = ss_byteswap64(data
[7]);
void poke128( uint64_t addr
, uint64_t data
[2] )
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
uint8_t* ptr
= get_ptr(addr
);
*(uint64_t*)(ptr
+ 0) = data
[0];
*(uint64_t*)(ptr
+ 8) = data
[1];
void poke512( uint64_t addr
, uint64_t data
[8] )
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
data
[2] = ss_byteswap64(data
[2]);
data
[3] = ss_byteswap64(data
[3]);
data
[4] = ss_byteswap64(data
[4]);
data
[5] = ss_byteswap64(data
[5]);
data
[6] = ss_byteswap64(data
[6]);
data
[7] = ss_byteswap64(data
[7]);
uint8_t* ptr
= get_ptr(addr
);
*(uint64_t*)(ptr
+ 0) = data
[0];
*(uint64_t*)(ptr
+ 8) = data
[1];
*(uint64_t*)(ptr
+ 16) = data
[2];
*(uint64_t*)(ptr
+ 24) = data
[3];
*(uint64_t*)(ptr
+ 32) = data
[4];
*(uint64_t*)(ptr
+ 40) = data
[5];
*(uint64_t*)(ptr
+ 48) = data
[6];
*(uint64_t*)(ptr
+ 56) = data
[7];
// Supported Fetch Operation (instruction fetch)
virtual uint32_t fetch32( uint64_t addr
) { return SS_FastMemory::ld32u(addr
); }
virtual void fetch256( uint64_t addr
, uint64_t data
[4] ) { SS_FastMemory::ld256(addr
,data
); }
virtual void fetch512( uint64_t addr
, uint64_t data
[8] ) { SS_FastMemory::ld512(addr
,data
); }
// Supported Store Operations. st8(), st16(), st32() and st64() are gueranteed to be atomic.
// st128() and st512() are atomic per 64bit quantity.
virtual void st8( uint64_t addr
, uint8_t data
)
*(uint8_t*)(get_ptr(addr
)) = data
;
virtual void st16( uint64_t addr
, uint16_t data
)
data
= ss_byteswap16(data
);
*(uint16_t*)(get_ptr(addr
)) = data
;
virtual void st32( uint64_t addr
, uint32_t data
)
data
= ss_byteswap32(data
);
*(uint32_t*)(get_ptr(addr
)) = data
;
virtual void st64( uint64_t addr
, uint64_t data
)
data
= ss_byteswap64(data
);
*(uint64_t*)(get_ptr(addr
)) = data
;
virtual void st128( uint64_t addr
, uint64_t data
[2] )
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
uint8_t* ptr
= get_ptr(addr
);
*(uint64_t*)(ptr
+ 0) = data
[0];
*(uint64_t*)(ptr
+ 8) = data
[1];
virtual void st512( uint64_t addr
, uint64_t data
[8] )
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
data
[2] = ss_byteswap64(data
[2]);
data
[3] = ss_byteswap64(data
[3]);
data
[4] = ss_byteswap64(data
[4]);
data
[5] = ss_byteswap64(data
[5]);
data
[6] = ss_byteswap64(data
[6]);
data
[7] = ss_byteswap64(data
[7]);
uint8_t* ptr
= get_ptr(addr
);
*(uint64_t*)(ptr
+ 0) = data
[0];
*(uint64_t*)(ptr
+ 8) = data
[1];
*(uint64_t*)(ptr
+ 16) = data
[2];
*(uint64_t*)(ptr
+ 24) = data
[3];
*(uint64_t*)(ptr
+ 32) = data
[4];
*(uint64_t*)(ptr
+ 40) = data
[5];
*(uint64_t*)(ptr
+ 48) = data
[6];
*(uint64_t*)(ptr
+ 56) = data
[7];
// Supported Load Operations. ld8[su]() to ld64() are quaranteed to be atomic. ld128() and
// above are atomic at the 64 bit granularity.
virtual uint8_t ld8u ( uint64_t addr
)
return *(uint8_t *)(get_ptr(addr
));
virtual int8_t ld8s( uint64_t addr
)
return *( int8_t *)(get_ptr(addr
));
virtual uint16_t ld16u( uint64_t addr
)
uint16_t data
= *(uint16_t*)(get_ptr(addr
));
return ss_byteswap16(data
);
return *(uint16_t*)(get_ptr(addr
));
virtual int16_t ld16s( uint64_t addr
)
int16_t data
= *( int16_t*)(get_ptr(addr
));
return ss_byteswap16(data
);
return *( int16_t*)(get_ptr(addr
));
virtual uint32_t ld32u( uint64_t addr
)
uint32_t data
= *(uint32_t*)(get_ptr(addr
));
return ss_byteswap32(data
);
return *(uint32_t*)(get_ptr(addr
));
virtual int32_t ld32s( uint64_t addr
)
int32_t data
= *( int32_t*)(get_ptr(addr
));
return ss_byteswap32(data
);
return *( int32_t*)(get_ptr(addr
));
virtual uint64_t ld64( uint64_t addr
)
uint64_t data
= *(uint64_t*)(get_ptr(addr
));
return ss_byteswap64(data
);
return *(uint64_t*)(get_ptr(addr
));
virtual void ld128( uint64_t addr
, uint64_t data
[2] )
uint8_t* ptr
= get_ptr(addr
);
data
[0] = *(uint64_t*)(ptr
+ 0);
data
[1] = *(uint64_t*)(ptr
+ 8);
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
virtual void ld512( uint64_t addr
, uint64_t data
[8] )
uint8_t* ptr
= get_ptr(addr
);
data
[0] = *(uint64_t*)(ptr
+ 0);
data
[1] = *(uint64_t*)(ptr
+ 8);
data
[2] = *(uint64_t*)(ptr
+ 16);
data
[3] = *(uint64_t*)(ptr
+ 24);
data
[4] = *(uint64_t*)(ptr
+ 32);
data
[5] = *(uint64_t*)(ptr
+ 40);
data
[6] = *(uint64_t*)(ptr
+ 48);
data
[7] = *(uint64_t*)(ptr
+ 56);
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
data
[2] = ss_byteswap64(data
[2]);
data
[3] = ss_byteswap64(data
[3]);
data
[4] = ss_byteswap64(data
[4]);
data
[5] = ss_byteswap64(data
[5]);
data
[6] = ss_byteswap64(data
[6]);
data
[7] = ss_byteswap64(data
[7]);
virtual void ld256( uint64_t addr
, uint64_t data
[4] )
uint8_t* ptr
= get_ptr(addr
);
data
[0] = *(uint64_t*)(ptr
+ 0);
data
[1] = *(uint64_t*)(ptr
+ 8);
data
[2] = *(uint64_t*)(ptr
+ 16);
data
[3] = *(uint64_t*)(ptr
+ 24);
data
[0] = ss_byteswap64(data
[0]);
data
[1] = ss_byteswap64(data
[1]);
data
[2] = ss_byteswap64(data
[2]);
data
[3] = ss_byteswap64(data
[3]);
// st64partial() performs 8 byte partial store. The bytes to store are specified by mask. A 1 in bit N of
// mask denotes that byte (data >> (8*N)) & 0xff should be written to memory
virtual void st64partial( uint64_t addr
, uint64_t data
, uint64_t mask
)
ss_stp8(*(double*)&data
,get_ptr(addr
),mask
);
// ld128atomic() (aka load twin double, load quad atomic) atomically loads two
// 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
// is the value at addr + 8. Note ld128 does() not guarantee atomicity.
virtual void ld128atomic( uint64_t addr
, uint64_t data
[2] )
ss_ld128atomic(get_ptr(addr
),data
);
// ldstub() return a byte from memory at addr, and set the byte at addr
// to 0xff. The ldstub() operation is atomic.
virtual uint8_t ldstub( uint64_t addr
)
return ss_ldstub(get_ptr(addr
),0);
// swap() stores the 32bit value rd with the 32bit value at addr.
// The old 32bit value at addr is returned. The operation is atomic.
virtual uint32_t swap( uint64_t addr
, uint32_t rd
)
return ss_swap(rd
,get_ptr(addr
),0);
// casx() compares the 64bit value rs2 with the 64bit value at addr.
// If the two values are equal, the value rd is stored in the
// 64bit value at addr. In both cases the old 64bit value at addr is
// returned, that is the value at addr before the storei happened.
// The casx() operation is atomic.
virtual uint64_t casx( uint64_t addr
, uint64_t rd
, uint64_t rs2
)
return ss_casx(rd
,get_ptr(addr
),rs2
);
// cas() is as casx, but for 32bit.
virtual uint32_t cas( uint64_t addr
, uint32_t rd
, uint32_t rs2
)
return ss_cas(rd
,get_ptr(addr
),rs2
);
// prefetch() prefetches data from memory into the cache hierarchy.
void prefetch( uint64_t addr
, uint_t size
) {}
// flush() writes dirty data in the cache back to memory.
void flush( uint64_t addr
, uint_t size
) {} // process does not provide data.
static SS_FastMemory memory
;
// Configure the 55 bits of physical address space which is the
// current SunSparc limit. Note, there is no processor has
// implemented this many address lines yet. Currently the max
// is 48. To avoid complains about memory size from people we
// should provide 55 bits always. To make a choose, a toplevel
// of 2^23 entries of 4GB pages is nice. In v8plus mode however
// we use much less ... just enough to make it work.
#if defined(ARCH_V9) || defined(ARCH_X64)
L1BITS
= 23, // 2^L1BITS of
L2BITS
= 32, // 4GB of NORESERVED mmapped space
L1BITS
= 29, // In v8 mode we have less va space ...
L2BITS
= 19, // 1MB of NORESERVED mmapped chunks
uint8_t* get_ptr( uint64_t addr
)
uint8_t** o1
= l1
+ ((addr
>> L2BITS
) & L1MASK
);
l2
= *o1
= (uint8_t*)mmap((char*)0,1ull << L2BITS
,PROT_READ
|PROT_WRITE
,
MAP_NORESERVE
|MAP_PRIVATE
|MAP_ANON
|MAP_ALIGN
,-1,0);
return l2
+ (addr
& uint64_t((1ull << L2BITS
) - 1));
#endif /* COMPILE_FOR_SAM */
#endif /* __SS_FastMemory_h__ */