/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_FastMemory.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#ifndef __SS_FastMemory_h__
#define __SS_FastMemory_h__

#ifdef COMPILE_FOR_SAM

// See elsewhere

#else // Vonk's own memory

#include "SS_Types.h"

extern "C" uint8_t  ss_ldstub( void* base, uint64_t ofs );
extern "C" uint32_t ss_swap  ( uint32_t rd, void* base, uint64_t ofs );
extern "C" uint32_t ss_cas   ( uint32_t rd, void* base, uint32_t rs2 );
extern "C" uint64_t ss_casx  ( uint64_t rd, void* base, uint64_t rs2 );
extern "C" void     ss_stp8  ( double rd, void* base, uint64_t mask );

#include "utils.h"

#include "SS_Ld128Atomic.h"

#include <string.h>
#include <sys/mman.h>
#include "BL_Mutex.h"
#include "BL_Memory.h"

class SS_FastMemory : public BL_Memory
{
  public:
    SS_FastMemory();
    ~SS_FastMemory();

    void allocate( uint64_t _ram_size, uint64_t _rom_size, uint_t pa_bits );

    void load( const char* filename );
    void load( const char* filename, uint64_t addr );
    void save( const char* filename, uint64_t addr, uint64_t size );

    // Supported User Interface Operations

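    // Note on byte order: simulated memory is kept in the target's big-endian
    // layout. On a little-endian x64 host (ARCH_X64) every multi-byte access
    // below therefore byteswaps the data: after the raw load for peeks/loads,
    // before the raw store for pokes/stores. On a big-endian host the raw
    // access is used directly.
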
    void     poke8 ( uint64_t addr, uint8_t  data ) { SS_FastMemory::st8(addr,data); }
    void     poke16( uint64_t addr, uint16_t data ) { SS_FastMemory::st16(addr,data); }
    void     poke32( uint64_t addr, uint32_t data ) { SS_FastMemory::st32(addr,data); }
    void     poke64( uint64_t addr, uint64_t data ) { SS_FastMemory::st64(addr,data); }
    uint8_t  peek8u ( uint64_t addr ) { return SS_FastMemory::ld8u(addr); }
    int8_t   peek8s ( uint64_t addr ) { return SS_FastMemory::ld8s(addr); }
    uint16_t peek16u( uint64_t addr ) { return SS_FastMemory::ld16u(addr); }
    int16_t  peek16s( uint64_t addr ) { return SS_FastMemory::ld16s(addr); }
    uint32_t peek32u( uint64_t addr ) { return SS_FastMemory::ld32u(addr); }
    int32_t  peek32s( uint64_t addr ) { return SS_FastMemory::ld32s(addr); }
    uint64_t peek64 ( uint64_t addr ) { return SS_FastMemory::ld64(addr); }

    void peek128( uint64_t addr, uint64_t data[2] )
    {
      uint8_t* ptr = get_ptr(addr);
      data[0] = *(uint64_t*)(ptr + 0);
      data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
#endif
    }

    void peek256( uint64_t addr, uint64_t data[4] )
    {
      uint8_t* ptr = get_ptr(addr);
      data[0] = *(uint64_t*)(ptr + 0);
      data[1] = *(uint64_t*)(ptr + 8);
      data[2] = *(uint64_t*)(ptr + 16);
      data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
      data[2] = ss_byteswap64(data[2]);
      data[3] = ss_byteswap64(data[3]);
#endif
    }

    void peek512( uint64_t addr, uint64_t data[8] )
    {
      uint8_t* ptr = get_ptr(addr);
      data[0] = *(uint64_t*)(ptr + 0);
      data[1] = *(uint64_t*)(ptr + 8);
      data[2] = *(uint64_t*)(ptr + 16);
      data[3] = *(uint64_t*)(ptr + 24);
      data[4] = *(uint64_t*)(ptr + 32);
      data[5] = *(uint64_t*)(ptr + 40);
      data[6] = *(uint64_t*)(ptr + 48);
      data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
      data[2] = ss_byteswap64(data[2]);
      data[3] = ss_byteswap64(data[3]);
      data[4] = ss_byteswap64(data[4]);
      data[5] = ss_byteswap64(data[5]);
      data[6] = ss_byteswap64(data[6]);
      data[7] = ss_byteswap64(data[7]);
#endif
    }

    void poke128( uint64_t addr, uint64_t data[2] )
    {
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
#endif
      uint8_t* ptr = get_ptr(addr);
      *(uint64_t*)(ptr + 0) = data[0];
      *(uint64_t*)(ptr + 8) = data[1];
    }

    void poke512( uint64_t addr, uint64_t data[8] )
    {
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
      data[2] = ss_byteswap64(data[2]);
      data[3] = ss_byteswap64(data[3]);
      data[4] = ss_byteswap64(data[4]);
      data[5] = ss_byteswap64(data[5]);
      data[6] = ss_byteswap64(data[6]);
      data[7] = ss_byteswap64(data[7]);
#endif
      uint8_t* ptr = get_ptr(addr);
      *(uint64_t*)(ptr + 0)  = data[0];
      *(uint64_t*)(ptr + 8)  = data[1];
      *(uint64_t*)(ptr + 16) = data[2];
      *(uint64_t*)(ptr + 24) = data[3];
      *(uint64_t*)(ptr + 32) = data[4];
      *(uint64_t*)(ptr + 40) = data[5];
      *(uint64_t*)(ptr + 48) = data[6];
      *(uint64_t*)(ptr + 56) = data[7];
    }

    // Supported Fetch Operation (instruction fetch)

    virtual uint32_t fetch32 ( uint64_t addr ) { return SS_FastMemory::ld32u(addr); }
    virtual void     fetch256( uint64_t addr, uint64_t data[4] ) { SS_FastMemory::ld256(addr,data); }
    virtual void     fetch512( uint64_t addr, uint64_t data[8] ) { SS_FastMemory::ld512(addr,data); }

    // Supported Store Operations. st8(), st16(), st32() and st64() are guaranteed to be atomic.
    // st128() and st512() are atomic per 64bit quantity.

    virtual void st8( uint64_t addr, uint8_t data )
    {
      *(uint8_t*)(get_ptr(addr)) = data;
    }
    virtual void st16( uint64_t addr, uint16_t data )
    {
#if defined(ARCH_X64)
      data = ss_byteswap16(data);
#endif
      *(uint16_t*)(get_ptr(addr)) = data;
    }
    virtual void st32( uint64_t addr, uint32_t data )
    {
#if defined(ARCH_X64)
      data = ss_byteswap32(data);
#endif
      *(uint32_t*)(get_ptr(addr)) = data;
    }
    virtual void st64( uint64_t addr, uint64_t data )
    {
#if defined(ARCH_X64)
      data = ss_byteswap64(data);
#endif
      *(uint64_t*)(get_ptr(addr)) = data;
    }
    virtual void st128( uint64_t addr, uint64_t data[2] )
    {
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
#endif
      uint8_t* ptr = get_ptr(addr);
      *(uint64_t*)(ptr + 0) = data[0];
      *(uint64_t*)(ptr + 8) = data[1];
    }
    virtual void st512( uint64_t addr, uint64_t data[8] )
    {
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
      data[2] = ss_byteswap64(data[2]);
      data[3] = ss_byteswap64(data[3]);
      data[4] = ss_byteswap64(data[4]);
      data[5] = ss_byteswap64(data[5]);
      data[6] = ss_byteswap64(data[6]);
      data[7] = ss_byteswap64(data[7]);
#endif
      uint8_t* ptr = get_ptr(addr);
      *(uint64_t*)(ptr + 0)  = data[0];
      *(uint64_t*)(ptr + 8)  = data[1];
      *(uint64_t*)(ptr + 16) = data[2];
      *(uint64_t*)(ptr + 24) = data[3];
      *(uint64_t*)(ptr + 32) = data[4];
      *(uint64_t*)(ptr + 40) = data[5];
      *(uint64_t*)(ptr + 48) = data[6];
      *(uint64_t*)(ptr + 56) = data[7];
    }

    // Supported Load Operations. ld8[su]() to ld64() are guaranteed to be atomic. ld128() and
    // above are atomic at the 64 bit granularity.

    virtual uint8_t ld8u( uint64_t addr )
    {
      return *(uint8_t*)(get_ptr(addr));
    }
    virtual int8_t ld8s( uint64_t addr )
    {
      return *(int8_t*)(get_ptr(addr));
    }
    virtual uint16_t ld16u( uint64_t addr )
    {
#if defined(ARCH_X64)
      uint16_t data = *(uint16_t*)(get_ptr(addr));
      return ss_byteswap16(data);
#else
      return *(uint16_t*)(get_ptr(addr));
#endif
    }
    virtual int16_t ld16s( uint64_t addr )
    {
#if defined(ARCH_X64)
      int16_t data = *(int16_t*)(get_ptr(addr));
      return ss_byteswap16(data);
#else
      return *(int16_t*)(get_ptr(addr));
#endif
    }
    virtual uint32_t ld32u( uint64_t addr )
    {
#if defined(ARCH_X64)
      uint32_t data = *(uint32_t*)(get_ptr(addr));
      return ss_byteswap32(data);
#else
      return *(uint32_t*)(get_ptr(addr));
#endif
    }
    virtual int32_t ld32s( uint64_t addr )
    {
#if defined(ARCH_X64)
      int32_t data = *(int32_t*)(get_ptr(addr));
      return ss_byteswap32(data);
#else
      return *(int32_t*)(get_ptr(addr));
#endif
    }
    virtual uint64_t ld64( uint64_t addr )
    {
#if defined(ARCH_X64)
      uint64_t data = *(uint64_t*)(get_ptr(addr));
      return ss_byteswap64(data);
#else
      return *(uint64_t*)(get_ptr(addr));
#endif
    }
    virtual void ld128( uint64_t addr, uint64_t data[2] )
    {
      uint8_t* ptr = get_ptr(addr);
      data[0] = *(uint64_t*)(ptr + 0);
      data[1] = *(uint64_t*)(ptr + 8);
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
#endif
    }
    virtual void ld512( uint64_t addr, uint64_t data[8] )
    {
      uint8_t* ptr = get_ptr(addr);
      data[0] = *(uint64_t*)(ptr + 0);
      data[1] = *(uint64_t*)(ptr + 8);
      data[2] = *(uint64_t*)(ptr + 16);
      data[3] = *(uint64_t*)(ptr + 24);
      data[4] = *(uint64_t*)(ptr + 32);
      data[5] = *(uint64_t*)(ptr + 40);
      data[6] = *(uint64_t*)(ptr + 48);
      data[7] = *(uint64_t*)(ptr + 56);
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
      data[2] = ss_byteswap64(data[2]);
      data[3] = ss_byteswap64(data[3]);
      data[4] = ss_byteswap64(data[4]);
      data[5] = ss_byteswap64(data[5]);
      data[6] = ss_byteswap64(data[6]);
      data[7] = ss_byteswap64(data[7]);
#endif
    }
    virtual void ld256( uint64_t addr, uint64_t data[4] )
    {
      uint8_t* ptr = get_ptr(addr);
      data[0] = *(uint64_t*)(ptr + 0);
      data[1] = *(uint64_t*)(ptr + 8);
      data[2] = *(uint64_t*)(ptr + 16);
      data[3] = *(uint64_t*)(ptr + 24);
#if defined(ARCH_X64)
      data[0] = ss_byteswap64(data[0]);
      data[1] = ss_byteswap64(data[1]);
      data[2] = ss_byteswap64(data[2]);
      data[3] = ss_byteswap64(data[3]);
#endif
    }

    // st64partial() performs an 8 byte partial store. The bytes to store are specified by mask.
    // A 1 in bit N of mask denotes that byte (data >> (8*N)) & 0xff should be written to memory.

    virtual void st64partial( uint64_t addr, uint64_t data, uint64_t mask )
    {
      ss_stp8(*(double*)&data,get_ptr(addr),mask);
    }

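    // Illustrative sketch (not part of the original interface): a portable,
    // non-atomic equivalent of the mask semantics above, assuming bit N of
    // mask selects the byte stored at offset 7-N, matching the big-endian
    // layout simulated memory uses:
    //
    //   for (int n = 0; n < 8; n++)
    //     if ((mask >> n) & 1)
    //       get_ptr(addr)[7 - n] = uint8_t(data >> (8*n));
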
    // ld128atomic() (aka load twin double, load quad atomic) atomically loads two
    // 64bit values from memory at addr into rd. rd[0] is the value at addr, rd[1]
    // is the value at addr + 8. Note that ld128() does not guarantee atomicity.

    virtual void ld128atomic( uint64_t addr, uint64_t data[2] )
    {
      ss_ld128atomic(get_ptr(addr),data);
    }

    // ldstub() returns a byte from memory at addr, and sets the byte at addr
    // to 0xff. The ldstub() operation is atomic.

    virtual uint8_t ldstub( uint64_t addr )
    {
      return ss_ldstub(get_ptr(addr),0);
    }

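    // Usage sketch (illustrative): ldstub() is the classic SPARC spinlock
    // primitive. A caller acquires a byte lock by spinning until the old
    // value is 0, and releases it with an ordinary byte store:
    //
    //   while (memory.ldstub(lock_addr) != 0)
    //     ;                        // 0xff: somebody else holds the lock
    //   ...critical section...
    //   memory.st8(lock_addr,0);   // release
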
    // swap() exchanges the 32bit value rd with the 32bit value at addr.
    // The old 32bit value at addr is returned. The operation is atomic.

    virtual uint32_t swap( uint64_t addr, uint32_t rd )
    {
      return ss_swap(rd,get_ptr(addr),0);
    }

    // casx() compares the 64bit value rs2 with the 64bit value at addr.
    // If the two values are equal, the value rd is stored in the
    // 64bit value at addr. In both cases the old 64bit value at addr is
    // returned, that is, the value at addr before the store happened.
    // The casx() operation is atomic.

    virtual uint64_t casx( uint64_t addr, uint64_t rd, uint64_t rs2 )
    {
      return ss_casx(rd,get_ptr(addr),rs2);
    }

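    // Usage sketch (illustrative, not part of the original interface): the
    // usual compare-and-swap retry loop, here atomically incrementing the
    // 64bit word at addr:
    //
    //   uint64_t old, seen = memory.ld64(addr);
    //   do
    //   {
    //     old  = seen;
    //     seen = memory.casx(addr,old + 1,old); // stores old+1 iff still old
    //   } while (seen != old);
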
    // cas() is as casx(), but for 32bit values.

    virtual uint32_t cas( uint64_t addr, uint32_t rd, uint32_t rs2 )
    {
      return ss_cas(rd,get_ptr(addr),rs2);
    }

    // prefetch() prefetches data from memory into the cache hierarchy.

    void prefetch( uint64_t addr, uint_t size ) {}

    // flush() writes dirty data in the cache back to memory.

    void flush( uint64_t addr, uint_t size ) {} // process does not provide data.

    static SS_FastMemory memory;
  private:
    enum
    {
      // Configure the 55 bits of physical address space which is the
      // current SunSPARC limit. Note that no processor has implemented
      // this many address lines yet; currently the max is 48. To avoid
      // complaints about memory size we always provide 55 bits. As a
      // layout, a toplevel of 2^23 entries of 4GB chunks is convenient.
      // In v8plus mode, however, we use much less ... just enough to
      // make it work.

#if defined(ARCH_V9) || defined(ARCH_X64)
      L1BITS = 23,         // 2^L1BITS toplevel entries of
      L2BITS = 32,         // 4GB of NORESERVED mmapped space
#elif defined(ARCH_V8)
      L1BITS = 29,         // In v8 mode we have less va space ...
      L2BITS = 19,         // 512KB of NORESERVED mmapped chunks
#else
#error "Oops"
#endif
      L1SIZE = 1 << L1BITS,
      L1MASK = L1SIZE - 1
    };

    uint8_t* get_ptr( uint64_t addr )
    {
      uint8_t** o1 = l1 + ((addr >> L2BITS) & L1MASK);
      uint8_t*  l2 = *o1;
      if (l2 == 0)
      {
        l2lock.lock();
        if (*o1 == 0)
        {
          *o1 = (uint8_t*)mmap((char*)0,1ull << L2BITS,PROT_READ|PROT_WRITE,
                               MAP_NORESERVE|MAP_PRIVATE|MAP_ANON|MAP_ALIGN,-1,0);
        }
        l2 = *o1; // re-read under the lock: another thread may have mapped the chunk first
        l2lock.unlock();
      }
      return l2 + (addr & uint64_t((1ull << L2BITS) - 1));
    }

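    // Worked example of the translation in get_ptr(), for the V9/X64 layout
    // (L1BITS = 23, L2BITS = 32): the low 55 bits of addr split into an l1
    // index (addr >> 32) & 0x7fffff selecting one of 2^23 4GB chunks, and a
    // byte offset addr & 0xffffffff within that chunk. E.g. addr
    // 0x0000123456789abc hits l1 slot 0x1234 at offset 0x56789abc. A chunk
    // is mmap()ed lazily on first touch with MAP_NORESERVE, so untouched
    // chunks cost no memory.
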
    uint64_t ram_size;
    uint64_t rom_size;
    uint64_t page_size;
    uint64_t page_mask;

    uint8_t** l1;
    BL_Mutex  l2lock;
};

#endif /* COMPILE_FOR_SAM */
#endif /* __SS_FastMemory_h__ */