// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T2 Processor File: MemorySync.cc
// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
/************************************************************************
**
** Copyright (C) 2002, Sun Microsystems, Inc.
**
** Sun considers its source code as an unpublished, proprietary
** trade secret and it is available only under strict license provisions.
** This copyright notice is placed here only to protect Sun in the event
** the source is deemed a published work. Disassembly, decompilation,
** or other means of reducing the object code to human readable form
** is prohibited by the license agreement under which this code is
** provided to the user or company in possession of this copy."
**
*************************************************************************/
#include "MemorySync.h"
#include <sstream>
#include <cstdlib>
#include <time.h>
#include "MemoryTransaction.h"
#include "SS_Strand.h"


using namespace std;
////////////////////////////////////////////////
typedef void (MemorySync::* mem_type)(MemoryTransaction&);

// static variable
MemorySync* MemorySync::msyncObj = NULL;
// static function, dump buffer content
string MemorySync::dumpBuffers()
{
    if (MemorySync::msyncObj) {
        return MemorySync::msyncObj->toString();
    }
    else {
        return string("");
    }
}
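
// Usage sketch (illustrative, not from the original source): since
// dumpBuffers() is static and takes no arguments, it can be called from an
// error handler or a debugger to snapshot all outstanding buffers, e.g.
//
//     fprintf(stderr, "%s", MemorySync::dumpBuffers().c_str());
//
// It returns an empty string if no MemorySync instance has been created.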

MemorySync::MemorySync() : addrTrans(NULL), socket(NULL)
{
}

MemorySync::MemorySync(int max_strands, int strands_per_core, int cores_per_cpu, int memDebug, int tsoChecker, int callback) :
    memDebug_(memDebug),
    addrTrans(NULL)
{
    int i;

    // keep this in sas.log, so it is easier to check whether msync is enabled
    // or not.
    time_t clock;
    time(&clock);

    rif_.setMaxStrands(max_strands);
    rif_.setCoreStrands(strands_per_core);
    rif_.setCpuCores(cores_per_cpu);
    mab_.setRieslingInterface(&rif_);

    ldb_ = new LoadStoreBuffer[MAX_STRANDS];
    stb_ = new LoadStoreBuffer[MAX_STRANDS];
    retStb_ = new LoadStoreBuffer[MAX_STRANDS];
    rmoStb_ = new LoadStoreBuffer[MAX_STRANDS];
    ifb_ = new LoadStoreBuffer[MAX_STRANDS];

    iseq_ = new uint64_t[MAX_STRANDS];

    for (i = 0; i < MAX_STRANDS; i++) {
        ldb_[i].setBufName((char*) "LoadBuffer");
        stb_[i].setBufName((char*) "StoreBuffer");
        retStb_[i].setBufName((char*) "RetiredBuffer");
        rmoStb_[i].setBufName((char*) "RMOBuffer");
        ifb_[i].setBufName((char*) "FetchBuffer");
        iseq_[i] = 1;
    }

    /**
     * set Memory preMemoryAccess and postMemoryAccess function pointer
     */
    if (callback > 0)
    {
        SS_Memory::memory.msync_object = this;
        SS_Memory::memory.msync_pre_access = pre_memory_access;
        SS_Memory::memory.msync_post_access = post_memory_access;

        SS_Io::io.msync_object = this;
        SS_Io::io.msync_pre_access = pre_memory_access;
        SS_Io::io.msync_post_access = post_memory_access;
    }

    MemorySyncMessage::debugLevel = memDebug;
    if ((tsoChecker > 0) && (callback > 0)) {
        // tsoChecker can become heavy in a multi-strand environment, use it
        // with caution. By default tsoChecker is off, use
        // -sas_run_args=-DTSO_CHECKER to enable it.
        tsoChecker_ = new TsoChecker();
        tsoChecker_->init(max_strands);
    }
    else {
        tsoChecker_ = NULL;
    }

    if (MemorySync::msyncObj == NULL) {
        MemorySync::msyncObj = this;
    }

    // by default, only core_0 of node_0 is enabled and is able to produce
    // cache invalidation requests.
    inv_vec_mask[0] = 0x1;
    for (int i = 1; i < INV_VEC_SIZE; i++)
        inv_vec_mask[i] = 0x0;
}

////////////////////////////////////////////////

MemorySync::MemorySync( const MemorySync & orig )
{

}

////////////////////////////////////////////////

MemorySync::~MemorySync()
{
}

////////////////////////////////////////////////

const MemorySync &
MemorySync::operator=( const MemorySync & rhs )
{
    // Replace the following line with your function body.
    // RIESLING_THROW_DOMAIN_ERROR( "Unimplemented function." );

    return *this;
}

////////////////////////////////////////////////

bool
MemorySync::operator==( const MemorySync & rhs ) const
{
    // Replace the following line with your function body.
    // RIESLING_THROW_DOMAIN_ERROR( "Unimplemented function." );
    return false;
}

////////////////////////////////////////////////

string
MemorySync::toString() const
{
    ostringstream os;

    os << "mab: " << mab_.toString();
    for (int i = 0; i < MAX_STRANDS; i++) {
        if (ldb_[i].size() > 0) {
            os << "ldb[" << i << "]: " << ldb_[i].toString();
        }
        if (stb_[i].size() > 0) {
            os << "stb[" << i << "]: " << stb_[i].toString();
        }
        if (retStb_[i].size() > 0) {
            os << "retStb[" << i << "]: " << retStb_[i].toString();
        }
        if (rmoStb_[i].size() > 0) {
            os << "rmoStb[" << i << "]: " << rmoStb_[i].toString();
        }
        if (ifb_[i].size() > 0) {
            os << "ifb[" << i << "]: " << ifb_[i].toString();
        }
    }

    return os.str();
}

/******************************************************************************
 * This implementation assumes each load sends only one LoadIssue command.
 * If an environment prefers to send 8 LoadIssues for a Block-Load and 2
 * LoadIssues for a Quad-Load, then the code can be simplified to handle all
 * types of load like the default case.
 ******************************************************************************/
void
MemorySync::handleLoadIssue(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    list<LoadStoreEntry>::iterator ii;
    LoadStoreEntry* entry;
    LoadStoreBuffer& ldb = ldb_[cmd.getThrdId()];

#ifdef N2MODEL
    /******************************************************************************
     * In general, the RTL should send the data source along with the LoadData.
     * However, N2 made a special request to also check STB bypassing in the
     * Memory Sync model to double-check the RTL function. We were told to check
     * this STB bypassing when receiving the LoadData command. However, it turns
     * out that the RTL actually checks STB bypassing at LoadIssue. (5/17/04)
     * Hence, this check was moved here. This is one example of the drawback of a
     * model that is too microarchitecture-dependent. Accompanying this change,
     * they also changed their STB bypass check criteria. Hence, my code.
     ******************************************************************************/
    list<LoadStoreEntry>::iterator ste;
    LoadStoreBuffer& stb = stb_[cmd.getThrdId()];

    if (!cmd.isIO() &&
        cmd.getItype() != ITYPE_BLOCK_LOAD &&
        cmd.getItype() != ITYPE_DOUBLE_LOAD &&  // added 6/1/04 TPS
        cmd.getItype() != ITYPE_QUAD_LOAD &&
        cmd.getItype() != ITYPE_ATOMIC) {
        ste = stb.findN2DataBypassing(cmd.getAddr(), cmd.getSizeV());
        if (ste != stb.end()) {  // found in STB
//            if (cmd.getDsrc() != DSRC_STB) {
//                MS_ERROR("MemorySync found the data in STB; while RTL's data is from %s", mmdsrc[cmd.getDsrc()]);
//            }
            cmd.setDsrc(DSRC_STB);
        } else {  // else find data from L2_MEMORY
//            if (cmd.getDsrc() == DSRC_STB) {
//                MS_ERROR("RTL indicates data is from STB; while MemorySync does not find the data in STB");
//            }
            cmd.setDsrc(DSRC_NONE);
        }
    }
#endif

    // uint64_t iseq = iSeq(cmd.getThrdId());
    uint64_t iseq;

    switch (cmd.getItype()) {
    case ITYPE_BLOCK_LOAD:
        cmd.setSize(8);
        for (int i = 0; i < 8; i++) {
            entry = new LoadStoreEntry(cmd);
            entry->setIseq(iSeq(cmd.getThrdId()));  // each entry gets a different iseq
            ii = ldb.pushBack(*entry);
            cmd.setAddr(cmd.getAddr() + 8ull);
            MSYNC_DEBUG(2, "Last Enter to LDB %s", ii->toString().c_str());
        }
        break;
    case ITYPE_QUAD_LOAD:
        iseq = iSeq(cmd.getThrdId());
        cmd.setSize(8);
        for (int i = 0; i < 2; i++) {
            entry = new LoadStoreEntry(cmd);
            entry->setIseq(iseq);  // each entry gets the same iseq
            ii = ldb.pushBack(*entry);
            cmd.setAddr(cmd.getAddr() + 8ull);
            MSYNC_DEBUG(2, "Last Enter to LDB %s", ii->toString().c_str());
        }
        break;
    default:
        entry = new LoadStoreEntry(cmd);
        entry->setIseq(iSeq(cmd.getThrdId()));
        ii = ldb.pushBack(*entry);
        MSYNC_DEBUG(2, "Last Enter to LDB %s", ii->toString().c_str());
        break;
    }
}
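
/* Worked example (illustrative, not from the original source): a Block-Load
   issued at PA 0x100 creates eight 8-byte LDB entries at 0x100, 0x108, ...,
   0x138, each with its own iseq; a Quad-Load at 0x100 creates two entries
   (0x100 and 0x108) sharing one iseq, so both halves retire as a single
   instruction. All other load types create exactly one LDB entry. */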

void
MemorySync::handleLoadData(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    list<MemoryAccessEntry>::iterator mlink;
    list<LoadStoreEntry>::iterator lde;
    list<LoadStoreEntry>::iterator ste;
    MemoryAccessEntry* mae;
    int nMaePerLoadData;
    uint32_t cid = cmd.getCoreId();
    uint32_t tid = cmd.getThrdId();
    enum DATA_SRC dsrc = cmd.getDsrc();

    LoadStoreBuffer& ldb = ldb_[tid];

    /******************************************************************************
     * Find the corresponding load instruction in the Load Buffer
     * - Note that <address> and <size> are generally not dependable since, if the
     *   access is to L2, usually only the address bits necessary to access cache
     *   lines are observable in the RTL; the same holds for the size parameter.
     * - <id> is certainly the most dependable way of matching. However, machines
     *   that only allow one outstanding load may consider sending this info over
     *   an unnecessary effort.
     *
     * This implementation matches a LoadData against the load buffer by finding
     * the oldest entry that has not yet been associated with a LoadData and whose
     * <id> and <address> match the <id> and <address> of the LoadData, where the
     * <address> comparison ignores the 3 least significant bits.
     *
     * In case the <id> is not implemented, the mechanism still works as long
     * as accesses to the same cache line (or, relaxing more, the same 8-byte block)
     * result in in-order LoadData. Machines allowing only one outstanding load
     * certainly meet this condition. I believe many machines that allow multiple
     * outstanding loads should also meet this criterion. Note that an environment
     * that does not implement <id> should always set this field to 0.
     ******************************************************************************/
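    /* Matching example (illustrative, not from the original source): with two
       unlinked LDB entries <id=0, PA=0x1000> and <id=0, PA=0x1008>, a LoadData
       <id=0, PA=0x1004> matches the first entry, because 0x1000 and 0x1004
       agree once the low 3 bits are masked out (BLK_ADDR_MASK), and the oldest
       unlinked match wins. */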
    lde = ldb.find1stNoLink(cmd.getId(), cmd.getAddr(), BLK_ADDR_MASK);
    if (lde == ldb_[tid].end())
    {
        MS_ERROR("LoadData failed to find matched LoadIssue. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }

    /* Assign size info to the cmd from the load buffer so that the MEM_LOAD_DATA
       can have the exact data size. This serves two purposes: (1) N2 requested
       to remove the <size> info from the LoadData command since it is hard for
       them to probe correct <size> info in a multi-core environment. (6/22/04)
       (2) the TsoChecker needs correct <size> info for loads. */
    cmd.setSize(lde->getSize());
    cmd.setSizeV(lde->getSizeV());

#ifdef N2MODEL_REMOVE
    /******************************************************************************
     * In general, the RTL should send the data source along with the LoadData.
     * However, N2 made a special request to only indicate whether the data is
     * from L1$ or not. If the data is not from L1$, then the Memory Sync Model
     * will have to find out whether the data is in the store buffer. Also, in
     * N2, one LoadData represents 16 bytes of data for block load and quad load.
     * ^^^^^^^^^^^ This check moved to LoadIssue. See note in LoadIssue. 5/17/04
     ******************************************************************************/
    /******************************************************************************
     * Four types of accesses in N2 will never get data from the store buffer.
     * They are IO, Block Load, Quad Load, and atomic accesses.
     ******************************************************************************/
    if (dsrc == DSRC_L2_MEMORY &&
        !cmd.isIO() &&
        lde->getItype() != ITYPE_BLOCK_LOAD &&
        lde->getItype() != ITYPE_QUAD_LOAD &&
        lde->getItype() != ITYPE_ATOMIC) {
        ste = stb_[tid].findDataBypassing(cmd.getAddr(), cmd.getSizeV());
        if (ste != stb_[tid].end()) {  // found in STB
            dsrc = DSRC_STB;
        }  // else find data from L2_MEMORY
    }
#endif

#ifdef N2MODEL
    /* If the STB hit at LoadIssue, then the LoadData data source is ignored. */
    if (lde->getDsrc() == DSRC_STB) {
        if (dsrc != DSRC_STB) {
            MS_ERROR("MemorySync found the data in STB; while RTL's data is from %s", mmdsrc[cmd.getDsrc()]);
            return;
        }
    } else {  // else find data from L2_MEMORY
        if (dsrc == DSRC_STB) {
            MS_ERROR("RTL indicates data is from STB; while MemorySync does not find the data in STB");
            return;
        }
    }

    if (lde->getItype() == ITYPE_BLOCK_LOAD ||
        lde->getItype() == ITYPE_QUAD_LOAD) {
        nMaePerLoadData = 2;
    } else {
        nMaePerLoadData = 1;
    }
#else
    nMaePerLoadData = 1;
#endif

    mae = new MemoryAccessEntry(cmd);
    mae->setItype(lde->getItype());
    mae->setDsrc(dsrc);
    mae->setIseq(lde->getIseq());
    mlink = mab_.pushBack(*mae);
    lde->setLink(mlink);  // load buffer entry has a link to the corresponding MAB entry

    /**************************************
     * if (LDDATA & dsrc=3),
     *    then scan MAB for any STCOM to this PA. STCOM inv_vect does not matter in this case.
     *
     * if (LDDATA & dsrc=2)
     *    then scan MAB for STCOM
     *        if (STCOM inv_vect[mycore]=0), then use this Store
     *        else if (STCOM inv_vect[mycore]=1) and (STINV received), then use this Store
     *        else if (STCOM inv_vect[mycore]=1) and (STINV not received), then can't use this Store, keep scanning MAB.
     *
     * where mycore is the core number of the LDDATA.
     */

    switch (dsrc) {
    case DSRC_STB:
    {
        LoadStoreBuffer& stb = stb_[tid];
        LoadStoreBuffer& rstb = retStb_[tid];

        /* find data from store buffer */

        /******************************************************************************
         * STB bypassing is checked differently for N1 and N2
         * (also see comments on how to model stores)
         * o N1 checks both stb_ and retStb_. It checks retired_buf_ because there
         *   is still a timing gap allowed for bypassing after a StoreAck is received
         *   in N1. This implementation does not check rmoStb_, however. The assumption
         *   is that no STB bypassing of RMO store data is possible. In theory, this
         *   assumption is not always true. The timing gap mentioned above should also
         *   be possible in the RMO case. In general, if a load would like to use the
         *   data of an RMO store, a membar instruction should be introduced. If this is
         *   strictly followed, then there is no problem in not checking rmoStb_. Diags
         *   may fail to guarantee this; the implementation takes the risk anyway.
         * o N2 checks only stb_. It must be there since it was checked before by the
         *   program.
         ******************************************************************************/
        ste = stb.findDataBypassing(cmd.getAddr(), cmd.getSizeV());
        if (ste == stb.end()) {  // not in STB
            ste = rstb.findDataBypassing(cmd.getAddr(), cmd.getSizeV());  // try retired_stb
            if (ste == rstb.end())
            {
                MS_ERROR("LoadData failed to find matched STB bypassing entry. tid=%d PA=%llx", tid, cmd.getAddr());
                return;
            }

            MSYNC_DEBUG(1, "Store Bypassing gets data from Retired Buffer %s",
                        ste->toString().c_str());
        }
        /* set data in the memory access entry */
#ifdef N2MODEL
        if (!ste->isDataValid())
        {
            MS_ERROR("LoadData finds Store Buffer data is not ready. tid=%d PA=%llx", tid, cmd.getAddr());
            return;
        }
#endif
        // In the N1 swerver-memory.cc environment, the data may not be ready at this time
        if (ste->isDataValid()) {
            mlink->setData(ste->getData());
        } else {  // N2 should not go down this path
            mlink->setData(0ull);  // set arbitrary data
            ste->setLink2(mlink);  // when data is ready, it can also set the MAB entry
        }
        mlink->setDsrcMid(mlink->getId());
        break;
    }
    case DSRC_L1:
    {
        list<MemoryAccessEntry>::iterator dsrcMlink;

        dsrcMlink = mab_.findL1DataEntry(cmd);
        if (dsrcMlink == mab_.end()) {
            mlink->setData(rif_.readMemory(cid, tid, cmd.getAddr() & ADDR_MASK, 8));  // read aligned 8 bytes
            // mlink->setDsrcMid(mlink->getId());
            mlink->setDsrcMid(mab_.begin()->getId() - 1);  // source is from before MAB head
        } else {
            mlink->setData(dsrcMlink->getData());
            mlink->setDsrcMid(dsrcMlink->getDsrcMid());
        }
        break;
    }
    case DSRC_L2_MEMORY:
        // if it is an I/O address, defer the data setting until the
        // corresponding load instruction is invoked, where we may pick up a
        // (follow-me) CSR_READ
        if ((cmd.getAddr() & IO_ADDR_BIT_MASK) != IO_ADDR_BIT_MASK)
        {
            // not an I/O address, should set the value here so that entries
            // after this may use it.
            mlink->setData(getL2Data(mab_.end(), cmd));
        }
        mlink->setDsrcMid(mlink->getId());

        if (nMaePerLoadData > 1) {
            for (int i = 1; i < nMaePerLoadData; i++) {
                lde++;
                cmd.setAddr(cmd.getAddr() + 8);  // must be 8 bytes per access if nMaePerLoadData > 1
                if (lde == ldb_[tid].end())
                {
                    MS_ERROR("LoadData failed to find matched LoadIssue. tid=%d PA=%llx", tid, cmd.getAddr());
                    return;
                }
                MSYNC_DEBUG(2, "lde->getAddr()=%llx, cmd.getAddr()=%llx",
                            lde->getAddr(), cmd.getAddr());
                if (lde->getAddr() != cmd.getAddr())
                {
                    MS_ERROR("Mismatch address between LoadData and LoadIssue. tid=%d PA=%llx", tid, cmd.getAddr());
                    return;
                }

                mae = new MemoryAccessEntry(cmd);
                mae->setItype(lde->getItype());
                mae->setDsrc(dsrc);
                // if it is not an I/O address, ditto
                if ((cmd.getAddr() & IO_ADDR_BIT_MASK) != IO_ADDR_BIT_MASK)
                {
                    // ditto
                    mae->setData(getL2Data(mlink, cmd));
                }
                mae->setDsrcMid(mae->getId());
                mae->setIseq(lde->getIseq());
                mlink = mab_.pushBack(*mae);
                lde->setLink(mlink);  // load buffer entry has a link to the corresponding MAB entry
            }
        }
        break;
    default:
        MS_ERROR("LoadData receives unknown data source. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
}

void
MemorySync::handleLoadFill(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    list<MemoryAccessEntry>::iterator mlink, newlink;
    MemoryAccessEntry entry (cmd);

    mlink = mab_.findLoadFillSrc(cmd);
    if (mlink == mab_.end())
    {
        MS_ERROR("LoadFill failed to find matched LoadData. tid=%d PA=%llx", cmd.getThrdId(), cmd.getAddr());
        return;
    }
//    //TODO this assumes the corresponding load entry is popped by LOAD_POP,
//    //     but it can be an error instead, so need a better solution.
//    if (mlink == mab_.end()) return;

    entry.setData(mlink->getData());
    entry.setExecuted(true);  // set for being retired from the MAB when it reaches the head
    entry.setLink(mlink);
    entry.setDsrcMid(mlink->getId());
    newlink = mab_.pushBack(entry);
    mlink->setLink(newlink);

    /* Also get the other part of the L1$ line data, TPS 4/22/04 */
    MemoryAccessEntry* e;
    uint64_t line_addr_mask = ~((uint64_t) (DCACHE_LINE_SIZE - 1));
    uint64_t addr = cmd.getAddr() & line_addr_mask;

    for (int i = 0; i < DCACHE_LINE_SIZE/8; i++, addr += 8) {
        if ((mlink->getAddr() & ADDR_MASK) == (addr & ADDR_MASK)) {
            continue;
        }

        cmd.setAddr(addr);
        e = new MemoryAccessEntry (cmd);
        e->setData(getL2Data(mlink, cmd));  // the same line must get data from the same time
        e->setExecuted(true);  // set true so that when it reaches the head, it can be removed
        e->setLink(mlink);     // set to mlink indicating its search starts from here
        e->setDsrcMid(mlink->getId());
        mab_.pushBack(*e);
    }
}
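
/* Worked example (illustrative, not from the original source): assuming a
   16-byte D$ line (DCACHE_LINE_SIZE = 16), a LoadFill for PA 0x108 first
   records the fill entry for 0x108 itself, then the loop above adds one
   shadow entry for the other 8-byte block of the line (0x100), so the whole
   L1 line is modeled as filled at the same point in MAB order. */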

/******************************************************************************
 * %%% Store Behavior
 *
 * %% General Implementation Note
 *
 * - StoreAck
 *   By definition, a StoreAck is sent when the corresponding StoreIssue is to be
 *   removed from the Store Buffer so that no STB bypassing from this entry is
 *   possible.
 *
 *   However, N1 still has a timing window in which STB bypassing is allowed
 *   after the StoreAck is sent. Hence, a retStb_ is added to temporarily
 *   hold the Acked entries.
 *
 *   N2 does obey this rule; its behavior is described below:
 *   1. Normal store
 *      In Niagara 2, the design of the StoreAck can be abstracted as follows:
 *      StoreAcks are queued, and are released from the queue in order. If a
 *      StoreAck also causes a store update or invalidation, the StoreAck will be
 *      held in the queue until its update/invalidation is done.
 *
 *      When a StoreAck is released from the queue, a StoreAck is sent to the
 *      Memory Sync model followed by, if any, a StoreUpdate or StoreInv. Hence,
 *      the Memory Sync model should see in-order StoreAcks.
 *
 *      For a normal store, the StoreAck always comes after the StoreCommit.
 *
 *   2. RMO Store (Block Store, stores using *_BLOCK_INIT_ST_QUAD_LDD_*)
 *      By definition, these stores do not check data dependence, and follow the
 *      RMO memory consistency model. They leave the StoreBuffer when they are
 *      issued. A StoreAck will be sent to reflect this fact. Hence, this type of
 *      store has its StoreAck before its StoreCommit. A data structure, rmoStb_,
 *      is introduced to hold these stores after they are removed from the store
 *      buffer so that the StoreCommit can still find the matched entry to extract
 *      needed info from the StoreIssue.
 *
 * - Order Between STEP and StoreAck
 *   Note that this implementation does not assume an order between a STEP and its
 *   corresponding StoreAck. Different CPUs can have different implementations.
 *   The following are some cases:
 *   > Normal stores usually have their STEP before StoreAck.
 *   > Atomics always have STEP after StoreAck in N2, but may have STEP before
 *     StoreAck in N1.
 *   > RMO stores could have StoreAck before or after STEP.
 *
 *
 * %% MemorySync Store Model
 *
 * - When a StoreIssue is received, an entry is placed into the store buffer.
 *
 * - When a StoreAck is received, the corresponding StoreIssue must be at the head
 *   of the STB. The entry is then moved to
 *   o rmoStb_[tid], if cmd.isRMOstore() is true
 *   o retStb_[tid], otherwise
 *
 * - When a StoreCommit is received, the program searches both stb_[tid]
 *   (must be in-order) and rmoStb_[tid] (may be out-of-order). Normal stores
 *   must obey TSO; hence, N1/N2 cannot issue the next store, unless in the same
 *   cache line, until a StoreAck is received. Therefore, the StoreCommit must
 *   be in-order.
 *   However, RMO stores follow the RMO memory consistency model. An RMO store can
 *   be issued from the store buffer without waiting for the commit of a prior
 *   RMO store. As a result, the MemorySync may see out-of-order StoreCommits
 *   for RMO stores, since stores accessing different banks may commit out of
 *   issue order. Nevertheless, accesses to the same bank should still follow
 *   issue order.
 *
 *   In summary, in either case, StoreCommits for the same cache line should be
 *   in-order, which is the assumption made to match StoreCommit-StoreIssue in
 *   the handleStoreCommit() method.
 *
 * - When a STEP (callback) is received, the program searches both stb_[tid]
 *   (must be in-order) and rmoStb_[tid] (out-of-order or in-order ?).
 *
 * - When a StoreUpdate/StoreInv is received, it finds the StoreCommit that
 *   initiated this StoreUpdate/StoreInv. There is no need to match it with an
 *   entry in stb_, rmoStb_, or retStb_.
 *
 * => an rmoStb_ entry is removed when it has issued to L2 and is executed
 * => a retire_stb_ entry is removed when a new entry is entered and the
 *    retire_stb_ size is greater than 2 (2 is an arbitrary number, since this
 *    retire_stb_ exists for N1 compatibility, to keep the entry around long
 *    enough for STB bypassing)
 ******************************************************************************/
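
/* Lifecycle sketch (illustrative, not from the original source): for a normal
   TSO store on thread t,
       StoreIssue   -> entry pushed to stb_[t]
       StoreCommit  -> matched in stb_[t]; merged data pushed into mab_
       StoreAck     -> head of stb_[t] moved to retStb_[t] (kept briefly for
                       N1-style late bypassing)
       StoreInv/Upd -> matched against the StoreCommit already in mab_
   For an RMO store the Ack arrives first, so the entry moves from stb_[t]
   to rmoStb_[t], and the later StoreCommit is matched there instead. */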

void
MemorySync::handleStoreIssue(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    list<LoadStoreEntry>::iterator ii;
    LoadStoreEntry ste (cmd);

#ifndef N1MODEL  // <data> is expected; however, N1 does not have data at this moment
    ste.setState(LS_TDATA);
#endif
    if (cmd.getItype() == ITYPE_ATOMIC) {           // assume an atomic-store always directly
        ste.setIseq(getIseq(cmd.getThrdId()) - 1);  // follows its atomic-load
    } else {
        ste.setIseq(iSeq(cmd.getThrdId()));
    }
    ii = stb_[cmd.getThrdId()].pushBack(ste);

    MSYNC_DEBUG(2, "Last Enter to STB %s", ii->toString().c_str());
}

void
MemorySync::handleStoreCommit(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    int tid = cmd.getThrdId();
    list<MemoryAccessEntry>::iterator mlink;
    list<LoadStoreEntry>::iterator slink;
    LoadStoreBuffer& stb = stb_[tid];
    LoadStoreBuffer& rmostb = rmoStb_[tid];
    MemoryAccessEntry entry (cmd);
    uint64_t mdata;  // mab data
    uint64_t sdata;  // stb data
    uint8_t mszv;    // size vector
    uint8_t sszv;    // size vector
    bool zeroCacheLine;

    /******************************************************************************
     * Similar to the reasoning in LoadData, when matching the StoreCommit to a
     * StoreIssue, the implementation searches the store buffer by finding the
     * oldest entry that has not yet been associated with a StoreCommit and whose
     * <id> and <address> match the <id> and <address> of the StoreCommit, where
     * the <address> comparison ignores the 3 least significant bits.
     *
     * When the <id> is not implemented, this mechanism works as long as accesses
     * to the same cache line (or, relaxing more, the same 8-byte block) result in
     * in-order StoreCommits. This condition should be true for cacheable accesses.
     * For non-cacheable accesses, whether the condition is met depends on the
     * implementation decision. The guess is yes. For I/O, Sun's CPU designs will
     * most likely enforce SYNC before and after. Note that an environment
     * that does not implement <id> should always set this field to 0.
     ******************************************************************************/

#ifdef N3MODEL  // N1 assumes an RMO store's StoreAck comes after StoreCommit, like other stores
    slink = stb.find1stNoLink(cmd.getId(), cmd.getAddr(), BLK_ADDR_MASK);
    if (slink == stb.end())
    {
        MS_ERROR("StoreCommit failed to find matched StoreIssue. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
    slink->setState(LS_ISSUE);

#else  // N2's RMO store has StoreAck before StoreCommit
    slink = rmostb.find1stNoLink(cmd.getId(), cmd.getAddr(), BLK_ADDR_MASK);
    if (slink == rmostb.end()) {
        slink = stb.find1stNoLink(cmd.getId(), cmd.getAddr(), BLK_ADDR_MASK);
        if (slink == stb.end())
        {
            MS_ERROR("StoreCommit failed to find matched StoreIssue. tid=%d PA=%llx", tid, cmd.getAddr());
            return;
        }
        slink->setState(LS_ISSUE);
    } else {
        entry.setAcked(true);
    }
#endif
    /******************************************************************************
     * Special handling for a BLK_INIT store that in a certain case will zero the
     * whole cache line. This type of instruction is supported in N1 and N2.
     ******************************************************************************/
    if ((slink->getItype() == ITYPE_STORE_INIT) &&
        !slink->isIO() && ((slink->getAddr() & 0x3f) == 0) && !cmd.isL2hit()) {
        zeroCacheLine = true;
    } else {
        zeroCacheLine = false;
    }

    /* merge data and save the merged data in the corresponding memory access entry */
    sdata = slink->getData();

    /* Originally, sizeV == 0 indicated CAS comparison false. Changed to use a separate
       flag to indicate this status. 9/10/04 */
    // sszv = (cmd.getSizeV() == 0) ? 0 : slink->getSizeV(); // size_vector = 0 in CMD => cas comparison false
    sszv = (!cmd.isSwitchData()) ? 0 : slink->getSizeV();  // !isSwitchData() => cas comparison false

    entry.setMerged(false);
    if (zeroCacheLine) {  // special case for STORE_INIT
        mdata = merge (0ull, sdata, sszv);
    } else {
        if (slink->getSize() == 8 && sszv == 0xff) {  // no merge needed
            mdata = sdata;
        } else {  // sszv = 0 still needs to get correct data in the entry
            mdata = getL2Data(mab_.end(), cmd);
            mdata = merge (mdata, sdata, sszv);
            // keep merge-related data in case we need to repeat it (with a different
            // mdata)
            entry.setMerged(true);
            entry.setOrigData(sdata);
            entry.setOrigSizeV(sszv);
        }
    }
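
    /* Merge example (illustrative, not from the original source; assumes one
       size-vector bit per byte lane of the aligned 8-byte block): a 2-byte
       store of 0xBEEF into a block currently holding 0x0011223344556677, with
       sszv selecting exactly the two stored byte lanes, yields a merged value
       in which only those two bytes become 0xBE and 0xEF while the other six
       bytes keep their L2 values. With sszv == 0 (a failed CAS), merge()
       leaves the L2 data unchanged. */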
    entry.setData(mdata);
    entry.setExecuted(slink->isExecuted());

    if (sszv == 0) entry.setSizeV(0);  // This is possible in N1 for CAS

    entry.setItype(slink->getItype());
    entry.setIseq(slink->getIseq());
    mlink = mab_.pushBack(entry);
    mlink->setLink(mlink);  // point to itself
    slink->setLink(mlink);  // STB entry's link points to the STORE_COMMIT in MAB
    // if inv_vec is not zero, re-adjust it to mask out sniper-related
    // invalidation, because the sniper does not generate a related inv command;
    // otherwise the inv_vec bit(s) related to the sniper will stay on, and the
    // store_commit will stay in the MAB forever.
    if (mlink->getInv())
    {
        uint32_t mask = 0;
        for (int i=0; i<4; i++)
        {
            mask |= (((uint32_t)inv_vec_mask[i]) << (i*8));
        }
        mlink->setInv(mlink->getInv() & mask);
    }

    if (zeroCacheLine) {  // Add StoreCommit entries with 0 data for the L2 cache line
        MemoryAccessEntry* zero_entry;
        for (int i = 1; i < L2CACHE_LINE_SIZE/8; i++) {
            zero_entry = new MemoryAccessEntry (cmd);
            zero_entry->setAddr(slink->getAddr() + (i << 3));
            zero_entry->setData(0ull);
            zero_entry->setExecuted(true);
            zero_entry->setSizeV(0xff);
            zero_entry->setInv(0);
            zero_entry->setAcked(true);
            zero_entry->setItype(ITYPE_STORE_INIT);
            zero_entry->setIseq(slink->getIseq());
            mlink = mab_.pushBack(*zero_entry);
            mlink->setLink(mlink);  // point to itself
        }
    }

    /* Manage the rmoStb_ */
    if (slink->isRMOstore() && slink->isExecuted()) {
        rmostb.erase(slink);
    }

    /* Added to speed up the drain of the MAB - 6/3/04 TPS */
    vector<MemoryAccessEntry> wdbuf;

    mab_.popFront(&wdbuf, tsoChecker_);
    bufWriteMemory(wdbuf);
}

void
MemorySync::handleStoreAck(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    uint32_t tid = cmd.getThrdId();
    LoadStoreBuffer& stb = stb_[tid];
    list<LoadStoreEntry>::iterator ii;

    MSYNC_DEBUG(2, "stb=%s", stb.toString().c_str());

    /******************************************************************************
     * In Niagara 1, the StoreAck is issued in-order, but StoreAcks may be
     * out-of-order for RMO stores, as explained below.
     * Note that in N1, RMO stores, BLK_INIT store and BLOCK_STORE, are Acked the
     * same as other stores. Since RMO stores can be issued to L2 without waiting
     * for the ACK before issuing another one, their StoreAcks can be out-of-order
     * if they are from different banks.
     * In N2, the StoreAck for an RMO store is sent when it is released from the
     * store buffer. Hence, it is still in order.
     *
     * The N1MODEL may have a potential problem. The STORE_ACK CMD has only an
     * address; N1's swerver-memory.cc uses address and size for searching. So, a
     * <size> field may need to be added to my specification. However, as of the
     * development of this code, N1 is not a target to support.
     ******************************************************************************/
#ifdef N1MODEL  // cannot just search by address, entries may have the same address
    ii = stb.findMatchO2Y(cmd.getAddr(), cmd.isRMOstore(), FULL_ADDR_MASK);
    if (ii == stb.end())
    {
        MS_ERROR("StoreAck failed to find matched StoreIssue. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
#else  // N2 does not send the address, and guarantees to send StoreAcks in order
    ii = stb.begin();
    if (ii == stb.end())
    {
        MS_ERROR("StoreAck sees empty STB. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
#endif

#ifdef N1MODEL  // N2's StoreAck does not have an address
    MSYNC_DEBUG (2, "addr-stb=%#llx addr-cmd=%#llx", ii->getAddr(), cmd.getAddr());
    if (ii->getAddr() != cmd.getAddr())
    {
        MS_ERROR("StoreAck is issued out-of-order, should be in-order. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
#endif

    /* In N1 swerver-memory.cc, if the atomic, block store, or store init does not
       need to invalidate the L1$, then it is also copied to the thrd_retired_buf.
       Removing the conditions below for compatibility. For cases that do not copy
       into the thrd_retired_buf, the code is inserted with RetireBuffer.popBack()
       to remove the entry. */
//    if (ii->isExecuted() && ii->getItype() != ITYPE_ATOMIC &&
//        ii->getItype() != ITYPE_BLOCK_STORE && ii->getItype() != ITYPE_STORE_INIT) {

    if (cmd.isRMOstore()) {
        /* No state change for an RMO store whose StoreAck is before StoreCommit */
        if (!ii->isRMOstore())
        {
            MS_ERROR("RMO StoreAck finds non-RMO entry in STB. tid=%d PA=%llx", tid, cmd.getAddr());
            return;
        }
        rmoStb_[tid].pushBack(*ii);
    } else {
        ii->setState(LS_ACK);
        if (!ii->isExecuted() && (ii->getItype() != ITYPE_ATOMIC))
        {
            MS_ERROR("StoreAck before SSTEP for non-atomic non-RMO store. tid=%d PA=%llx", tid, cmd.getAddr());
            return;
        }
        retStb_[tid].pushBack(*ii);  // move to the retired store buffer, for N1
        if (retStb_[tid].size() > 2) {
            retStb_[tid].popFront();
        }
    }
    stb.erase(ii);

    /* Added to speed up the drain of the MAB */
    vector<MemoryAccessEntry> wdbuf;

    mab_.popFront(&wdbuf, tsoChecker_);
    bufWriteMemory(wdbuf);

    /* StoreAck is not involved in determining if a StoreCommit can be removed;
       hence, there is no need to call handleStoreInv(cmd)
    */
}

void
MemorySync::handleStoreInv(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    uint32_t tid = cmd.getThrdId();
    LoadStoreBuffer& rstb = retStb_[tid];
    LoadStoreBuffer& rmostb = rmoStb_[tid];
    list<LoadStoreEntry>::iterator rse, rme;

    /******************************************************************************
     * The StoreInv command does not provide enough info for us to check the
     * matching store buffer entry the way it is done in StoreUpdate. A StoreUpdate
     * can only happen on a normal store; hence, we only have to check retStb_.
     * But a StoreInv can also be caused by an RMO store. The StoreInv API is not
     * required to differentiate an RMO store from others. Therefore, in general a
     * StoreInv can match a StoreIssue in either retStb_ or rmoStb_, or both.
     * It is possible to implement this check. The instruction type can be obtained
     * from the matched StoreCommit. Another issue is that the code does not
     * require a store in a store buffer to wait for the StoreUpdate/StoreInv
     * before it can be removed. So, just remove the check.
     ******************************************************************************/
//    if (cmd.getThrdId() == cmd.getSrcTid()) {  // same thread

// #ifdef N2MODEL
//        /* N2's StoreInv sets the 3 LSBs to 0, hence cannot compare the exact address.
//           However, it must be Acked and Inv'd in order. */
//        rse = --(rstb.end());  // note that (rsb.end())-- does not work
//        MS_ASSERT((cmd.getAddr() & ADDR_MASK) == (rse->getAddr() & ADDR_MASK), "StoreInv finds last StoreAck having different address", cmd.getAddr());
// #else
//        rse = rstb.findMatchO2Y(cmd.getAddr(), LS_ACK);  // update or inv to the same core should be in-order
//        MS_ASSERT(rse != rstb.end(), "StoreInv failed to find matched one in the RetiredBuffer", cmd.getAddr());
// #endif
//        rse->setState(LS_INV);
//    }

    completeStoreInvStoreUpdate(cmd);
}

/********************************************************************************
 * The StoreUpdate command serves two purposes. One is exactly the same as
 * StoreInv: it prevents the corresponding StoreCommit from being removed from
 * the MAB too early. The other is to provide data for a following LoadData with
 * an L1 hit. Recall that a LoadData with an L1 hit will search for its data
 * among the same-core LoadFill or StoreUpdate records in the MAB.
 ********************************************************************************/

void
MemorySync::handleStoreUpdate(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    uint32_t tid = cmd.getThrdId();
    LoadStoreBuffer& rstb = retStb_[tid];
    list<LoadStoreEntry>::iterator ii;

    /******************************************************************************
     * The following code does the check. It is not functionally critical. In
     * Sun's implementations, usually the block stores (and in N2 BLK_INIT stores)
     * do not update the L1$; instead, they invalidate the L1$. Hence, only
     * retStb_ needs to be searched for the matching StoreIssue.
     *
     * In addition, Niagara 2 sends StoreAck and StoreUpdate or StoreInv (same cid)
     * in the same cycle. In N2, the design of the StoreAck/StoreUpdate/StoreInv
     * can be abstracted as follows:
     *    StoreAcks are queued, and are released from the queue in order. If a
     *    StoreAck also causes a store update or invalidation, the StoreAck will be
     *    held in the queue until its update/invalidation is done.
     *
     * Niagara 1, however, does not queue the StoreAck. Hence, the StoreUpdate
     * (and StoreInv) may not be related to the last StoreAck, since not all
     * StoreAcks cause a StoreUpdate/StoreInv.
     ******************************************************************************/
#ifdef N2MODEL
    /* N2's StoreUpdate sets the 3 LSBs to 0, hence cannot compare the exact
       address. However, it must be Acked and Updated in order. */
    ii = --(rstb.end());  // note that (rsb.end())-- does not work
    // cerr << "MemSync: " << ii->toString();
    if ((cmd.getAddr() & ADDR_MASK) != (ii->getAddr() & ADDR_MASK))
    {
        MS_ERROR("StoreUpdate finds last StoreAck having different address. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
#else
    /******************************************************************************
     * In general, searching for state LS_ACK is not ok since, as discussed in
     * handleStoreInv, StoreInv is not appropriate for performing such a check,
     * and thus the state of the corresponding StoreIssue remains at state LS_ACK.
     * As a result, the search may find the wrong entry. The longer the
     * retire_stb_, the more likely the error. So far, the retire_stb_ maximum
     * size is set to 2.
     ******************************************************************************/
    ii = rstb.findMatchO2Y(cmd.getAddr(), LS_ACK, BLK_ADDR_MASK);  // update or inv to the same core should be in-order
    if (ii == rstb.end())
    {
        MS_ERROR("StoreUpdate failed to find matched one in the RetiredBuffer. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }
#endif
    ii->setState(LS_UPDATE);

    completeStoreInvStoreUpdate(cmd);
}

void
MemorySync::completeStoreInvStoreUpdate(LoadStoreCmd& cmd)
{
    list<MemoryAccessEntry>::iterator srclink;
    list<MemoryAccessEntry>::iterator newlink;
    vector<MemoryAccessEntry> wdbuf;
    MemoryAccessEntry mae (cmd);
    uint32_t cbit = 1 << cmd.getCoreId();

    srclink = mab_.findStoreInvStoreUpdateSrc(cmd);

#ifdef N1MODEL  // it seems that in N1 an IO access does not have a StoreCommit signal
    if (cmd.isIO() && srclink == mab_.end()) {
        return;
    }
#endif

    if (srclink == mab_.end())
    {
        MS_ERROR("StoreInv/Update failed to find matched StoreCommit. tid=%d PA=%llx", cmd.getSrcTid(), cmd.getAddr());
        return;
    }
    MSYNC_DEBUG(4, "StoreInv-StoreUpdate %s", srclink->toString().c_str());
    srclink->addCinv(cbit);  // record invalidated/updated core

    mae.setData(srclink->getData());  // although only StoreUpdate needs data, it's ok for StoreInv
    mae.setExecuted(true);  // required for retirement from the MAB when it reaches the head

    // initially, a StoreCommit will have a link to itself
    if (!srclink->isLinkValid())
    {
        MS_ERROR("StoreCommit does not have a valid link");
        return;
    }
    mae.setLink(srclink->getLink());   // set link chain for debugging purposes
    mae.setDsrcMid(srclink->getId());  // data source MAE id
    srclink->setGlobal(mae.getId());   // Global will be set by the last StoreInv or StoreUpdate, eventually
    newlink = mab_.pushBack(mae);
    srclink->setLink(newlink);

    /* Added to speed up the drain of the MAB */
    mab_.popFront(&wdbuf, tsoChecker_);
    bufWriteMemory(wdbuf);
}

void
MemorySync::handleEvict(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    /* Note that if the D$ line size is different from the I$ line size,
       then the following code may have to be modified. Although N2 has
       different line sizes for the D$ and I$, it is ok to just use the
       line size of the D$, since it was decided that N2 would not handle
       the I-Fetch side of the operation. */
    /* For MemorySync to function correctly, only one EVICT entry per
       EVICT command is enough, since it functions as a fence. However,
       for the TSO checker to function correctly, each 8-byte block needs
       to have an EVICT so that the TSO checker can depend on this to retire
       store nodes in the memory access history structure. */
    if (cmd.getInv() == 0)
    {
        MS_ERROR("MEM_EVICT has inv_vec==0x0. cmd=%s", cmd.toString().c_str());
        return;
    }
    uint64_t base = cmd.getAddr() & (~(DCACHE_LINE_SIZE - 1));
    uint64_t addr;
    for (int i = 0; i < DCACHE_LINE_SIZE/8; i++)
    {
        addr = base + (8 * i);
        cmd.setAddr(addr);
        cmd.setSrcBank((addr & L2_BANK_ADDR_BITS) >> L2_BANK_ADDR_SFT);
        list<MemoryAccessEntry>::iterator ii = mab_.findDmaStoreStart(cmd, DMA_EVICT);
        if (ii != mab_.end())
        {
            // a remote evict for a dma_store; store its inv in the dma_store_start,
            // it will be picked up when the corresponding dma_store is issued later
            ii->setInv((ii->getInv() | cmd.getInv()));
        }
        else
        {
            ii = mab_.findDmaStoreEntry(cmd, DMA_EVICT);
            if (ii != mab_.end())
            {
                // found one; this evict is for a dma_store entry. It is mainly used
                // to fill in the dma_store's inv_vec field. No need to add this entry
                // to the MAB. The dma_store may contain a remote evict, so OR them together
                ii->setInv((ii->getInv() | cmd.getInv()));
                // we should see no more evicts associated with this entry
                ii->setInvSet(true);
                // let inv/cinv decide when this entry can be removed from the MAB
                ii->setExecuted(true);
                //ii->setSrcBank((cmd.getAddr() & L2_BANK_ADDR_BITS) >> L2_BANK_ADDR_SFT);
                MSYNC_DEBUG(4, "EVICT DMA_STORE %s", ii->toString().c_str());
            }
            else
            {
                // normal MEM_EVICT
                list<MemoryAccessEntry>::iterator newlink;
                MemoryAccessEntry* mae = new MemoryAccessEntry (cmd);
                //mae->setAddr(addr);
                mae->setData(0ull);      // Evict does not set real data
                mae->setExecuted(true);  // needed for retirement from the MAB
                mae->setAcked(true);
                //mae->setSrcBank((addr & L2_BANK_ADDR_BITS) >> L2_BANK_ADDR_SFT);
                mae->setSizeV(0xff);
                newlink = mab_.pushBack(*mae);
                newlink->setLink(newlink);  // set link to itself
            }
        }
    }
}
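
/* Worked example (illustrative, not from the original source): assuming a
   16-byte D$ line, an EVICT for PA 0x104 with inv_vec != 0 expands into one
   entry per 8-byte block of the line, i.e. entries for 0x100 and 0x108, each
   tagged with its L2 source bank. This per-block expansion exists for the
   TSO checker; the fence effect alone would need only a single entry. */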

void
MemorySync::handleEvictInv(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    // evict_inv does not have an addr value, so it is always 0 to begin with.
    uint64_t base = cmd.getAddr() & (~(DCACHE_LINE_SIZE - 1));
    uint64_t addr;
    uint32_t cbit = 1 << cmd.getCoreId();
    /**
     * Note that using DCACHE_LINE_SIZE implies only one EvictInv can be sent
     * from each core associated with one Evict command.
     */
    for (int i = 0; i < DCACHE_LINE_SIZE/8; i++)
    {
        addr = base + (8 * i);
        cmd.setAddr(addr);
        // for evict_inv, srcBank is provided by the testbench; this can be a problem
        // when partial bank mode is used. ===> it will not, the testbench uses static
        // bank calculation too.
        list<MemoryAccessEntry>::iterator ii = mab_.findDmaStoreStart(cmd, DMA_EVICT_INV);
        if (ii != mab_.end())
        {
            // a remote evict_inv for a dma_store; update the cinv in the
            // dma_store_start, it will be picked up when the corresponding
            // dma_store is issued later
            ii->addCinv(cbit);
        }
        else
        {
            ii = mab_.findDmaStoreEntry(cmd, DMA_EVICT_INV);
            if (ii != mab_.end())
            {
                // found one; this evict_inv is for a dma_store entry, mask the
                // corresponding bit in inv_vec.
                ii->addCinv(cbit);
                MSYNC_DEBUG(4, "EVICT_INV DMA_STORE %s", ii->toString().c_str());
            }
            else
            {
                // normal MEM_EVICT_INV
                list<MemoryAccessEntry>::iterator srclink;
                list<MemoryAccessEntry>::iterator newlink;
                MemoryAccessEntry* mae;
                srclink = mab_.findEvictInvSrc(cmd);
                if (srclink == mab_.end())
                {
                    MS_ERROR("EvictInv failed to find matched Evict. tid=%d PA=%llx", cmd.getThrdId(), cmd.getAddr());
                    return;
                }
                srclink->addCinv(cbit);

                mae = new MemoryAccessEntry(cmd);
                mae->setExecuted(true);  // required for retirement from the MAB when it reaches the head
                //assert (srclink->isLinkValid());  // initially, an Evict will have a link to itself
                mae->setLink(srclink->getLink());  // set link chain for debugging purposes
                srclink->setGlobal(mae->getId());  // set dinv
                newlink = mab_.pushBack(*mae);
                srclink->setLink(newlink);
            }
        }
    }
}

/******************************************************************************
 To allow DMA accesses from I/O. This occurs when an SIU Store (from ENET
 or PCI) is seen in the L2, or when the Bench back-door stores to memory.
******************************************************************************/
void
MemorySync::handleStoreSlam(StoreIssueCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    uint64_t paddr = cmd.getAddr();
    if (cmd.getSizeV() != 0xff) {
        // need to read in data for merging
        uint64_t srcData = rif_.readMemory(0, 0, (paddr & ADDR_MASK), 8);
        uint64_t tarData = align8byte(cmd.getData(), cmd.getSize(), (paddr & ~ADDR_MASK));
        uint64_t newData = merge(srcData, tarData, cmd.getSizeV());
        rif_.slamMemory(0, 0, (paddr & ADDR_MASK), newData, 8);
    }
    else {
        if ((paddr & 0x7ULL) != 0x0)
        {
            MS_ERROR("Address 0x%llx is not 8-byte aligned", paddr);
            return;
        }
        rif_.slamMemory(0, 0, paddr, cmd.getData(), 8);
    }
}
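
/* Slam example (illustrative, not from the original source): a 4-byte
   back-door store to PA 0x1004 has sizeV != 0xff, so the handler reads the
   aligned 8 bytes at 0x1000, shifts the 4 store bytes to their lanes within
   that block (align8byte), merges them under sizeV, and slams the full
   8-byte result back; an 8-byte store with sizeV == 0xff is slammed directly
   and must be 8-byte aligned. */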

//=============================================================================
// signal the beginning of a dma_store operation; there is just one
// dma_store_start, regardless of whether the dma_store is 8 bytes or 64 bytes.
// For a 64-byte dma_store, we will see 8 dma_store commands.
//=============================================================================
void
MemorySync::handleDmaStoreStart(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    int totalSize = cmd.getTsize();
    uint64_t addr = cmd.getAddr();
    for (int i = 0; i < totalSize/8; i++)
    {
        // create one DMA_STORE_START, in MAB, for every 8-byte DMA_STORE
        cmd.setAddr(addr);
        MemoryAccessEntry* mae = new MemoryAccessEntry(cmd);
        mae->setEntryType(MEM_DMA_STORE_START);
        mab_.pushBack(*mae);
        addr += 8;
    }
}
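
/* Worked example (illustrative, not from the original source): a 64-byte DMA
   store starting at PA 0x200 arrives as one dma_store_start with tsize=64,
   which the loop above expands into eight MEM_DMA_STORE_START entries at
   0x200, 0x208, ..., 0x238; each later 8-byte DMA_STORE command then claims
   the START entry at its own address. */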

/******************************************************************************
 Similar to MEM_SLAM, but allows inv_vec to handle an L1 conflict.
******************************************************************************/
void
MemorySync::handleDmaStore(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    // treat it as a store_commit with evict & evict_inv.
    // we will have one evict to fill in the inv_vec, then one or more
    // evict_invs to mask each enabled bit in inv_vec.
    // 2/14/06, all DMA_STORE must go through the MAB
    list<MemoryAccessEntry>::iterator mlink;
    uint64_t mdata;
    uint64_t sdata;
    uint8_t sszv;
    int tid = cmd.getThrdId();
    StoreCommitCmd commit(cmd.getThrdId(), 0, 0, cmd.getAddr(), cmd.getSizeV(), true, (cmd.getSizeV()==0?false:true), cmd.getCycle());
    // each DMA_STORE operation can be either 8 bytes or 64 bytes, but
    // each DMA_STORE command only represents 8 bytes; we need the total
    // size information to handle the corresponding EVICT and EVICT_INV
    // commands, as there will be only one EVICT command for each
    // DMA_STORE operation (which can have 1 or 8 DMA_STORE commands)
    commit.setTsize(cmd.getTsize());
    commit.setData(cmd.getData());
    MemoryAccessEntry entry(commit);
    sdata = commit.getData();
    sszv = commit.getSizeV();
    entry.setMerged(false);
    if ((commit.getSize() == 8) && (sszv == 0xff))
    {
        // no merge needed
        mdata = sdata;
    }
    else
    {
        // sszv = 0 still needs to get correct data in the entry
        mdata = getL2Data(mab_.end(), commit);
        mdata = merge(mdata, sdata, sszv);
        // keep merge-related data in case we need to repeat it (with a
        // different mdata)
        entry.setMerged(true);
        entry.setOrigData(sdata);
        entry.setOrigSizeV(sszv);
    }
    entry.setData(mdata);
    list<MemoryAccessEntry>::iterator ii = mab_.findDmaStoreStart(cmd, DMA_STORE);
    if (ii != mab_.end())
    {
        // there is a dma_store_start associated with this dma_store; use the
        // inv and cinv there. mark the dma_store_start as executed so that it
        // can be removed from the MAB
        ii->setExecuted(true);
        entry.setInv(ii->getInv());
        entry.setCinv(ii->getCinv());
    }
    if (cmd.getInv() == 0)
    {
        // there is no local evict associated with the dma_store, so mark the inv
        // setting as done, also set this entry as executed, and let inv/cinv
        // decide when this entry's data can be committed to memory.
        entry.setInvSet(true);
        entry.setExecuted(true);
    }
    entry.setItype(ITYPE_STORE);
    entry.setIseq(iSeq(tid));
    entry.setDmaStore(true);
    mlink = mab_.pushBack(entry);
    mlink->setLink(mlink);  // point to itself

    // buffer for writing data back to memory
    vector<MemoryAccessEntry> wdbuf;
    mab_.popFront(&wdbuf, tsoChecker_);
    /* write STORE_COMMIT data to memory */
    bufWriteMemory(wdbuf);
}

/************************************************************************************
 Reset the state back to LS_ACK. This implementation assumes only a
 cacheable request can have a FetchFill, where the state is set to LS_ACK.
 In general, this is inaccurate since LS_ACK in Fetch means the instruction
 is received. This may be ok if only cacheable instructions can have
 self-modifying code.

 In general, a fetch has
 - FetchIssue: instruction fetch issue
 - FetchData:  instruction access gets data from the memory system
 - FetchFill:  line fill to L1
 - FetchAck:   instruction data comes back to the fetch buffer

 So far, only FetchData and FetchFill are implemented.
*************************************************************************************/
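
/* Flow sketch (illustrative, not from the original source): a fetch from L2
   proceeds as
       FetchIssue -> entry pushed to ifb_[tid]
       FetchData  -> oldest matching IFB entry found; an MAE with the fetched
                     8 bytes (from L1, L2/memory, or Riesling I/O space) is
                     pushed into mab_ and linked to the IFB entry
       FetchFill  -> for cacheable L2 fetches only; pulls the whole I$ line
                     into the MAB, mirroring handleLoadFill
   FetchAck is not modeled. */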

void
MemorySync::handleFetchIssue(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    list<LoadStoreEntry>::iterator ii;
    LoadStoreEntry* entry;
    uint32_t tid = cmd.getThrdId();

    entry = new LoadStoreEntry(cmd);
    ii = ifb_[tid].pushBack(*entry);
    MSYNC_DEBUG(2, "Last Enter to IFB %s", ii->toString().c_str());
}

void
MemorySync::handleFetchData(LoadStoreCmd& cmd)
{
    if (addrTrans)
    {
        cmd.setAddr(addrTrans(cmd.getAddr()));
    }
    list<MemoryAccessEntry>::iterator mlink;
    list<LoadStoreEntry>::iterator ife;
    vector<MemoryAccessEntry> wdbuf;
    MemoryAccessEntry* mae;
    uint32_t tid = cmd.getThrdId();
    enum DATA_SRC dsrc = cmd.getDsrc();
    LoadStoreBuffer& ifb = ifb_[tid];

    // ife = ifb.findMatchO2Y (cmd.getId(), cmd.getAddr(), cmd.getSizeV());
    ife = ifb.findMatchO2Y(cmd.getAddr(), LS_NEW, BLK_ADDR_MASK);
    if (ife == ifb.end())
    {
        MS_ERROR("Matched FetchIssue expected. tid=%d PA=%llx", tid, cmd.getAddr());
        return;
    }

    mae = new MemoryAccessEntry(cmd);
    mae->setItype(ITYPE_FETCH);

    /* Reset the state back to LS_ACK. This implementation assumes only a
       cacheable request can have a FetchFill, where the state is set to LS_ACK.
       In general, this is inaccurate since LS_ACK in Fetch means the instruction
       is received. This may be ok if only cacheable instructions can have
       self-modifying code.

       In general, a fetch has
       - FetchIssue: instruction fetch issue
       - FetchData:  instruction access gets data from the memory system
       - FetchFill:  line fill to L1
       - FetchAck:   instruction data comes back to the fetch buffer

       So far, only FetchIssue, FetchData and FetchFill are implemented.
    */

    switch (dsrc) {
    case DSRC_L1:
        mae->setData(getL1Instr(cmd));
        break;
    case DSRC_L2_MEMORY:
        MSYNC_DEBUG(2, "mab_=%s", mab_.toString().c_str());
        mae->setData(getL2Data(mab_.end(), cmd));
        break;
    case DSRC_IO:  // access instruction directly from Riesling memory
        mae->setData(rif_.readMemory(cmd.getCoreId(), cmd.getThrdId(), cmd.getAddr() & ADDR_MASK, 8));
        break;
    default:
        MS_ERROR("Wrong dsrc value %d", dsrc);
        return;
    }
    mlink = mab_.pushBack(*mae);
    ife->setLink(mlink);
    ife->setState(LS_RDATA);
}
1429
1430/**
1431 LoadFill must bring in the whole L1 cache line, TPS 4/22/04
1432*/
1433void
1434MemorySync::handleFetchFill (LoadStoreCmd& cmd)
1435{
1436 if (addrTrans)
1437 {
1438 cmd.setAddr(addrTrans(cmd.getAddr()));
1439 }
1440 list<MemoryAccessEntry>::iterator mlink, newlink;
1441 list<LoadStoreEntry>::iterator ife;
1442 MemoryAccessEntry entry (cmd);
1443 uint32_t tid = cmd.getThrdId();
1444 LoadStoreBuffer& ifb = ifb_[tid];
1445
1446 /***********************************************************************************
1447 * The design assumes FetchFills of the same 8-byte block come back in order. In
1448 * addition, only fetches whose data source is L2/memory and that are cacheable in
1449 * L1 can have a FetchFill. The following do-while statement searches for such an
1450 * entry in the fetch buffer. Entries that fetch the same 8-byte block but do not
1451 * need a FetchFill are assigned the LS_ACK state, indicating that those entries
1452 * are considered done, with the fetched instruction already back in the fetch
1453 * buffer. (Recall that FetchAck is not implemented so far, and may never be
1454 * needed.) Note that after an entry is assigned LS_ACK, the search in the next
1455 * iteration will skip it.
1456 ***********************************************************************************/
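 /* Hypothetical scenario (illustration only): suppose the fetch buffer holds
 two LS_RDATA entries for the same 8-byte block, the older one satisfied
 from L1 (no FetchFill expected) and the newer one from L2/memory. The
 first do-while iteration finds the L1 entry and downgrades it to LS_ACK;
 the second iteration finds the L2/memory, L1-cacheable entry and exits,
 since that is the entry this FetchFill belongs to. */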
1457 do {
1458 MSYNC_DEBUG(2, "ifb=%s", ifb.toString().c_str());
1459 ife = ifb.findMatchO2Y(cmd.getAddr(), LS_RDATA, BLK_ADDR_MASK);
1460 if (ife == ifb.end())
1461 {
1462 MS_ERROR("FetchFill failed to find matched entry in FetchBuffer. tid=%d PA=%llx", tid, cmd.getAddr());
1463 return;
1464 }
1465 if (!ife->isLinkValid())
1466 {
1467 MS_ERROR("FetchFill's matched FetchBuffer entry does not have FetchData. tid=%d PA=%llx", tid, cmd.getAddr());
1468 return;
1469 }
1470 ife->setState(LS_ACK); // either the matching entry, or one that does not need a FetchFill
1471 } while (!(ife->getLink()->getDsrc() == DSRC_L2_MEMORY && ife->getLink()->isCacheL1()));
1472
1473 /* The following search in the FetchBuffer is actually not needed. It provides an
1474 additional layer of checking, verifying that the entry's link is the same as the
1475 one found by the MAB search. Once the protocol stabilizes, we can decide whether
1476 to remove this segment of code. */
1477
1478 mlink = mab_.findFetchFillSrc(cmd);
1479 MSYNC_DEBUG(2, "ife=%s", (ife->toString()).c_str());
1480 MSYNC_DEBUG(2, "mlink=%s", (mlink->toString()).c_str());
1481 if (mlink == mab_.end())
1482 {
1483 MS_ERROR("FetchFill failed to find matched FetchData. tid=%d PA=%llx", tid, cmd.getAddr());
1484 return;
1485 }
1486 if (ife->getLink() != mlink)
1487 {
1488 MS_ERROR("FetchFill's matched FetchBuffer entry's FetchData mismatch. tid=%d PA=%llx", tid, cmd.getAddr());
1489 return;
1490 }
1491
1492 entry.setData(mlink->getData());
1493 entry.setLink(mlink);
1494 entry.setExecuted(true); // set so it can be retired from the MAB when it reaches the head
1495 newlink = mab_.pushBack(entry);
1496 mlink->setLink(newlink);
1497
1498 /* Also get the other part of L1$ line data, TPS 5/4/04 */
1499 MemoryAccessEntry* e;
1500 uint64_t line_addr_mask = ~((uint64_t) (ICACHE_LINE_SIZE - 1));
1501 uint64_t addr = cmd.getAddr() & line_addr_mask;
1502 uint64_t maeAddr = mlink->getAddr();
1503
1504 for (int i = 0; i < ICACHE_LINE_SIZE/8; i++, addr += 8) {
1505 if ((maeAddr & ADDR_MASK) == (addr & ADDR_MASK)) {
1506 continue;
1507 }
1508
1509 cmd.setAddr(addr);
1510 e = new MemoryAccessEntry (cmd);
1511 e->setData(getL2Data(mlink, cmd));
1512 e->setExecuted(true); // set true so that it can be removed when it reaches the head
1513 e->setLink(mlink); // link to mlink, indicating its data was obtained from there
1514 mab_.pushBack(*e);
1515 }
1516}
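
/* Worked example of the line-fill address math above (illustrative; it assumes
 ICACHE_LINE_SIZE == 32, whereas the real value comes from the header): for
 cmd.getAddr() == 0x1038, line_addr_mask == ~0x1f, so addr starts at 0x1020
 and the loop visits 0x1020, 0x1028, 0x1030, 0x1038, skipping the 8-byte
 block already covered by the FetchData entry at maeAddr. */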
1517
1518void
1519MemorySync::removeRetiredStore (uint64_t addr, uint32_t tid)
1520{
1521 LoadStoreBuffer& rstb = retStb_[tid];
1522 list<LoadStoreEntry>::iterator ii;
1523
1524 /* must be the first entry of the retired store buffer (retStb_) */
1525 ii = rstb.begin();
1526
1527 if (rstb.getBufPtr()->size() == 0)
1528 {
1529 MS_ERROR("Attempt to remove entry in empty RetiredBuffer. tid=%d PA=%llx", tid, addr);
1530 return;
1531 }
1536
1537 MSYNC_DEBUG (1, "Remove RetiredBuffer Entry tid=%d addr=%#llx addr_buf=%#llx executed=%d",
1538 tid, addr, ii->getAddr(), (int) ii->isExecuted());
1539 MSYNC_DEBUG (2, "%s", toString().c_str());
1540
1541 if (ii->getAddr() != addr)
1542 {
1543 MS_ERROR("Remove RetiredBuffer head entry mismatches with the address. tid=%d PA=%llx", tid, addr);
1544 return;
1545 }
1546
1547 /* For a CAS instruction, if the store part is not sent, the retired instruction
1548 may not be marked executed. The following code assumes that when an instruction
1549 is to be removed from this buffer, it must have been executed.
1550 */
1551 if (!ii->isExecuted()) {
1552 if (ii->getItype() != ITYPE_ATOMIC)
1553 {
1554 MS_ERROR("Found non-executed instr. in RetiredBuffer to be removed. tid=%d PA=%llx", tid, addr);
1555 return;
1556 }
1557 ii->setExecuted(true);
1558 if (ii->isLinkValid()) {
1559 ii->getLink()->setExecuted(true);
1560 }
1561 }
1562
1563 rstb.popFront();
1564}
1565
1566
1567/************************************************************************************
1568 * preMemoryAccess()
1569 * - takes the data to be written by the reference model,
1570 * - checks that with the one in Memory Sync Model, and
1571 * - sets flag so that the reference model won't update its memory.
1572 *
1573 * The MemoryTransaction is used to store the data. The data is in
1574 * 8-byte chunks, and each is arranged in big endian fashion. If the
1575 * size is less than 8, only data[0] is used and the data is shifted
1576 * to the right (toward least significant byte).
1577 ************************************************************************************/
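
/* Illustrative sketch (not part of the original comment): a 4-byte store of
 0xAABBCCDD arrives right-justified in data[0] as 0x00000000AABBCCDD;
 align8byte(), used later in this routine, shifts it back into big-endian
 position within its 8-byte chunk: 0xAABBCCDD00000000 for address offset 0,
 0x00000000AABBCCDD for offset 4. */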
1578
1579void MemorySync::pre_memory_access( void* msync_obj, MemoryTransaction &xact )
1580{
1581 MemorySync* self = (MemorySync*)msync_obj;
1582 if (self)
1583 self->preMemoryAccess(xact);
1584}
1585
1586void MemorySync::preMemoryAccess ( MemoryTransaction& xact)
1587{
1588
1589 if (MemorySyncMessage::skipCallback == 1) {
1590 //cerr << "ERROR: MSYNC-SKIP: T" << dec << rif_.getTid(xact) << " skip preMemoryAccess()\n";
1591 return;
1592 }
1593
1594 list<LoadStoreEntry>::iterator ii;
1595 uint64_t mask;
1596 uint64_t rdata[8];
1597 uint64_t rpa = xact.paddr();
1598 if (addrTrans)
1599 {
1600 rpa = addrTrans(rpa);
1601 }
1602 uint64_t rva = xact.vaddr();
1603 uint8_t rsize = xact.size();
1604 int nrdata = 0;
1605
1606 int tid;
1607 int i,j;
1608
1609 if ((rpa & 0xff00000000ULL) == 0x9000000000ULL) {
1610 // Bench is not sending any ldst_sync messages if PA[39:32]=0x90.
1611 // This is true for ST, STXA and LD, LDXA
1612 return;
1613 }
1614
1615 tid = xact.getStrand();
1616
1617 if (tid >= MAX_STRANDS) { // indicates the xact is not a normal instruction access
1618 return;
1619 }
1620
1621 if (xact.referenceType() == MemoryTransaction::INSTR) {
1622 return;
1623 }
1624
1625// if (xact.noOperationXact()) {
1626// return;
1627// }
1628 /**********************************************************************************
1629 * In Riesling, an atomic is performed as two transactions, one read and one write.
1630 * The following check returns on a normal load as well as the load part of an atomic.
1631 **********************************************************************************/
1632// if (xact.readXact()) {
1633// return;
1634// }
1635
1636 if (!xact.writeXact()) {
1637 return;
1638 }
1639
1640 if (xact.internalXact() || xact.tablewalk()) {
1641 return;
1642 }
1643
1644 /* store or atomic instructions */
1645 LoadStoreBuffer& stb = stb_[tid];
1646
1647 MSYNC_DEBUG(1, "STEP (preMemAcc) tid=%d va=%#llx pa=%#llx size=%d atomic=%d", tid, rva, rpa, rsize, (int) xact.atomicXact());
1648
1649 if (rsize > 8) {
1650 if (rsize != 64 && rsize != 16) {
1651 MS_ERROR("Unsupported store data size %d", xact.size());
1652 return;
1653 }
1654 nrdata = rsize/8; // number of valid 8-byte chunks (2 or 8)
1655 for (i = 0; i < nrdata; i++) {
1656 rdata[i] = xact.getData(i);
1657 }
1659 } else {
1660 rdata[0] = xact.getData(0);
1661 nrdata = 1;
1662 }
1663
1664 bool atomic = xact.atomicXact();
1665 uint64_t addr;
1666 LoadStoreBuffer& rstb = retStb_[tid];
1667 LoadStoreBuffer& rmostb = rmoStb_[tid];
1668 /**********************************************************************************
1669 * The assumption made is only atomic and rmo stores can have StoreAck issued
1670 * before SSTEP. Therefore, for these two types of instructions, search matched
1671 * StoreIssue must begin with either retStb_ (atomic) or rm_stb_ (rmo).
1672 **********************************************************************************/
1673 LoadStoreBuffer& ret_or_rmo_stb = (atomic) ? rstb : rmostb;
1674
1675 MSYNC_DEBUG(2, "rstb=%s", rstb.toString().c_str());
1676 MSYNC_DEBUG(2, "stb=%s", stb.toString().c_str());
1677 MSYNC_DEBUG(2, "rmostb=%s", rmostb.toString().c_str());
1678
1679 for (i = 0; i < nrdata; i++) {
1680 addr = rpa + (i << 3);
1681
1682 ii = ret_or_rmo_stb.find1stNonExe();
1683 if (ii == ret_or_rmo_stb.end()) {
1684 ii = stb.find1stNonExe();
1685 //MS_ASSERT(ii != stb.end(), "STEP failed to find match StoreIssue in STB. tid=%d PA=%llx", tid, addr);
1686 if (ii == stb.end()) {
1687 if (atomic) {
1688 MS_ERROR("STEP (store part of an atomic instr) failed to find match StoreIssue in STB. tid=%d PA=%llx", tid, addr);
1689 return;
1690 }
1691 else {
1692 MS_ERROR("STEP failed to find match StoreIssue in STB. tid=%d PA=%llx", tid, addr);
1693 return;
1694 }
1695 }
1696 }
1697
1698 /* Note that the RTL does not check store address and data; MemorySync performs
1699 this additional check for completeness */
1700 if ((addr & ADDR_MASK) != (ii->getAddr() & ADDR_MASK)) {
1701 MS_ERROR (" Store Address Mismatch (bits 2-0 ignored) cid=%d tid=%d va=%#llx pa-ref=%#llx pa-rtl=%#llx",
1702 tid/NSTRANDS_PER_CORE, tid, rva + (i << 3), addr, ii->getAddr());
1703 return;
1704 }
1710
1711// #ifdef N2MODEL
1712// if (rif_.isPartialStore(xact.asi())) {
1713// rdata[i] = align8byte (rdata[i], rsize, addr & ~ADDR_MASK);
1714// } else {
1715// rdata[i] = align8byte (rdata[i], ii->getSize(), ii->getAddr() & ~ADDR_MASK);
1716// }
1717// #else
1718// rdata[i] = align8byte (rdata[i], ii->getSize(), ii->getAddr() & ~ADDR_MASK);
1719// #endif
1720
1721 /* It should be fine to use xact info to align the data */
1722 rdata[i] = align8byte (rdata[i], rsize, addr & ~ADDR_MASK);
1723 /* size_vector exists only in MemorySync model */
1724 mask = byteMask(ii->getVbyte());
1725
1726 MSYNC_DEBUG(1, "STEP (preMemAcc) cid=%d tid=%d iseq=%lld va=%#llx pa=%#llx data-ref=%#llx data-rtl=%#llx mask=%#llx",
1727 tid/NSTRANDS_PER_CORE, tid, ii->getIseq(), rva + (i << 3), addr, rdata[i],
1728 ii->getData(), mask);
1729
1730 /**********************************************************************************
1731 * Note that in the N1 swerver-memory.cc environment, store data is not always
1732 * known before the SSTEP command. (Data is known at the PCX_L2 stage, which can
1733 * happen after SSTEP.) In that case, swerver-memory.cc saves the data obtained
1734 * at SSTEP to thrd_write_buf, i.e., the STB, so a load with STB bypass can still
1735 * get the correct data.
1736 *
1737 * In order to make this memory sync model work in that environment, the data in
1738 * the STB is also updated, and the state is then changed to LS_RDATA.
1739 **********************************************************************************/
1740 if (ii->isDataValid()) { // indicates data is set, in N2, StoreIssue should set state to LS_TDATA
1741 if ((rdata[i] & mask) != (ii->getData() & mask)) {
1742 MS_ERROR (" (Store Data) cid=%d tid=%d va=%#llx pa=%#llx data-ref=%#llx data-rtl=%#llx mask=%#llx",
1743 tid/NSTRANDS_PER_CORE, tid, rva + (i << 3), addr,
1744 rdata[i], ii->getData(), mask);
1745 return;
1746 }
1747 } else { // in N2, this should not happen
1748 ii->setData(rdata[i]); // ii points to the store buffer entry
1749 ii->setState(LS_RDATA); // set data from Reference Model
1750 /* Are the following statements needed for N1? They are definitely not needed for N2. TPS, 6/16/04 */
1751// if (ii->isLink2Valid()) {
1752// ii->getLink2()->setData(data);
1753// }
1754 }
1755
1756 ii->setExecuted(true);
1757 if (ii->isLinkValid()) {
1758 (ii->getLink())->setExecuted(true);
1759 }
1760
1761 /* Manage the rmoStb_ */
1762 if (ii->isRMOstore() && ii->isLinkValid()) {
1763 rmostb.erase(ii);
1764 }
1765
1766 /* Two conditions must be satisfied before a store buffer entry is removed:
1767 1) it must be executed by the reference model, and 2) it must be acked.
1768 Except for atomic instructions, all stores see STEP before ACK. When the
1769 ACK has already arrived (the atomic case), perform popFront() here; the
1770 others perform popFront() when they receive their ACK.
1771 */
1772 }
1773 xact.access(MemoryTransaction::WRITE | MemoryTransaction::NOP);
1774
1775 /* in order not to alter the memory access, no mae_.popFront() and write memory here. */
1776 /* Not sure why the above statement is there. Added anyway, to speed up MAB
1777 retirement so that self-modifying code will not cause problems - 6/3/04 */
1778
1779 vector<MemoryAccessEntry> wdbuf; // buffer for writing data back to memory
1780
1781 mab_.popFront(&wdbuf, tsoChecker_);
1782 /* write STORE_COMMIT data to memory */
1783 bufWriteMemory(wdbuf);
1784
1785 return;
1786}
1787
1788/************************************************************************************
1789 * postMemoryAccess()
1790 * - takes the data stored in the memory sync model, and
1791 * - puts the data back to the MemoryTrasaction so that the reference
1792 * model actually read data provided by the memory sync model
1793 *
1794 * The MemoryTransaction is used to store the data. The data is in
1795 * 8-byte chunks, and each is arranged in big endian fashion. If the
1796 * size is less than 8, only data[0] is used and the data is shifted
1797 * to the right (toward least significant byte).
1798 ************************************************************************************/
1799
1800void MemorySync::post_memory_access( void* msync_obj, MemoryTransaction &xact )
1801{
1802 MemorySync* self = (MemorySync*)msync_obj;
1803 if (self)
1804 self->postMemoryAccess(xact);
1805}
1806
1807void MemorySync::postMemoryAccess (MemoryTransaction& xact)
1808{
1809 if (MemorySyncMessage::skipCallback == 1) {
1810 //cerr << "ERROR: MSYNC-SKIP: T" << dec << rif_.getTid(xact) << " skip postMemoryAccess()\n";
1811 return;
1812 }
1813
1814 vector<MemoryAccessEntry> wdbuf; // buffer for writing data back to memory
1815 list<MemoryAccessEntry>::iterator mii;
1816 list<LoadStoreEntry>::iterator ii;
1817 uint64_t mask;
1818 uint64_t data;
1819 uint64_t rpa = xact.paddr();
1820 if (addrTrans)
1821 {
1822 rpa = addrTrans(rpa);
1823 }
1824 uint64_t rva = xact.vaddr();
1825 uint8_t rsize = xact.size();
1826
1827 int tid;
1828 int iter;
1829 int i,j;
1830
1831 if ((rpa & 0xff00000000ULL) == 0x9000000000ULL) {
1832 // Bench is not sending any ldst_sync messages if PA[39:32]=0x90.
1833 // This is true for ST, STXA and LD, LDXA
1834 return;
1835 }
1836
1837 tid = xact.getStrand();
1838
1839 if (tid >= MAX_STRANDS) { // indicates the xact is not a normal instruction access
1840 return;
1841 }
1842
1843 if (xact.referenceType() == MemoryTransaction::INSTR) {
1844 MSYNC_DEBUG(1, "I-Fetch tid=%d va=%#llx pa=%#llx iword=%#010llx size=%d",
1845 tid, rva, rpa, xact.getData(), rsize);
1846
1847 /* As of 5/18/04, only N1 sends Fetch-related commands */
1848#ifdef N1MODEL
1849 if (rva == 0 && rpa == 0) return; // remove glitch during RESET_INTERRUPT
1850
1851 LoadStoreCmd cmd;
1852 list<LoadStoreEntry>::iterator ife, ii;
1853 list<MemoryAccessEntry>::iterator mae;
1854 LoadStoreBuffer& ifb = ifb_[tid];
1855
1856 cmd.setAddr(rpa);
1857
1858 // ife = ifb.findMatchY2O(rpa, cmd.sz2szv(rsize, rpa & ~ADDR_MASK));
1859 ife = ifb.find1stNonExeFetch (rpa);
1860 // MS_ASSERT(ife != ifb.end(), "I-Fetch cannot find matched entry in FetchBuffer", cmd.getAddr());
1861 if (ife == ifb.end()) {
1862 if (MSYNC_DEBUGLevel != 88) {
1863 MemorySyncMessage::warning("I-Fetch cannot find matched entry in FetchBuffer tid=%d va=%#llx pa=%#llx iword=%#010llx", tid, rva, rpa, xact.getData());
1864 }
1865 else {
1866 MemorySyncMessage::warning("I-Fetch cannot find matched entry in FetchBuffer tid=%d va=%#llx pa=%#llx iword='%s'", tid, rva, rpa, Hv_InstructionWord::disassemble(xact.getData()).c_str());
1867 }
1868 return; // use the one in nas-memory
1869 }
1870 // if (ife != (ii = ifb.find1stNonExe())) {
1871 if (ife != (ii = ifb.begin())) {
1872 for ( ; ii != ife; ) {
1873 ii->setExecuted(true);
1874 // MS_ASSERT(ii->isLinkValid(), "I-Fetch entry in FetchBuffer does not have FetchData", cmd.getAddr());
1875 if (ii->isLinkValid()) // some fetch issue may not be real?
1876 ii->getLink()->setExecuted(true);
1877 MSYNC_DEBUG(1, "I-Fetch removes non-used Fetch Entries tid=%d pa=%#llx size=%d",
1878 tid, ii->getAddr(), ii->getSize());
1879 ii++;
1880 ifb.popFront();
1881 }
1882 }
1883
1884//#ifdef N1MODEL // There are cases in N1 where a FetchIssue does not have FetchData; timing?
1885 if (!ife->isLinkValid()) return;
1886//#endif
1887 if (!ife->isLinkValid()) // unreachable while the early return above stays in place
1888 {
1889 MS_ERROR("I-Fetch entry in FetchBuffer does not have FetchData. tid=%d PA=%llx", tid, cmd.getAddr());
1890 return;
1891 }
1892 ife->setExecuted(true);
1893 mae = ife->getLink();
1894 mae->setExecuted(true);
1895 data = align2addr(mae->getData(), 4, (rpa & ~ADDR_MASK));
1896 if (data != xact.getData() && !xact.noWriteXact()) {
1897 MemorySyncMessage::warning("I-Fetch detects on-the-fly modified code tid=%d va=%#llx pa=%#llx i-ref=%#10llx i-rtl=%#10llx",
1898 tid, rva, rpa, xact.getData(), data);
1899 xact.setData(data);
1900 }
1901 ifb.popFront();
1902 mab_.popFront(&wdbuf, tsoChecker_);
1903 bufWriteMemory(wdbuf);
1904#endif // #ifdef N1MODEL
1905 return;
1906 }
1907
1908// if (xact.noOperationXact()) {
1909// return;
1910// }
1911
1912 /**********************************************************************************
1913 * In Riesling, an atomic is performed as two transactions, one read and one write.
1914 * The following check returns on a normal store as well as the store part of an atomic.
1915 **********************************************************************************/
1916// if (xact.writeXact()) {
1917// return;
1918// }
1919
1920 if (!xact.readXact()) {
1921 return;
1922 }
1923
1924 if (xact.internalXact()) {
1925 return;
1926 }
1927
1928 if (xact.tablewalk()) {
1929 // accessing TSB, look for matching ST_COMMIT
1930 uint64_t data = mab_.getL2Data(mab_.end(), tid/NSTRANDS_PER_CORE, tid, rpa, true);
1931 xact.setData(0, data);
1932 return;
1933 }
1934
1935 /**********************************************************************************
1936 * In Riesling, a partial store sends a load and a store transaction.
1937 * The first load should be ignored in this model; the data check in the store
1938 * part should check only the bytes indicated by the size_vector.
1939 **********************************************************************************/
1940 //TODO why do we comment this out? 11/22/06
1941 //if (rif_.isPartialStore(xact.asi())) {
1942 // return;
1943 //}
1944
1945 /* read and atomic instructions */
1946
1947 LoadStoreBuffer& ldb = ldb_[tid];
1948
1949 MSYNC_DEBUG(1, "STEP (postMemAcc) tid=%d va=%#llx pa=%#llx size=%d atomic=%d", tid, rva, rpa, rsize, (int) xact.atomicXact());
1950
1951 int sz;
1952 if (rsize > 8) {
1953 if (rsize != 64 && rsize != 16) {
1954 MS_ERROR("Unsupported load data size %d", xact.size());
1955 return;
1956 }
1957 iter = rsize / 8;
1958 sz = 8; // each one equals 8 bytes
1959 } else {
1960 iter = 1;
1961 sz = rsize;
1962 }
1963 uint64_t addr;
1964 for (int i = 0; i < iter; i++) {
1965 addr = rpa + (i << 3);
1966
1967 if ((addr & IO_ADDR_BIT_MASK) == IO_ADDR_BIT_MASK) { // IO address
1968 ii = ldb.find1stNonExeMatchedAddr(addr); // IO can be out-of-order
1969 } else {
1970 ii = ldb.find1stNonExe();
1971 }
1972
1973 MSYNC_DEBUG (4, "ldb=%s", ldb.toString().c_str());
1974
1975 if (ii == ldb.end())
1976 {
1977 MS_ERROR("STEP failed to find LoadIssue entry (possibly DUT took trap & Riesling did not). tid=%d PA=%llx", tid, addr);
1978 return;
1979 }
1980 /* Note that the RTL does not check the load address; MemorySync performs
1981 this additional check for completeness */
1982 if (addr != ii->getAddr()) {
1983 MS_ERROR (" Load Address Mismatch cid=%d tid=%d va=%#llx pa-ref=%#llx pa-rtl=%#llx",
1984 tid/NSTRANDS_PER_CORE, tid, rva + (i << 3), addr, ii->getAddr());
1985 return;
1986 }
1992 if (!ii->isLinkValid())
1993 {
1994 MS_ERROR("STEP's corresponding LoadData has not yet been issued. tid=%d PA=%llx", tid, addr);
1995 return;
1996 }
1997 if (xact.atomicXact()) { // this assert is to make sure we can use xact.atomic() later
1998 if (ii->getItype() != ITYPE_ATOMIC)
1999 {
2000 MS_ERROR("STEP (postMemAcc) atomic matches non-atomic load entry. tid=%d PA=%llx", tid, addr);
2001 return;
2002 }
2003 }
2004
2005 // If Load to I/O space (PA[39]=1), Riesling will see the following
2006 // messages
2007 // LDISSUE
2008 // LDDATA
2009 // [CSR_READ] [Optional]
2010 // SSTEP
2011 //
2012 // Riesling will have to queue up the CSR_READ messages from the Bench.
2013 // When Riesling sees an SSTEP for a Load to I/O, it must first
2014 // process the LDDATA normally. Then, if the oldest CSR_READ in the
2015 // queue has a matching PA, use it to override the LDDATA value and
2016 // pop it from the queue. If the oldest CSR_READ does not have a matching
2017 // PA, then use the LDDATA value and do not pop from the queue.
2018
2019 // xact contains data from either memory or I/O address follow-me, we
2020 // cannot tell which here.
2021
2022 //TODO so we can be in trouble here, e.g.
2023 // LDISSUE
2024 // LDDATA
2025 // STCOMMIT
2026 // SSTEP
2027 // in this case the data from memory (there is no csr_read follow-me) can
2028 // be different from the value in LDDATA, yet we cannot tell whether the
2029 // value is from a follow-me or not.
2030 // xact.getData() will rearrange the data according to the
2031 // related instruction; we don't want that here, we only want the original
2032 // raw data, so use a no-fault version of getData().
2033 //TODO there is no getDataNF(), is getData() good enough?
2034 uint64_t memData = xact.getData(i);
2035 // if the load entry's data comes from a store_commit entry, make sure
2036 // that entry is indeed executed, it may happen that the entry is voided
2037 // due to error injection
2038 //===> shouldn't we mark it as 'popped' in such case? 2/15/06
2039 if (ii->getLink()->getDsrc() == DSRC_L2_MEMORY) {
2040 if (!xact.followmeXact()) {
2041 // if the xact data is coming from a follow-me, don't do another
2042 // read here, we may pop out another follow-me (intended for the
2043 // next load)
2044 //ii->getLink()->setData(mab_.getL2Data(ii->getLink(), tid/NSTRANDS_PER_CORE, tid, addr, true));
2045 ii->getLink()->setData(mab_.getL2Data(ii->getLink(), tid/NSTRANDS_PER_CORE, tid, addr));
2046 }
2047 }
2048 // now we should have the correct value
2049 data = ii->getLink()->getData();
2050 uint64_t ldData = data;
2051 if (xact.followmeXact()) {
2052 //TODO if memData is not from csr_read follow-me, this is wrong,
2053 // do we have a way to tell? 10/26/05
2054 // memData from xact should already be aligned to address/size,
2055 // no need to go through align2addr() again.
2056 data = memData;
2057 }
2058 else {
2059 data = align2addr(data, sz, (addr & ~ADDR_MASK));
2060 }
2061
2062 MSYNC_DEBUG(1, "STEP (postMemAcc) cid=%d tid=%d iseq=%lld va=%#llx pa=%#llx data-ref=%#llx (aligned=%#llx) data-rtl=%#llx size=%d ---> use data=%#llx", tid/NSTRANDS_PER_CORE, tid, ii->getIseq(), rva + (i << 3), addr, memData, align2addr(memData, sz, (addr & ~ADDR_MASK)), align2addr(ldData, sz, (addr & ~ADDR_MASK)), sz, data);
2063
2064 if (!xact.noWriteXact()) {
2065 xact.setData(i, data);
2066 }
2067 else {
2068 // although the noWriteXact() indicates we must use the data in xact,
2069 // the data still has to follow the alignment rules.
2070 //---> memData alignment should be handled in xact already, no
2071 // need to do it again.
2072 //xact.setData(i, align2addr(memData, sz, (addr & ~ADDR_MASK)));
2073 xact.setData(i, memData);
2074 }
2075 ii->setExecuted(true);
2076 ii->getLink()->setExecuted(true);
2077 mii = ii->getLink();
2078 ldb.erase(ii);
2079 }
2080
2081 /**********************************************************************************
2082 * The following code handles the CAS instruction.
2083 * Two issues relate to CAS:
2084 * 1. The Memory Sync model needs to know the comparison result so that it knows
2085 * which data to compare.
2086 * The solution employed is: 1) the StoreIssue of a CAS instruction sends r[rd]
2087 * in the <data> field, and 2) on a mismatch, the StoreCommit size_vector is
2088 * reset to zero.
2089 *
2090 * N1 is different. In the N1 environment, the atomic STEP is sent before the
2091 * STORE_COMMIT. Hence, for N1, a CAS mismatch resets
2092 * the <size_vector> of the StoreIssue to 0, rather than the StoreCommit's.
2093 * However, the StoreCommit will then copy this value into its own field.
2094 *
2095 * 2. Riesling does not send a second write memory transaction if the CAS
2096 * comparison results in false.
2097 * In this case, the executed_ bit of the StoreIssue and StoreCommit must be
2098 * set in this method. (Normally, the executed_ bit of a StoreIssue or
2099 * StoreCommit is set in preMemoryAccess() when a write memory transaction
2100 * is issued by the Riesling model.)
2101 *
2102 * Therefore, in this atomic-load transaction, if the size_vector is 0, it
2103 * indicates no write transaction, and the executed_ bit is set accordingly.
2104 **********************************************************************************/
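 /* Hypothetical failing-CAS trace (illustration only): the StoreIssue
 carries r[rd] in its <data> field; on a compare mismatch the StoreCommit's
 size_vector is reset to 0, and Riesling never issues the second (write)
 transaction. The code below then finds the StoreIssue, sees sizeV == 0 on
 its StoreCommit (N2 path), and sets both executed_ bits by hand. */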
2105 if (xact.atomicXact()) {
2106 list<LoadStoreEntry>::iterator slink;
2107 LoadStoreBuffer& stb = stb_[tid];
2108 LoadStoreBuffer& rstb = retStb_[tid];
2109
2110 /* find the corresponding StoreIssue */
2111 slink = rstb.find1stNonExe(); // must step in order
2112 if (slink == rstb.end()) {
2113 slink = stb.find1stNonExe();
2114 if (slink == stb.end())
2115 {
2116 MS_ERROR("STEP(at) failed to find non-executed StoreIssue entry (possibly DUT took trap & Riesling did not). tid=%d PA=%llx", tid, addr);
2117 return;
2118 }
2119 }
2120 if (rpa != slink->getAddr())
2121 {
2122 MS_ERROR("STEP(at) mis-matches addr with the 1st non-executed StoreIssue. tid=%d PA=%llx", tid, addr);
2123 return;
2124 }
2125 if (slink->getItype() != ITYPE_ATOMIC)
2126 {
2127 MS_ERROR("STEP(at) found 1st non-executed StoreIssue non-atomic. tid=%d PA=%llx", tid, addr);
2128 return;
2129 }
2130
2131 /* set the executed_ bit if needed */
2132#ifdef N1MODEL
2133 MSYNC_DEBUG(2, "PostMemAccess atomic sizeV=%#x", (int) slink->getSizeV());
2134 if (slink->getSizeV() == 0) { // CAS comparison is false => no write necessary
2135 slink->setExecuted(true);
2136 if (slink->isLinkValid()) {
2137 slink->getLink()->setExecuted(true);
2138 /* Fill the store part with the same written-data info as its load part --- TPS 9/10/04 */
2139 slink->getLink()->setData(mii->getData());
2140 slink->getLink()->setSizeV(mii->getSizeV());
2141 }
2142 }
2143#else
2144 if (!slink->isLinkValid())
2145 {
2146 MS_ERROR("STEP(at) misses StoreCommit. tid=%d PA=%llx", tid, addr);
2147 return;
2148 }
2149 if (slink->getLink()->getSizeV() == 0) { // CAS comparison is false => no write necessary
2150 slink->setExecuted(true);
2151 slink->getLink()->setExecuted(true);
2152 /* Fill the store part with the same written-data info as its load part --- TPS 9/10/04 */
2153 slink->getLink()->setData(mii->getData());
2154 slink->getLink()->setSizeV(mii->getSizeV());
2155 }
2156#endif
2157 MSYNC_DEBUG(2, "slink=%s", (slink->toString()).c_str());
2158 }
2159
2160 mab_.popFront(&wdbuf, tsoChecker_);
2161 /* write STORE_COMMIT data to memory */
2162 bufWriteMemory(wdbuf);
2163
2164 return;
2165}
2166
2167/* write STORE_COMMIT data to memory */
2168void
2169MemorySync::bufWriteMemory (vector<MemoryAccessEntry>& wdbuf)
2170{
2171 vector<MemoryAccessEntry>::iterator wi;
2172 int qsize = wdbuf.size();
2173 int i = 0;
2174
2175 wi = wdbuf.begin();
2176 while (i < qsize) {
2177 if (wi->getSizeV() != 0 && // (sizeV == 0) => no need to store
2178 !(wi->getItype() == ITYPE_ATOMIC && !wi->isSwitchData())) { // cas false, no need to store
2179 rif_.writeMemory(wi->getCoreId(), wi->getThrdId(), (wi->getAddr() & ADDR_MASK), wi->getData(), 8);
2180 }
2181 i++;
2182 wi++;
2183 }
2184 wdbuf.clear();
2185}
2186
2187void
2188MemorySync::setTestBenchData (uint32_t tid, uint64_t addr, uint64_t data, bool rmo)
2189{
2190 if (addrTrans)
2191 {
2192 addr = addrTrans(addr);
2193 }
2194 LoadStoreBuffer& stb = stb_[tid];
2195 LoadStoreBuffer& rmostb = rmoStb_[tid];
2196 list<LoadStoreEntry>::iterator ii;
2197
2198 if (rmo) {
2199 ii = rmostb.findNeedTDataO2Y(addr);
2200 if (ii == rmostb.end()) {
2201 ii = stb.findNeedTDataO2Y(addr);
2202 if (ii == stb.end())
2203 {
2204 MS_ERROR("setTestBenchData failed to find RMO entry in (RMO)STB. tid=%d PA=%llx", tid, addr);
2205 return;
2206 }
2207 if (!ii->isRMOstore())
2208 {
2209 MS_ERROR("setTestBenchData expected match-address entry in STB to be RMO. tid=%d PA=%llx", tid, addr);
2210 return;
2211 }
2212 }
2213 } else {
2214 ii = stb.findNeedTDataO2Y(addr);
2215 if (ii == stb.end())
2216 {
2217 MS_ERROR("setTestBenchData failed to find entry in (RMO)STB. tid=%d PA=%llx", tid, addr);
2218 return;
2219 }
2220 }
2221
2222 if (ii->isDataValid()) {
2223 // can only be RDATA, Reference Model data set in STEP
2224 if (ii->getState() != LS_RDATA)
2225 {
2226 MS_ERROR("The entry is not in LS_RDATA state");
2227 return;
2228 }
2229 uint64_t mask = byteMask(ii->getVbyte());
2230 if ((data & mask) != (ii->getData() & mask)) {
2231 MS_ERROR (" (Store Data) cid=%d tid=%d pa=%#llx data-ref=%#llx data-rtl=%#llx mask=%#x",
2232 ii->getCoreId(), ii->getThrdId(), ii->getAddr(),
2233 ii->getData(), data, mask);
2234 return;
2235 }
2236 } else {
2237 ii->setData(data);
2238 /* Are the following statements needed for N1? They are definitely not needed for N2. TPS, 6/16/04 */
2239// if (ii->isLink2Valid()) {
2240// ii->getLink2()->setData(data);
2241// }
2242 }
2243 ii->setState(LS_TDATA);
2244}
2245
2246 /* Assume the data is aligned to an 8-byte chunk; the result has
2247 1. the data shifted to the least significant bytes, and
2248 2. the higher bytes outside the size range zeroed, which is
2249 required when this function is used in postMemoryAccess()
2250 to provide data back to the Riesling Reference Model, since
2251 Riesling zeroes out all bytes not in the requested data
2252 size range.
2253*/
2254uint64_t
2255MemorySync::align2addr (uint64_t data, uint8_t size, uint32_t addr_offset)
2256{
2257 uint64_t result;
2258 int sft;
2259
2260 switch (size) {
2261 case 8:
2262 assert ((addr_offset & 0x7ULL) == 0);
2263 result = data;
2264 break;
2265 case 4:
2266 assert ((addr_offset & 0x3ULL) == 0);
2267 sft = 32 - ((addr_offset & 0x4ULL) << 3);
2268 result = (data >> sft) & 0xffffffffULL;
2269 break;
2270 case 2:
2271 assert ((addr_offset & 0x1ULL) == 0);
2272 sft = 48 - ((addr_offset & 0x6ULL) << 3);
2273 result = (data >> sft) & 0xffffULL;
2274 break;
2275 case 1:
2276 sft = 56 - (addr_offset << 3);
2277 result = (data >> sft) & 0xffULL;
2278 break;
2279 default:
2280 assert(0);
2281 result = data;
2282 break;
2283 }
2284 return (result);
2285}
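
/* Worked examples for align2addr() (illustrative, not original code), using
 the big-endian chunk chunk = 0x1122334455667788:

 align2addr(chunk, 8, 0) == 0x1122334455667788
 align2addr(chunk, 4, 4) == 0x0000000055667788 // sft = 0
 align2addr(chunk, 2, 2) == 0x0000000000003344 // sft = 32
 align2addr(chunk, 1, 7) == 0x0000000000000088 // sft = 0
*/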
2286
2287 /* assume the data is in the least significant bytes */
2288uint64_t
2289MemorySync::align8byte (uint64_t data, uint8_t size, uint32_t addr_offset)
2290{
2291 uint64_t result;
2292 int sft;
2293
2294 switch (size) {
2295 case 64:
2296 case 16:
2297 case 8:
2298 assert ((addr_offset & 0x7ULL) == 0);
2299 result = data;
2300 break;
2301 case 4:
2302 assert ((addr_offset & 0x3ULL) == 0);
2303 sft = 32 - ((addr_offset & 0x4ULL) << 3);
2304 result = data << sft;
2305 break;
2306 case 2:
2307 assert ((addr_offset & 0x1ULL) == 0);
2308 sft = 48 - ((addr_offset & 0x6ULL) << 3);
2309 result = data << sft;
2310 break;
2311 case 1:
2312 sft = 56 - (addr_offset << 3);
2313 result = data << sft;
2314 break;
2315 case 0:
2316 result = data;
2317 break;
2318 default:
2319 assert(0);
2320 result = data;
2321 break;
2322 }
2323 return (result);
2324}
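
/* Worked example for align8byte() (illustrative, not original code): it is
 the inverse of align2addr() for a given size/offset, e.g.

 align8byte(0x3344, 2, 2) == 0x0000334400000000 // sft = 32
 align2addr(align8byte(d, sz, off), sz, off) == d for any in-range d
*/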
2325
2326uint64_t
2327MemorySync::merge (uint64_t todata, uint64_t fromdata, uint8_t mgvec)
2328{
2329 uint64_t data;
2330 uint64_t byteMask1 = byteMask(~mgvec);
2331 uint64_t byteMask2 = byteMask(mgvec);
2332 data = (todata & byteMask1) | (fromdata & byteMask2);
2333
2334 MSYNC_DEBUG(4, "merge: todata=0x%llx fdata=0x%llx merge=0x%x result=0x%llx",
2335 todata, fromdata, (int) mgvec, data);
2336
2337 return (data);
2338}
2339
2340
2341uint64_t
2342MemorySync::byteMask(const uint8_t vbyte) {
2343 uint64_t mask = 0ull;
2344 uint8_t bitSelector = 0x80; // 10000000
2345
2346 for (int i = 0; i < 8; i++) {
2347 mask = mask << 8;
2348 mask = ((vbyte & bitSelector) == 0) ? mask : mask | 0xffull;
2349 bitSelector >>= 1;
2350 }
2351 // cerr << "byteMask: in=0x" << hex << (int) vbyte << " out=0x" << hex << mask << endl;
2352 return (mask);
2353}
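
/* Worked example for byteMask()/merge() (illustrative, not original code):
 bit i of vbyte selects byte i, most significant byte first:

 byteMask(0x81) == 0xff000000000000ff
 merge(0x1111111111111111, 0x2222222222222222, 0x81)
 == 0x2211111111111122 // bytes 0 and 7 taken from fromdata
*/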
2354
2355uint64_t
2356MemorySync::getL1Data(LoadStoreCmd& cmd)
2357{
2358 list<MemoryAccessEntry>::iterator mae;
2359
2360 mae = mab_.findL1DataEntry(cmd);
2361 if (mae == mab_.end()) {
2362 return (rif_.readMemory(cmd.getCoreId(), cmd.getThrdId(), cmd.getAddr() & ADDR_MASK, 8)); // read aligned 8 bytes
2363 } else {
2364 return (mae->getData());
2365 }
2366}
2367
2368uint64_t
2369MemorySync::getL2Data(list<MemoryAccessEntry>::iterator from, LoadStoreCmd& cmd)
2370{
2371 list<MemoryAccessEntry>::iterator mae;
2372
2373 mae = mab_.findL2DataEntry(from, cmd);
2374 if (mae == mab_.end()) {
2375 return (rif_.readMemory(cmd.getCoreId(), cmd.getThrdId(), cmd.getAddr() & ADDR_MASK, 8)); // read aligned 8 bytes
2376 } else {
2377 return (mae->getData());
2378 }
2379}
2380
2381uint64_t
2382MemorySync::getL1Instr(LoadStoreCmd& cmd)
2383{
2384 list<MemoryAccessEntry>::iterator mae;
2386
2387 mae = mab_.findL1InstrEntry(cmd);
2388 if (mae == mab_.end()) {
2389 return (rif_.readMemory(cmd.getCoreId(), cmd.getThrdId(), cmd.getAddr() & ADDR_MASK, 8)); // read aligned 8 bytes
2390 } else {
2391 return (mae->getData());
2392 }
2393}
2394
2395//=============================================================================
2396//=============================================================================
2397void
2398MemorySync::nullMsyncCallback()
2399{
2400 // turn off msync related callbacks
2401 MemorySyncMessage::skipCallback = 1;
2402}
2403
2404//=============================================================================
2405//=============================================================================
2406void
2407MemorySync::flushMsyncCallback(int tid)
2408{
2409 MSYNC_DEBUG(1, "flushMsyncCallback( tid=%d )", tid);
2410
2411 ldb_[tid].empty();
2412
2413 //TODO not sure if we should empty the following buffers as well
2414 //stb_[tid].empty();
2415 //ifb_[tid].empty();
2416
2417 mab_.empty(tid);
2418 // we rely on the entry in MAB to determine which entry needs to be removed
2419 // from retStb and rmoStb, so the "mab_.empty" must come first.
2420 retStb_[tid].markPop();
2421 rmoStb_[tid].markPop();
2422}
2423
2424//=============================================================================
2425//=============================================================================
2426void
2427MemorySync::handleLoadPop(int tid)
2428{
2429 MSYNC_DEBUG(1, "handleLoadPop( MEM_LD_POP, tid=%d )", tid);
2430
2431 list<LoadStoreEntry>::iterator ii = ldb_[tid].queryBack();
2432 if (ii == ldb_[tid].end())
2433 {
2434 MS_ERROR("No load entry to be popped");
2435 return;
2436 }
2437 int count;
2438 switch ((ii)->getItype()) {
2439 case ITYPE_BLOCK_LOAD:
2440 count = 8;
2441 break;
2442 case ITYPE_QUAD_LOAD:
2443 count = 2;
2444 break;
2445 default:
2446 count = 1;
2447 break;
2448 }
2449
2450 if (count > ldb_[tid].size())
2451 {
2452 MS_ERROR("Load buffer does not have enough entries to be popped, ldb.size=%d pop=%d", ldb_[tid].size(), count);
2453 return;
2454 }
2455
2456 ldb_[tid].popBack(count);
2457 mab_.popBack(tid, count);
2458}
2459
2460//=============================================================================
2461//=============================================================================
2462void
2463MemorySync::handleStorePop(int tid)
2464{
2465 MSYNC_DEBUG(1, "handleStorePop( MEM_ST_POP, tid=%d )", tid);
2466
2467 list<LoadStoreEntry>::iterator ii = stb_[tid].queryBack();
2468 if (ii == stb_[tid].end())
2469 {
2470 MS_ERROR("No store entry to be popped");
2471 return;
2472 }
2473 int count;
2474 switch ((ii)->getItype()) {
2475 case ITYPE_BLOCK_STORE:
2476 count = 8;
2477 break;
2478 default:
2479 count = 1;
2480 break;
2481 }
2482
2483 if (count > stb_[tid].size())
2484 {
2485 MS_ERROR("Store buffer does not have enough entries to be popped, stb.size=%d pop=%d", stb_[tid].size(), count);
2486 return;
2487 }
2488
2489 stb_[tid].popBack(count);
2490 //TODO do we need to pop store entries in MAB?
2491}
2492
2493/******************************************************************************
2494 compare a 64-byte data block in memory with the data provided by testbench,
2495 throw error if miscompare.
2496******************************************************************************/
2497void
2498MemorySync::handleMemoryCheck(uint64_t paddr, uint64_t* data, int dataSize)
2499{
2500 if (addrTrans)
2501 {
2502 paddr = addrTrans(paddr);
2503 }
2504 if ((paddr % (dataSize*8)) != 0)
2505 {
2506 MS_ERROR("paddr=%#llx is not %d-byte aligned", paddr, (dataSize*8));
2507 return;
2508 }
2509
2510 for (int i = 0; i < dataSize; i++) {
2511 // DMA_STORE always goes through msync, so MEM_CHECK should go through
2512 // msync to find matched entry.
2513 //uint64_t memData = rif_.readMemory(0, 0, (paddr+8*i), 8);
2514 uint64_t memData = mab_.getL2Data(mab_.end(), 0, 0, (paddr+8*i), true);
2515 if (data[i] != memData)
2516 {
2517 MS_ERROR("memory-check mismatch at addr=%#llx, RTL=%#llx, riesling=%#llx", (paddr+8*i), data[i], memData);
2518 return;
2519 }
2520 }
2521}
2522
2523//=============================================================================
2524//=============================================================================
2525void
2526MemorySync::flushAll()
2527{
2528 // flush out all entries
2529 mab_.empty();
2530 for (int i = 0; i < MAX_STRANDS; i++)
2531 {
2532 ldb_[i].empty();
2533 stb_[i].empty();
2534 retStb_[i].empty();
2535 rmoStb_[i].empty();
2536 ifb_[i].empty();
2537 }
2538}
2539
2540//=============================================================================
2541// sniper is used by testbench to generate load/store traffic from
2542// simulated core(s), the goal is to test multi-core load/store traffic
2543// before multi-core RTL is available. The faked load/store traffic will
2544// be communicated with riesling through pli-msync commands. With real
2545// load/store, there are real load/store instructions to match/process them,
2546// but that won't be the case for sniper-generated load/store, so a new pli
2547// command SSTEP_SNIPER is added, it will be used to match up sniper-generated
2548// load/store events in msync, the SSTEP_SNIPER command will not cause any
2549// cpu architecture state change.
2550//=============================================================================
2551void MemorySync::handleSniper(int tid, uint64_t addr, INSTR_TYPE itype, uint64_t data)
2552{
2553 MSYNC_DEBUG(4, "SNIPER: STEP (postMemAcc) tid=%d pa=%#llx itype=%d data=%#llx", tid, addr, (int)itype, data);
2554
2555 if ((itype == ITYPE_LOAD) ||
2556 (itype == ITYPE_BLOCK_LOAD) ||
2557 (itype == ITYPE_DOUBLE_LOAD) ||
2558 (itype == ITYPE_QUAD_LOAD) ||
2559 (itype == ITYPE_ATOMIC))
2560 {
2561 list<LoadStoreEntry>::iterator ii;
2562 LoadStoreBuffer& ldb = ldb_[tid];
2563 if ((addr & IO_ADDR_BIT_MASK) == IO_ADDR_BIT_MASK)
2564 { // IO address, IO can be out-of-order
2565 ii = ldb.find1stNonExeMatchedAddr(addr);
2566 }
2567 else
2568 {
2569 ii = ldb.find1stNonExe();
2570 }
2571 MSYNC_DEBUG (4, "ldb=%s", ldb.toString().c_str());
2572 if (ii == ldb.end())
2573 {
2574 MS_ERROR("SNIPER: STEP failed to find LoadIssue entry (possibly DUT took trap & Riesling did not). tid=%d PA=%llx", tid, addr);
2575 return;
2576 }
2577 // Note that RTL does not check load address, MemorySync performs
2578 // this additional check for completion
2579 if (addr != ii->getAddr())
2580 {
2581 MS_ERROR("SNIPER: STEP's address mismatches with the 1st non-executed Load. tid=%d PA=%llx", tid, addr);
2582 return;
2583 }
2584 if (!ii->isLinkValid())
2585 {
2586 MS_ERROR("SNIPER: STEP's corresponding LoadData has not yet been issued. tid=%d PA=%llx", tid, addr);
2587 return;
2588 }
2589 if (itype == ITYPE_ATOMIC)
2590 {
2591 // this assert is to make sure we can check atomic later
2592 if (ii->getItype() != ITYPE_ATOMIC)
2593 {
2594 MS_ERROR("SNIPER: STEP (postMemAcc) atomic matches non-atomic load entry. tid=%d PA=%llx", tid, addr);
2595 return;
2596 }
2597 }
2598 if (ii->getLink()->getDsrc() == DSRC_L2_MEMORY)
2599 {
2600 ii->getLink()->setData(mab_.getL2Data(ii->getLink(), tid/NSTRANDS_PER_CORE, tid, addr));
2601 }
2602 // msync data does not match the one provided by testbench
2603 if (ii->getData() != data)
2604 {
2605 MS_ERROR("SNIPER: load data (%#llx) mismatches with testbench data (%#llx). tid=%d PA=%llx", ii->getData(), data, tid, addr);
2606 return;
2607 }
2608 ii->setExecuted(true);
2609 ii->getLink()->setExecuted(true);
2610 list<MemoryAccessEntry>::iterator mii = ii->getLink();
2611 ldb.erase(ii);
2612
2613 if (itype == ITYPE_ATOMIC)
2614 {
2615 list<LoadStoreEntry>::iterator slink;
2616 LoadStoreBuffer& stb = stb_[tid];
2617 LoadStoreBuffer& rstb = retStb_[tid];
2618
2619 // find the corresponding StoreIssue
2620 slink = rstb.find1stNonExe(); // must step in order
2621 if (slink == rstb.end())
2622 {
2623 slink = stb.find1stNonExe();
2624 if (slink == stb.end())
2625 {
2626 MS_ERROR("SNIPER: STEP(at) failed to find non-executed StoreIssue entry (possibly DUT took trap & Riesling did not). tid=%d PA=%llx", tid, addr);
2627 return;
2628 }
2629 }
2630 if (addr != slink->getAddr())
2631 {
2632 MS_ERROR("SNIPER: STEP(at) mis-matches addr with the 1st non-executed StoreIssue. tid=%d PA=%llx", tid, addr);
2633 return;
2634 }
2635 if (slink->getItype() != ITYPE_ATOMIC)
2636 {
2637 MS_ERROR("SNIPER: STEP(at) found 1st non-executed StoreIssue non-atomic. tid=%d PA=%llx", tid, addr);
2638 return;
2639 }
2640 if (!slink->isLinkValid())
2641 {
2642 MS_ERROR("SNIPER: STEP(at) misses StoreCommit. tid=%d PA=%llx", tid, addr);
2643 return;
2644 }
2645 if (slink->getLink()->getSizeV() == 0)
2646 {
2647 // CAS comparison is false => no write necessary
2648 slink->setExecuted(true);
2649 slink->getLink()->setExecuted(true);
2650 // Fill the store part with the same written-data info as its load part
2651 // --- TPS 9/10/04
2652 slink->getLink()->setData(mii->getData());
2653 slink->getLink()->setSizeV(mii->getSizeV());
2654 }
2655 MSYNC_DEBUG(2, "slink=%s", (slink->toString()).c_str());
2656 }
2657 // process completed entries in MAB, write STORE_COMMIT data to memory
2658 // if ready
2659 vector<MemoryAccessEntry> wdbuf;
2660 mab_.popFront(&wdbuf, tsoChecker_);
2661 bufWriteMemory(wdbuf);
2662 }
2663
2664 if ((itype == ITYPE_STORE) ||
2665 (itype == ITYPE_BLOCK_STORE) ||
2666 (itype == ITYPE_STORE_INIT) ||
2667 (itype == ITYPE_ATOMIC))
2668 {
2669 list<LoadStoreEntry>::iterator ii;
2670 LoadStoreBuffer& stb = stb_[tid];
2671 LoadStoreBuffer& rstb = retStb_[tid];
2672 LoadStoreBuffer& rmostb = rmoStb_[tid];
2673 LoadStoreBuffer& ret_or_rmo_stb = (itype == ITYPE_ATOMIC) ? rstb : rmostb;
2674 MSYNC_DEBUG(2, "SNIPER: rstb=%s", rstb.toString().c_str());
2675 MSYNC_DEBUG(2, "SNIPER: stb=%s", stb.toString().c_str());
2676 MSYNC_DEBUG(2, "SNIPER: rmostb=%s", rmostb.toString().c_str());
2677 ii = ret_or_rmo_stb.find1stNonExe();
2678 if (ii == ret_or_rmo_stb.end())
2679 {
2680 ii = stb.find1stNonExe();
2681 if (ii == stb.end())
2682 {
2683 if (itype == ITYPE_ATOMIC)
2684 {
2685 MS_ERROR("SNIPER: STEP (store part of an atomic instr) failed to find match StoreIssue in STB. tid=%d PA=%llx", tid, addr);
2686 return;
2687 }
2688 else
2689 {
2690 MS_ERROR("SNIPER: STEP failed to find match StoreIssue in STB. tid=%d PA=%llx", tid, addr);
2691 return;
2692 }
2693 }
2694 }
2695 if ((addr & ADDR_MASK) != (ii->getAddr() & ADDR_MASK))
2696 {
2697 MS_ERROR("SNIPER: STEP's address mismatches with 1st non-executed StoreIssue entry. tid=%d PA=%llx", tid, addr);
2698 return;
2699 }
2700 ii->setExecuted(true);
2701 if (ii->isLinkValid())
2702 {
2703 (ii->getLink())->setExecuted(true);
2704 }
2705 if (ii->isRMOstore() && ii->isLinkValid())
2706 {
2707 rmostb.erase(ii);
2708 }
2709 vector<MemoryAccessEntry> wdbuf;
2710 mab_.popFront(&wdbuf, tsoChecker_);
2711 // write STORE_COMMIT data to memory
2712 bufWriteMemory(wdbuf);
2713 }
2714}
2715
2716void MemorySync::setCoreEnable(int node_index, uint64_t data)
2717{
2718 uint8_t bits = 0;
2719 for (int i=0; i<8; i++)
2720 {
2721 if (data & 0xff)
2722 {
2723 bits |= (0x1 << i);
2724 }
2725 data = data >> 8;
2726 }
2727 inv_vec_mask[node_index] = bits;
2728}
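
/* Worked example for setCoreEnable() (illustrative, not original code):
 each byte of <data> flags one core, so data == 0x0000000000ff00ff
 (cores 0 and 2 enabled) collapses to bits == 0x05. */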