/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: MemorySync.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#ifndef _MEMORYMODEL_H
#define _MEMORYMODEL_H
/************************************************************************
**
** Copyright (C) 2002, Sun Microsystems, Inc.
**
** Sun considers its source code as an unpublished, proprietary
** trade secret and it is available only under strict license provisions.
** This copyright notice is placed here only to protect Sun in the event
** the source is deemed a published work. Disassembly, decompilation,
** or other means of reducing the object code to human readable form
** is prohibited by the license agreement under which this code is
** provided to the user or company in possession of this copy.
**
*************************************************************************/
#include <iostream>
#include <vector>
#include "MemoryTransaction.h"
#include "MemorySyncDefs.h"
#include "LoadStoreCmd.h"
#include "MemoryAccessBuffer.h"
#include "LoadStoreBuffer.h"
#include "RieslingInterface.h"
#include "MemorySyncMessage.h"
#include "TsoChecker.h"
#include "SS_PliSocket.h"
// using namespace Tso;
/**
* The MemorySync class extends Riesling's memory to provide Riesling with the
* same memory image as the RTL testbench.
* <p>
* When RTL and Riesling perform lock-step simulation, RTL and Riesling
* may see different data, especially for multi-threaded microprocessors.
* For example, consider the following STEP execution sequence:
* <UL>
* <LI> (1) T0 stores to address A (where T0 means thread 0)
* <LI> (2) T0 reads address A
* <LI> (3) T1 reads address A (where T1 means thread 1)
* </UL>
* Assume all loads/stores are normal loads/stores, and none of them accesses
* the I/O address space. Then instruction (2) is certain to read the data
* written by (1). However, this is not necessarily true for (3). If the store
* data is still in the store buffer of thread 0, then instruction (3) will
* not see this data. Without any other help, Riesling will always read the
* data written by (1), which is wrong in this scenario.
* <p>
* Suppose in the above sequence T0 and T1 are replaced with C0 (core 0)
* and C1 (core 1), and assume every core has its own D$:
* <UL>
* <LI> (1) C0 stores to address A
* <LI> (2) C0 reads address A
* <LI> (3) C1 reads address A
* </UL>
* Similarly, the sequence alone cannot tell which data (3) will get.
* Riesling, without any help, will again always get the data from (1). However,
* it is possible that (3) gets data from its own D$, which still contains the
* data from before (1). (The execution of (1) will send an invalidation to
* core 1, but it may not have reached core 1's D$ before (3) is executed.)
* <p>
* This MemorySync model is designed to provide Riesling the same memory image
* as the RTL. To do so, three things are needed:
* - data structures to maintain transient data,
* - a mechanism to map each STEP (instruction execution by Riesling) to the right data,
* - APIs for the RTL to guide the data flow.
* Riesling sees this consistent memory view by not accessing memory directly;
* instead, it accesses memory through this model.
* <p>
* This module models 4 essential data structures: fetch buffer, load buffer,
* store buffer, and memory access buffer. The fetch buffer, load buffer, and
* store buffer are created per thread, while the memory access buffer
* is shared among all threads.
* <p>
* The memory access buffer (MAB) maintains the global data access order, as
* well as the memory image at the moment the related command is issued. The
* memory images maintained include per-core L1$ content and the global L2$ and
* memory content. The L2$ and physical memory images are considered the same
* because (1) the model assumes L2$ is the first memory component shared by
* all of the cores, and (2) a write committed to L2$ is observable immediately
* by all of them.
* <p>
* The first condition indicates that all (cacheable) accesses have to access
* L2$ first, which guarantees consistency between L2 and memory. (Strictly
* speaking, we should also assume that cacheable data cannot be changed to
* non-cacheable data, to avoid divergent images between L2 and memory.)
* The second condition indicates that a read from L2$ after a write to L2$ must
* get the data written by the previous write. This guarantees that there
* exists only one L2 image. If either of these conditions is violated, then
* the model may need to be revised.
* <p>
* Data is stored in the <data> field of an MAB entry, and the entry type indicates
* which image it maintains. A StoreCommit entry contains data for L2/memory,
* a LoadFill/StoreUpdate entry contains data for the D$, and a FetchFill
* entry contains data for the I$. The L1$ image needs to be represented in units
* of a cache line. Therefore, each LoadFill/FetchFill command creates multiple
* entries in the MAB, since the data size of an MAB entry is limited to 8 bytes.
* The L2$ image does not have this need since it is actually the same as memory.
* If an access is supposed to get data from a StoreCommit entry but cannot find
* any match in the MAB, it can simply get the data from the (Riesling) memory.
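* <p>
* The line-to-entry split is straightforward arithmetic; a small sketch (the
* helper below is hypothetical and only shows the addresses covered):
* <pre>
*   // A LoadFill covers one D$ line, so it creates DCACHE_LINE_SIZE/8 MAB
*   // entries, one per aligned 8-byte block of the line.
*   void forEachFillBlock(uint64_t addr) {
*       uint64_t lineBase = addr & ~(uint64_t)(DCACHE_LINE_SIZE - 1);
*       for (int i = 0; i < DCACHE_LINE_SIZE / 8; i++) {
*           uint64_t blockAddr = lineBase + 8 * i;
*           // ... a LoadFill MAB entry would be created for blockAddr
*       }
*   }
* </pre>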
* <p>
* The fetch buffer has an entry associated with every instruction fetch. The
* order of these entries is the same as the instruction fetch order. A link
* pointing to the corresponding FetchData in the MAB is set when the FetchData
* command is received and executed.
* <p>
* When an instruction is about to be executed, Riesling calls MemorySync::
* postMemoryAccess() in order to obtain the instruction word. This callback
* method maps the instruction to one of the entries in the fetch buffer.
* Then, via the link of that entry, the method gets the instruction word
* from the FetchData in the MAB, and assigns it back to Riesling.
* In matching the instruction with a fetch buffer entry, the model makes
* the assumption that instructions are executed in the same order as they
* are fetched. However, the model allows speculative fetches. Hence,
* the matching is done by looking for the oldest, non-executed entry with the
* same PC in the fetch buffer. All non-executed FetchIssue entries before this
* matched one are considered speculative fetch operations, and are thus
* removed.
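* <p>
* A minimal sketch of that matching rule, using a hypothetical FetchEntry struct
* rather than the actual LoadStoreBuffer API:
* <pre>
*   struct FetchEntry { uint64_t pc; bool executed; };
*
*   // Return the oldest non-executed entry with a matching PC; non-executed
*   // entries skipped on the way are speculative fetches and are dropped.
*   std::list<FetchEntry>::iterator
*   matchFetch(std::list<FetchEntry>& ifb, uint64_t pc) {
*       std::list<FetchEntry>::iterator it = ifb.begin();
*       while (it != ifb.end()) {
*           if (it->executed)  { ++it; continue; }   // already consumed
*           if (it->pc == pc)  return it;            // matched entry
*           it = ifb.erase(it);                      // speculative fetch
*       }
*       return ifb.end();
*   }
* </pre>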
* <p>
* Similar to the fetch buffer, the load buffer has an entry associated with
* every load instruction. But, unlike the fetch buffer, entries in the load
* buffer must have a one-to-one mapping with load instructions. In addition,
* the order in the load buffer must follow program order, since the LoadIssue
* commands are required to be sent in program order. Also similar to a
* fetch buffer entry, a link pointing to the corresponding LoadData is set
* when the LoadData command is received and executed.
* <p>
* When a load is executed, Riesling again calls MemorySync::postMemoryAccess()
* to obtain its load data. This callback method maps the instruction to one
* of the entries in the load buffer. Then, via the link of that entry, it gets
* the data from the LoadData in the MAB, and assigns it back to Riesling.
* In matching the instruction with a load buffer entry, the model makes the
* assumption that Riesling is requested (by the RTL) to execute the load
* instructions in program order, except for I/O accesses, which are allowed
* to be executed out-of-order relative to other loads. As a result, the method
* maps the load instruction to the first non-executed entry in the load buffer,
* unless it is an I/O access, in which case the address is also considered.
* <p>
* Note that the condition requiring load instructions to be executed in program
* order in Riesling can be relaxed, with a minor program modification, to only
* requiring that loads accessing the same 8-byte block be executed in program
* order. (Refer to the algorithm that matches the LoadData with the LoadIssue
* in MemorySync::handleLoadData().)
* <p>
* The store buffer also has an entry associated with every store. The difference
* from the fetch buffer and load buffer entries is that a store buffer entry also
* has a <data> field and a <byte_mask> indicating which bytes are valid. When a
* StoreCommit command is received by the module, it merges the data of the
* matched store buffer entry with the current L2/memory image of that 8-byte
* block. The merged data is then saved in the StoreCommit entry in the MAB.
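* <p>
* The byte-mask merge amounts to taking bytes from the store data where the mask
* bit is set and from the existing 8-byte image otherwise; a standalone sketch
* (the bit-to-byte ordering here is an assumption, not necessarily the one used
* by MemorySync::merge()):
* <pre>
*   // Expand an 8-bit byte-valid vector into a 64-bit mask (bit i -> byte i).
*   uint64_t expandByteMask(uint8_t vbyte) {
*       uint64_t m = 0;
*       for (int i = 0; i < 8; i++)
*           if (vbyte & (1 << i)) m |= (uint64_t)0xff << (8 * i);
*       return m;
*   }
*
*   // Merge the valid store bytes into the current 8-byte L2/memory image.
*   uint64_t mergeBytes(uint64_t image, uint64_t stData, uint8_t vbyte) {
*       uint64_t m = expandByteMask(vbyte);
*       return (image & ~m) | (stData & m);
*   }
* </pre>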
* <p>
* When a store is executed, Riesling calls MemorySync::preMemoryAccess(),
* passing the data it is about to write. The method compares that data with
* the data in the model's store buffer, which represents the RTL's data, and
* reports an error if they differ.
* The assumption made to map the store to a store buffer entry is that
* stores are executed in program order. So the match is simply the first
* non-executed StoreIssue entry.
* <p>
* To simplify the design and to handle some special cases, the store buffer is
* actually modeled by three different data structures: stb_, rmoStb_ (RMO
* store buffer), and retStb_ (retired store buffer). All StoreIssue commands
* first create entries in the stb_. Upon receiving its StoreAck command, an
* entry is moved to either rmoStb_ (for RMO stores) or retStb_ (otherwise).
* As a result, the stb_ is supposed to contain the same store instructions as
* the RTL's store buffer, so that the store buffer bypassing search can just
* search the stb_. However, in some cases, such as the current Niagara 1 setup,
* due to the difficulty of obtaining the exact timing, the RTL may still allow
* bypassing even after its StoreAck is issued. To tolerate this situation,
* normal stores are moved to retStb_ on StoreAck. The store buffer bypassing
* search then also searches this buffer if no match is found in the stb_.
* <p>
* The rmoStb_ is introduced to solve another issue. By definition, RMO stores
* are not required to perform dependence checks, and are only required to obey
* the RMO memory consistency protocol. Hence, an RMO store can be issued (from
* the store buffer) to L2 without waiting for the commit of a prior store. It is
* removed from the store buffer when it is issued. Therefore, its StoreAck is
* sent before the StoreCommit, while normal stores have StoreAck after StoreCommit.
* The rmoStb_ is designed to hold RMO StoreIssue entries when they are
* removed from the stb_, so that the instruction-data mapping remains possible.
* RMO stores include block stores and stores using *_BLOCK_INIT_ST_QUAD_LDD_*.
* <p>
* In addition to the assumptions described above, the model makes some other
* assumptions due to implementation and testbench issues. The following list
* summarizes all of the assumptions:
* <OL>
* <LI> L2$ is the first memory component shared by all cores
* <LI> a write committed to L2$ is observable immediately by all of the cores
* <LI> cacheable data cannot be changed to noncacheable data during the test
* <LI> instructions are executed in the same order as the instruction fetch
* (but speculative fetches are allowed)
* <LI> load instructions are executed in program order, except for I/O
* <LI> store instructions are executed in program order
* <LI> LoadData to the same 8-byte block is in program order
* <LI> StoreCommit to the same 8-byte block is in program order
* <LI> no STB bypassing of RMO store data is possible
* </OL>
* <p>
* The design attempts to be microarchitecture-independent. The
* microarchitecture-dependent parts are parameterized. These parameters
* are either defined in MemorySyncDefs.h or passed as arguments when creating
* the MemorySync object, and include:
* - system type (N1, N2, ...)
* - number of cores per chip,
* - number of threads per core,
* - D$ line size,
* - I$ line size
* <p>
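* A minimal construction sketch (the sizes below are illustrative values, not a
* required configuration):
* <pre>
*   // 64 strands in total, 8 strands per core, 8 cores per chip, debug level 0;
*   // the TSO checker and the msync callbacks are left enabled (defaults).
*   MemorySync* msync = new MemorySync(64, 8, 8, 0);
* </pre>
* <p>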
* @see LoadStoreCmd.h
* @see LoadStoreBuffer.h
* @see MemoryAccessBuffer.h
* @see MemorySyncDefs.h
*/
class MemorySync{
public:
static MemorySync* msyncObj;
static std::string dumpBuffers();
public:
/**
* Default constructor
*/
MemorySync();
/**
* Constructor
* @param max_strands      total number of strands (threads) in the system
* @param strands_per_core number of strands per core
* @param cores_per_cpu    number of cores per chip
* @param memDebug         debug level
* @param tsoChecker       non-zero to enable the TSO checker
* @param callback         non-zero to register the memory access callbacks
*/
MemorySync(int max_strands, int strands_per_core, int cores_per_cpu, int memDebug, int tsoChecker=1, int callback=1);
/**
* Copy constructor
*
* @param orig The MemorySync object to copy.
*/
MemorySync( const MemorySync &orig );
/**
* Destructor
*/
virtual ~MemorySync();
/**
* Equality operator
*
* @param rhs The right hand side of the equality operator
* @return Return true if this object and rhs are equal,
* otherwise return false
*/
bool operator==( const MemorySync &rhs ) const;
/**
* Assignment operator
*
* @param rhs The right hand side of the assignment operator.
* @return The lvalue of the assignment.
*/
const MemorySync & operator=( const MemorySync &rhs );
/**
* Return a string representation of this MemorySync object.
*/
std::string toString() const;
/**
* return a pointer to the (global) memory access buffer
*/
MemoryAccessBuffer* getMABPtr () { return &mab_; }
/**
* return a pointer to a specified strand's (thread's) load buffer
* @param tid thread id
* @return pointer to the load buffer of the specified thread, ldb_[tid]
*/
LoadStoreBuffer* getLDBPtr (int tid) { return &ldb_[tid]; };
/**
* return a pointer to a specified thread's store buffer
* @param tid thread id
* @return pointer to the store buffer of the specified thread, stb_[tid]
*/
LoadStoreBuffer* getSTBPtr (int tid) { return &stb_[tid]; };
/**
* return a pointer to a specified thread's retired store buffer
* @param tid thread id
* @return pointer to the retired store buffer of the specified thread, retStb_[tid]
*/
LoadStoreBuffer* getRSTBPtr (int tid) { return &retStb_[tid]; }
/**
* assign data to StoreIssue entry
* @param tid thread id
* @param addr address
* @param data data to be written
* @param rmo indicates if this is for an RMO store
*/
void setTestBenchData(uint32_t tid, uint64_t addr, uint64_t data, bool rmo);
/**
* remove a StoreIssue entry from the specified thread's retired store buffer
* @param addr address
* @param tid thread id
*/
void removeRetiredStore (uint64_t addr, uint32_t tid);
/**
* Executes LoadIssue MemorySync API command
* <p>
* Depending on the instruction type, this method pushes 1-8 entries into the
* load buffer of that thread: <br>
* - Block-Load, which accesses 64 bytes, results in 8 entries; <br>
* - Quad-Load, which accesses 16 bytes, results in 2 entries; <br>
* - normal load, which accesses 1-8 bytes, results in 1 entry.
* <p>
* In N2, this method has to figure out whether the data source is the store buffer
* or not. If it is, then the result will be used for the LoadData sent later.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the LoadIssue
*/
void handleLoadIssue(LoadStoreCmd& cmd);
/**
* Executes LoadData MemorySync API command
* <p>
* Depending on the instruction type, this method pushes 1 or 2 (only for N2 so far)
* entries into the global memory access buffer. In addition, it accesses data for
* each entry according to the data source specified in the command. Note that the
* MemorySync model for N2 actually has to figure out if the data source is store
* buffer or not by itself.
* <p>
* If the data source is the store buffer, then the data is obtained from the
* store buffer of that thread. <br>
* If the data source is L1, then the data comes from a LoadFill/StoreUpdate entry
* in the MAB, or, if not found, from Riesling memory. <br>
* If the data source is L2/Memory, then the data comes from a StoreCommit entry
* in the MAB, or, if not found, from Riesling memory.
* <p>
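* The data-source selection described above, as a simplified sketch (the enum and
* the boolean inputs are hypothetical, not the actual LoadStoreCmd encoding):
* <pre>
*   enum LoadDataSource { FROM_STORE_BUFFER, FROM_MAB_ENTRY, FROM_RIESLING_MEMORY };
*
*   // srcIsStb: the command's data source is the store buffer.
*   // mabHit:   a matching LoadFill/StoreUpdate (L1 source) or StoreCommit
*   //           (L2/memory source) entry was found in the MAB.
*   LoadDataSource chooseLoadDataSource(bool srcIsStb, bool mabHit) {
*       if (srcIsStb) return FROM_STORE_BUFFER;   // store buffer bypassing
*       return mabHit ? FROM_MAB_ENTRY : FROM_RIESLING_MEMORY;
*   }
* </pre>
* <p>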
* @param cmd a LoadStoreCmd object reference that contains the info of the LoadData
*/
void handleLoadData(LoadStoreCmd& cmd);
/**
* Executes LoadFill MemorySync API command
* <p>
* This method pushes DCACHE_LINE_SIZE/8 entries into the global memory access buffer.
* Each entry is filled with data at the moment the corresponding LoadData is issued.
* <p>
* Note that while searching for the matched LoadData entry, the matched one should
* also have cacheL1 set. However, N2 does not specify the cacheL1 flag in its LoadData
* command. It requires the MemorySync model to follow the assumption that every thread
* can have only one outstanding load, i.e., the LoadData that results in the LoadFill
* must be the only non-executed LoadData entry of that thread.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the LoadFill
*/
void handleLoadFill(LoadStoreCmd& cmd);
/**
* Executes StoreIssue MemorySync API command
* This method pushes one entry into the issuing thread's store buffer for each
* such command. Although <data> is expected, some environments may not have the
* data ready when this command is issued. In that case, another method,
* setTestBenchData(), can be used to set the data in the store buffer.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the StoreIssue
*/
void handleStoreIssue(LoadStoreCmd& cmd);
/**
* Executes StoreCommit MemorySync API command
* This method merges the corresponding StoreIssue data with the L2/memory data
* at the time the StoreCommit is issued. The merged data is stored in the
* StoreCommit entry in the MAB, which represents the L2/Memory image at that
* time. Usually, a StoreCommit has one matched entry in the store buffer and
* results in one entry in the MAB. One exception is the BLK_INIT store.
* If certain conditions are satisfied, a BLK_INIT store can zero an L2 cache line.
* In this case, the model inserts L2CACHE_LINE_SIZE/8 entries into the MAB with 0
* in their <data> fields, except for the one that holds the data written by this
* instruction.
* <p>
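* A sketch of the BLK_INIT line-zeroing case (addr and mergedData stand in for
* values taken from the command and from the store-buffer merge; the helper is
* hypothetical):
* <pre>
*   void pushBlkInitLine(uint64_t addr, uint64_t mergedData) {
*       uint64_t lineBase = addr & ~(uint64_t)(L2CACHE_LINE_SIZE - 1);
*       for (int i = 0; i < L2CACHE_LINE_SIZE / 8; i++) {
*           uint64_t blockAddr = lineBase + 8 * i;
*           // every 8-byte block of the line is zeroed except the stored one
*           uint64_t data = (blockAddr == (addr & ~(uint64_t)7)) ? mergedData : 0;
*           // ... push a StoreCommit MAB entry for blockAddr carrying 'data'
*       }
*   }
* </pre>
* <p>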
* @param cmd a LoadStoreCmd object reference that contains the info of the StoreCommit
*/
void handleStoreCommit (LoadStoreCmd& cmd) ;
/**
* Executes StoreAck MemorySync API command
* This method moves the matched StoreIssue in the stb_ into either retStb_ (for
* normal stores) or rmoStb_ (for RMO stores).
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the StoreAck
*/
void handleStoreAck (LoadStoreCmd& cmd);
/**
* Executes StoreInv MemorySync API command
* This method notifies the StoreCommit that caused the StoreInv that the StoreInv
* has been performed.
* <p>
* The function of the StoreInv command is to prevent the StoreCommit that initiated
* the StoreInv from retiring from the Memory Access Buffer (MAB) too early. A
* StoreCommit cannot be removed from the MAB until it reaches the head of the
* MAB and all corresponding StoreInv and StoreUpdate commands are done. If the
* latter condition is violated, then correct data may not be obtained. Consider
* the following sequence:
* - C0T0 StoreCommit A, inv=8'b00000010
* - C0T1 LoadData A (L1 hit)
* - C1Tx StoreInv A
* If the first StoreCommit is retired from the MAB without waiting for the
* StoreInv to complete, then, since retiring a StoreCommit from the MAB also
* results in writing its data to the Riesling memory, the LoadData would read the
* StoreCommit's data, which is wrong. The LoadData should get the data from
* before the StoreCommit.
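* <p>
* The retirement condition, as a minimal predicate sketch (a hypothetical helper,
* not the actual MemoryAccessBuffer interface):
* <pre>
*   // A StoreCommit at the head of the MAB may retire only after every
*   // StoreInv/StoreUpdate it triggered has completed.
*   bool canRetireStoreCommit(bool atMabHead, int invsPending, int updatesPending) {
*       return atMabHead && invsPending == 0 && updatesPending == 0;
*   }
* </pre>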
*
* The implementation also creates an entry for the StoreInv in the MAB after it
* notifies the StoreCommit it is done. This is not strictly necessary, but doing
* so allows us to examine the command sequence in the MAB, which helps debugging.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the StoreInv
*/
void handleStoreInv (LoadStoreCmd& cmd);
/**
* Executes StoreUpdate MemorySync API command
* This method notifies the StoreCommit that caused the StoreUpdate that the
* StoreUpdate has been performed. In addition, it also creates a StoreUpdate
* entry in the MAB, and copies the data in the StoreCommit to the <data> field
* of the StoreUpdate. Recall that the StoreUpdate keeps the D$ data image.
* <p>
* The other function of the StoreUpdate command is the same as StoreInv, that is,
* to prevent the StoreCommit that initiates the StoreUpdate from retiring
* from the Memory Access Buffer (MAB) too early. Refer to the example in
* handleStoreInv() for more details.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the StoreUpdate
*/
void handleStoreUpdate (LoadStoreCmd& cmd);
/**
* Executes Evict MemorySync API command
* This method creates DCACHE_LINE_SIZE/8 Evict entries in the MAB to indicate that
* an L2 cache line is evicted. Each D$ line needs to have its own Evict entry,
* since its <inv> may be different.
* <p>
* The function of Evict and EvictInv is to mimic L2 cache line eviction. Let's
* use an example to explain why they are needed. Consider the same sequence
* used in handleStoreInv():
* - C0T0 StoreCommit A, inv=8'b00000010
* - C0T1 LoadData A (L1 hit)
* - C1Tx StoreInv A
* From the value of <inv>, we know that the same line exists in core 1's L1.
* Suppose that before the StoreCommit, the line in L2 is evicted. Then the
* StoreCommit would have <inv>=8'b00000000. Consequently, a similar error would
* happen: the StoreCommit is removed too early, so the LoadData (L1 hit) gets the
* wrong data. Evict and EvictInv solve this problem. With these two commands,
* the above situation becomes the following sequence:
* - Evict A, inv=8'b00000010
* - C0T0 StoreCommit A, inv=8'b00000010
* - C0T1 LoadData A (L1 hit)
* - C1Tx EvictInv A
*
* Again, the EvictInv prevents the Evict from being removed too early.
* The implementation also creates an entry for the EvictInv in the MAB after it
* notifies the corresponding Evict entries that it is done. This is not strictly
* necessary, but doing so allows us to examine the command sequence in the MAB,
* which helps debugging.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the Evict
*/
void handleEvict (LoadStoreCmd& cmd);
/**
* Executes EvictInv MemorySync API command
* This method creates L2CACHE_LINE_SIZE/8 EvictInv entries in the MAB, and
* notifies the corresponding Evict entries in the MAB that the EvictInv is
* performed. Refer to handleEvict() for more discussion.
* <p>
* @param cmd a LoadStoreCmd object reference that contains the info of the EvictInv
*/
void handleEvictInv (LoadStoreCmd& cmd);
/**
* Allows DMA accesses from I/O. This occurs when an SIU Store (from ENET
* or PCI) is seen in the L2, or when the testbench back-door stores to memory.
*
* Riesling will update memory immediately when MemorySlam message is
* received.
*
* No SSTEP will be sent with this message.
* No checking is performed by Riesling on this message.
* No checking is done against any in-flight load/store.
*/
void handleStoreSlam(StoreIssueCmd& cmd);
/**
* DMA_STORE: similar to MEM_SLAM, but allows inv_vec to handle conflicts
* with L1 data.
*/
void handleDmaStoreStart(LoadStoreCmd& cmd);
void handleDmaStore (LoadStoreCmd& cmd) ;
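// Fetch-side counterparts of the load commands: FetchIssue queues an entry in
// the per-thread fetch buffer, FetchData binds the fetched instruction word to
// an MAB entry, and FetchFill records the I$ line image (see the class comment).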
void handleFetchIssue(LoadStoreCmd& cmd);
void handleFetchData(LoadStoreCmd& cmd);
void handleFetchFill(LoadStoreCmd& cmd);
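// Callbacks invoked by Riesling around each memory access: preMemoryAccess()
// checks store data against the model's store buffer, postMemoryAccess()
// supplies fetch/load data from the model (see the class comment). The static
// pre_memory_access()/post_memory_access() versions take the MemorySync object
// as a void* so they can be registered as C-style callbacks.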
void preMemoryAccess( MemoryTransaction &xact );
void postMemoryAccess( MemoryTransaction& xact );
static void pre_memory_access( void* msync, MemoryTransaction& xact );
static void post_memory_access( void* msync, MemoryTransaction& xact );
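// Data-image helpers: getL1Instr()/getL1Data()/getL2Data() look up the current
// I$/D$/L2 image for a command; align2addr()/align8byte(), merge() and
// byteMask() handle the 8-byte alignment and byte-mask merging used when
// composing MAB entries (see the class comment on <data>/<byte_mask>).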
uint64_t getL1Instr(LoadStoreCmd& cmd);
uint64_t getL1Data(LoadStoreCmd& cmd);
uint64_t getL2Data(std::list<MemoryAccessEntry>::iterator from, LoadStoreCmd& cmd);
uint64_t align2addr (uint64_t data, uint8_t size, uint32_t addr_offset);
uint64_t align8byte (uint64_t data, uint8_t size, uint32_t addr_offset);
uint64_t merge (uint64_t todata, uint64_t fromdata, uint8_t fromszv);
void bufWriteMemory (std::vector<MemoryAccessEntry>& wdbuf);
uint64_t byteMask(const uint8_t vbyte);
uint64_t iSeq (uint32_t tid) { return (iseq_[tid]++); }
uint64_t getIseq (uint32_t tid) { return iseq_[tid]; }
Tso::TsoChecker* getTsoCheckerPtr () { return tsoChecker_; }
/**
* turn off msync model related callbacks
*/
void nullMsyncCallback();
/*
* empty the buffers of a specified strand-id
*/
void flushMsyncCallback(int tid);
/*
* Riesling:
* if LoadPop, pop and discard the Load at the top of your queue.
* You will have received a LDISSUE and LDDATA for this Load before
* receiving the LDPOP.
*
* Bench:
* It is not possible for the bench to send the <pa> on LDPOP,
* so Riesling can't check that the correct Load is popped.
* However, if the wrong Load is discarded, the next Load will likely fail.
*/
void handleLoadPop(int tid);
/*
* Riesling:
* if StorePop, pop and discard the Store at the top of your queue.
* You will have received a STISSUE for this Store before
* receiving the STPOP.
*/
void handleStorePop(int tid);
/*
* compare an N-byte data block in memory with the data provided by the
* testbench; throw an error on a miscompare.
*/
void handleMemoryCheck(uint64_t paddr, uint64_t* data, int dataSize);
/*
* flush out all entries; invoked when an INTP=0 system-wide
* warm reset is encountered.
*/
void flushAll();
/**
* a hook for address translation, for example dma_loopback, which
* translates an I/O address to a RAM address.
*/
typedef uint64_t (* AddrTransCB)(uint64_t);
void setAddrTransCB(AddrTransCB cb) { addrTrans = cb; }
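// Usage sketch (the translation function below is hypothetical, for
// illustration only):
//   static uint64_t dmaLoopback(uint64_t ioAddr) { return ioAddr & ~0xff00000000ULL; }
//   ...
//   MemorySync::msyncObj->setAddrTransCB(dmaLoopback);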
// handle sniper related load/store
void handleSniper(int tid, uint64_t addr, INSTR_TYPE itype, uint64_t data);
void setCoreEnable(int node_index, uint64_t data);
// socket file descriptor to RTL testbench, used to report error
SS_PliSocket* socket;
protected:
void completeStoreInvStoreUpdate (LoadStoreCmd& cmd);
private:
MemoryAccessBuffer mab_; // global memory access buffer
LoadStoreBuffer* ldb_; // load buffer
LoadStoreBuffer* stb_; // store buffer
LoadStoreBuffer* retStb_;
LoadStoreBuffer* rmoStb_;
LoadStoreBuffer* ifb_; // I-Fetch Buffer
uint64_t* iseq_; // load/store instruction sequence number
/* The following buffer, retStb_, is introduced to store entries removed
from the stb_ whose data may be needed by a following load. This is to
solve the N1 signal timing issue. It may not be needed for N2. */
/* LoadStoreBuffer retStb_[MAX_STRANDS]; */
/* This buffer is for storing RMO-store instructions. In N2, this type of
store leaves the STB when it is issued to L2, hence the StoreAck comes before
the StoreL2Commit. In addition, these stores may exhibit out-of-order
invalidation behavior. */
/* LoadStoreBuffer rmoStb_[MAX_STRANDS]; */
RieslingInterface rif_;
int memDebug_;
Tso::TsoChecker* tsoChecker_;
// hook to provide optional (I/O) address translation
AddrTransCB addrTrans;
// inv_vec mask, 1 bit per core.
// inv_vec in st_commit is arranged from left to right as: core_n-1
// core_n-2 ... core_1 core_0.
// inv_vec_mask is arranged differently: inv_vec_mask[0] keeps core_7 ...
// core_0, from left to right; inv_vec_mask[1] keeps core_15 ... core_8,
// from left to right, and so on.
uint8_t inv_vec_mask[INV_VEC_SIZE];
};
#endif /* _MEMORYMODEL_H */