// ========== Copyright Header Begin ==========================================
// OpenSPARC T2 Processor File: ccxDevMemBFM.vr
// Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved
// 4150 Network Circle, Santa Clara, California 95054, U.S.A.
// * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; version 2 of the License.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// For the avoidance of doubt, and except that if any non-GPL license
// choice is available it will apply instead, Sun elects to use only
// the General Public License version 2 (GPLv2) at this time for any
// software where a choice of GPL license versions is made
// available with the language indicating that GPLv2 or any later version
// may be used, or where a choice of which version of the GPL is applied is
// otherwise unspecified.
// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
// CA 95054 USA or visit www.sun.com if you need additional information or
// ========== Copyright Header End ============================================
#include <vera_defines.vrh>
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// To use this class, you must have in your bench a file called globals.vri
// that has all global extern declarations in it.
#include <ccxDevicesDefines.vri>
#include <std_display_defines.vri>
#include <std_display_class.vrh>
#include <basePktClass.vrh>
#include <cpxPktClass.vrh>
#include <pcxPktClass.vrh>
#include <baseParamsClass.vrh>
#include <sparcParams.vrh>
#include <ccxDevBaseBFM.vrh>
#include <baseUtilsClass.vrh>
#include <sparcBenchUtils.vrh>
#include <ccx_tag_class.vrh>
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// To use this class, you must have in your bench a class that extends
// sparcBenchUtils.vr. It must be in files named utilsClass.vr/utilsClass.vrh
// AND have a global handle called gUtil.
#include <utilsClass.vrh>
//#define CCXDEVMEMBFM_DEBUG
// #define GNT_ATTEMPTS 20
#define CLASSNAME CcxDevMemBFM
#define CLASSNAMEQ "CcxDevMemBFM"
// Memory / I/O device BFM for the CCX. Extends CcxDevBaseBFM; the method
// bodies are defined out of line as CLASSNAME::<name>.
class CLASSNAME extends CcxDevBaseBFM {
// Compare data and address of the first CAS packet. respond() saves them
// indexed by {cpuId,tid} and reads cas1Addr back for the following CAS packet.
local reg [63:0] cas1Data[64], cas1Addr[64];
// local reg [31:0] invVectorCAS;
// Semaphore id returned by alloc() in new(). respond(), enqueueEvict() and
// burstResp() take and release it around their work.
local integer ldstSyncLock;
// Constructor.
task new(integer instatnce, reg passiveIn=0,
reg cacheoff = 0, reg flagUnexpected=0, reg ccxOnly=0);
// Register an expected packet / remove that registration.
task recv(BasePkt pktHndl);
task cancelRecv(BasePkt pktHndl);
// Build and queue the response packet(s) for one PCX request.
local task respond(PcxPkt reqPkt);
// Way lookup plus dump of the duplicate tag line addressed by reqPkt.
local function reg [3:0] lineState(PcxPkt reqPkt,
string why="debug lineState:",
// Create an interrupt packet aimed at the core that owns thread tid.
task sendIntr(reg [5:0] tid,
// Duplicate L1 tag maintenance after an I$ fill / D$ fill.
local task updateItag(PcxPkt reqPkt, CpxPkt rspPkt, CpxPkt rspPkt2 = null,
reg [2:0] cpuId, integer how = TAG_VAL);
local task updateDtag(PcxPkt reqPkt, CpxPkt rspPkt,
reg [2:0] cpuId, integer how = TAG_VAL);
// Byte-masked compare of two 64 bit values.
local function reg data_equal(reg [63:0] data1,
// local task create_vector(reg table, reg [3:0] way,
// var reg [31:0] vect, reg [2:0] core_num);
// Invalidation vector for one core; also invalidates the duplicate tags.
local function reg [31:0] getInvalVector(integer type,
// Drive the LD/ST sync port for a response packet.
local task ldstSync(reg [2:0] cpuId, CpxPkt rspPkt);
// Queue an eviction packet for the enabled cores.
public task enqueueEvict(reg [7:0] coreEnable,
reg [39:0] evictPA = 40'hffffffffff,
integer dCacheWeight = 60);
// Hold responses back so they leave in bursts.
local task burstResp(integer amount);
// Constructor.
//   instatnce, passiveIn - forwarded to the base class constructor together
//                          with the class name string.
//   flagUnexpected, ccxOnly - copied into the base class fields of the same name.
// Also allocates ldstSyncLock, copies the stall thresholds from gParam,
// sets the initial port values and forks the background threads.
// NOTE(review): cacheoff is not referenced in the lines visible here.
task CLASSNAME::new(integer instatnce, reg passiveIn=0,
reg cacheoff = 0, reg flagUnexpected=0, reg ccxOnly=0) {
super.new(instatnce, passiveIn, CLASSNAMEQ);
super.ccxOnly = ccxOnly; // only testing ccx
super.flagUnexpected = flagUnexpected;
ldstSyncLock = alloc( SEMAPHORE, 0, 1, 1 ); // this may not be needed anymore
// stall thresholds come from the global parameter object
super.stallStart = gParam.stallStart;
super.stallStop = gParam.stallStop;
if (myPort == DEV_NCU) lineHash[0] = 0;
// initial values for the signals driven from this side
gPcxPort[myPort].$stall <= 0;
gCpxPort[myPort].$req <= 0;
// gCpxPort[myPort].$datao <= 0;
// the atm signals are only touched on non-NCU ports
if (myPort !== DEV_NCU) gCpxPort[myPort].$atmo <= 0;
@(posedge gPcxPort[myPort].$clk);
// NOTE(review): the next four statements use '==' rather than '<=', so they
// read as checks that the inputs are 0 after the first clock - presumably
// Vera expect statements; confirm.
gCpxPort[myPort].$gnt == 0;
gPcxPort[myPort].$rdy == 0;
gPcxPort[myPort].$datai == 0;
if (myPort !== DEV_NCU) gPcxPort[myPort].$atmi == 0;
// service mailboxes, send packets
fork super.serviceSends2(PP_CPX);
// bursting is only started when a non-zero burst amount is configured
if (gParam.burstAmount) {
fork burstResp(gParam.burstAmount);
// Wait for data from PCX, we are a cache/IO BFM.
// Works on gPcxPort[myPort]: loads the incoming data into a request packet,
// matches it against the expectedSig hash for CCX testing, and manages the
// stalling flag from outstandingReqs versus stallStart / stallStop.
task CLASSNAME::slave() {
ccxPort portVar = gPcxPort[myPort];
// we can get back to back packets...
// rdy while we are stalling is reported as an error
if (portVar.$rdy && stalling) {
error("ERROR FAIL: should not get rdy when stalled\n");
// need to fork to handle back to back reqs
reqPkt.loadPkt(portVar.$datai, myPort);
// if on L2 port, look at portVar.$atmi and save value
if (myPort !== DEV_NCU) reqPkt.atm_wire = portVar.$atmi;
#ifdef CCXDEVMEMBFM_DEBUG
printf("%0d: CcxDevMemBFM[%2d]::slave: got req packet, outstandingReqs++=%0d, atm=%0d, ccxSourced=%0d, vec=%h\n",get_time(LO),myPort,outstandingReqs,reqPkt.atm_wire,reqPkt.ccxSourced,reqPkt.getVector());
// for CCX testing, anyone waiting for this packet?
// was it expected? should we auto respond?
tmpVec = reqPkt.makeSignature();
if (assoc_index(CHECK, expectedSig, tmpVec)) {
// expected: copy what arrived into the registered packet, stamp the
// arrival cycle and toggle pktArrived
expectedSig[tmpVec].loadPkt(reqPkt.getVector(), myPort);
expectedSig[tmpVec].arrivalTime = get_cycle();
expectedSig[tmpVec].pktArrived = ~expectedSig[tmpVec].pktArrived;
} else if (flagUnexpected) {
// not registered and the user asked for unexpected packets to be flagged
PR_ERROR(CLASSNAMEQ, MON_ERROR,
psprintf ("Unexpected packet on port %0d, vector=%h",
myPort,reqPkt.getVector()));
// check how/if we should respond and do it
suspend_thread(); // do as last thing in this time slot.
if (!ccxOnly && reqPkt.valid) {
printf("%0d: CcxDevMemBFM[%2d]::slave: got invalid req packet, outstandingReqs++=%0d, atm=%0d, ccxSourced=%0d, vec=%h\n",get_time(LO),myPort,outstandingReqs,reqPkt.atm_wire,reqPkt.ccxSourced,reqPkt.getVector());
{ // stall signal management
// re-evaluate only when outstandingReqs changes
wait_var(outstandingReqs);
//printf("%0d: wait_var at stall check outstandingReqs=%0d\n", get_time(LO), outstandingReqs);
// stall? Takes 3 clocks for the CCX to actually stall
if (outstandingReqs >= stallStart && !stalling) {
//printf("%0d: CcxDevMemBFM[%2d]::stalling, outstandingReqs=%0d\n", get_time(LO),myPort, outstandingReqs);
repeat (2) @(negedge portVar.$clk);
// un-stall once the count has dropped to stallStop or below
if (outstandingReqs <= stallStop && stalling) {
//printf("%0d: CcxDevMemBFM[%2d]::un-stalling, outstandingReqs=%0d\n", get_time(LO),myPort, outstandingReqs);
stalling = 0; // yes, twice, in case we started stalling above
//printf("%0d: CcxDevMemBFM[%2d]::done un-stalling, outstandingReqs=%0d\n", get_time(LO),myPort, outstandingReqs);
// Create an interrupt packet for thread tid with createIntr(), mark it as
// sent from this port and target the single core selected by tid[5:3].
task CLASSNAME::sendIntr(reg [5:0] tid,
reqPkt.createIntr(tid,type,vect); // INTR_RESET,INTR_POR
reqPkt.sendPorts = 1 << myPort;
reqPkt.targetPorts = 1 << tid[5:3]; // not multicast
// check how/if we should respond and do it.
// check response signature and use user response if hit (review)
// else return a built in response.
//
//   reqPkt - the PCX request packet received on this port.
//
// The task picks rspPkt.rtntyp / rspPkt.rtntypU from the request, randomizes
// the response timing, and then, per return type: waits ordering() clocks,
// takes ldstSyncLock, reads or writes gMem / I/O space, updates the
// duplicate L1 tags and invalidation vector, calls ldstSync() and releases
// ldstSyncLock.
task CLASSNAME::respond(PcxPkt reqPkt) {
CpxPkt rspPkt, rspPkt2, rspPkt3;
reg [7:0] cas1size, targetCores;
reg [31:0] invVector = 0;
reg [31:0] tmpInvVect = 0;
ccxPort portVar = gCpxPort[myPort];
// do not drive on this port
// thread number used for messages and for the cas1Addr/cas1Data arrays
thread = {reqPkt.cpuId,reqPkt.tid};
// check response signature & use user pkt if hit
// else return built in response pkt based on request pkt fields.
// use fast resp mailbox.
// Some may be illegal for L2 port, check port number and address!
// On a non-NCU port an address with bit 39 set is only accepted when
// addr[39:32] lies in 8'hA0..8'hBF.
if (myPort !== DEV_NCU && reqPkt.addr[39] == 1 &&
(reqPkt.addr[39:32] < 8'hA0 || reqPkt.addr[39:32] > 8'hBF)) {
PR_ERROR (CLASSNAMEQ, MON_ERR,
psprintf ("T%d Port=%0d request type [128:124]=%b. ERROR FAIL: Illegal I/O on non-NCU port!",thread,myPort,reqPkt.rqtyp));
// create response packets
rspPkt.sendPorts = 1 << myPort;
rspPkt.addr = reqPkt.addr;
#ifdef CCXDEVMEMBFM_DEBUG
if (reqId == 10000) reqId = 0;
reqPkt.reqTime = get_time(LO);
rspPkt.reqTime = reqPkt.reqTime;
// state of tag for line at this time
reqPkt.lineWay = lineState(reqPkt, *, 1);
case ({reqPkt.inv,reqPkt.pf}) {
if (myPort !== DEV_NCU) {
// can't tell diag load from load so both are load!
rspPkt.rtntyp = CPX_LD; // code is done
rspPkt.rtntypU = U_CPX_LD;
// can't tell diag load from load so both are load!
rspPkt.rtntyp = CPX_NCU_LD; // code is done
rspPkt.rtntypU = U_CPX_NCU_LD;
rspPkt.rtntyp = CPX_PREF;
rspPkt.rtntypU = U_CPX_PREF;
rspPkt.rtntyp = CPX_D_INVAL;
rspPkt.rtntypU = U_CPX_D_INVAL;
rspPkt.rtntyp = CPX_PREF_ICE;
rspPkt.rtntypU = U_CPX_PREF_ICE;
if (reqPkt.l1wayBis && !reqPkt.pf) {
rspPkt.rtntyp = CPX_ST; // code is done
rspPkt.rtntypU = U_CPX_BIS;
} else if (reqPkt.l1wayBis && reqPkt.pf) {
rspPkt.rtntyp = CPX_ST; // code is done
rspPkt.rtntypU = U_CPX_BLK_ST;
// can't tell diag store from store so both are store!
rspPkt.rtntyp = CPX_ST; // code is done
rspPkt.rtntypU = U_CPX_ST;
rspPkt.rtntyp = CPX_CAS_RTN;
rspPkt.rtntypU = U_CPX_CAS_RTN;
rspPkt.rtntyp = CPX_CAS_ACK;
rspPkt.rtntypU = U_CPX_CAS_ACK;
rspPkt.rtntyp = CPX_STR_LD; // code is done
rspPkt.rtntypU = U_CPX_STR_LD;
rspPkt.rtntyp = CPX_STR_ST; // code is done
rspPkt.rtntypU = U_CPX_STR_ST;
rspPkt.rtntyp = CPX_SWAP_RTN; // will do ack as well.
rspPkt.rtntypU = U_CPX_SWAP_RTN; // will do U_CPX_SWAP_ACK as well.
rspPkt.rtntyp = CPX_MMU_RTN; // code is done
rspPkt.rtntypU = U_CPX_MMU_RTN;
if (myPort !== DEV_NCU) {
rspPkt.rtntyp = CPX_IFILL; // code is done
rspPkt.rtntypU = U_CPX_IFILL;
rspPkt.rtntyp = CPX_NCU_IFILL; // code is done
rspPkt.rtntypU = U_CPX_NCU_IFILL;
rspPkt.rtntyp = CPX_I_INVAL;
rspPkt.rtntypU = U_CPX_I_INVAL;
PR_ERROR (CLASSNAMEQ, MON_ERR,
psprintf ("T%d Port=%0d request type [128:124]=%b. ERROR FAIL: Unsupported rqtyp",thread,myPort,reqPkt.rqtyp));
reqPkt = null; rspPkt2 = null;
// finalize random values for response times
void = rspPkt.randomize() with {hit == 1;};
void = rspPkt.randomize();
U_CPX_STR_LD, U_CPX_MMU_RTN, U_CPX_PREF,
U_CPX_PREF_ICE, U_CPX_NCU_LD: {
repeat (ordering(rspPkt, "LD RTN")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
if (reqPkt.addr[39] && myPort == DEV_NCU) {
// is it a special address for us?
// if (reqPkt.addr[IO_ASI_ADDR_NCU] == IO_ASI_CPU ||
// reqPkt.addr[IO_ASI_ADDR_NCU] == IO_ASI_NCU) {
// err[1] is set when gUtil.ioSpaceAccess() returns 0
rspPkt.err[1] = ! gUtil.ioSpaceAccess(reqPkt.addr,
PR_ERROR(CLASSNAMEQ, MON_ERROR, psprintf("TID %0d is doing a 16 byte load to I/O addr %0h!",reqPkt.tid,reqPkt.addr));
rspPkt.data = gUtil.copyDataByte(tmpData,reqPkt.size,reqPkt.addr[3:0]);
// } else if (reqPkt.addr[39] && myPort !== DEV_NCU &&
// (reqPkt.addr[IO_ASI_ADDR_NCU] < 8'hA0 ||
// reqPkt.addr[IO_ASI_ADDR_NCU] > 8'hBF) ) {
// // I/O but not L2 CSRs
// error("I/O LD to L2, but is not L2 CSRs");
} else { // LD from non-NCU space or L2 CSRs
// Address in req is quad word aligned.
// Access memory at double word boundaries.
tmpAddr = {reqPkt.addr[39:4],4'b0000};
rspPkt.data = gMem.read128(tmpAddr,myPort,1);
if (gParam.mcuMemPrint[READ]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(reqPkt.addr[39:0] & 40'hFFFFFFFFC0, 8);
if (rspPkt.rtntypU == U_CPX_MMU_RTN)
rspPkt.wayMMUid = reqPkt.l1wayMMUid;
if (rspPkt.rtntypU == U_CPX_PREF) {
// prefetch data is irrelevant since data does not allocate in L1
rspPkt.data = {urandom(),urandom(),urandom(),urandom()};
// does this imply that the L1 does not have this line?
//invVector = getInvalVector(rspPkt.rtntypU, reqPkt, reqPkt.cpuId);
// evict line from L1 & L2.
// send invalidates to other cores for this address.
// do not send response to initiating core!!!
// Used by SW to flush lines in L2 based on an index and a
// way specified as part of the Physical Address in the instruction
// itself. Bits [39:37] of the PA has to be driven as 3'b011 by SW and
// the way,index,bank information would be on PA[21:18], PA[17:9] and
if (rspPkt.rtntypU == U_CPX_PREF_ICE) {
// the BFM is not going to support this because the "address"
// only makes sense to L2 RTL. Can't properly handle this.
// The %7d time stamp needs an argument; pass the current time.
printf("\n\n%7d: WARNING, L2 BFM does not support prefetchICE, ignoring!\n\n\n", get_time(LO));
// create a single EVICTION packet and send it to every core needing it
rspPkt.targetPorts[8:0] = gParam.coreEnable;
rspPkt.rtntypU = U_CPX_EVICT;
rspPkt.rtntyp = CPX_EVICT;
tmpPa = reqPkt.addr[39:0];
// gets evict vector and invals dup tags.
rspPkt.data = gUtil.evictVector(gParam.coreEnable,
targetCores); // return val for target cores
rspPkt.targetPorts = targetCores;
// inval line from L1 & L2.
// send invalidates to other cores for this address.
// do not send response to initiating core!!!
// notify LDST sync just once when doing U_CPX_PREF_ICE
// no matter what targets get invalidated.
ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
return; // due to U_CPX_PREF_ICE
// Plusargs to dump LD/ST to logfile
if (rspPkt.rtntypU == U_CPX_STR_LD)
PR_NORMAL(CLASSNAMEQ, MON_NORMAL, psprintf("Tx LOAD PA = %h DATA = %h TYPE = %0d",reqPkt.addr,rspPkt.data,rspPkt.rtntypU));
PR_NORMAL(CLASSNAMEQ, MON_NORMAL, psprintf("T%d LOAD PA = %h DATA = %h TYPE = %0d",reqPkt.tid,reqPkt.addr,rspPkt.data,rspPkt.rtntypU));
ldstSync(reqPkt.cpuId,rspPkt);
// only cacheable loads update the duplicate D tags
if (reqPkt.nc == 0 && rspPkt.rtntypU !== U_CPX_PREF_ICE)
updateDtag(reqPkt, rspPkt, reqPkt.cpuId, TAG_VAL);
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
// queue packet for delivery, in this case, this BFM will drive it.
semaphore_put(ldstSyncLock, 1 );
U_CPX_STR_ST, U_CPX_BIS, U_CPX_BLK_ST: {
repeat (ordering(rspPkt, "ST RTN")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
if (reqPkt.addr[39] && myPort == DEV_NCU) { // I/O ?
// is it a special address for us?
// if (reqPkt.addr[IO_ASI_ADDR_NCU] == IO_ASI_CPU ||
// reqPkt.addr[IO_ASI_ADDR_NCU] == IO_ASI_NCU) {
if (reqPkt.addr[IO_ASI_ADDR_REG] == ASI_SWVR_UDB_INTR_W) {
// Send Store Ack but don't store to anything
// Send INTERRUPT packet behind it
rspPkt2 = rspPkt.object_copy();
// second packets must not have this set because it will
// cause outstandingRequests to decrement twice.
rspPkt2.sendPorts = 1 << myPort;
rspPkt2.createIntr(reqPkt.data[13:8],reqPkt.data[15:14],reqPkt.data[5:0]);
tmpData[63:0] = reqPkt.data;
void = gUtil.ioSpaceAccess(reqPkt.addr, tmpData, 0,
reqPkt.size, thread, myPort);
// } else if (reqPkt.addr[39] && myPort !== DEV_NCU &&
// (reqPkt.addr[39:32] < 8'hA0 || reqPkt.addr[39:32] > 8'hBF) ) {
// // I/O but not L2 CSRs
// error("I/O ST to L2, but it is not a L2 CSR!");
} else { // not NCU space, L2 or L2 CSR
// this only gets done on hit. nas never does this so we wont either.
// if (rspPkt.rtntypU == U_CPX_BIS)
// gMem.write512({reqPkt.addr[39:6],6'b000000}, 0, myPort);
// do not write if inv is set! BIS does not init w/ zeros.
gMem.writeBM({reqPkt.addr[39:3],3'b000}, reqPkt.data, reqPkt.size, myPort);
if (gParam.mcuMemPrint[WRITE]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(reqPkt.addr & 40'hFFFFFFFFC0, 8);
// Plusargs to dump LD/ST to logfile
if (rspPkt.rtntypU == U_CPX_STR_ST)
PR_NORMAL(CLASSNAMEQ, MON_NORMAL,
psprintf("Tx STORE PA = %h DATA = %h BYTE_MASK = %b TYPE = %0d",
reqPkt.addr,reqPkt.data,reqPkt.size,rspPkt.rtntypU));
PR_NORMAL(CLASSNAMEQ, MON_NORMAL,
psprintf("T%d STORE PA = %h DATA = %h BYTE_MASK = %b TYPE = %0d",
reqPkt.tid,reqPkt.addr,reqPkt.data,reqPkt.size,rspPkt.rtntypU));
// L1 cache tags, get inv vec and invalidate all our dupe tags as needed. ST
// always target the requester
targetCores[reqPkt.cpuId] = 1;
// OR together the per-core vectors; any core with a hit becomes a target
for (i=0;i<=gParam.coreMax;i++) {
tmpInvVect = getInvalVector(rspPkt.rtntypU, reqPkt, i);
if (tmpInvVect) targetCores[i] = 1;
invVector = invVector | tmpInvVect;
rspPkt.data = {2'b0, reqPkt.l1wayBis,
invField, reqPkt.addr[5:4], reqPkt.cpuId[2:0],
reqPkt.addr[11:6],7'b0, reqPkt.addr[3],
reqPkt.size[7:0],invVector, reqPkt.data[63:0]};
if (rspPkt.rtntypU == U_CPX_STR_ST)
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
// queue packet(s) for delivery, in this case, this BFM will drive it.
if ((reqPkt.addr[IO_ASI_ADDR_NCU] == IO_ASI_CPU ||
reqPkt.addr[IO_ASI_ADDR_NCU] == IO_ASI_NCU) &&
reqPkt.addr[IO_ASI_ADDR_REG] == ASI_SWVR_UDB_INTR_W) {
repeat (rspPkt.pkt2Delay) @(posedge portVar.$clk);
// special interrupt "reflection"
// need to invalidate other cores on store/blkSt/BIS!
rspPkt.targetPorts = targetCores;
ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
} // U_CPX_ST, U_CPX_DIAG_ST, U_CPX_STR_ST, U_CPX_BIS
U_CPX_D_INVAL, U_CPX_I_INVAL: {
repeat (ordering(rspPkt, "INVAL")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
// Core wants to invalidate all entries in the cache line
if (invField == D_INVAL) {
dtag[reqPkt.cpuId].write_tag(i,reqPkt.addr[10:4],29'b0,TAG_INVAL);
itag[reqPkt.cpuId].write_tag(i,{1'b0,reqPkt.addr[10:5]},29'b0,TAG_INVAL);
rspPkt.data = {2'b0,reqPkt.l1wayBis,invField,reqPkt.addr[5:4],reqPkt.cpuId,
reqPkt.addr[11:6],7'b0,reqPkt.addr[3],reqPkt.size,invVector,reqPkt.data[63:0]};
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
// queue packet(s) for delivery, in this case, this BFM will drive it.
ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
} // U_CPX_D_INVAL, U_CPX_I_INVAL
if (myPort == DEV_NCU) error("CAS not allowed at NCU"); // I/O
repeat (ordering(rspPkt, "CAS RTN")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
// save away cas1Data, cas1Addr.
// hold until next packet (cas2)
cas1Addr[thread] = reqPkt.addr;
cas1Data[thread] = reqPkt.data;
tmpAddr = reqPkt.addr[39:0];
tmpData = gMem.read_mem(tmpAddr,myPort);
// remember to swap on following CAS2 pkt.
cas_swap[thread] = data_equal(cas1Data[thread],tmpData,reqPkt.size);
rspPkt.CASstore = cas_swap[thread];
// always return the LOAD data at cas1Addr in first response pkt.
rspPkt.data = gMem.read128(tmpAddr,myPort,1);
if (gParam.mcuMemPrint[READ]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(tmpAddr & 40'hFFFFFFFFC0, 8);
// rspPkt.recvPort = reqPkt.cpuId;
// rspPkt.recvPorts = 1 << reqPkt.cpuId;
PR_NORMAL(CLASSNAMEQ, MON_NORMAL,
psprintf("T%d LOAD PA = %h DATA = %h TYPE = CAS swap will be true", reqPkt.tid,reqPkt.addr,tmpData));
PR_NORMAL(CLASSNAMEQ, MON_NORMAL,
psprintf("T%d LOAD PA = %h DATA = %h TYPE = CAS swap will be false", reqPkt.tid,reqPkt.addr,tmpData));
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
if (myPort == DEV_NCU) error("CAS not allowed at NCU"); // I/O
repeat (ordering(rspPkt, "CAS ACK")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
// do the swap here on the second pkt.
// compare the data at cas1Addr to the data cas1Data.
// if ==, swap cas2 data with the data at cas1Addr.
// always return the data at cas1Addr in first response pkt.
// cas1Data has rs2 data.
// cas1Addr has rs1 addr.
// if cas_swap true from previous packet, write the rd/cas2 data to mem.
tmpAddr = cas1Addr[thread];
gMem.writeBM(tmpAddr[39:0], reqPkt.data, reqPkt.size, myPort);
if (gParam.mcuMemPrint[WRITE]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
// NOTE(review): the write above used cas1Addr (tmpAddr) but the dump uses
// reqPkt.addr - confirm both always name the same line.
gMem.dumpMem(reqPkt.addr & 40'hFFFFFFFFC0, 8);
// Plusargs to dump LD/ST to logfile
if (gParam.show_store && cas_swap[thread])
PR_NORMAL(CLASSNAMEQ, MON_NORMAL,
psprintf("T%d STORE PA = %h DATA = %h BYTE_MASK = %b TYPE = CAS swap true",
reqPkt.tid,reqPkt.addr,reqPkt.data,reqPkt.size));
// L1 cache tags, get vec and invalidate all duplicate tags as needed. CAS
// always target the requester
targetCores[reqPkt.cpuId] = 1;
for (i=0;i<=gParam.coreMax;i++) {
tmpInvVect = getInvalVector(rspPkt.rtntypU, reqPkt, i);
if (tmpInvVect) targetCores[i] = 1;
invVector = invVector | tmpInvVect;
rspPkt.data = {2'b0,reqPkt.l1wayBis,invField,reqPkt.addr[5:4],reqPkt.cpuId,
reqPkt.addr[11:6],7'b0,reqPkt.addr[3],reqPkt.size,invVector,reqPkt.data[63:0]};
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
// queue packet for delivery, in this case, this BFM will drive it.
rspPkt.targetPorts = targetCores;
ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
U_CPX_SWAP_RTN, U_CPX_SWAP_ACK:
// do the swap, the return pkt, and the ack pkt here.
// we get addr and 32 bit swap data. Use size mask.
if (myPort == DEV_NCU) error("SWAP not allowed at NCU"); // I/O
repeat (ordering(rspPkt, "SWAP RTN")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
tmpAddr = {reqPkt.addr[39:4],4'b0000};
rspPkt.data = gMem.read128(tmpAddr,myPort,1);
if (gParam.mcuMemPrint[READ]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(tmpAddr & 40'hFFFFFFFFC0, 8);
// Plusargs to dump LD/ST to logfile
PR_NORMAL(CLASSNAMEQ, MON_NORMAL, psprintf("T%d LOAD PA = %h DATA = %h TYPE = SWAP",
reqPkt.tid,reqPkt.addr,rspPkt.data));
// used by fork 2 when it proceeds
rspPkt2 = rspPkt.object_copy();
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
/////////////////////////////////
repeat (ordering(rspPkt, "SWAP ACK")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
gMem.writeBM({reqPkt.addr[39:3],3'b000}, reqPkt.data, reqPkt.size, myPort);
if (gParam.mcuMemPrint[WRITE]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(reqPkt.addr & 40'hFFFFFFFFC0, 8);
rspPkt2.ccxSourced2 = reqPkt.ccxSourced;
// second packet must not have ccxSourced set because it will
// cause outstandingRequests to decrement twice.
rspPkt2.rtntyp = CPX_SWAP_ACK;
rspPkt2.rtntypU = U_CPX_SWAP_ACK;
// L1 cache tags, get vec and invalidate all duplicate tags as needed. SW
// always target the requester
targetCores[reqPkt.cpuId] = 1;
for (i=0;i<=gParam.coreMax;i++) {
tmpInvVect = getInvalVector(rspPkt2.rtntypU, reqPkt, i);
if (tmpInvVect) targetCores[i] = 1;
invVector = invVector | tmpInvVect;
rspPkt2.data = {2'b0,reqPkt.l1wayBis,invField,reqPkt.addr[5:4],reqPkt.cpuId,
reqPkt.addr[11:6],7'b0,reqPkt.addr[3],reqPkt.size,invVector,reqPkt.data[63:0]};
// Plusargs to dump LD/ST to logfile
PR_NORMAL(CLASSNAMEQ, MON_NORMAL, psprintf("T%d STORE PA = %h DATA = %h BYTE_MASK = %b TYPE = SWAP",reqPkt.tid,reqPkt.addr,reqPkt.data,reqPkt.size));
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt2.lineWay = lineState(reqPkt, *, 1);
// queue packet for delivery, in this case, this BFM will drive it.
rspPkt2.targetPorts = targetCores;
ldstSync(reqPkt.cpuId,rspPkt2);
semaphore_put(ldstSyncLock, 1 );
U_CPX_IFILL, U_CPX_NCU_IFILL: {
repeat (ordering(rspPkt, "IFILL")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
// are we NCU? return 1 packet, not 2
// Set error bit on fetch to non-boot I/O to match NCU behavior
// NOTE(review): tmpAddr is tested here before the assignment a few lines
// below; confirm it already holds the request address at this point.
if (tmpAddr[39] == 1 && tmpAddr[39:32] != 8'hff)
rspPkt.err = 2'b10; // uncorrectable error
rspPkt.wayf4b = 1; // 4 byte fill
tmpAddr = {reqPkt.addr[39:4],4'b0}; // 4 byte addressing!!!
rspPkt.data = gMem.read128(tmpAddr,myPort,1);
if (gParam.mcuMemPrint[READ]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(tmpAddr & 40'hFFFFFFFFC0, 8);
//printf("%0d: CcxDevMemBFM[%2d]::respond: T%d read @ reqPkt/tmp %0h/%0h\n", get_time(LO),myPort,reqPkt.tid,reqPkt.addr,tmpAddr);
//printf("%0d: CcxDevMemBFM[%2d]::respond: T%d read data %0h\n", get_time(LO),myPort,reqPkt.tid,rspPkt.data);
// queue packet for delivery, this BFM instance will end up driving it.
rspPkt2 = rspPkt.object_copy();
// first packet carries the 16 bytes at the 32 byte aligned address
tmpAddr = {reqPkt.addr[39:5],5'b0};
rspPkt.data = gMem.read128(tmpAddr,myPort, 1);
if (gParam.mcuMemPrint[READ]) {
printf("\n%7d00: dumpMem: dumping memory related to this request pkt:",get_cycle());
gMem.dumpMem(tmpAddr & 40'hFFFFFFFFC0, 8);
// second packets must not have this set because it will
// cause outstandingRequests to decrement twice.
// second packet carries the following 16 bytes
tmpAddr = tmpAddr + 16; //5'b10000;
rspPkt2.data = gMem.read128(tmpAddr,myPort, 1);
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
rspPkt.lineWay = lineState(reqPkt, *, 1);
updateItag(reqPkt, rspPkt, rspPkt2, reqPkt.cpuId, TAG_VAL);
// queue packets for delivery.
// Atomics must stay in order. The CPX rtl will hold the first
// pkt until the second pkt arrives. They will arrive at the core
// back to back. Putting time between pkt1 and pkt2 tests the CCX only,
// the core never sees time between them. The 2 pkts MUST go into the
// mailbox back to back to avoid another pkt from this port getting between
// ldstSync(reqPkt.cpuId,rspPkt);
semaphore_put(ldstSyncLock, 1 );
PR_ERROR (CLASSNAMEQ, MON_ERR,
psprintf ("T%d Port=%0d rtntyp=%b. Unsupported rtntyp for CCX MEM device BFM.",thread,myPort, rspPkt.rtntyp));
// Update the duplicate D tags of core cpuId for a request and invalidate a
// matching line in that core's duplicate I tags.
//   reqPkt - request; supplies the way (l1wayMMUid) and the address.
//   rspPkt - response; wayMMUid / wayf4b are filled from the I tag match.
//   cpuId  - core whose tag tables are touched.
//   how    - tag state to write (defaults to TAG_VAL).
// Does nothing on the NCU port, when cacheOff is set, or when reqPkt.nc == 1.
task CLASSNAME::updateDtag(PcxPkt reqPkt, CpxPkt rspPkt, reg [2:0] cpuId, integer how = TAG_VAL)
if (myPort == DEV_NCU || cacheOff || reqPkt.nc == 1) return;
dtag[cpuId].write_tag (reqPkt.l1wayMMUid,
// Invalidate the other (I) tag table
match = itag[cpuId].get_way ("D, chk 4 hit in I:",
{1'b0,reqPkt.addr[10:5]},
itag[cpuId].write_tag(match[3:1],{1'b0,reqPkt.addr[10:5]},29'b0,TAG_INVAL);
// report the invalidated I$ way back in the response packet
rspPkt.wayMMUid = match[3:2];
rspPkt.wayf4b = match[1];
// Update the duplicate I tags of core cpuId for an instruction fill and
// invalidate matching lines in that core's duplicate D tags.
//   reqPkt  - request; supplies way ({l1wayBis,l1wayMMUid}) and address.
//   rspPkt  - first response packet; wayMMUid is set from the first D match.
//   rspPkt2 - second response packet; wayMMUid is set from the second D match.
//   cpuId   - core whose tag tables are touched.
//   how     - tag state to write (defaults to TAG_VAL).
// Does nothing on the NCU port or when cacheOff is set.
task CLASSNAME::updateItag(PcxPkt reqPkt, CpxPkt rspPkt,
CpxPkt rspPkt2 = null, reg [2:0] cpuId, integer how = TAG_VAL)
if (myPort == DEV_NCU || cacheOff) return;
PR_ERROR(CLASSNAMEQ, MON_ERR,
psprintf ("ERROR itag: DUT request to L2$ 0x%0h should have hit in the L1$.\n\n",reqPkt.addr));
// I tag index is {1'b0,addr[10:5]}, tag value is addr[39:11]
itag[cpuId].write_tag ({reqPkt.l1wayBis,reqPkt.l1wayMMUid},
{1'b0,reqPkt.addr[10:5]},
reqPkt.addr[39:11],how,1);
// Invalidate the other (D) tag table (whether nc=0|1)
match = dtag[cpuId].get_way ("I, chk 4 hit in D:",
dtag[cpuId].write_tag({1'b0,match[3:2]},reqPkt.addr[10:4],29'b0,TAG_INVAL);
rspPkt.wayMMUid = match[3:2];
// ifill covers 2 D$ lines
// the second line is the next D tag index (addr[10:4]+1)
match = dtag[cpuId].get_way ("I, chk 4 hit in D:",
dtag[cpuId].write_tag({1'b0,match[3:2]},reqPkt.addr[10:4]+1,29'b0,TAG_INVAL);
rspPkt2.wayMMUid = match[3:2];
// Use to receive an expected packet.
// Mainly for CCX testing.
// The user passes in a packet whose fields are set to match the
// packet that should show up at this port. When it does, the
// caller is notified (a var in the passed in packet is toggled) and
// the passed in packet will be populated with what showed up at the
// destination port. Unexpected (not registered via a call to this task)
// packets will cause failure.
//   pktHndl - the expected packet; it is stored in expectedSig.
task CLASSNAME::recv(BasePkt pktHndl) {
// assign/cast pktHndl to be of PcxPkt type rather than base
//cast_assign(pcxPkt,pktHndl);
// load signature hash. key is the packet vector and data is the packet handle.
// NOTE(review): slave() looks entries up with makeSignature(); confirm that
// matches the getVector() key used here.
// expectedSig[pcxPkt.getVector()] = pktHndl;
expectedSig[pktHndl.getVector()] = pktHndl;
// Mainly for CCX testing. Call when a pkt should no longer arrive.
// For CCX, a pkt should never intentionally get dropped so this may not get used.
//   pktHndl - packet previously registered with recv(); the expectedSig
//             entry keyed by its getVector() value is deleted.
task CLASSNAME::cancelRecv(BasePkt pktHndl) {
// assign/cast pktHndl to be of PcxPkt type rather than base
//cast_assign(pcxPkt,pktHndl);
// void = assoc_index(DELETE,expectedSig,pcxPkt.getVector());
void = assoc_index(DELETE,expectedSig,pktHndl.getVector());
//----------------------------------------------------------
// Compare 2 data vectors using size as a byte mask.
// Byte n (bits [8n+7:8n]) takes part in the compare only when size[n] is
// set; a byte whose mask bit is clear counts as equal.
// Returns the AND of the eight per-byte results.
function reg CLASSNAME::data_equal (reg [63:0] data1,
reg eq0,eq1,eq2,eq3,eq4,eq5,eq6,eq7;
// eqN = (byte N masked off) OR (byte N matches)
eq7 = !size[7] | (data1[63:56]==data2[63:56]);
eq6 = !size[6] | (data1[55:48]==data2[55:48]);
eq5 = !size[5] | (data1[47:40]==data2[47:40]);
eq4 = !size[4] | (data1[39:32]==data2[39:32]);
eq3 = !size[3] | (data1[31:24]==data2[31:24]);
eq2 = !size[2] | (data1[23:16]==data2[23:16]);
eq1 = !size[1] | (data1[15: 8]==data2[15: 8]);
eq0 = !size[0] | (data1[ 7: 0]==data2[ 7: 0]);
data_equal = eq7 & eq6 & eq5 & eq4 & eq3 & eq2 & eq1 & eq0;
// hit indication, what L2 thinks the core L1 has.
// returns invalidation vector for the indicated core.
// Does the invalidate of our duplicate tags.
// The result is the OR of the D and I match values, each shifted left by
// cpuId*4 bits. A match in both tables is reported as an error.
function reg [31:0] CLASSNAME::getInvalVector(integer type,
reg [31:0] invVect_d, invVect_i;
reg [3:0] dmatch, imatch;
if (myPort == DEV_NCU || cacheOff) {
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
dtag[cpuId].dump_line("getInvalVector D1:",
// Check if store hit the D$ of given cpuId
dmatch = dtag[cpuId].get_way("getInvalVector:",
//create_vector (INSTR_TAG,dmatch,invVect_d,cpuId);
// place this core's 4 bit match field at its slot in the vector
invVect_d = dmatch << (cpuId * 4);
// Invalidate entry in given cpuId (all cpu's) D$ if found...
// Do this for Stream ST, Atomics (CAS/SWAP), Block *ST.
if ((type == U_CPX_STR_ST ||
type == U_CPX_SWAP_RTN ||
type == U_CPX_SWAP_ACK ||
reqPkt.cpuId !== cpuId) && // hitting core is not requesting core
dtag[cpuId].write_tag({1'b0,dmatch[3:2]},
// Check if store hit the I$
imatch = itag[cpuId].get_way("getInvalVector:",
{1'b0,reqPkt.addr[10:5]},
// Invalidate entry in I$ if found
if (imatch) itag[cpuId].write_tag(imatch[3:1],
{1'b0,reqPkt.addr[10:5]},
//create_vector (INSTR_TAG,imatch,invVect_i,cpuId);
invVect_i = imatch << (cpuId * 4);
// Per spec, you cannot get match in both I$ & D$
if ((imatch!=0)&(dmatch!=0)) {
PR_ERROR(CLASSNAMEQ, MON_ERR,
psprintf ("found match in both D$ and I$ for core %0d. D$=%b, I$=%b",cpuId,dmatch,imatch));
getInvalVector = invVect_d | invVect_i;
#ifdef CCXDEVMEMBFM_DEBUG
// state of tag for line at this time
dtag[cpuId].dump_line("getInvalVector D2:",
// Drive the LD/ST sync port of this BFM port for a response packet.
//   cpuId  - core id driven on $cid.
//   rspPkt - response packet; supplies CASstore, addr and the packet vector.
// Only packets with ccxSourced or ccxSourced2 set, or eviction packets, are
// considered; stream load, MMU return and prefetch return types are among
// those excluded from the notification.
task CLASSNAME::ldstSync(reg [2:0] cpuId, CpxPkt rspPkt) {
if (rspPkt.ccxSourced || rspPkt.ccxSourced2 || rspPkt.rtntyp == CPX_EVICT) {
// notify nas LD/ST Sync for all but these types.
// CAS that writes to L2 (CASstore), gntTarget, pa, pkt.
rspPkt.rtntypU !== U_CPX_STR_LD &&
rspPkt.rtntypU !== U_CPX_MMU_RTN &&
rspPkt.rtntypU !== U_CPX_PREF) {
// these are NR0 interface types (return to zero)
gLdStSyncPort[myPort].$cid <= cpuId;
gLdStSyncPort[myPort].$ctrue <= rspPkt.CASstore;
//gLdStSyncPort[myPort].$swap <= (rspPkt.rtntypU == U_CPX_SWAP_ACK || rspPkt.rtntypU == U_CPX_SWAP_RTN); // swap;
// swap is always driven to 0 (the computed version above is disabled)
gLdStSyncPort[myPort].$swap <= 0;
gLdStSyncPort[myPort].$pa <= rspPkt.addr;
gLdStSyncPort[myPort].$pkt <= rspPkt.getVector();
// Special task to potentially create an eviction packet to
// send to some cores. This task will make sure that LDST sync
// gets notified correctly. It also makes sure that
// duplicate tag updating is ordered and sane. This "pseudo request"
// is ordered like any other.
// Caller can specify a target address to evict. Will do nothing if address
// is not cached. Otherwise, we find an address.
//   coreEnable   - core mask passed to gUtil.evictVector().
//   evictPA      - address to evict (default 40'hffffffffff).
//   dCacheWeight - default 60; its use is not visible in these lines.
task CcxDevMemBFM::enqueueEvict(reg [7:0] coreEnable,
reg [39:0] evictPA = 40'hffffffffff,
integer dCacheWeight = 60) {
ccxPort portVar = gCpxPort[myPort];
//pkt.responseDelay = pkt.pkt2Delay;
pkt.sendPorts = 1 << myPort;
pkt.rtntypU = U_CPX_EVICT;
// wait our turn like any other response
repeat (ordering(pkt, "XEVICT")) @(posedge portVar.$clk);
// get semaphore for this clock
semaphore_get(WAIT, ldstSyncLock, 1);
// get vector and update L1 dup tags
vect = gUtil.evictVector (coreEnable,
// vect = gUtil.evictVinv(coreEnable,
#ifdef CCXDEVMEMBFM_DEBUG
if (! targets) printf("EVICTION gUtil.evictVector did not return any targets!!!\n");
else printf("EVICTION gUtil.evictVector return targets = %b vec = %h\n", targets,vect);
pkt.targetPorts = targets;
ldstSync(0,pkt); // notify
PR_NORMAL(CLASSNAMEQ, MON_NORMAL,
psprintf ("Sending EVICTION pkt to cores targets=0x%h, a=0x%h, vec=0x%h",
pkt.targetPorts,pkt.addr,pkt.data));
semaphore_put(ldstSyncLock, 1 );
// Look up the way for the line addressed by reqPkt in the duplicate tags of
// the requesting core and dump that line at MON_INFO level.
// IFILL requests (rqtypU == U_PCX_IFILL or rqtyp == PCX_IFILL) use the I tag
// table; the D tag table is the other table consulted.
//   reqPkt - request whose cpuId and address select the tag line.
//   why    - text handed to get_way() / dump_line().
// Returns the value from get_way().
function reg [3:0] CLASSNAME::lineState(PcxPkt reqPkt,
string why="debug lineState:",
if (reqPkt.rqtypU == U_PCX_IFILL || reqPkt.rqtyp == PCX_IFILL) {
lineState = itag[reqPkt.cpuId].get_way(why,
{1'b0,reqPkt.addr[10:5]},
itag[reqPkt.cpuId].dump_line(why, reqPkt.addr[10:4],MON_INFO);
lineState = dtag[reqPkt.cpuId].get_way(why,
dtag[reqPkt.cpuId].dump_line(why, reqPkt.addr[10:4],MON_INFO);
// Hold off the responses until outstandingReqs reaches amount.
// This will clump responses into bursts periodically.
//   amount - number of outstanding requests to collect before releasing.
// Responses are blocked by holding ldstSyncLock.
task CLASSNAME::burstResp(integer amount)
ccxPort portVar = gCpxPort[myPort];
// this port drives the burst sync when gParam.burstSync names it
if (gParam.burstSync == myPort-8 || gParam.burstSync == myPort) iSync = 1;
wait = gParam.burstHoldoff;
repeat (10) @(negedge gPcxPort[myPort].$clk);
tmp = gUtil.getThreadEnables();
while (tmp !== gOutOfBoot) wait_var(gOutOfBoot); // all threads out of boot
// make stall agreeable so we do not stall cores requests when trying to
PR_ALWAYS(CLASSNAMEQ, MON_ALWAYS,
psprintf("Port %2d, stallStart changed to %0d due to burst option.",myPort, stallStart));
if (outstandingReqs < amount) { // just drain naturally if too many in queue
semaphore_get(WAIT, ldstSyncLock, 1); // stop responding
// start responding after amount or burstSync or x clocks
while (outstandingReqs < amount) wait_var(outstandingReqs); // wait
repeat (wait) @(posedge gPcxPort[myPort].$clk); // but not too long
wait_var(burstSync); // burst on sync when in use
terminate; // kill remaining forks
if (iSync) burstSync = ~burstSync; // signal the other banks
if (outstandingReqs >= 2)
PR_INFO(CLASSNAMEQ, MON_INFO,
psprintf("Port %2d, Letting %0d packets burst through (reason=%0d)",myPort, outstandingReqs, reason));
semaphore_put(ldstSyncLock, 1 ); // allow normal responses
repeat (5) @(posedge gPcxPort[myPort].$clk); // give a little break