// Initial commit of OpenSPARC T2 architecture model.
// [OpenSPARC-T2-SAM] / sam-t2 / sam / analyzers / rstracer / rstracer.cc
// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T2 Processor File: rstracer.cc
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
/* rstracer.cc -- rstrace for SAM v5 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <assert.h>
#include "rstf/rstf.h"
#include "spix_sparc.h"
#define USE_RZ3
#ifdef USE_RZ3
#include "rstzip/Rstzip.H"
#endif
#include "system.h"
#include "dev_registry.h"
#include "vtracer.h"
#include "rstracer.h"
// Singleton tracer instance; NULL until the module is loaded via
// vtracer_init() and reset to a live object for the process lifetime.
static class rstracer * thetracer = NULL;
// non-reentrant function, called from the UI thread
// when the module is loaded
extern "C" void vtracer_fini();
// Module entry point. Registers vtracer_fini() with atexit() so any open
// trace files are finalized at process exit, constructs the singleton
// rstracer, and returns it as an opaque handle to the loader.
extern "C" void * vtracer_init(const char *tmp_modname)
{
atexit(vtracer_fini);
thetracer = new rstracer(tmp_modname);
return (void *) thetracer;
}
//non-reentrant function, called from the UI thread
// Thin trampoline handed to the UI command registry: forwards the "rstrace"
// command arguments to the singleton tracer.
int rstrace_cmd_action(void * /* usrdata */, int argc, char **tmp_argv)
{
return thetracer->process_ui_cmd(argc, tmp_argv);
}
// UI registry hooks provided by the host simulator.
extern void UI_register_cmd_2 (char * name, char *help, int (*efn)(void *, int, char **), int (*hfn)());
extern void UI_invalidate_cmd (char * name);
// One-line summary shown by the UI for the rstrace command.
char help_str[] = "rstrace -o <file> -n <icount>";
// Address mask derived from PSTATE.AM (bit 3): when AM is set, virtual
// addresses are truncated to the low 32 bits; otherwise left untouched.
#define MASK_PSTATE_AM(SA_pstate) ( (((SA_pstate)>>3)&1) ? ((uint64_t)(~0u)) : (~0ull) )
// non-reentrant function, called from the UI thread
// (from vtracer_init())
// Constructor: captures the module name, sizes the per-cpu tracer array by
// the maximum vcpu id, and registers the "rstrace" UI command.
rstracer::rstracer(const char * tmp_modname)
{
modname = strdup(tmp_modname);
SAM_intf = NULL;
// FIXME: get ncpus from the VTracer_SAM_intf
ncpus = g_nvcpu;
first_vcpu_id = -1; // resolved lazily in trace_on() to the lowest live vcpu
last_vcpu_id = g_vcpu_id_max;
tracefilename[0] = 0;
// one per-cpu trace context per possible vcpu id (indexed by vcpu id)
pcs = new rst_pct[last_vcpu_id + 1];
tracing = false;
// protects sync_count and the per-cpu state transitions in instr()
mutex_init(&mu, USYNC_THREAD, NULL);
sync_count = 0;
// FIXME: this is stricly temporary. to be replaced by ui_cmd method
UI_register_cmd_2(strdup("rstrace"), help_str, rstrace_cmd_action, NULL);
}
// non-reentrant; called once when the module is loaded to hand the tracer
// its SAM interface handle. Always succeeds.
int rstracer::attach(VTracer_SAM_intf * sam_intf) {
    this->SAM_intf = sam_intf;
    return 0;
}
// Usage/help text printed on any rstrace argument error (covers both the
// preferred v5 flag syntax and the legacy v4 positional syntax).
const char usage[] =
"rstrace # print rstracer status\n"
"rstrace off # turn off tracing and close trace file\n"
"rstrace [-o <file>] [-n <insts-per-cpu>] [-d <initial-delay>] [-x <ntraces> [-p <period>]]\n"
"Alternative rstrace command format:\n"
"rstrace <file> [+<delay>] [<totalinsts>] [<ntraces> [<period>]]\n"
" <file> should be base name to which cpu<n>.rz3.gz is appended.\n"
" <file> can be - for default /tmp/rstracer<pid>_<date>.cpu<n>.rz3.gz\n"
" +<delay> can be +0 or + for immediate; must be specified\n"
" period is the interval between starting pts of periodic traces\n"
" period assumed to be equal to trace size unless larger value specified\n"
" totalinsts, if unspecified, is indefinite (until next rstrace off cmd)\n";
// FIXME: add sampling options
// non-reentrant function, called from UI thread
// Dispatch for the "rstrace" UI command: no args prints status; "off" stops
// tracing; otherwise the arguments are parsed (v4 positional or v5 flags)
// and a new trace is armed via trace_on(). Returns 0 in all cases.
int rstracer::process_ui_cmd(int argc, char **tmp_argv)
{
if (argc == 1) {
print_status();
return 0;
}
// reset trace parameters to their defaults before parsing
insts_per_cpu = LLONG_MAX;
ntraces = 1;
initial_delay = 0;
trace_period = 0;
// argv[1] can be "off", -, filename or -flag
if (strcmp(tmp_argv[1], "off") == 0) {
if (!tracing) {
fprintf(stderr, "%s: tracing is already off\n", id);
return 0;
}
// FIXME: blaze must be stopped
ntraces = 0; // reset target number of traces
trace_off();
return 0;
}
// tracing must be off for any other command
if (tracing) {
fprintf(stderr, "%s: ERROR: tracing is currently on. Usage: %s", id, usage);
return 0;
}
// "-" or a bare filename selects the legacy v4 positional syntax;
// anything starting with '-' is a v5 flag
if ((strcmp(tmp_argv[1], "-") == 0) || (tmp_argv[1][0] != '-')) {
// v4 format
if (parse_args_v4(argc, (const char **) tmp_argv) != 0) {
return 0;
}
} else {
if (parse_args_v5(argc, (const char **) tmp_argv) != 0) {
return 0;
}
}
// if we are still here, this was a trace start command
tracing = true;
traces_done = 0;
delay = initial_delay;
trace_on();
return 0;
} // int rstracer::process_ui_cmd(int argc, char **tmp_argv)
// non-reentrant function - called from process_ui_cmd()
// Parses the legacy positional command form:
//   rstrace <file|-> +[<delay>] [<totalinsts>] [<ntraces> [<period>]]
// Counts given on the command line are whole-system instruction counts and
// are divided (rounded up) across ncpus. Returns 0 on success, 1 on error
// (after printing the usage text).
int rstracer::parse_args_v4(int argc, const char * tmp_argv[])
{
    if (argc < 3) {
        fprintf(stderr, "%s: ERROR: insufficient number of arguments. Usage: \n%s", id, usage);
        return 1;
    }
    if (strcmp(tmp_argv[1], "-") == 0) {
        // use default trace file name
    } else {
        // FIXME: the legacy format expects to append the time & date to the filename?
        strcpy(tracefilename, tmp_argv[1]);
    }
    if (tmp_argv[2][0] != '+') {
        fprintf(stderr, "%s: ERROR: second argument must be +[<delay>]. Usage: \n%s", id, usage);
        return 1;
    }
    if (tmp_argv[2][1] == 0) {
        // bare "+" means immediate start (default delay of 0)
    } else {
        initial_delay = strtoll(tmp_argv[2] + 1, NULL, 0);
    }
    // trace size (argument 3): total insts, split evenly across cpus
    if (argc >= 4) {
        int64_t trsize = strtoll(tmp_argv[3], NULL, 0);
        insts_per_cpu = (trsize + ncpus - 1) / ncpus;
    } else {
        // use default
    }
    // number of traces (argument 4)
    if (argc >= 5) {
        ntraces = (int) strtol(tmp_argv[4], NULL, 0);
    }
    // trace period (argument 5)
    if (argc >= 6) {
        // BUG FIX: previously read tmp_argv[4] (the ntraces argument) and
        // truncated the value through an (int) cast; the period is argument
        // 5 and must remain a 64-bit count.
        int64_t psize = strtoll(tmp_argv[5], NULL, 0);
        trace_period = (psize + ncpus - 1)/ncpus;
        if (trace_period < insts_per_cpu) {
            if (trace_period != 0) {
                fprintf(stderr, "%s: WARNING: period (%lld insts/cpu) < trace size (%lld insts/cpu). ignoring.\n",
                        id, trace_period, insts_per_cpu);
            }
            // period can never be shorter than the trace itself
            trace_period = insts_per_cpu;
        }
    }
    if (argc >= 7) {
        fprintf(stderr, "%s: ERROR: too many arguments. Usage: \n%s", id, usage);
        return 1;
    }
    return 0;
} // void rstracer::parse_args_v4(int argc, const char * tmp_argv[])
// non-reentrant function - called from process_ui_cmd()
int rstracer::parse_args_v5(int argc, const char * tmp_argv[])
{
// preferred command format
int i = 1;
while(i < argc) {
const char * arg = tmp_argv[i++];
if (strcmp(arg, "-o") == 0) {
if (i == argc) {
fprintf(stderr, "%s: ERROR: -o requires an argument. Usage: \n%s", id, usage);
return 1;
}
strcpy(tracefilename, tmp_argv[i++]);
} else if (strcmp(arg, "-n") == 0) {
if (i == argc) {
fprintf(stderr, "%s: ERROR: -n requires an argument. Usage: \n%s", id, usage);
return 1;
}
insts_per_cpu = strtoll(tmp_argv[i++], NULL, 0);
} else if (strcmp(arg, "-d") == 0) {
if (i == argc) {
fprintf(stderr, "%s: ERROR: -d requires an argument. Usage: \n%s", id, usage);
return 1;
}
initial_delay = strtoll(tmp_argv[i++], NULL, 0);
} else if(strcmp(arg, "-x") == 0) {
if (i == argc) {
fprintf(stderr, "%s: ERROR: -x requires an argument. Usage: \n%s", id, usage);
return 1;
}
ntraces = (int) strtol(tmp_argv[i++], NULL, 0);
} else if (strcmp(arg, "-p") == 0) {
if (i == argc) {
fprintf(stderr, "%s: ERROR: -x requires an argument. Usage: \n%s", id, usage);
return 1;
}
trace_period = strtoll(tmp_argv[i++], NULL, 0);
} else {
fprintf(stderr, "%s: ERROR: invalid argument %s. Usage: \n%s", id, arg, usage);
return 1;
}
}
// check args
if (trace_period < insts_per_cpu) {
if (trace_period != 0) {
fprintf(stderr, "%s: WARNING: period (%lld insts/cpu) < trace size (%lld insts/cpu). ignoring.\n",
id, trace_period, insts_per_cpu);
}
trace_period = insts_per_cpu;
}
return 0;
} // void rstracer::parse_args_v5(int argc, const char * tmp_argv[])
// NON-REENTRANT function
// called from parse_args() (while blaze is stopped) from the UI thread.
// also called from trace_off() (while all cpus are done tracing) in case
// one more trace is needed. In that case, trace_off() should not modify
// state
// Computes the trace file base name (default name includes pid and a
// timestamp; periodic traces get a ".trace<NNN>" suffix) and initializes
// every live vcpu's per-cpu trace context, arming it in either DELAY or
// TRACE_START state.
void rstracer::trace_on()
{
    char fname[PATH_MAX];
    if (tracefilename[0] == 0) {
        // default trace file name: /tmp/rstrace<pid>_<yyyymmdd>_<hhmmss>
        time_t curlocaltime = time(NULL);
        struct tm * localtm = localtime(&curlocaltime);
        // BUG FIX: tm_mon is 0-based (0..11); add 1 so the month in the file
        // name matches the date string written by emit_trace_preamble().
        sprintf(fname, "/tmp/rstrace%d_%04d%02d%02d_%02d%02d%02d",
                getpid(), localtm->tm_year+1900, localtm->tm_mon+1, localtm->tm_mday,
                localtm->tm_hour, localtm->tm_min, localtm->tm_sec);
    } else {
        strcpy(fname, tracefilename);
    }
    if (ntraces > 1) {
        // distinguish the files of each trace in a periodic series
        char str[16];
        sprintf(str, ".trace%03d", traces_done);
        strcat(fname, str);
    }
    int i;
    for (i=0; i<=last_vcpu_id; i++) {
        if (get_vcpu(i))
        {
            // remember the lowest live vcpu id (used by print_status/async)
            if (first_vcpu_id<0)
                first_vcpu_id = i;
            pcs[i].init(i, fname);
            // if there is no delay to the trace, do not synchronize
            pcs[i].state = delay ? rst_pct::state_DELAY : rst_pct::state_TRACE_START;
        } else {
            // hole in the vcpu id space: mark the slot unused
            pcs[i].init(-1, NULL);
        }
    } // for each cpuid
} // void rstracer::trace_on()
// NON-REENTRANT function - called from trace_on()
// Initializes one per-cpu trace context: resets counters and cached
// context/pavadiff state, pre-builds the reusable regval/memval record
// templates, clears the memval cache, and opens the per-cpu output file
// (<base>.cpu<N>.rz3.gz compressed, or .rst raw). A NULL filename marks
// the slot as unused (state_NIL).
void rst_pct::init(int arg_cpuid, const char * tmp_tracefilename)
{
    cpuid = arg_cpuid;
    // force the first instruction to emit context/pavadiff records
    icontext = dcontext = ~0u;
    pc_pavadiff = ea_pavadiff = ~0ull;
    ninsts = 0;
    nrecs = 0;
    dinsts = 0;
    if (!tmp_tracefilename)
    {
        state = rst_pct::state_NIL;
        return;
    }
    // reusable regval record template (two register slots per record)
    regval.rtype = REGVAL_T;
    regval.postInstr = 1;
    rstf_regvalT_set_cpuid(&regval, cpuid);
    regval.regtype[0] = regval.regtype[1] = RSTREG_UNUSED_RT;
    hpr = pr = 0;
    // direct-mapped cache of memory blocks already dumped to the trace
    memcache = new uint64_t [RSTF_MEMVAL_CACHE_LINES];
    int i;
    for (i=0; i<RSTF_MEMVAL_CACHE_LINES; i++) {
        memcache[i] = ~0ull;
    }
    // reusable memval record templates
    memset(&mv64, 0, sizeof(mv64));
    mv64.rtype = MEMVAL_T;
    rstf_memval64T_set_cpuid(&mv64, cpuid);
    mv64.size = 8;
    // BUG FIX: was memset(&mv128, 0, sizeof(mv64)) - only cleared the first
    // sizeof(mv64) bytes of the 128-bit record, leaving the rest uninitialized.
    memset(&mv128, 0, sizeof(mv128));
    mv128.rtype = MEMVAL_T;
    mv128.ismemval128 = 1;
    mv128.isContRec = 1;
    rstf_memval128T_set_cpuid(&mv128, cpuid);
#ifdef USE_RZ3
    sprintf(fname, "%s.cpu%d.rz3.gz", tmp_tracefilename, cpuid);
    rz = new Rstzip;
    int rzerr = rz->open(fname, "w", "verbose=0");
    if (rzerr != RSTZIP_OK) {
        perror(fname);
        exit(1);
    }
#else
    sprintf(fname, "%s.cpu%d.rst", tmp_tracefilename, cpuid);
    trf = fopen(fname, "w");
    if (trf == NULL) {
        perror(fname);
        exit(1);
    }
#endif
} // void rst_pct::init(int arg_cpuid, const char * tmp_tracefilename)
static const uint8_t CH_MMU_CONTEXTREG_ASI = 0x58;
static const uint8_t UA_MMU_CONTEXTREG_ASI = 0x21; // ultrasparc arch 2005 and newer
// REENTRANT function - called from per cpu instr callback
void rst_pct::emit_trace_preamble()
{
printf("%s: starting trace for cpu%d\n", id, cpuid);
rstf_unionT ru;
ru.proto.rtype = RSTHEADER_T;
ru.header.majorVer = RSTF_MAJOR_VERSION;
ru.header.minorVer = RSTF_MINOR_VERSION;
ru.header.percent = '%';
sprintf(ru.header.header_str, "%s v%s", RSTF_MAGIC, RSTF_VERSION_STR);
addrec(&ru);
// Traceinfo
ru.tlevel.rtype = TRACEINFO_T;
ru.tlevel.rtype2 = RSTT2_NLEVEL_T;
ru.tlevel.level = 0;
ru.tlevel.val32 = 0;
time_t curtime = (uint64_t) time(NULL);
ru.tlevel.time64 = curtime;
addrec(&ru);
memset(&ru, 0, sizeof(ru));
ru.cpuinfo.rtype = TRACEINFO_T;
ru.cpuinfo.rtype2 = RSTT2_CPUINFO_T;
ru.cpuinfo.numcpus = 1; // in this cpu's trace
ru.cpuinfo.min_cpu_id = ru.cpuinfo.max_cpu_id = cpuid;
addrec(&ru);
memset(&ru, 0, sizeof(ru));
ru.cpuidinfo.rtype = TRACEINFO_T;
ru.cpuidinfo.rtype2 = RSTT2_CPUIDINFO_T;
ru.cpuidinfo.cpuids[0] = cpuid;
addrec(&ru);
char desc[512];
// descriptor string records
sprintf(desc, "SAM [rstracer.so]");
string2rst(desc);
struct tm localtm;
localtime_r((const time_t *)&curtime, &localtm);
sprintf(desc, "date=%04d-%02d-%02d_%02d:%02d:%02d",
localtm.tm_year+1900, localtm.tm_mon+1, localtm.tm_mday,
localtm.tm_hour, localtm.tm_min, localtm.tm_sec);
string2rst(desc);
sprintf(desc, "host:");
string2rst(desc);
gethostname(desc, 512);
string2rst(desc);
sprintf(desc, "<SAMinfo>");
string2rst(desc);
sprintf(desc, "blz::version=%s", SYSTEM_get_infostr());
string2rst(desc);
// get device ids/names
// FIXME: in the next putback, replace this direct access
// to sam internal structures with the system abstraction
extern devRegistry * samDevs;
int devid=1;
while(1) {
const char * devname = samDevs->getName(devid);
if (strcmp(devname, "unknown device") == 0) {
break;
} else {
int namelen = strlen(devname);
if (namelen > 18) namelen = 18;
// output a record for this device
rstf_devidstrT devidstr = {0};
devidstr.rtype = DEVIDSTR_T;
devidstr.id = devid;
strncpy(devidstr.str, devname, namelen);
addrec((rstf_unionT*)&devidstr);
}
devid++;
}
sprintf(desc, "blz::ncpus=%d", g_nvcpu);
string2rst(desc);
Vcpu * my_vcpu = g_vcpu[cpuid];
VCPU_TLB * tlb_entries = NULL;
int n_entries = my_vcpu->get_tlb_entries(tlb_entries);
int i;
if(n_entries > 0) {
rstf_tlbT tlbrec = {0};
rstf_tlbT_set_cpuid(&tlbrec, cpuid);
tlbrec.rtype = TLB_T;
for (i=0; i<n_entries; i++) {
tlbrec.tlb_type = tlb_entries[i].tlb_type;
tlbrec.tlb_index = tlb_entries[i].tlb_index;
tlbrec.tlb_no = tlb_entries[i].tlb_no;
tlbrec.tte_tag = tlb_entries[i].tte_tag;
tlbrec.tte_data = tlb_entries[i].tte_data;
// FIXME: this needs changes in rst, rstzip etc.
// for sun4v, we use the field currently named "unused16" for context
// and bit 0 of the field "unused" for "is_real"
if (tlb_entries[i].format == 1) { // sun4v
tlbrec.unused16 = tlb_entries[i].tte_context;
tlbrec.unused = tlb_entries[i].is_real;
}
addrec((rstf_unionT *)&tlbrec);
}
free(tlb_entries);
}
uint64_t v64;
// hpriv bit
int rv = my_vcpu->get_reg(VCPU_HPR_HPSTATE, &v64);
if (rv == 0) {
hpr = (v64>>2) & 1;
} else {
hpr = 0;
}
// regvals: hpriv regs
for (i=0; i<=32; i++) {
if (my_vcpu->get_reg(VCPU_HPR_0 + i, &v64) == 0) {
add_regval(RSTREG_HPRIV_RT, i, v64);
}
}
flush_regval();
// regvals: priv regs
for (i=0; i<32; i++) {
int regid = VCPU_PR_0 + i;
if (my_vcpu->get_reg(regid, &v64)==0) {
add_regval(RSTREG_PRIV_RT, i, v64);
if (regid == VCPU_PR_PSTATE) {
pr = (v64>>2) & 1;
mask_pstate_am = MASK_PSTATE_AM(v64);
}
}
}
flush_regval();
// regvals: trap-level regs
uint64_t curtl;
my_vcpu->get_reg(VCPU_PR_TL, &curtl);
int tl;
for (tl=1; tl<=curtl; tl++) {
my_vcpu->set_reg(VCPU_PR_TL, tl);
my_vcpu->get_reg(VCPU_PR_TPC, &v64);
add_regval(RSTREG_PRIV_RT, RSTREG_TPC_RBASE + 8*0 + tl, v64);
my_vcpu->get_reg(VCPU_PR_TNPC, &v64);
add_regval(RSTREG_PRIV_RT, RSTREG_TPC_RBASE + 8*1 + tl, v64);
my_vcpu->get_reg(VCPU_PR_TSTATE, &v64);
add_regval(RSTREG_PRIV_RT, RSTREG_TPC_RBASE + 8*2 + tl, v64);
my_vcpu->get_reg(VCPU_PR_TT, &v64);
add_regval(RSTREG_PRIV_RT, RSTREG_TPC_RBASE + 8*3 + tl, v64);
}
my_vcpu->set_reg(VCPU_PR_TL, curtl);
flush_regval();
// regvals: asr regs
for (i=0; i<32; i++) {
if (my_vcpu->get_reg(VCPU_ASR_0 + i, &v64)==0) {
add_regval(RSTREG_OTHER_RT, i, v64);
}
}
flush_regval();
// regvals: cur int regs
for (i=0; i<32; i++) {
my_vcpu->get_reg(VCPU_IRF_0 + i, &v64);
add_regval(RSTREG_INT_RT, i, v64);
}
flush_regval();
// all globals
// regvals: win int regs
// regvals: fp regs
// for dregs, the regnum encoding is same as in sparcv9
// regid is EVEN, and up to 6 bits (0, 2, .. 62).
// regnum = {regid[4:1],regid[5]}
unsigned regid;
for (regid=0; regid<64; regid+=2) {
int regnum = (regid & 0x1e) | (regid >> 5);
my_vcpu->get_reg(VCPU_DRF_0 + regid/2, &v64);
add_regval(RSTREG_FLOAT_RT, 32+(regid/2), v64);
}
flush_regval();
// icontext and dcontext regs
// FIXME: using magic numbers from UltraSPARC architecture for now
uint64_t reg64;
uint8_t mmu_asi;
if ((my_vcpu->config.cpu_type & VCPU_IMPL_SIM_MASK) == VCPU_IMPL_SIM_BLAZE) {
mmu_asi = CH_MMU_CONTEXTREG_ASI;
} else {
mmu_asi = UA_MMU_CONTEXTREG_ASI;
}
my_vcpu->get_asi(mmu_asi, RSTREG_MMU_PCONTEXT, reg64); pcontext = (uint32_t) reg64;
my_vcpu->get_asi(mmu_asi, RSTREG_MMU_SCONTEXT, reg64); scontext = (uint32_t) reg64;
add_regval(RSTREG_MMU_RT, RSTREG_MMU_PCONTEXT, pcontext);
add_regval(RSTREG_MMU_RT, RSTREG_MMU_SCONTEXT, scontext);
flush_regval();
} // void rst_pct::emit_trace_preamble()
// Appends one RST record to this cpu's trace stream (compressed via rstzip
// when USE_RZ3 is defined, raw fwrite otherwise) and counts it.
void rst_pct::addrec(rstf_unionT * ru)
{
#ifdef USE_RZ3
    rz->compress(ru, 1);
#else
    // BUG FIX: rv was assigned without a visible declaration in this branch;
    // declare it locally with fwrite's return type (number of items written).
    size_t rv = fwrite(ru, sizeof(rstf_unionT), 1, trf);
    if (rv != 1) perror(fname);
#endif
    nrecs++;
} // void rst_pct::addrec(rstf_unionT * ru)
// Encodes an arbitrary-length C string into the trace as a sequence of
// 23-byte STRCONT_T continuation records followed by one final STRDESC_T
// record carrying the NUL-terminated tail (at most 22 chars + NUL).
void rst_pct::string2rst(const char * str)
{
    rstf_unionT ru;
    size_t remaining = strlen(str);
    // emit full 23-byte chunks as continuation records
    while (remaining >= 23) {
        ru.string.rtype = STRCONT_T;
        strncpy(ru.string.string, str, 23);
        addrec(&ru);
        str += 23;
        remaining -= 23;
    }
    // the terminating record holds whatever is left (possibly empty)
    ru.string.rtype = STRDESC_T;
    strcpy(ru.string.string, str);
    addrec(&ru);
} // void rst_pct::string2rst(const char * str)
// Buffers one register value into the pending regval record. A record holds
// two values: slot 0 is filled first; filling slot 1 completes the record,
// which is then flushed to the trace.
void rst_pct::add_regval(int rstregtype, int rstregid, uint64_t v64) {
    const int slot = (regval.regtype[0] == RSTREG_UNUSED_RT) ? 0 : 1;
    regval.regtype[slot] = rstregtype;
    regval.regid[slot] = rstregid;
    regval.reg64[slot] = v64;
    if (slot == 1) {
        flush_regval();
    }
} // void rst_pct::add_regval() {
// Emits the pending regval record (if it holds at least one value) and
// resets both slots to unused.
void rst_pct::flush_regval()
{
    if (regval.regtype[0] == RSTREG_UNUSED_RT) {
        return; // nothing buffered
    }
    addrec((rstf_unionT *) &regval);
    regval.regtype[0] = regval.regtype[1] = RSTREG_UNUSED_RT;
} // void rst_pct::flush_regval()
// Mask that clears the block-offset bits of a physical address.
const uint64_t MEMCACHE_TAGMASK = ~((uint64_t)(RSTF_MEMVAL_CACHE_BLOCKSIZE-1));
// Models a small direct-mapped cache of memory blocks whose contents have
// already been written to the trace. On a miss the 128-byte block at pa is
// dumped as 2 memval64 + 7 memval128 records. Returns 1 on hit (nothing
// emitted), 0 on miss.
int rst_pct::memcache_ref(uint64_t pa)
{
uint64_t tag = pa & MEMCACHE_TAGMASK;
// NOTE(review): the index is taken from the tag without shifting out the
// block-offset bits; if RSTF_MEMVAL_CACHE_BLOCKSIZE >= RSTF_MEMVAL_CACHE_LINES
// the low index bits are always zero and most lines go unused - verify
// against the RSTF constants (conventional form would be
// (pa >> log2(BLOCKSIZE)) & (LINES-1)).
uint64_t idx = tag & (RSTF_MEMVAL_CACHE_LINES-1);
int rv = (tag == memcache[idx]);
if (rv == 0) {
// process miss
memcache[idx] = tag;
// output memvals: 2x memval64, 7x memval128 (16 + 112 = 128 bytes)
uint64_t addr = tag;
mv64.addr = addr;
mv64.val = memread64u(mm1, addr);
addrec((rstf_unionT*) &mv64);
addr += 8;
mv64.addr = addr;
mv64.val = memread64u(mm1, addr);
addrec((rstf_unionT*) &mv64);
addr += 8;
int i;
for (i=0; i<7; i++) {
rstf_memval128T_set_addr(&mv128, addr);
mv128.val[0] = memread64u(mm1, addr);
addr += 8;
mv128.val[1] = memread64u(mm1, addr);
addrec((rstf_unionT*)&mv128);
addr += 8;
}
} // hit or miss?
return rv;
} // int rst_pct::memcache_ref(uint64_t pa)
// this function is called from a cpu after all cpus are done
// tracing, or the rstrace off ui command. In either case,
// it is NON-REENTRANT
// Finalizes the per-cpu trace files. If fewer than ntraces traces have been
// produced, immediately re-arms the next trace in the periodic series with
// a delay equal to the remainder of the period; otherwise clears `tracing`.
void rstracer::trace_off()
{
int i;
if (! tracing) {
fprintf(stderr, "%s: tracing is already off\n", id);
} else {
printf("%s: finalizing trace(s)...\n", id);
// close files
for (i=0; i<=last_vcpu_id; i++) {
pcs[i].fini();
}
traces_done++;
if (traces_done >= ntraces) {
tracing = false;
} else {
// start next trace: wait out the rest of the period
// (trace_period >= insts_per_cpu is guaranteed by the arg parsers)
delay = trace_period - insts_per_cpu;
trace_on();
}
} // tracing?
} // void rstracer::trace_off()
// Closes this cpu's trace output (rstzip stream or raw file) and reports
// the totals. A NIL state means this vcpu slot never traced; do nothing.
void rst_pct::fini()
{
    if (state != rst_pct::state_NIL) {
#ifdef USE_RZ3
        rz->close();
        delete rz;
        rz = NULL;
#else
        fclose(trf);
        trf = NULL;
#endif
        printf("%s: cpu%d: trace written to %s - %lld insts, %lld records\n", id, cpuid, fname, ninsts, nrecs);
    }
} // void vtrace_per_cpu_tracer::fini()
// Prints tracer status to stdout for the bare "rstrace" UI command, using
// the first live vcpu's state as representative of all cpus.
void rstracer::print_status()
{
// use cpu0 to identify status
if (!tracing) {
printf("%s: idle\n", id);
} else {
enum rst_pct::state_e state = pcs[first_vcpu_id].state;
int64_t dleft;
switch(state) {
case rst_pct::state_DELAY:
// still counting down the pre-trace delay
dleft = (delay-pcs[first_vcpu_id].dinsts);
if (dleft < 0) dleft = 0;
printf("%s: in delayed tracing mode. remaining delay is approx %lld insts/cpu (%lld total)\n",
id, dleft, ncpus*dleft);
if (ntraces > 1) {
printf(" %d traces out of %d done\n", traces_done, ntraces);
}
break;
// any of the active/synchronizing states counts as "tracing"
case rst_pct::state_TRACING:
case rst_pct::state_TRACE_START:
case rst_pct::state_WAIT_SYNC_START:
case rst_pct::state_WAIT_START:
case rst_pct::state_WAIT_SYNC_STOP:
case rst_pct::state_WAIT_STOP:
printf("%s: tracing: approx %lld insts/cpu out of %lld done\n",
id, pcs[first_vcpu_id].ninsts, insts_per_cpu);
if (ntraces > 1) {
printf(" trace number %d of 0..%d in progress\n", traces_done, ntraces-1);
}
break;
default:
fprintf(stderr, "%s: ERROR: in invalid state (%d)\n", id, state);
}
} // tracing?
} // void rstracer::print_status()
// Maps VCPU register-type codes (array index) to RST regval register types.
// Order must match the VCPU_*_RTYPE enumeration used by ii->dreg[i].r.type.
static const int vcpu_rtype_to_rst[] = {
RSTREG_UNUSED_RT, // unused in vcpu
RSTREG_PRIV_RT,
RSTREG_OTHER_RT,
RSTREG_INT_RT,
RSTREG_FLOAT_RT, // single
RSTREG_FLOAT_RT, // double
RSTREG_HPRIV_RT, // hyperprivileged registers
};
// true when the decoded opcode is a FLUSH/FLUSHA instruction
#define RSTRACER_IOP_IS_FLUSH(_IOP_) (((_IOP_)==SPIX_SPARC_IOP_FLUSH)||((_IOP_)==SPIX_SPARC_IOP_FLUSHA))
// REENTRANT function
// Per-instruction callback. First drives the per-cpu state machine
// (DELAY -> WAIT_SYNC_START -> TRACE_START -> TRACING -> WAIT_SYNC_STOP),
// using recursive self-calls after each state change so the current
// instruction is handled under the new state. In state_TRACING it emits the
// instr record plus any needed pavadiff/memval/regval/trapping-instr
// records for this instruction. Returns 0 always.
int rstracer::instr(VCPU_Instruction * ii)
{
if (!tracing) {
return 0;
}
int cpuid = ii->cpuid;
if (pcs[cpuid].state != rst_pct::state_TRACING) { // rule out common case quickly
int i;
switch(pcs[cpuid].state) {
case rst_pct::state_DELAY:
// count down the pre-trace delay on this cpu
if (pcs[cpuid].dinsts >= delay) {
pcs[cpuid].state = rst_pct::state_WAIT_SYNC_START;
return instr(ii); // recursive call (with changed state)
}
pcs[cpuid].dinsts++;
return 0;
case rst_pct::state_WAIT_SYNC_START:
// rendezvous: last cpu to arrive releases everyone into TRACE_START
mutex_lock(&mu);
// increment sync_count. if ==ncpu, start everyone
sync_count++;
if (sync_count == ncpus) {
for (i=0; i<ncpus; i++) {
pcs[i].state = rst_pct::state_TRACE_START;
}
sync_count = 0;
} else {
pcs[cpuid].state = rst_pct::state_WAIT_START;
}
mutex_unlock(&mu);
return instr(ii); // recursive call (with changed state)
case rst_pct::state_WAIT_START:
// parked until the last cpu flips us to TRACE_START
return 0;
case rst_pct::state_WAIT_SYNC_STOP:
// rendezvous at the end: last cpu to arrive finalizes the trace
mutex_lock(&mu);
// increment sync_count. if ==ncpu, start everyone
sync_count++;
if (sync_count == ncpus) {
trace_off();
sync_count = 0;
} else {
pcs[cpuid].state = rst_pct::state_WAIT_STOP;
}
mutex_unlock(&mu);
return instr(ii); // recursive call (with changed state)
case rst_pct::state_WAIT_STOP:
return 0;
case rst_pct::state_TRACE_START:
// first traced instruction on this cpu: emit the preamble first
pcs[cpuid].emit_trace_preamble();
pcs[cpuid].state = rst_pct::state_TRACING;
// include this instruction in trace
return instr(ii); // recursive call (with changed state)
default:
fprintf(stderr, "%s: ERROR: rstracer::instr() - invalid state (%d)\n",
id, (int) pcs[cpuid].state);
assert(0);
} // switch(state)
} // if not state_TRACING
// in state_TRACING: generate trace records for current instruction
rstf_unionT ru;
rstf_pavadiffT pd;
memset(&ru, 0, sizeof(ru));
memset(&pd, 0, sizeof(pd));
uint64_t pstate_v;
// vcpu:pr may already have changed; instr.pr should reflect pr before instr retired
ru.instr.hpriv = pcs[cpuid].hpr;
ru.instr.pr = pcs[cpuid].pr;
// ifetch trap? (pc_pa == 0 means the fetch itself trapped; emit a
// trapping-instr record instead of a normal instr record)
if (ii->pc_pa == 0) {
rstf_trapping_instrT ti = {0};
ti.rtype = TRAPPING_INSTR_T;
rstf_trapping_instrT_set_cpuid(&ti, ii->cpuid);
ti.hpriv = ru.instr.hpriv;
ti.priv = ru.instr.pr;
ti.iftrap = 1;
ti.pc_va = ii->pc_va;
if (!ti.hpriv) {
// apply PSTATE.AM 32-bit address truncation outside hpriv mode
ti.pc_va &= pcs[cpuid].mask_pstate_am;
}
pcs[cpuid].addrec((rstf_unionT *)&ti);
return 0;
}
// make sure the instruction's memory block is in the trace
pcs[cpuid].memcache_ref(ii->pc_pa);
bool need_pavadiff = false;
// generate instr, pavadiff records
ru.proto.rtype = INSTR_T;
rstf_instrT_set_cpuid(&ru.instr, cpuid);
pd.icontext = ii->icontext;
if (ii->icontext != pcs[cpuid].icontext) {
pcs[cpuid].icontext = ii->icontext;
need_pavadiff = true;
}
ru.instr.pc_va = ii->pc_va;
if (!ru.instr.hpriv) {
ru.instr.pc_va &= pcs[cpuid].mask_pstate_am;
}
// pavadiff tracks the pa-va offset; emit a record only when it changes
pd.pc_pa_va = ii->pc_pa - ru.instr.pc_va;
if (pd.pc_pa_va != pcs[cpuid].pc_pavadiff) {
need_pavadiff = true;
pcs[cpuid].pc_pavadiff = pd.pc_pa_va;
}
ru.instr.instr = ii->opcode;
// decode the opcode to classify loads/stores/ctis/done/retry
spix_sparc_iop_t iop = spix_sparc_iop(SPIX_SPARC_V9, &(ii->opcode));
uint64_t pstate64;
uint64_t tl64;
bool is_done_retry = false;
if (spix_sparc_iop_isload(iop) || spix_sparc_iop_iscstore(iop) || spix_sparc_iop_isustore(iop) ||
RSTRACER_IOP_IS_FLUSH(iop)) {
// memory-referencing instruction: record the effective address
uint64_t ea_va = ii->ea_va;
if (!ru.instr.hpriv) {
ea_va &= pcs[cpuid].mask_pstate_am;
}
uint64_t ea_pa = ii->ea_pa;
if (ea_pa != 0x0) {
ru.instr.ea_valid = 1;
ru.instr.ea_va = ea_va;
pd.ea_valid = 1;
pd.ea_pa_va = ea_pa - ea_va;
pd.dcontext = ii->dcontext;
if (ii->itype & (VCPU_LOAD_ITYPE|VCPU_STORE_ITYPE)) {
// dump the referenced data block if not already in the trace
pcs[cpuid].memcache_ref(ea_pa);
}
} else if (ii->exception) {
// output trapping instr record
rstf_trapping_instrT ti = {0};
rstf_trapping_instrT_set_cpuid(&ti, cpuid);
ti.rtype = TRAPPING_INSTR_T;
ti.hpriv = ru.instr.hpriv;
ti.priv = ru.instr.pr;
ti.iftrap = 0;
ti.ea_va_valid = 1;
ti.ea_pa_valid = 0;
ti.instr = ru.instr.instr;
ti.pc_va = ii->pc_va;
ti.ea_va = ea_va;
pcs[cpuid].addrec((rstf_unionT *)&ti);
} else if (ii->itype & (VCPU_ASI_LOAD_ITYPE|VCPU_ASI_STORE_ITYPE)) {
//ld/st to internal asi - PA not relevant
ru.instr.ea_valid = 1;
ru.instr.ea_va = ea_va;
pd.ea_valid = 0;
need_pavadiff = true;
} else { // ea_pa is 0x0 but there is no exception. should be a prefetch instr
#if 0
if (! spix_sparc_iop_isprefetch(iop) && ! RSTRACER_IOP_IS_FLUSH(iop)) {
fprintf(stderr, "WARNING: rstracer: cpu%d rec%lld: ea_pa==0 and exception==0???",
cpuid, pcs[cpuid].nrecs);
}
#endif
}
} else if (spix_sparc_iop_isdcti(iop)) {
// delayed CTI: record the branch target (NPC; PC if annulled)
ru.instr.ea_valid = 1;
g_vcpu[cpuid]->get_reg(ii->annul? VCPU_ASR_PC:VCPU_ASR_NPC, &ru.instr.ea_va);
if (!ru.instr.hpriv) {
ru.instr.ea_va &= pcs[cpuid].mask_pstate_am;
}
} else if (iop == SPIX_SPARC_IOP_RETRY) {
// RETRY changes privilege state: resample hpr/pr/pstate/tl after it
uint64_t v64;
ru.instr.ea_valid = 1;
g_vcpu[cpuid]->get_reg(VCPU_ASR_PC, &v64);
if (!ru.instr.hpriv) {
ru.instr.ea_va = v64 & pcs[cpuid].mask_pstate_am;
}
// get hpstate
int rv = g_vcpu[cpuid]->get_reg(VCPU_HPR_HPSTATE, &v64);
if (rv == 0) {
pcs[cpuid].hpr = (v64>>2) & 1;
}
// get pstate
g_vcpu[cpuid]->get_reg(VCPU_PR_PSTATE, &pstate64);
if (pcs[cpuid].hpr == 0) {
pcs[cpuid].pr = (pstate64 >> 2) & 1;
} else {
pcs[cpuid].pr = 0;
}
g_vcpu[cpuid]->get_reg(VCPU_PR_TL, &tl64);
pcs[cpuid].mask_pstate_am = MASK_PSTATE_AM(pstate64);
is_done_retry = true;
} else if (iop == SPIX_SPARC_IOP_DONE) {
// DONE also changes privilege state: resample hpr/pr/pstate/tl
ru.instr.ea_valid = 1;
uint64_t v64;
g_vcpu[cpuid]->get_reg(VCPU_ASR_NPC, &v64);
ru.instr.ea_va = v64;
if (!ru.instr.hpriv) {
ru.instr.ea_va &= pcs[cpuid].mask_pstate_am;
}
// get hpstate
int rv = g_vcpu[cpuid]->get_reg(VCPU_HPR_HPSTATE, &v64);
if (rv == 0) {
pcs[cpuid].hpr = (v64>>2) & 1;
}
// get pstate
g_vcpu[cpuid]->get_reg(VCPU_PR_PSTATE, &pstate64);
if (pcs[cpuid].hpr == 0) {
pcs[cpuid].pr = (pstate64 >> 2) & 1;
} else {
pcs[cpuid].pr = 0;
}
pcs[cpuid].mask_pstate_am = MASK_PSTATE_AM(pstate64);
// get TL
g_vcpu[cpuid]->get_reg(VCPU_PR_TL, &tl64);
is_done_retry = true;
}
// track data-side pavadiff/dcontext changes
if (pd.ea_valid) {
if (pd.ea_pa_va != pcs[cpuid].ea_pavadiff) {
need_pavadiff = true;
pcs[cpuid].ea_pavadiff = pd.ea_pa_va;
}
if (pd.dcontext != pcs[cpuid].dcontext) {
need_pavadiff = true;
pcs[cpuid].dcontext = pd.dcontext;
}
}
// ru.instr.tr = ii->dmmu_trap||ii->exception;
if (ii->exception) {
ru.instr.tr = 1;
}
ru.instr.bt = ii->taken; // FIXME - only if cti or cmov
if (need_pavadiff) {
// ALSO check if pcontext/scontext have changed
uint64_t reg64;
uint32_t newpcontext, newscontext;
uint8_t mmu_asi;
if ((g_vcpu[cpuid]->config.cpu_type & VCPU_IMPL_SIM_MASK) == VCPU_IMPL_SIM_BLAZE) {
mmu_asi = CH_MMU_CONTEXTREG_ASI;
} else {
mmu_asi = UA_MMU_CONTEXTREG_ASI;
}
g_vcpu[cpuid]->get_asi(mmu_asi, RSTREG_MMU_PCONTEXT, reg64);
newpcontext = (uint32_t) reg64;
g_vcpu[cpuid]->get_asi(mmu_asi, RSTREG_MMU_SCONTEXT, reg64);
newscontext = (uint32_t) reg64;
if ((newpcontext != pcs[cpuid].pcontext) || (newscontext != pcs[cpuid].scontext)) {
pcs[cpuid].pcontext = newpcontext;
pcs[cpuid].scontext = newscontext;
pcs[cpuid].add_regval(RSTREG_MMU_RT, RSTREG_MMU_PCONTEXT, newpcontext);
pcs[cpuid].add_regval(RSTREG_MMU_RT, RSTREG_MMU_SCONTEXT, newscontext);
pcs[cpuid].flush_regval();
}
// emit the pavadiff record before the instr record it describes
pd.rtype = PAVADIFF_T;
rstf_pavadiffT_set_cpuid(&pd, cpuid);
pcs[cpuid].addrec((rstf_unionT *)&pd);
memset(&pd, 0, sizeof(pd));
}
pcs[cpuid].addrec(&ru);
// regvals: destination registers written by this instruction; also keep
// the cached hpr/pr/pstate-AM-mask/tl shadow state in sync with writes to
// PSTATE/TL/HPSTATE
int i;
for (i=0; i<ii->nregs; i++) {
// need to map vtracer regtypes to rst regtypes:
int regtype = ii->dreg[i].r.type;
int regid = ii->dreg[i].r.id;
if (regtype == VCPU_FP_DOUBLE_RTYPE) {
regid = (regid/2) + 32;
} else if (regtype == VCPU_PR_RTYPE && regid == VCPU_PR_PSTATE) {
pstate64 = ii->dval[i];
pcs[cpuid].pr = (pstate64>>2) & 1;
if (pcs[cpuid].hpr) pcs[cpuid].pr = 0;
pcs[cpuid].mask_pstate_am = MASK_PSTATE_AM(pstate64);
} else if (regtype == VCPU_PR_RTYPE && regid == VCPU_PR_TL) {
tl64 = ii->dval[i];
} else if (regtype == VCPU_HPR_RTYPE) {
if (regid == VCPU_HPR_HPSTATE) {
uint64_t v64 = ii->dval[i];
pcs[cpuid].hpr = (v64 >> 2) & 1;
if (pcs[cpuid].hpr) pcs[cpuid].pr = 0;
}
}
pcs[cpuid].add_regval(vcpu_rtype_to_rst[regtype], regid, ii->dval[i]);
}
pcs[cpuid].flush_regval();
if (is_done_retry) {
// DONE/RETRY: also emit the resampled PSTATE and TL values
pcs[cpuid].add_regval(RSTREG_PRIV_RT, RSTREG_PSTATE_R, pstate64);
pcs[cpuid].add_regval(RSTREG_PRIV_RT, RSTREG_TL_R, tl64);
pcs[cpuid].flush_regval();
}
if (ii->annul) {
// annulled delay slot: emit a synthetic annulled-instr record at pc+4
ru.instr.an = 1;
ru.instr.pc_va = ii->pc_va + 4;
ru.instr.instr = 0x0;
ru.instr.ea_valid = 0;
pcs[cpuid].addrec(&ru);
}
pcs[cpuid].ninsts++;
// periodic progress message, driven by cpu0 only (every 2^24 insts)
if (cpuid == 0) {
if ((pcs[cpuid].ninsts & 0xffffffull) == 0) {
printf("%s: approx %lld insts/cpu traced (out of %lld max)\n",
id, pcs[cpuid].ninsts, insts_per_cpu);
}
}
if (pcs[cpuid].ninsts >= insts_per_cpu) {
// quota reached: enter the stop rendezvous
pcs[cpuid].state = rst_pct::state_WAIT_SYNC_STOP;
return instr(ii);
// trace_off();
}
return 0;
} // rstracer:instr()
// in SAM v5, the trap call happens AFTER the trapping instruction call
// Emits a TRAP_T record for a taken trap, resampling HPSTATE/PSTATE/TL from
// the vcpu (the trap has already switched state), and updates the cached
// hpr/pr/address-mask shadow used by instr(). Returns 0 always.
int rstracer::trap ( VCPU_Trap * ti)
{
int cpuid = ti->cpuid;
if (!tracing || (pcs[cpuid].state != rst_pct::state_TRACING)) return 0;
rstf_trapT tr = {0};
tr.rtype = TRAP_T;
rstf_trapT_set_cpuid(&tr, cpuid);
tr.is_async = ti->is_async;
tr.ttype = ti->tno;
tr.pc = ti->pc_va;
tr.npc = ti->npc_va;
if (ti->tno == 0x108 || ti->tno == 0x140) { /* syscalls */
tr.syscall = ti->syscallno;
} else if (ti->tno == 0x060) { /* mondo-intrs */
tr.syscall = ti->intrino;
}
// FIXME: the VCPU_Trap structure does not contain regiater values.
// get pstate and tl values
uint64_t tl64, pstate64, v64;
// get hpstate (hpriv is HPSTATE bit 2)
int rv = g_vcpu[cpuid]->get_reg(VCPU_HPR_HPSTATE, &v64);
if (rv == 0) {
pcs[cpuid].hpr = (v64>>2) & 1;
}
g_vcpu[cpuid]->get_reg(VCPU_PR_TL, &tl64);
g_vcpu[cpuid]->get_reg(VCPU_PR_PSTATE, &pstate64);
pcs[cpuid].pr = (pstate64>>2) & 1;
if (pcs[cpuid].hpr) pcs[cpuid].pr = 0;
pcs[cpuid].mask_pstate_am = MASK_PSTATE_AM(pstate64);
tr.tl = (unsigned) tl64;
tr.pstate = (uint16_t) pstate64;
pcs[cpuid].addrec((rstf_unionT *) &tr);
// in addition, emit regvals for pstate and tl
// (two add_regval calls fill both slots, which auto-flushes the record)
pcs[cpuid].add_regval(RSTREG_PRIV_RT, RSTREG_TL_R, tl64);
pcs[cpuid].add_regval(RSTREG_PRIV_RT, RSTREG_PSTATE_R, pstate64);
return 0;
} //int rstracer::trap ( VCPU_Trap * ti)
// Emits a TLB_T record for a TLB update or demap event while this cpu is
// actively tracing. Returns 0 always.
int rstracer::tlb ( VCPU_TLB * ti)
{
    const int cpu = ti->cpuid;
    if (!tracing || (pcs[cpu].state != rst_pct::state_TRACING)) return 0;
    rstf_tlbT rec = {0};
    rec.rtype = TLB_T;
    rstf_tlbT_set_cpuid(&rec, cpu);
    rec.demap = ti->demap;
    rec.tlb_type = ti->tlb_type;
    rec.tlb_index = ti->tlb_index;
    rec.tlb_no = ti->tlb_no;
    rec.tte_tag = ti->tte_tag;
    rec.tte_data = ti->tte_data;
    if (ti->format == 1) { // sun4v
        // sun4v context/is_real ride in the fields named unused16/unused
        rec.unused16 = ti->tte_context;
        rec.unused = ti->is_real;
    }
    pcs[cpu].addrec((rstf_unionT *)&rec);
    return 0;
} // int rstracer::tlb ( VCPU_TLB * ti)
// Handles asynchronous (non-cpu) trace data: DMA records and descriptor
// strings. These have no owning cpu, so they are written into the first
// live vcpu's trace stream. Returns 0 always.
int rstracer::async(VCPU_AsyncData * di)
{
    if (!tracing || (pcs[first_vcpu_id].state != rst_pct::state_TRACING)) return 0;
    if (di->dma.rtype == DMA_T) {
        rstf_unionT dr;
        // BUG FIX: dr was written to the trace with only five fields set,
        // leaking uninitialized stack bytes into the record; zero it first
        // (matches the memset pattern used elsewhere in this file).
        memset(&dr, 0, sizeof(dr));
        dr.dma.rtype = DMA_T;
        dr.dma.iswrite = di->dma.iswrite;
        dr.dma.nbytes = (int) di->dma.nbytes;
        dr.dma.start_pa = di->dma.pa;
        dr.dma.devid = di->dma.devid;
        pcs[first_vcpu_id].addrec(&dr);
    } else if (di->strdata.rtype == STRDESC_T) {
        di->strdata.s[22] = 0; // even if it is already 0
        pcs[first_vcpu_id].string2rst(di->strdata.s);
    } else {
        fprintf(stderr, "%s: ERROR: invalid asyncdata rtype (%d)\n",
                id, (int) di->strdata.rtype);
    }
    return 0;
} // int rstracer::async(VCPU_AsyncData * di)
// Emits a TIMESYNC_T record so traces from different cpus can be aligned
// in time by a downstream consumer. Returns 0 always.
int rstracer::sync (VCPU_Sync * si)
{
    const int cpu = si->cpuid;
    if (!tracing || (pcs[cpu].state != rst_pct::state_TRACING)) return 0;
    // output sync record
    rstf_timesyncT rec = {0};
    rec.rtype = TIMESYNC_T;
    rec.cpuid = cpu;
    rec.subtype = si->synctype;
    rec.data = si->data;
    rec.sequence_number = si->syncid;
    pcs[cpu].addrec((rstf_unionT*) &rec);
    return 0;
}
// Emits a TSB_ACCESS_T record for a hardware tablewalk access.
// Returns 0 always.
int rstracer::hwop(VCPU_HwOp * hi)
{
    if (!tracing || (pcs[hi->cpuid].state != rst_pct::state_TRACING)) return 0;
    rstf_tsb_accessT tr;
    memset(&tr, 0, sizeof(tr));
    tr.rtype = TSB_ACCESS_T;
    tr.isdata = hi->op_type;
    rstf_tsb_accessT_set_cpuid(&tr, hi->cpuid);
    tr.pa = hi->addr;
    pcs[hi->cpuid].addrec((rstf_unionT *) &tr);
    // BUG FIX: this non-void function fell off the end without returning a
    // value (undefined behavior); return 0 like the other callbacks.
    return 0;
} // int rstracer::hwop(VCPU_HwOp * hi)
// Destructor: finalizes any in-progress trace, unregisters the UI command,
// and releases the resources acquired in the constructor.
rstracer::~rstracer()
{
    // stop and flush an in-progress trace before tearing anything down
    if (tracing) {
        trace_off();
    }
    // unregister UI cmds
    char * cmd = strdup("rstrace");
    UI_invalidate_cmd(cmd);
    free(cmd);
    // BUG FIX: release constructor-acquired resources that were leaked:
    // the strdup'd module name, the per-cpu tracer array, and the mutex.
    free(modname);
    delete [] pcs;
    mutex_destroy(&mu);
} // rstracer::~rstracer()
// Module teardown. Registered with atexit() in vtracer_init() and may also
// be invoked explicitly when the module is unloaded, so it must be safe to
// call more than once.
void vtracer_fini()
{
    if (thetracer != NULL) {
        delete thetracer;
        // BUG FIX: clear the pointer so a second invocation (atexit after an
        // explicit call) does not delete a dangling object.
        thetracer = NULL;
    }
} // void vtracer_fini()