sam-t2/sam/cpus/vonk/ss/lib/cpu/src/SS_Instr.h

/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_Instr.h
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#ifndef __SS_Instr_h__
#define __SS_Instr_h__

#include "SS_Types.h"
#include "SS_Opcode.h"
#include "SS_Tte.h"
#include "SS_Chain.h"

// Forward declarations.  SS_BreakPoint is only used through a pointer
// in SS_Instr below, so its full definition is not needed here.
class SS_Strand;
class SS_BreakPoint;

// SS_Instr::AsiFlag declares an enumerator named REG_ASI.  Remove any
// preprocessor macro of the same name so that it cannot rewrite the
// enumerator (presumably such a macro comes from a system header --
// confirm which one).
#undef REG_ASI

// The SS_Instr class captures the decoding of an instruction.  The
// decode cache, after decoding, contains SS_Instr objects.

// Danger, danger, danger!!
//
// NB: The SS_Instr objects are interwoven in the decode cache and
// overlap each other.  An SS_Instr object's data layout is:
//
//      Size            Content
//      ----            ------
//      16 bytes        Data block 0
//      15*16 bytes     Unused hole
//      16 bytes        Data block 1
//      15*16 bytes     Unused hole
//      16 bytes        Data block 2
//      15*16 bytes     Unused hole
//      16 bytes        Data block 3
//      15*16 bytes     Unused hole
//
// For a total size of 1024 bytes, only 1/16 of which hold meaningful
// data.
//
// In the decode cache, 16 SS_Instr objects are overlapped at
// intervals of 16 bytes.  The key advantage is to share a D-cache
// line among the "data block 0" areas of two SS_Instr objects.  Since
// D-cache lines are typically 32 bytes, this packing eliminates
// almost all D-cache misses for the second "data block 0" of the two
// SS_Instr objects sharing the cache line, at least in the case of
// sequential execution.
//
// Do not modify this class without considering these packing
// constraints.  As a wise girl once said, "You *can* touch this --
// with your eyes."


// SS_Instr: one decoded instruction as stored in the decode cache.
//
// The meaningful payload is four 16-byte data blocks, each followed by a
// 240-byte hole (the strideN arrays).  The holes exist only so that other
// SS_Instr objects can be overlaid at 16-byte offsets; the member order
// and sizes below therefore define the layout and must not be changed
// casually.
class SS_Instr
{
  public:
    // Layout constants.  With the values below:
    //   1 << (BITS - SKEW) = 16  bytes, the size of one data block
    //   SIZE               = 64  (equals 4 data blocks of 16 bytes)
    //   LINE_SIZE          = 16  objects overlapped per group
    //   HOLE               = 15 * 16 / 8 = 30 uint64_t (240 bytes)
    enum
    {
      BITS = 6,
      SIZE = (1 << BITS),

      SKEW = 2,

      LINE_BITS = 4,
      LINE_SIZE = 1 << LINE_BITS,
      LINE_MASK = LINE_SIZE - 1,

      // Number of uint64_t elements that pad one data block out to
      // LINE_SIZE data blocks.
      HOLE = (LINE_SIZE - 1) * (1 << (BITS - SKEW)) / sizeof(uint64_t)
    };

    // Returns the SS_Instr located n * 16 bytes (n data blocks) past this
    // object, i.e. the n-th object overlaid on this one.  No range check
    // is done on n.
    SS_Instr* line_index( uint_t n )
    {
      return (SS_Instr*)((char*)this + (n << (BITS - SKEW)));
    }

    // Sanity check that the layout constants still produce the expected
    // hole size.  This is a run-time assert, so it only fires when
    // asserts are enabled and an object is actually constructed.
    SS_Instr() { assert(HOLE == 30); }

    // ---- Data block 0 (first 8 bytes) ----
    union
    {
      SS_Execute exe;           // The decode or execute routine ...
      uint64_t   align0;        // 64bit alignment in v8plus mode
    };

    // Todo: AsiFlag and LsuFlag use 8 bits all together.
    // The asi flags don't serve real meaning anymore as
    // each have their own execute (so REG_ASI is known
    // without looking at this flag). I think we should use
    //
    // 000 NON_LSU
    // 001 READ
    // 010 WRITE
    // 011 ATOMIC
    // 101 FETCH
    // 110 FLUSH
    // 110 -
    // 111 -
    //
    // NOTE(review): the proposed table above lists 110 twice and omits
    // 100 -- presumably a typo in the proposal; it does not describe the
    // current encoding, which is given by the two enums below.

    // Low nibble of flg: how the ASI of a load/store instruction is
    // selected.  A flg value of 0 (NON_LSU) marks a non-LSU instruction.
    enum AsiFlag
    {
      NON_LSU  = 0x00,                  // All non LSU instructions
      DFT_ASI  = 0x01,
      REG_ASI  = 0x02,
      IMM_ASI  = 0x04
    };

    // High nibble of flg: kind of memory operation.  READ and WRITE set
    // together denote an atomic (see is_atomic()).
    enum LsuFlag
    {
      READ     = 0x10,
      WRITE    = 0x20,
      FETCH    = 0x40,
      FLUSH    = 0x80
    };

    // Flag predicates.  Except for is_lsu() and is_atomic(), these return
    // the masked bit(s) of flg rather than a normalized 0/1, so callers
    // should only test the result for zero / non-zero.
    int is_lsu()                { return flg != NON_LSU; }
    int is_dft_asi()            { return flg & DFT_ASI; }
    int is_reg_asi()            { return flg & REG_ASI; }
    int is_imm_asi()            { return flg & IMM_ASI; }

    int is_read()               { return flg & READ;  }
    int is_write()              { return flg & WRITE; }
    // True only when both READ and WRITE are set.
    int is_atomic()             { return (flg & (READ|WRITE)) == (READ|WRITE); }

    int is_fetch()              { return flg & FETCH; }
    int is_flush()              { return flg & FLUSH; }
    // Non-zero when either FETCH or FLUSH is set.
    int is_cohere()             { return flg & (FETCH|FLUSH); }

    // ---- Data block 0 (second 8 bytes) ----
    //
    // All register values below (rd, rs1, rs2, rs3) are not the
    // register number mapping directly to %g3, %l5, etc.  Instead,
    // they are the ***byte offset*** into the Strand's integer
    // register file.  So instruction referencing %g2 as its rd would
    // store 16 in "rd" below because registers are 8 bytes in size.

    int16_t    rd;
    int16_t    rs1;
    int16_t    rs2;             // Index to rs2 or signed immediate value
    // rs3 and asi share the same 16 bits; which one is valid depends on
    // the instruction.
    union
    {
      int16_t  rs3;             // Index to rs3
      uint16_t asi;             // The asi used by LSU instructions (dft/reg/imm) @@ha144505 Todo -> uint8!
    };

    uint64_t   stride0[HOLE];   // For overlapping 16 SS_Instr in the decode cache

    // ---- Data block 1 ----
    SS_Opcode  opc;
    uint8_t    flg;             // Flags specifying memory operation type (AsiFlag | LsuFlag bits)
    uint8_t    len;             // The size of the memory operation
    uint16_t   spare0;

    // tte and bp occupy the same storage.  When used as a TTE pointer the
    // two low bits carry tags; use get_tte()/tte_le()/tte_io()/set_tte()
    // rather than touching tte directly.
    union
    {
      SS_Tte*        tte;       // Data TTE for LSU instructions. Bit0 is endianness, Bit1 is MEM(0)/IO(1)
      SS_BreakPoint* bp;        // Pointer to breakpoint set on this instruction
    };

    uint64_t   stride1[HOLE];   // For overlapping 16 SS_Instr in the decode cache

    // ---- Data block 2 ----
    SS_Chain   lnk;             // Link same TTE together

    // The hole after block 2 is expressed in units of SS_Chain instead of
    // uint64_t (the earlier form is kept below for reference), so it is
    // always LINE_SIZE-1 times the size of lnk.
    // NOTE(review): this matches the 240-byte holes elsewhere only if
    // sizeof(SS_Chain) is 16 -- confirm against SS_Chain.h.
    //uint64_t   stride2[HOLE]; // For overlapping 16 SS_Instr in the decode cache
    SS_Chain   stride2[LINE_SIZE-1];

    // ---- Data block 3 ----
    uint16_t   exe_tbl_idx;     // Index into exe_table for this instruction

    uint16_t   spare1;          // Reserved space
    uint32_t   spare2;
    uint64_t   spare3;

    uint64_t   stride3[HOLE];   // For overlapping 16 SS_Instr in the decode cache

    // Accessors for the tagged TTE pointer.
    // NOTE(review): the casts go through long, which assumes long is as
    // wide as a pointer on every supported platform.
    //
    // get_tte()   - the TTE pointer with the two tag bits cleared.
    // tte_le()    - bit 0 (endianness tag): 0 or 1.
    // tte_io()    - bit 1 (MEM/IO tag): 0 or 2, not normalized to 0/1.
    // tte_le_io() - both tag bits: 0..3.
    SS_Tte* get_tte()           { return (SS_Tte*)(long(tte) &~ long(3)); }
    int     tte_le()            { return long(tte) & 1; }
    int     tte_io()            { return long(tte) & 2; }
    int     tte_le_io()         { return long(tte) & 3; }

    // Stores t in tte with the endianness and IO tags folded into the two
    // low bits, and returns the stored (tagged) pointer.
    //
    //   le - 0 or 1 (asserted); becomes bit 0.
    //   io - 0 or 2 (asserted); note it is passed already shifted, and
    //        becomes bit 1.
    //   t  - TTE pointer; its two low bits must already be zero because
    //        the tags are added, not or-ed in.  This is not checked.
    SS_Tte* set_tte( uint_t le, uint_t io, SS_Tte* t )
    {
      assert((le == 0) || (le == 1));
      assert((io == 0) || (io == 2));
      tte = (SS_Tte*)(long(t) + long(le) + long(io));
      return tte;
    }


};

#endif