!-----------------------------------------------------------------------
! Run 8 threads on one core with DMA on thread 5.
!-----------------------------------------------------------------------
! Tool-level settings: processor model, mode, thread count, test name
! and batch flag.
! N_THREADS is 8, which matches the eight per-thread values listed for
! GEN.N_INSTR_PER_THREAD in this file.
! NOTE(review): MODE GEN presumably selects test generation only (no
! simulation run) - confirm against the tool documentation.
TSOTOOL.PROCESSOR niagara2.rtl
TSOTOOL.MODE GEN
TSOTOOL.N_THREADS 8
TSOTOOL.TEST_NAME fc1_prop_diag
TSOTOOL.BATCH Y
!
!  It appears that DMA generating threads are about 18% efficient.
!  In other words, if all threads try to execute the same number
!  of 'instructions' the non-DMA threads will finish long before
!  the DMA threads. The following weighting seems to work for an
!  FC1 model with one thread doing DMA.
!
! Per-thread instruction counts, one value per thread. Thread 5 (the DMA
! thread) is given 19; every other thread is given 100.
! NOTE(review): the value list ends with a trailing comma; it is left
! untouched because the parser's tolerance is not visible here.
!                        0    1    2    3    4    5    6    7
GEN.N_INSTR_PER_THREAD 100, 100, 100, 100, 100,  19, 100, 100, 
! Average loop size and average loop iteration count are both 0.
! NOTE(review): presumably 0 turns loop generation off - confirm.
GEN.AVG_LOOP_SIZE 0
GEN.AVG_LOOP_ITER 0
! The seed is left commented out.
! NOTE(review): presumably the tool then chooses its own seed; uncomment
! to pin a specific seed - confirm.
! GEN.SEED 499406026
! Address map: 4 regions of 64K each, with an 8M region PA separation.
ADMAP.RTL.REGION_PA_SEPARATION 8M
ADMAP.N_REGIONS 4
ADMAP.REGION_SIZE 64K
! Four comma-separated groups of dash-separated offsets, the same count
! as N_REGIONS.
! NOTE(review): presumably one group per region, in region order - confirm.
ADMAP.REGION_OFFSETS 916-920-984-1012, 180-1416-2228-3228, 316-692-1268-1520-1560-2404, 2108-2132-2156-2304-2412-2544-2556
! NOTE(review): CV and CP are four-digit values; presumably one digit per
! region, with the last region differing from the first three - confirm.
ADMAP.ATTRIBUTES CV=1110,CP=1110
ADMAP.NC_MEMMAP 0xc800002000:8G
! N_ALIASES is 0. ALIAS_OFFSET 8388608 equals 8 * 1024 * 1024, i.e. the
! same quantity as the 8M region PA separation above.
! NOTE(review): ALIAS_FREQUENCY and ALIAS_OFFSET presumably have no
! effect while N_ALIASES is 0 - confirm.
ADMAP.N_ALIASES 0
ADMAP.ALIAS_FREQUENCY 64
ADMAP.ALIAS_OFFSET 8388608
! The simulator is set to the literal placeholder no_simulator_defined.
! NOTE(review): presumably unused because TSOTOOL.MODE is GEN - confirm.
RUN.SIMULATOR no_simulator_defined
! Percentage knobs (PCT_ prefix). Non-zero here: FP instructions 10,
! little-endian 5, prefetch fault 20, prefetch unimplemented 20.
! All remaining percentage knobs in this group are 0.
! NOTE(review): exact meaning of each percentage (of what population)
! is not visible in this file - confirm against the tool documentation.
WT.PCT_FP_INSTR 10
WT.PCT_LITTLE_ENDIAN 5
WT.PCT_LOADS_NF 0
WT.PCT_NFS_FAULT 0
WT.PCT_PREFETCH_FAULT 20
WT.PCT_PREFETCH_UNIMP 20
WT.PCT_CBRANCH 0
WT.PCT_SECONDARY_CTX 0
WT.PCT_NUCLEUS_CTX 0
! Per-operation weights. An entry is either a single number, or two
! numbers followed by a cpu= qualifier and an optional region= qualifier.
! In this group:
!   - entries qualified with cpu=5 (the DMA thread) have a non-zero
!     first number and 0 as the second number;
!   - entries qualified with cpu=0-4,6,7 (the non-DMA threads) have 0 as
!     the first number and a non-zero second number.
! NOTE(review): this pattern suggests "<default weight> <weight for the
! qualified cpus/regions>", but the format is not defined in this file -
! confirm against the tool documentation.
WT.REPLACEMENT 5 0 cpu=5
WT.INTERRUPT 0
WT.LD 10 0 cpu=5
WT.BLD 0 5 cpu=0-4,6,7 region=0-2 
WT.DWLD 0 1 cpu=0-4,6,7 region=0-2 
WT.LDD 0
WT.QWLD 0
! AQLD is the only entry restricted to region=1; the others use 0-2.
WT.AQLD 0 1 cpu=0-4,6,7 region=1
! NOTE(review): WT.ST is the only entry qualified with cpu=7 alone; every
! other single-thread qualifier in this file is cpu=5. Confirm that cpu=7
! is intended and is not a typo for cpu=5.
WT.ST 5 0 cpu=7
WT.BST 0 10 cpu=0-4,6,7 region=0-2
WT.BSTC 0
WT.ST_BINIT 0
WT.DWST_BINIT 0
WT.DWST 0 10 cpu=0-4,6,7 region=0-2
WT.QWST 0
WT.SWAP 0 5 cpu=0-4,6,7 region=0-2
WT.CAS 0 5 cpu=0-4,6,7 region=0-2
WT.CASX 0 5 cpu=0-4,6,7 region=0-2
WT.ASI_L2_FLUSH 0
WT.FLUSHI 0
WT.MEMBAR 0
WT.PREFETCH 10 0 cpu=5
WT.NOP 0
! Advanced settings: L2_WAYS is 16 and RESULTS_TO_MEM is N.
ADV.L2_WAYS 16
ADV.RESULTS_TO_MEM N
! All four membar-related flags (BST, BLD, FP flush, CAS implicit) are Y.
! NOTE(review): presumably these make the generator emit or assume
! membars around the named operations - confirm.
ADV.BST_MEMBARS Y
ADV.BLD_MEMBARS Y
ADV.FP_FLUSH_MEMBARS Y
ADV.CAS_IMPLICIT_MEMBARS Y
! No warmup iterations; the test is repeated once.
ADV.WARMUP_ITERATIONS 0
ADV.TEST_REPETITIONS 1

! PEP-based DMA operation weights.
! The weights below favor small and large DMAs over mid-sized
! ones, with the emphasis on large DMAs, and are skewed
! towards writes.

! DMA read macros. Every entry is "0 <weight> cpu=5 region=0-2", i.e.
! scoped to thread 5 and regions 0-2.
! Weights: ALM_DMA0..3_RD 10 each; DMA0_RD_0x8 15; DMA0_RD_0xC through
! DMA0_RD_0x30 4 each; DMA0_RD_0x3C 30; DMA0..3_RD_0x40 10 each.
! NOTE(review): the 0x8..0x40 suffix is presumably the transfer size and
! DMA0..DMA3 presumably select a DMA engine/channel - confirm.
WT.MACRO.ALM_DMA0_RD 0 10 cpu=5 region=0-2
WT.MACRO.ALM_DMA1_RD 0 10 cpu=5 region=0-2
WT.MACRO.ALM_DMA2_RD 0 10 cpu=5 region=0-2
WT.MACRO.ALM_DMA3_RD 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x8 0 15 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0xC 0 4 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x10 0 4 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x14 0 4 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x20 0 4 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x30 0 4 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x3C 0 30 cpu=5 region=0-2
WT.MACRO.DMA0_RD_0x40 0 10 cpu=5 region=0-2
WT.MACRO.DMA1_RD_0x40 0 10 cpu=5 region=0-2
WT.MACRO.DMA2_RD_0x40 0 10 cpu=5 region=0-2
WT.MACRO.DMA3_RD_0x40 0 10 cpu=5 region=0-2

! DMA write macros. Every entry is "0 <weight> cpu=5 region=0-2", i.e.
! scoped to thread 5 and regions 0-2.
! Weights: ALM_DMA0..3_WR 10 each; DMA0_WR_0x8 30; DMA0_WR_0xC through
! DMA0_WR_0x30 10 each; DMA0_WR_0x3C 80; DMA0..3_WR_0x40 25 each.
! Apart from the ALM_* entries (10 for both reads and writes), each write
! weight is larger than the weight of the read macro with the same suffix.
! NOTE(review): the 0x8..0x40 suffix is presumably the transfer size and
! DMA0..DMA3 presumably select a DMA engine/channel - confirm.
WT.MACRO.ALM_DMA0_WR 0 10 cpu=5 region=0-2
WT.MACRO.ALM_DMA1_WR 0 10 cpu=5 region=0-2
WT.MACRO.ALM_DMA2_WR 0 10 cpu=5 region=0-2
WT.MACRO.ALM_DMA3_WR 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x8 0 30 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0xC 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x10 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x14 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x20 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x30 0 10 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x3C 0 80 cpu=5 region=0-2
WT.MACRO.DMA0_WR_0x40 0  25 cpu=5 region=0-2
WT.MACRO.DMA1_WR_0x40 0  25 cpu=5 region=0-2
WT.MACRO.DMA2_WR_0x40 0  25 cpu=5 region=0-2
WT.MACRO.DMA3_WR_0x40 0  25 cpu=5 region=0-2

! DMA interrupt macro: weight 40, scoped to thread 5 and regions 0-2.
! NOTE(review): presumably triggers a DMA-originated interrupt - confirm.
WT.MACRO.ALM_DMA0_INT 0 40 cpu=5 region=0-2