| 1 | !----------------------------------------------------------------------- |
| 2 | ! Run 16 threads on two cores with DMA on thread 5. |
| 3 | !----------------------------------------------------------------------- |
| 4 | TSOTOOL.PROCESSOR niagara2.rtl |
| 5 | TSOTOOL.MODE GEN |
| 6 | TSOTOOL.N_THREADS 16 |
| 7 | TSOTOOL.TEST_NAME fc2_prop_diag |
| 8 | TSOTOOL.BATCH Y |
| 9 | ! |
| 10 | ! It appears that DMA-generating threads are about 18% efficient. |
| 11 | ! In other words, if all threads try to execute the same number |
| 12 | ! of 'instructions', the non-DMA threads will finish long before |
| 13 | ! the DMA threads. The following weighting seems to work for an |
| 14 | ! FC1 model with one thread doing DMA. |
| 15 | ! |
| 16 | ! core 0: 0 1 2 3 4 5 6 7 core 1: 0 1 2 3 4 5 6 7 |
| 17 | GEN.N_INSTR_PER_THREAD 100, 100, 100, 100, 100, 19, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100 |
| 18 | GEN.AVG_LOOP_SIZE 0 |
| 19 | GEN.AVG_LOOP_ITER 0 |
| 20 | ! GEN.SEED 499406026 |
| 21 | ADMAP.RTL.REGION_PA_SEPARATION 8M |
| 22 | ADMAP.N_REGIONS 4 |
| 23 | ADMAP.REGION_SIZE 64K |
| 24 | ADMAP.REGION_OFFSETS 916-920-984-1012, 180-1416-2228-3228, 316-692-1268-1520-1560-2404, 2108-2132-2156-2304-2412-2544-2556 |
| 25 | ADMAP.ATTRIBUTES CV=1110,CP=1110 |
| 26 | ADMAP.NC_MEMMAP 0xc800002000:8G |
| 27 | ADMAP.N_ALIASES 0 |
| 28 | ADMAP.ALIAS_FREQUENCY 64 |
| 29 | ADMAP.ALIAS_OFFSET 8388608 |
| 30 | RUN.SIMULATOR no_simulator_defined |
| 31 | WT.PCT_FP_INSTR 10 |
| 32 | WT.PCT_LITTLE_ENDIAN 5 |
| 33 | WT.PCT_LOADS_NF 0 |
| 34 | WT.PCT_NFS_FAULT 0 |
| 35 | WT.PCT_PREFETCH_FAULT 20 |
| 36 | WT.PCT_PREFETCH_UNIMP 20 |
| 37 | WT.PCT_CBRANCH 0 |
| 38 | WT.PCT_SECONDARY_CTX 0 |
| 39 | WT.PCT_NUCLEUS_CTX 0 |
| 40 | WT.REPLACEMENT 5 0 cpu=5 |
| 41 | WT.INTERRUPT 0 |
| 42 | WT.LD 10 0 cpu=5 |
| 43 | WT.BLD 0 5 cpu=0-4,6-15 region=0-2 |
| 44 | WT.DWLD 0 1 cpu=0-4,6-15 region=0-2 |
| 45 | WT.LDD 0 |
| 46 | WT.QWLD 0 |
| 47 | WT.AQLD 0 1 cpu=0-4,6-15 region=1 |
| 48 | WT.ST 5 0 cpu=5 |
| 49 | WT.BST 0 10 cpu=0-4,6-15 region=0-2 |
| 50 | WT.BSTC 0 |
| 51 | WT.ST_BINIT 0 |
| 52 | WT.DWST_BINIT 0 |
| 53 | WT.DWST 0 10 cpu=0-4,6-15 region=0-2 |
| 54 | WT.QWST 0 |
| 55 | WT.SWAP 0 5 cpu=0-4,6-15 region=0-2 |
| 56 | WT.CAS 0 5 cpu=0-4,6-15 region=0-2 |
| 57 | WT.CASX 0 5 cpu=0-4,6-15 region=0-2 |
| 58 | WT.ASI_L2_FLUSH 0 |
| 59 | WT.FLUSHI 0 |
| 60 | WT.MEMBAR 0 |
| 61 | WT.PREFETCH 10 0 cpu=5 |
| 62 | WT.NOP 0 |
| 63 | ADV.L2_WAYS 16 |
| 64 | ADV.RESULTS_TO_MEM N |
| 65 | ADV.BST_MEMBARS Y |
| 66 | ADV.BLD_MEMBARS Y |
| 67 | ADV.FP_FLUSH_MEMBARS Y |
| 68 | ADV.CAS_IMPLICIT_MEMBARS Y |
| 69 | ADV.WARMUP_ITERATIONS 0 |
| 70 | ADV.TEST_REPETITIONS 1 |
| 71 | |
| 72 | ! PEP-based DMA operation weights. |
| 73 | ! The weights below favor the smallest and the largest |
| 74 | ! DMA sizes, with the emphasis on the large ones, and |
| 75 | ! are skewed towards writes over reads. |
| 76 | |
| 77 | WT.MACRO.ALM_DMA0_RD 0 10 cpu=5 region=0-2 |
| 78 | WT.MACRO.ALM_DMA1_RD 0 10 cpu=5 region=0-2 |
| 79 | WT.MACRO.ALM_DMA2_RD 0 10 cpu=5 region=0-2 |
| 80 | WT.MACRO.ALM_DMA3_RD 0 10 cpu=5 region=0-2 |
| 81 | WT.MACRO.DMA0_RD_0x8 0 15 cpu=5 region=0-2 |
| 82 | WT.MACRO.DMA0_RD_0xC 0 4 cpu=5 region=0-2 |
| 83 | WT.MACRO.DMA0_RD_0x10 0 4 cpu=5 region=0-2 |
| 84 | WT.MACRO.DMA0_RD_0x14 0 4 cpu=5 region=0-2 |
| 85 | WT.MACRO.DMA0_RD_0x20 0 4 cpu=5 region=0-2 |
| 86 | WT.MACRO.DMA0_RD_0x30 0 4 cpu=5 region=0-2 |
| 87 | WT.MACRO.DMA0_RD_0x3C 0 30 cpu=5 region=0-2 |
| 88 | WT.MACRO.DMA0_RD_0x40 0 10 cpu=5 region=0-2 |
| 89 | WT.MACRO.DMA1_RD_0x40 0 10 cpu=5 region=0-2 |
| 90 | WT.MACRO.DMA2_RD_0x40 0 10 cpu=5 region=0-2 |
| 91 | WT.MACRO.DMA3_RD_0x40 0 10 cpu=5 region=0-2 |
| 92 | |
| 93 | WT.MACRO.ALM_DMA0_WR 0 10 cpu=5 region=0-2 |
| 94 | WT.MACRO.ALM_DMA1_WR 0 10 cpu=5 region=0-2 |
| 95 | WT.MACRO.ALM_DMA2_WR 0 10 cpu=5 region=0-2 |
| 96 | WT.MACRO.ALM_DMA3_WR 0 10 cpu=5 region=0-2 |
| 97 | WT.MACRO.DMA0_WR_0x8 0 30 cpu=5 region=0-2 |
| 98 | WT.MACRO.DMA0_WR_0xC 0 10 cpu=5 region=0-2 |
| 99 | WT.MACRO.DMA0_WR_0x10 0 10 cpu=5 region=0-2 |
| 100 | WT.MACRO.DMA0_WR_0x14 0 10 cpu=5 region=0-2 |
| 101 | WT.MACRO.DMA0_WR_0x20 0 10 cpu=5 region=0-2 |
| 102 | WT.MACRO.DMA0_WR_0x30 0 10 cpu=5 region=0-2 |
| 103 | WT.MACRO.DMA0_WR_0x3C 0 80 cpu=5 region=0-2 |
| 104 | WT.MACRO.DMA0_WR_0x40 0 25 cpu=5 region=0-2 |
| 105 | WT.MACRO.DMA1_WR_0x40 0 25 cpu=5 region=0-2 |
| 106 | WT.MACRO.DMA2_WR_0x40 0 25 cpu=5 region=0-2 |
| 107 | WT.MACRO.DMA3_WR_0x40 0 25 cpu=5 region=0-2 |
| 108 | |
| 109 | WT.MACRO.ALM_DMA0_INT 0 40 cpu=5 region=0-2 |