| 1 | /* |
| 2 | * ========== Copyright Header Begin ========================================== |
| 3 | * |
| 4 | * OpenSPARC T2 Processor File: tso_n1_bcopy.s |
| 5 | * Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved |
| 6 | * 4150 Network Circle, Santa Clara, California 95054, U.S.A. |
| 7 | * |
| 8 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify |
| 11 | * it under the terms of the GNU General Public License as published by |
| 12 | * the Free Software Foundation; version 2 of the License. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, |
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | * GNU General Public License for more details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU General Public License |
| 20 | * along with this program; if not, write to the Free Software |
| 21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 22 | * |
| 23 | * For the avoidance of doubt, and except that if any non-GPL license |
| 24 | * choice is available it will apply instead, Sun elects to use only |
| 25 | * the General Public License version 2 (GPLv2) at this time for any |
| 26 | * software where a choice of GPL license versions is made |
| 27 | * available with the language indicating that GPLv2 or any later version |
| 28 | * may be used, or where a choice of which version of the GPL is applied is |
| 29 | * otherwise unspecified. |
| 30 | * |
| 31 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 32 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 33 | * have any questions. |
| 34 | * |
| 35 | * |
| 36 | * ========== Copyright Header End ============================================ |
| 37 | */ |
| 38 | #define srcaddr %l0 |
| 39 | #define dstaddr %l1 |
| 40 | #define count %l2 |
| 41 | #define ASI_BLK_P 0xf0 |
| 42 | /* srcaddr, dstaddr and count name the local registers that hold the source pointer, the destination pointer and the remaining byte count. ASI_BLK_P is the ASI operand given to the stda block stores in main. */ |
| 43 | ! Define number of 64 byte subblocks to copy |
| 44 | #define SBLKS 128 |
| 45 | /* SBLKS is multiplied by 8*8 in main to obtain the byte count, and it also sizes both init_mem buffers (SBLKS*16 entries each). */ |
| 46 | ! How far ahead is the prefetch stream (in subblocks) |
| 47 | #define FETCH_AHEAD 8 |
| 48 | /* FETCH_AHEAD selects which of the optional preamble prefetches are assembled and sets the loop prefetch offset to (FETCH_AHEAD+1)*64 bytes past srcaddr. */ |
| 49 | #include "hboot.s" |
| 50 | |
| 51 | .global main |
| 52 | /* main: enables the FPU, then copies SBLKS 64-byte blocks from in_stream to out_stream. Each block is read with eight 8-byte ldd loads plus one look-ahead load, passed through faligndata into %f32-%f46, and written with one stda block store. Prefetches run ahead of the loads. The routine ends with membar Sync and ta GOOD_TRAP. Registers written: %l0-%l2, %g2, %f0-%f16, %f32-%f46, the condition codes, FPRS and GSR. */ |
| 53 | .text |
| 54 | main: |
| 55 | wr %g0, 0x4, %fprs /* make sure fef is 1: 0x4 is the FEF enable bit of FPRS, needed by the FP loads, faligndata and block stores below */ |
| 56 | |
| 57 | setup_addresses: |
| 58 | setx in_stream, %g2, srcaddr /* %g2 is only the scratch register that setx needs */ |
| 59 | setx out_stream, %g2, dstaddr |
| 60 | alignaddr srcaddr, %g0, srcaddr /* VIS alignaddr: rounds srcaddr down to an 8-byte boundary and records the dropped low bits in GSR.align for the faligndata ops */ |
| 61 | |
| 62 | ! COUNT bytes to copy |
| 63 | !-------------------- |
| 64 | set SBLKS, count |
| 65 | mulx count, 8*8, count /* count = SBLKS * 64 bytes */ |
| 66 | |
| 67 | /* Preamble: starts the prefetch stream, loads the nine doublewords that cover source block 0 plus one look-ahead, and computes the first six aligned output doublewords. The remaining two are completed at the top of timing_loop or in tidy_up. */ |
| 68 | ! the initial part (preamble) of bcopy. |
| 69 | !-------------------------------------- |
| 70 | prefetch [srcaddr + 0*64], 1 /* prefetch function code 1 on every prefetch in this file */ |
| 71 | prefetch [srcaddr + 1*64], 1 |
| 72 | prefetch [srcaddr + 2*64], 1 |
| 73 | prefetch [srcaddr + 3*64], 1 |
| 74 | ldd [srcaddr + 0*8], %f0 /* first doubleword of source block 0 */ |
| 75 | prefetch [srcaddr + 4*64], 1 |
| 76 | #if FETCH_AHEAD>=4 |
| 77 | prefetch [srcaddr + (4+1)*64], 1 |
| 78 | #endif |
| 79 | #if FETCH_AHEAD>=5 |
| 80 | prefetch [srcaddr + (5+1)*64], 1 |
| 81 | #endif |
| 82 | #if FETCH_AHEAD>=6 |
| 83 | prefetch [srcaddr + (6+1)*64], 1 |
| 84 | #endif |
| 85 | #if FETCH_AHEAD>=7 |
| 86 | prefetch [srcaddr + (7+1)*64], 1 |
| 87 | #endif |
| 88 | ldd [srcaddr + 1*8], %f2 |
| 89 | ldd [srcaddr + 2*8], %f4 |
| 90 | faligndata %f0, %f2, %f32 /* aligned output doubleword 0 of the block staged in %f32-%f46 */ |
| 91 | ldd [srcaddr + 3*8], %f6 |
| 92 | faligndata %f2, %f4, %f34 |
| 93 | ldd [srcaddr + 4*8], %f8 |
| 94 | faligndata %f4, %f6, %f36 |
| 95 | ldd [srcaddr + 5*8], %f10 |
| 96 | faligndata %f6, %f8, %f38 |
| 97 | ldd [srcaddr + 6*8], %f12 |
| 98 | faligndata %f8, %f10, %f40 |
| 99 | ldd [srcaddr + 7*8], %f14 |
| 100 | faligndata %f10, %f12, %f42 |
| 101 | ldd [srcaddr + 8*8], %f16 /* ninth doubleword: the first one of the next block, needed to align output doubleword 7 */ |
| 102 | #if FETCH_AHEAD>=8 |
| 103 | prefetch [srcaddr + (8+1)*64], 1 |
| 104 | #endif |
| 105 | subcc count, 64, count /* one source block has been consumed */ |
| 106 | be,pn %xcc,tidy_up /* count reached zero, so only one block was requested: skip the loop */ |
| 107 | add srcaddr, 64, srcaddr /* delay slot: executes whether or not the branch is taken */ |
| 108 | /* On entry to every loop pass: %f32-%f42 hold aligned doublewords 0-5 of the block still to be stored, %f12, %f14 and %f16 hold the raw doublewords needed to finish it, and srcaddr already points at the next source block. */ |
| 109 | ! the loop (the essence of bcopy) |
| 110 | !-------------------------------- |
| 111 | timing_loop: |
| 112 | fmovd %f16, %f0 /* the look-ahead doubleword becomes doubleword 0 of the current block */ |
| 113 | ldd [srcaddr + 1*8], %f2 |
| 114 | faligndata %f12, %f14, %f44 /* finish output doubleword 6 of the previous block */ |
| 115 | ldd [srcaddr + 2*8], %f4 |
| 116 | faligndata %f14, %f0, %f46 /* finish output doubleword 7 of the previous block */ |
| 117 | stda %f32, [dstaddr]ASI_BLK_P /* 64-byte block store of %f32-%f46, which is the previous block */ |
| 118 | ldd [srcaddr + 3*8], %f6 |
| 119 | faligndata %f0, %f2, %f32 /* start refilling %f32-%f42 with the current block */ |
| 120 | ldd [srcaddr + 4*8], %f8 |
| 121 | faligndata %f2, %f4, %f34 |
| 122 | ldd [srcaddr + 5*8], %f10 |
| 123 | faligndata %f4, %f6, %f36 |
| 124 | ldd [srcaddr + 6*8], %f12 |
| 125 | faligndata %f6, %f8, %f38 |
| 126 | ldd [srcaddr + 7*8], %f14 |
| 127 | faligndata %f8, %f10, %f40 |
| 128 | ldd [srcaddr + 8*8], %f16 /* look-ahead doubleword for the next pass */ |
| 129 | prefetch [srcaddr + (FETCH_AHEAD+1)*64], 1 /* keeps the prefetch stream FETCH_AHEAD+1 blocks beyond the current srcaddr */ |
| 130 | faligndata %f10, %f12, %f42 |
| 131 | subcc count, 64, count |
| 132 | add dstaddr, 64, dstaddr /* advance past the block stored earlier in this pass */ |
| 133 | bg,pt %xcc,timing_loop /* repeat while count is still positive */ |
| 134 | add srcaddr, 64, srcaddr /* delay slot: executes on every pass, including the last */ |
| 135 | /* NOTE(review): the final ldd at offset 8*8 reads the doubleword at in_stream + SBLKS*64, which is 8 bytes past the last source block, and the prefetches reach further still. Confirm the padding after in_stream is intended to absorb this. */ |
| 136 | ! the last part of bcopy |
| 137 | ! should handle the remaining partial block here |
| 138 | !----------------------------------------------- |
| 139 | tidy_up: |
| 140 | fmovd %f16, %f0 |
| 141 | faligndata %f12, %f14, %f44 /* complete output doublewords 6 and 7 of the final block */ |
| 142 | faligndata %f14, %f0, %f46 |
| 143 | stda %f32, [dstaddr]ASI_BLK_P /* store the final block. dstaddr was already advanced by the loop, or is still out_stream if the loop was skipped */ |
| 144 | membar #Sync /* presumably here so the block stores complete before the test ends - confirm */ |
| 145 | trap: |
| 146 | ta GOOD_TRAP /* GOOD_TRAP is not defined in this file. Presumably it comes from hboot.s and reports a passing run - confirm */ |
| 147 | user_text_end: |
| 148 | |
| 149 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| 150 | |
| 151 | .global in_stream |
| 152 | .global out_stream |
| 153 | /* Data for the copy test: in_stream is the source buffer and out_stream the destination buffer whose addresses main loads with setx. */ |
| 154 | .data |
| 155 | .align 0x40 /* 64-byte alignment */ |
| 156 | user_data_start: |
| 157 | |
| 158 | .skip 0x40 /* 64 bytes of padding, so in_stream remains 64-byte aligned */ |
| 159 | in_stream: |
| 160 | init_mem(0x20100000, SBLKS*16, 4, +, 0, +, 0x01010001) /* init_mem is a macro defined outside this file. Presumably it emits SBLKS*16 entries of 4 bytes (SBLKS*64 bytes) starting from the value 0x20100000 and stepping by 0x01010001 - confirm against the macro definition */ |
| 161 | |
| 162 | ! offset the out_stream block |
| 163 | .align 0x1000 |
| 164 | .skip 1024 |
| 165 | .skip 192 /* 1024 + 192 = 1216 = 19 * 64, so out_stream sits 0x4c0 bytes past a 0x1000 boundary and is still 64-byte aligned for the block stores */ |
| 166 | out_stream: |
| 167 | init_mem(0x11111111, SBLKS*16, 4, +, 0, +, 0) /* same macro and entry count as in_stream. Presumably a constant 0x11111111 fill that the copy overwrites - confirm */ |
| 168 | |
| 169 | user_data_end: |