/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: tso_n1_bcopy.s
* Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved
* 4150 Network Circle, Santa Clara, California 95054, U.S.A.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* For the avoidance of doubt, and except that if any non-GPL license
* choice is available it will apply instead, Sun elects to use only
* the General Public License version 2 (GPLv2) at this time for any
* software where a choice of GPL license versions is made
* available with the language indicating that GPLv2 or any later version
* may be used, or where a choice of which version of the GPL is applied is
* otherwise unspecified.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*
* ========== Copyright Header End ============================================
*/

/*
 * Block-copy (bcopy) diagnostic.
 *
 * Copies SBLKS 64-byte subblocks from in_stream to out_stream using a
 * software-pipelined loop: source data is read as 8-byte doublewords
 * with ldd, passed through faligndata, and written 64 bytes at a time
 * with stda through ASI_BLK_P.  Prefetches are issued ahead of the
 * loads, FETCH_AHEAD controlling the distance.  The program finishes
 * by taking trap GOOD_TRAP; it does not itself compare out_stream
 * against in_stream.
 *
 * Register use:
 *   srcaddr (%l0)  address of the source subblock currently being loaded
 *   dstaddr (%l1)  address of the destination subblock stored next
 *   count   (%l2)  bytes of source whose subblocks are not yet loaded
 *   %g2            scratch register for setx
 *   %f0-%f16       raw source doublewords; %f16 holds the first
 *                  doubleword of the following subblock
 *   %f32-%f46      the 64-byte output block handed to stda
 *
 * The instruction order inside the preamble and the loop is significant:
 * registers are reused as soon as their previous value has been consumed,
 * and the instruction after each branch sits in its delay slot.
 *
 * NOTE(review): GOOD_TRAP and init_mem are not defined in this file;
 * presumably they are provided by hboot.s or the diag build flow - confirm.
 */

/* Symbolic names for the integer registers used by the copy. */
#define srcaddr         %l0
#define dstaddr         %l1
#define count           %l2

/* ASI number used by the stda stores below. */
#define ASI_BLK_P       0xf0

! Define number of 64 byte subblocks to copy
#define SBLKS           128

! How far ahead is the prefetch stream (in subblocks)
#define FETCH_AHEAD     8

#include "hboot.s"

.global main

.text
main:
        wr      %g0, 0x4, %fprs         /* make sure fef is 1 */

setup_addresses:
        setx    in_stream, %g2, srcaddr         ! %g2 is only a scratch register here
        setx    out_stream, %g2, dstaddr
        ! alignaddr rounds srcaddr down to an 8-byte boundary and records the
        ! dropped low address bits as the byte offset used by every faligndata
        ! below.  in_stream sits 0x40 bytes after a .align 0x40, so with the
        ! layout in this file that offset is zero.
        alignaddr srcaddr, %g0, srcaddr

        ! COUNT bytes to copy
        !--------------------
        set     SBLKS, count
        mulx    count, 8*8, count               ! SBLKS subblocks * 64 bytes each

        ! the initial part (preamble) of bcopy.
        !--------------------------------------
        ! Starts the prefetch stream and fills the pipeline: loads the first
        ! subblock plus the first doubleword of the second (9 doublewords) and
        ! produces output doublewords 0-5 of the first block in %f32-%f42.
        ! Output doublewords 6 and 7 are produced at the top of the loop, or in
        ! tidy_up when only one subblock is copied.
        prefetch [srcaddr + 0*64], 1
        prefetch [srcaddr + 1*64], 1
        prefetch [srcaddr + 2*64], 1
        prefetch [srcaddr + 3*64], 1
        ldd     [srcaddr + 0*8], %f0
        prefetch [srcaddr + 4*64], 1
        ! Extra prefetches so that the preamble reaches subblock FETCH_AHEAD+1;
        ! the first loop pass then prefetches the subblock after that.
#if FETCH_AHEAD>=4
        prefetch [srcaddr + (4+1)*64], 1
#endif
#if FETCH_AHEAD>=5
        prefetch [srcaddr + (5+1)*64], 1
#endif
#if FETCH_AHEAD>=6
        prefetch [srcaddr + (6+1)*64], 1
#endif
#if FETCH_AHEAD>=7
        prefetch [srcaddr + (7+1)*64], 1
#endif
        ldd     [srcaddr + 1*8], %f2
        ldd     [srcaddr + 2*8], %f4
        faligndata %f0, %f2, %f32               ! output doubleword 0
        ldd     [srcaddr + 3*8], %f6
        faligndata %f2, %f4, %f34               ! output doubleword 1
        ldd     [srcaddr + 4*8], %f8
        faligndata %f4, %f6, %f36               ! output doubleword 2
        ldd     [srcaddr + 5*8], %f10
        faligndata %f6, %f8, %f38               ! output doubleword 3
        ldd     [srcaddr + 6*8], %f12
        faligndata %f8, %f10, %f40              ! output doubleword 4
        ldd     [srcaddr + 7*8], %f14
        faligndata %f10, %f12, %f42             ! output doubleword 5
        ldd     [srcaddr + 8*8], %f16           ! first doubleword of the next subblock
#if FETCH_AHEAD>=8
        prefetch [srcaddr + (8+1)*64], 1
#endif
        subcc   count, 64, count                ! one subblock has now been loaded
        be,pn   %xcc,tidy_up                    ! nothing more to load: just store it
        add     srcaddr, 64, srcaddr            ! delay slot: advance to the next source subblock

        ! the loop (the essence of bcopy)
        !--------------------------------
        ! Each pass finishes and stores the block loaded on the previous pass
        ! (or in the preamble), and at the same time loads the subblock at
        ! srcaddr and produces its output doublewords 0-5.
        ! On entry: %f12 and %f14 hold doublewords 6 and 7 of the previous
        ! subblock, %f16 holds doubleword 0 of the subblock at srcaddr, and
        ! %f32-%f42 hold output doublewords 0-5 of the previous block.
timing_loop:
        fmovd   %f16, %f0                       ! doubleword 0 was already loaded as offset 8*8 last pass
        ldd     [srcaddr + 1*8], %f2
        faligndata %f12, %f14, %f44             ! previous block, output doubleword 6
        ldd     [srcaddr + 2*8], %f4
        faligndata %f14, %f0, %f46              ! previous block, output doubleword 7
        stda    %f32, [dstaddr]ASI_BLK_P        ! store the completed block (%f32-%f46)
        ldd     [srcaddr + 3*8], %f6
        faligndata %f0, %f2, %f32               ! %f32 onward may be reused after the store was issued
        ldd     [srcaddr + 4*8], %f8
        faligndata %f2, %f4, %f34
        ldd     [srcaddr + 5*8], %f10
        faligndata %f4, %f6, %f36
        ldd     [srcaddr + 6*8], %f12
        faligndata %f6, %f8, %f38
        ldd     [srcaddr + 7*8], %f14
        faligndata %f8, %f10, %f40
        ! On the final pass this load reads the 8 bytes that follow the
        ! SBLKS*64 bytes being copied.
        ldd     [srcaddr + 8*8], %f16
        prefetch [srcaddr + (FETCH_AHEAD+1)*64], 1
        faligndata %f10, %f12, %f42
        subcc   count, 64, count
        add     dstaddr, 64, dstaddr
        bg,pt   %xcc,timing_loop                ! loop while there are subblocks left to load
        add     srcaddr, 64, srcaddr            ! delay slot: executed on every pass

        ! the last part of bcopy
        ! should handle the remaining partial block here
        !-----------------------------------------------
        ! No partial-block handling is implemented; the byte count set up
        ! above is SBLKS*64 and therefore always a whole number of subblocks.
        ! This code only completes and stores the final block that is still in
        ! the pipeline.
tidy_up:
        fmovd   %f16, %f0
        faligndata %f12, %f14, %f44             ! final block, output doubleword 6
        faligndata %f14, %f0, %f46              ! final block, output doubleword 7
        stda    %f32, [dstaddr]ASI_BLK_P
        ! NOTE(review): presumably ensures the last block store has completed
        ! before the diag signals its result - confirm.
        membar  #Sync

        ! End of test.  NOTE(review): GOOD_TRAP is defined outside this file;
        ! presumably it reports a pass to the test environment - confirm.
trap:
        ta      GOOD_TRAP

user_text_end:

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

.global in_stream
.global out_stream

.data
.align 0x40
user_data_start:
        .skip 0x40

! Source buffer; 64-byte aligned (0x40 bytes past a 0x40-aligned address).
! NOTE(review): init_mem is defined outside this file; the arguments look
! like a start value, SBLKS*16 elements of 4 bytes (SBLKS*64 bytes in total)
! and a per-element increment - confirm against the macro definition.
in_stream:
init_mem(0x20100000, SBLKS*16, 4, +, 0, +, 0x01010001)

! offset the out_stream block
! 1024 + 192 = 1216 bytes past a 4 KB boundary, which is a multiple of 64,
! so out_stream is 64-byte aligned as well.
.align 0x1000
        .skip 1024
        .skip 192
out_stream:
init_mem(0x11111111, SBLKS*16, 4, +, 0, +, 0)

user_data_end: