* ========== Copyright Header Begin ==========================================
* OpenSPARC T2 Processor File: n2_mcu_0_all_bcopy_all_banks.s
* Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved
* 4150 Network Circle, Santa Clara, California 95054, U.S.A.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* For the avoidance of doubt, and except that if any non-GPL license
* choice is available it will apply instead, Sun elects to use only
* the General Public License version 2 (GPLv2) at this time for any
* software where a choice of GPL license versions is made
* available with the language indicating that GPLv2 or any later version
* may be used, or where a choice of which version of the GPL is applied is
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* ========== Copyright Header End ============================================
! Register alias: thread_offset is kept in %i3.  It holds the per-thread
! displacement that is added to the fixed base addresses of the copy passes.
#define thread_offset %i3
! Define number of 64 byte subblocks to copy
! How far ahead is the prefetch stream (in subblocks)
! NOTE(review): the #define lines that the two comments above describe are
! not visible in this view.  SBLKS and FETCH_AHEAD are referenced further
! down and are presumably what they introduce -- confirm in the full file.
!
! FPRS = 0x4 (%g0 combined with the immediate), i.e. only the FEF bit is set.
! The copy code below uses the floating-point registers.
wr %g0, 0x4, %fprs /* make sure fef is 1 */
! Thread id within the core becomes FBD DIMM address bits:
!   thread_offset = %g2 << 34
! The data_page0..7 SECTIONs at the end of this file are spaced
! 0x400000000 (1 << 34) apart, which matches this shift amount.
! NOTE(review): the instructions that place the thread id in %g2 are not
! visible here.
sllx %g2, 34, thread_offset
! Core id bits become a 16MB offset within the FBD DIMM:
!   thread_offset = thread_offset | %g2
! NOTE(review): as shown, %g2 is not rewritten between the sllx above and
! this or, so the same value would feed both fields.  Presumably the lines
! that load the core-id bits into %g2 are missing from this view -- confirm.
or %g2, thread_offset, thread_offset
! Address setup for a chain of copy passes.  Each pass loads srcaddr and
! dstaddr with a base address; the destination of one pass is the source of
! the next:
!   in_stream   -> 0x100000000
!   0x100000000 -> 0x100500000
!   0x100500000 -> 0x100580000
!   0x100580000 -> 0x100600000
!   0x100600000 -> 0x100680000
!   0x100680000 -> 0x100700000
! Every fixed base has thread_offset added to it; in_stream is used as is.
!
! setx takes %g2 as its scratch register, so %g2 is overwritten by each setx.
! alignaddr with %g0 as second operand leaves srcaddr rounded down to an
! 8-byte boundary and records the dropped low three bits in GSR.align, which
! the faligndata instructions in the copy code consume.
!
! NOTE(review): srcaddr and dstaddr are not defined in this view (presumably
! register aliases like thread_offset), and the copy code that must run
! between consecutive setups is not visible either -- as shown the setups
! simply overwrite one another.  Confirm against the full file.

! Pass 0: in_stream -> 0x100000000 + thread_offset
setx in_stream, %g2, srcaddr
!setx out_stream, %g2, dstaddr
setx 0x100000000, %g2, dstaddr
add dstaddr, thread_offset, dstaddr
alignaddr srcaddr, %g0, srcaddr
! Pass 1: 0x100000000 -> 0x100500000 (both + thread_offset)
setx 0x100000000, %g2, srcaddr
add srcaddr, thread_offset, srcaddr
alignaddr srcaddr, %g0, srcaddr
setx 0x100500000, %g2, dstaddr
add dstaddr, thread_offset, dstaddr
! Pass 2: 0x100500000 -> 0x100580000 (both + thread_offset)
setx 0x100500000, %g2, srcaddr
add srcaddr, thread_offset, srcaddr
alignaddr srcaddr, %g0, srcaddr
setx 0x100580000, %g2, dstaddr
add dstaddr, thread_offset, dstaddr
! Pass 3: 0x100580000 -> 0x100600000 (both + thread_offset)
setx 0x100580000, %g2, srcaddr
add srcaddr, thread_offset, srcaddr
alignaddr srcaddr, %g0, srcaddr
setx 0x100600000, %g2, dstaddr
add dstaddr, thread_offset, dstaddr
! Pass 4: 0x100600000 -> 0x100680000 (both + thread_offset)
setx 0x100600000, %g2, srcaddr
add srcaddr, thread_offset, srcaddr
alignaddr srcaddr, %g0, srcaddr
setx 0x100680000, %g2, dstaddr
add dstaddr, thread_offset, dstaddr
! Pass 5: 0x100680000 -> 0x100700000 (both + thread_offset)
setx 0x100680000, %g2, srcaddr
add srcaddr, thread_offset, srcaddr
alignaddr srcaddr, %g0, srcaddr
setx 0x100700000, %g2, dstaddr
add dstaddr, thread_offset, dstaddr
! the initial part (preamble) of bcopy.
!--------------------------------------
! Starts the prefetch stream and fills the first output block before the
! steady-state loop takes over.
!
! Prefetch the first nine 64-byte subblocks of the source (offsets 0*64
! through 8*64) with function code 1 (SPARC V9: prefetch for one read).
prefetch [srcaddr + 0*64], 1
prefetch [srcaddr + 1*64], 1
prefetch [srcaddr + 2*64], 1
prefetch [srcaddr + 3*64], 1
prefetch [srcaddr + 4*64], 1
prefetch [srcaddr + (4+1)*64], 1
prefetch [srcaddr + (5+1)*64], 1
prefetch [srcaddr + (6+1)*64], 1
prefetch [srcaddr + (7+1)*64], 1
! Build the output block in %f32..%f46: each faligndata combines two
! adjacent source doublewords using the byte offset held in GSR.align.
! The 8-byte loads for the doublewords at offsets 5*8..8*8 are interleaved
! with the faligndata instructions.
! NOTE(review): the loads that fill %f0..%f8 are not visible in this view.
faligndata %f0, %f2, %f32
faligndata %f2, %f4, %f34
faligndata %f4, %f6, %f36
ldd [srcaddr + 5*8], %f10
faligndata %f6, %f8, %f38
ldd [srcaddr + 6*8], %f12
faligndata %f8, %f10, %f40
ldd [srcaddr + 7*8], %f14
faligndata %f10, %f12, %f42
! NOTE(review): %f16 is loaded here, but no visible faligndata reads it; the
! last pair in the loop uses %f14 with %f0.  Confirm this is intended.
ldd [srcaddr + 8*8], %f16
! Extend the prefetch stream to subblock 9 (offset (8+1)*64).
prefetch [srcaddr + (8+1)*64], 1
! the loop (the essence of bcopy)
!--------------------------------
! NOTE(review): the loop label, the branch back and the srcaddr/dstaddr
! updates are not visible in this view; only the straight-line body is.
!
! Finish the current output block (%f44, %f46) and write %f32..%f46 to
! dstaddr with a single block store.
faligndata %f12, %f14, %f44
faligndata %f14, %f0, %f46
stda %f32, [dstaddr]ASI_BLK_P
! Start assembling the next output block, interleaving the 8-byte loads for
! offsets 5*8..8*8 with the faligndata instructions.
faligndata %f0, %f2, %f32
faligndata %f2, %f4, %f34
ldd [srcaddr + 5*8], %f10
faligndata %f4, %f6, %f36
ldd [srcaddr + 6*8], %f12
faligndata %f6, %f8, %f38
ldd [srcaddr + 7*8], %f14
faligndata %f8, %f10, %f40
ldd [srcaddr + 8*8], %f16
! Keep the prefetch stream ahead of the loads by prefetching the subblock at
! offset (FETCH_AHEAD+1)*64.  FETCH_AHEAD is defined outside this view.
prefetch [srcaddr + (FETCH_AHEAD+1)*64], 1
faligndata %f10, %f12, %f42
! should handle the remaining partial block here
!-----------------------------------------------
! As visible, this does not treat a partial block specially: it completes
! the output block in %f32..%f46 and writes it with a block store.
faligndata %f12, %f14, %f44
faligndata %f14, %f0, %f46
stda %f32, [dstaddr]ASI_BLK_P
! Pointer increments.  The four srcaddr steps of 0x10 total 0x40 and the
! eight dstaddr steps of 0x8 also total 0x40, i.e. 64 bytes each.
! NOTE(review): identical back-to-back adds suggest that labels or other
! instructions separating them are missing from this view; they may belong
! to different code paths.  Confirm against the full file.
add srcaddr, 0x10, srcaddr
add srcaddr, 0x10, srcaddr
add srcaddr, 0x10, srcaddr
add srcaddr, 0x10, srcaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
add dstaddr, 0x8, dstaddr
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Initialised data for the copy.  init_mem is a macro defined outside this
! file view, and SBLKS is defined outside this view as well.
! NOTE(review): the code references in_stream (and, in a commented-out line,
! out_stream), but neither label is visible here.  Presumably the first
! init_mem is the in_stream contents and the second the out_stream contents
! -- confirm against the full file.
init_mem(0x20100000, SBLKS*16, 4, +, 0, +, 0x01010001)
! offset the out_stream block
! NOTE(review): the directive that performs this offset is not visible here.
init_mem(0x11111111, SBLKS*16, 4, +, 0, +, 0)
! Eight data sections, data_page0..data_page7.  Their DATA_VA values start
! at 0x100000000 and step by 0x400000000 (1 << 34), the same shift that is
! applied when thread_offset is computed.  0x100000000 is also the base
! address used by the first copy pass.
! NOTE(review): the attribute-block wrappers and most attributes are not
! visible in this view, and PA= lines appear only for data_page4..7.
! Confirm the complete attribute lists against the full file.
SECTION data_page0 DATA_VA=0x100000000
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page1 DATA_VA=0x500000000
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page2 DATA_VA=0x900000000
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page3 DATA_VA=0xd00000000
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page4 DATA_VA=0x1100000000
PA=ra2pa(0x1100000000,0),
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page5 DATA_VA=0x1500000000
PA=ra2pa(0x1500000000,0),
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page6 DATA_VA=0x1900000000
PA=ra2pa(0x1900000000,0),
part_0_ctx_nonzero_tsb_config_0,
SECTION data_page7 DATA_VA=0x1d00000000
PA=ra2pa(0x1d00000000,0),
part_0_ctx_nonzero_tsb_config_0,