* ========== Copyright Header Begin ==========================================
* Hypervisor Software File: errors_subr.s
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* - Do no alter or remove copyright notices
* - Redistribution and use of this software in source and binary forms, with
* or without modification, are permitted provided that the following
* - Redistribution of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistribution in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of Sun Microsystems, Inc. or the names of contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* This software is provided "AS IS," without a warranty of any kind.
* ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
* INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
* MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
* OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
* FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
* DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
* ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
* SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
* You acknowledge that this software is not designed, licensed or
* intended for use in the design, construction, operation or maintenance of
* ========== Copyright Header End ============================================
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
#pragma ident "@(#)errors_subr.s 1.6 07/08/03 SMI"
#include <sys/asm_linkage.h>
/*
 * enable_errors_strand
 *
 * Performs two stores through ASI_ERR_EN:
 *   - CORE_ERRORS_ENABLE      is written at address CORE_ERR_REPORT_EN
 *   - CORE_ERROR_TRAP_ENABLE  is written at address CORE_ERR_TRAP_EN
 *
 * Registers written: %g4 (address), %g5 (setx scratch), %g6 (value).
 *
 * NOTE(review): no return instruction is visible between the last
 * store and SET_SIZE in this view - confirm against the full file.
 */
ENTRY(enable_errors_strand)
/* %g4 = address, %g6 = value (built via scratch %g5), then store */
mov CORE_ERR_REPORT_EN, %g4
setx CORE_ERRORS_ENABLE, %g5, %g6
stxa %g6, [%g4]ASI_ERR_EN
/* same address/value/store sequence for the trap-enable setting */
mov CORE_ERR_TRAP_EN, %g4
setx CORE_ERROR_TRAP_ENABLE, %g5, %g6
stxa %g6, [%g4]ASI_ERR_EN
SET_SIZE(enable_errors_strand)
/*
 * enable_errors_chip
 *
 * Chip-level error setup. In the order visible here the routine deals
 * with:
 *   1. SOC / L2 error steering (SOC_ERRORSTEER_REG, L2_CONTROL_REG)
 *   2. per-bank DRAM registers (ESR, FBD, FBR count), iterating from
 *      bank NO_DRAM_BANKS - 1 and skipping disabled banks
 *   3. per-bank L2 registers (error status, address, notdata, enable),
 *      iterating from bank NO_L2_BANKS - 1 and skipping disabled banks
 *   4. SOC error status, log enable, trap enable and fatal enable
 *
 * Registers written in the visible code: %g1 - %g6.
 *
 * NOTE(review): the numeric labels targeted by the "2f" macro arguments,
 * the loop back-branches, and several loads/stores that would consume
 * the addresses built below are not visible in this view. The comments
 * here describe only what can be seen; confirm against the full file.
 */
ENTRY(enable_errors_chip)
* Target SOC/L2 errors at current strand
/* %g4 = SOC_ERRORSTEER_REG (scratch %g3); %g1 = L2 steering mask */
setx SOC_ERRORSTEER_REG, %g3, %g4
set L2_ERRORSTEER_MASK, %g1
/* NOTE(review): the instruction that loads %g2 before this shift is not visible */
sllx %g2, L2_ERRORSTEER_SHIFT, %g2
setx L2_CONTROL_REG, %g4, %g5
/* %g3 = bank index, starting at the highest L2 bank */
set (NO_L2_BANKS - 1), %g3
SKIP_DISABLED_L2_BANK(%g3, %g4, %g6, 2f)
/* %g4 = bank index scaled by L2_BANK_SHIFT (per-bank register offset) */
sllx %g3, L2_BANK_SHIFT, %g4
* Clear DRAM ESR/FBD/ND for all banks
* Set the DRAM ECC/FBR Error Count registers
/* %g3 = bank index, starting at the highest DRAM bank */
set (NO_DRAM_BANKS - 1), %g3
! skip banks which are disabled. causes hang.
SKIP_DISABLED_DRAM_BANK(%g3, %g4, %g5, 2f)
/* %g5 = DRAM_ESR_BASE; %g2 = bank offset. The add combining them is not visible. */
setx DRAM_ESR_BASE, %g4, %g5
sllx %g3, DRAM_BANK_SHIFT, %g2
stx %g4, [%g2] ! clear DRAM ESR RW1C
stx %g0, [%g2] ! clear DRAM ESR RW
setx DRAM_FBD_BASE, %g4, %g5
sllx %g3, DRAM_BANK_SHIFT, %g4
stx %g0, [%g4] ! clear DRAM FBD SYND RW
setx DRAM_FBR_COUNT_BASE, %g4, %g5
sllx %g3, DRAM_BANK_SHIFT, %g4
/* NOTE(review): this overwrites the bank offset held in %g4; the store of the ratio is not visible */
mov DRAM_ERROR_COUNTER_FBR_RATIO, %g4
! enable L2$ and DRAM error traps
set (NO_L2_BANKS - 1), %g3
! skip banks which are disabled. causes hang.
SKIP_DISABLED_L2_BANK(%g3, %g4, %g5, 2f)
/* For each L2 register below: %g5 = register base, %g4 = bank offset */
setx L2_ERROR_STATUS_REG, %g4, %g5
sllx %g3, L2_BANK_SHIFT, %g4
stx %g5, [%g4] ! clear ESR RW1C
stx %g0, [%g4] ! clear ESR RW
setx L2_ERROR_ADDRESS_REG, %g4, %g5
sllx %g3, L2_BANK_SHIFT, %g4
stx %g0, [%g4] ! clear EAR RW
setx L2_ERROR_NOTDATA_REG, %g4, %g5
sllx %g3, L2_BANK_SHIFT, %g4
stx %g5, [%g4] ! clear NDESR RW1C
stx %g0, [%g4] ! clear NDESR RW
setx L2_ERROR_ENABLE_REG, %g4, %g5
sllx %g3, L2_BANK_SHIFT, %g4
/* set the L2_NCEEN and L2_CEEN bits in %g2; the load and store around this are not visible */
or %g2, (L2_NCEEN | L2_CEEN), %g2
! clear the SOC STATUS register before enabling logs/traps
setx SOC_ERROR_STATUS_REG, %g5, %g6
! enable all SOC error recording -- reset/config?
/* %g6 = register address, %g1 = SOC_ALL_ERRORS; the store itself is not visible */
setx SOC_ERROR_LOG_ENABLE, %g5, %g6
setx SOC_ALL_ERRORS, %g3, %g1
! enable all SOC error traps -- reset/config?
setx SOC_ERROR_TRAP_ENABLE, %g5, %g6
setx SOC_ALL_ERRORS, %g3, %g1
! enable all SOC fatal errors -- reset/config?
setx SOC_FATAL_ERROR_ENABLE, %g5, %g6
setx SOC_FATAL_ERRORS, %g3, %g1
SET_SIZE(enable_errors_chip)
* Print Service Error Report (SER) to console
/*
 * Service error report dump.
 *
 * GET_ERR_DIAG_BUF is invoked with (%g1, %g2); the loads that follow all
 * use %g1 as their base, so %g1 is presumably the diag buffer pointer
 * the macro produces (macro body not visible - confirm).
 *
 * Each load below fetches one ERR_DIAG_RPRT_* field into %g2, using
 * ldx for 64-bit fields, lduh for 16-bit fields and ldub for the 8-bit
 * TL field.
 *
 * NOTE(review): the entry label, most PRINT labels and the instructions
 * that actually print %g2 after each load are not visible in this view.
 */
GET_ERR_DIAG_BUF(%g1, %g2)
PRINT("Error type : 0x");
ldx [%g1 + ERR_DIAG_RPRT_ERROR_TYPE], %g2
PRINT("Report type : 0x");
ldx [%g1 + ERR_DIAG_RPRT_REPORT_TYPE], %g2
ldx [%g1 + ERR_DIAG_RPRT_TOD], %g2
ldx [%g1 + ERR_DIAG_RPRT_EHDL], %g2
ldx [%g1 + ERR_DIAG_RPRT_ERR_STICK], %g2
ldx [%g1 + ERR_DIAG_RPRT_CPUVER], %g2
ldx [%g1 + ERR_DIAG_RPRT_SERIAL], %g2
ldx [%g1 + ERR_DIAG_RPRT_TSTATE], %g2
ldx [%g1 + ERR_DIAG_RPRT_HTSTATE], %g2
ldx [%g1 + ERR_DIAG_RPRT_TPC], %g2
/* narrower fields: 16-bit CPUID and TT, 8-bit TL */
lduh [%g1 + ERR_DIAG_RPRT_CPUID], %g2
lduh [%g1 + ERR_DIAG_RPRT_TT], %g2
ldub [%g1 + ERR_DIAG_RPRT_TL], %g2
* print diag buf data to console
/*
 * Diag buffer data dump.
 *
 * Both GET_ERR_DIAG_BUF and GET_ERR_DIAG_DATA_BUF are invoked with
 * (%g1, %g2); every following load uses %g1 as its base. The macro
 * bodies are not visible, and whether the second invocation is
 * conditional on the first is not shown here - confirm.
 *
 * Layout of the loads:
 *   - five SPARC fields (ISFSR, DSFSR, DSFAR, DESR, DFESR)
 *   - L2 cache ESR / EAR / ND for banks 0 through 7, addressed as
 *     field base + (per-field increment * bank number)
 *   - DRAM ESR / EAR / LOC / CTR / FBD for banks 0 through 3, addressed
 *     the same way
 *
 * Every value is loaded into %g2. NOTE(review): the entry label, some
 * PRINT labels and the instructions that print %g2 after each load are
 * not visible in this view.
 */
GET_ERR_DIAG_BUF(%g1, %g2)
GET_ERR_DIAG_DATA_BUF(%g1, %g2)
/* SPARC fields */
ldx [%g1 + ERR_DIAG_BUF_SPARC_ISFSR], %g2
ldx [%g1 + ERR_DIAG_BUF_SPARC_DSFSR], %g2
ldx [%g1 + ERR_DIAG_BUF_SPARC_DSFAR], %g2
ldx [%g1 + ERR_DIAG_BUF_SPARC_DESR], %g2
ldx [%g1 + ERR_DIAG_BUF_SPARC_DFESR], %g2
/* L2 cache banks 0..7: ESR, EAR, ND per bank */
PRINT("BANK 0: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 0)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 0)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 0)], %g2
PRINT("BANK 1: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 1)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 1)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 1)], %g2
PRINT("BANK 2: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 2)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 2)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 2)], %g2
PRINT("BANK 3: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 3)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 3)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 3)], %g2
PRINT("BANK 4: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 4)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 4)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 4)], %g2
PRINT("BANK 5: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 5)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 5)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 5)], %g2
PRINT("BANK 6: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 6)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 6)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 6)], %g2
PRINT("BANK 7: L2_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ESR + (ERR_DIAG_BUF_L2_CACHE_ESR_INCR * 7)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_EAR + (ERR_DIAG_BUF_L2_CACHE_EAR_INCR * 7)], %g2
ldx [%g1 + ERR_DIAG_BUF_L2_CACHE_ND + (ERR_DIAG_BUF_L2_CACHE_ND_INCR * 7)], %g2
/* DRAM banks 0..3: ESR, EAR, LOC, CTR, FBD per bank */
PRINT("Bank 0: DRAM_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_ESR + (ERR_DIAG_BUF_DRAM_ESR_INCR * 0)], %g2
PRINT(" : DRAM_EAR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_EAR + (ERR_DIAG_BUF_DRAM_EAR_INCR * 0)], %g2
PRINT(" : DRAM_LOC: 0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_LOC + (ERR_DIAG_BUF_DRAM_LOC_INCR * 0)], %g2
ldx [%g1 + ERR_DIAG_BUF_DRAM_CTR + (ERR_DIAG_BUF_DRAM_CTR_INCR * 0)], %g2
PRINT(" : DRAM_FBD :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_FBD + (ERR_DIAG_BUF_DRAM_FBD_INCR * 0)], %g2
PRINT("Bank 1: DRAM_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_ESR + (ERR_DIAG_BUF_DRAM_ESR_INCR * 1)], %g2
PRINT(" : DRAM_EAR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_EAR + (ERR_DIAG_BUF_DRAM_EAR_INCR * 1)], %g2
PRINT(" : DRAM_LOC: 0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_LOC + (ERR_DIAG_BUF_DRAM_LOC_INCR * 1)], %g2
ldx [%g1 + ERR_DIAG_BUF_DRAM_CTR + (ERR_DIAG_BUF_DRAM_CTR_INCR * 1)], %g2
PRINT(" : DRAM_FBD :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_FBD + (ERR_DIAG_BUF_DRAM_FBD_INCR * 1)], %g2
PRINT("Bank 2: DRAM_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_ESR + (ERR_DIAG_BUF_DRAM_ESR_INCR * 2)], %g2
PRINT(" : DRAM_EAR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_EAR + (ERR_DIAG_BUF_DRAM_EAR_INCR * 2)], %g2
PRINT(" : DRAM_LOC: 0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_LOC + (ERR_DIAG_BUF_DRAM_LOC_INCR * 2)], %g2
ldx [%g1 + ERR_DIAG_BUF_DRAM_CTR + (ERR_DIAG_BUF_DRAM_CTR_INCR * 2)], %g2
PRINT(" : DRAM_FBD :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_FBD + (ERR_DIAG_BUF_DRAM_FBD_INCR * 2)], %g2
PRINT("Bank 3: DRAM_ESR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_ESR + (ERR_DIAG_BUF_DRAM_ESR_INCR * 3)], %g2
PRINT(" : DRAM_EAR :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_EAR + (ERR_DIAG_BUF_DRAM_EAR_INCR * 3)], %g2
PRINT(" : DRAM_LOC: 0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_LOC + (ERR_DIAG_BUF_DRAM_LOC_INCR * 3)], %g2
ldx [%g1 + ERR_DIAG_BUF_DRAM_CTR + (ERR_DIAG_BUF_DRAM_CTR_INCR * 3)], %g2
PRINT(" : DRAM_FBD :0x");
ldx [%g1 + ERR_DIAG_BUF_DRAM_FBD + (ERR_DIAG_BUF_DRAM_FBD_INCR * 3)], %g2
* print sun4v erpt data to console
/*
 * print_sun4v_erpt (body; the matching entry label is not visible in
 * this view).
 *
 * GET_ERR_SUN4V_RPRT_BUF is invoked with (%g1, %g2); all loads below use
 * %g1 as their base, so %g1 is presumably the sun4v report buffer
 * pointer (macro body not visible - confirm).
 *
 * Field widths as loaded here:
 *   64-bit (ldx):  G_EHDL, G_STICK, ADDR
 *   32-bit (ld):   EDESC, ATTR, SZ
 *   16-bit (lduh): G_CPUID, G_SECS, ASI, REG
 *
 * Each value lands in %g2. NOTE(review): the PRINT calls that would
 * emit these values are not visible in this view.
 */
GET_ERR_SUN4V_RPRT_BUF(%g1, %g2)
ldx [%g1 + ERR_SUN4V_RPRT_G_EHDL], %g2 ! ehdl
ldx [%g1 + ERR_SUN4V_RPRT_G_STICK], %g2 ! stick
ld [%g1 + ERR_SUN4V_RPRT_EDESC], %g2 ! edesc
ld [%g1 + ERR_SUN4V_RPRT_ATTR], %g2 ! attr
ldx [%g1 + ERR_SUN4V_RPRT_ADDR], %g2 ! addr
ld [%g1 + ERR_SUN4V_RPRT_SZ], %g2 ! sz
lduh [%g1 + ERR_SUN4V_RPRT_G_CPUID], %g2 ! cpuid
lduh [%g1 + ERR_SUN4V_RPRT_G_SECS], %g2 ! secs
lduh [%g1 + ERR_SUN4V_RPRT_ASI], %g2 ! asi/pad
lduh [%g1 + ERR_SUN4V_RPRT_REG], %g2 ! reg
SET_SIZE(print_sun4v_erpt)
/*
 * relocate_error_tables
 *
 * For every error table named below, loads the table's address into
 * %g3 (using %g2 as the setx scratch register) and calls
 * relocate_error_table_entries via HVCALL.
 *
 * The same two-instruction pattern is repeated once per table; only the
 * table symbol changes.
 *
 * NOTE(review): any prologue/epilogue (saving the return address,
 * the final return) is not visible in this view.
 */
ENTRY(relocate_error_tables)
/* MMU and processor error tables */
setx instruction_access_MMU_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx data_access_MMU_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx internal_processor_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx hw_corrected_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx store_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx data_access_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx sw_recoverable_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx instruction_access_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
/* L2 cache, SOC and DRAM error tables */
setx l2c_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx soc_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx dram_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
/* DAU / LDAU / DBU error tables */
setx precise_dau_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx disrupting_dau_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx precise_ldau_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx disrupting_ldau_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
setx dbu_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
/* software abort error table */
setx sw_abort_errors, %g2, %g3
HVCALL(relocate_error_table_entries)
SET_SIZE(relocate_error_tables)
* Relocate the function pointers in an error table
* %g2, %g4, %g6 preserved
/*
 * relocate_error_table_entries
 *
 * In:  %g3 = address of an error table entry (callers load a table
 *            symbol into %g3 before calling)
 *
 * For the entry at %g3, each of the five function-pointer fields
 * (GUEST_REPORT, REPORT, CORRECT, STORM, PRINT) is loaded into %g1 and
 * stored back to the same field. Then the 32-bit ERR_FLAGS word is
 * tested against ERR_LAST_IN_TABLE and %g3 is advanced by
 * ERROR_TABLE_ENTRY_SIZE.
 *
 * Registers written in the visible code: %g1, %g3, condition codes.
 *
 * NOTE(review): the instruction(s) that adjust %g1 between each load
 * and store, the loop label, the branch that consumes the btst result
 * and the return are not visible in this view.
 */
ENTRY(relocate_error_table_entries)
/* load / (adjust - not visible) / store back, once per function pointer */
ldx [%g3 + ERR_GUEST_REPORT_FCN], %g1
stx %g1, [%g3 + ERR_GUEST_REPORT_FCN]
ldx [%g3 + ERR_REPORT_FCN], %g1
stx %g1, [%g3 + ERR_REPORT_FCN]
ldx [%g3 + ERR_CORRECT_FCN], %g1
stx %g1, [%g3 + ERR_CORRECT_FCN]
ldx [%g3 + ERR_STORM_FCN], %g1
stx %g1, [%g3 + ERR_STORM_FCN]
ldx [%g3 + ERR_PRINT_FCN], %g1
stx %g1, [%g3 + ERR_PRINT_FCN]
/* test the last-entry flag, then step %g3 to the next entry */
ld [%g3 + ERR_FLAGS], %g1
btst ERR_LAST_IN_TABLE, %g1
add %g3, ERROR_TABLE_ENTRY_SIZE, %g3
SET_SIZE(relocate_error_table_entries)
* If we get an error trap which we cannot identify we want
* a basic service report (TT, TPC etc) sent to the FERG.
* To make this happen the error_table_entry for that trap
* must have an error report function.
* Is there any useful information we could gather here ?
/*
 * Fetch the diag data buffer using (%g1, %g2).
 * NOTE(review): the enclosing routine's entry label and the rest of its
 * body are not visible in this view; what is done with the buffer
 * cannot be determined from here.
 */
GET_ERR_DIAG_DATA_BUF(%g1, %g2)
* Clear AMB FBDIMM memory errors
* These regs are RWCST, which is write 1 to clear,
* and sticky through a link reset.
/*
 * clear_amb_errors (body; the matching entry label is not visible in
 * this view).
 *
 * The return address in %g7 is saved with STORE_ERR_RETURN_ADDR at the
 * top and recovered with GET_ERR_RETURN_ADDR at the bottom, which frees
 * %g7 for use as a scratch register in between.
 *
 * Visible structure, iterating from DRAM bank NO_DRAM_BANKS - 1 and
 * skipping disabled banks:
 *   - derive a channel loop counter in %g1 from the single-channel-mode
 *     setting (1 => two channels, 0 => one channel)
 *   - derive the maximum AMB ID in %g4 from the DIMM-present setting
 *   - build a configuration-access address for DRAM_FBDIMM_FERR and
 *     then DRAM_FBDIMM_NERR from function / AMB ID / channel
 *   - loop to the next channel while %g1 > 0
 *
 * Registers written in the visible code: %g1 - %g7.
 *
 * NOTE(review): the register loads/stores that use the addresses built
 * here, the numeric labels (targets of "4f" and "1b") and the AMB/bank
 * loop branches are not visible in this view.
 */
STORE_ERR_RETURN_ADDR(%g7, %g1, %g2)
/* %g3 = bank index, starting at the highest DRAM bank */
set (NO_DRAM_BANKS - 1), %g3
! skip banks which are disabled. causes hang.
SKIP_DISABLED_DRAM_BANK(%g3, %g4, %g5, 4f)
* How many channels to clear ?
/* %g1 = DRAM_SNGL_CHNL_MODE_BASE, %g2 = bank offset; the load of the register is not visible */
setx DRAM_SNGL_CHNL_MODE_BASE, %g2, %g1
sllx %g3, DRAM_BANK_SHIFT, %g2
and %g2, 1, %g2 ! %g2 == 1 single channel mode
movrz %g2, 1, %g1 ! loop counter 1 for 2 channels
movrnz %g2, 0, %g1 ! loop counter 0 for 1 channel
* How many DIMMs per channel ?
setx DRAM_DIMM_PRESENT_BASE, %g2, %g4
sllx %g3, DRAM_BANK_SHIFT, %g2
and %g4, 0xf, %g4 ! max AMB ID
sllx %g4, CONFIG_ADDR_AMB_POS, %g6
! %g6 AMB ID of Configuration register access
/* --- FERR: compose function | register | AMB ID in %g5, channel in %g2 --- */
set ((CONFIG_FUNCTION_FBD << CONFIG_FUNCTION_SHIFT) | DRAM_FBDIMM_FERR), %g5
or %g5, %g6, %g5 ! AMB ID
sllx %g1, CONFIG_ADDR_CH_POS, %g2 ! Channel of Configuration register access
! %g5 channel/AMB ID/FERR
/* %g7 = per-bank access-address register, then access-data register */
setx DRAM_CONFIG_REG_ACC_ADDR_BASE, %g2, %g7
sllx %g3, DRAM_BANK_SHIFT, %g2
setx DRAM_CONFIG_REG_ACC_DATA_BASE, %g2, %g7
sllx %g3, DRAM_BANK_SHIFT, %g2
! config registers are RWCST
/* --- NERR: same sequence with DRAM_FBDIMM_NERR --- */
set ((CONFIG_FUNCTION_FBD << CONFIG_FUNCTION_SHIFT) | DRAM_FBDIMM_NERR), %g5
or %g5, %g6, %g5 ! AMB ID
sllx %g1, CONFIG_ADDR_CH_POS, %g2 ! Channel of Configuration register access
! %g5 channel/AMB ID/FERR
setx DRAM_CONFIG_REG_ACC_ADDR_BASE, %g2, %g7
sllx %g3, DRAM_BANK_SHIFT, %g2
setx DRAM_CONFIG_REG_ACC_DATA_BASE, %g2, %g7
sllx %g3, DRAM_BANK_SHIFT, %g2
! config registers are RWCST
/* repeat for the next channel while the counter in %g1 is > 0 */
brgz,pt %g1, 1b ! next channel
/* recover the return address saved at the top */
GET_ERR_RETURN_ADDR(%g7, %g2)
SET_SIZE(clear_amb_errors)
* Determine whether a particular error has been steered to this
* CPU rather than actually occurring on a resource owned by this
* guest. If it has, send the error details to a CPU owned by the
* guest which owns the resource which took the error and then just
* allow this CPU/guest to continue.
/*
 * errors_check_steering
 *
 * Decides whether the current sun4v error report should be rerouted to
 * a different guest. Every early-out branches to
 * errors_check_steering_exit.
 *
 * Visible sequence:
 *   1. exit on the first (not-equal) test
 *   2. load the 32-bit ATTR field of the sun4v report; exit if the
 *      tested value in %g4 is zero
 *   3. load the 64-bit ADDR field into %g4; exit if it compares equal
 *      (CPU_ERR_INVALID_RA is loaded into %g5 just before the branch)
 *   4. RA2PA_RANGE_CHECK the address; fall through exits
 *   5. walk the guests array from CONFIG_GUESTS with a counter starting
 *      at NGUESTS - 1, range-checking the address against each guest
 *   6. exit if no guest matched, or if the match is the current guest
 *
 * Registers written in the visible code: %g2 - %g5, condition codes.
 *
 * NOTE(review): the compare instructions feeding the bne/be branches,
 * branch delay slots, the numeric labels (1f/2f targets), the loop
 * back-branch and the transfer to the rerouting code are not visible in
 * this view.
 */
ENTRY(errors_check_steering)
* Errors causing precise/deferred traps will never require rerouting.
* Also, errors causing hw_corrected_error traps are always corrected
* by the hardware so no guest intervention is required. Only
* sw_recoverable_error traps might require a sun4v guest error
* report to be rerouted to a different guests CPU.
bne,pt %xcc, errors_check_steering_exit
* Only MEM reports might need rerouting
GET_ERR_SUN4V_RPRT_BUF(%g2, %g4)
ld [%g2 + ERR_SUN4V_RPRT_ATTR], %g4 ! attr
/* NOTE(review): the instruction that sets %g3 before this shift, and the and/test of %g4, are not visible */
sllx %g3, SUN4V_MEM_RPRT, %g3
brz,pt %g4, errors_check_steering_exit
* Only MEM reports with a valid RA can be rerouted
ldx [%g2 + ERR_SUN4V_RPRT_ADDR], %g4
setx CPU_ERR_INVALID_RA, %g3, %g5
be,pn %xcc, errors_check_steering_exit
* Does this RA belong to this guest ?
RA2PA_RANGE_CHECK(%g2, %g4, ERPT_MEM_SIZE, 1f, %g5)
ba,pt %xcc, errors_check_steering_exit
* Find the guest which owns this RA.
* For each guest loop through the ra2pa_segment array and check the
* RA against the base/limit
ldx [%g2 + CONFIG_GUESTS], %g2 ! &guests[0]
set NGUESTS - 1, %g3 ! %g3 guest loop counter
RA2PA_RANGE_CHECK(%g2, %g4, ERPT_MEM_SIZE, 2f, %g5)
! we have a valid RA so this is the guest for this error
/* step %g2 to the next guest; %g5 presumably holds the guest struct size here - confirm */
add %g2, %g5, %g2 ! guest++
! no guest found for this RA
ba,pt %xcc, errors_check_steering_exit
! is it for the guest we are running on ? (redundant check ...)
be,pt %xcc, errors_check_steering_exit
! go and finish re-routing this error
* If cpu_reroute_error() returns it has failed to reroute the
* error so just return and take the sun4v report on this guest
errors_check_steering_exit:
SET_SIZE(errors_check_steering)
* re-route an error report (cont'd)
* 1. select one of the active CPUs for that guest
* 2. Copy the data from the error report into that
* 3. Send a VECINTR_ERROR_XCALL to that CPU
* 4. Clear the diag_buf/sun4v erpt in_use bits
/*
 * cpu_reroute_error
 *
 * In:  %g2 = guest struct pointer (GUEST_VCPUS is added to it below)
 *
 * Visible sequence:
 *   1. index into the guest's vcpu array and check the candidate vcpu's
 *      CPU_STATUS against CPU_STATE_RUNNING
 *   2. load that vcpu's strand pointer (CPU_STRAND) into %g1
 *   3. read EHDL, ATTR and STICK out of the current sun4v report buffer,
 *      with the STRAND_REROUTED_* offsets loaded alongside them
 *   4. build an interrupt vector from the target STRAND_ID and write it
 *      to ASI_INTR_UDB_W
 *   5. clear the in_use byte of both the sun4v report buffer and the
 *      diag buffer
 *   6. test ERR_STRANDS_PARKED in the error table entry's flags and
 *      invoke RESUME_ALL_STRANDS
 *
 * Registers written in the visible code: %g1 - %g6, condition codes.
 *
 * NOTE(review): the vcpu search loop, the spin on the rerouted_addr
 * field, the stores into the target strand struct, the branches that
 * consume the cmp/btst results and all exits (including the
 * cpu_reroute_error_exit label) are not visible in this view.
 */
ENTRY_NP(cpu_reroute_error)
* find first live cpu in guest->vcpus
* Then deliver the error to that vcpu, and interrupt
* the strand it is running on to make that happen.
/* %g2 = address of the guest's vcpu array */
add %g2, GUEST_VCPUS, %g2
be,pn %xcc, cpu_reroute_error_exit
/* %g5 = byte offset of vcpu slot %g3 within the array */
mulx %g3, GUEST_VCPUS_INCR, %g5
! check whether this CPU is running guest code ?
ldx [%g1 + CPU_STATUS], %g5
cmp %g5, CPU_STATE_RUNNING
/* %g1 = strand struct the chosen vcpu is bound to */
ldx [%g1 + CPU_STRAND], %g1
* It is possible that the CPUs rerouted data is already in use.
* We use the rerouted_addr field as a spinlock. The target CPU
* will set this to 0 after reading the error data allowing us
* to re-use the rerouting fields.
* See cpu_err_rerouted() below.
set STRAND_REROUTED_ADDR, %g2
! get the data out of the current STRAND's sun4v erpt and store
! in the target STRAND struct
GET_ERR_SUN4V_RPRT_BUF(%g5, %g6)
/* for each field: %g4 = offset in the strand struct, %g6 = value read from the report */
set STRAND_REROUTED_CPU, %g4
ldx [%g5 + ERR_SUN4V_RPRT_G_EHDL], %g6 ! ehdl
set STRAND_REROUTED_EHDL, %g4
ld [%g5 + ERR_SUN4V_RPRT_ATTR], %g6 ! attr
set STRAND_REROUTED_ATTR, %g4
ldx [%g5 + ERR_SUN4V_RPRT_G_STICK], %g6 ! stick
! STICK is probably not necessary. I doubt if FMA checks
! both EHDL/STICK when looking for duplicate reports,
! but it doesn't kill us to do it.
set STRAND_REROUTED_STICK, %g4
! send an x-call to the target CPU
/* %g3 = target strand id shifted into the vector's VCID field */
ldub [%g1 + STRAND_ID], %g3
sllx %g3, INT_VEC_DIS_VCID_SHIFT, %g3
/* NOTE(review): the or that merges %g5 into %g3 is not visible */
mov VECINTR_ERROR_XCALL, %g5
stxa %g3, [%g0]ASI_INTR_UDB_W
* Clear the in_use bit on the sun4v report buffer
GET_ERR_SUN4V_RPRT_BUF(%g2, %g4)
stub %g0, [%g2 + ERR_SUN4V_RPRT_IN_USE]
* Clear the error report in_use field
GET_ERR_DIAG_BUF(%g1, %g2)
stub %g0, [%g1 + ERR_DIAG_RPRT_IN_USE]
* error is rerouted, get out of here
GET_ERR_TABLE_ENTRY(%g1, %g2)
* Does the trap handler for this error park the strands ?
* If yes, resume them here.
ld [%g1 + ERR_FLAGS], %g2
btst ERR_STRANDS_PARKED, %g2
RESUME_ALL_STRANDS(%g3, %g4, %g5, %g6)
* check whether we stored the globals and re-used
* failed to find a guest to send this error to ...
SET_SIZE(cpu_reroute_error)
* An error has been re-routed to this STRAND.
* The EHDL/ADDR/STICK/ATTR have been stored in the STRAND struct
* by the STRAND that originally detected the error.
* Note: STICK may not be strictly necessary
/*
 * cpu_err_rerouted
 *
 * Receiving side of a rerouted error. Visible sequence:
 *   1. call error_handler_sun4v_report
 *   2. fill the sun4v report buffer (base %g2) fields G_CPUID, G_EHDL,
 *      G_STICK, ATTR, ADDR, EDESC and SZ
 *   3. push two values with STRAND_PUSH for use after the report is
 *      queued
 *   4. advance %g2 by ERR_SUN4V_CPU_ERPT and call queue_resumable_erpt
 *   5. clear the report buffer's in_use byte
 *   6. pop a saved value, then locate the vcpu and strand that took the
 *      error and call desched_n_stop_vcpu
 *   7. load CONFIG_HEARTBEAT_CPU
 *
 * Registers written in the visible code: %g1 - %g5.
 *
 * NOTE(review): the loads that turn each STRAND_REROUTED_* offset in
 * %g4 into the field's value, the branches, the second STRAND_POP and
 * the exit are not visible in this view.
 */
ENTRY_NP(cpu_err_rerouted)
set STRAND_REROUTED_ATTR, %g4
HVCALL(error_handler_sun4v_report)
* Must ensure that we get a sun4v report buffer, spin if necessary
GET_ERR_SUN4V_RPRT_BUF(%g2, %g3)
set STRAND_REROUTED_CPU, %g4
/*
 * NOTE(review): this is a 64-bit store, while print_sun4v_erpt reads
 * ERR_SUN4V_RPRT_G_CPUID with a 16-bit lduh - confirm the field width.
 */
stx %g4, [%g2 + ERR_SUN4V_RPRT_G_CPUID]
/* save %g4 for the cleanup after the report is queued */
STRAND_PUSH(%g4, %g3, %g5)
set STRAND_REROUTED_EHDL, %g4
stx %g4, [%g2 + ERR_SUN4V_RPRT_G_EHDL]
set STRAND_REROUTED_STICK, %g4
stx %g4, [%g2 + ERR_SUN4V_RPRT_G_STICK]
set STRAND_REROUTED_ATTR, %g4
stw %g4, [%g2 + ERR_SUN4V_RPRT_ATTR]
STRAND_PUSH(%g4, %g3, %g5)
! keep ADDR after EHDL/STICK/ATTR to avoid race
set STRAND_REROUTED_ADDR, %g4
! Clear the strand->rerouted-addr field now to let other
stx %g1, [%g2 + ERR_SUN4V_RPRT_ADDR]
set EDESC_UE_RESUMABLE, %g4
stw %g4, [%g2 + ERR_SUN4V_RPRT_EDESC]
/* NOTE(review): the instruction that sets %g4 to the size value is not visible */
st %g4, [%g2 + ERR_SUN4V_RPRT_SZ]
* gueue a resumable error report and exit
add %g2, ERR_SUN4V_CPU_ERPT, %g2
HVCALL(queue_resumable_erpt)
* Clear the in_use bit on the sun4v report buffer
GET_ERR_SUN4V_RPRT_BUF(%g2, %g4)
stub %g0, [%g2 + ERR_SUN4V_RPRT_IN_USE]
! get the error CPUID to do the necessary cleanup
STRAND_POP(%g2, %g3) ! ATTR
* This should be a CPU error report for a strand in error
* Must be a different CPU ID for a strand in error
ldub [%g3 + CPU_VID], %g3
* get the vcpu and strand for the vcpu that took the error
/* index the guest's vcpu pointer array by the id in %g1 */
sllx %g1, GUEST_VCPUS_SHIFT, %g1
add %g1, GUEST_VCPUS, %g1
ldx [%g1], %g1 ! err vcpu struct
ldx [%g1 + CPU_STRAND], %g2 ! err strand struct
! deschedule and stop the vcpu
HVCALL(desched_n_stop_vcpu)
* If the heartbeat is disabled then it was running on the failed
* cpu and needs to be restarted on this cpu.
ldx [%g2 + CONFIG_HEARTBEAT_CPU], %g2
* and exit the x-call handler
SET_SIZE(cpu_err_rerouted)
/*
 * strand_in_error
 *
 * Takes the current strand out of service after an error. Visible
 * sequence:
 *   1. update the CONFIG_STACTIVE and CONFIG_STHALT lists in the root
 *      config struct
 *   2. pick a target strand from CONFIG_STIDLE
 *   3. if this strand is the L2 error steering strand (compared against
 *      the field read from L2_CONTROL_REG), hand steering to the target
 *   4. with (this strand, target strand) in (%g1, %g2), call the
 *      interrupt redistribution routines: ssi, heartbeat_disable, piu,
 *      optionally fpga uart, and vdev
 *   5. search the guest's vcpu array for a running vcpu with error
 *      queues configured, on a different and idle strand
 *   6. fill the target strand's rerouted fields and send it a
 *      VECINTR_CPUINERR interrupt through ASI_INTR_UDB_W
 *   7. resume all strands, clear the in_use bytes of the diag and sun4v
 *      report buffers
 *   8. remove this strand from the active and idle lists and write its
 *      bit to CMP_CORE_RUNNING_W1C
 *
 * Registers written in the visible code: %g1 - %g6, condition codes.
 *
 * NOTE(review): the labels (.find_strand, .found_a_strand,
 * .find_cpu_loop, .find_cpu_continue, .found_a_cpu), compares feeding
 * the conditional branches, the STRAND_POPs matching the STRAND_PUSHes,
 * the stores into the target strand struct and the final abort call are
 * not visible in this view.
 */
ENTRY_NP(strand_in_error)
ldub [%g5 + STRAND_ID], %g5
ROOT_STRUCT(%g2) ! config ptr
! clear this strand from the active list
/* the bit manipulation between each load/store pair is not visible */
ldx [%g2 + CONFIG_STACTIVE], %g3
stx %g3, [%g2 + CONFIG_STACTIVE]
! set this strand in the halted list
ldx [%g2 + CONFIG_STHALT], %g3
stx %g3, [%g2 + CONFIG_STHALT]
! find another idle strand for re-targetting
ldx [%g2 + CONFIG_STIDLE], %g3
bnz,a %xcc, .found_a_strand
bne,pn %xcc, .find_strand
* No usable active strands are left in the
* system, force host exit
* handoff L2 Steering CPU
* If we are the steering cpu, migrate it to our chosen one
setx L2_CONTROL_REG, %g3, %g4
ldx [%g4], %g2 ! current setting
/* %g3 = steering strand id extracted from the control register */
srlx %g2, L2_ERRORSTEER_SHIFT, %g3
and %g3, (NSTRANDS - 1), %g3
cmp %g3, %g5 ! is this steering strand ?
! It is the L2 Steering strand. Migrate responsibility to tgt strand
sllx %g3, L2_ERRORSTEER_SHIFT, %g3
/*
 * NOTE(review): andn computes rs1 & ~rs2, so this yields
 * (%g3 & ~%g2). If the intent is to clear this strand's bits out of
 * the control value in %g2, the source operands look reversed -
 * confirm against the full file.
 */
andn %g3, %g2, %g2 ! remove this strand
sllx %g6, L2_ERRORSTEER_SHIFT, %g3
/* argument setup shared by the redistribution calls below */
mov %g5, %g1 ! this strand
mov %g6, %g2 ! target strand
/* %g1/%g2 are pushed before each call; the matching pops are not visible */
STRAND_PUSH(%g1, %g3, %g4)
STRAND_PUSH(%g2, %g3, %g4)
HVCALL(ssi_redistribute_interrupts)
* Disable heartbeat interrupts if they're on this cpu.
* cpu_in_error_finish will invoke heartbeat_enable on the
* remote cpu if the heartbeat was disabled.
STRAND_PUSH(%g1, %g3, %g4)
STRAND_PUSH(%g2, %g3, %g4)
HVCALL(heartbeat_disable)
* if this guest owns a PCIE bus, redirect
ldx [%g4 + CONFIG_PCIE_BUSSES], %g4
ldx [%g4 + PCIE_DEVICE_GUESTP], %g5
STRAND_PUSH(%g1, %g3, %g4)
STRAND_PUSH(%g2, %g3, %g4)
HVCALL(piu_intr_redistribution)
#if defined(CONFIG_FPGA) && defined(CONFIG_FPGA_UART)
* redirect serial uart interrupts
STRAND_PUSH(%g1, %g3, %g4)
STRAND_PUSH(%g2, %g3, %g4)
HVCALL(fpga_uart_intr_redistribute)
#endif /* CONFIG_FPGA && CONFIG_FPGA_UART */
STRAND_PUSH(%g1, %g3, %g4)
STRAND_PUSH(%g2, %g3, %g4)
HVCALL(vdev_intr_redistribution)
* Now pick another VCPU in this guest to target the erpt
* Ensure that the VCPU is not bound to the strand in error
add %g2, GUEST_VCPUS, %g2
! %g2 - array of vcpus in guest
ldx [%g2], %g4 ! vcpu struct
/* skip empty slots in the vcpu array */
brz,pn %g4, .find_cpu_continue
be,pn %xcc, .find_cpu_continue
! check whether this CPU is running guest code ?
ldx [%g4 + CPU_STATUS], %g6
cmp %g6, CPU_STATE_RUNNING
bne,pt %xcc, .find_cpu_continue
! check the error queues.. if not set, not a good candidate
ldx [%g4 + CPU_ERRQR_BASE], %g6
brz,pt %g6, .find_cpu_continue
* find the strand this vcpu is ON, make sure it is idle
* NOTE: currently this check is not necessary, more
* likely when we have sub-strand scheduling
! %g2 - curr vcpu in guest vcpu array
! %g4 - target vcpus struct
STRAND_STRUCT(%g5) ! this strand
ldx [%g4 + CPU_STRAND], %g6 ! vcpu->strand
be,pn %xcc, .find_cpu_continue
! check if the target strand is IDLE
ldub [%g6 + STRAND_ID], %g6 ! vcpu->strand->id
VCPU2ROOT_STRUCT(%g1, %g5)
ldx [%g5 + CONFIG_STIDLE], %g5
bnz,pt %xcc, .found_a_cpu
/* advance to the next slot in the vcpu array */
add %g2, GUEST_VCPUS_INCR, %g2
bne,pn %xcc, .find_cpu_loop
! If we got here, we didn't find a good tgt cpu
! do not send an erpt, exit the guest
! %g4 - target vcpu struct
STRAND_STRUCT(%g1) ! this strand
ldx [%g4 + CPU_STRAND], %g3
* It is possible that the target STRANDs rerouted data is already in use.
* We use the rerouted_addr field as a spinlock. The target strand
* will set this to 0 after reading the error data allowing us
* to re-use the rerouting fields.
* See cpu_err_rerouted() below.
set STRAND_REROUTED_ADDR, %g5
! %g3 target strand struct
ldub [%g1 + CPU_VID], %g6
/* offsets of the rerouted fields; the stores that use them are not visible */
set STRAND_REROUTED_CPU, %g4
set STRAND_REROUTED_EHDL, %g4
set STRAND_REROUTED_ATTR, %g4
set STRAND_REROUTED_STICK, %g4
* Send a xcall to the target cpu so it can finish the work
ldub [%g2 + STRAND_ID], %g2 ! tgt strand id
/* %g5 = (target strand id << VCID shift) | VECINTR_CPUINERR */
sllx %g2, INT_VEC_DIS_VCID_SHIFT, %g5
or %g5, VECINTR_CPUINERR, %g5
stxa %g5, [%g0]ASI_INTR_UDB_W
RESUME_ALL_STRANDS(%g3, %g4, %g5, %g6)
* Clear the error report in_use field
GET_ERR_DIAG_BUF(%g4, %g5)
stub %g0, [%g4 + ERR_DIAG_RPRT_IN_USE]
* Clear the sun4v report in_use field
GET_ERR_SUN4V_RPRT_BUF(%g4, %g5)
stub %g0, [%g4 + ERR_SUN4V_RPRT_IN_USE]
ldub [%g6 + STRAND_ID], %g6
ROOT_STRUCT(%g2) ! %g2 config
add %g2, CONFIG_STACTIVE, %g3
andn %g4, %g6, %g4 ! %g6 my strand
stx %g4, [%g3] ! pull myself off from active CPUs
add %g2, CONFIG_STIDLE, %g2
/*
 * NOTE(review): two things differ from the active-list update just
 * above. (a) The andn operands are (%g6, %g3), giving %g6 & ~%g3,
 * whereas the active-list update uses (list, strand). (b) The result
 * is written with a 32-bit st, while CONFIG_STIDLE is read with ldx
 * elsewhere in this routine. Confirm both against the full file.
 */
andn %g6, %g3, %g3 ! %g6 my strand
st %g3, [%g2] ! remove myself from idle CPUs
/* write this strand's value to the core-running write-1-to-clear register */
mov CMP_CORE_RUNNING_W1C, %g2
stxa %g6, [%g2]ASI_CMP_CHIP
* If we get here someone else resumed this strand by mistake
* hvabort to catch the mistake
SET_SIZE(strand_in_error)
STRAND_PUSH(%g7, %g2, %g3)
GET_ERR_DIAG_BUF(%g1, %g2)
add %g1, ERR_DIAG_ABORT_DATA, %g1
ldx [%g2 + STRAND_ABORT_PC], %g3
stx %g3, [%g1 + ERR_ABORT_PC]
add %g1, ERR_ABORT_VERSION, %g2
mov ABORT_VERSION_INFO_SIZE, %g3
stx %g3, [%g1 + ERR_ABORT_CWP]
! store this strands register windows
add %g1, ERR_ABORT_REG_WINDOWS, %g2
stx %i2, [%g5 + (10 * 8)]
stx %i3, [%g5 + (11 * 8)]
stx %i4, [%g5 + (12 * 8)]
stx %i5, [%g5 + (13 * 8)]
stx %i6, [%g5 + (14 * 8)]
stx %i7, [%g5 + (15 * 8)]
stx %l0, [%g5 + (16 * 8)]
stx %l1, [%g5 + (17 * 8)]
stx %l2, [%g5 + (18 * 8)]
stx %l3, [%g5 + (19 * 8)]
stx %l4, [%g5 + (20 * 8)]
stx %l5, [%g5 + (21 * 8)]
stx %l6, [%g5 + (22 * 8)]
stx %l7, [%g5 + (23 * 8)]
wrpr %g3, %cwp ! restore %cwp
brz,pn %g4, .no_trap_stack
add %g1, ERR_ABORT_TRAP_REGS, %g2
mulx %g4, ERR_TRAP_REGS_SIZE, %g5
stx %g6, [%g5 + ERR_TNPC]
stx %g6, [%g5 + ERR_TSTATE]
stx %g6, [%g5 + ERR_HTSTATE]
wrpr %g3, %tl ! restore %tl
! now I have all those local registers to play with ....
! store this strands register windows
add %g1, ERR_ABORT_GLOBAL_REGS, %l2
wrpr %l7, %gl ! restore %gl
GET_ERR_TABLE_ENTRY(%g3, %g2)
lduw [%g3 + ERR_FLAGS], %g3
bz,pn %xcc, .c_dump_hvabort
* This is an assembler-initiated abort
* This is a C-initiated abort
HV_PRINT_NOTRAP("ABORT: Failure 0x");
! ASM abort errors use sw_abort_errors[0]
setx sw_abort_errors, %g2, %g3
ba error_handler ! tail call
setx sw_abort_errors, %g2, %g3
! C abort errors use sw_abort_errors[1]
set ERROR_TABLE_ENTRY_SIZE, %g2