/* * ========== Copyright Header Begin ========================================== * * Hypervisor Software File: cpu_errs.s * * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. * * - Do no alter or remove copyright notices * * - Redistribution and use of this software in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * - Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * - Redistribution in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Sun Microsystems, Inc. or the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided "AS IS," without a warranty of any kind. * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN * MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN * OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR * FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE * DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, * ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF * SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. * * You acknowledge that this software is not designed, licensed or * intended for use in the design, construction, operation or maintenance of * any nuclear facility. 
*
 * ========== Copyright Header End ============================================
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.ident	"@(#)cpu_errs.s 1.82 07/05/03 SMI"

/*
 * NOTE(review): the header names are missing from the #include
 * directives below in this copy of the file; restore them from the
 * original source before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * HW issues err. HV attempts to handle the error where appropriate.
 * HV translates it to a sun4v format. Sends it to the queue.
 */

/*
 * ASMCALL_DUMP_L2_DATA_FOR_CE
 *
 * Branches to dump_l2_set_tag_data_ecc so that the L2$ set diagnostic
 * data is dumped into the error report.
 *
 *	pa	MUST be %g1. Loaded with the 64-bit value stored at
 *		ce_rpt + STRAND_EVBSC_L2_AFAR(0); first argument of
 *		the function.
 *	rpt	MUST be %g2. Set to strand + STRAND_CE_RPT; second
 *		argument of the function.
 *	scr	scratch register, handed to STRAND_STRUCT().
 *	retpc	MUST be %g7. Receives %pc in the branch delay slot and
 *		is used for the return address.
 */
/* BEGIN CSTYLED */
#define	ASMCALL_DUMP_L2_DATA_FOR_CE(pa, rpt, scr, retpc)		\
	STRAND_STRUCT(scr)						;\
	add	scr, STRAND_CE_RPT, rpt	/* rpt -> ce_rpt */		;\
	add	rpt, STRAND_EVBSC_L2_AFAR(0), pa			;\
	ldx	[pa], pa		/* pa = physical address */	;\
	ba	dump_l2_set_tag_data_ecc				;\
	  rd	%pc, retpc		/* delay slot */
/* END CSTYLED */

/*
 * SET_CPU_IN_ERROR
 *
 * Stores CPU_STATE_ERROR into the CPU_STATUS field of the structure
 * returned by VCPU_STRUCT(). Both arguments are scratch registers and
 * are overwritten.
 */
/* BEGIN CSTYLED */
#define	SET_CPU_IN_ERROR(vcpup, val)					\
	VCPU_STRUCT(vcpup)		/* FIXME: or strand? */		;\
	mov	CPU_STATE_ERROR, val					;\
	stx	val, [vcpup + CPU_STATUS]
/* END CSTYLED */

/*
 * ASMCALL_RQ_ERPT
 *
 * Queue the UE error report as a resumable error to the guest.
 *
 *	rpt_off	offset of the error report buffer within the strand
 *		struct
 *	strandp	register passed to queue_resumable_erpt as the strand
 *		pointer (the original comment expects %g1)
 *	erptp	register passed as the report pointer (expected %g2)
 *	u3-u6	accepted for call-site compatibility; not referenced
 *	retpc	receives %pc in the branch delay slot (return address)
 */
/* BEGIN CSTYLED */
#define	ASMCALL_RQ_ERPT(rpt_off, strandp, erptp, u3, u4, u5, u6, retpc)	\
	PRINT("queue RESUMABLE\r\n")					;\
	STRAND_STRUCT(strandp)						;\
	add	strandp, rpt_off, erptp	/* erpt buf ptr */		;\
	ba	queue_resumable_erpt	/* %g1 = strand, %g2 = erpt */	;\
	  rd	%pc, retpc		/* delay slot */
/* END CSTYLED */

/*
 * The erpt pointer should be passed in %g6 as %g6 is preserved across
 * print routines. The second argument, reg1, should be %g1, which is
 * used as the argument to PRINTX.
* Arguments:
 *	%g6 - as erpt - pointer to the strand error buffer
 *	%g1 - as reg1
 *	all registers are used.
 *
 * With NIAGARA_BRINGUP defined the macro prints, one labelled field at
 * a time, the diagnostic (vBSC) error report stored at
 * erpt + STRAND_VBSC_ERPT: ehdl, stick, cpuver, SPARC AFSR/AFAR, the
 * JBI error log, the four per-bank L2 ESR/EAR values, the four
 * per-bank DRAM ESR/EAR/ELR/ECR values, tstate, htstate, tpc, cpuid,
 * TT and TL. Without NIAGARA_BRINGUP it expands to nothing.
 *
 * NOTE(review): the "DRAM EARs" row reads STRAND_EVBSC_DRAM_AFAR(n).
 * This assumes that offset macro is defined next to the
 * STRAND_EVBSC_DRAM_AFSR/LOC/CNTR ones used by the neighbouring
 * rows - confirm against the error report header.
 */
#ifdef NIAGARA_BRINGUP
/* BEGIN CSTYLED */
#define	CONSOLE_PRINT_DIAG_ERPT(erpt, reg1)				\
	PRINT("ehdl = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_EHDL], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("stick = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_STICK], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("cpuver = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_CPUVER], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("sparc_afsr = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR], reg1	;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("sparc_afar = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], reg1	;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("jbus_err_log = ")					;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_JBI_ERR_LOG], reg1	;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* L2 error status registers, banks 0-3 */			\
	PRINT("L2 ESRs\r\n")						;\
	ldx	[erpt + STRAND_EVBSC_L2_AFSR(0)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_L2_AFSR(1)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_L2_AFSR(2)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_L2_AFSR(3)], reg1			;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* L2 error address registers, banks 0-3 */			\
	PRINT("L2 EARs\r\n")						;\
	ldx	[erpt + STRAND_EVBSC_L2_AFAR(0)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_L2_AFAR(1)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_L2_AFAR(2)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_L2_AFAR(3)], reg1			;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* DRAM error status registers, banks 0-3 */			\
	PRINT("DRAM ESRs\r\n")						;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFSR(0)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFSR(1)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFSR(2)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFSR(3)], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* DRAM error address registers (not the L2 EARs), banks 0-3 */	\
	PRINT("DRAM EARs\r\n")						;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFAR(0)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFAR(1)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFAR(2)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_AFAR(3)], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* DRAM error location registers, banks 0-3 */			\
	PRINT("DRAM ELRs\r\n")						;\
	ldx	[erpt + STRAND_EVBSC_DRAM_LOC(0)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_LOC(1)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_LOC(2)], reg1			;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_LOC(3)], reg1			;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* DRAM error counter registers, banks 0-3 */			\
	PRINT("DRAM ECRs\r\n")						;\
	ldx	[erpt + STRAND_EVBSC_DRAM_CNTR(0)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_CNTR(1)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_CNTR(2)], reg1		;\
	PRINTX(reg1)							;\
	PRINT(" ")							;\
	ldx	[erpt + STRAND_EVBSC_DRAM_CNTR(3)], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("tstate = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_TSTATE], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("htstate = ")						;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_HTSTATE], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("tpc = ")							;\
	ldx	[erpt + STRAND_VBSC_ERPT + EVBSC_TPC], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	/* cpuid and TT are loaded as halfwords, TL as a byte */	\
	PRINT("cpuid = ")						;\
	lduh	[erpt + STRAND_VBSC_ERPT + EVBSC_CPUID], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("TT = ")							;\
	lduh	[erpt + STRAND_VBSC_ERPT + EVBSC_TT], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("TL = ")							;\
	ldub	[erpt + STRAND_VBSC_ERPT + EVBSC_TL], reg1		;\
	PRINTX(reg1)							;\
	PRINT("\r\n")							;\
	PRINT("------END-------\r\n")
/* END CSTYLED */
#else	/* NIAGARA_BRINGUP */
#define	CONSOLE_PRINT_DIAG_ERPT(erpt, reg1)
#endif	/* NIAGARA_BRINGUP */

/*
 * Correctable error traps can be taken only if PSTATE.IE = 1.
 * The hypervisor is run with PSTATE.IE = 0, so no CE traps
 * will be taken when running in hypervisor.
Therefore, CE
 * trap handler is entered only from supervisor which means:
 *	- no need to check for %htstate.hpriv
 *	- no need to check for %tstate.gl == MAXGL
 * Assume the CE trap taken when executing in supervisor mode.
 *	If TL > MAXPTL
 *	then
 *		watchdog_reset
 *	else
 *		handle error
 *
 * For CEs no error report is sent to the sun4v guest. Hence
 * the sun4v guest error report members of the erpt struct
 * are not filled in. Only the diagnostic error report is
 * constructed and sent.
 *
 * At entry, PSTATE.IE = 0.
 *
 * Entry points:
 *	ce_poll_entry	called by the error daemon; %g6 -> strand and
 *			%g7 = return address, which is saved in
 *			strand + STRAND_ERR_RET before falling into ce_err
 *	ce_err		CE trap entry
 *	ce_err_ret	sends the diag report if ERR_SEND_DIAG is set in
 *			the strand report flags, then runs the epilogue
 *
 * Exit: if strand + STRAND_ERR_RET is non-zero it is cleared and the
 * handler leaves through HVRET, otherwise through retry.
 *
 * Register usage: wherever possible
 *	g1-3 = scratch
 *	g4-6 : preserved across PRINT* macros
 *	g5   : error report pointer
 *	g6   : strand struct pointer
 */
	ENTRY_NP(ce_poll_entry)	/* entry point for the error daemon */
	stx	%g7, [%g6 + STRAND_ERR_RET]	! save return address

	ENTRY_NP(ce_err)
	/* get strand, CE buffer in %g6-5, they are safe across calls */
	STRAND_ERPT_STRUCT(STRAND_CE_RPT, %g6, %g5)	! g6->strand, g5->strand.ce_rpt

	! get the lock
	SPINLOCK_ENTER_ERRORLOCK(%g1, %g2, %g3)

	! XXX set the buffer busy flag

	PRINT("CE_ERR\r\n")
	CONSOLE_PRINT_ESRS(%g1, %g2, %g3, %g4)

	/*
	 * Niagara PRM Programming Note: To minimize the possibility of
	 * missing notification of another error, software should clear any
	 * multiple error indication as soon as possible.
	 *
	 * Note: - hardware ensures that we will not clear a non-CE error
	 *	   See PRM 12.4.2 Table 12-6.
	 *
	 * The status register is read before and after the address
	 * register; the pair is re-read until two consecutive status
	 * reads match.
	 */
.ce_0:
	ldxa	[%g0]ASI_SPARC_ERR_STATUS, %g4	! SPARC afsr
.ce_rd_sa:
	ldxa	[%g0]ASI_SPARC_ERR_ADDR, %g3	! SPARC afar
	ldxa	[%g0]ASI_SPARC_ERR_STATUS, %g1	! re-read afsr
	cmp	%g1, %g4			! same?
	bnz,a	%xcc, .ce_rd_sa			! no: read both again
	  mov	%g1, %g4			! save last status
	stxa	%g4, [%g0]ASI_SPARC_ERR_STATUS	! clear everything seen
	stx	%g4, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR]	! save afsr
	stx	%g3, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR]	! save afar
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + EVBSC_JBI_ERR_LOG]

	/*
	 * Check to see if there is any error to process
	 */
	CE_CHECK(%g6, %g4, %g1, %g2, %g3)	! strand, spesr,
	bz,a	%xcc, .ce_unlock_exit		! none: exit
	  nop

	/*
	 * Generate a basic error report
	 *
	 * Sparc status & address are already loaded
	 */
	LOAD_BASIC_ERPT(%g6, %g5, %g1, %g2)

	! now we have a base diagnostic error report captured that
	! can be sent to the SC or diagnosis service provider
	!! %g5 -> ce_rpt
	!! %g6 -> strand

	! XXX check for TL saturation - why do this for CEs?
	! Too drastic to watchdog reset a guest on a corrected error!
	! rdpr	%tl, %g3		! get trap level
	! cmp	%g3, MAXPTL		! is it at max?
	! bg,pn	%xcc, 1f		! if TL > MAXPTL, watchdog reset
	! nop

#ifdef DEBUG
	/* fill the diag buffer with a recognisable pattern, last word first */
	.pushlocals
	setx	0xdeadbeefdeadbeef,%g3, %g4
	set	STRAND_VBSC_ERPT + EVBSC_DIAG_BUF + DIAG_BUF_SIZE-8, %g3
1:	stx	%g4, [%g5 + %g3]
	cmp	%g3, STRAND_VBSC_ERPT + EVBSC_DIAG_BUF
	bgu,pt	%xcc, 1b
	  dec	8, %g3
	.poplocals
#endif /* DEBUG */

	/*
	 * At this point we now look for the specific errors:
	 */
	lduw	[%g6 + STRAND_ERR_FLAG], %g3
	btst	ERR_FLAG_SPARC, %g3		! blackout?
	ldx	[%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR], %g3 ! sparc status
	bnz	%xcc, .ce_check_l2		! yes: check l2 dram
	  set	SPARC_CE_BITS, %g4		! (follows the branch)
	btst	%g4, %g3			! any valid CE bit set?
	bz	%xcc, .ce_check_l2		! no SPARC, check L2DRAM
	  nop

	/*
	 * Sparc Errors: test the saved status one CE bit at a time and
	 * dispatch on the first one found.
	 */
	mov	%g5, %g2			! g2 = cpu.ce_erpt

	set	SPARC_ESR_IRC, %g4
	btst	%g4, %g3			! is IRC set?
	bnz	%xcc, .ce_irc_err
	  nop

	set	SPARC_ESR_FRC, %g4
	btst	%g4, %g3			! is FRC set?
	bnz	%xcc, .ce_frc_err
	  nop

	set	SPARC_ESR_DTC, %g4
	btst	%g4, %g3			! is DTC set?
	bnz	%xcc, .ce_dtc_err
	  nop

	set	SPARC_ESR_DDC, %g4
	btst	%g4, %g3			! is DDC set?
	bnz	%xcc, .ce_ddc_err
	  nop

	set	SPARC_ESR_IDC, %g4
	btst	%g4, %g3			! is IDC set?
	bnz	%xcc, .ce_idc_err
	  nop

	set	SPARC_ESR_ITC, %g4
	btst	%g4, %g3			! is ITC set?
	bnz	%xcc, .ce_itc_err
	  nop

	! SPARC ESR may have a CE bit and/or MEC bit set
	set	SPARC_ESR_MEC, %g4
	btst	%g4, %g3			! MEC bit set?
	bnz	%xcc, .ce_just_mec
	  nop

	! should not get here as all CE conditions have been tested
	PRINT("NOTE: Sparc CE: failed to find error bit set!!")
	ba,a	.ce_no_error

	! IRC error handler
.ce_irc_err:
	PRINT("IRC DIAG\r\n")
	! set up %g1 as first arg to irc_check()
	ldx	[%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g1 ! arg1 = EAR
	HVCALL(irc_check)			! %g2 is return value
	cmp	%g2, RF_TRANSIENT
	be	1f				! transient IRC
	  nop
	! persistent IRC error,
	! let storm protection throttle irc and iru reports
	PRINT("persistent IRC error\r\n")
	ba	.ce_sparc_storm			! finish up
	  clr	%g1				! no print or send
1:
	! send the sparc_err_ebl reg to the diag eng
	! NOTE(review): unlike the other report accesses in this handler,
	! this offset does not include STRAND_VBSC_ERPT - confirm intended.
	ldxa	[%g0]ASI_SPARC_ERR_EN, %g1
	stx	%g1, [%g5 + EVBSC_DIAG_BUF + DIAG_BUF_REG_INFO]
	ba,a	.ce_send_sparc_erpt		! send report & finish up

	! Default CE error handler.
	! This just sends the CE diagnostic error report to the
	! vBSC to generate an FMA error report.

	/*
	 * L1 Instruction Cache:
	 */
.ce_itc_err:	/* Tag */
	PRINT("ITC DIAG\r\n")
	DUMP_ICACHE_INFO(STRAND_CE_RPT, %g1, %g5, %g3, %g4, %g2, %g6, %g7)
	ba,a	.ce_send_sparc_erpt

.ce_idc_err:
	PRINT("IDC DIAG\r\n")
	DUMP_ICACHE_INFO(STRAND_CE_RPT, %g1, %g5, %g3, %g4, %g2, %g6, %g7)
	ba,a	.ce_send_sparc_erpt

	/*
	 * L1 Data Cache:
	 */
.ce_dtc_err:	/* Tag */
	PRINT("DTC DIAG\r\n")
	DUMP_DCACHE_INFO(STRAND_CE_RPT, %g6, %g5, %g1, %g2, %g3, %g4, %g7)
	ba,a	.ce_send_sparc_erpt

.ce_ddc_err:	/* Data */
	PRINT("DDC DIAG\r\n")
	DUMP_DCACHE_INFO(STRAND_CE_RPT, %g6, %g5, %g1, %g2, %g3, %g4, %g7)
	ba,a	.ce_send_sparc_erpt

	/*
	 * Float Register Correctable:
	 */
.ce_frc_err:
	PRINT("FRC DIAG\r\n")
	! set up %g1 as first arg to frc_check()
	ldx	[%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g1
	!! %g1 = EAR
	HVCALL(frc_check)
	!! %g2 = return value
	cmp	%g2, RF_TRANSIENT
	be	1f				! transient FRC
	  nop
	! persistent FRC error,
	! let storm protection throttle frc and fru reports
	PRINT("persistent FRC error\r\n")
	ba	.ce_sparc_storm			! finish up
	  clr	%g1				! no print or send
1:
	! send the sparc_err_ebl reg to the diag eng
	! NOTE(review): offset omits STRAND_VBSC_ERPT, as in the IRC path.
	ldxa	[%g0]ASI_SPARC_ERR_EN, %g1
	stx	%g1, [%g5 + EVBSC_DIAG_BUF + DIAG_BUF_REG_INFO]
	ba,a	.ce_send_sparc_erpt		! send report & finish up

.ce_just_mec:
	PRINT("JUST MEC\r\n")
	ba,a	.ce_send_sparc_erpt		! send report & finish up

.ce_send_sparc_erpt:
	/*
	 * Note: this path is taken also for "MEC only" and "nothing found".
	 * It will throttle "false" interrupts.
	 */
	STRAND_STRUCT(%g6)
	add	%g6, STRAND_CE_RPT, %g5		! g5 -> strand.ce_rpt
	set	ERR_SEND_DIAG, %g1
	SET_STRAND_RPTFLAGS(%g6, %g1)

	/*
	 * Storm Prevention:
	 *
	 * This code prevents more than one error every time period from
	 * the group: SPARC Register File & L1$
	 */
.ce_sparc_storm:
	lduw	[%g6 + STRAND_ERR_FLAG], %g2
	btst	ERR_FLAG_SPARC, %g2		! handler installed?
	bnz,pn	%xcc, .ce_sparc_storm_done	! yes
	  bset	ERR_FLAG_SPARC, %g2		! no: set it

	STRAND2CONFIG_STRUCT(%g6, %g1)		! ->configp
	ldx	[%g1 + CONFIG_CE_BLACKOUT], %g1
	brz,a,pn %g1, .ce_sparc_storm_done	! zero: blackout disabled
	  nop
	stw	%g2, [%g6 + STRAND_ERR_FLAG]	! flag as installed
						! g1 = delta tick
	! NOTE(review): the comments expect %g2 to hold the handler address
	! for cyclic_add_rel, yet HVCALL is used on err_set_sparc_bits here
	! - confirm this is what is intended.
	HVCALL(err_set_sparc_bits)		! g2 = handler address
	set	CEEN, %g3			! g3 = arg 0 : bit(s) to set
	clr	%g4				! g4 = arg 1 : not used
	HVCALL(cyclic_add_rel)	/* ( del_tick, address, arg0, arg1 ) */
.ce_sparc_storm_done:
	ba,a	ce_err_ret

	/*
	 * L2DRAM Error Handling:
	 */
	/* g6->strand, g5->ce_rpt */
.ce_check_l2:
	/*
	 * L2DRAM errors are global and may not be valid for this cpu.
	 * Process if PID == ERRORSTEER, or this cpu was sent the error.
	 */
	DUMP_L2_DRAM_ERROR_LOGS(%g6, %g5, %g1, %g2, %g3, %g4, %g7)

	/*
	 * Only one error in one bank will be processed
	 * each pass through here.
	 *
	 * Note: storm prevention will block processing of banks
	 * in a blackout
	 *
	 * Each bank path below is identical apart from the bank number:
	 * if the saved L2 ESR has any L2_ESR_CE_NO_EAR_BITS bit set the
	 * DRAM contents for that bank are cleared, otherwise they are
	 * dumped; then the L2 ESR is cleared and the CE processed.
	 */
	! go through each L2 bank and check for valid CE bits
.ce_check_l2_b0:
	CE_CHECK_L2_ESR(0, %g6, %g4, %g1, %g2)
	bz	%xcc, .ce_check_l2_b1		! check next bank
	  nop

	SET_STRAND_L2BANK(0, %g6, %g7)		! save bank#

	! dump all of the l2 info. must pass the registers as is
	DUMP_L2_SET_TAG_DATA(0, STRAND_CE_RPT, %g6, %g5, %g1, %g2)

	! dram data here since all L2 esr need it
	ldx	[%g6 + STRAND_CE_RPT + STRAND_EVBSC_L2_AFSR(0)], %g4 ! l2esr
	setx	L2_ESR_CE_NO_EAR_BITS, %g1, %g2
	btst	%g4, %g2
	bz,pn	%xcc, 1f
	  nop
	CLEAR_DRAM_CONTENTS(0, STRAND_CE_RPT, %g6, %g5)
	ba	2f
	  nop
1:
	DUMP_DRAM_CONTENTS(0, STRAND_CE_RPT, %g6, %g5, %g1, %g2)
2:
	/* %g6->cpu %g4=l2esr */
	CLEAR_L2_ESR(0, %g4, %g1, %g2)
	/* 6->strand 4=l2esr */
	PROCESS_CE_IN_L2_ESR(0, %g6, %g5, %g4, %g1, %g2, %g3)
	/* 6->strand 5->erpt 4=flags: action */
	ba,a	.ce_l2_all

.ce_check_l2_b1:
	CE_CHECK_L2_ESR(1, %g6, %g4, %g1, %g2)
	bz	%xcc, .ce_check_l2_b2		! check next bank
	  nop

	SET_STRAND_L2BANK(1, %g6, %g7)		! save bank#

	! dump all of the l2 info. must pass the registers as is
	DUMP_L2_SET_TAG_DATA(1, STRAND_CE_RPT, %g6, %g5, %g1, %g2)

	! dram data here since all L2 esr need it
	ldx	[%g6 + STRAND_CE_RPT + STRAND_EVBSC_L2_AFSR(1)], %g4 ! l2esr
	setx	L2_ESR_CE_NO_EAR_BITS, %g1, %g2
	btst	%g4, %g2
	bz,pn	%xcc, 1f
	  nop
	CLEAR_DRAM_CONTENTS(1, STRAND_CE_RPT, %g6, %g5)	! this bank's contents
	ba	2f
	  nop
1:
	DUMP_DRAM_CONTENTS(1, STRAND_CE_RPT, %g6, %g5, %g1, %g2)
2:
	/* %g6->cpu %g4=l2esr */
	CLEAR_L2_ESR(1, %g4, %g1, %g2)
	PROCESS_CE_IN_L2_ESR(1, %g6, %g5, %g4, %g1, %g2, %g3)
	ba,a	.ce_l2_all

.ce_check_l2_b2:
	CE_CHECK_L2_ESR(2, %g6, %g4, %g1, %g2)
	bz	%xcc, .ce_check_l2_b3		! check next bank
	  nop

	SET_STRAND_L2BANK(2, %g6, %g7)		! save bank#

	! dump all of the l2 info. must pass the registers as is
	DUMP_L2_SET_TAG_DATA(2, STRAND_CE_RPT, %g6, %g5, %g1, %g2)

	! dram data here since all L2 esr need it
	ldx	[%g6 + STRAND_CE_RPT + STRAND_EVBSC_L2_AFSR(2)], %g4 ! l2esr
	setx	L2_ESR_CE_NO_EAR_BITS, %g1, %g2
	btst	%g4, %g2
	bz,pn	%xcc, 1f
	  nop
	CLEAR_DRAM_CONTENTS(2, STRAND_CE_RPT, %g6, %g5)	! this bank's contents
	ba	2f
	  nop
1:
	DUMP_DRAM_CONTENTS(2, STRAND_CE_RPT, %g6, %g5, %g1, %g2)
2:
	/* %g6->cpu %g4=l2esr */
	CLEAR_L2_ESR(2, %g4, %g1, %g2)
	PROCESS_CE_IN_L2_ESR(2, %g6, %g5, %g4, %g1, %g2, %g3)
	ba,a	.ce_l2_all

.ce_check_l2_b3:
	CE_CHECK_L2_ESR(3, %g6, %g4, %g1, %g2)
	bz	%xcc, .ce_no_error		! last bank: nothing found
	  nop

	SET_STRAND_L2BANK(3, %g6, %g7)		! save bank#

	! dump all of the l2 info. must pass the registers as is
	DUMP_L2_SET_TAG_DATA(3, STRAND_CE_RPT, %g6, %g5, %g1, %g2)

	! dram data here since all L2 esr need it
	ldx	[%g6 + STRAND_CE_RPT + STRAND_EVBSC_L2_AFSR(3)], %g4 ! l2esr
	setx	L2_ESR_CE_NO_EAR_BITS, %g1, %g2
	btst	%g4, %g2
	bz,pn	%xcc, 1f
	  nop
	CLEAR_DRAM_CONTENTS(3, STRAND_CE_RPT, %g6, %g5)	! this bank's contents
	ba	2f
	  nop
1:
	DUMP_DRAM_CONTENTS(3, STRAND_CE_RPT, %g6, %g5, %g1, %g2)
2:
	/* %g6->cpu %g4=l2esr */
	CLEAR_L2_ESR(3, %g4, %g1, %g2)
	PROCESS_CE_IN_L2_ESR(3, %g6, %g5, %g4, %g1, %g2, %g3)
	ba,a	.ce_l2_all

.ce_l2_all:
	brlz	%g4, .ce_no_error		! no error found - exit now
	  nop
	SET_STRAND_RPTFLAGS(%g6, %g4)

	/*
	 * Storm Prevention:
	 *
	 * This code prevents more than one error every six seconds from
	 * the groups: L2$, DRAM Banks. Since the enables are system wide
	 * we use the error enable bits to indicate the blackout period.
	 * The callback flag is used to indicate if the handler is enabled
	 * on this cpu.
	 */
	/*
	 * There is a very small window where multiple interrupts can be
	 * delivered to more than one cpu.
	 * Only one will get through this set successfully.
	 */
.ce_l2dram_storm:
	GET_STRAND_L2BANK(%g6, %g4)
	BCLR_L2_BANK_EEN(%g4, CEEN, %g1, %g2)	! g4 = bank#
	bz	%xcc, .ce_l2dram_storm_done	! already disabled
	  nop

	mov	ERR_FLAG_L2DRAM, %g1		! L2DRAM flag
	sll	%g1, %g4, %g1			! << bank#
	lduw	[%g6 + STRAND_ERR_FLAG], %g2	! installed flags
	btst	%g1, %g2			! handler installed?
	bnz,pn	%xcc, .ce_l2dram_storm_done	! yes
	  bset	%g1, %g2			! no: set it

	STRAND2CONFIG_STRUCT(%g6, %g1)		! ->configp
	ldx	[%g1 + CONFIG_CE_BLACKOUT], %g1
	brz,a,pn %g1, .ce_l2dram_storm_done	! zero: blackout disabled
	  nop
	stw	%g2, [%g6 + STRAND_ERR_FLAG]	! handler installed
						! g1 = delta tick
	! NOTE(review): as in .ce_sparc_storm, the comments expect %g2 to
	! hold the handler address, yet HVCALL is used - confirm.
	HVCALL(err_set_l2_bits)			! g2 = handler address
	mov	CEEN, %g3			! g3 = arg 0 : bit(s) to set
						! g4 = arg 1 : B5-0: bank #
	HVCALL(cyclic_add_rel)	/* ( del_tick, address, arg0, arg1 ) */
.ce_l2dram_storm_done:
	ba,a	ce_err_ret

	ENTRY_NP(ce_err_ret)
	STRAND_STRUCT(%g6)
	GET_STRAND_RPTFLAGS(%g6, %g4)		! g4: flags: action
	btst	ERR_SEND_DIAG, %g4		! send diag report?
	bz	%xcc, .ce_unlock_exit		! no
	  nop

	! send CE diag report
	add	%g6, STRAND_CE_RPT + STRAND_VBSC_ERPT, %g1	! erpt.vbsc
	add	%g6, STRAND_CE_RPT + STRAND_UNSENT_PKT, %g2	! erpt.unsent flag
	mov	EVBSC_SIZE, %g3			! size
	HVCALL(send_diag_erpt)			! g4-6 clobbered

	STRAND_STRUCT(%g6)			! reload after the call
	SET_STRAND_RPTFLAGS(%g6, %g0)		! clear report flags

	ba,a	.ce_unlock_exit			! handler epilogue

	! XXX CEs should never watchdog_reset a guest???
	! XXX It should also not inadvertently let a guest run at TL > MAXPTL
	! send the error report to the diagnostic service provider
	! before watchdog_guest
	! ba,a	watchdog_guest
	/*NOTREACHED*/

	/*
	 * Sparc and L2DRAM checked with no error to report:
	 */
.ce_no_error:
	! Some other thread beat us to it, or we don't own it, or
	! the blackout(s) have left us nothing to report.
	PRINT("NOTE: No Reportable Error\r\n")

	/*
	 * CE epilogue
	 * The CE error handlers return here after handling the error.
	 */
.ce_unlock_exit:
	/* MUST leave Sparc CEEN enabled to get L2DRAM interrupts! */
	/* Reenable CEEN */
	ldxa	[%g0]ASI_SPARC_ERR_EN, %g1	! get current
	bset	CEEN, %g1			! enable CEEN
	stxa	%g1, [%g0] ASI_SPARC_ERR_EN	!	..
	/*
	 * With CE storm prevention, the CEEN will be reenabled by the
	 * hstick_match handler when errors stop.
	 */

	SPINLOCK_EXIT_ERRORLOCK(%g1)		! release lock
	ba,a	.ce_exit			! exit now

.ce_exit:
	ldx	[%g6 + STRAND_ERR_RET], %g7	! get return address
	brnz,a	%g7, .ce_return			! valid: clear it & return
	  stx	%g0, [%g6+ STRAND_ERR_RET]	!	..
	SET_SIZE(ce_poll_entry)

	! NULL: return from interrupt
	retry					! return from CE interrupt
.ce_return:
	HVRET
	SET_SIZE(ce_err_ret)
	SET_SIZE(ce_err)

/*
 * Disrupting uncorrectable error handler.
 * All of these errors are resumable errors to the guest. I.e. they
 * are not nonresumable errors.
 *
 * At entry, PSTATE.IE = 0, so no further disrupting error traps.
 *
 * The CE error report buffer is used for reporting.
*/
	ENTRY_NP(dis_ue_err)
	/*
	 * A DBU logged in a DRAM ESR is fatal: leave immediately.
	 */
	CHECK_DRAM_ERROR(DRAM_ESR_DBU, %g1, %g2, %g3, %g4)
	bne,pn	%xcc, .fatal_reset_dbu		! DBU seen: bail now
	  nop

	/* %g6 -> strand, %g5 -> strand.ce_rpt; both survive the calls below */
	STRAND_ERPT_STRUCT(STRAND_CE_RPT, %g6, %g5)

	/*
	 * Scrubber-detected UEs (L2 LDSU, DRAM DSU) do not idle the other
	 * strands. On those paths %g1 = ERR_FLAG_STRANDS_NOT_IDLED; on
	 * the idling path %g1 = 0. %g1 is merged into the strand error
	 * flags at .dis_ue_no_idle.
	 */
	CHECK_L2_ERROR(L2_ESR_LDSU, %g1, %g2, %g3)
	bne,pn	%xcc, .dis_ue_no_idle
	  mov	ERR_FLAG_STRANDS_NOT_IDLED, %g1
	CHECK_DRAM_ERROR(DRAM_ESR_DSU, %g1, %g2, %g3, %g4)
	bne,pn	%xcc, .dis_ue_no_idle
	  mov	ERR_FLAG_STRANDS_NOT_IDLED, %g1

	SPINLOCK_IDLE_ALL_STRAND(%g6, %g1, %g2, %g3, %g4)
	! From here on this is the only strand executing
	clr	%g1
.dis_ue_no_idle:
	lduw	[%g6 + STRAND_ERR_FLAG], %g2	! current flags
	bclr	ERR_FLAG_STRANDS_NOT_IDLED, %g2	! drop the old NOT_IDLED bit
	or	%g2, %g1, %g2			! merge in the new state
	stw	%g2, [%g6 + STRAND_ERR_FLAG]	! write flags back

	PRINT("DATA ERR\r\n")
	CONSOLE_PRINT_ESRS(%g1, %g2, %g3, %g4)

	/*
	 * Niagara PRM Programming Note: To minimize the possibility of
	 * missing notification of an error, software should clear any
	 * multiple error indication as soon as possible.
	 *
	 * Read status, then address, then status again; repeat until two
	 * consecutive status reads agree.
	 */
	ldxa	[%g0]ASI_SPARC_ERR_STATUS, %g4	! SPARC afsr
.dis_ue_rd_sa:
	ldxa	[%g0]ASI_SPARC_ERR_ADDR, %g3	! SPARC afar
	ldxa	[%g0]ASI_SPARC_ERR_STATUS, %g1	! afsr again
	cmp	%g1, %g4			! unchanged?
	bne,a	%xcc, .dis_ue_rd_sa		! changed: re-read the pair
	  mov	%g1, %g4			! keep the newest status
	stxa	%g4, [%g0]ASI_SPARC_ERR_STATUS	! clear SPARC afsr

	! ce_rpt.sparc_afsr
	stx	%g4, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR]
	! ce_rpt.sparc_afar
	stx	%g3, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR]
	! no JBI error log for this report
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + EVBSC_JBI_ERR_LOG]

	/*
	 * Build the basic error report; the SPARC status and address
	 * have already been stored above.
	 */
	LOAD_BASIC_ERPT(%g6, %g5, %g1, %g2)

	mov	%g6, %g1			! %g1 = strand
	mov	%g5, %g2			! %g2 = strand.ce_rpt

#ifdef DEBUG
	/* pattern-fill the diag buffer, highest word first */
	.pushlocals
	setx	0xdeadbeefdeadbeef,%g3, %g4
	set	STRAND_VBSC_ERPT + EVBSC_DIAG_BUF + DIAG_BUF_SIZE-8, %g3
1:	stx	%g4, [%g5 + %g3]
	cmp	%g3, STRAND_VBSC_ERPT + EVBSC_DIAG_BUF
	bgu,pt	%xcc, 1b
	  dec	8, %g3
	.poplocals
#endif

	! check for MAU error

	/* Capture the L2 and DRAM error registers as well */
	! %g1 = strand pointer, %g2 = &ce_rpt (error report)
	DUMP_L2_DRAM_ERROR_LOGS(%g1, %g2, %g3, %g4, %g5, %g6, %g7)

	/*
	 * Walk L2 banks 0-3 looking for valid UE bits. Each bank path
	 * differs only in the bank number passed to the macros.
	 */
.dis_ue_check_l2_b0:
	DIS_UE_CHECK_L2_ESR(0, %g1, %g2, %g3, %g4)	! %g1 = L2ESR
	be	%xcc, .dis_ue_check_l2_b1	! nothing here: next bank
	  nop

	/* record the state of the line */
	SAVE_L2_LINE_STATE(0, STRAND_CE_RPT, %g1, %g2)
	!! %g1 = strand
	!! %g2 = erpt
	DUMP_L2_SET_TAG_DATA(0, STRAND_CE_RPT, %g1, %g2, %g1, %g2)
	!! %g1 = cpu
	!! %g2 = cpu.erpt
	ldx	[%g2 + STRAND_EVBSC_L2_AFSR(0)], %g4	! saved l2esr
	CLEAR_L2_ESR(0, %g4, %g5, %g6)			! clear L2 ESR
	PROCESS_DIS_UE_IN_L2_ESR(0, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \
	    .dis_ue_err_ret, .ue_resume_exit)

.dis_ue_check_l2_b1:
	DIS_UE_CHECK_L2_ESR(1, %g1, %g2, %g3, %g4)	! %g1 = L2ESR
	be	%xcc, .dis_ue_check_l2_b2	! nothing here: next bank
	  nop

	/* record the state of the line */
	SAVE_L2_LINE_STATE(1, STRAND_CE_RPT, %g1, %g2)
	!! %g1 = strand
	!! %g2 = erpt
	DUMP_L2_SET_TAG_DATA(1, STRAND_CE_RPT, %g1, %g2, %g1, %g2)
	ldx	[%g2 + STRAND_EVBSC_L2_AFSR(1)], %g4	! saved l2esr
	CLEAR_L2_ESR(1, %g4, %g5, %g6)			! clear L2 ESR
	PROCESS_DIS_UE_IN_L2_ESR(1, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \
	    .dis_ue_err_ret, .ue_resume_exit)

.dis_ue_check_l2_b2:
	DIS_UE_CHECK_L2_ESR(2, %g1, %g2, %g3, %g4)	! %g1 = L2ESR
	be	%xcc, .dis_ue_check_l2_b3	! nothing here: next bank
	  nop

	/* record the state of the line */
	SAVE_L2_LINE_STATE(2, STRAND_CE_RPT, %g1, %g2)
	!! %g1 = strand
	!! %g2 = erpt
	DUMP_L2_SET_TAG_DATA(2, STRAND_CE_RPT, %g1, %g2, %g1, %g2)
	ldx	[%g2 + STRAND_EVBSC_L2_AFSR(2)], %g4	! saved l2esr
	CLEAR_L2_ESR(2, %g4, %g5, %g6)			! clear L2 ESR
	PROCESS_DIS_UE_IN_L2_ESR(2, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \
	    .dis_ue_err_ret, .ue_resume_exit)

.dis_ue_check_l2_b3:
	DIS_UE_CHECK_L2_ESR(3, %g1, %g2, %g3, %g4)	! %g1 = L2ESR
	be	%xcc, .dis_ue_no_error		! XXX spurious?
	  nop

	/* record the state of the line */
	SAVE_L2_LINE_STATE(3, STRAND_CE_RPT, %g1, %g2)
	!! %g1 = strand
	!! %g2 = erpt
	DUMP_L2_SET_TAG_DATA(3, STRAND_CE_RPT, %g1, %g2, %g1, %g2)
	ldx	[%g2 + STRAND_EVBSC_L2_AFSR(3)], %g4	! saved l2esr
	CLEAR_L2_ESR(3, %g4, %g5, %g6)			! clear L2 ESR
	PROCESS_DIS_UE_IN_L2_ESR(3, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \
	    .dis_ue_err_ret, .ue_resume_exit)

	!
	! Every bank has been examined; return
	!
	ba,a	.dis_ue_err_ret			! UE handler epilogue
	/*NOTREACHED*/

.dis_ue_no_error:
	PRINT("NO DIS UE ERROR\r\n")
	! Another thread got there first.
	! No bits in L2, simply return (XXX send a service error report?)

	! send the diag report held in the CE report buffer
	STRAND_STRUCT(%g6)
	add	%g6, STRAND_CE_RPT + STRAND_VBSC_ERPT, %g1	! erpt.vbsc
	add	%g6, STRAND_CE_RPT + STRAND_UNSENT_PKT, %g2	! erpt.unsent flag
	mov	EVBSC_SIZE, %g3			! size
	HVCALL(send_diag_erpt)
	ba,a	.dis_ue_err_ret			! disrupting UE epilogue
	/*NOTREACHED*/
	SET_SIZE(dis_ue_err)

/*
 * General handling of UEs
 *	if HTSTATE[TL].GL == MAXPGL
 *		reset chip and partitions
 *	else if HTSTATE.PRIV == 1
 *		reset chip and partitions
 *	else if TL > MAXPTL then watchdog_reset
 *	else call common handler
 */

/*
 * Uncorrectable error traps can be taken any time NCEEN
 * in the SPARC error status register is set.
 * UEs can occur when executing in the hypervisor, supervisor,
 * or user code.
 *
 * XXX UEs when executing in hypervisor resets the system XXX
 * TL overflow causes guest to be reset
 */
	ENTRY_NP(ue_poll_entry)	/* entry point for the error daemon */
	! %g6->strand
	stx	%g7, [%g6 + STRAND_ERR_RET]	! save return address
	ba,a	ue_err_notrap
	.empty

	ENTRY_NP(ue_err)
	/*
	 * Check for global register saturation and save the current
	 * global register set if necessary.
*/ SAVE_UE_GLOBALS() ue_err_notrap: /* * Check for DBU in DRAM ESR */ CHECK_DRAM_ERROR(DRAM_ESR_DBU, %g1, %g2, %g3, %g4) bnz,pn %xcc, .fatal_reset_dbu ! yes: bail now nop /* get strand, UE buffer in %g6-5, they are safe across calls */ STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g6, %g5) ! g6->strand, g5->strand.ue_rpt /* * check to see if UE occurred in hypervisor * We check early in order to avoid a deadlock situation. * in the previous trap, we were handling either a dis UE or a CE */ rdhpr %htstate, %g1 btst HTSTATE_HPRIV, %g1 bnz %xcc, .ue_get_status_addr ! UE in hypervisor nop SPINLOCK_IDLE_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) ! At this point, this is the only strand executing #ifdef DEBUG ldxa [%g0]ASI_SPARC_ERR_STATUS, %g1 ! SPARC afsr set SPARC_ESR_NCU, %g4 ! Ifetch/Load from IO space bit btst %g4, %g1 ! NCU set? bnz %xcc, .skip_print_esrs ! skip printing ESRs nop PRINT("UE_ERR\r\n") CONSOLE_PRINT_ESRS(%g1, %g2, %g3, %g4) .skip_print_esrs: #endif /* DEBUG */ .ue_get_status_addr: /* * Niagara PRM Programming Note: To minimize the possibility of * missing notification of an error, software should clear the * error indication as soon as possible. */ ldxa [%g0]ASI_SPARC_ERR_STATUS, %g4 ! SPARC afsr .ue_rd_sa: ldxa [%g0]ASI_SPARC_ERR_ADDR, %g3 ! SPARC afar ldxa [%g0]ASI_SPARC_ERR_STATUS, %g1 ! re-read afsr cmp %g1, %g4 ! same? bnz,a %xcc, .ue_rd_sa ! no: read both again mov %g1, %g4 ! save last status stxa %g4, [%g0]ASI_SPARC_ERR_STATUS ! clear everything seen ! save ue_rpt.sparc_afsr stx %g4, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR] ! save ue_rpt.sparc_afar stx %g3, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR] stx %g0, [%g5 + STRAND_VBSC_ERPT + EVBSC_JBI_ERR_LOG] /* * Check to see if there is any error to process */ UE_CHECK(SPARC_UE_MEU_BITS, L2_ESR_UE_BITS, %g4, %g1, %g2, %g3) bz,a %xcc, .ue_resume_exit ! none: exit nop /* * Generate a basic error report * * Sparc status & address are already loaded */ LOAD_BASIC_ERPT(%g6, %g5, %g1, %g2) mov %g6, %g1 ! 
strand mov %g5, %g2 ! strand.ue_erpt #ifdef DEBUG .pushlocals setx 0xdeadbeefdeadbeef,%g3, %g4 set STRAND_VBSC_ERPT + EVBSC_DIAG_BUF + DIAG_BUF_SIZE-8, %g3 1: stx %g4, [%g5 + %g3] cmp %g3, STRAND_VBSC_ERPT + EVBSC_DIAG_BUF bgu,pt %xcc, 1b dec 8, %g3 .poplocals #endif ! set error descriptor to UE resumable set EDESC_UE_RESUMABLE, %g3 ! edesc in guest erpt st %g3, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_EDESC] ! check SPARC ESR for thread-specific errors ! %g3 = saved sparc_afsr ldx [%g2 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR], %g3 set SPARC_UE_MEU_BITS, %g4 btst %g4, %g3 ! any UE or MEU bit set? bz %xcc, .ue_dump_l2 ! no UEs, check L2 nop ! a UE/MEU bit is set in the SPARC ESR. If it is LDAU, then ! it is L2$/DRAM related. set SPARC_ESR_LDAU, %g4 ! LDAU bit btst %g4, %g3 ! LDAU set? bnz %xcc, .ue_ldau_err nop set SPARC_ESR_NCU, %g4 ! NCU bit btst %g4, %g3 ! NCU set? bnz %xcc, .ue_ncu_err nop set SPARC_ESR_IRU, %g4 ! IRU bit btst %g4, %g3 ! IRU set? bnz %xcc, .ue_iru_err nop set SPARC_ESR_FRU, %g4 ! FRU bit btst %g4, %g3 ! FRU set? bnz %xcc, .ue_fru_err nop /* * check to see if UE occurred in hypervisor * We check early in order to avoid a deadlock situation. * in the previous trap, we were handling either a dis UE or a CE */ rdhpr %htstate, %g1 btst HTSTATE_HPRIV, %g1 bnz %xcc, .hpriv_ue ! UE in hypervisor nop set SPARC_ESR_MAU, %g4 ! MAU bit btst %g4, %g3 ! MAU set? bnz %xcc, .ue_mau_err nop set SPARC_ESR_IMDU, %g4 ! IMDU bit btst %g4, %g3 ! IMDU set? bnz %xcc, .ue_imdu_err nop set SPARC_ESR_IMTU, %g4 ! IMTU bit btst %g4, %g3 ! IMTU set? bnz %xcc, .ue_imtu_err nop set SPARC_ESR_DMTU, %g4 ! DMTU bit btst %g4, %g3 ! DMTU set? bnz %xcc, .ue_dmtu_err nop set SPARC_ESR_DMDU, %g4 ! DMDU bit btst %g4, %g3 ! DMDU set? bnz %xcc, .ue_dmdu_err nop set SPARC_ESR_DMSU, %g4 ! DMSU bit btst %g4, %g3 ! DMSU set? bnz %xcc, .ue_dmsu_err nop set SPARC_ESR_MEU, %g4 ! MEU bit btst %g4, %g3 ! MEU set? bnz %xcc, .ue_just_meu_err nop /*NOTREACHED*/ ! 
Should not get here as all UE bits have been tested PRINT("NOTREACHED\r\n") ba,a .ue_send_resume_exit /* * FRU: Float Register File uncorrectable ECC error */ ! If the error is unrecoverable, mark the cpu in error. Else ! fill out the ue error report in cpu structure. send service ! entity diagnosis report, then call precise_ue_err_ret. In ! precise_ue_err_ret, it will queue the error report to guest. .ue_fru_err: STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g2, %g2) ! ->cpu.ue_rpt ldx [%g2 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g1 !! %g1 = sparc afar HVCALL(clear_fregerr) ! %g1 = input, g2 = output !! %g2 contains a 0 if we got FRU after FRC for a persistent error brnz %g2, .ue_not_from_frc ! it is a new FRU nop ! Took an FRU trap from the FRC handler reread. Return to FRC handler PRINT("FRU FROM FRC DIAG\r\n"); STRAND_STRUCT(%g6) SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) RESTORE_UE_GLOBALS() done ! complete reread of reg .ue_not_from_frc: /* * check to see if UE occurred in hypervisor */ rdhpr %htstate, %g3 btst HTSTATE_HPRIV, %g3 bnz %xcc, .hpriv_ue ! UE in hypervisor nop HVCALL(fru_check) ! g2 = status ! %g2 contains whether the error is transient, persistent or a failed RF cmp %g2, RF_TRANSIENT ! transient? bne .ue_fru_cpu ! no: unrecoverable nop ! FRU is recoverable, send a nonresumable error to the guest SET_ERPT_EDESC_EATTR(STRAND_UE_RPT, EATTR_FRF, EDESC_PRECISE_NONRESUMABLE, %g1, %g2, %g3) CLEAR_SPARC_ESR(STRAND_UE_RPT, SPARC_ESR_FRU, %g1, %g2, %g3, %g4) PRINT("FRU DIAG\r\n") ba,a .ue_eer_send_ue_rpt /* FRU is unrecoverable, mark CPU in error */ .ue_fru_cpu: PRINT("CPU in ERROR -FRU\r\n") ! Set the CPU_ERROR status flag SET_CPU_IN_ERROR(%g1, %g2) SET_ERPT_EDESC_EATTR(STRAND_UE_RPT, EATTR_CPU, EDESC_UE_RESUMABLE, %g1, %g2, %g3) ba,a .ue_send_resume_exit /* * IMDU: ITLB Data Parity Error (precise) * Detected on instruction translation as well as with loads * to ASI_ITLB_DATA_ACCESS_REG. 
*/ .ue_imdu_err: PRINT("IMDU DIAG\r\n") STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt ! dump the ITLB entries into cpu.ue_rpt.diag_buf DUMP_ITLB(%g1, %g2, %g3, %g4, %g5, %g6, %g7) mov I_INVALIDATE, %g1 stxa %g0, [%g1] ASI_TLB_INVALIDATE #if 0 /* { FIXME: no longer required */ mov MAP_ITLB, %g1 HVCALL(remap_perm_addr) #endif /* } */ ! log the TLB entries on the console CONSOLE_PRINT_TLB_DATA("ITLB Tag Data\r\n", %g1, %g2, %g3, %g4, \ %g5, %g6, %g7) ! For bringup, dump out the TLB entries after demap page #ifdef NIAGARA_BRINGUP PRINT("IMDU demap\r\n") STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt add %g2, 0x400, %g2 ! use the second 1KB area ! dump the ITLB entries into strand diag buffer area DUMP_ITLB(%g1, %g2, %g3, %g4, %g5, %g6, %g7) ! log the TLB entries on the console for bringup CONSOLE_PRINT_TLB_DATA_2("ITLB Tag Data\r\n", %g1, %g2, %g3, \ %g4, %g5, %g6, %g7) #endif ba,a .ue_send_resume_exit ! resumable error /*NOTREACHED*/ /* * IMTU: ITLB Tag Parity Error * Parity error when accessed via a load from ASI_ITLB_TAG_READ * Action: Reset the platform. */ .ue_imtu_err: PRINT("IMTU DIAG\r\n") ! Can't dump tlb since there is no safe mechanism ba,a .ue_send_rpt_and_abort ! reset /*NOTREACHED*/ /* * DMTU: DTLB Tag Parity Error * Parity error when accessed via a load from ASI_DTLB_TAG_READ * Action: reset the platform. */ .ue_dmtu_err: PRINT("DMTU DIAG\r\n") ! Can't dump tlb since there is no safe mechanism ba,a .ue_send_rpt_and_abort ! reset /*NOTREACHED*/ /* * DMDU: DTLB Data Parity Error on Load and Atomics * Parity error on atomic or load translation as well * as with loads to ASI_DTLB_DATA_ACCESS_REG. */ .ue_dmdu_err: PRINT("DMDU DIAG\r\n") STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt ! dump the DTLB entries into the strand diag buffer DUMP_DTLB(%g1, %g2, %g3, %g4, %g5, %g6, %g7) ! 
log the TLB data on the console CONSOLE_PRINT_TLB_DATA("DTLB Tag Data\r\n", %g1, %g2, %g3, %g4, \ %g5, %g6, %g7) mov D_INVALIDATE, %g1 stxa %g0, [%g1] ASI_TLB_INVALIDATE #if 0 /* { FIXME: no longer required */ mov MAP_DTLB, %g1 HVCALL(remap_perm_addr) #endif /* } */ ! For bringup, dump out the TLB entries after demap page #ifdef NIAGARA_BRINGUP PRINT("after demap\r\n") STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt add %g2, 1024, %g2 ! use next 1KB area ! dump the dtlb entries into the strand diag buffer DUMP_DTLB(%g1, %g2, %g3, %g4, %g5, %g6, %g7) ! log the tlb entries on the console for bringup CONSOLE_PRINT_TLB_DATA_2("DTLB Tag Data\r\n", %g1, %g2, %g3, \ %g4, %g5, %g6, %g7) #endif ba,a .ue_send_resume_exit ! resumable UE /*NOTREACHED*/ /* * IRU: IRF Uncorrectable ECC Error */ .ue_iru_err: STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g2, %g2) ldx [%g2 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g1 !! %g1 = sparc afar HVCALL(clear_iregerr) ! %g1 = input, %g2 = output !! %g2 = 0 if we got IRU after IRC for a persistent error bit brnz %g2, .ue_not_from_irc ! it is a new IRU nop ! Took an IRU trap from the IRC handler reread. Return to IRC handler PRINT("IRU FROM IRC DIAG\r\n") STRAND_STRUCT(%g6) SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) RESTORE_UE_GLOBALS() done ! complete reread of reg .ue_not_from_irc: /* * check to see if UE occurred in hypervisor */ rdhpr %htstate, %g3 btst HTSTATE_HPRIV, %g3 bnz %xcc, .hpriv_ue ! UE in hypervisor nop HVCALL(iru_check) ! g2 = status cmp %g2, RF_TRANSIENT ! transient? bne .ue_iru_cpu ! no: unrecoverable nop ! IRU is recoverable, send a nonresumable error to the guest SET_ERPT_EDESC_EATTR(STRAND_UE_RPT, EATTR_IRF, EDESC_PRECISE_NONRESUMABLE, %g1, %g2, %g3) CLEAR_SPARC_ESR(STRAND_UE_RPT, SPARC_ESR_IRU, %g1, %g2, %g3, %g4) PRINT("IRU DIAG\r\n") .ue_eer_send_ue_rpt: ! 
send the sparc_err_ebl reg to the diag eng STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g1, %g2) ldxa [%g0]ASI_SPARC_ERR_EN, %g3 stx %g3, [%g2 + STRAND_VBSC_ERPT + EVBSC_DIAG_BUF + DIAG_BUF_REG_INFO] ba,a .sendnr_ue_resume_exit /*NOTREACHED*/ ! IRU is unrecoverable, mark CPU in error .ue_iru_cpu: PRINT("CPU in ERROR - IRU\r\n") ! Set the CPU_ERROR status flag SET_CPU_IN_ERROR(%g1, %g2) SET_ERPT_EDESC_EATTR(STRAND_UE_RPT, EATTR_CPU, EDESC_UE_RESUMABLE, %g1, %g2, %g3) ba,a .ue_resume_exit /* * DMSU: DTLB Data Parity Error on Store * Parity error on store translation. */ .ue_dmsu_err: PRINT("DMSU DIAG\r\n") mov D_INVALIDATE, %g1 stxa %g0, [%g1] ASI_TLB_INVALIDATE STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt ! dump the DTLB entries into the strand diag buffer DUMP_DTLB(%g1, %g2, %g3, %g4, %g5, %g6, %g7) ! log the TLB data on the console CONSOLE_PRINT_TLB_DATA("DTLB Tag Data\r\n", %g1, %g2, %g3, %g4, \ %g5, %g6, %g7) #if 0 /* { FIXME: no longer required */ mov MAP_DTLB, %g1 HVCALL(remap_perm_addr) #endif /* } */ ! For bringup we dump the TLB after the demap operation #ifdef NIAGARA_BRINGUP PRINT("DMSU demap\r\n") STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt add %g2, 1024, %g2 ! use the next 1KB area ! dump the dtlb entries DUMP_DTLB(%g1, %g2, %g3, %g4, %g5, %g6, %g7) ! log the dtlb to the console CONSOLE_PRINT_TLB_DATA_2("DTLB Tag Data\r\n", %g1, %g2, %g3, \ %g4, %g5, %g6, %g7) #endif ba,a .ue_send_resume_exit ! resumable error /*NOTREACHED*/ /* * MEU: Multiple Uncorrectable Error bit * Sometimes only the MEU bit will be set. It is treated as * a resumable error. */ .ue_just_meu_err: PRINT("JUST MEU\r\n") ba,a .ue_send_resume_exit ! resumable UE /*NOTREACHED*/ .ue_send_rpt_and_abort: ! send UE diag report STRAND_STRUCT(%g6) add %g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1 ! erpt.vbsc set STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2 add %g6, %g2, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 ! 
size HVCALL(send_diag_erpt) SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) ! abort HV ba,pt %xcc, hvabort rd %pc, %g1 /*NOTREACHED*/ /* * NCU: IO Load/Instruction Fetch Error */ .ue_ncu_err: ! check for io_prot STRAND_STRUCT(%g1) set STRAND_IO_PROT, %g2 ldx [%g1 + %g2], %g2 ! strand.io_prot brz %g2, 1f ! if zero, no error protection nop ! under i/o error protection ! set the i/o error flag in the cpu structure and complete the ! instruction set STRAND_IO_ERROR, %g2 mov 1, %g3 stx %g3, [%g1 + %g2] ! strand.io_error = 1 ! clear JBI_ERR_LOG, JBI_ERR_OVF setx JBI_ERR_LOG, %g3, %g4 ldx [%g4], %g5 stx %g5, [%g4] ! clear JBI_ERROR_LOG setx JBI_ERR_OVF, %g3, %g4 ldx [%g4], %g5 stx %g5, [%g4] ! clear JBI_ERROR_OVF SPINLOCK_RESUME_ALL_STRAND(%g1, %g3, %g4, %g5, %g6) RESTORE_UE_GLOBALS() done ! complete the instruction ! process error 1: PRINT("NCU DIAG\r\n") rdhpr %htstate, %g1 btst HTSTATE_HPRIV, %g1 bnz %xcc, .hpriv_ue nop ! collect all diagnostic data STRAND_STRUCT(%g1) add %g1, STRAND_UE_RPT, %g2 ! %g2 = strand.ue_rpt DUMP_JBI_SSI(%g1, %g2, %g3, %g4, %g5, %g6, %g7) ! clear JBI_ERR_LOG, JBI_ERR_OVF, SSI_LOG setx JBI_ERR_LOG, %g3, %g4 ldx [%g4], %g5 brz %g5, .ue_check_ssi stx %g5, [%g4] ! clear JBI_ERROR_LOG setx JBI_ERR_OVF, %g3, %g4 ldx [%g4], %g5 stx %g5, [%g4] ! clear JBI_ERROR_OVF ba,a .ue_ncu_diag ! check SSI .ue_check_ssi: setx SSI_LOG, %g3, %g4 ldx [%g4], %g5 brz %g5, .ue_no_ncu_info stx %g5, [%g4] ba,a .ue_ncu_diag .ue_no_ncu_info: PRINT("NO ERROR LOGGED IN JBI SSI LOG\r\n") ba,a .ue_ncu_diag .ue_ncu_diag: CONSOLE_PRINT_JBI_SSI("JBI SSI Log\r\n", %g1, %g2, %g3, %g4, \ %g5, %g6, %g7) ! send UE diag report STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g6, %g1) inc STRAND_VBSC_ERPT, %g1 ! erpt.vbsc set STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2 add %g6, %g2, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 ! 
size HVCALL(send_diag_erpt) SET_ERPT_EDESC_EATTR(STRAND_UE_RPT, EATTR_PIO, \ EDESC_PRECISE_NONRESUMABLE, %g4, %g5, %g6) STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g1, %g2) ldx [%g2 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g3 ! VA stx %g3, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR] ba,a precise_ue_err_ret ! UE error epilogue /*NOTREACHED*/ .ue_mau_err: PRINT("MAU DIAG\r\n") ba,a .sendnr_ue_resume_exit ! non-resumable UE epilogue /* * Precise UEs that are nonresumable errors get here. * Here the diagnostic erpt is sent before executing * the handler epilogue. */ .sendnr_ue_resume_exit: ! non-resumable UE epilogue ! send UE diag report STRAND_STRUCT(%g6) add %g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1 ! erpt.vbsc set STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2 add %g6, %g2, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 ! size HVCALL(send_diag_erpt) ba,a precise_ue_err_ret ! UE error epilogue ! %g1 has the strand pointer, %g2 has the UE error report buffer .ue_ldau_err: .ue_dump_l2: DUMP_L2_DRAM_ERROR_LOGS(%g1, %g2, %g3, %g4, %g5, %g6, %g7) /* * check to see if UE occurred in hypervisor * We check early in order to avoid a deadlock situation. * in the previous trap, we were handling either a dis UE or a CE */ rdhpr %htstate, %g1 btst HTSTATE_HPRIV, %g1 bnz %xcc, .hpriv_ue ! UE in hypervisor nop ! check for privileged TL overflow rdpr %tl, %g1 ! get trap level cmp %g1, MAXPTL ! is it at max? bgu,pn %xcc, .tl_overflow ! TL > MAXPTL nop ! check for SPARC_ESR.LDAU ! go through each L2 bank and check for valid UE bits .ue_check_l2_b0: UE_CHECK_L2_ESR(0, %g1, %g2, %g3, %g4) ! %g1 = L2ESR bz %xcc, .ue_check_l2_b1 ! check next bank nop SAVE_L2_LINE_STATE(0, STRAND_UE_RPT, %g1, %g2) DUMP_L2_SET_TAG_DATA(0, STRAND_UE_RPT, %g1, %g2, %g1, %g2) !! %g1->strand !! %g2->erpt ldx [%g2 + STRAND_EVBSC_L2_AFSR(0)], %g4 ! l2esr CLEAR_L2_ESR(0, %g4, %g5, %g6) ! 
clear L2 ESR PROCESS_UE_IN_L2_ESR(0, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \ .sendnr_ue_resume_exit, .ue_senddiag_resume_exit, \ .ue_resume_exit) .ue_check_l2_b1: UE_CHECK_L2_ESR(1, %g1, %g2, %g3, %g4) ! %g1 = L2ESR bz %xcc, .ue_check_l2_b2 ! check next bank nop SAVE_L2_LINE_STATE(1, STRAND_UE_RPT, %g1, %g2) DUMP_L2_SET_TAG_DATA(1, STRAND_UE_RPT, %g1, %g2, %g1, %g2) ldx [%g2 + STRAND_EVBSC_L2_AFSR(1)], %g4 ! l2esr CLEAR_L2_ESR(1, %g4, %g5, %g6) ! clear L2 ESR PROCESS_UE_IN_L2_ESR(1, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \ .sendnr_ue_resume_exit, .ue_senddiag_resume_exit, \ .ue_resume_exit) .ue_check_l2_b2: UE_CHECK_L2_ESR(2, %g1, %g2, %g3, %g4) ! %g1 = L2ESR bz %xcc, .ue_check_l2_b3 ! check next bank nop SAVE_L2_LINE_STATE(2, STRAND_UE_RPT, %g1, %g2) DUMP_L2_SET_TAG_DATA(2, STRAND_UE_RPT, %g1, %g2, %g1, %g2) ldx [%g2 + STRAND_EVBSC_L2_AFSR(2)], %g4 ! l2esr CLEAR_L2_ESR(2, %g4, %g5, %g6) ! clear L2 ESR PROCESS_UE_IN_L2_ESR(2, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \ .sendnr_ue_resume_exit, .ue_senddiag_resume_exit, \ .ue_resume_exit) .ue_check_l2_b3: UE_CHECK_L2_ESR(3, %g1, %g2, %g3, %g4) ! %g1 = L2ESR bz %xcc, .ue_no_error ! XXX spurious? nop SAVE_L2_LINE_STATE(3, STRAND_UE_RPT, %g1, %g2) DUMP_L2_SET_TAG_DATA(3, STRAND_UE_RPT, %g1, %g2, %g1, %g2) ldx [%g2 + STRAND_EVBSC_L2_AFSR(3)], %g4 ! l2esr CLEAR_L2_ESR(3, %g4, %g5, %g6) ! clear L2 ESR PROCESS_UE_IN_L2_ESR(3, %g1, %g2, %g3, %g4, %g5, %g6, %g7, \ .sendnr_ue_resume_exit, .ue_senddiag_resume_exit, \ .ue_resume_exit) ! ! All banks checked, now return ! PRINT("NOTREACHED!\r\n") ba,a .ue_resume_exit .ue_no_error: PRINT("NO_UE_ERROR\r\n") ! some other thread beat us to it. ! no bits in L2, simply return (XXX send a service error report?) .ue_send_resume_exit: /* * Precise UEs that are resumable errors get here. * Here the diagnostic erpt is sent before executing * the instruction retry. */ ! send UE diag report STRAND_STRUCT(%g6) add %g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1 ! 
erpt.vbsc set STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2 add %g6, %g2, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 ! size HVCALL(send_diag_erpt) ba,a .ue_resume_exit ! resumable UE epilogue .tl_overflow: PRINT("TL OVERFLOW\r\n") ! send UE diag report STRAND_STRUCT(%g6) add %g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1 ! erpt.vbsc set STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2 add %g6, %g2, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 ! size HVCALL(send_diag_erpt) RESTORE_UE_GLOBALS() ba,a watchdog_guest .hpriv_ue: ! send UE diag report STRAND_STRUCT(%g6) add %g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1 ! erpt.vbsc set STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2 add %g6, %g2, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 ! size HVCALL(send_diag_erpt) HV_PRINT_SPINLOCK_ENTER(%g1, %g2, %g3) HV_PRINT_NOTRAP("UE in hypervisor - reset the system\r\n") rdpr %tl, %g2 HV_PRINT_NOTRAP("TPC: 0x") rdpr %tpc, %g1 HV_PRINTX_NOTRAP(%g1) HV_PRINT_NOTRAP("\r\n") HV_PRINT_NOTRAP("TT: 0x") rdpr %tt, %g1 HV_PRINTX_NOTRAP(%g1) HV_PRINT_NOTRAP("\r\n") HV_PRINT_NOTRAP("TSTATE: 0x") rdpr %tstate, %g1 HV_PRINTX_NOTRAP(%g1) HV_PRINT_NOTRAP("\r\n") HV_PRINT_SPINLOCK_EXIT(%g1) STRAND_STRUCT(%g6) SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) LEGION_EXIT(3) ! abort HV ba,pt %xcc, hvabort rd %pc, %g1 .err_resume_bad_guest_err_q: SET_CPU_IN_ERROR(%g1, %g2) SET_ERPT_EDESC_EATTR(STRAND_UE_RPT, EATTR_CPU, EDESC_UE_RESUMABLE, %g1, %g2, %g3) ba,a .ue_send_resume_exit ! resumable UE .fatal_reset_dbu: /* this is where we take the system down! */ ! 
don't care how we got here, stop everything now PRINT("Reset the System: sir 0 %o0=1 fatal error\r\n") 1: PRINT("TT 0x") rdpr %tt, %g1 PRINTX(%g1) PRINT(" TL 0x") rdpr %tl, %g2 PRINTX(%g2) PRINT(" TPC 0x") rdpr %tpc, %g1 PRINTX(%g1) PRINT(" TNPC 0x") rdpr %tnpc, %g1 PRINTX(%g1) PRINT(" TSTATE 0x") rdpr %tstate, %g1 PRINTX(%g1) PRINT("\r\n") sub %g2, 1, %g2 brnz %g2, 1b wrpr %g2, %tl mov SIR_TYPE_FATAL_DBU, %o0 sir 0 /* * Disrupting UE error handler epilogue * The disrupting UE error handlers return here after handling * the error * NCEEN was not disabled, so disrupting UE handler did not * mask any UEs. But we could have hit some CEs or other * disrupting UEs whose trap will be taken when we return. * Here we queue up the resumable error report to the guest. * * Disrupting UEs use the CE error buffer */ .dis_ue_err_ret: PRINT("DIS UE_ERR_RET\r\n") /* send diag report to vbsc */ STRAND_STRUCT(%g6) add %g6, STRAND_CE_RPT + STRAND_VBSC_ERPT, %g1 add %g6, STRAND_CE_RPT + STRAND_UNSENT_PKT, %g2 ! erpt.unsent flag mov EVBSC_SIZE, %g3 HVCALL(send_diag_erpt) .dis_ue_err_rerouting: /* * Check if this error needs to be re-routed * Find which L2 ESR is set and check whether the * error requires re-routing. If the ESR is non-zero * but not re-routing, continue as normal. */ setx L2_ESR_REROUTED_BITS, %g5, %g4 STRAND_STRUCT(%g6) add %g6, STRAND_CE_RPT, %g6 ldx [%g6 + STRAND_EVBSC_L2_AFSR(0)], %g5 btst %g5, %g4 bnz,pt %xcc, .dis_ue_err_ret_rerouting mov 0, %g1 ! bank number brnz,pt %g5, .dis_ue_err_ret_no_rerouting nop ldx [%g6 + STRAND_EVBSC_L2_AFSR(1)], %g5 btst %g5, %g4 bnz,pt %xcc, .dis_ue_err_ret_rerouting mov 1, %g1 ! bank number brnz,pt %g5, .dis_ue_err_ret_no_rerouting nop ldx [%g6 + STRAND_EVBSC_L2_AFSR(2)], %g5 btst %g5, %g4 bnz,pt %xcc, .dis_ue_err_ret_rerouting mov 2, %g1 ! bank number brnz,pt %g5, .dis_ue_err_ret_no_rerouting nop ldx [%g6 + STRAND_EVBSC_L2_AFSR(3)], %g5 btst %g5, %g4 bnz,pn %xcc, .dis_ue_err_ret_rerouting mov 3, %g1 ! 
bank number nop ba .dis_ue_err_ret_no_rerouting nop /* * re-route an error report * 1. Get the PA of the error from the diag report * 2. determine whch guest this PA belongs to */ .dis_ue_err_ret_rerouting: ! %g1 bank number ! %g5 L2 ESR ! %g6 strand->ce_rprt /* * Need to get the PA from either the DRAM or L2 EAR */ setx (L2_ESR_DAU | L2_ESR_DSU), %g3, %g2 btst %g5, %g2 be,pt %xcc, .dis_ue_err_ret_rerouting_l2 nop ! DRAM error ! %g1 bank number mulx %g1, EVBSC_DRAM_AFAR_INCR, %g1 add %g1, EVBSC_DRAM_AFAR, %g1 ldx [%g6 + %g1], %g4 ! PA ba .dis_ue_err_ret_rerouting_find_guest nop .dis_ue_err_ret_rerouting_l2: ! %g1 bank number mulx %g1, EVBSC_L2_AFAR_INCR, %g1 add %g1, EVBSC_L2_AFAR, %g1 ldx [%g6 + %g1], %g4 ! PA .dis_ue_err_ret_rerouting_find_guest: /* * Find the guest which owns this PA. * For each guest loop through the ra2pa_segment array and check the * PA against the base/limit * %g4 PA */ ROOT_STRUCT(%g2) ldx [%g2 + CONFIG_GUESTS], %g2 ! &guests[0] set NGUESTS - 1, %g3 ! %g3 guest loop counter 1: ! PA2RA_CONV(guestp, paddr, raddr, scr1, scr2) PA2RA_CONV(%g2, %g4, %g6, %g1, %g5) ! we got a valid RA (%g6), so this is the guest for this PA brz,pt %g5, 4f nop 2: set GUEST_SIZE, %g5 add %g2, %g5, %g2 ! guest++ brnz,pt %g3, 1b dec %g3 ! nguests-- ! no guest found for this PA ba .dis_ue_err_ret_no_rerouting nop 4: ! %g2 &guest ! %g4 PA ! is it for the guest we are running on ? GUEST_STRUCT(%g1) cmp %g1, %g2 be .dis_ue_err_ret_no_rerouting nop ! go and finish re-routing this error ba cpu_reroute_error nop /* * send resumable error report on this CPU */ .dis_ue_err_ret_no_rerouting: ASMCALL_RQ_ERPT(STRAND_CE_RPT, %g1, %g2, %g3, %g4, %g5, %g6, %g7) ba,a .dis_ue_resume_exit #if 1 /* XXXX DEAD CODE */ /* * Precise UE but ressumable error handler epilogue * The precise UE error handlers return here after handling the error * A resumable error will be queued to the affected guest. */ ENTRY_NP(precise_ue_res_ret) PRINT("RES UE_ERR_RET\r\n") ! 
Call the function to queue the resumable report
	ASMCALL_RQ_ERPT(STRAND_UE_RPT, %g1, %g2, %g3, %g4, %g5, %g6, %g7)
	ba,a	.ue_resume_exit
#endif

	/*
	 * Resumable-UE exit that first sends the UE diagnostic report
	 * and then falls through into the common resume epilogue.
	 */
.ue_senddiag_resume_exit:
	! send UE diag report
	STRAND_STRUCT(%g6)
	add	%g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1	! erpt.vbsc
	set	STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2
	add	%g6, %g2, %g2			! erpt.unsent flag
	mov	EVBSC_SIZE, %g3			! size
	HVCALL(send_diag_erpt)

	/*
	 * Common epilogue for disrupting and precise resumable UEs.
	 */
.dis_ue_resume_exit:
.ue_resume_exit:
	! See if CPU is in ERROR and handle the case
	VCPU_STRUCT(%g1)
	IS_CPU_IN_ERROR(%g1, %g2)
	bne	%xcc, .ue_continue
	nop

	! Mark the corresponding strand in error
	HVCALL(strand_in_error)

.ue_continue:
	STRAND_STRUCT(%g6)
	/*
	 * Check whether the UE error handler idled the
	 * strands
	 */
	/*
	 * The bclr sits in the (non-annulled) delay slot of the bnz, so it
	 * runs on both paths; only the .ue_continue_not_idled path stores
	 * the cleared flag word back to the strand structure.
	 */
	lduw	[%g6 + STRAND_ERR_FLAG], %g2
	btst	ERR_FLAG_STRANDS_NOT_IDLED, %g2
	bnz	%xcc, .ue_continue_not_idled	! strands were not idled
	bclr	ERR_FLAG_STRANDS_NOT_IDLED, %g2	! reset STRANDS_IDLED
	SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4)
	ba	.ue_continue_idled		! flag is not set,
	nop					! so skip clearing it
.ue_continue_not_idled:
	stw	%g2, [%g6 + STRAND_ERR_FLAG]	! ..
.ue_continue_idled:
	/*
	 * The stx is in an annulled delay slot: STRAND_ERR_RET is cleared
	 * only when the branch to .ue_return is taken (non-zero address).
	 */
	ldx	[%g6 + STRAND_ERR_RET], %g7	! get return address
	brnz,a	%g7, .ue_return			! valid: clear it & return
	stx	%g0, [%g6+ STRAND_ERR_RET]	! ..

	! NULL: return from interrupt
	RESTORE_UE_GLOBALS()
	retry					! return from UE interrupt
.ue_return:
	HVRET
	SET_SIZE(ue_poll_entry)
	SET_SIZE(ue_err)

	/*
	 * Precise UE error handler epilogue
	 * The precise UE error handlers return here after handling the error
	 * A nonresumable error will be queued to the affected guest.
	 *
	 * Works on the strand's UE error report (STRAND_UE_RPT):
	 *  - fills in the sun4v report's size and address fields,
	 *  - calls queue_nonresumable_erpt,
	 *  - resumes the other strands and branches to
	 *    nonresumable_error_trap.
	 * This routine is entered by branch and does not return to a caller.
	 * %g1 - %g6 are used as scratch.
	 */
	ENTRY_NP(precise_ue_err_ret)
	PRINT("precise_ue_err_ret\r\n")
	! queue nonresumable error report
	STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g1, %g2)
	/*
	 * Translate error address
	 *
	 * When EATTR_PIO, the error PA is in the RA field of the erpt.
	 * For others, check the four L2 AFARs to find a non-zero
	 * address.
	 */
	lduw	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g4
	btst	EATTR_PIO, %g4
	bz,pt	%xcc, .precise_ue_err_ret_mem
	nop
.precise_ue_err_ret_io:
	/* No affected memory region */
	stw	%g0, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_SZ]
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR], %g4
	VCPU_STRUCT(%g1)
	CPU_ERR_IO_PA_TO_RA(%g1, %g4, %g4, %g3, %g5, %g6, .precise_ue_err_ret_io)
	ba,pt	%xcc, 2f
	nop
.precise_ue_err_ret_mem:
	mov	ERPT_MEM_SIZE, %g4
	stw	%g4, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_SZ]
	/* Use the first non-zero L2 AFAR of banks 0..3 as the error PA */
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(0)], %g4
	brnz	%g4, 1f
	nop
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(1)], %g4
	brnz	%g4, 1f
	nop
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(2)], %g4
	brnz	%g4, 1f
	nop
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(3)], %g4
	brnz	%g4, 1f
	nop
	/* All four AFARs are zero: report CPU_ERR_INVALID_RA (delay slot) */
	ba,pt	%xcc, 2f
	mov	CPU_ERR_INVALID_RA, %g4
1:
	VCPU_STRUCT(%g1)	/* FIXME: or strand? */
	CPU_ERR_PA_TO_RA(%g1, %g4, %g4, %g5, %g6)
2:
	stx	%g4, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR]
	/*
	 * NOTE(review): when all four AFARs were zero, VCPU_STRUCT(%g1) was
	 * skipped and %g1 still holds what STRAND_ERPT_STRUCT left in it.
	 * queue_nonresumable_erpt reloads %g1 with VCPU_STRUCT itself, so the
	 * "%g1 = cpup" remark below is not relied upon by the callee.
	 */
	!! %g1 = cpup
	!! %g2 = erpt
	HVCALL(queue_nonresumable_erpt)

	STRAND_STRUCT(%g1)
	SPINLOCK_RESUME_ALL_STRAND(%g1, %g3, %g4, %g5, %g6)

	ba,pt	%xcc, nonresumable_error_trap
	nop
	/*NOTREACHED*/
	SET_SIZE(precise_ue_err_ret)

#if STRAND_SUN4V_ERPT != 0
#error "STRAND_SUN4V_ERPT must be 0"
#endif
	/*
	 * Queue a resumable error report on this CPU
	 * %g1 contains pointer to the STRAND structure
	 * %g2 contains pointer to the error report
	 * (STRAND_SUN4V_ERPT *must* be 0x0 for this to be called generically)
	 *
	 * XXX If there is no free entry in the resumable error queue
	 * print a message and return. XXX
	 *
	 * NOTE(review): the first instruction overwrites %g1 with
	 * VCPU_STRUCT(%g1), so whatever the caller passed in %g1 is not used.
	 * %g3 - %g6 are used as scratch.
	 * %g7 is the return address; every exit is "jmp %g7 + 4".
	 * The routine returns without queueing when the queue base RA is
	 * zero, when a MEM report has no valid RA, or when the queue is full.
	 */
	ENTRY_NP(queue_resumable_erpt)
	VCPU_STRUCT(%g1)
	ldx	[%g1 + CPU_ERRQR_BASE_RA], %g3	! get q base RA
	brnz	%g3, 1f			! if base RA is zero, skip
	nop
	/* %g7 is parked in %g6 around PRINT and restored afterwards */
	mov	%g7, %g6
	PRINT("RQ NOT ALLOC\r\n")
	mov	%g6, %g7
	! The resumable error queue is not allocated/initialized
	! simply return. No guest is there to receive it.
	jmp	%g7 + 4
	nop
1:
	/*
	 * Translate error address
	 *
	 * When EATTR_PIO, the error PA is in the RA field of the erpt.
	 * For others, check the four L2 AFARs to find a non-zero
	 * address.
	 */
	lduw	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g4
	btst	EATTR_PIO, %g4
	bz,pt	%xcc, .dis_ue_err_ret_mem
	nop
.dis_ue_err_ret_io:
	/* No affected memory region */
	stw	%g0, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_SZ]
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR], %g4
	CPU_ERR_IO_PA_TO_RA(%g1, %g4, %g4, %g3, %g5, %g6, .dis_ue_err_ret_io)
	ba,pt	%xcc, 2f
	nop
.dis_ue_err_ret_mem:
	mov	ERPT_MEM_SIZE, %g4
	stw	%g4, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_SZ]
	/* Use the first non-zero L2 AFAR of banks 0..3 as the error PA */
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(0)], %g4
	brnz	%g4, 1f
	nop
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(1)], %g4
	brnz	%g4, 1f
	nop
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(2)], %g4
	brnz	%g4, 1f
	nop
	ldx	[%g2 + STRAND_EVBSC_L2_AFAR(3)], %g4
	brnz	%g4, 1f
	nop
	/* All four AFARs are zero: report CPU_ERR_INVALID_RA (delay slot) */
	ba,pt	%xcc, 2f
	mov	CPU_ERR_INVALID_RA, %g4
1:
	VCPU_STRUCT(%g1)	/* FIXME: or strand? */
	CPU_ERR_PA_TO_RA(%g1, %g4, %g4, %g5, %g6)
2:
	stx	%g4, [%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR]

	/*
	 * If this is a MEM error report, ensure that it has a valid
	 * RA for this guest
	 */
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g4	! attr
	btst	EATTR_MEM, %g4
	bz,pt	%xcc, 1f
	nop
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR], %g4	! ra
	cmp	%g4, CPU_ERR_INVALID_RA
	bne,pt	%xcc, 1f
	nop
	! not for this guest, return
	jmp	%g7 + 4
	nop
1:
	/*
	 * Compute the next tail (current tail + 0x40, wrapped with the
	 * queue mask) and compare it with the head; equality means full.
	 * The tail register is advanced before the record is written.
	 */
	mov	ERROR_RESUMABLE_QUEUE_TAIL, %g3
	ldxa	[%g3]ASI_QUEUE, %g5		! %g5 = rq_tail
	add	%g5, 0x40, %g6			! %g6 = rq_next = rq_tail++
	ldx	[%g1 + CPU_ERRQR_MASK], %g4
	and	%g6, %g4, %g6			! %g6 = rq_next mod
	mov	ERROR_RESUMABLE_QUEUE_HEAD, %g3
	ldxa	[%g3] ASI_QUEUE, %g4		! %g4 = rq_head
	cmp	%g6, %g4			! head = ++tail?
	be	%xcc, .rq_full
	mov	ERROR_RESUMABLE_QUEUE_TAIL, %g3
	stxa	%g6, [%g3] ASI_QUEUE		! new tail = rq_next

	! write up the queue record
	ldx	[%g1 + CPU_ERRQR_BASE], %g4
	add	%g5, %g4, %g3			! %g3 = base + tail
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_EHDL], %g4	! ehdl
	stx	%g4, [%g3 + 0x0]
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_STICK], %g4	! stick
	stx	%g4, [%g3 + 0x8]
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_EDESC], %g4	! edesc
	st	%g4, [%g3 + 0x10]
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g4	!
attr
	st	%g4, [%g3 + 0x14]
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR], %g4	! ra
	stx	%g4, [%g3 + 0x18]
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_SZ], %g4	! sz
	st	%g4, [%g3 + 0x20]
	lduh	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_CPUID], %g4	! cpuid
	stuh	%g4, [%g3 + 0x24]
	lduh	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_SECS], %g4
	stuh	%g4, [%g3 + 0x26]		! pad/secs
	/* Remaining three doublewords of the 0x40-byte record are zeroed */
	stx	%g0, [%g3 + 0x28]		! word5
	stx	%g0, [%g3 + 0x30]		! word6
	stx	%g0, [%g3 + 0x38]		! word7
	jmp	%g7 + 4
	nop
.rq_full:
	! The resumable error queue is full.
	! simply return
	/* %g7 is parked in %g6 around PRINT and restored afterwards */
	mov	%g7, %g6
	PRINT("RQ FULL\r\n")
	mov	%g6, %g7
	jmp	%g7 + 4
	nop
	SET_SIZE(queue_resumable_erpt)

	/*
	 * Queue a nonresumable error report on this CPU
	 * %g2 contains pointer to the error report
	 * %g1, %g3 - %g6 clobbered
	 * %g7 return address
	 *
	 * Check to see what is the guest state:
	 *	switch(guestp->state) {
	 *	case GUEST_STATE_SUSPENDED:
	 *	case GUEST_STATE_NORMAL:
	 *		! calculate new tail
	 *		oldtail = [ERROR_NONRESUMABLE_QUEUE_TAIL]ASI_QUEUE
	 *		qnr_mask = vcpup->errqnr_mask;
	 *		newtail = (oldtail + qsize) & qnr_mask;
	 *		head = [ERROR_NONRESUMABLE_QUEUE_HEAD]ASI_QUEUE
	 *		if (vcpup->cpu_errqnr_base_ra == 0 || (head == newtail)) {
	 *			sir_guest()
	 *		} else {
	 *			deliver_pkt(pkt);
	 *		}
	 *		break;
	 *	case GUEST_STATE_EXITING:
	 *	case GUEST_STATE_STOPPED:
	 *	case GUEST_STATE_UNCONFIGURED:
	 *		drop_pkt();
	 *
	 *		break;
	 *	}
	 *
	 * This routine just moves the erpt to the queue, it does not
	 * modify the data.
	 *
	 * Exits: the deliver and drop paths return with HVRET.  The
	 * "queue not allocated" and "queue full" paths do NOT return to
	 * %g7; they branch to .err_resume_bad_guest_err_q.
	 * A guest state matching none of the five compared values falls
	 * through into .check_vcpu_queues.
	 */
	ENTRY_NP(queue_nonresumable_erpt)
	VCPU_STRUCT(%g1)
	! Get the guest structure this vcpu belongs
	VCPU2GUEST_STRUCT(%g1, %g5)
	! Determine the guest state
	/*
	 * Each "set" of the next state constant sits in the delay slot of
	 * the preceding "be", so it executes whether or not the branch is
	 * taken.
	 * NOTE(review): this relies on every GUEST_STATE_* constant
	 * assembling "set" to a single instruction -- confirm.
	 */
	lduw	[%g5 + GUEST_STATE], %g4
	set	GUEST_STATE_SUSPENDED, %g3
	cmp	%g4, %g3
	be,pn	%xcc, .check_vcpu_queues
	set	GUEST_STATE_NORMAL, %g3
	cmp	%g4, %g3
	be,pn	%xcc, .check_vcpu_queues
	set	GUEST_STATE_EXITING, %g3
	cmp	%g4, %g3
	be,pn	%xcc, .drop_nrq_pkt
	set	GUEST_STATE_STOPPED, %g3
	cmp	%g4, %g3
	be,pn	%xcc, .drop_nrq_pkt
	set	GUEST_STATE_UNCONFIGURED, %g3
	cmp	%g4, %g3
	be,pn	%xcc, .drop_nrq_pkt
	nop
.check_vcpu_queues:
	! %g1 vcpup
	ldx	[%g1 + CPU_ERRQNR_BASE_RA], %g3	! get q base RA
	brz,pn	%g3, .queue_nonresumable_bad_queue
	nop
	mov	ERROR_NONRESUMABLE_QUEUE_TAIL, %g3
	ldxa	[%g3]ASI_QUEUE, %g5		! %g5 = rq_tail
	add	%g5, 0x40, %g6			! %g6 = rq_next = rq_tail++
	ldx	[%g1 + CPU_ERRQNR_MASK], %g4
	and	%g6, %g4, %g6			! %g6 = rq_next mod
	mov	ERROR_NONRESUMABLE_QUEUE_HEAD, %g3
	ldxa	[%g3] ASI_QUEUE, %g4		! %g4 = rq_head
	cmp	%g6, %g4			! head = ++tail?
	be,pn	%xcc, .queue_nonresumable_full_queue
	mov	ERROR_NONRESUMABLE_QUEUE_TAIL, %g3

	/*
	 * Deliver NR error pkt to guest
	 */
	stxa	%g6, [%g3]ASI_QUEUE		! new tail = rq_next

	! write the queue record
	ldx	[%g1 + CPU_ERRQNR_BASE], %g4
	add	%g5, %g4, %g3			! %g3 = base + tail
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_EHDL], %g4	! ehdl
	stx	%g4, [%g3 + 0x0]
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_STICK], %g4	! stick
	stx	%g4, [%g3 + 0x8]
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_EDESC], %g4	! edesc
	st	%g4, [%g3 + 0x10]
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g4	! attr
	st	%g4, [%g3 + 0x14]
	ldx	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_ADDR], %g4	! ra
	stx	%g4, [%g3 + 0x18]
	ld	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_SZ], %g4	! sz
	st	%g4, [%g3 + 0x20]
	lduh	[%g2 + STRAND_SUN4V_ERPT + ESUN4V_G_CPUID], %g4	! cpuid
	stuh	%g4, [%g3 + 0x24]
	/*
	 * Unlike queue_resumable_erpt, the halfword at 0x26 is zeroed here
	 * instead of being copied from ESUN4V_G_SECS.
	 */
	stuh	%g0, [%g3 + 0x26]		! pad
	stx	%g0, [%g3 + 0x28]		! word5
	stx	%g0, [%g3 + 0x30]		! word6
	stx	%g0, [%g3 + 0x38]		! word7

	/* %g7 is parked in %g6 around PRINT and restored afterwards */
	mov	%g7, %g6
	PRINT("queue_nonresumable_erpt: entry enqueued\r\n")
	mov	%g6, %g7

	HVRET

.drop_nrq_pkt:
	/*
	 * The guest is not in the proper state to receive pkts
	 * Drop packet by just returning
	 */
#ifdef DEBUG
	mov	%g7, %g6
	PRINT("no guest to deliver NR error pkt. Dropping it\r\n")
	mov	%g6, %g7
#endif
	HVRET

.queue_nonresumable_full_queue:
	/*
	 * The nonresumable error queue is full.
	 * Reset the guest
	 */
#ifdef DEBUG
	mov	%g7, %g6
	PRINT("queue_nonresumable_erpt: nrq full - exiting guest\r\n")
	mov	%g6, %g7
#endif
	ba,a	.queue_nonresumable_reset

.queue_nonresumable_bad_queue:
	/*
	 * The nonresumable error queue is not allocated/initialized
	 * Reset the guest
	 */
#ifdef DEBUG
	mov	%g7, %g6
	PRINT("NRQ NOT ALLOC - exiting guest\r\n")
	mov	%g6, %g7
#endif
	/* fall through */

.queue_nonresumable_reset:
#ifdef NIAGARA_BRINGUP
	/*
	 * Bring-up aid: if %tl - 1 is non-zero, drop to that trap level
	 * and print its TPC, TT and TSTATE.
	 */
	rdpr	%tl, %g2
	deccc	%g2
	bz	%xcc, 1f
	nop
	wrpr	%g2, %tl
	PRINT("TPC \r\n")
	rdpr	%tpc, %g1
	PRINTX(%g1)
	PRINT("\r\n")
	PRINT("TT \r\n")
	rdpr	%tt, %g1
	PRINTX(%g1)
	PRINT("\r\n")
	PRINT("TSTATE \r\n")
	rdpr	%tstate, %g1
	PRINTX(%g1)
	PRINT("\r\n")
1:
#endif
	/* Leaves this routine for good; the %g7 return address is not used */
	ba,a	.err_resume_bad_guest_err_q
	SET_SIZE(queue_nonresumable_erpt)

	/*
	 * JBUS error
	 *
	 * Sequence: idle the other strands, build a basic error report in
	 * the strand's UE report buffer, record the JBI/SSI state in it,
	 * write the logged JBI error values back to JBI_ERR_LOG and
	 * JBI_ERR_OVF, send the diagnostic report, resume the strands,
	 * clear the SSI error interrupt, then either "retry" or abort the
	 * hypervisor when a JBI_ABORT_ERRS bit was logged.
	 */
	ENTRY(ue_jbus_err)

	STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g6, %g5)	! g6->strand, g5->strand.ue_rpt

	SPINLOCK_IDLE_ALL_STRAND(%g6, %g1, %g2, %g3, %g4)
	! At this point, this is the only strand executing

	/*
	 * Generate a basic error report
	 */
	LOAD_BASIC_ERPT(%g6, %g5, %g1, %g2)

	/*
	 * Clear unused diag buf fields
	 */
	/*
	 * NOTE(review): elsewhere in this file STRAND_EVBSC_L2_AFSR(n) is
	 * applied directly to an erpt pointer (e.g.
	 * "ldx [%g6 + STRAND_EVBSC_L2_AFSR(0)]") without adding
	 * STRAND_VBSC_ERPT.  Confirm that adding STRAND_VBSC_ERPT on top of
	 * the STRAND_EVBSC_* offsets below is intended.
	 */
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFSR]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_L2_AFSR(0)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_L2_AFSR(1)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_L2_AFSR(2)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_L2_AFSR(3)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_DRAM_AFSR(0)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_DRAM_AFSR(1)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_DRAM_AFSR(2)]
	stx	%g0, [%g5 + STRAND_VBSC_ERPT + STRAND_EVBSC_DRAM_AFSR(3)]

	/*
	 * Store JBUS error data in error report
	 */
	DUMP_JBI_SSI(%g6, %g5, %g3, %g4, %g1, %g2, %g7)

	/*
	 * Clear the JBI errors logged in the erpt
	 */
	STRAND_ERPT_STRUCT(STRAND_UE_RPT, %g6, %g5)	!
g6->strand, g5->strand.ue_rpt
	/*
	 * The values captured in the report are written back to the JBI
	 * log and overflow registers (the file's other comments describe
	 * this write-back as the way to clear them).
	 */
	ldx	[%g5 + STRAND_VBSC_ERPT + EVBSC_JBI_ERR_LOG], %g1
	setx	JBI_ERR_LOG, %g3, %g2
	stx	%g1, [%g2]
	ldx	[%g5 + STRAND_VBSC_ERPT + EVBSC_DIAG_BUF + JS_JBI_ERR_OVF], %g4
	setx	JBI_ERR_OVF, %g3, %g2
	stx	%g4, [%g2]
	or	%g1, %g4, %g1	! combine primary and overflow for fatal check
	CPU_PUSH(%g1, %g2, %g3, %g4)	/* save JBI_ERR_LOG|JBI_ERR_OVF */

	/*
	 * send UE diag report
	 */
	add	%g6, STRAND_UE_RPT + STRAND_VBSC_ERPT, %g1	! erpt.vbsc
	set	STRAND_UE_RPT + STRAND_UNSENT_PKT, %g2
	add	%g6, %g2, %g2			! erpt.unsent flag
	mov	EVBSC_SIZE, %g3			! size
	HVCALL(send_diag_erpt)

	STRAND_STRUCT(%g6)
	SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4)

	/*
	 * Clear interrupt
	 */
	setx	IOBBASE, %g3, %g2
	stx	%g0, [%g2 + INT_CTL + INT_CTL_DEV_OFF(IOBDEV_SSIERR)]

	/*
	 * Get saved JBI error log register and check for fatal errors
	 */
	CPU_POP(%g1, %g2, %g3, %g4)
	btst	JBI_ABORT_ERRS, %g1
	bnz,pn	%xcc, .ue_jbus_err_fatal
	nop

	/*
	 * Not a fatal JBI error, we sent the info to vbsc so just
	 * return to whatever this strand was doing.
	 */
	retry

.ue_jbus_err_fatal:
	LEGION_EXIT(3)
	! abort HV
	ba,pt	%xcc, hvabort
	rd	%pc, %g1
	SET_SIZE(ue_jbus_err)

	/*
	 * irc_check(uint64_t sparc_ear) [Non-LEAF]
	 *
	 * Checks whether the IRC error is transient or persistent.
	 * Before we re-read the register in error, we set the irc_ear
	 * in the CPU struct to the SPARC EAR value, which has the reg#
	 * and the syndrome. A zero syndrome is not possible for error,
	 * therefore irc_ear == 0 means IRC trap didn't set it.
	 * (Note %g0 like other registers can generate errors.)
	 * If the IRU trap is taken because of a persistent uncorrectable error,
	 * the IRU trap handler will check the irc_ear field with the SPARC_EAR
	 * logged. If they are the same, then IRU trap handler clears the
	 * irc_ear field and returns.
	 *
	 *	set_ircear(sparc_ear)
	 *	irf_reread(sparc_ear)
	 *	if (CPU.irc_ear == 0)
	 *		return RF_PERSISTENT;
	 *	else {
	 *		CPU.irc_ear = 0;
	 *		return RF_TRANSIENT;
	 *	}
	 * Arguments:
	 *	%g1 - input - SPARC EAR - clobbered
	 *	%g2 - output (RF_TRANSIENT, RF_PERSISTENT)
	 *	%g3 - scratch
	 *	%g4 - scratch
	 *	%g5 - erpt
	 *	%g6 - strand
	 *	%g7 - return address
	 *
	 * In the code the "irc_ear" field is the doubleword at
	 * STRAND_REGERR in the strand structure.  %g6 is borrowed to hold
	 * the return address across the irf_reread call and the strand
	 * pointer is reloaded with STRAND_STRUCT afterwards.
	 */
	ENTRY_NP(irc_check)
	! init STRAND.irc_ear
	stx	%g1, [%g6 + STRAND_REGERR]	! CPU.irc_ear = sparc EAR (!=0)

	! reread register
	mov	%g7, %g6		! save return address
	HVCALL(irf_reread)		! %g1 has SPARC EAR
	mov	%g6, %g7		! restore return address
	STRAND_STRUCT(%g6)		! restore g6->strand

	! check STRAND.irc_ear
	/*
	 * brz tests the loaded value; the mov in its delay slot then sets
	 * the RF_PERSISTENT result.  On the fall-through (non-zero) path
	 * that result is replaced by RF_TRANSIENT below.
	 */
	ldx	[%g6 + STRAND_REGERR], %g2	! read STRAND.irc_ear
	brz	%g2, .irc_ret			! persistent error
	mov	RF_PERSISTENT, %g2

	! transient error. H/W has fixed it now after the reread
	! get back to interrupted program
	stx	%g0, [%g6 + STRAND_REGERR]	! clear irc_ear
	mov	RF_TRANSIENT, %g2		! return transient
.irc_ret:
	HVRET
	SET_SIZE(irc_check)

	/*
	 * int iru_check(uint64_t sparc_ear) [Non-Leaf]
	 *
	 * Check whether the IRU error is transient, persistent
	 * or if the integer register file is flaky.
	 *
	 *	clear_irf_ue(sparc_ear);
	 *	irf_reread(sparc_ear);
	 *	if (SPARC_ESR.IRU == 0) {
	 *		return RF_TRANSIENT;
	 *	}
	 *	if (SPARC_EAR == sparc_ear) {
	 *		return RF_PERSISTENT;
	 *	} else {
	 *		return RF_FAILURE;
	 *	}
	 * Arguments:
	 *	%g1 - input - SPARC EAR - clobbered
	 *	%g2 - output (RF_TRANSIENT, RF_PERSISTENT, RF_FAILURE)
	 *	%g3 - scratch
	 *	%g4 - scratch
	 *	%g5 - erpt pointer
	 *	%g6 - strand pointer
	 *
	 * %g7 is the return address; it is parked in %g6 across the two
	 * nested calls.  The EAR comparison uses the value saved in the
	 * error report (via %g5) and only the SPARC_EAR_IREG_MASK bits.
	 * The SPARC ESR is written back only on the RF_PERSISTENT path.
	 */
	ENTRY_NP(iru_check)
	mov	%g7, %g6		! save return address
	HVCALL(clear_irf_ue)		! %g1 has SPARC EAR

	! reread register
	HVCALL(irf_reread)		! %g1 has SPARC EAR
	mov	%g6, %g7		! restore return address
	STRAND_STRUCT(%g6)		! restore strand

	! check SPARC ESR for IRU error
	ldxa	[%g0]ASI_SPARC_ERR_STATUS, %g4	! get SPARC ESR
	set	SPARC_ESR_IRU, %g3		! IRU bit
	btst	%g3, %g4			! check for IRU
	bz	%xcc, .iru_ret			! no:
	mov	RF_TRANSIENT, %g2		! return transient

	! persistent IRU error?
	!
check EAR for match
	ldxa	[%g0]ASI_SPARC_ERR_ADDR, %g2	! get SPARC EAR
	ldx	[%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g1 ! saved EAR
	xor	%g2, %g1, %g2		! Are they the same?
	andcc	%g2, SPARC_EAR_IREG_MASK, %g2	! (ignore non-register bits)
	bnz	%xcc, .iru_ret			! no:
	mov	RF_FAILURE, %g2			! return reg file failure
	stxa	%g4, [%g0]ASI_SPARC_ERR_STATUS	! yes: clear SPARC ESR
	mov	RF_PERSISTENT, %g2		! return persistent error
.iru_ret:
	HVRET					! return to caller
	SET_SIZE(iru_check)

	/*
	 * void irf_reread(uint64_t sparc_ear) [LEAF function]
	 *
	 * Caller: IRC or IRU handler
	 *
	 * Re-read integer register in error
	 * Arguments:
	 *	%g1 - input - SPARC_EAR
	 *	%g2 - %g4 - scratch
	 *	%g5, %g6 - preserved
	 *	%g7 - return address
	 *
	 * How the dispatch works: the register number (low 5 bits of the
	 * EAR register field) selects one two-instruction entry in a table
	 * that follows a "ba 1f / rd %pc, %g3" pair.  The rd executes in
	 * the delay slot of the ba, so %g3 holds the address of the rd
	 * itself and the first table entry is at %g3 + SZ_INSTR
	 * (assumes SZ_INSTR is the size of one instruction -- confirm).
	 * Each entry is 8 bytes, hence the "sllx ..., 3".
	 * The error-enable register is saved in %g2, rewritten with CEEN
	 * cleared for the duration of the reread, and restored at
	 * .reread_done.
	 * For global registers (reg# < 8) the routine also switches %gl to
	 * the global level taken from the EAR; while doing so %o0 holds the
	 * previous %gl and %g4 holds the caller's %o0, both restored at
	 * .gl_reread_done.  On that path %g1 is overwritten.
	 */
	ENTRY_NP(irf_reread)
	and	%g1, SPARC_EAR_IREG_MASK, %g2
	srlx	%g2, SPARC_EAR_IREG_SHIFT, %g2	! %g2 has int reg num

	! %g2 has the int reg# in error.
	! Current window is pointing to the window of the reg in error
	! get the register number within the set
	and	%g2, 0x1f, %g2			! mask off GL/CWP
	cmp	%g2, 8				! is reg# < 8?
	bl	.glob				! yes, then global reg
	nop

	! Now re-read the register in error
	ba	1f				! do reread
	rd	%pc, %g3			! get reread instr base addr

	! an array of instruction blocks indexed by register number to
	! reread the non-global register reported in error.
	or	%g0, %o0, %o0			! reread %o0
	ba,a	.reread_done
	or	%g0, %o1, %o1			! reread %o1
	ba,a	.reread_done
	or	%g0, %o2, %o2			! reread %o2
	ba,a	.reread_done
	or	%g0, %o3, %o3			! reread %o3
	ba,a	.reread_done
	or	%g0, %o4, %o4			! reread %o4
	ba,a	.reread_done
	or	%g0, %o5, %o5			! reread %o5
	ba,a	.reread_done
	or	%g0, %o6, %o6			! reread %o6
	ba,a	.reread_done
	or	%g0, %o7, %o7			! reread %o7
	ba,a	.reread_done
	or	%g0, %l0, %l0			! reread %l0
	ba,a	.reread_done
	or	%g0, %l1, %l1			! reread %l1
	ba,a	.reread_done
	or	%g0, %l2, %l2			! reread %l2
	ba,a	.reread_done
	or	%g0, %l3, %l3			! reread %l3
	ba,a	.reread_done
	or	%g0, %l4, %l4			! reread %l4
	ba,a	.reread_done
	or	%g0, %l5, %l5			! reread %l5
	ba,a	.reread_done
	or	%g0, %l6, %l6			! reread %l6
	ba,a	.reread_done
	or	%g0, %l7, %l7			! reread %l7
	ba,a	.reread_done
	or	%g0, %i0, %i0			! reread %i0
	ba,a	.reread_done
	or	%g0, %i1, %i1			! reread %i1
	ba,a	.reread_done
	or	%g0, %i2, %i2			! reread %i2
	ba,a	.reread_done
	or	%g0, %i3, %i3			! reread %i3
	ba,a	.reread_done
	or	%g0, %i4, %i4			! reread %i4
	ba,a	.reread_done
	or	%g0, %i5, %i5			! reread %i5
	ba,a	.reread_done
	or	%g0, %i6, %i6			! reread %i6
	ba,a	.reread_done
	or	%g0, %i7, %i7			! reread %i7
	ba,a	.reread_done

1:	sub	%g2, 8, %g2			! skip globals
	sllx	%g2, 3, %g2			! offset = reg# * 8
	add	%g3, %g2, %g3			! %g3 = instruction block addr
	ldxa	[%g0]ASI_SPARC_ERR_EN, %g2	! save current in %g2
	andn	%g2, CEEN, %g4			! disable CEEN
	stxa	%g4, [%g0] ASI_SPARC_ERR_EN	! ..
	jmp	%g3 + SZ_INSTR			! jmp to reread register
	nop

	! restore gl from value in %o0, and restore %o0
.gl_reread_done:
	wrpr	%o0, %gl			! restore %gl
	mov	%g4, %o0			! restore %o0
	/*
	 * NOTE(review): the three-line remark below describes a check of
	 * the iregerr field, but no such check is made in this routine; it
	 * only restores the error-enable register and returns.  irc_check
	 * reads STRAND_REGERR after this routine returns.
	 */
	! Here, we check the iregerr field after the reread. If it
	! is zero, then we know it is a persistent uncorrectable error.
	! If it is nonzero, then we know it is a transient error.
.reread_done:
	stxa	%g2, [%g0] ASI_SPARC_ERR_EN	! restore CEEN
	HVRET					! return to caller

	! %g2 has the register number
.glob:
	! now re-read the global register in error
	ba	1f
	rd	%pc, %g3		! reread instruction base addr

	! an array of instructions blocks indexed by global register number
	! to reread the global register reported in error.
	! %gl points to the error global set
	or	%g0, %g0, %g0			! reread %g0 (yay!)
	ba,a	.gl_reread_done
	or	%g0, %g1, %g1			! reread %g1
	ba,a	.gl_reread_done
	or	%g0, %g2, %g2			! reread %g2
	ba,a	.gl_reread_done
	or	%g0, %g3, %g3			! reread %g3
	ba,a	.gl_reread_done
	or	%g0, %g4, %g4			! reread %g4
	ba,a	.gl_reread_done
	or	%g0, %g5, %g5			! reread %g5
	ba,a	.gl_reread_done
	or	%g0, %g6, %g6			! reread %g6
	ba,a	.gl_reread_done
	or	%g0, %g7, %g7			! reread %g7
	ba,a	.gl_reread_done

1:	sllx	%g2, 3, %g2			! offset (2 instrs)
	add	%g3, %g2, %g3			! %g3 = instruction entry
	ldxa	[%g0]ASI_SPARC_ERR_EN, %g2	!
save current in %g2 andn %g2, CEEN, %g4 ! disable CEEN stxa %g4, [%g0] ASI_SPARC_ERR_EN ! .. mov %o0, %g4 ! save %o0 in %g4 rdpr %gl, %o0 ! save %gl in %o0 ! set gl to error global and %g1, SPARC_EAR_GL_MASK, %g1 ! get global set from EAR srlx %g1, SPARC_EAR_GL_SHIFT, %g1 ! %g1 has %gl value jmp %g3 + SZ_INSTR ! jump to reread global wrpr %g1, %gl ! set gl to error gl SET_SIZE(irf_reread) /* clear_iregerr(sparc_ear) [LEAF Function] * * Clear CPU.iregerr if the IRU register in error == CPU.iregerr * Return 0 if CPU.iregerr matches, and 1 if no match * Arguments: * %g1 - SPARC EAR * %g2 - output - 0 if CPU.iregerr matches, 1 if no match * %g3, %g4 - scratch * %g5 - erpt pointer * %g6 - strand pointer * %g7 - return address */ ENTRY_NP(clear_iregerr) ldx [%g6 + STRAND_REGERR], %g3 ! %g3 = STRAND.iregerr ! compare the register number from EAR xor %g3, %g1, %g3 ! Are they the same? andcc %g3, SPARC_EAR_IREG_MASK, %g3 ! (ignore non-register bits) bz %xcc, .ireg_match ! yes, then clear nop mov 1, %g2 ! return 1 for no match HVRET ! %g4 has CPU.iregerr address ! IRU was taken from IRC trap handler reread attempt .ireg_match: stx %g0, [%g6 + STRAND_REGERR] ! clear STRAND.iregerr mov %g0, %g2 ! return 0 for ireg match HVRET SET_SIZE(clear_iregerr) /* * void clear_irf_ue(uint64_t sparc_ear) * * Clear the UE in the integer register file * Arguments: * %g1 - input - SPARC EAR * %g2-%g4 -scratch * %g5, %g6 - preserved * %g7 - return address */ ENTRY_NP(clear_irf_ue) and %g1, SPARC_EAR_IREG_MASK, %g2 srlx %g2, SPARC_EAR_IREG_SHIFT, %g2 ! %g2 has int reg num ! get the register number within the set and %g2, 0x1f, %g2 ! mask off GL/CWP cmp %g2, 8 ! is reg# < 8? bl .glob_ue ! yes, then global reg nop ! Now clear the register in error ba 1f ! clear register rd %pc, %g3 ! get clear instr base addr ! an array of instruction blocks indexed by register number to ! clear the non-global register reported in error. mov %g0, %o0 ! clear %o0 ba,a .clear_done mov %g0, %o1 ! 
clear %o1 ba,a .clear_done mov %g0, %o2 ! clear %o2 ba,a .clear_done mov %g0, %o3 ! clear %o3 ba,a .clear_done mov %g0, %o4 ! clear %o4 ba,a .clear_done mov %g0, %o5 ! clear %o5 ba,a .clear_done mov %g0, %o6 ! clear %o6 ba,a .clear_done mov %g0, %o7 ! clear %o7 ba,a .clear_done mov %g0, %l0 ! clear %l0 ba,a .clear_done mov %g0, %l1 ! clear %l1 ba,a .clear_done mov %g0, %l2 ! clear %l2 ba,a .clear_done mov %g0, %l3 ! clear %l3 ba,a .clear_done mov %g0, %l4 ! clear %l4 ba,a .clear_done mov %g0, %l5 ! clear %l5 ba,a .clear_done mov %g0, %l6 ! clear %l6 ba,a .clear_done mov %g0, %l7 ! clear %l7 ba,a .clear_done mov %g0, %i0 ! clear %i0 ba,a .clear_done mov %g0, %i1 ! clear %i1 ba,a .clear_done mov %g0, %i2 ! clear %i2 ba,a .clear_done mov %g0, %i3 ! clear %i3 ba,a .clear_done mov %g0, %i4 ! clear %i4 ba,a .clear_done mov %g0, %i5 ! clear %i5 ba,a .clear_done mov %g0, %i6 ! clear %i6 ba,a .clear_done mov %g0, %i7 ! clear %i7 ba,a .clear_done 1: sub %g2, 8, %g2 ! skip globals sllx %g2, 3, %g2 ! offset = reg# * 8 add %g3, %g2, %g3 ! %g3 = instruction block addr jmp %g3 + SZ_INSTR ! jmp to clear register nop ! restore gl from value in %o0, and restore %o0 .gl_clear_done: wrpr %o0, %gl ! restore %gl mov %g4, %o0 ! restore %o0 ! Here, we check the iregerr field after the reread. If it ! is zero, then we know it is a persistent uncorrectable error. ! If it is nonzero, then we know it is a transient error. .clear_done: HVRET ! return to caller ! %g2 has the gl + register number .glob_ue: ! now re-read the global register in error ba 1f rd %pc, %g3 ! get clear instr base addr ! an array of instructions blocks indexed by global register number ! to clear the global register reported in error. ! %gl points to the error global set mov %g0, %g0 ! clear %g0 (yay!) ba,a .gl_clear_done mov %g0, %g1 ! clear %g1 ba,a .gl_clear_done mov %g0, %g2 ! clear %g2 ba,a .gl_clear_done mov %g0, %g3 ! clear %g3 ba,a .gl_clear_done mov %g0, %g4 ! clear %g4 ba,a .gl_clear_done mov %g0, %g5 ! 
clear %g5 ba,a .gl_clear_done mov %g0, %g6 ! clear %g6 ba,a .gl_clear_done mov %g0, %g7 ! clear %g7 ba,a .gl_clear_done 1: sllx %g2, 3, %g2 ! offset (2 instrs) add %g3, %g2, %g3 ! %g3 = instruction entry mov %o0, %g4 ! save %o0 in %g4 rdpr %gl, %o0 ! save %gl in %o0 ! set gl to error global and %g1, SPARC_EAR_GL_MASK, %g2 ! get global set from EAR srlx %g2, SPARC_EAR_GL_SHIFT, %g2 ! %g2 has %gl value jmp %g3 + SZ_INSTR ! jump to clear global wrpr %g2, %gl ! set gl to error gl SET_SIZE(clear_irf_ue) /* * frc_check(uint64_t sparc_ear) [Non-Leaf] * * Check whether the FRC error is transient or persistent. * Before we re-read the register in error, we set the frc_ear * in the CPU struct to the SPARC EAR value, which has the reg# * and the syndrome. A zero syndrome is not possible for error, * therefore frc_ear == 0 means FRC trap didn't set it. * (Note %g0 like other registers can generate errors.) * If the FRU trap is taken because of a persistent uncorrectable error, * the FRU trap handler will check the frc_ear field with the SPARC_EAR * logged. If they are the same, then FRU trap handler clears the * frc_ear field and returns. * * set_frcear(sparc_ear) * frf_reread(sparc_ear) * if (cpu.frc_ear == 0) * return RF_PERSISTENT; * else { * cpu.frc_ear = 0; * return RF_TRANSIENT; * } * Arguments: * %g1 - input - SPARC EAR - clobbered * %g2 - output (RF_TRANSIENT, RF_PERSISTENT) * %g3 - scratch * %g4 - scratch * %g5 - erpt pointer * %g6 - strand pointer */ ENTRY_NP(frc_check) ! init strand.frc_ear stx %g1, [%g6 + STRAND_REGERR] ! strand.frc_ear = sparc EAR (!=0) /* * It is possible that FPRS.FEF was disabled when we took the * disrupting trap caused by the FP CE. We must ensure that FPRS.FEF * is enabled before calling frf_reread(). * * Note that the Sparc V9 spec mandates that PSTATE.PEF be enabled * when we take a trap if there is an FPU present. As this error * condition can only occur with an FPU we do not need to verify * PSTATE.PEF here. 
*/ rd %fprs, %g5 btst FPRS_FEF, %g5 ! FPRS.FEF set ? bz,a,pn %xcc, 1f ! no: set it wr %g5, FPRS_FEF, %fprs ! yes: annulled 1: ! reread register mov %g7, %g6 ! save return address HVCALL(frf_reread) ! %g1 has SPARC EAR, ! %g5/%g6 preserved wr %g5, %g0, %fprs ! restore FPRS mov %g6, %g7 ! restore return address STRAND_ERPT_STRUCT(STRAND_CE_RPT, %g6, %g5) ! g6->strand, g5->strand.ce_rpt ! check strand.frc_ear ldx [%g6 + STRAND_REGERR], %g2 ! read strand.frc_ear brz %g2, .frc_ret ! persistent error mov RF_PERSISTENT, %g2 ! transient error. H/W has fixed it now after the reread ! get back to interrupted program stx %g0, [%g6 + STRAND_REGERR] ! clear frc_ear mov RF_TRANSIENT, %g2 ! return transient .frc_ret: HVRET SET_SIZE(frc_check) /* * fru_check(uint64_t sparc_ear) [Non-Leaf] * * Check whether the FRU error is transient or persistent * or if the floating point register file is failing. * clear_frf_ue(sparc_ear); * frf_reread(sparc_ear); * if (SPARC_ESR.FRU == 0) { * return RF_TRANSIENT; * } * if (SPARC_EAR == sparc_ear) * return RF_PERSISTENT; * } else { * return RF_FAILURE; * } * Arguments: * %g1 - input - SPARC EAR - clobbered * %g2 - output (RF_TRANSIENT, RF_PERSISTENT, RF_FAILURE) * %g3 - scratch * %g4 - scratch * %g5 - erpt pointer * %g6 - strand pointer */ ENTRY_NP(fru_check) mov %g7, %g6 ! save return address HVCALL(clear_frf_ue) ! %g1 has SPARC EAR ! reread register HVCALL(frf_reread) ! %g1 has SPARC EAR mov %g6, %g7 ! restore return address STRAND_STRUCT(%g6) ! restore strand ! check SPARC ESR for FRU error ldxa [%g0]ASI_SPARC_ERR_STATUS, %g4 ! get SPARC ESR set SPARC_ESR_FRU, %g3 ! FRU bit btst %g3, %g4 ! check for FRU bz %xcc, .fru_ret ! no: mov RF_TRANSIENT, %g2 ! return transient ! persistent FRU error? ! check EAR for match ldxa [%g0]ASI_SPARC_ERR_ADDR, %g2 ! get SPARC EAR ldx [%g5 + STRAND_VBSC_ERPT + EVBSC_SPARC_AFAR], %g1 ! saved EAR xor %g2, %g1, %g2 ! Are they the same? andcc %g2, SPARC_EAR_FPREG_MASK, %g2 ! 
(ignore non-register bits) bnz %xcc, .fru_ret ! no: mov RF_FAILURE, %g2 ! return reg file failure stxa %g4, [%g0]ASI_SPARC_ERR_STATUS ! yes: clear SPARC ESR mov RF_PERSISTENT, %g2 ! return persistent .fru_ret: HVRET ! return to caller SET_SIZE(fru_check) /* * IRF Uncorrectible ECC Error * * if (clear_iregerr(sparc_ear) == MATCH from IRC) { * DONE; * } else { * if (iru_check(sparc_ear) == RF_PERSISTENT) { * CPU.status = mark CPU in ERROR; * if ((CPUnext = avail(partID)) != NULL) { * x_call(CPUnext, I_AM_IN_ERROR); * stop_self(); * } else { * q_service_error_report(spi); * stop_self(); - watchdog reset later? * } * } else { * q_sun4v_error_report(nrq); * q_service_error_report(spi); * jmp nonresumable_error trap handler * } * } */ /* * frf_reread(uint64_t sparc_ear) [LEAF function] * * Reread the FRF register in error. * Arguments: * %g1 - input - SPARC EAR * %g2 - %g4 - scratch * %g5, %g6 - preserved * %g7 - return address */ ENTRY_NP(frf_reread) and %g1, SPARC_EAR_FPREG_MASK, %g2 srlx %g2, SPARC_EAR_FPREG_SHIFT, %g2 ! %g2 has 6-bit fpreg number ! Now reread the register in error ba 1f rd %pc, %g3 ! %g3 = base address ! an array of instruction blocks indexed by register number to ! reread the floating-point register reported in error ! The first 32 entries use single-precision register ! The next 32 entries reread the double-precision register fmovs %f0, %f0 ! reread %f0 ba,a .fp_reread_done fmovs %f1, %f1 ! reread %f1 ba,a .fp_reread_done fmovs %f2, %f2 ! reread %f2 ba,a .fp_reread_done fmovs %f3, %f3 ! reread %f3 ba,a .fp_reread_done fmovs %f4, %f4 ! reread %f4 ba,a .fp_reread_done fmovs %f5, %f5 ! reread %f5 ba,a .fp_reread_done fmovs %f6, %f6 ! reread %f6 ba,a .fp_reread_done fmovs %f7, %f7 ! reread %f7 ba,a .fp_reread_done fmovs %f8, %f8 ! reread %f8 ba,a .fp_reread_done fmovs %f9, %f9 ! reread %f9 ba,a .fp_reread_done fmovs %f10, %f10 ! reread %f10 ba,a .fp_reread_done fmovs %f11, %f11 ! reread %f11 ba,a .fp_reread_done fmovs %f12, %f12 ! 
! reread %f12
	ba,a	.fp_reread_done
	!
	! Dispatch-table invariant: every entry is exactly two instructions
	! (8 bytes) - a self-move that forces the register to be read,
	! followed by an annulled branch to .fp_reread_done - because the
	! dispatcher indexes this table with (freg# * 8).  Comments emit no
	! code, so they may sit between entries, but never add or remove an
	! instruction inside the table.
	!
	fmovs	%f13, %f13			! reread %f13
	ba,a	.fp_reread_done
	fmovs	%f14, %f14			! reread %f14
	ba,a	.fp_reread_done
	fmovs	%f15, %f15			! reread %f15
	ba,a	.fp_reread_done
	fmovs	%f16, %f16			! reread %f16
	ba,a	.fp_reread_done
	fmovs	%f17, %f17			! reread %f17
	ba,a	.fp_reread_done
	fmovs	%f18, %f18			! reread %f18
	ba,a	.fp_reread_done
	fmovs	%f19, %f19			! reread %f19
	ba,a	.fp_reread_done
	fmovs	%f20, %f20			! reread %f20
	ba,a	.fp_reread_done
	fmovs	%f21, %f21			! reread %f21
	ba,a	.fp_reread_done
	fmovs	%f22, %f22			! reread %f22
	ba,a	.fp_reread_done
	fmovs	%f23, %f23			! reread %f23
	ba,a	.fp_reread_done
	fmovs	%f24, %f24			! reread %f24
	ba,a	.fp_reread_done
	fmovs	%f25, %f25			! reread %f25
	ba,a	.fp_reread_done
	fmovs	%f26, %f26			! reread %f26
	ba,a	.fp_reread_done
	fmovs	%f27, %f27			! reread %f27
	ba,a	.fp_reread_done
	fmovs	%f28, %f28			! reread %f28
	ba,a	.fp_reread_done
	fmovs	%f29, %f29			! reread %f29
	ba,a	.fp_reread_done
	fmovs	%f30, %f30			! reread %f30
	ba,a	.fp_reread_done
	! %f31 must be both source and destination: moving %f30 into %f31
	! would never read the register reported in error and would
	! overwrite its contents with %f30's value.
	fmovs	%f31, %f31			! reread %f31
	ba,a	.fp_reread_done
	! double precision register pairs, reread both of them on errors
	fmovd	%f32, %f32			! reread %f32
	ba,a	.fp_reread_done
	fmovd	%f32, %f32			! reread %f32
	ba,a	.fp_reread_done
	fmovd	%f34, %f34			! reread %f34
	ba,a	.fp_reread_done
	fmovd	%f34, %f34			! reread %f34
	ba,a	.fp_reread_done
	fmovd	%f36, %f36			! reread %f36
	ba,a	.fp_reread_done
	fmovd	%f36, %f36			! reread %f36
	ba,a	.fp_reread_done
	fmovd	%f38, %f38			! reread %f38
	ba,a	.fp_reread_done
	fmovd	%f38, %f38			! reread %f38
	ba,a	.fp_reread_done
	fmovd	%f40, %f40			! reread %f40
	ba,a	.fp_reread_done
	fmovd	%f40, %f40			! reread %f40
	ba,a	.fp_reread_done
	fmovd	%f42, %f42			! reread %f42
	ba,a	.fp_reread_done
	fmovd	%f42, %f42			! reread %f42
	ba,a	.fp_reread_done
	fmovd	%f44, %f44			! reread %f44
	ba,a	.fp_reread_done
	fmovd	%f44, %f44			! reread %f44
	ba,a	.fp_reread_done
	fmovd	%f46, %f46			! reread %f46
	ba,a	.fp_reread_done
	fmovd	%f46, %f46			! reread %f46
	ba,a	.fp_reread_done
	fmovd	%f48, %f48			! reread %f48
	ba,a	.fp_reread_done
	fmovd	%f48, %f48			!
reread %f48 ba,a .fp_reread_done fmovd %f50, %f50 ! reread %f50 ba,a .fp_reread_done fmovd %f50, %f50 ! reread %f50 ba,a .fp_reread_done fmovd %f52, %f52 ! reread %f52 ba,a .fp_reread_done fmovd %f52, %f52 ! reread %f52 ba,a .fp_reread_done fmovd %f54, %f54 ! reread %f54 ba,a .fp_reread_done fmovd %f54, %f54 ! reread %f54 ba,a .fp_reread_done fmovd %f56, %f56 ! reread %f56 ba,a .fp_reread_done fmovd %f56, %f56 ! reread %f56 ba,a .fp_reread_done fmovd %f58, %f58 ! reread %f58 ba,a .fp_reread_done fmovd %f58, %f58 ! reread %f58 ba,a .fp_reread_done fmovd %f60, %f60 ! reread %f60 ba,a .fp_reread_done fmovd %f60, %f60 ! reread %f60 ba,a .fp_reread_done fmovd %f62, %f62 ! reread %f62 ba,a .fp_reread_done fmovd %f62, %f62 ! reread %f62 ba,a .fp_reread_done 1: ! %g2 has freg number, %g3 has base address-4 sllx %g2, 3, %g2 ! offset = freg# * 8 add %g3, %g2, %g3 ! %g3 = instruction block addr ldxa [%g0]ASI_SPARC_ERR_EN, %g2 ! save current in %g2 andn %g2, CEEN, %g4 ! disable CEEN stxa %g4, [%g0] ASI_SPARC_ERR_EN ! .. jmp %g3 + SZ_INSTR ! jmp to reread register nop .fp_reread_done: stxa %g2, [%g0] ASI_SPARC_ERR_EN ! restore CEEN HVRET ! return to caller SET_SIZE(frf_reread) /* * clear_fregerr(sparc_ear) [LEAF Function] * * Clear cpu.fregerr if the FRU register in error == cpu.fregerr * Return 0 if cpu.fregerr matches, and 1 if no match * Arguments: * %g1 - SPARC EAR * %g2 - output - 0 if cpu.fregerr matches, 1 if no match * %g3, %g4 - scratch * %g5 - erpt pointer * %g6 - strand pointer * %g7 - return address */ ENTRY_NP(clear_fregerr) ldx [%g6 + STRAND_REGERR], %g3 ! %g3 = strand.fregerr ! get register number from EAR xor %g3, %g1, %g3 ! Are they the same? andcc %g3, SPARC_EAR_FPREG_MASK, %g3 ! (ignore non-register bits) bz %xcc, .freg_match ! yes, then clear nop mov 1, %g2 ! return 1 for no match HVRET ! %g4 has cpu.fregerr address ! FRU was taken from FRC trap handler reread attempt .freg_match: stx %g0, [%g6 + STRAND_REGERR] ! clear strand.fregerr mov %g0, %g2 ! 
return 0 for freg match HVRET SET_SIZE(clear_fregerr) /* * clear_frf_ue(uint64_t sparc_ear) [LEAF function] * * Clear the UE in the floating-point register file * Arguments: * %g1 - SPARC EAR * %g2 - %g4 - scratch * %g5, %g6 - preserverd * %g7 - return address */ ENTRY_NP(clear_frf_ue) and %g1, SPARC_EAR_FPREG_MASK, %g2 srlx %g2, SPARC_EAR_FPREG_SHIFT, %g2 ! %g2 has 6-bit fpreg number ! Now clear the register in error ba 1f rd %pc, %g3 ! %g3 = base address ! an array of instruction blocks indexed by register number to ! clear the floating-point register reported in error ! The first 32 entries use single-precision register ! The next 32 entries clear the double-precision register fzeros %f0 ! clear %f0 ba,a .fp_clear_done fzeros %f1 ! clear %f1 ba,a .fp_clear_done fzeros %f2 ! clear %f2 ba,a .fp_clear_done fzeros %f3 ! clear %f3 ba,a .fp_clear_done fzeros %f4 ! clear %f4 ba,a .fp_clear_done fzeros %f5 ! clear %f5 ba,a .fp_clear_done fzeros %f6 ! clear %f6 ba,a .fp_clear_done fzeros %f7 ! clear %f7 ba,a .fp_clear_done fzeros %f8 ! clear %f8 ba,a .fp_clear_done fzeros %f9 ! clear %f9 ba,a .fp_clear_done fzeros %f10 ! clear %f10 ba,a .fp_clear_done fzeros %f11 ! clear %f11 ba,a .fp_clear_done fzeros %f12 ! clear %f12 ba,a .fp_clear_done fzeros %f13 ! clear %f13 ba,a .fp_clear_done fzeros %f14 ! clear %f14 ba,a .fp_clear_done fzeros %f15 ! clear %f15 ba,a .fp_clear_done fzeros %f16 ! clear %f16 ba,a .fp_clear_done fzeros %f17 ! clear %f17 ba,a .fp_clear_done fzeros %f18 ! clear %f18 ba,a .fp_clear_done fzeros %f19 ! clear %f19 ba,a .fp_clear_done fzeros %f20 ! clear %f20 ba,a .fp_clear_done fzeros %f21 ! clear %f21 ba,a .fp_clear_done fzeros %f22 ! clear %f22 ba,a .fp_clear_done fzeros %f23 ! clear %f23 ba,a .fp_clear_done fzeros %f24 ! clear %f24 ba,a .fp_clear_done fzeros %f25 ! clear %f25 ba,a .fp_clear_done fzeros %f26 ! clear %f26 ba,a .fp_clear_done fzeros %f27 ! clear %f27 ba,a .fp_clear_done fzeros %f28 ! clear %f28 ba,a .fp_clear_done fzeros %f29 ! 
clear %f29 ba,a .fp_clear_done fzeros %f30 ! clear %f30 ba,a .fp_clear_done fzeros %f31 ! clear %f31 ba,a .fp_clear_done ! double precision register pairs, clear both of them on errors fzero %f32 ! clear %f32 ba,a .fp_clear_done fzero %f32 ! clear %f32 ba,a .fp_clear_done fzero %f34 ! clear %f34 ba,a .fp_clear_done fzero %f34 ! clear %f34 ba,a .fp_clear_done fzero %f36 ! clear %f36 ba,a .fp_clear_done fzero %f36 ! clear %f36 ba,a .fp_clear_done fzero %f38 ! clear %f38 ba,a .fp_clear_done fzero %f38 ! clear %f38 ba,a .fp_clear_done fzero %f40 ! clear %f40 ba,a .fp_clear_done fzero %f40 ! clear %f40 ba,a .fp_clear_done fzero %f42 ! clear %f42 ba,a .fp_clear_done fzero %f42 ! clear %f42 ba,a .fp_clear_done fzero %f44 ! clear %f44 ba,a .fp_clear_done fzero %f44 ! clear %f44 ba,a .fp_clear_done fzero %f46 ! clear %f46 ba,a .fp_clear_done fzero %f46 ! clear %f46 ba,a .fp_clear_done fzero %f48 ! clear %f48 ba,a .fp_clear_done fzero %f48 ! clear %f48 ba,a .fp_clear_done fzero %f50 ! clear %f50 ba,a .fp_clear_done fzero %f50 ! clear %f50 ba,a .fp_clear_done fzero %f52 ! clear %f52 ba,a .fp_clear_done fzero %f52 ! clear %f52 ba,a .fp_clear_done fzero %f54 ! clear %f54 ba,a .fp_clear_done fzero %f54 ! clear %f54 ba,a .fp_clear_done fzero %f56 ! clear %f56 ba,a .fp_clear_done fzero %f56 ! clear %f56 ba,a .fp_clear_done fzero %f58 ! clear %f58 ba,a .fp_clear_done fzero %f58 ! clear %f58 ba,a .fp_clear_done fzero %f60 ! clear %f60 ba,a .fp_clear_done fzero %f60 ! clear %f60 ba,a .fp_clear_done fzero %f62 ! clear %f62 ba,a .fp_clear_done fzero %f62 ! clear %f62 ba,a .fp_clear_done 1: ! %g2 has freg number, %g3 has base address-4 sllx %g2, 3, %g2 ! offset = freg# * 8 add %g3, %g2, %g3 ! %g3 = instruction block addr jmp %g3 + SZ_INSTR ! jmp to clear register nop .fp_clear_done: HVRET ! 
return to caller SET_SIZE(clear_frf_ue) /* * FRC Uncorrectible ECC Error * * FRU Error Handler: Check for persistent error * if (fru_check(sparc_ear) == RF_TRANSIENT) { * q_sun4v_error_report(nrq); * q_service_error_report(spi); * jmp nonresumable_error trap handler; * } else { * CPU.status = mark CPU in ERROR; * if ((CPUnext = avail(partID)) != NULL) { * x_call(CPUnext, I_AM_IN_ERROR); * stop_self(); * } else { * q_service_error_report(spi); * stop_self(); causes watchdog reset later? * } * } */ /* * Handler to set bit(s) in the SPARC Error Enable Register * * Called to get handler callback address (avoid relocation problems) * * Entry Data: * none * * Return Data: * %g2: handler address * * Registers modified: * %g2 */ ENTRY_NP(err_set_sparc_bits) RETURN_HANDLER_ADDRESS(%g2) ! in %g2 /* * Callback from interrupt: * * This will re-enable the Sparc interrupts. * Process in this order: * - clear any Sparc CE's * - clear blackout * - enable Sparc EEN * * Entry Data: * %g1: bit(s) to set * %g2: * * Return Data: * none * * Registers modified: * %g1-6 */ .err_set_sparc_bits: /* This is the actual function entry */ mov %g1, %g5 ! bits !! %g5 = bits to set set SPARC_CE_BITS, %g1 ldxa [%g0]ASI_SPARC_ERR_STATUS, %g3 ! SPARC afsr btst %g1, %g3 ! is a CE pending? bz .err_set_sparc_1 ! no: set SPARC_ESR_PRIV, %g2 or %g1, %g2, %g1 ! yes: include PRIV and %g3, %g1, %g3 ! just the CE bits stxa %g3, [%g0]ASI_SPARC_ERR_STATUS ! clear SPARC CE afsr bits .err_set_sparc_1: STRAND_STRUCT(%g3) lduw [%g3 + STRAND_ERR_FLAG], %g1 ! installed flags bclr ERR_FLAG_SPARC, %g1 ! reset SPARC ESR stw %g1, [%g3 + STRAND_ERR_FLAG] ! .. ldxa [%g0]ASI_SPARC_ERR_EN, %g3 ! get current or %g3, %g5, %g3 ! set bit(s) stxa %g3, [%g0]ASI_SPARC_ERR_EN ! store back HVRET SET_SIZE(err_set_sparc_bits) /* * Handler to set bit(s) in the L2 Error Enable Register * * This will re-enable the L2/DRAM interrupts. 
* Process in this order: * - clear any DRAM CE's * - clear any L2 CE's * - clear blackout * - enable L2DRAM EEN * * Called to get handler callback address (avoid relocation problems) * * Entry Data: * none * * Return Data: * %g2: handler address * * Registers modified: * %g2 */ ENTRY_NP(err_set_l2_bits) RETURN_HANDLER_ADDRESS(%g2) ! in %g2 /* * Callback from interrupt: * * Entry Data: * %g1: bit(s) to set * %g2: B:5-0 = bank # * * Return Data: * none * * Registers modified: * %g1-6 */ .err_set_l2_bits: /* This is the actual function entry */ mov %g1, %g5 ! bits and %g2, NO_L2_BANKS - 1, %g6 ! bank # !! %g5 = bits to set !! %g6 = bank# setx DRAM_ESR_CE_BITS | DRAM_ESR_MEC, %g1, %g2 setx DRAM_ESR_BASE, %g1, %g3 ! DRAM base sllx %g6, DRAM_BANK_SHIFT, %g4 ! + bank offset ldx [%g3 + %g4], %g1 ! get ESR[bank] and %g1, %g2, %g1 ! reset CE bits only stx %g1, [%g3 + %g4] setx L2_ESR_CE_BITS | L2_ESR_VEC, %g1, %g2 setx L2_ESR_BASE, %g1, %g3 ! L2 base sll %g6, L2_BANK_SHIFT, %g4 ! + bank offset ldx [%g3 + %g4], %g1 ! get ESR[bank] and %g1, %g2, %g1 ! reset CE bits only stx %g1, [%g3 + %g4] STRAND_STRUCT(%g3) mov ERR_FLAG_L2DRAM, %g1 ! L2DRAM flag sll %g1, %g6, %g1 ! << bank# lduw [%g3 + STRAND_ERR_FLAG], %g2 ! installed flags bclr %g1, %g2 ! reset L2DRAM[bank] stw %g2, [%g3 + STRAND_ERR_FLAG] ! .. !! %g1 = bits !! %g6 = bank# BSET_L2_BANK_EEN(%g6, %g5, %g2, %g3) ! L2 Bank EEN[%g6] |= %g5 HVRET SET_SIZE(err_set_l2_bits) /* * Poll to detect errors that did not cause an interrupt for one * reason or another. * Most common cause: L2/DRAM error from prefetch. * * Called to get handler callback address (avoid relocation problems) * * Entry Data: * none * * Return Data: * %g2: handler address * * Registers modified: * %g2 */ ENTRY_NP(err_poll_daemon) RETURN_HANDLER_ADDRESS(%g2) ! 
in %g2 /* * Callback from interrupt: * * Entry Data: * %g1: 0 * %g2: 0 * %g3: Interrupt Tick Time * * Return Data: * none * * Registers modified: * %g1-6 */ .err_poll_daemon: /* * Get strand, CE buffer in %g6-5, they are safe across calls */ STRAND_STRUCT(%g6) stx %g3, [%g6 + STRAND_ERR_POLL_ITT] ! save interrupt tick time stx %g7, [%g6 + STRAND_ERR_POLL_RET] ! save return address /* * Look for Sparc errors: test only, * the error handler will do the work */ .err_poll_sparc: ldxa [%g0]ASI_SPARC_ERR_STATUS, %g4 ! SPARC afsr ! ! Check for any UE: ! UE_CHECK(SPARC_UE_MEU_BITS, L2_ESR_UE_BITS, %g4, %g1, %g2, %g3) bz %xcc, .err_poll_no_ue ! no nop HVCALL(ue_poll_entry) ! yes: go process ba,a .err_poll_sparc ! and re-check Sparc status .err_poll_no_ue: ! ! Check for any CE: ! CE_CHECK(%g6, %g4, %g1, %g2, %g3) ! cpup, spesr, bz %xcc, .err_poll_no_ce ! no nop HVCALL(ce_poll_entry) ! yes ba,a .err_poll_sparc ! go re-check Sparc status .err_poll_no_ce: /* * reinstall poll handler */ STRAND2CONFIG_STRUCT(%g6, %g1) ! ->config ldx [%g1 + CONFIG_CE_POLL_TIME], %g1 ! g1 = time interval brz %g1, 9f ! disabled: branch nop ldx [%g6 + STRAND_ERR_POLL_ITT], %g2 ! this interrupt tick time add %g1, %g2, %g1 ! abs time for next poll HVCALL(err_poll_daemon) ! g2 = handler address clr %g3 ! g3 = arg 0 : n/a clr %g4 ! g4 = arg 1 : n/a HVCALL(cyclic_add_abs) /* ( abs_tick, address, arg0, arg1 ) */ 9: STRAND_STRUCT(%g6) ldx [%g6 + STRAND_ERR_POLL_RET], %g7 ! restore return address HVRET SET_SIZE(err_poll_daemon) /* * Function to start error polling daemon: * * Entry Data: * none * * Return Data: * %g1: status * 0 - success (started) * 1 - failed (already running) * 2 - failed to start * * Registers modified: * %g1-6 */ ENTRY_NP(err_poll_daemon_start) STRAND_STRUCT(%g6) stx %g7, [%g6 + STRAND_ERR_POLL_RET] ! save return address lduw [%g6 + STRAND_ERR_FLAG], %g2 btst ERR_FLAG_POLLD, %g2 ! handler flags bnz,a %xcc, 9f ! poll deamon installed? mov 1, %g1 ! 
yes: return "running" bset ERR_FLAG_POLLD, %g2 ! set it stw %g2, [%g6 + STRAND_ERR_FLAG] ! store /* * Install the callback handler: just start at now + ce_poll_time */ STRAND2CONFIG_STRUCT(%g6, %g1) ! ->config ldx [%g1 + CONFIG_CE_POLL_TIME], %g1 ! g1 = cycle time in ticks HVCALL(err_poll_daemon) ! g2 = handler address clr %g3 ! g3 = arg 0 : error bits clr %g4 ! g3 = arg 1 : HVCALL(cyclic_add_rel) /* ( del_tick, address, arg0, arg1 ) */ STRAND_STRUCT(%g6) ldx [%g6 + STRAND_ERR_POLL_RET], %g7 ! restore return address clr %g1 ! status = success 9: HVRET ! %g1 = status SET_SIZE(err_poll_daemon_start) #if EVBSC_L2_AFSR_INCR == 8 #define EVBSC_L2_AFSR_SHIFT 3 #else #error "EVBSC_L2_AFSR_INCR is not 8" #endif #if EVBSC_L2_AFAR_INCR == 8 #define EVBSC_L2_AFAR_SHIFT 3 #else #error "EVBSC_L2_AFAR_INCR is not 8" #endif /* * This function determines if an error is transient, sticky or * permanent. We only check disposition on Memory CE's. Hence, * we only work the L2 error registers. * The algorithm to classify the error is as follows: * 1) Displacement flush the E$ line corresponding to %addr. * The first ldxa guarantees that the %addr is no longer in * M, O, or E (goes to I or S (if instruction fetch also * happens). * 2) "Write" the data using a ldx %addrm %scr CAS %addr,%scr,%scr. * The casxa guarantees a transition from I to M or S to M. * There are two possibilities that the sequence does not act * as intended: * - the line is displaced between the ld and the cas: * we still have the correct value in %scr and the cas will * reload the line - this is OK since the ld was to get the * value, no to get the line in the cache. * - the line is written between the ld and the cas: * the intent to modify the line has effectively succeeded * 3) Displacement flush the E$ line corresponding to %addr. * The second ldxa pushes the M line out of the ecache, * into the writeback buffers, on the way to memory. 
* 4) The "membar #Sync" pushes the cache line out of * the writeback buffers onto the bus, on the way to * dram finally. * %g1 - bank number * * XXX - Need to handle race with HW scrubber */ ENTRY_NP(err_determine_disposition) CPU_PUSH(%g7, %g4, %g5, %g6) ! save return address ! Read and save the current enable mov %g1, %g6 ! bank # GET_L2_BANK_EEN(%g6, %g5, %g4) CPU_PUSH(%g5, %g3, %g4, %g2) ! save for later ! disable CEEN BCLR_L2_BANK_EEN(%g6, CEEN, %g4, %g3) ! get err address into %g1 STRAND_STRUCT(%g1) add %g1, STRAND_CE_RPT + STRAND_VBSC_ERPT, %g1 sllx %g6, EVBSC_L2_AFAR_SHIFT, %g4 add %g4, EVBSC_L2_AFAR, %g2 ldx [%g1 + %g2], %g1 ! Mask AFAR to get only valid bits and %g1, ~L2_EAR_DRAM_MASK, %g1 ! l2_flush_line garbles %g6. Save %g6 which ! contains the BNUM CPU_PUSH(%g6, %g3, %g4, %g2) ! save for later /* * displace and cause a write back * Niagara works differently than previous generations. * On previous generations, a cas will mark the line dirty, * regardless of the success of the compare. * In Niagara, the line only gets mark dirty if the swap occurs. * Hence, we need to first load the value and store it back via the cas */ HVCALL(l2_flush_line) ldx [%g1], %g6 casx [%g1], %g6, %g6 HVCALL(l2_flush_line) ! push cache line out of the write back buffers membar #Sync CPU_POP(%g6, %g2, %g3, %g4) /* * Read the errs registers again and compare them with our saved * version. If they are the same, then error is persistent */ ! read err regs setx L2_ESR_DRAM_CE_BITS, %g3, %g2 setx L2_ESR_BASE, %g3, %g5 ! L2 base sll %g6, L2_BANK_SHIFT, %g4 ! + bank offset ldx [%g5 + %g4], %g5 ! get ESR[bank] and %g5, %g2, %g5 ! compare only CEs ! get our copy STRAND_STRUCT(%g3) add %g3, STRAND_CE_RPT + STRAND_VBSC_ERPT, %g3 sllx %g6, EVBSC_L2_AFSR_SHIFT, %g4 add %g4, EVBSC_L2_AFSR, %g4 ldx [%g3 + %g4], %g4 ! orig AFSR and %g4, %g2, %g4 ! only CEs ! clear the disposition to have none mov CE_XDIAG_NONE, %g2 stx %g2, [%g3 + EVBSC_DIAG_BUF + DRAM_DISPOSITION] ! 
compare with stored cmp %g4, %g5 bne,pt %xcc, 2f nop ! now check afar and see if same. ! %g1 still contains the stored afar setx L2_EAR_BASE, %g4, %g5 ! L2 base sll %g6, L2_BANK_SHIFT, %g4 ! + bank offset ldx [%g5 + %g4], %g4 ! mask only valid bits and %g4, ~L2_EAR_DRAM_MASK, %g4 cmp %g1, %g4 bne,pt %xcc, 2f mov CE_XDIAG_CE1, %g4 ! set ce1 if match stx %g4, [%g3 + EVBSC_DIAG_BUF + DRAM_DISPOSITION] ! clear the error reg sllx %g6, EVBSC_L2_AFSR_SHIFT, %g4 add %g4, EVBSC_L2_AFSR, %g4 ldx [%g3 + %g4], %g4 ! orig AFSR setx L2_ESR_BASE, %g2, %g5 ! L2 base sll %g6, L2_BANK_SHIFT, %g2 ! + bank offset stx %g4, [%g5 + %g2] ! clear ESR[bank] /* * Read data again. data should now come from memory. We check * for errors. If the saved version and new errs registers are the * same then it is a stuck bit * %g1 still contains our stored afar */ 2: ldx [%g1], %g2 ! read regs setx L2_ESR_DRAM_CE_BITS, %g5, %g2 setx L2_ESR_BASE, %g4, %g5 ! L2 base sll %g6, L2_BANK_SHIFT, %g4 ! + bank offset ldx [%g5 + %g4], %g5 ! get ESR[bank] and %g5, %g2, %g5 ! compare only CEs ! stored value sllx %g6, EVBSC_L2_AFSR_SHIFT, %g4 add %g4, EVBSC_L2_AFSR, %g4 ldx [%g3 + %g4], %g4 ! orig AFSR and %g4, %g2, %g4 ! only CEs ! compare with stored cmp %g4, %g5 bne,pt %xcc, 1f nop ! now check afar and see if same. ! %g1 still contains the stored afar setx L2_EAR_BASE, %g4, %g5 ! L2 base sll %g6, L2_BANK_SHIFT, %g4 ! + bank offset ldx [%g5 + %g4], %g4 ! mask only valid bits and %g4, ~L2_EAR_DRAM_MASK, %g4 cmp %g1, %g4 bne,pt %xcc, 1f mov CE_XDIAG_CE2, %g5 ! set ce2 if match ldx [%g3 + EVBSC_DIAG_BUF + DRAM_DISPOSITION], %g2 or %g5, %g2, %g2 stx %g2, [%g3 + EVBSC_DIAG_BUF + DRAM_DISPOSITION] 1: ! restore orig ce CPU_POP(%g5, %g2, %g3, %g4) SET_L2_EEN_BASE(%g2) sllx %g6, L2_BANK_SHIFT, %g3 ! bank offset add %g2, %g3, %g2 ! bank address stx %g5, [%g2] ! 
! restore value
	CPU_POP(%g7, %g1, %g2, %g3)
	HVRET
	SET_SIZE(err_determine_disposition)

/*
 * Handle strand in error
 * All other strands are idle
 * This strand:
 *	- search for another "good" strand
 *	- flag as halted (bit mask)
 *	- Remove cyclic (Error Daemon)
 *	- handoff interrupt steering
 *	- Migrate all intrs
 *	- notify good strand to finish rest of work
 *	- put myself into idle
 * Selected Good strand:
 *	- send resumable error to guest
 * %g6 should not be clobbered
 */
	ENTRY_NP(strand_in_error)

	! Remove this cpu from the active bitmask and add it to halted
	STRAND_STRUCT(%g5)
	ldub	[%g5 + STRAND_ID], %g5
	mov	1, %g4
	sllx	%g4, %g5, %g4			! %g4 = (1 << strand id)
	!! %g5 - strand id
	ROOT_STRUCT(%g2)			! config ptr

	! clear this strand from the active list
	ldx	[%g2 + CONFIG_STACTIVE], %g3
	bclr	%g4, %g3
	stx	%g3, [%g2 + CONFIG_STACTIVE]

	! set this cpu in the halted list
	ldx	[%g2 + CONFIG_STHALT], %g3
	bset	%g4, %g3
	stx	%g3, [%g2 + CONFIG_STHALT]

	! find another idle strand for re-targetting
	! (linear scan of strand ids 0 .. NSTRANDS-1, skipping our own id)
	ldx	[%g2 + CONFIG_STIDLE], %g3
	mov	0, %g6
.find_strand:
	cmp	%g5, %g6
	be,pn	%xcc, .next_strand		! never pick ourselves
	mov	1, %g4				! (delay slot, always executed)
	sllx	%g4, %g6, %g4
	andcc	%g3, %g4, %g0			! candidate in the idle mask?
	bnz,a	%xcc, .found_a_strand
	nop
.next_strand:
	inc	%g6
	cmp	%g6, NSTRANDS
	bne,pn	%xcc, .find_strand
	nop

	/*
	 * No usable active strands are left in the
	 * system, force host exit
	 */
#ifdef CONFIG_VBSC_SVC
	ba,a	vbsc_guest_exit
#else
	LEGION_EXIT(%o0)
#endif

.found_a_strand:
	/*
	 * handoff L2 Steering CPU
	 * If we are the steering cpu, migrate it to our chosen one
	 */
	!! %g5 - this strand ID
	!! %g6 - target strand ID
	setx	L2_CONTROL_REG, %g3, %g4
	ldx	[%g4], %g2			! current setting
	srlx	%g2, L2_ERRORSTEER_SHIFT, %g3
	and	%g3, (NSTRANDS - 1), %g3
	cmp	%g3, %g5			! is this steering strand ?
	bnz,pt	%xcc, 1f
	nop

	! It is the L2 Steering strand. Migrate responsibility to tgt strand
	!
	! andn rs1, rs2, rd computes rs1 & ~rs2, so the control register
	! value (%g2) must be the first operand and the shifted steering
	! field (%g3) the second.  With the operands swapped the result is
	! always zero (every bit of %g3 was extracted from %g2), which
	! wiped all other L2 control bits before the new steering strand
	! was or'ed in and stored back.
	sllx	%g3, L2_ERRORSTEER_SHIFT, %g3
	andn	%g2, %g3, %g2			! remove this strand
	sllx	%g6, L2_ERRORSTEER_SHIFT, %g3
	or	%g2, %g3, %g2			! insert target strand
	stx	%g2, [%g4]
1:
	mov	%g5, %g1
	mov	%g6, %g2
	!! %g1 - this strand ID
	!!
%g2 - target strand ID #ifdef CONFIG_FPGA /* * Migrate SSI intrs */ STRAND_PUSH(%g1, %g3, %g4) STRAND_PUSH(%g2, %g3, %g4) HVCALL(ssi_intr_redistribution) STRAND_POP(%g2, %g3) STRAND_POP(%g1, %g3) #endif #if 0 /* XXX */ /* * XXX err_poll_daemon (collapse into heartbeat?) */ #endif /* * Disable heartbeat interrupts if they're on this cpu. * cpu_in_error_finish will invoke heartbeat_enable on the * remote cpu if the heartbeat was disabled. */ STRAND_PUSH(%g1, %g3, %g4) STRAND_PUSH(%g2, %g3, %g4) HVCALL(heartbeat_disable) STRAND_POP(%g2, %g3) STRAND_POP(%g1, %g3) #ifdef CONFIG_FIRE /* * if this guest owns a fire bus, redirect * fire interrupts */ GUEST_STRUCT(%g3) ROOT_STRUCT(%g4) ldx [%g4 + CONFIG_PCIE_BUSSES], %g4 ! check leaf A ldx [%g4 + PCIE_DEVICE_GUESTP], %g5 cmp %g3, %g5 be %xcc, 2f nop ! check leaf B ldx [%g4 + PCIE_DEVICE_GUESTP + PCIE_DEVICE_SIZE], %g5 cmp %g3, %g5 bne %xcc, 3f nop 2: /* * Migrate fire intrs */ STRAND_PUSH(%g1, %g3, %g4) STRAND_PUSH(%g2, %g3, %g4) HVCALL(fire_intr_redistribution) STRAND_POP(%g2, %g3) STRAND_POP(%g1, %g3) /* * Migrate fire err intrs */ STRAND_PUSH(%g1, %g3, %g4) STRAND_PUSH(%g2, %g3, %g4) HVCALL(fire_err_intr_redistribution) STRAND_POP(%g2, %g3) STRAND_POP(%g1, %g3) 3: #endif /* * Migrate vdev intrs */ STRAND_PUSH(%g1, %g3, %g4) STRAND_PUSH(%g2, %g3, %g4) HVCALL(vdev_intr_redistribution) STRAND_POP(%g2, %g3) STRAND_POP(%g1, %g3) /* * Now pick another VCPU in this guest to target the erpt * Ensure that the VCPU is not bound to the strand in error */ VCPU_STRUCT(%g1) GUEST_STRUCT(%g2) add %g2, GUEST_VCPUS, %g2 mov 0, %g3 !! %g1 - this vcpu struct !! %g2 - array of vcpus in guest !! %g3 - vcpu array idx .find_cpu_loop: ldx [%g2], %g4 ! vcpu struct brz,pn %g4, .find_cpu_continue nop ! ignore this vcpu cmp %g4, %g1 be,pn %xcc, .find_cpu_continue nop ! check whether this CPU is running guest code ? ldx [%g4 + CPU_STATUS], %g6 cmp %g6, CPU_STATE_RUNNING bne,pt %xcc, .find_cpu_continue nop ! check the error queues.. 
if not set, not a good candidate ldx [%g4 + CPU_ERRQR_BASE], %g6 brz,pt %g6, .find_cpu_continue nop /* * find the strand this vcpu is ON, make sure it is idle * NOTE: currently this check is not necessary, more * likely when we have sub-strand scheduling */ !! %g1 - this vcpu struct !! %g2 - curr vcpu in guest vcpu array !! %g3 - vcpu array idx !! %g4 - target vcpus struct STRAND_STRUCT(%g5) ! this strand ldx [%g4 + CPU_STRAND], %g6 ! vcpu->strand cmp %g5, %g6 be,pn %xcc, .find_cpu_continue nop ! check if the target strand is IDLE ldub [%g6 + STRAND_ID], %g6 ! vcpu->strand->id mov 1, %g5 sllx %g5, %g6, %g6 VCPU2ROOT_STRUCT(%g1, %g5) ldx [%g5 + CONFIG_STIDLE], %g5 btst %g5, %g6 bnz,pt %xcc, .found_a_cpu nop .find_cpu_continue: add %g2, GUEST_VCPUS_INCR, %g2 inc %g3 cmp %g3, NVCPUS bne,pn %xcc, .find_cpu_loop nop ! If we got here, we didn't find a good tgt cpu ! do not send an erpt, exit the guest HVCALL(guest_exit) ba,a .skip_sending_erpt .found_a_cpu: !! %g4 - target vcpu struct /* * This cpu has most of the information to send to the Guest. * We copy from this cpu err rpt to the tgt's err rpt */ STRAND_STRUCT(%g1) ! this strand STRAND2ERPT_STRUCT(STRAND_UE_RPT, %g1, %g1) ! get tgt strand ce erpt ldx [%g4 + CPU_STRAND], %g2 ! tgt_vcpu->strand STRAND2ERPT_STRUCT(STRAND_CE_RPT, %g2, %g3) ! copy info to tgt cpu ce err buf ldx [%g1 + STRAND_SUN4V_ERPT + ESUN4V_G_EHDL], %g4 ! ehdl stx %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_G_EHDL] ldx [%g1 + STRAND_SUN4V_ERPT + ESUN4V_G_STICK], %g4 ! stick stx %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_G_STICK] ld [%g1 + STRAND_SUN4V_ERPT + ESUN4V_EDESC], %g4 ! edesc st %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_EDESC] ld [%g1 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g4 ! attr st %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_ATTR] ldx [%g1 + STRAND_SUN4V_ERPT + ESUN4V_ADDR], %g4 ! ra stx %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_ADDR] ld [%g1 + STRAND_SUN4V_ERPT + ESUN4V_SZ], %g4 ! 
sz st %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_SZ] lduh [%g1 + STRAND_SUN4V_ERPT + ESUN4V_G_CPUID], %g4 ! cpuid stuh %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_G_CPUID] lduh [%g1 + STRAND_SUN4V_ERPT + ESUN4V_G_SECS], %g4 stuh %g4, [%g3 + STRAND_SUN4V_ERPT + ESUN4V_G_SECS] /* * Send a xcall to the target strand so it can finish the work */ ldub [%g2 + STRAND_ID], %g6 ! tgt strand id sllx %g6, INT_VEC_DIS_VCID_SHIFT, %g5 or %g5, VECINTR_CPUINERR, %g5 stxa %g5, [%g0]ASI_INTR_UDB_W .skip_sending_erpt: STRAND_STRUCT(%g6) SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) ! remove self from idle list STRAND_STRUCT(%g1) ldub [%g1 + STRAND_ID], %g6 /* phys id */ mov 1, %g1 sllx %g1, %g6, %g1 ROOT_STRUCT(%g6) ldx [%g6 + CONFIG_STIDLE], %g5 bclr %g1, %g5 stx %g5, [%g6 + CONFIG_STIDLE] ! idle myself STRAND_STRUCT(%g1) ldub [%g1 + STRAND_ID], %g6 /* phys id */ INT_VEC_DSPCH_ONE(INT_VEC_DIS_TYPE_IDLE, %g6, %g3, %g4) /* * Paranoia!! If we get here someone else resumed this strand * by mistake * hvabort to catch the mistake */ ba hvabort rd %pc, %g1 SET_SIZE(strand_in_error) ENTRY(ssi_mondo) /* * Check for JBUS error */ setx JBI_ERR_LOG, %g1, %g2 ldx [%g2], %g2 brnz,pn %g2, ue_jbus_err nop /* * Clear the INT_CTL.MASK bit for the SSI */ setx IOBBASE, %g3, %g2 stx %g0, [%g2 + INT_CTL + INT_CTL_DEV_OFF(IOBDEV_SSIERR)] retry SET_SIZE(ssi_mondo) /* * re-route an error report (cont'd) * 3. select one of the active CPUs for that guest * 4. Copy the data from the error erport into that * CPUs cpu struct * 5. Send a VECINTR_ERROR_XCALL to that CPU * 6: RETRY * * %g2 target guest * %g4 PA */ /* FIXME: re-whack this for vcpu/strand split */ ENTRY_NP(cpu_reroute_error) /* * find first live cpu in guest->vcpus * Then deliver the error to that vcpu, and interrupt * the strand it is running on to make that happen. */ add %g2, GUEST_VCPUS, %g2 mov 0, %g3 1: cmp %g3, NVCPUS be,pn %xcc, cpu_reroute_error_exit nop mulx %g3, GUEST_VCPUS_INCR, %g5 ldx [%g2 + %g5], %g1 brz,a,pn %g1, 1b inc %g3 ! 
check whether this CPU is running guest code ? ldx [%g1 + CPU_STATUS], %g5 cmp %g5, CPU_STATE_RUNNING bne,pt %xcc, 1b inc %g3 ! %g3 target vcpu id ! %g1 &vcpus[target] ldx [%g1 + CPU_STRAND], %g1 /* * It is possible that the CPUs rerouted data is already in use. * We use the rerouted_addr field as a spinlock. The target CPU * will set this to 0 after reading the error data allowing us * to re-use the rerouting fields. * See cpu_err_rerouted() below. * * %g1 &strands[target] * %g3 target cpuid * %g4 PA */ set STRAND_REROUTED_ADDR, %g2 add %g1, %g2, %g6 1: casx [%g6], %g0, %g4 brnz,pn %g4, 1b nop ! get the data out of the current STRAND's ce_rpt buf and store ! in the target STRAND struct STRAND_ERPT_STRUCT(STRAND_CE_RPT, %g6, %g5) ! g6->strand, g5->strand.ce_rpt ldx [%g5 + STRAND_SUN4V_ERPT + ESUN4V_G_EHDL], %g6 set STRAND_REROUTED_EHDL, %g4 stx %g6, [%g1 + %g4] lduw [%g5 + STRAND_SUN4V_ERPT + ESUN4V_ATTR], %g6 set STRAND_REROUTED_ATTR, %g4 stx %g6, [%g1 + %g4] ldx [%g5 + STRAND_SUN4V_ERPT + ESUN4V_G_STICK], %g6 ! STICK is probably not necssary. I doubt if FMA checks ! both EHDL/STICK when looking for duplicate reports, ! but it doesn't kill us to do it. set STRAND_REROUTED_STICK, %g4 stx %g6, [%g1 + %g4] ! send an x-call to the target CPU ldub [%g1 + STRAND_ID], %g3 sllx %g3, IVDR_THREAD, %g3 mov VECINTR_ERROR_XCALL, %g5 or %g3, %g5, %g3 stxa %g3, [%g0]ASI_INTR_UDB_W cpu_reroute_error_exit: ! error is re-routed, get out of here STRAND_STRUCT(%g6) SPINLOCK_RESUME_ALL_STRAND(%g6, %g1, %g2, %g3, %g4) ldx [%g6 + STRAND_ERR_RET], %g7 ! get return address brnz,a %g7, .ue_return ! valid: clear it & return stx %g0, [%g6 + STRAND_ERR_RET] ! .. retry SET_SIZE(cpu_reroute_error) /* * An error has been re-routed to this STRAND. * The EHDL/ADDR/STICK/ATTR have been stored in the STRAND struct * by the STRAND that originally detected the error. * * Note: STICK may not be strictly necessary */ ENTRY_NP(cpu_err_rerouted) STRAND_ERPT_STRUCT(STRAND_CE_RPT, %g6, %g5) ! 
g6->strand, g5->strand.ce_rpt #ifdef DEBUG_ERROR_REROUTING PRINT("Error Re-routed to CPU strand "); ldub [%g6 + STRAND_ID], %g4 PRINTX(%g4) PRINT("\r\n"); #endif set STRAND_REROUTED_EHDL, %g4 ldx [%g6 + %g4], %g4 stx %g4, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_G_EHDL] set STRAND_REROUTED_STICK, %g4 ldx [%g6 + %g4], %g4 stx %g4, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_G_STICK] set STRAND_REROUTED_ATTR, %g4 ldx [%g6 + %g4], %g4 stw %g4, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_ATTR] ! keep ADDR after EHDL/STICK/ATTR to avoid race set STRAND_REROUTED_ADDR, %g4 ldx [%g6 + %g4], %g1 ! Clear the strand->rerouted-addr field now to let other ! errors in. stx %g0, [%g6 + %g4] ! Translate the PA to a guest RA VCPU_STRUCT(%g6) CPU_ERR_PA_TO_RA(%g6, %g1, %g4, %g2, %g3) stx %g1, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_ADDR] ldub [%g6 + CPU_VID], %g4 /* guest cpuid */ stuh %g4, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_G_CPUID] set EDESC_UE_RESUMABLE, %g4 stw %g4, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_EDESC] mov ERPT_MEM_SIZE, %g4 st %g4, [%g5 + STRAND_SUN4V_ERPT + ESUN4V_SZ] /* * gueue a resumable error report and return */ ASMCALL_RQ_ERPT(STRAND_CE_RPT, %g1, %g2, %g3, %g4, %g5, %g6, %g7) retry SET_SIZE(cpu_err_rerouted) ENTRY_NP(hvabort) mov %g1, %g6 HV_PRINT_NOTRAP("ABORT: Failure 0x"); HV_PRINTX_NOTRAP(%g6) #ifdef CONFIG_VBSC_SVC HV_PRINT_NOTRAP(", contacting vbsc\r\n"); ba,pt %xcc, vbsc_hv_abort mov %g6, %g1 #else HV_PRINT_NOTRAP(", spinning\r\n"); LEGION_EXIT(1) 2: ba,a 2b nop #endif SET_SIZE(hvabort) ! intended never to return ENTRY(c_hvabort) mov %o7, %g1 ba hvabort nop SET_SIZE(c_hvabort)