Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: ss_err_trap.h | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /* | |
24 | * Copyright 2006 Sun Microsystems, Inc. All rights reserved. | |
25 | * Use is subject to license terms. | |
26 | */ | |
27 | ||
28 | #pragma ident "@(#)ss_err_trap.h 1.10 06/11/08 SMI" | |
29 | ||
30 | #ifndef _SS_ERR_TRAP_H | |
31 | #define _SS_ERR_TRAP_H | |
32 | ||
33 | #ifdef __cplusplus | |
34 | extern "C" { | |
35 | #endif | |
36 | ||
37 | #if ERROR_TRAP_GEN /* { */ | |
38 | ||
39 | ||
40 | /* | |
41 | * | |
42 | * | |
43 | * | |
44 | Error Trap Generation Framework | |
45 | =============================== | |
46 | ||
47 | Implementation Details | |
48 | ||
49 | Glossary of Terms: | |
50 | ------------------ | |
51 | ||
52 | "error events" - Data provided by user to describe the error/trap | |
53 | he or she wants Legion to inject (when/how/where/what). | |
54 | ||
55 | "ASI override" - ASI/VA pair provided by the user along with | |
56 | optional value masks which tells Legion how to respond when SW | |
57 | accesses specific ASI/VAs. | |
58 | ||
59 | "error entry" - CPU specific information describing a named error | |
60 | such as "DCDP" (data cache data parity) in terms of trap type, | |
61 | status register bit(s), error enable register bit(s), etc. Each | |
62 | CPU provides a list of all the error entries it supports. | |
63 | ||
64 | "error status register" (ESR) - Most CPU errors are associated with a | |
65 | status register which will have one or more bits set when that | |
66 | error is detected so that the SW trap handlers know which error | |
67 | was encountered. | |
68 | ||
69 | "error enabling register" (EER) - Most CPU errors in the UltraSPARC | |
70 | family are also associated with an error reporting register | |
71 | (which controls whether the error generates a trap) and/or an | |
72 | error recording register (which controls whether the error is | |
73 | detected). Both of these are referred to as error enabling | |
74 | registers. | |
75 | ||
76 | ||
77 | High Level Summary: | |
78 | ------------------- | |
79 | ||
80 | The user provides Legion (via the conf file) a list of "error events" | |
81 | and optional "ASI overrides". | |
82 | ||
83 | Legion examines the error event list and sets itself up to watch for | |
84 | the trigger conditions specified in the error events. | |
85 | ||
86 | When the trigger conditions are satisfied (e.g. reaching a certain | |
87 | instruction count, loading/storing a certain address, or executing a | |
88 | specified %pc) then Legion injects the specified error trap. | |
89 | ||
90 | If additional error events have been specified, Legion sets itself | |
91 | up once again to watch for the new trigger conditions. | |
92 | ||
93 | The Legion code for injecting an error trap is responsible for | |
94 | injecting the appropriate ESR bits and checking the appropriate | |
95 | error enabling registers in order to determine whether or not to | |
96 | post a trap, leave it pending, or just drop it. | |
97 | ||
98 | For every non-memory ASI access from the simulated SW, Legion checks | |
99 | it against the list of user provided ASI overrides and returns the | |
100 | user specified value if a match is found. | |
101 | ||
102 | The error trap generation framework also monitors the state of the | |
103 | various ESRs and will repeatedly post traps to the CPU when | |
104 | appropriate as long as the ESR is not cleared by the simulated SW. | |
105 | ||
106 | ||
107 | Entry Points: | |
108 | ------------- | |
109 | ||
110 | When the ERROR_TRAP_GEN compile flag is turned on, the error trap | |
111 | injection framework is enabled. In order to keep the implementation | |
112 | modular, easy to maintain, and common across CPUs, we have made an | |
113 | effort to limit the number of entry points from normal Legion code | |
114 | into the error trap injection framework. These entry points are | |
115 | described here. | |
116 | ||
117 | 1) Parsing of user input - ss_parse() | |
118 | While parsing the "processor" directive in the Legion conf file, if | |
119 | we encounter an error_event {} or error_asi {} directive, we call into | |
120 | the error trap injection framework's parsing functions to handle it. | |
121 | ||
122 | 2) CPU initialization - ss_init() | |
123 | During CPU initialization, we call into the error trap injection | |
124 | framework in order to initialize a few things. We make one chip | |
125 | specific init call as well as one thread specific call per simulated | |
126 | thread. | |
127 | ||
128 | 3) ASI access - ss_asi_access() | |
129 | For all ASI accesses (other than memory ASI accesses such as | |
130 | ASI_SECONDARY, ASI_BLK_S, ASI_REAL_MEM, etc) we call into the error | |
131 | trap injection framework to see whether the specific ASI/VA in | |
132 | question (1) corresponds to one of the error status registers or | |
133 | error enabling registers of this CPU or (2) matches one of the ASI | |
134 | override entries provided by the user. | |
135 | ||
136 | 4) Instruction cycle count monitoring - ss_cycle_target_match() | |
137 | In order to trigger the user specified error trap based on the given | |
138 | conditions, we set certain cycle targets and watch for them. | |
139 | ||
140 | 5) Trigger the trap on: | |
141 | a) Load/Store operations - LOAD_OP()/STORE_OP() | |
142 | Once the error trap cycle target has been matched, we check every | |
143 | load and store operation to see whether it is time to inject the | |
144 | error trap or not. | |
145 | b) %pc value - debug_breakpoint_cb() | |
146 | When a %pc trigger value is specified, we set a breakpoint at the | |
147 | %pc value we want to catch. We then wait for the instn_cnt | |
148 | to be reached (if specified) and once we have reached the specified | |
149 | instn_cnt, the next time we hit that breakpoint, we trigger the | |
150 | error trap. Other constraints will also determine whether we | |
151 | should trigger (i.e. trap_level, priv_level). | |
152 | When the user specified conditions have been met, we inject the error | |
153 | trap by calling trigger_error_trap() for the simcpu in question. | |
154 | ||
155 | 6) Processor State Changes - ss_check_interrupts() | |
156 | Every time the processor state changes, we call into the error trap | |
157 | injection framework to check whether there is an error event which | |
158 | needs to be injected or an error trap which needs to be posted. | |
159 | ||
160 | 7) Taking a trap - ss_take_exception() | |
161 | Every time the processor is about to take a trap, we call into the | |
162 | error trap injection framework so that we know whether or not the | |
163 | trap we injected was actually taken or not. | |
164 | ||
165 | ||
166 | Error trap injection from start to finish: | |
167 | ------------------------------------------ | |
168 | ||
169 | Step 1 - Parse input | |
170 | User input is parsed and added to Legion's list of error | |
171 | events. | |
172 | ||
173 | Step 2 - Assign error event to a CPU | |
174 | The error event is assigned to a specific strand (the one specified | |
175 | to encounter the error event) and the strand is configured to watch | |
176 | for the trigger conditions associated with the error event. | |
177 | ||
178 | Step 3 - Trigger detection | |
179 | Once we detect that the trigger conditions have been satisfied, | |
180 | Legion marks the error event as "triggered" and calls a routine to | |
181 | trigger the error, trigger_error_trap(). | |
182 | ||
183 | Step 4 - Triggering the error | |
184 | The error event trigger code is responsible for ensuring that only | |
185 | one error is being "triggered" at a time per system. This means we | |
186 | are serializing the error trap generation code starting from the | |
187 | point where we "inject" the error and ending when the trap is | |
188 | actually taken on the CPU. Normally this period is very short, but | |
189 | can be longer for maskable trap types. This serialization is required | |
190 | to ensure that we don't lose injected traps due to trap priority | |
191 | conflicts. | |
192 | ||
193 | If no error is currently being triggered on the system, we proceed | |
194 | to call the injection code. Otherwise, if an error is already being | |
195 | triggered, we simply return and the error event will be checked | |
196 | after every CPU state change until we are able to inject the error. | |
197 | ||
198 | Step 5 - Error injection | |
199 | Based on the name of the error, we search the CPU specific table of | |
200 | error entries and that error entry tells us all we need to know | |
201 | about how to inject the error. Details we get from the error entry | |
202 | include which error status bits to inject, which error enabling | |
203 | registers to check, and which trap type to post or keep pending. | |
204 | ||
205 | At this point, we also add any user specified ASI overrides | |
206 | associated with this error to our master list of ASI overrides. | |
207 | ||
208 | Depending on whether the error is persistent or not, we once again | |
209 | set the CPU to monitor for the same trigger conditions (persistent | |
210 | error events) or check the error event list for a new error. | |
211 | ||
212 | Step 6 - CPU state change | |
213 | At every CPU state change, we check our list of errors to see if | |
214 | there are any error traps which we need to post due to the error | |
215 | status bit(s) not being clear. | |
216 | ||
217 | * | |
218 | * | |
219 | */ | |
220 | ||
221 | typedef struct ss_error_entry ss_error_entry_t; /* forward declaration: needed by EIR_ARGS before struct ss_error_entry is defined later in this header */ | |
222 | ||
223 | #define EAR( _name ) ss_access_asi_##_name /* standard error ASI access routine naming convention: EAR(x) expands to ss_access_asi_x */ | |
224 | #define EIR( _name ) ss_inject_esr_##_name /* standard ESR injection routine naming convention: EIR(x) expands to ss_inject_esr_x */ | |
225 | ||
226 | #define EAR_ARGS simcpu_t *sp, int asi, tvaddr_t addr, bool_t is_load, uint64_t store_val, bool_t legion_access /* parameter list shared by every error ASI access routine */ | |
227 | #define EIR_ARGS simcpu_t *sp, uint64_t mask, ss_error_entry_t *errp /* parameter list shared by every ESR injection routine */ | |
228 | ||
229 | #define EAR_DEFINITION( _n ) uint64_t EAR( _n )(EAR_ARGS) /* function header for an access routine: returns uint64_t, takes EAR_ARGS */ | |
230 | #define EIR_DEFINITION( _n ) bool_t EIR( _n )(EIR_ARGS) /* function header for an injection routine: returns bool_t, takes EIR_ARGS */ | |
231 | ||
232 | #define ASI_NA -1 /* NOTE(review): the *_NA and INVALID_* values look like "not applicable"/"invalid" sentinels -- confirm against their users */ | |
233 | #define ADDR_NA -1 | |
234 | #define ANY_ERR_VA 1 | |
235 | #define INVALID_TRAP -1 | |
236 | #define INVALID_ASI -1 /* NOTE(review): negative values in this group are not parenthesized; (-1) would be the safer macro form */ | |
237 | ||
238 | #define END_ERR_STRING "done" | |
239 | #define TRAP_ERR_STRING "trap-only" | |
240 | #define TRAP_ONLY_TT 0 | |
241 | #define TARGET_MYSELF -1 /* presumably the trap_target value meaning "current strand" (struct ss_error_entry documents -1 that way) -- confirm */ | |
242 | ||
243 | /* Service Processor (SP) interrupt stuff */ | |
244 | #define SP_INTR_ERR_STRING "sp-intr" | |
245 | #define SP_INTR_ONLY -1 | |
246 | #define TARGET_SP -2 /* presumably a trap_target value selecting the Service Processor -- confirm */ | |
247 | ||
248 | /* This trap never gets taken.. generate SP interrupt instead */ | |
249 | #define SS_generate_SP_interrupt NULL /* expands to NULL, so it can only be used where a null pointer is acceptable */ | |
250 | ||
251 | /* | |
252 | * There are two kinds of registers which we manage in Legion | |
253 | * for simulation of error traps. Both are accessed via ASI: | |
254 | * | |
255 | * 1) Error Status Registers (ESR) - This is where specific | |
256 | * errors are reported (usually one bit per error type) so | |
257 | * that SW can figure out what error type we encountered. | |
258 | * | |
259 | * 2) Error Enabling Registers (EER) - These control how the | |
260 | * CPU behaves after encountering an error (or injecting an | |
261 | * error trap in our case). There are two types of EERs: | |
262 | * | |
263 | * Error Recording Registers - These control whether or | |
264 | * not a given error is detected and logged. | |
265 | * | |
266 | * Error Reporting Registers - These control whether or | |
267 | * not a trap will be generated for a given error. | |
268 | * | |
269 | * We define a common struct for managing each one of these | |
270 | * error register types and each CPU module defines a list of | |
271 | * the ESRs and EERs it supports. | |
272 | * | |
273 | */ | |
274 | typedef struct { /* one ASI-accessed error register (ESR or EER) */ | |
275 | int asi; /* ASI through which the register is accessed */ | |
276 | tvaddr_t addr; /* presumably the VA of the register within that ASI -- confirm */ | |
277 | uint64_t (*reg_access)(EAR_ARGS); /* access routine for this register; takes the common EAR_ARGS parameter list */ | |
278 | } ss_err_reg_t; | |
279 | ||
280 | /* | |
281 | * We want to come up with a standard structure which can be used to | |
282 | * describe all CPU errors for Niagara2, Rock, and possibly other | |
283 | * CPUs in a common way. | |
284 | * | |
285 | * Each CPU module will provide a table describing all the CPU errors | |
286 | * it supports and that way we can keep most CPU specific implementation | |
287 | * details in an easy to review/modify table and keep the error trap | |
288 | * injection framework code simple and common to all CPUs. | |
289 | * | |
290 | * Keeping that in mind, here are the basic ground rules and assumptions | |
291 | * we will be using to implement support for error trap injection | |
292 | * under Legion. These should all be supported by the UltraSPARC 2006 | |
293 | * spec and/or the CPU PRMs. | |
294 | * | |
295 | * 1) All disrupting error traps are also conditioned by the PSTATE.IE | |
296 | * bit when HPSTATE.HPRIV is set. | |
297 | * | |
298 | * 2) Precise and Deferred traps are always taken on the CPU which | |
299 | * detected the error. | |
300 | * | |
301 | * 3) Persistent errors cannot be Non-Maskable. | |
302 | * | |
303 | * 4) When an error trap is injected and the Error Reporting EER | |
304 | * conditions have not been satisfied (i.e. the trap is masked) | |
305 | * then the trap is held pending only for disrupting errors. | |
306 | * The trap will be dropped for Precise and Deferred errors. | |
307 | * (NOTE: in general, Precise and Deferred errors are not | |
308 | * maskable anyway. But, we provide the option just in case.) | |
309 | * | |
310 | * NOTE: One issue I'm not sure about right now is whether we | |
311 | * need to allow more than one EER per error or not. In theory, | |
312 | * detection or trap generation for a given error may be | |
313 | * dependent on more than just one EER. | |
314 | * | |
315 | * If we do need to change the framework to allow more than | |
316 | * one EER (or even ESR) per error, the implementation will | |
317 | * get a bit more complicated, but the basic ideas still work. | |
318 | * We would simply have to have something like a linked list | |
319 | * of EERs instead of statically defining one per error. | |
320 | * | |
321 | * For now, let's just assume one EER/ESR will do for each | |
322 | * error -- I'm pretty sure this assumption is valid for Rock | |
323 | * and Niagara2. | |
324 | */ | |
325 | ||
326 | ||
327 | /* | |
328 | * Struct which defines how a given EER controls an error's | |
329 | * recording/reporting. | |
330 | */ | |
331 | typedef struct SS_EER_CTRL { | |
332 | uint64_t (*eer_access)(EAR_ARGS); /* access routine for the EER; takes the common EAR_ARGS parameter list */ | |
333 | uint64_t mask; /* presumably the EER bit(s) that govern this error -- confirm */ | |
334 | } ss_eer_ctrl_t; | |
335 | ||
336 | /* | |
337 | * Struct which defines how a given ESR is associated with | |
338 | * a given error. | |
339 | */ | |
340 | typedef struct SS_ESR_UPDATE { | |
341 | uint64_t (*esr_access)(EAR_ARGS); /* access routine for the ESR; takes the common EAR_ARGS parameter list */ | |
342 | bool_t (*esr_inject)(EIR_ARGS); /* injection routine for the ESR; takes the common EIR_ARGS parameter list */ | |
343 | uint64_t err_inject_mask; /* presumably the ESR status bit(s) to set when injecting this error -- confirm */ | |
344 | uint64_t err_pending_mask; /* NOTE(review): looks like the ESR bit(s) checked for a still-pending error -- confirm */ | |
345 | char * err_inject_name; /* NOTE(review): a name string tied to the injection; exact use not visible in this header */ | |
346 | } ss_esr_inject_t; | |
347 | ||
348 | typedef enum { /* class of an error trap; stored in ss_error_entry.trap_class */ | |
349 | PRECISE_TT, /* precise trap */ | |
350 | DEFERRED_TT, /* deferred trap */ | |
351 | DISRUPTING_TT, /* disrupting trap */ | |
352 | SP_INTR /* presumably a Service Processor interrupt rather than a CPU trap -- confirm */ | |
353 | } error_trap_class_t; | |
354 | ||
355 | ||
356 | typedef struct { /* pairs an error name with a Service Processor interrupt level */ | |
357 | char *error_name; /* error name */ | |
358 | uint64_t sp_intr; /* SP interrupt level */ | |
359 | } ss_sp_error_t; | |
360 | ||
361 | /* | |
362 | * Struct which defines a CPU error entry. | |
363 | */ | |
364 | struct ss_error_entry { | |
365 | char * error_name; /* name of the error, e.g. "DCDP" (data cache data parity) */ | |
366 | sparcv9_trap_type_t trap_type; /* trap type associated with this error */ | |
367 | error_trap_class_t trap_class; /* precise/deferred/disrupting */ | |
368 | bool_t is_persistent; /* Keep generating trap while esr bit is set? */ | |
369 | int trap_target; /* -1 means always current strand. */ | |
370 | ss_esr_inject_t error_status; /* ESR to update */ | |
371 | ss_eer_ctrl_t error_record; /* should the error be logged? */ | |
372 | ss_eer_ctrl_t error_report; /* should a trap be generated? */ | |
373 | }; | |
374 | ||
375 | ||
374 | ||
375 | ||
376 | /* | |
377 | * Per chip Error Trap state register. | |
378 | * This struct will hold all the common | |
379 | * state information related to the | |
380 | * error trap injection framework. | |
381 | */ | |
382 | typedef struct { | |
383 | /* | |
384 | * Pointer to the error entry we are currently | |
385 | * trying to inject in the system. | |
386 | */ | |
387 | ss_error_entry_t *inj_error_trap; | |
388 | int trap_target_gid; /* NOTE(review): presumably the global id of the strand targeted by inj_error_trap -- confirm */ | |
389 | ||
390 | /* | |
391 | * Used to limit the number of outstanding error | |
392 | * trap injections to one. i.e. Once an error trap | |
393 | * has been injected on a system, we cannot inject | |
394 | * a new one until the error trap has actually | |
395 | * been taken by the target CPU. (or we have | |
396 | * deemed that no new trap will be generated.) | |
397 | */ | |
398 | bool_t ready_for_next_injection; | |
399 | pthread_mutex_t injection_lock; /* NOTE(review): presumably guards ready_for_next_injection -- confirm against the .c file */ | |
400 | ||
401 | /* | |
402 | * Used for performance reasons. | |
403 | * If we know that no ESRs are currently set (i.e. | |
404 | * they are all clear) then we can skip a lot of | |
405 | * checks during ss_check_error_traps(). | |
406 | */ | |
407 | bool_t esrs_clear; | |
408 | ||
409 | /* | |
410 | * head pointers to the system wide lists which | |
411 | * are parsed from user input. | |
412 | */ | |
413 | error_event_t *error_event_list_rootp; | |
414 | error_asi_t *error_asi_list_rootp; | |
415 | ||
416 | /* | |
417 | * dynamic reload file name | |
418 | */ | |
419 | char *error_config_filep; | |
420 | ||
421 | pthread_mutex_t err_lock; /* NOTE(review): what this lock protects is not visible in this header -- confirm */ | |
422 | ||
423 | ss_err_reg_t *err_reg_tbl; /* presumably the CPU module's list of ESRs and EERs -- confirm */ | |
424 | ss_error_entry_t *err_event_tbl; /* presumably the CPU module's table of supported error entries -- confirm */ | |
425 | ss_sp_error_t *sp_err_tbl; /* presumably the table of errors reported via SP interrupt -- confirm */ | |
426 | ||
427 | } ss_error_state_t; | |
428 | ||
429 | /* | |
430 | * extern definitions used by other binaries. | |
431 | */ | |
432 | extern void ss_check_error_traps(simcpu_t*); /* presumably the per-state-change check for error traps that still need posting -- confirm */ | |
433 | extern void ss_error_taking_trap(simcpu_t*, sparcv9_trap_type_t); /* presumably the hook called when the CPU is about to take a trap -- confirm */ | |
434 | extern bool_t ss_check_user_asi_list(simcpu_t*,int, tvaddr_t, uint64_t*, bool_t, bool_t); /* presumably matches an access against the user supplied ASI overrides -- confirm */ | |
435 | extern bool_t ss_error_asi_access(simcpu_t*, maccess_t, int, int, bool_t, tvaddr_t, uint64_t); | |
436 | extern void check_pending_error_events(simcpu_t*); | |
437 | extern void dump_error_event_list(int, error_event_t*); | |
438 | extern void dump_error_asi_list(int, error_asi_t*); | |
439 | extern void dump_cpu_error_table(int, ss_error_entry_t*); | |
440 | extern void dump_cpu_error_reg_table(int, ss_err_reg_t*); | |
441 | extern bool_t trigger_error_trap(simcpu_t*); /* called for the simcpu in question once the user specified trigger conditions are met */ | |
442 | extern void check_if_error_event_pending(void); | |
443 | extern void ss_inject_error_trap(simcpu_t *, char *, sparcv9_trap_type_t, int); | |
444 | ||
445 | extern void ss_error_asi_parse(void *procp, bool_t is_reload); /* presumably parses an error_asi {} directive -- confirm */ | |
446 | extern void ss_error_event_parse(void *procp, bool_t is_reload); /* presumably parses an error_event {} directive -- confirm */ | |
447 | extern void ss_error_reload_file(config_proc_t *cp); | |
448 | extern void ss_error_parse_filename(void *procp); | |
449 | ||
450 | extern void ss_error_dump_active(config_proc_t *cp); | |
451 | extern void ss_error_dump_supported(config_proc_t *cp); | |
452 | ||
453 | extern void ss_error_trap_proc_init(config_proc_t * config_procp); /* presumably the chip specific init call made during CPU initialization -- confirm */ | |
454 | extern void ss_error_trap_strand_init(config_proc_t * config_procp, simcpu_t * sp); /* presumably the per-thread init call, made once per simulated thread -- confirm */ | |
455 | ||
456 | #endif /* } ERROR_TRAP_GEN */ | |
457 | ||
458 | #ifdef __cplusplus | |
459 | } | |
460 | #endif | |
461 | ||
462 | #endif /* _SS_ERR_TRAP_H */ |