Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / legion / src / procs / sunsparc / include / ss_err_trap.h
CommitLineData
920dae64
AT
1/*
2* ========== Copyright Header Begin ==========================================
3*
4* OpenSPARC T2 Processor File: ss_err_trap.h
5* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
6* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
7*
8* The above named program is free software; you can redistribute it and/or
9* modify it under the terms of the GNU General Public
10* License version 2 as published by the Free Software Foundation.
11*
12* The above named program is distributed in the hope that it will be
13* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15* General Public License for more details.
16*
17* You should have received a copy of the GNU General Public
18* License along with this work; if not, write to the Free Software
19* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20*
21* ========== Copyright Header End ============================================
22*/
23/*
24 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28#pragma ident "@(#)ss_err_trap.h 1.10 06/11/08 SMI"
29
30#ifndef _SS_ERR_TRAP_H
31#define _SS_ERR_TRAP_H
32
33#ifdef __cplusplus
34extern "C" {
35#endif
36
37#if ERROR_TRAP_GEN /* { */
38
39
40/*
41 *
42 *
43 *
44 Error Trap Generation Framework
45 ===============================
46
47 Implementation Details
48
49Glossary of Terms:
50------------------
51
52"error events" - Data provided by user to describe the error/trap
53 he or she wants Legion to inject (when/how/where/what).
54
55"ASI override" - ASI/VA pair provided by the user along with
56 optional value masks which tells Legion how to respond when SW
57 accesses specific ASI/VAs.
58
59"error entry" - CPU specific information describing a named error
60 such as "DCDP" (data cache data parity) in terms of trap type,
61 status register bit(s), error enable register bit(s), etc. Each
62 CPU provides a list of all the error entries it supports.
63
64"error status register" (ESR) - Most CPU errors are associated with a
65 status register which will have one or more bits set when that
66 error is detected so that the SW trap handlers know which error
67 was encountered.
68
69"error enabling register" (EER) - Most CPU errors in the UltraSPARC
70 family are also associated with an error reporting register
 (which controls whether the error generates a trap) and/or an
 error recording register (which controls whether the error is
73 detected). Both of these are referred to as error enabling
74 registers.
75
76
77High Level Summary:
78-------------------
79
80The user provides Legion (via the conf file) a list of "error events"
81and optional "ASI overrides".
82
83Legion examines the error event list and sets itself up to watch for
84the trigger conditions specified in the error events.
85
When the trigger conditions are satisfied (e.g. reaching a certain
instruction count, loading/storing a certain address, or executing a
specified %pc) then Legion injects the specified error trap.
89
90If additional error events have been specified, Legion sets itself
91up once again to watch for the new trigger conditions.
92
93The Legion code for injecting an error trap is responsible for
94injecting the appropriate ESR bits and checking the appropriate
95error enabling registers in order to determine whether or not to
96post a trap, leave it pending, or just drop it.
97
98For every non-memory ASI access from the simulated SW, Legion checks
99it against the list of user provided ASI overrides and returns the
100user specified value if a match is found.
101
102The error trap generation framework also monitors the state of the
103various ESRs and will repeatedly post traps to the CPU when
104appropriate as long as the ESR is not cleared by the simulated SW.
105
106
107Entry Points:
108-------------
109
110When the ERROR_TRAP_GEN compile flag is turned on, the error trap
111injection framework is enabled. In order to keep the implementation
112modular, easy to maintain, and common across CPUs, we have made an
113effort to limit the number of entry points from normal Legion code
114into the error trap injection framework. These entry points are
115described here.
116
1171) Parsing of user input - ss_parse()
While parsing the "processor" directive in the Legion conf file, if
we encounter an error_event {} or error_asi {} directive, we call into
the error trap injection framework's parsing functions to handle it.
121
1222) CPU initialization - ss_init()
123During CPU initialization, we call into the error trap injection
124framework in order to initialize a few things. We make one chip
125specific init call as well as one thread specific call per simulated
126thread.
127
1283) ASI access - ss_asi_access()
129For all ASI accesses (other than memory asi access such as
130ASI_SECONDARY, ASI_BLK_S, ASI_REAL_MEM, etc) we call into the error
131trap injection framework to see whether the specific ASI/VA in
132question (1) corresponds to one of the error status registers or
133error enabling registers of this CPU or (2) matches one of the ASI
134override entries provided by the user.
135
1364) Instruction cycle count monitoring - ss_cycle_target_match()
137In order to trigger the user specified error trap based on the given
138conditions, we set certain cycle targets and watch for them.
139
1405) Trigger the trap on:
141 a) Load/Store operations - LOAD_OP()/STORE_OP()
142 Once the error trap cycle target has been matched, we check every
143 load and store operation to see whether it is time to inject the
144 error trap or not.
145 b) %pc value - debug_breakpoint_cb()
146 When a %pc trigger value is specified, we set a breakpoint at the
147 %pc value we want to catch. We then wait for the instn_cnt
148 to be reached (if specified) and once we have reached the specified
 instn_cnt, the next time we hit that breakpoint, we trigger the
 error trap. Other constraints will also determine whether we
 should trigger (i.e. trap_level, priv_level).
152 When the user specified conditions have been met, we inject the error
153 trap by calling trigger_error_trap() for the simcpu in question.
154
1556) Processor State Changes - ss_check_interrupts()
156Every time the processor state changes, we call into the error trap
157injection framework to check whether there is an error event which
158needs to be injected or an error trap which needs to be posted.
159
1607) Taking a trap - ss_take_exception()
Every time the processor is about to take a trap, we call into the
error trap injection framework so that we know whether the trap we
injected was actually taken.
164
165
166Error trap injection from start to finish:
167------------------------------------------
168
169Step 1 - Parse input
170User input is parsed and added to Legion's list of error
171events.
172
173Step 2 - Assign error event to a CPU
174The error event is assigned to a specific strand (the one specified
175to encounter the error event) and the strand is configured to watch
176for the trigger conditions associated with the error event.
177
178Step 3 - Trigger detection
179Once we detect that the trigger conditions have been satisfied,
180Legion marks the error event as "triggered" and calls a routine to
181trigger the error, trigger_error_trap().
182
183Step 4 - Triggering the error
184The error event trigger code is responsible for ensuring that only
185one error is being "triggered" at a time per system. This means we
186are serializing the error trap generation code starting from the
187point where we "inject" the error and ending when the trap is
188actually taken on the CPU. Normally this period is very short, but
189can be longer for maskable trap types. This serialization is required
190to ensure that we don't lose injected traps due to trap priority
191conflicts.
192
193If no error is currently being triggered on the system, we proceed
194to call the injection code. Otherwise, if an error is already being
195triggered, we simply return and the error event will be checked
196after every CPU state change until we are able to inject the error.
197
198Step 5 - Error injection
199Based on the name of the error, we search the CPU specific table of
200error entries and that error entry tells us all we need to know
201about how to inject the error. Details we get from the error entry
202include which error status bits to inject, which error enabling
203registers to check, and which trap type to post or keep pending.
204
205At this point, we also add any user specified ASI overrides
206associated with this error to our master list of ASI overrides.
207
208Depending on whether the error is persistent or not, we once again
209set the CPU to monitor for the same trigger conditions (persistent
210error events) or check the error event list for a new error.
211
212Step 6 - CPU state change
213At every CPU state change, we check our list of errors to see if
214there are any error traps which we need to post due to the error
215status bit(s) not being clear.
216
217 *
218 *
219 */
220
/*
 * Forward declaration of the CPU error entry type.  EIR_ARGS below
 * takes a pointer to it, so the name must exist before the struct
 * itself is defined later in this header.
 */
typedef struct ss_error_entry ss_error_entry_t;

/*
 * Standard naming conventions.  The argument is pasted onto a fixed
 * prefix, e.g. EAR(foo) expands to ss_access_asi_foo and EIR(foo)
 * expands to ss_inject_esr_foo.
 */
#define	EAR(_name)	ss_access_asi_##_name	/* error ASI access routine */
#define	EIR(_name)	ss_inject_esr_##_name	/* ESR injection routine */

/*
 * Parameter lists shared by all access routines (EAR_ARGS) and all
 * injection routines (EIR_ARGS).  The same lists are used for the
 * function pointer members of the error tables in this header.
 */
#define	EAR_ARGS	simcpu_t *sp, int asi, tvaddr_t addr, bool_t is_load, uint64_t store_val, bool_t legion_access
#define	EIR_ARGS	simcpu_t *sp, uint64_t mask, ss_error_entry_t *errp

/*
 * Complete function headers built from the pieces above:
 *	EAR_DEFINITION(foo) -> uint64_t ss_access_asi_foo(EAR_ARGS)
 *	EIR_DEFINITION(foo) -> bool_t ss_inject_esr_foo(EIR_ARGS)
 */
#define	EAR_DEFINITION(_n)	uint64_t EAR(_n)(EAR_ARGS)
#define	EIR_DEFINITION(_n)	bool_t EIR(_n)(EIR_ARGS)
231
/*
 * Sentinel values used by the error trap framework.
 *
 * Negative replacement lists are parenthesized so that each macro
 * remains a single operand wherever it is expanded.
 */
#define	ASI_NA		(-1)
#define	ADDR_NA		(-1)
#define	ANY_ERR_VA	1
#define	INVALID_TRAP	(-1)
#define	INVALID_ASI	(-1)

/* Reserved error-name strings and their associated values. */
#define	END_ERR_STRING	"done"
#define	TRAP_ERR_STRING	"trap-only"
#define	TRAP_ONLY_TT	0
#define	TARGET_MYSELF	(-1)	/* trap target: always the current strand */

/* Service Processor (SP) interrupt stuff */
#define	SP_INTR_ERR_STRING	"sp-intr"
#define	SP_INTR_ONLY	(-1)
#define	TARGET_SP	(-2)

/* This trap never gets taken.. generate SP interrupt instead */
#define	SS_generate_SP_interrupt	NULL
250
251/*
252 * There two kinds of register which we manage in Legion
253 * for simulation of error traps. Both are accessed via ASI:
254 *
255 * 1) Error Status Registers (ESR) - This is where specific
256 * errors are reported (usually one bit per error type) so
257 * that SW can figure out what error type we encountered.
258 *
259 * 2) Error Enabling Registers (EER) - These control how the
260 * CPU behaves after encountering an error (or injecting an
261 * error trap in our case). There are two types of EERs:
262 *
263 * Error Recording Registers - These control whether or
264 * not a given error is detected and logged.
265 *
266 * Error Reporting Registers - These control whether or
267 * not a trap will be generated for a given error.
268 *
269 * We define a common struct for managing each one of these
270 * error register types and each CPU module defines a list of
271 * the ESRs and EERs it supports.
272 *
273 */
274typedef struct {
275 int asi;
276 tvaddr_t addr;
277 uint64_t (*reg_access)(EAR_ARGS);
278} ss_err_reg_t;
279
280/*
281 * We want to come up with a standard structure which can be used to
282 * describe all CPU errors for Niagara2, Rock, and possibly other
283 * CPUs in a common way.
284 *
 * Each CPU module will provide a table describing all the CPU errors
 * it supports and that way we can keep most CPU specific implementation
 * details in an easy to review/modify table and keep the error trap
 * injection framework code simple and common to all CPUs.
289 *
290 * Keeping that in mind, here are the basic ground rules and assumptions
291 * we will be using to implement support for error trap injection
292 * under Legion. These should all be supported by the UltraSPARC 2006
293 * spec and/or the CPU PRMs.
294 *
295 * 1) All disrupting error traps are also conditioned by the PSTATE.IE
296 * bit when HPSTATE.HPRIV is set.
297 *
298 * 2) Precise and Deferred traps are always taken on the CPU which
299 * detected the error.
300 *
301 * 3) Persistent errors cannot be Non-Maskable.
302 *
303 * 4) When an error trap is injected and the Error Reporting EER
304 * conditions have not been satisfied (i.e. the trap is masked)
305 * then the trap is held pending only for disrupting errors.
306 * The trap will be dropped for Precise and Deferred errors.
307 * (NOTE: in general, Precise and Deferred errors are not
308 * maskable anyway. But, we provide the option just in case.)
309 *
310 * NOTE: One issue I'm not sure about right now is whether we
311 * need to allow more than one EER per error or not. In theory,
312 * detection or trap generation for a given error may be
313 * dependent on more than just one EER.
314 *
315 * If we do need to change the framework to allow more than
316 * one EER (or even ESR) per error, the implementation will
317 * get a bit more complicated, but the basic ideas still work.
318 * We would simply have to have something like a linked list
319 * of EERs instead of statically defining one per error.
320 *
321 * For now, let's just assume one EER/ESR will do for each
322 * error -- I'm pretty sure this assumption is valid for Rock
323 * and Niagara2.
324 */
325
326
/*
 * Struct which defines how a given EER controls an error's
 * recording/reporting.
 */
typedef struct SS_EER_CTRL {
	uint64_t	(*eer_access)(EAR_ARGS);	/* access routine for the EER */
	uint64_t	mask;	/* presumably the EER bit(s) for this error - TODO confirm */
} ss_eer_ctrl_t;
335
336/*
337 * Struct which defines how a given ESR is associated with
338 * a given error.
339 */
340typedef struct SS_ESR_UPDATE {
341 uint64_t (*esr_access)(EAR_ARGS);
342 bool_t (*esr_inject)(EIR_ARGS);
343 uint64_t err_inject_mask;
344 uint64_t err_pending_mask;
345 char * err_inject_name;
346} ss_esr_inject_t;
347
/*
 * Class of an error entry: one of the three trap classes
 * (precise/deferred/disrupting), or SP_INTR.
 * NOTE(review): SP_INTR presumably marks errors that raise a Service
 * Processor interrupt rather than a trap - confirm against the
 * injection code.
 */
typedef enum {
	PRECISE_TT,
	DEFERRED_TT,
	DISRUPTING_TT,
	SP_INTR
} error_trap_class_t;
354
355
/*
 * Pairs an error name with a Service Processor interrupt level.
 */
typedef struct {
	char		*error_name;	/* error name */
	uint64_t	sp_intr;	/* SP interrupt level */
} ss_sp_error_t;
360
361/*
362 * Struct which defines a CPU error entry.
363 */
364struct ss_error_entry {
365 char * error_name;
366 sparcv9_trap_type_t trap_type;
367 error_trap_class_t trap_class; /* precise/deferred/disrupting */
368 bool_t is_persistent; /* Keep generating trap while esr bit is set? */
369 int trap_target; /* -1 means always current strand. */
370 ss_esr_inject_t error_status; /* ESR to update */
371 ss_eer_ctrl_t error_record; /* should the error be logged? */
372 ss_eer_ctrl_t error_report; /* should a trap be generated? */
373};
374
375
376/*
377 * Per chip Error Trap state register.
378 * This struct will hold all the common
379 * state information related to the
380 * error trap injection framework.
381 */
382typedef struct {
383 /*
384 * Pointer to the error entry we are currently
385 * trying to inject in the system.
386 */
387 ss_error_entry_t *inj_error_trap;
388 int trap_target_gid;
389
390 /*
391 * Used to limit the number of outstanding error
392 * trap injections to one. i.e. Once an error trap
393 * has been injected on a system, we cannot inject
394 * a new one until the error trap has actually
395 * been taken by the target CPU. (or we have
396 * deemed that no new trap will be generated.)
397 */
398 bool_t ready_for_next_injection;
399 pthread_mutex_t injection_lock;
400
401 /*
402 * Used for performance reasons.
403 * If we know that no ESRs are currently set (i.e.
404 * they are all clear) then we can skip a lot of
405 * checks during ss_check_error_traps().
406 */
407 bool_t esrs_clear;
408
409 /*
410 * head pointers to the system wide lists which
411 * are parsed from user input.
412 */
413 error_event_t *error_event_list_rootp;
414 error_asi_t *error_asi_list_rootp;
415
416 /*
417 * dynamic reload file name
418 */
419 char *error_config_filep;
420
421 pthread_mutex_t err_lock;
422
423 ss_err_reg_t *err_reg_tbl;
424 ss_error_entry_t *err_event_tbl;
425 ss_sp_error_t *sp_err_tbl;
426
427} ss_error_state_t;
428
429/*
430 * extern definitions used by other binaries.
431 */
432extern void ss_check_error_traps(simcpu_t*);
433extern void ss_error_taking_trap(simcpu_t*, sparcv9_trap_type_t);
434extern bool_t ss_check_user_asi_list(simcpu_t*,int, tvaddr_t, uint64_t*, bool_t, bool_t);
435extern bool_t ss_error_asi_access(simcpu_t*, maccess_t, int, int, bool_t, tvaddr_t, uint64_t);
436extern void check_pending_error_events(simcpu_t*);
437extern void dump_error_event_list(int, error_event_t*);
438extern void dump_error_asi_list(int, error_asi_t*);
439extern void dump_cpu_error_table(int, ss_error_entry_t*);
440extern void dump_cpu_error_reg_table(int, ss_err_reg_t*);
441extern bool_t trigger_error_trap(simcpu_t*);
442extern void check_if_error_event_pending(void);
443extern void ss_inject_error_trap(simcpu_t *, char *, sparcv9_trap_type_t, int);
444
445extern void ss_error_asi_parse(void *procp, bool_t is_reload);
446extern void ss_error_event_parse(void *procp, bool_t is_reload);
447extern void ss_error_reload_file(config_proc_t *cp);
448extern void ss_error_parse_filename(void *procp);
449
450extern void ss_error_dump_active(config_proc_t *cp);
451extern void ss_error_dump_supported(config_proc_t *cp);
452
453extern void ss_error_trap_proc_init(config_proc_t * config_procp);
454extern void ss_error_trap_strand_init(config_proc_t * config_procp, simcpu_t * sp);
455
456#endif /* } ERROR_TRAP_GEN */
457
458#ifdef __cplusplus
459}
460#endif
461
462#endif /* _SS_ERR_TRAP_H */