Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: prioque.h | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /* | |
24 | * An `Event_t' is what can be put on our priority queue, | |
25 | * it contains a delivery time and a callback func with args, the | |
26 | * delivery time is measured in micro-seconds of simulated time. | |
27 | * | |
28 | * Since we've implemented this as an intrisic rather than extrinsic | |
29 | * (ie its not a <template>) data structure, we're placing the sources | |
30 | * in the "system/blaze" directory where it is used by workerthreads. | |
31 | * | |
32 | * More fruit at the bottom... | |
33 | */ | |
34 | #ifndef _PRIOQUE_H | |
35 | #define _PRIOQUE_H | |
36 | ||
37 | #include <stdio.h> // printf, ... | |
38 | #include <stdlib.h> // malloc, ... | |
39 | #include <string.h> // strcmp, ... | |
40 | #include <synch.h> // mutex, ... | |
41 | ||
42 | ||
43 | // in system/util/include/atomic.h | |
44 | extern int32_t atomic_add_32 (volatile int32_t * variable, const int32_t value); | |
45 | ||
46 | typedef void (*EventFunc_P)(void * arg1, void * arg2); | |
47 | typedef void (*UnloadFunc_P)(int64_t stime, void* arg1, void* arg2); | |
48 | ||
49 | ||
50 | typedef struct { | |
51 | uint64_t pq_priority; // future delivery time, u-secs | |
52 | ||
53 | void * pq_cbfunc; // event callback function | |
54 | void * pq_cbarg1; | |
55 | void * pq_cbarg2; | |
56 | void * pq_cbunload; // unload callback function | |
57 | ||
58 | const char * dbgstring; | |
59 | int event_id; | |
60 | int worker_id; | |
61 | } Event_t; | |
62 | ||
63 | ||
64 | ||
65 | class EventQue { | |
66 | ||
67 | private: // ---------- Private Implementation -------------------- | |
68 | mutex_t _lock; // | |
69 | int _size; // current number of objects in queue (0..2**N-1) | |
70 | int _limit; // maximum number of objects in heap (2**N-1, eg 63) | |
71 | Event_t * _heap; // array[1..size], [0] not used. | |
72 | ||
73 | void _grow(); // double the heap space and limit (initially 63) | |
74 | void _topdown_update (int index); // revive the heap invariant | |
75 | void _bottomup_update (int index); // ditto | |
76 | void _insert (Event_t * object); // copy from *obj into queue | |
77 | void _delete (Event_t * object); // copy to *obj, | |
78 | ||
79 | // Low Level Interface, no longer public | |
80 | void push (Event_t & x) { _insert (&x); } | |
81 | Event_t & top () { return _heap[1]; } | |
82 | void pop () { _delete (NULL); } | |
83 | ||
84 | ||
85 | ||
86 | public: // ---------- Public Interface -------------------------------------- | |
87 | ||
88 | EventQue (); | |
89 | ~EventQue (); | |
90 | ||
91 | bool empty () { return _size == 0; } | |
92 | int size () { return _size; } | |
93 | ||
94 | // NB, we expect only a SINGLE-READER (the cpu sim) so no atomicity needed | |
95 | // between calling top_time() and get_top(). | |
96 | ||
97 | uint64_t top_time () { | |
98 | return _heap[1].pq_priority; | |
99 | } | |
100 | ||
101 | ||
102 | ||
103 | void get_top (Event_t * p) { // does both COPYOUT and POP == | |
104 | mutex_lock (&_lock); | |
105 | *p = top (); | |
106 | pop (); | |
107 | mutex_unlock (&_lock); | |
108 | ||
109 | if (debug) { | |
110 | int64_t now = SYSTEM_get_time(); | |
111 | fprintf(stderr,"[%d] DeliverEvent- %d \"%s\", (%20lld)\n", | |
112 | p->worker_id, | |
113 | p->event_id, | |
114 | (p->dbgstring ? p->dbgstring : "unknown"), | |
115 | now); | |
116 | } | |
117 | } | |
118 | ||
119 | ||
120 | void insert_callback (uint64_t time, // does PUSH == | |
121 | void * func, void* arg1, void* arg2, | |
122 | void * unload, | |
123 | const char * dbgstring, | |
124 | int worker_id) { | |
125 | mutex_lock (&_lock); | |
126 | if (_size == _limit) _grow(); | |
127 | Event_t * p = &_heap[_size+1]; | |
128 | ||
129 | p->pq_priority = time; | |
130 | ||
131 | p->pq_cbfunc = func; | |
132 | p->pq_cbarg1 = arg1; | |
133 | p->pq_cbarg2 = arg2; | |
134 | p->pq_cbunload = unload; | |
135 | p->dbgstring = dbgstring; | |
136 | p->event_id = atomic_add_32 (&event_id, +1); | |
137 | p->worker_id = worker_id; | |
138 | ||
139 | _bottomup_update (_size+1); | |
140 | _size = _size+1; | |
141 | mutex_unlock (&_lock); | |
142 | ||
143 | if (debug) { | |
144 | int64_t now = SYSTEM_get_time(); | |
145 | fprintf(stderr,"[%d] RegisterEvent-%d \"%s\", (%20lld +%7d)\n", | |
146 | p->worker_id, | |
147 | p->event_id, | |
148 | (dbgstring ? dbgstring : "unknown"), | |
149 | now, | |
150 | (int)(time - now)); | |
151 | } | |
152 | } | |
153 | ||
154 | ||
155 | void print () { /* for debugging with "events" UI command */ | |
156 | int64_t now = SYSTEM_get_time(); | |
157 | mutex_lock (&_lock); | |
158 | for (int i = _size; i > 0; i--) { | |
159 | Event_t * p = &_heap[i]; | |
160 | fprintf(stderr,"[%d] Event-%d: \"%s\", (%20lld +%7d)\n", | |
161 | p->worker_id, | |
162 | p->event_id, | |
163 | (p->dbgstring ? p->dbgstring : "unknown"), | |
164 | now, | |
165 | p->pq_priority - now); | |
166 | } | |
167 | mutex_unlock (&_lock); | |
168 | } | |
169 | ||
170 | ||
171 | void unload () { /* call each event's unload func, */ | |
172 | for (int i = _size; i > 0; i--) { /* but leave the events in the Q !! */ | |
173 | Event_t * p = &_heap[i]; | |
174 | if (p->pq_cbunload) | |
175 | (*(UnloadFunc_P)p->pq_cbunload) ( | |
176 | p->pq_priority, | |
177 | p->pq_cbarg1, p->pq_cbarg2); | |
178 | } | |
179 | } | |
180 | ||
181 | void reload () { } /* nada, devices must do this for themselves */ | |
182 | ||
183 | ||
184 | ||
185 | void dump (); | |
186 | ||
187 | void restore (); | |
188 | ||
189 | ||
190 | void internaldebug (int x = 1); // to see the internal heap contents | |
191 | ||
192 | static int debug; | |
193 | private: | |
194 | static volatile int event_id; // sequence number | |
195 | ||
196 | }; | |
197 | ||
198 | /* ---------------------------------------------------------------------------- | |
199 | Notes on Blaze-Event-Queue | |
200 | ||
201 | This is a queue sorted by what future time each registered event should | |
202 | happen at, where each event is described by a record: | |
203 | ||
204 | typedef struct { | |
205 | uint64_t priority; // event delivery time | |
206 | ||
207 | CallBackFunc_T * pq_cbfunc; // func to call at that time | |
208 | void * pq_cbarg1; // args to func | |
209 | void * pq_cbarg2; | |
210 | } Event_t; | |
211 | ||
212 | The units of time are opaque to this package, the cpu and device simulators | |
213 | must both use consistent units from SYSTEM_get_time, which is microsecs. | |
214 | ||
215 | ||
216 | Usage: | |
217 | ||
218 | 1) CPU Instruction Loop: | |
219 | (note, currently each simulated cpu has its own event-queue, see | |
220 | cpu/include/cpu_impl.h: intrT *head, *tail;) | |
221 | ||
222 | if (cpu.pstate.ie | |
223 | && ! cpu.eventq.empty () | |
224 | && system.synchr_time >= cpu.eventq.top_time ()) { | |
225 | ||
226 | Event_t tmp; | |
227 | cpu.eventq.get_top (&tmp); // get = copy And pop. | |
228 | (*tmp.pq_cbfunc) (tmp.pq_arg1, tmp.pq_arg2); // do callback now.!. | |
229 | } | |
230 | ||
231 | ||
232 | 2.a) Device Simulator: | |
233 | call insert_mondo() from a device simulator with all the mondo | |
234 | information (cntl and data register values) and the time at which | |
235 | the mondo interrupt should happen. | |
236 | ||
237 | calling insert_mondo() with time => 0 (or anything less or equal | |
238 | current time) is equivalent to the old CPU_intr_enq() in the sense | |
239 | that the trap will be taken at the next cpu instruction loop in | |
240 | which interrupts are enabled. | |
241 | ||
242 | 2.b) Device Simulator: | |
243 | call insert_callback() and the device's callback function will be | |
244 | called at the specified simulated synchronized time. | |
245 | (NB, it may be hard (impossible?) to dump/restore callbacks, maybe | |
246 | we shouldn't use this...) | |
247 | ||
248 | ||
249 | The low level interface loosely follows STL: empty, size, push, top, pop. | |
250 | ||
251 | The implementation loosely follows that in Cormen/Leiserson/Rivest/Stein's | |
252 | "Intro to Algorithms", ie the children of heap[i] are heap[2*i] and | |
253 | heap[2*i+1], with the root at heap[1], but we implement a MIN-Tree with | |
254 | the smallest (soonest) event time at the top, rather than largest. | |
255 | ||
256 | ||
257 | Blaze is inherently multithreaded, so we will have to support a single- | |
258 | reader (cpu-sim) multiple-writer (device-sims) MT-safe implementation. It | |
259 | is single-reader because (at least in sun4u) a given interrupt is posted | |
260 | to only one cpu (Solaris programs which cpu each device should interrupt). | |
261 | ||
262 | ||
263 | Yes I know it is bad to implement an intrinsic data structure rather than | |
264 | an extrinsic one, and yes I know STL already has a priority queue, but I'm | |
265 | not so sure of STL's performance (esp since stl.priority_queue is an | |
266 | adaptor built on top of an stl.vector or stl.dequeue), and I'm not sure | |
267 | about the thread-safety-ness of STL either. | |
268 | ||
269 | (The one concern I have about this implementation is the copy semantics of | |
270 | the mondo data, we're moving a lot of bits around, not only at | |
271 | push and pop but also in the internal `update' functions. Perhaps we | |
272 | could queue a pointer to (static) data within the device simulator. | |
273 | On the other hand the only routines called frequently by the cpu simulator | |
274 | are "empty()" and "top_time()", and they are inline so ... ) | |
275 | */ | |
276 | ||
277 | #endif /*_PRIOQUE_H*/ |