Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / sam / include / prioque.h
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: prioque.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
* An `Event_t' is what can be put on our priority queue,
* it contains a delivery time and a callback func with args, the
* delivery time is measured in micro-seconds of simulated time.
*
* Since we've implemented this as an intrisic rather than extrinsic
* (ie its not a <template>) data structure, we're placing the sources
* in the "system/blaze" directory where it is used by workerthreads.
*
* More fruit at the bottom...
*/
#ifndef _PRIOQUE_H
#define _PRIOQUE_H
#include <stdio.h> // printf, ...
#include <stdlib.h> // malloc, ...
#include <string.h> // strcmp, ...
#include <synch.h> // mutex, ...
// in system/util/include/atomic.h
extern int32_t atomic_add_32 (volatile int32_t * variable, const int32_t value);
typedef void (*EventFunc_P)(void * arg1, void * arg2);
typedef void (*UnloadFunc_P)(int64_t stime, void* arg1, void* arg2);
typedef struct {
uint64_t pq_priority; // future delivery time, u-secs
void * pq_cbfunc; // event callback function
void * pq_cbarg1;
void * pq_cbarg2;
void * pq_cbunload; // unload callback function
const char * dbgstring;
int event_id;
int worker_id;
} Event_t;
class EventQue {
private: // ---------- Private Implementation --------------------
mutex_t _lock; //
int _size; // current number of objects in queue (0..2**N-1)
int _limit; // maximum number of objects in heap (2**N-1, eg 63)
Event_t * _heap; // array[1..size], [0] not used.
void _grow(); // double the heap space and limit (initially 63)
void _topdown_update (int index); // revive the heap invariant
void _bottomup_update (int index); // ditto
void _insert (Event_t * object); // copy from *obj into queue
void _delete (Event_t * object); // copy to *obj,
// Low Level Interface, no longer public
void push (Event_t & x) { _insert (&x); }
Event_t & top () { return _heap[1]; }
void pop () { _delete (NULL); }
public: // ---------- Public Interface --------------------------------------
EventQue ();
~EventQue ();
bool empty () { return _size == 0; }
int size () { return _size; }
// NB, we expect only a SINGLE-READER (the cpu sim) so no atomicity needed
// between calling top_time() and get_top().
uint64_t top_time () {
return _heap[1].pq_priority;
}
void get_top (Event_t * p) { // does both COPYOUT and POP ==
mutex_lock (&_lock);
*p = top ();
pop ();
mutex_unlock (&_lock);
if (debug) {
int64_t now = SYSTEM_get_time();
fprintf(stderr,"[%d] DeliverEvent- %d \"%s\", (%20lld)\n",
p->worker_id,
p->event_id,
(p->dbgstring ? p->dbgstring : "unknown"),
now);
}
}
void insert_callback (uint64_t time, // does PUSH ==
void * func, void* arg1, void* arg2,
void * unload,
const char * dbgstring,
int worker_id) {
mutex_lock (&_lock);
if (_size == _limit) _grow();
Event_t * p = &_heap[_size+1];
p->pq_priority = time;
p->pq_cbfunc = func;
p->pq_cbarg1 = arg1;
p->pq_cbarg2 = arg2;
p->pq_cbunload = unload;
p->dbgstring = dbgstring;
p->event_id = atomic_add_32 (&event_id, +1);
p->worker_id = worker_id;
_bottomup_update (_size+1);
_size = _size+1;
mutex_unlock (&_lock);
if (debug) {
int64_t now = SYSTEM_get_time();
fprintf(stderr,"[%d] RegisterEvent-%d \"%s\", (%20lld +%7d)\n",
p->worker_id,
p->event_id,
(dbgstring ? dbgstring : "unknown"),
now,
(int)(time - now));
}
}
void print () { /* for debugging with "events" UI command */
int64_t now = SYSTEM_get_time();
mutex_lock (&_lock);
for (int i = _size; i > 0; i--) {
Event_t * p = &_heap[i];
fprintf(stderr,"[%d] Event-%d: \"%s\", (%20lld +%7d)\n",
p->worker_id,
p->event_id,
(p->dbgstring ? p->dbgstring : "unknown"),
now,
p->pq_priority - now);
}
mutex_unlock (&_lock);
}
void unload () { /* call each event's unload func, */
for (int i = _size; i > 0; i--) { /* but leave the events in the Q !! */
Event_t * p = &_heap[i];
if (p->pq_cbunload)
(*(UnloadFunc_P)p->pq_cbunload) (
p->pq_priority,
p->pq_cbarg1, p->pq_cbarg2);
}
}
void reload () { } /* nada, devices must do this for themselves */
void dump ();
void restore ();
void internaldebug (int x = 1); // to see the internal heap contents
static int debug;
private:
static volatile int event_id; // sequence number
};
/* ----------------------------------------------------------------------------
Notes on Blaze-Event-Queue
This is a queue sorted by what future time each registered event should
happen at, where each event is described by a record:
typedef struct {
uint64_t priority; // event delivery time
CallBackFunc_T * pq_cbfunc; // func to call at that time
void * pq_cbarg1; // args to func
void * pq_cbarg2;
} Event_t;
The units of time are opaque to this package, the cpu and device simulators
must both use consistent units from SYSTEM_get_time, which is microsecs.
Usage:
1) CPU Instruction Loop:
(note, currently each simulated cpu has its own event-queue, see
cpu/include/cpu_impl.h: intrT *head, *tail;)
if (cpu.pstate.ie
&& ! cpu.eventq.empty ()
&& system.synchr_time >= cpu.eventq.top_time ()) {
Event_t tmp;
cpu.eventq.get_top (&tmp); // get = copy And pop.
(*tmp.pq_cbfunc) (tmp.pq_arg1, tmp.pq_arg2); // do callback now.!.
}
2.a) Device Simulator:
call insert_mondo() from a device simulator with all the mondo
information (cntl and data register values) and the time at which
the mondo interrupt should happen.
calling insert_mondo() with time => 0 (or anything less or equal
current time) is equivalent to the old CPU_intr_enq() in the sense
that the trap will be taken at the next cpu instruction loop in
which interrupts are enabled.
2.b) Device Simulator:
call insert_callback() and the device's callback function will be
called at the specified simulated synchronized time.
(NB, it may be hard (impossible?) to dump/restore callbacks, maybe
we shouldn't use this...)
The low level interface loosely follows STL: empty, size, push, top, pop.
The implementation loosely follows that in Cormen/Leiserson/Rivest/Stein's
"Intro to Algorithms", ie the children of heap[i] are heap[2*i] and
heap[2*i+1], with the root at heap[1], but we implement a MIN-Tree with
the smallest (soonest) event time at the top, rather than largest.
Blaze is inherently multithreaded, so we will have to support a single-
reader (cpu-sim) multiple-writer (device-sims) MT-safe implementation. It
is single-reader because (at least in sun4u) a given interrupt is posted
to only one cpu (Solaris programs which cpu each device should interrupt).
Yes I know it is bad to implement an intrinsic data structure rather than
an extrinsic one, and yes I know STL already has a priority queue, but I'm
not so sure of STL's performance (esp since stl.priority_queue is an
adaptor built on top of an stl.vector or stl.dequeue), and I'm not sure
about the thread-safety-ness of STL either.
(The one concern I have about this implementation is the copy semantics of
the mondo data, we're moving a lot of bits around, not only at
push and pop but also in the internal `update' functions. Perhaps we
could queue a pointer to (static) data within the device simulator.
On the other hand the only routines called frequently by the cpu simulator
are "empty()" and "top_time()", and they are inline so ... )
*/
#endif /*_PRIOQUE_H*/