sam-t2/sam/include/prioque.h

/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: prioque.h
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
 * An  `Event_t'  is what can be put on our priority queue,
 * it contains a delivery time and a callback func with args, the
 * delivery time is measured in micro-seconds of simulated time.
 *
 * Since we've implemented this as an intrisic rather than extrinsic
 * (ie its not a <template>) data structure, we're placing the sources
 * in the "system/blaze" directory where it is used by workerthreads.
 *
 * More fruit at the bottom...
 */
#ifndef _PRIOQUE_H
#define _PRIOQUE_H

#include <stdio.h>              // printf, ...
#include <stdlib.h>             // malloc, ...
#include <string.h>             // strcmp, ...
#include <synch.h>              // mutex, ...


// in system/util/include/atomic.h
extern int32_t atomic_add_32 (volatile int32_t * variable, const int32_t value);

typedef void (*EventFunc_P)(void * arg1, void * arg2);
typedef void (*UnloadFunc_P)(int64_t stime, void* arg1, void* arg2);


typedef struct {
        uint64_t pq_priority;           // future delivery time, u-secs

        void *   pq_cbfunc;             // event callback function
        void *   pq_cbarg1;
        void *   pq_cbarg2;
        void *   pq_cbunload;           // unload callback function

        const char * dbgstring;
        int          event_id;
        int          worker_id;
} Event_t;


class EventQue {

private: // ---------- Private Implementation --------------------
    mutex_t  _lock;     //
    int      _size;     // current number of objects in queue (0..2**N-1)
    int      _limit;    // maximum number of objects in heap (2**N-1, eg 63)
    Event_t * _heap;    // array[1..size], [0] not used.

    void     _grow();   // double the heap space and limit (initially 63)
    void     _topdown_update (int index);       // revive the heap invariant
    void     _bottomup_update (int index);      // ditto
    void     _insert (Event_t * object);        // copy from *obj into queue
    void     _delete (Event_t * object);        // copy to *obj,

                // Low Level Interface, no longer public
    void   push (Event_t & x)   { _insert (&x); }
    Event_t & top ()    { return _heap[1]; }
    void   pop ()       { _delete (NULL); }


public: // ---------- Public Interface --------------------------------------

    EventQue ();
    ~EventQue ();

    bool  empty ()      { return _size == 0; }
    int   size ()       { return _size; }

    // NB, we expect only a SINGLE-READER (the cpu sim) so no atomicity needed
    // between calling top_time() and get_top().

    uint64_t  top_time () {
        return _heap[1].pq_priority;
    }


    void  get_top (Event_t * p) {               // does both COPYOUT and POP ==
        mutex_lock (&_lock);
        *p = top ();
        pop ();
        mutex_unlock (&_lock);

        if (debug) {
            int64_t now = SYSTEM_get_time();
            fprintf(stderr,"[%d] DeliverEvent- %d \"%s\", (%20lld)\n",
                p->worker_id,
                p->event_id,
                (p->dbgstring ? p->dbgstring : "unknown"),
                now);
        }
    }


   void  insert_callback (uint64_t time,                        // does PUSH ==
                          void * func, void* arg1, void* arg2,
                          void * unload,
                          const char * dbgstring,
                          int worker_id) {
      mutex_lock (&_lock);
        if (_size == _limit) _grow();
        Event_t * p = &_heap[_size+1];

        p->pq_priority = time;

        p->pq_cbfunc = func;
        p->pq_cbarg1 = arg1;
        p->pq_cbarg2 = arg2;
        p->pq_cbunload = unload;
        p->dbgstring = dbgstring;
        p->event_id = atomic_add_32 (&event_id, +1);
        p->worker_id = worker_id;

        _bottomup_update (_size+1);
        _size = _size+1;
      mutex_unlock (&_lock);

      if (debug) {
            int64_t now = SYSTEM_get_time();
            fprintf(stderr,"[%d] RegisterEvent-%d \"%s\", (%20lld +%7d)\n",
                p->worker_id,
                p->event_id,
                (dbgstring ? dbgstring : "unknown"),
                now,
                (int)(time - now));
      }
   }


   void print () {                /* for debugging with "events" UI command */
      int64_t now = SYSTEM_get_time();
      mutex_lock (&_lock);
      for (int i = _size; i > 0; i--) {
         Event_t * p = &_heap[i];
         fprintf(stderr,"[%d] Event-%d: \"%s\", (%20lld +%7d)\n",
                p->worker_id,
                p->event_id,
                (p->dbgstring ? p->dbgstring : "unknown"),
                now,
                p->pq_priority - now);
      }
      mutex_unlock (&_lock);
   }


   void unload () {                      /* call each event's unload func,   */
      for (int i = _size; i > 0; i--) {  /* but leave the events in the Q !! */
         Event_t * p = &_heap[i];
         if (p->pq_cbunload)
             (*(UnloadFunc_P)p->pq_cbunload) (
                p->pq_priority,
                p->pq_cbarg1, p->pq_cbarg2);
      }
   }

   void reload () { }   /* nada, devices must do this for themselves */


   void dump ();

   void restore ();


   void internaldebug (int x = 1);      // to see the internal heap contents

   static int debug;
private:
   static volatile int event_id;        // sequence number

};

/* ----------------------------------------------------------------------------
   Notes on Blaze-Event-Queue

   This is a queue sorted by what future time each registered event should
   happen at, where each event is described by a record:

   typedef struct {
        uint64_t priority;                      // event delivery time

        CallBackFunc_T * pq_cbfunc;             // func to call at that time
        void *    pq_cbarg1;                    // args to func
        void *    pq_cbarg2;
   } Event_t;

   The units of time are opaque to this package, the cpu and device simulators
   must both use consistent units from SYSTEM_get_time, which is microsecs.


   Usage:

   1) CPU Instruction Loop:
        (note, currently each simulated cpu has its own event-queue, see
         cpu/include/cpu_impl.h:  intrT *head, *tail;)

        if (cpu.pstate.ie
            && ! cpu.eventq.empty ()
            && system.synchr_time >=  cpu.eventq.top_time ()) {

          Event_t tmp;
          cpu.eventq.get_top (&tmp);                    // get = copy And pop.
          (*tmp.pq_cbfunc) (tmp.pq_arg1, tmp.pq_arg2);  // do callback now.!.
        }


   2.a) Device Simulator:
        call insert_mondo() from a device simulator with all the mondo
        information (cntl and data register values) and the time at which
        the mondo interrupt should happen.

        calling insert_mondo() with time => 0 (or anything less or equal
        current time) is equivalent to the old CPU_intr_enq() in the sense
        that the trap will be taken at the next cpu instruction loop in
        which interrupts are enabled.

   2.b) Device Simulator:
        call insert_callback() and the device's callback function will be
        called at the specified simulated synchronized time.
        (NB, it may be hard (impossible?) to dump/restore callbacks, maybe
         we shouldn't use this...)


   The low level interface loosely follows STL: empty, size, push, top, pop.

   The implementation loosely follows that in Cormen/Leiserson/Rivest/Stein's
   "Intro to Algorithms", ie the children of heap[i] are heap[2*i] and
   heap[2*i+1], with the root at heap[1], but we implement a MIN-Tree with
   the smallest (soonest) event time at the top, rather than largest.


   Blaze is inherently multithreaded, so we will have to support a single-
   reader (cpu-sim) multiple-writer (device-sims) MT-safe implementation. It
   is single-reader because (at least in sun4u) a given interrupt is posted
   to only one cpu (Solaris programs which cpu each device should interrupt).


   Yes I know it is bad to implement an intrinsic data structure rather than
   an extrinsic one, and yes I know STL already has a priority queue, but I'm
   not so sure of STL's performance (esp since stl.priority_queue is an
   adaptor built on top of an stl.vector or stl.dequeue), and I'm not sure
   about the thread-safety-ness of STL either.

   (The one concern I have about this implementation is the copy semantics of
    the mondo data, we're moving a lot of bits around, not only at
    push and pop but also in the internal `update' functions. Perhaps we
    could queue a pointer to (static) data within the device simulator.
    On the other hand the only routines called frequently by the cpu simulator
    are "empty()" and "top_time()", and they are inline so ... )
*/

#endif /*_PRIOQUE_H*/