• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/genericProc/programs/qthread-1.4/include/qthread_innards.h

00001 #ifndef QTHREAD_INNARDS_H
00002 #define QTHREAD_INNARDS_H
00003 
00004 #ifdef HAVE_CONFIG_H
00005 # include "config.h"
00006 #endif
00007 
00008 #include <math.h>
00009 
00010 #ifdef QTHREAD_HAVE_HWLOC
00011 # include <hwloc.h>
00012 # if (HWLOC_API_VERSION < 0x00010000)
00013 #  error HWLOC version unrecognized
00014 # endif
00015 #endif
00016 
00017 #include "qthread_asserts.h"
00018 #include "qt_atomics.h"
00019 
00020 #if defined(HAVE_UCONTEXT_H) && defined(HAVE_NATIVE_MAKECONTEXT)
00021 # include <ucontext.h>                 /* for ucontext_t */
00022 #else
00023 # include "osx_compat/taskimpl.h"
00024 #endif
00025 
00026 #ifdef QTHREAD_DEBUG
00027 # ifdef HAVE_UNISTD_H
00028 #  include <unistd.h>                  /* for write() */
00029 # endif
00030 # include <stdarg.h>                   /* for va_start and va_end */
00031 #endif
00032 #include <pthread.h>
00033 #include <qt_hash.h>
00034 
/* Declared here, defined in another translation unit.
 * NOTE(review): presumably the number of stripes used for the striped lock
 * and hash arrays in struct qlib_s -- confirm against the definition. */
00035 extern unsigned int QTHREAD_LOCKING_STRIPES;
00036 
/*
 * State record behind the global `qlib` handle.  Note that `qlib_t` is a
 * typedef for a POINTER to struct qlib_s, not for the struct itself.
 * Several members exist only under particular configuration macros, so the
 * layout of this struct differs between builds.
 */
00037 typedef struct qlib_s
00038 {
00039     unsigned int nshepherds;    /* what qthread_shepherd_count() reads in non-SST builds */
00040     struct qthread_shepherd_s *shepherds;       /* presumably nshepherds entries -- TODO confirm */
00041 
          /* stack sizes; the unit is not stated in this header (presumably bytes) */
00042     unsigned qthread_stack_size;
00043     unsigned master_stack_size;
00044     unsigned max_stack_size;
00045 
00046     qthread_t *mccoy_thread;    /* free when exiting */
00047 
00048     void *master_stack;
00049     ucontext_t *master_context;
00050 #ifdef QTHREAD_USE_VALGRIND
00051     unsigned int valgrind_masterstack_id;
00052 #endif
00053 
00054     /* assigns a unique thread_id mostly for debugging! */
00055     aligned_t max_thread_id;
00056     QTHREAD_FASTLOCK_TYPE max_thread_id_lock;
00057 
00058     /* round robin scheduler - can probably be smarter */
00059     aligned_t sched_shepherd;
00060     QTHREAD_FASTLOCK_TYPE sched_shepherd_lock;
00061 
00062 #ifdef QTHREAD_HAVE_HWLOC
00063     hwloc_topology_t topology;
00064 #endif
00065 
      /* NOTE(review): inside #if an undefined identifier evaluates to 0, so if
       * neither QTHREAD_ASSEMBLY_ARCH nor QTHREAD_POWERPC32 is defined when this
       * header is read, the comparison below is 0 == 0 and this section is
       * compiled in.  Confirm that both are always defined (config.h is only
       * included when HAVE_CONFIG_H is set). */
00066 #if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
00067     QTHREAD_FASTLOCK_TYPE *atomic_locks;
00068 # ifdef QTHREAD_COUNT_THREADS
00069     aligned_t *atomic_stripes;
00070     QTHREAD_FASTLOCK_TYPE *atomic_stripes_locks;
00071 # endif
00072 #endif
00073     /* this is how we manage FEB-type locks
00074      * NOTE: this can be a major bottleneck and we should probably create
00075      * multiple hashtables to improve performance. The current hashing is a bit
00076      * of a hack, but improves the bottleneck a bit
00077      */
00078     qt_hash *locks;
00079 #ifdef QTHREAD_COUNT_THREADS
00080     aligned_t *locks_stripes;
00081 # ifdef QTHREAD_MUTEX_INCREMENT
00082     QTHREAD_FASTLOCK_TYPE *locks_stripes_locks;
00083 # endif
00084 #endif
00085     /* these are separated out for memory reasons: if you can get away with
00086      * simple locks, then you can use a little less memory. Subject to the same
00087      * bottleneck concerns as the above hashtable, though these are slightly
00088      * better at shrinking their critical section. FEBs have more memory
00089      * overhead, though. */
00090     qt_hash *FEBs;
00091 #ifdef QTHREAD_COUNT_THREADS
00092     aligned_t *febs_stripes;
00093 # ifdef QTHREAD_MUTEX_INCREMENT
00094     QTHREAD_FASTLOCK_TYPE *febs_stripes_locks;
00095 # endif
00096 #endif
00097     /* this is for holding syncvar waiters... it's not striped because there
00098      * isn't supposed to be much contention for this hash table */
          /* unlike locks and FEBs above, this member is a qt_hash itself rather
           * than a pointer to qt_hash */
00099     qt_hash syncvars;
00100 }     *qlib_t;
00101 
/* The global library handle (a pointer, see the qlib_t typedef).  It is not
 * declared at all when QTHREAD_SST_PRIMITIVES is defined, so code that
 * dereferences qlib only compiles in non-SST builds. */
00102 #ifndef QTHREAD_SST_PRIMITIVES
00103 extern qlib_t qlib;
00104 #endif
00105 
00106 /* These are the internal functions that futurelib should be allowed to get at */
      /* NOTE(review): only prototypes are visible in this header.  The remarks
       * below are read off the names and signatures and must be confirmed
       * against the definitions. */

      /* Takes the thread read-only; presumably returns nonzero when t is a
       * future -- TODO confirm. */
00107 unsigned int qthread_isfuture(const qthread_t * t);
00108 
      /* Takes t non-const; presumably marks t as a future -- TODO confirm. */
00109 void qthread_assertfuture(qthread_t * t);
00110 
      /* Takes t non-const; presumably clears the future mark -- TODO confirm. */
00111 void qthread_assertnotfuture(qthread_t * t);
00112 
      /* Presumably starts f(arg) as a future on the given shepherd with the
       * result delivered through ret; the meaning of the int return value is
       * not visible here -- TODO confirm. */
00113 int qthread_fork_future_to(const qthread_t * me, const qthread_f f,
00114                            const void *arg, aligned_t * ret,
00115                            const qthread_shepherd_id_t shepherd);
      /* Maps a shepherd id to an unsigned node number.  Presumably returns
       * QTHREAD_NO_NODE when there is no mapping -- TODO confirm. */
00116 unsigned int qthread_internal_shep_to_node(const qthread_shepherd_id_t shep);
00117 
/* QTHREAD_NO_NODE is (unsigned int)-1, i.e. the largest unsigned int value.
 *
 * qthread_shepherd_count() and qthread_fork_syncvar_future_to() have two
 * forms, selected by QTHREAD_SST_PRIMITIVES:
 *   - SST build: the count comes from PIM_readSpecial(PIM_CMD_LOC_COUNT), and
 *     qthread_fork_syncvar_future_to() is a macro that forwards to
 *     qthread_fork_syncvar_to().  The macro drops its `me` argument, so that
 *     argument is never evaluated.
 *   - otherwise: the count is qlib->nshepherds, and
 *     qthread_fork_syncvar_future_to() is a real function defined elsewhere. */
00118 #define QTHREAD_NO_NODE ((unsigned int)(-1))
00119 #ifdef QTHREAD_SST_PRIMITIVES
00120 # define qthread_shepherd_count() PIM_readSpecial(PIM_CMD_LOC_COUNT)
00121 # define qthread_fork_syncvar_future_to(me, f, arg, ret, shep) qthread_fork_syncvar_to(f, arg, ret, shep)
00122 #else
00123 # define qthread_shepherd_count() (qlib->nshepherds)
00124 int qthread_fork_syncvar_future_to(const qthread_t * me, const qthread_f f,
00125                            const void *arg, syncvar_t * ret,
00126                            const qthread_shepherd_id_t shepherd);
00127 #endif
00128 
00129 /* internal initialization functions */
      /* Takes no arguments and returns nothing; what it sets up is not visible
       * in this header. */
00130 void qt_feb_barrier_internal_init(void);
      /* Takes a void(void) callback.
       * NOTE(review): presumably registers the callback to run when the library
       * shuts down -- confirm against the definition. */
00131 void qthread_internal_cleanup(void (*function)(void));
00132 
00133 /* for debugging */
00134 #ifdef QTHREAD_DEBUG
/* Verbosity levels for qthread_debug(), numbered consecutively from
 * NONE = 0 up to ALL_DETAILS = 7.  A message is emitted when its level is
 * <= debuglevel, and a level of 0 (NONE) is always emitted.  The order of
 * the enumerators therefore matters. */
00135 enum qthread_debug_levels
00136 { NONE = 0,
00137     THREAD_BEHAVIOR, LOCK_BEHAVIOR, ALL_CALLS, ALL_FUNCTIONS,
00138     THREAD_DETAILS, LOCK_DETAILS, ALL_DETAILS
00139 };
00140 
      /* current verbosity threshold; defined in another translation unit */
00141 extern enum qthread_debug_levels debuglevel;
00142 
      /* held by qthread_debug() while it uses its static buffer and writes to
       * file descriptor 2; defined in another translation unit */
00143 extern QTHREAD_FASTLOCK_TYPE output_lock;
00144 
/*
 * qthread_debug(level, format, ...): minimal printf-style tracer, available
 * when QTHREAD_DEBUG is defined.
 *
 * With GNU or C99 variadic macros, qthread_debug() is a macro that prefixes
 * the message with "function(line): " and forwards to qthread_debug_().
 * Without them, qthread_debug() is the function itself and no prefix is added.
 * The C99 macro form needs at least one argument after `format`.
 *
 *   level   the message is emitted when level <= debuglevel or level == 0
 *   format  understands %s, %p, %x, %u, %d and %i only; any other "%c" pair
 *           is copied through unchanged, and a lone '%' at the very end of
 *           the format is printed as '%'
 *
 * Argument contract: every numeric conversion is fetched with
 * va_arg(args, uintptr_t) and printed as an unsigned value (%d and %i too),
 * so callers must pass pointer-width arguments.  The macros cast __LINE__
 * for that reason.  A NULL argument for %s is printed as "(null)".
 *
 * Output is sent to file descriptor 2 with write().  The static buffer is
 * shared by all callers and is protected by output_lock.
 */
00145 #ifdef HAVE_GNU_VAMACROS
00146 #define qthread_debug(level, format, args...) qthread_debug_(level, "%s(%u): " format, __FUNCTION__, (uintptr_t)__LINE__, ##args)
00147 static QINLINE void qthread_debug_(int level, const char *format, ...)
00148 #elif defined( HAVE_C99_VAMACROS )
00149 #define qthread_debug(level, format, ...) qthread_debug_(level, "%s(%u): " format, __FUNCTION__, (uintptr_t)__LINE__, __VA_ARGS__)
00150 static QINLINE void qthread_debug_(int level, const char *format, ...)
00151 #else
00152 static QINLINE void qthread_debug(int level, const char *format, ...)
00153 #endif
00154 {                                      /*{{{ */
00155     va_list args;
00156 
00157     if (level <= debuglevel || level == 0) {
00158         static char buf[1024];  /* protected by the output_lock */
              /* most bytes one loop iteration can append: "0x" plus the
               * decimal digits of a uintptr_t (at most 3 per byte) */
              const int max_expansion = 2 + 3 * (int)sizeof(uintptr_t);
00159         char *head = buf;
00160         char ch;
00161 
00162         QTHREAD_FASTLOCK_LOCK(&output_lock);
00163 
00164         while (write(2, "QDEBUG: ", 8) != 8) ;
00165 
00166         va_start(args, format);
00167         /* avoiding the obvious method, to save on memory
00168          * vfprintf(stderr, format, args); */
00169         while ((ch = *format++)) {
                  /* flush early so the stores below cannot run past the end
                   * of buf, even when assert() is compiled out */
                  if ((buf + sizeof(buf)) - head < max_expansion) {
                      qassert(write(2, buf, head - buf), head - buf);
                      head = buf;
                  }
00170             assert(head < (buf + 1024));
00171             if (ch == '%') {
00172                 ch = *format++;
00173                 switch (ch) {
                          case '\0':
                              /* lone '%' at the end of the format: print it
                               * and step back so the loop sees the terminator
                               * instead of reading past the string */
                              *head++ = '%';
                              format--;
                              break;
00174                     case 's':
00175                     {
00176                         const char *str = va_arg(args, char *);
00177 
                              if (!str) {
                                  str = "(null)";
                              }
                              /* strings bypass buf: flush what is pending,
                               * then write the string directly */
00178                         qassert(write(2, buf, head - buf), head - buf);
00179                         head = buf;
00180                         qassert(write(2, str, strlen(str)), strlen(str));
00181                         break;
00182                     }
00183                     case 'p':
00184                     case 'x':
00185                         *head++ = '0';
00186                         *head++ = 'x';
                              /* fall through: the digits are produced below */
00187                     case 'u':
00188                     case 'd':
00189                     case 'i':
00190                     {
00191                         uintptr_t num;
00192                         unsigned base;
00193 
00194                         num = va_arg(args, uintptr_t);
00195                         base = (ch == 'p' || ch == 'x') ? 16 : 10;
00196                         if (!num) {
00197                             *head++ = '0';
00198                         } else {
00199                             /* count places */
00200                             unsigned places = 0;
00201                             uintptr_t tmpnum = num;
00202 
00203                             /* yes, this is dumb, but it's guaranteed to take
00204                              * less than 10 iterations on 32-bit numbers and
00205                              * doesn't involve floating point */
00206                             while (tmpnum >= base) {
00207                                 tmpnum /= base;
00208                                 places ++;
00209                             }
                                  /* head now points at the last digit; the
                                   * digits are stored from right to left */
00210                             head += places;
00211                             places = 0;
00212                             while (num >= base) {
00213                                 uintptr_t tmp = num % base;
00214                                 *(head - places) =
00215                                     (tmp <
00216                                      10) ? ('0' + tmp) : ('a' + tmp - 10);
00217                                 num /= base;
00218                                 places++;
00219                             }
00220                             num %= base;
00221                             *(head - places) =
00222                                 (num < 10) ? ('0' + num) : ('a' + num - 10);
00223                             head++;
00224                         }
00225                     }
00226                         break;
00227                     default:
00228                         *head++ = '%';
00229                         *head++ = ch;
00230                 }
00231             } else {
00232                 *head++ = ch;
00233             }
00234         }
00235         /* head is still inside buf: the loop flushes before every expansion */
00236         qassert(write(2, buf, head - buf), head - buf);
00237         va_end(args);
00238         /*fflush(stderr); */
00239 
00240         QTHREAD_FASTLOCK_UNLOCK(&output_lock);
00241     }
00242 }                                      /*}}} */
00243 #else
/* Without QTHREAD_DEBUG a call to qthread_debug() expands to an empty
 * statement and its arguments are never evaluated, so they must not carry
 * side effects the program relies on.
 * NOTE(review): this definition uses C99 variadic-macro syntax
 * unconditionally, whereas the debug build checks HAVE_GNU_VAMACROS and
 * HAVE_C99_VAMACROS first -- confirm all supported compilers accept it. */
00244 #define qthread_debug(...) do{ }while(0)
00245 #endif
00246 
00247 #endif

Generated on Fri Oct 22 2010 11:02:24 for SST by  doxygen 1.7.1