00001 #ifndef _QTHREAD_SST_H_
00002 #define _QTHREAD_SST_H_
00003
00004 #include <errno.h>
00005
00006 #include <qthread/qthread-int.h>
00007 #include <qthread/common.h>
00008
00009 #include <string.h>
00010 #include <ppcPimCalls.h>
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
#ifdef __cplusplus
/* When included from C++, give every declaration C linkage. */
#define Q_STARTCXX extern "C" {
#define Q_ENDCXX }
#else
#define Q_STARTCXX
#define Q_ENDCXX
#endif

Q_STARTCXX
/* On the SST/PIM simulator a qthread is identified by a plain integer
 * (the simulator's thread sequence number) rather than a struct. */
typedef int qthread_t;
/* Shepherds (scheduling domains) are numbered with small integers. */
typedef unsigned short qthread_shepherd_id_t;

/* Signature of a qthread task function: receives its own thread handle
 * and an opaque argument, and returns an aligned_t result. */
typedef aligned_t(*qthread_f) (qthread_t * me, void *arg);
00039
00040
00041
00042
/* Initialization on the simulator only announces itself: PIM_quickPrint
 * emits the tag 0x5ca1ab1e plus the requested shepherd count and the
 * simulator's location count. */
#define qthread_init(x) PIM_quickPrint(0x5ca1ab1e,x,PIM_readSpecial(PIM_CMD_LOC_COUNT))
#define qthread_initialize() PIM_quickPrint(0x5ca1ab1e,0,PIM_readSpecial(PIM_CMD_LOC_COUNT))

/* Teardown likewise only prints a marker tag (0xaced). */
#define qthread_finalize() PIM_quickPrint(0xaced,0,0)

/* Yielding is meaningless on the simulator; expands to nothing. */
#define qthread_yield(x)

/* Shepherd enable/disable are not supported here; no-ops. */
#define qthread_disable_shepherd(x)
#define qthread_enable_shepherd(x)
00055
00056
00057
00058 static inline qthread_t *qthread_self(void) {
00059 return (qthread_t*)PIM_readSpecial(PIM_CMD_THREAD_SEQ);
00060 }
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
/* Spawning: every fork variant funnels into qthread_fork_to();
 * NO_SHEPHERD means "let the runtime pick".  The syncvar variants only
 * differ by a cast of the return-value location. */
#define qthread_fork(f, arg, ret) qthread_fork_to((f), (arg), (ret), NO_SHEPHERD)
#define qthread_fork_syncvar(f, arg, ret) qthread_fork_to((f), (arg), (aligned_t*)(ret), NO_SHEPHERD)
int qthread_fork_to(const qthread_f f, const void *arg, aligned_t * ret,
const qthread_shepherd_id_t shepherd);
#define qthread_fork_syncvar_to(f, arg, ret, shep) qthread_fork_to((f), (arg), (aligned_t*)(ret), (shep))
00079
00080
00081
00082
00083
00084
00085
00086
00087
/* Two-phase spawning: qthread_prepare_for() creates a stopped thread,
 * and qthread_schedule_on() starts it on the chosen shepherd via the
 * simulator. */
#define qthread_prepare(f, arg, ret) qthread_prepare_for((f), (arg), (ret), NO_SHEPHERD)
qthread_t *qthread_prepare_for(const qthread_f f, const void *arg,
aligned_t * ret,
const qthread_shepherd_id_t shepherd);

#define qthread_schedule(t) qthread_schedule_on(t, NO_SHEPHERD)
#define qthread_schedule_on(t,shep) PIM_startStoppedThread((int)t,(int)shep)
00095
/* Ask the simulator to migrate the calling thread to the given shepherd
 * by issuing the SS_PIM_MOVE_TO system call directly.  PowerPC syscall
 * convention: call number in r0, first argument in r3.  Always reports
 * success (returns 0); the `me` argument is unused. */
static inline
unsigned qthread_migrate_to(const qthread_t *me, const int shepherd)
{
__asm__ __volatile__ (
"mr r3, %1\n\t"
"li r0, %0\n\t"
"sc"
::"M"(SS_PIM_MOVE_TO),
"r"(shepherd)
:"r0","r3");
return 0;
}
00108
00109
00110
00111 static inline
00112 unsigned qthread_id(const qthread_t * t)
00113 {
00114 return PIM_readSpecial(PIM_CMD_THREAD_SEQ);
00115 }
00116 static inline
00117 qthread_shepherd_id_t qthread_shep(const qthread_t * t)
00118 {
00119 return PIM_readSpecial(PIM_CMD_PROC_NUM);
00120 }
00121 static inline
00122 size_t qthread_stackleft(const qthread_t * t)
00123 {
00124 return 0;
00125 }
00126 static inline
00127 aligned_t *qthread_retloc(const qthread_t * t)
00128 {
00129 return 0;
00130 }
00131 static inline
00132 int qthread_shep_ok(const qthread_t * t)
00133 {
00134 return 1;
00135 }
00136
00137
00138 static inline
00139 int qthread_distance(const qthread_shepherd_id_t src,
00140 const qthread_shepherd_id_t dest)
00141 {
00142 return 0;
00143 }
00144
00145
00146 static inline
00147 const qthread_shepherd_id_t *qthread_sorted_sheps(const qthread_t * t)
00148 {
00149 return NULL;
00150 }
00151 static inline
00152 const qthread_shepherd_id_t *qthread_sorted_sheps_remote(const
00153 qthread_shepherd_id_t
00154 src)
00155 {
00156 return NULL;
00157 }
00158
/* The shepherd count is the simulator's location count. */
#define qthread_num_shepherds() ((qthread_shepherd_id_t) PIM_readSpecial(PIM_CMD_LOC_COUNT))
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178 static inline int qthread_feb_status(const aligned_t *addr)
00179 {
00180 return PIM_feb_is_full((unsigned int*)addr);
00181 }
00182 #define qthread_syncvar_status(addr) qthread_feb_status((const aligned_t*)addr)
00183
00184
00185
00186 static inline
00187 int qthread_empty(qthread_t * me, const aligned_t *dest)
00188 {
00189 PIM_feb_empty((unsigned int*)dest);
00190 return 0;
00191 }
00192 #define qthread_syncvar_empty(me, dest) qthread_empty((me), (aligned_t*)(dest))
00193 static inline
00194 int qthread_fill(qthread_t * me, const aligned_t *dest)
00195 {
00196 PIM_feb_fill((unsigned int*)dest);
00197 return 0;
00198 }
00199 #define qthread_syncvar_fill(me, dest) qthread_fill((me), (aligned_t*)(dest))
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215 static inline
00216 int qthread_writeEF(qthread_t * me, aligned_t * const dest,
00217 const aligned_t * const src)
00218 {
00219 PIM_feb_writeef(dest, *src);
00220 return 0;
00221 }
00222 static inline
00223 int qthread_writeEF_const(qthread_t * me, aligned_t * const dest,
00224 const aligned_t src)
00225 {
00226 PIM_feb_writeef(dest, src);
00227 return 0;
00228 }
/* syncvar variants of the empty->full writes.  NOTE: the _const form
 * takes its value BY VALUE, so src must be converted to an aligned_t
 * scalar, not to a pointer (same pattern as qthread_syncvar_writeF_const
 * below); the previous (aligned_t*) cast passed a pointer where a value
 * was expected. */
#define qthread_syncvar_writeEF(me, dest, src) qthread_writeEF((me), (aligned_t*)(dest), (aligned_t*)(src))
#define qthread_syncvar_writeEF_const(me, dest, src) qthread_writeEF_const((me), (aligned_t*)(dest), (aligned_t)(src))
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 static inline int qthread_writeF(qthread_t * me, aligned_t * const dest,
00247 const aligned_t * const src)
00248 {
00249 *dest = *src;
00250 PIM_feb_fill(dest);
00251 return 0;
00252 }
00253 static inline int qthread_writeF_const(qthread_t * me, aligned_t * const dest,
00254 const aligned_t src)
00255 {
00256 *dest = src;
00257 PIM_feb_fill(dest);
00258 return 0;
00259 }
00260 #define qthread_syncvar_writeF(me, dest, src) qthread_writeF((me), (aligned_t*)(dest), (aligned_t*)(src))
00261 #define qthread_syncvar_writeF_const(me, dest, src) qthread_writeF_const((me), (aligned_t*)(dest), (aligned_t)(src))
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277 static inline
00278 int qthread_readFF(qthread_t * me, aligned_t * const dest,
00279 const aligned_t * const src)
00280 {
00281 if (dest != NULL && dest != src) {
00282 *dest = PIM_feb_readff((aligned_t * const)src);
00283 } else {
00284 PIM_feb_readff((aligned_t * const)src);
00285 }
00286 return 0;
00287 }
00288 #define qthread_syncvar_readFF(me, dest, src) qthread_readFF((me), (aligned_t*)(dest), (aligned_t*)(src))
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304 static inline
00305 int qthread_readFE(qthread_t * me, aligned_t * restrict const dest,
00306 const aligned_t * restrict const src)
00307 {
00308 if (dest != NULL && dest != src) {
00309 *dest = PIM_feb_readfe((aligned_t * const)src);
00310 } else {
00311 PIM_feb_readfe((aligned_t * const)src);
00312 }
00313 return 0;
00314 }
00315 #define qthread_syncvar_readFE(me, dest, src) qthread_readFE((me), (aligned_t*)(dest), (aligned_t*)(src))
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327 static inline
00328 int qthread_lock(qthread_t * me, const aligned_t * a)
00329 {
00330 PIM_feb_lock((aligned_t * const)a);
00331 return 0;
00332 }
00333 static inline
00334 int qthread_unlock(qthread_t * me, const aligned_t * a)
00335 {
00336 PIM_feb_unlock((aligned_t * const)a);
00337 return 0;
00338 }
00339
00340
00341
00342
00343
00344
00345
00346
00347
/* Atomic float fetch-and-add: adds incr to *operand and returns the
 * value *operand held BEFORE the addition.  One implementation per
 * architecture; falls back to a FEB lock when no asm path exists. */
static QINLINE float qthread_fincr(volatile float *operand, const float incr)
{
#if defined(HAVE_GCC_INLINE_ASSEMBLY)
# if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
union
{
float f;
uint32_t i;
} retval;
register float incremented_value;
register uint32_t scratch_int;
uint32_t conversion_memory = conversion_memory; /* self-init quells "uninitialized" warnings */
/* lwarx/stwcx. retry loop; int<->float conversion goes through memory */
__asm__ __volatile__("1:\n\t"
"lwarx %0,0,%4\n\t"
/* convert to float */
"stw %0,%2\n\t"
"lfs %1,%2\n\t"
/* do the addition */
"fadds %1,%1,%5\n\t"
/* convert back to an integer image */
"stfs %1,%2\n\t"
"lwz %3,%2\n\t"
/* store only if the reservation still holds, else retry */
"stwcx. %3,0,%4\n\t"
"bne- 1b\n\t"
"isync"
:"=&b" (retval.i),
"=&f" (incremented_value),
"=m" (conversion_memory),
"=&r" (scratch_int)
:"r" (operand),
"f" (incr)
:"cc", "memory");

return retval.f;
# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
union
{
float f;
uint32_t i;
} oldval, newval;

/* compare-and-swap retry loop */
do {
oldval.f = *operand;
newval.f = oldval.f + incr;
#if defined(__SUNPRO_CC)
asm volatile
#else
__asm__ __volatile__
#endif
("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
"cas [%1], %2, %0"
:"=&r"(newval.i)
:"r" (operand), "r"(oldval.i), "0"(newval.i)
:"cc", "memory");
} while (oldval.i != newval.i);
return oldval.f;
# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
union
{
float f;
uint32_t i;
} oldval, newval, res;

do {
oldval.f = *operand;
newval.f = oldval.f + incr;
/* ar.ccv holds the expected value for cmpxchg */
__asm__ __volatile__("mov ar.ccv=%0;;"::"rO"(oldval.i));
__asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
:"=r"(res.i)
:"r" (operand), "r"(newval.i)
:"memory");
} while (res.i != oldval.i);
return oldval.f;
# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
union
{
float f;
uint32_t i;
} oldval, newval, retval;

do {
oldval.f = *operand;
newval.f = oldval.f + incr;
/* cmpxchg compares EAX (seeded with oldval) against *operand */
__asm__ __volatile__("lock; cmpxchg %1, (%2)"
:"=a"(retval.i)
:"r" (newval.i),
"r"(operand),
"0"(oldval.i)
:"cc", "memory");
} while (retval.i != oldval.i);
return oldval.f;
# endif
#elif defined (QTHREAD_MUTEX_INCREMENT)
/* no atomic path: serialize through a FEB lock */
float retval;
qthread_t *me = qthread_self();

qthread_lock(me, (aligned_t *) operand);
retval = *operand;
*operand += incr;
qthread_unlock(me, (aligned_t *) operand);
return retval;
#else
#error "Neither atomic nor mutex increment enabled; needed for qthread_fincr"
#endif
}
00464
00465 static QINLINE double qthread_dincr(volatile double *operand,
00466 const double incr)
00467 {
00468 #if defined(HAVE_GCC_INLINE_ASSEMBLY) && (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)
00469 #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
00470 register uint64_t scratch_int;
00471 register double incremented_value;
00472 union
00473 {
00474 uint64_t i;
00475 double d;
00476 } retval;
00477 uint64_t conversion_memory = conversion_memory;
00478 __asm__ __volatile__("1:\n\t"
00479 "ldarx %0,0,%4\n\t"
00480
00481 "std %0,%2\n\t"
00482 "lfd %1,%2\n\t"
00483
00484 "fadd %1,%1,%5\n\t"
00485
00486 "stfd %1,%2\n\t"
00487 "ld %3,%2\n\t"
00488
00489 "stdcx. %3,0,%4\n\t"
00490 "bne- 1b\n\t"
00491 "isync"
00492 :"=&b" (retval.i),
00493 "=&f" (incremented_value),
00494 "=m" (conversion_memory),
00495 "=r&" (scratch_int)
00496 :"r" (operand),
00497 "f" (incr)
00498 :"cc", "memory");
00499
00500 return retval.d;
00501 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
00502 double oldval, newval;
00503
00504 newval = *operand;
00505 do {
00506
00507
00508 register uint64_t tmp1 = tmp1;
00509 register uint64_t tmp2 = tmp2;
00510
00511 oldval = newval;
00512 newval = oldval + incr;
00513 __asm__ __volatile__("ldx %0, %1\n\t"
00514 "ldx %4, %2\n\t"
00515 "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
00516 "casx [%3], %2, %1\n\t"
00517 "stx %1, %0"
00518
00519
00520 :"=m" (newval), "=&h"(tmp1), "=&h"(tmp2)
00521 :"r" (operand), "m"(oldval)
00522 :"memory");
00523 } while (oldval != newval);
00524 return oldval;
00525 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
00526 union
00527 {
00528 uint64_t i;
00529 double d;
00530 } oldval, newval;
00531
00532
00533 do {
00534
00535
00536
00537
00538
00539
00540
00541
00542 oldval.d = *operand;
00543 newval.d = oldval.d + incr;
00544 #if defined(__SUNPRO_CC)
00545 asm volatile
00546 #else
00547 __asm__ __volatile__
00548 #endif
00549 ("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
00550 "casx [%1], %2, %0"
00551 :"=&r"(newval.i)
00552 :"r"(operand), "r"(oldval.i), "0"(newval.i)
00553 :"memory");
00554 } while (oldval.d != newval.d);
00555 return oldval.d;
00556 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
00557 union
00558 {
00559 uint64_t i;
00560 double d;
00561 } oldval, newval, res;
00562
00563 do {
00564 oldval.d = *operand;
00565 newval.d = oldval.d + incr;
00566 __asm__ __volatile__("mov ar.ccv=%0;;"::"rO"(oldval.i));
00567 __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv"
00568 :"=r"(res.i)
00569 :"r" (operand), "r"(newval.i)
00570 :"memory");
00571 } while (res.i != oldval.i);
00572 return oldval.d;
00573
00574 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
00575 union
00576 {
00577 double d;
00578 uint64_t i;
00579 } oldval, newval, retval;
00580
00581 do {
00582 oldval.d = *operand;
00583 newval.d = oldval.d + incr;
00584 __asm__ __volatile__("lock; cmpxchgq %1, (%2)"
00585 :"=a"(retval.i)
00586 :"r"(newval.i), "r"(operand),
00587 "0"(oldval.i)
00588 :"memory");
00589 } while (retval.i != oldval.i);
00590 return oldval.d;
00591
00592 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
00593 union
00594 {
00595 double d;
00596 uint64_t i;
00597 struct
00598 {
00599
00600
00601 uint32_t l;
00602 uint32_t h;
00603 } s;
00604 } oldval, newval;
00605 register char test;
00606
00607 do {
00608 #ifdef __PIC__
00609
00610 # define QTHREAD_PIC_PREFIX "xchg %%ebx, %4\n\t"
00611
00612 # define QTHREAD_PIC_SUFFIX "\n\txchg %%ebx, %4"
00613 # define QTHREAD_PIC_REG_4 "r"
00614 #else
00615 # define QTHREAD_PIC_PREFIX
00616 # define QTHREAD_PIC_SUFFIX
00617 # define QTHREAD_PIC_REG_4 "b"
00618 #endif
00619 oldval.d = *operand;
00620 newval.d = oldval.d + incr;
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640 __asm__ __volatile__(QTHREAD_PIC_PREFIX
00641 "lock; cmpxchg8b (%1)\n\t"
00642 "setne %0"
00643 QTHREAD_PIC_SUFFIX
00644 :"=q"(test)
00645 :"r"(operand),
00646 "a"(oldval.s.l),
00647 "d"(oldval.s.h),
00648 QTHREAD_PIC_REG_4(newval.s.l),
00649 "c"(newval.s.h)
00650 :"memory");
00651 } while (test);
00652 return oldval.d;
00653
00654 #else
00655 #error "Unimplemented assembly architecture"
00656 #endif
00657 #elif defined (QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
00658
00659 double retval;
00660 qthread_t *me = qthread_self();
00661
00662 qthread_lock(me, (aligned_t *) operand);
00663 retval = *operand;
00664 *operand += incr;
00665 qthread_unlock(me, (aligned_t *) operand);
00666 return retval;
00667 #else
00668 #error "Neither atomic nor mutex increment enabled; needed for qthread_dincr"
00669 #endif
00670 }
00671
/* Atomic 32-bit fetch-and-add: returns the value *operand held BEFORE
 * incr was added. */
static QINLINE uint32_t qthread_incr32(volatile uint32_t * operand,
const int incr)
{
#if defined(HAVE_GCC_INLINE_ASSEMBLY)

#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
uint32_t retval;
register unsigned int incrd = incrd; /* self-init quells "uninitialized" warnings */
/* lwarx/stwcx. retry loop */
__asm__ __volatile__("1:\tlwarx %0,0,%1\n\t"
"add %3,%0,%2\n\t"
"stwcx. %3,0,%1\n\t"
"bne- 1b\n\t"
"isync"
:"=&b" (retval)
:"r" (operand), "r"(incr), "r"(incrd)
:"cc", "memory");

return retval;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
register uint32_t oldval, newval;

/* compare-and-swap retry loop */
do {
oldval = *operand;
newval = oldval + incr;
#if defined(__SUNPRO_CC)
asm volatile
#else
__asm__ __volatile__
#endif
("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
"cas [%1] , %2, %0"
:"=&r" (newval)
:"r" (operand), "r"(oldval), "0"(newval)
:"cc", "memory");
} while (oldval != newval);
return oldval;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
uint32_t res;

if (incr == 1) {
/* increments of exactly 1 have a dedicated instruction */
asm volatile ("fetchadd4.rel %0=[%1],1"
:"=r" (res)
:"r" (operand));
} else {
uint32_t old, newval;

/* general case: cmpxchg retry loop via ar.ccv */
do {
old = *operand;
newval = old + incr;
asm volatile ("mov ar.ccv=%0;;":
:"rO" (old));
asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv"
:"=r"(res)
:"r" (operand), "r"(newval)
:"memory");
} while (res != old);
}
return res;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
/* xadd performs the whole fetch-and-add in one locked instruction */
uint32_t retval = incr;
asm volatile ("lock ; xaddl %0, (%1);"
:"=r" (retval)
:"r" (operand), "0"(retval)
:"memory");

return retval;
#else

#error "Unimplemented assembly architecture"

#endif

#elif defined(QTHREAD_MUTEX_INCREMENT)
/* no atomic path: serialize through a FEB lock */
uint32_t retval;
qthread_t *me = qthread_self();

qthread_lock(me, (aligned_t *) operand);
retval = *operand;
*operand += incr;
qthread_unlock(me, (aligned_t *) operand);
return retval;
#else

#error "Architecture unsupported for 32-bit atomic ops, and FEB increment not enabled"

#endif
}
00776
/* Atomic 64-bit fetch-and-add: returns the value *operand held BEFORE
 * incr was added. */
static QINLINE uint64_t qthread_incr64(volatile uint64_t * operand,
const int incr)
{
#if defined(HAVE_GCC_INLINE_ASSEMBLY)

#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
uint64_t retval;
register uint64_t incrd = incrd; /* self-init quells "uninitialized" warnings */

/* ldarx/stdcx. retry loop */
asm volatile ("1:\tldarx %0,0,%1\n\t"
"add %3,%0,%2\n\t"
"stdcx. %3,0,%1\n\t"
"bne- 1b\n\t"
"isync"
:"=&b" (retval)
:"r" (operand), "r"(incr), "r"(incrd)
:"cc", "memory");

return retval;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
/* 32-bit SPARC: 64-bit operands are staged through memory for casx */
uint64_t oldval, newval = *operand;

do {
register uint64_t tmp1 = tmp1; /* self-init quells warnings */
register uint64_t tmp2 = tmp2;

oldval = newval;
newval += incr;

__asm__ __volatile__("ldx %0, %1\n\t"
"ldx %4, %2\n\t"
"membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
"casx [%3] , %2, %1\n\t"
"stx %1, %0"
:"=m" (newval), "=&h"(tmp1), "=&h"(tmp2)
:"r" (operand), "m"(oldval)
:"cc", "memory");
} while (oldval != newval);
return oldval;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
register uint64_t oldval, newval;

/* compare-and-swap retry loop */
do {
oldval = *operand;
newval = oldval + incr;

#if defined(__SUNPRO_CC)
asm volatile
#else
__asm__ __volatile__
#endif
("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
"casx [%1] , %2, %0"
:"=&r"(newval)
:"r" (operand), "r"(oldval), "0"(newval)
:"cc", "memory");
} while (oldval != newval);
return oldval;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
uint64_t res;

if (incr == 1) {
/* increments of exactly 1 have a dedicated instruction */
asm volatile ("fetchadd8.rel %0=%1,1"
:"=r" (res)
:"m" (*operand));
} else {
uint64_t old, newval;

/* general case: cmpxchg retry loop via ar.ccv */
do {
old = *operand;
newval = old + incr;
asm volatile ("mov ar.ccv=%0;;":
:"rO" (old));
asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv"
:"=r" (res)
:"r" (operand), "r"(newval)
:"memory");
} while (res != old);
}
return res;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
/* 32-bit x86: 64-bit CAS via cmpxchg8b (EDX:EAX expected, ECX:EBX new) */
union
{
uint64_t i;
struct
{
/* little-endian halves of the 64-bit value */
uint32_t l;
uint32_t h;
} s;
} oldval, newval;
register char test;

do {
#ifndef QTHREAD_PIC_PREFIX
# ifdef __PIC__
/* PIC reserves EBX; swap the low half in and out around cmpxchg8b */
# define QTHREAD_PIC_PREFIX "xchg %%ebx, %4\n\t"

# define QTHREAD_PIC_SUFFIX "\n\txchg %%ebx, %4"
# define QTHREAD_PIC_REG_4 "r"
# else
# define QTHREAD_PIC_PREFIX
# define QTHREAD_PIC_SUFFIX
# define QTHREAD_PIC_REG_4 "b"
# endif
#endif
oldval.i = *operand;
newval.i = oldval.i + incr;

__asm__ __volatile__(QTHREAD_PIC_PREFIX
"lock; cmpxchg8b (%1)\n\t"
"setne %0"
QTHREAD_PIC_SUFFIX
:"=q"(test)
:"r" (operand),
"a"(oldval.s.l),
"d"(oldval.s.h),
QTHREAD_PIC_REG_4(newval.s.l),
"c"(newval.s.h)
:"memory");
} while (test);
return oldval.i;
#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
/* xaddq performs the whole fetch-and-add in one locked instruction */
uint64_t retval = incr;

asm volatile ("lock ; xaddq %0, (%1);"
:"=r" (retval)
:"r" (operand), "0"(retval)
:"memory");

return retval;

#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
/* NOTE(review): the SPARCV9_32 test here appears dead -- that arch is
 * already handled by an earlier #elif; only POWERPC32 reaches this
 * FEB-lock fallback.  Confirm before removing. */
uint64_t retval;
qthread_t *me = qthread_self();

qthread_lock(me, (aligned_t *) operand);
retval = *operand;
*operand += incr;
qthread_unlock(me, (aligned_t *) operand);
return retval;

#else

#error "Unimplemented assembly architecture"

#endif

#elif defined(QTHREAD_MUTEX_INCREMENT)
/* no atomic path: serialize through a FEB lock */
uint64_t retval;
qthread_t *me = qthread_self();

qthread_lock(me, (aligned_t *) operand);
retval = *operand;
*operand += incr;
qthread_unlock(me, (aligned_t *) operand);
return retval;

#else

#error "Architecture unsupported for 64-bit atomic ops, and FEB increment not enabled"

#endif
}
00986
00987 static QINLINE unsigned long qthread_incr_xx(volatile void *addr, const int incr,
00988 const size_t length)
00989 {
00990 switch (length) {
00991 case 4:
00992 return qthread_incr32((volatile uint32_t *)addr, incr);
00993 case 8:
00994 return qthread_incr64((volatile uint64_t *)addr, incr);
00995 default:
00996
00997
00998 *(int *)(0) = 0;
00999 }
01000 return 0;
01001 }
01002
/* 32-bit compare-and-swap: if *operand == oldval, store newval.
 * Returns the value read from *operand (== oldval iff the swap took). */
static QINLINE uint32_t qthread_cas32(volatile uint32_t * operand,
const uint32_t oldval,
const uint32_t newval)
{
#if defined(HAVE_GCC_INLINE_ASSEMBLY)
# if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
register uint32_t result;
/* lwarx/stwcx. loop; jump to 2: when the comparison fails */
__asm__ __volatile__ ("1:\n\t"
"lwarx %0,0,%3\n\t"
"cmpw %0,%1\n\t"
"bne 2f\n\t"
"stwcx. %2,0,%3\n\t"
"bne- 1b\n"
"2:"
"isync"
:"=&b" (result)
:"r"(oldval), "r"(newval), "r"(operand)
:"cc", "memory");
return result;
# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
register uint32_t newv = newval;
# if defined(__SUNPRO_CC)
asm volatile
# else
__asm__ __volatile__
# endif
("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
"cas [%1], %2, %0"
: "=&r" (newv)
: "r" (operand), "r"(oldval), "0"(newv)
: "cc", "memory");
return newv;
# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
register uint32_t retval;
/* ar.ccv holds the expected value for cmpxchg */
__asm__ __volatile__ ("mov ar.ccv=%0;;": :"rO" (oldval));
__asm__ __volatile__ ("cmpxchg4.acq %0=[%1],%2,ar.ccv"
:"=r"(retval)
:"r"(operand), "r"(newval)
:"memory");
return retval;
# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
(QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
uint32_t retval;
/* cmpxchg compares EAX (seeded with oldval) against *operand and
 * leaves the observed value in EAX */
__asm__ __volatile__ ("lock; cmpxchg %1,(%2)"
: "=&a"(retval)
: "r"(newval), "r" (operand),
"0"(oldval)
:"cc","memory");
return retval;
# else
# error "Don't have a qthread_cas implementation for this architecture"
# endif
#else
# error "CAS needs inline assembly OR __sync_val_compare_and_swap"
#endif
}
01066
01067 static QINLINE uint64_t qthread_cas64(volatile uint64_t * operand,
01068 const uint64_t oldval,
01069 const uint64_t newval)
01070 {
01071 #if defined(HAVE_GCC_INLINE_ASSEMBLY)
01072 # if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
01073 register uint64_t result;
01074 __asm__ __volatile__ ("1:\n\t"
01075 "ldarx %0,0,%3\n\t"
01076 "cmpw %0,%1\n\t"
01077 "bne 2f\n\t"
01078 "stdcx. %2,0,%3\n\t"
01079 "bne- 1b\n"
01080 "2:"
01081 "isync"
01082 :"=&b" (result)
01083 :"r"(oldval), "r"(newval), "r"(operand)
01084 :"cc", "memory");
01085 return result;
01086 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
01087 register uint64_t tmp1=tmp1;
01088 register uint64_t tmp2=tmp2;
01089 uint64_t newv = newval;
01090 # if defined(__SUNPRO_CC)
01091 asm volatile
01092 # else
01093 __asm__ __volatile__
01094 # endif
01095 ("ldx %0, %1\n\t"
01096 "ldx %4, %2\n\t"
01097 "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
01098 "casx [%3], %2, %1\n\t"
01099 "stx %1, %0"
01100
01101
01102 : "=m" (newv), "=&h" (tmp1), "=&h"(tmp2)
01103 : "r" (operand), "m"(oldval), "0"(newv)
01104 : "cc", "memory");
01105 return newv;
01106 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
01107 register uint64_t newv = newval;
01108 # if defined(__SUNPRO_CC)
01109 asm volatile
01110 # else
01111 __asm__ __volatile__
01112 # endif
01113 ("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
01114 "casx [%1], %2, %0"
01115 : "=&r" (newv)
01116 : "r" (operand), "r"(oldval), "0"(newv)
01117 : "cc", "memory");
01118 return newv;
01119 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
01120 register uint32_t retval;
01121 __asm__ __volatile__ ("mov ar.ccv=%0;;": :"rO" (oldval));
01122 __asm__ __volatile__ ("cmpxchg8.acq %0=[%1],%2,ar.ccv"
01123 :"=r"(retval)
01124 :"r"(operand), "r"(newval)
01125 :"memory");
01126 return retval;
01127 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
01128 union {
01129 uint64_t i;
01130 struct {
01131
01132
01133 uint32_t l;
01134 uint32_t h;
01135 } s;
01136 } oldv, newv, ret;
01137 oldv.i = oldval;
01138 newv.i = newval;
01139
01140 __asm__ __volatile__ (
01141 QTHREAD_PIC_PREFIX
01142 "lock; cmpxchg8b (%2)"
01143 QTHREAD_PIC_SUFFIX
01144 :"=a"(ret.s.l),
01145 "=d"(ret.s.h)
01146 :"r"(operand),
01147 "a"(oldv.s.l),
01148 QTHREAD_PIC_REG_4(newv.s.l),
01149 "d"(oldv.s.h),
01150 "c"(newv.s.h)
01151 :"memory");
01152 return ret.i;
01153 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
01154 register uint64_t retval;
01155
01156
01157
01158
01159
01160 __asm__ __volatile__ ("lock; cmpxchg %1,(%2)"
01161 : "=&a"(retval)
01162 : "r"(newval), "r" (operand),
01163 "0"(oldval)
01164 :"cc","memory");
01165 return retval;
01166 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
01167
01168
01169 uint64_t retval;
01170 qthread_t *me = qthread_self();
01171
01172 qthread_lock(me, (aligned_t*)operand);
01173 retval = *operand;
01174 if (retval == oldval) {
01175 *operand = newval;
01176 }
01177 qthread_unlock(me, (aligned_t*)operand);
01178 return retval;
01179 # else
01180 # error "Don't have a qthread_cas64 implementation for this architecture"
01181 # endif
01182 #else
01183 # error "CAS needs inline assembly OR __sync_val_compare_and_swap"
01184 #endif
01185 }
01186
01187 static QINLINE aligned_t qthread_cas_xx(volatile aligned_t * addr,
01188 const aligned_t oldval,
01189 const aligned_t newval, const size_t length)
01190 {
01191 switch (length) {
01192 case 4:
01193 return qthread_cas32((volatile uint32_t *)addr, oldval, newval);
01194 case 8:
01195 return qthread_cas64((volatile uint64_t *)addr, oldval, newval);
01196 default:
01197
01198
01199 *(int *)(0) = 0;
01200 }
01201 return 0;
01202 }
01203
01204 static QINLINE void *qthread_cas_ptr_(void *volatile*const addr,
01205 void *const oldval, void *const newval)
01206 {
01207 switch (sizeof(void *)) {
01208 case 4:
01209 return (void *)(uintptr_t) qthread_cas32((volatile uint32_t *)
01210 addr,
01211 (uint32_t)(uintptr_t)
01212 oldval,
01213 (uint32_t)(uintptr_t)
01214 newval);
01215 case 8:
01216 return (void *)(uintptr_t) qthread_cas64((volatile uint64_t *)
01217 addr,
01218 (uint64_t)(uintptr_t)
01219 oldval,
01220 (uint64_t)(uintptr_t)
01221 newval);
01222 default:
01223
01224
01225 *(int *)(0) = 0;
01226 }
01227 return NULL;
01228 }
01229
#ifdef QTHREAD_ATOMIC_CAS
/* The compiler provides atomic CAS builtins; use them directly. */
# define qthread_cas(ADDR, OLDV, NEWV) \
__sync_val_compare_and_swap((ADDR), (OLDV), (NEWV))
# define qthread_cas_ptr(ADDR, OLDV, NEWV) \
__sync_val_compare_and_swap((ADDR), (OLDV), (NEWV))
#else
/* Fall back to the hand-rolled size-dispatching implementations above. */
# define qthread_cas(ADDR, OLDV, NEWV) \
qthread_cas_xx((volatile aligned_t*)(ADDR), (aligned_t)(OLDV), (aligned_t)(NEWV), sizeof(*(ADDR)))
# ifdef QTHREAD_ATOMIC_CAS_PTR
# define qthread_cas_ptr(ADDR, OLDV, NEWV) \
__sync_val_compare_and_swap((ADDR), (OLDV), (NEWV))
# else
# define qthread_cas_ptr(ADDR, OLDV, NEWV) \
qthread_cas_ptr_((void*volatile*const)(ADDR), (void*const)(OLDV), (void*const)(NEWV))
# endif
#endif
01246
01247 Q_ENDCXX
01248
#ifndef __cplusplus
/* C: use the compiler's atomic add builtin when available, otherwise
 * dispatch on operand size via qthread_incr_xx(). */
# ifdef QTHREAD_ATOMIC_INCR
# define qthread_incr( ADDR, INCVAL ) \
__sync_fetch_and_add(ADDR, INCVAL)
# else
# define qthread_incr( ADDR, INCVAL ) \
qthread_incr_xx( (volatile void*)(ADDR), (int)(INCVAL), sizeof(*(ADDR)) )
# endif

#else
/* C++ callers get type-safe overloads instead. */
# include "qthread.hpp"
#endif
01262
01263 #endif