Mesh Oriented datABase
(version 5.4.1)
Array-based unstructured mesh data structure
gs.cpp
/* compile-time settings:

   FORTRAN naming convention
     default                      cpgs_setup, etc.
     -DUPCASE                     CPGS_SETUP, etc.
     -DUNDERSCORE                 cpgs_setup_, etc.

   -DMPI             parallel version (sequential otherwise)
   -DCRYSTAL_STATIC  avoid some message exchange at the risk of
                     crashing b/c of insufficient buffer size

   -DINITIAL_BUFFER_SIZE=expression
      ignored unless CRYSTAL_STATIC is defined.
      arithmetic expression controlling the initial buffer size for the crystal
      router; this needs to be large enough to hold the intermediate messages
      during all stages of the crystal router

      variables that can be used in expression include
         num - the number of processors
         n   - the length of the global index array

*/

/* default for INITIAL_BUFFER_SIZE */
#ifdef CRYSTAL_STATIC
#ifndef INITIAL_BUFFER_SIZE
#define INITIAL_BUFFER_SIZE 2 * ( 3 * num + n * 9 )
#endif
#endif

/* FORTRAN usage:

   integer hc, np
   call crystal_new(hc,comm,np)  ! get a crystal router handle (see fcrystal.c)

   integer hgs
   integer n, max_vec_dim
   integer*? global_index_array(1:n) ! type corresponding to slong in "types.h"

   call cpgs_setup(hgs,hc,global_index_array,n,max_vec_dim)
     sets hgs to new handle

   !ok to call crystal_done(hc) here, or any later time

   call cpgs_op(hgs, u, op)
     integer handle, op : 1-add, 2-multiply, 3-min, 4-max
     real u(1:n) - same layout as global_index_array provided to cpgs_setup

   call cpgs_op_vec(hgs, u, d, op)
     integer op : 1-add, 2-multiply, 3-min, 4-max
     integer d <= max_vec_dim
     real u(1:d, 1:n) - vector components for each node stored together

   call cpgs_op_many(hgs, u1, u2, u3, u4, u5, u6, d, op)
     integer op : 1-add, 2-multiply, 3-min, 4-max
     integer d : in {1,2,3,4,5,6}, <= max_vec_dim
     real u1(1:n), u2(1:n), u3(1:n), etc.

     same effect as: call cpgs_op(hgs, u1, op)
                     if(d.gt.1) call cpgs_op(hgs, u2, op)
                     if(d.gt.2) call cpgs_op(hgs, u3, op)
                     etc.
     with possibly some savings as fewer messages are exchanged

   call cpgs_free(hgs)
*/
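/* C++ usage sketch (editor's illustration, not part of the original docs;
   local names here are hypothetical and the exact constructor signatures
   live in moab/gs.hpp):

   moab::gs_data::crystal_data crystal;
   crystal.initialize( MPI_COMM_WORLD );      // wrap the communicator

   // one global label per local node; a label of 0 means the node is
   // ignored by setup (see gs_data::initialize below)
   std::vector< long >  label( n );
   std::vector< Ulong > ulabel( n );

   moab::gs_data gsd;
   moab::ErrorCode rval = gsd.initialize( n, label.data(), ulabel.data(),
                                          maxv, 1, 1, &crystal );

   gsd.gs_data_op( u, GS_OP_ADD );  // sum u over all nodes sharing a label
   gsd.reset();                     // free condense map and non-local info
   crystal.reset();
*/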
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <cstring>
#include <cmath>
#include "moab/gs.hpp"
#ifdef MOAB_HAVE_MPI
#include "moab_mpi.h"
#endif

/* no-op fallback so builds without valgrind headers still compile */
#ifdef MOAB_HAVE_VALGRIND
#include <valgrind/memcheck.h>
#elif !defined( VALGRIND_CHECK_MEM_IS_DEFINED )
#define VALGRIND_CHECK_MEM_IS_DEFINED( a, b ) ( (void)0 )
#endif

namespace moab
{

/*--------------------------------------------------------------------------
   Local Execution Phases
  --------------------------------------------------------------------------*/

#define DO_SET( a, b ) b = a
#define DO_ADD( a, b ) a += b
#define DO_MUL( a, b ) a *= b
#define DO_MIN( a, b ) \
    if( ( b ) < ( a ) ) ( a ) = b
#define DO_MAX( a, b ) \
    if( ( b ) > ( a ) ) ( a ) = b
/* reduce two labels to their common leading bit pattern (binary prefix) */
#define DO_BPR( a, b )         \
    do                         \
    {                          \
        uint a_ = a;           \
        uint b_ = b;           \
        for( ;; )              \
        {                      \
            if( a_ < b_ )      \
                b_ >>= 1;      \
            else if( b_ < a_ ) \
                a_ >>= 1;      \
            else               \
                break;         \
        }                      \
        ( a ) = a_;            \
    } while( 0 )

#define LOOP( op )                           \
    do                                       \
    {                                        \
        sint i, j;                           \
        while( ( i = *cm++ ) != -1 )         \
            while( ( j = *cm++ ) != -1 )     \
                op( u[i], u[j] );            \
    } while( 0 )

static void local_condense( realType* u, int op, const sint* cm )
{
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
}

static void local_uncondense( realType* u, const sint* cm )
{
    LOOP( DO_SET );
}

#undef LOOP

#define LOOP( op )                                              \
    do                                                          \
    {                                                           \
        sint i, j, k;                                           \
        while( ( i = *cm++ ) != -1 )                            \
        {                                                       \
            while( ( j = *cm++ ) != -1 )                        \
            {                                                   \
                /* restart at node i's components for each j */ \
                realType* pi = u + n * i;                       \
                realType* pj = u + n * j;                       \
                for( k = n; k; --k )                            \
                {                                               \
                    op( *pi, *pj );                             \
                    ++pi;                                       \
                    ++pj;                                       \
                }                                               \
            }                                                   \
        }                                                       \
    } while( 0 )

static void local_condense_vec( realType* u, uint n, int op, const sint* cm )
{
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
}

static void local_uncondense_vec( realType* u, uint n, const sint* cm )
{
    LOOP( DO_SET );
}

#undef LOOP
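/* The condense map `cm` used above is a flat array of index groups: each
   group lists the node indices sharing one label, terminated by -1, and a
   final extra -1 ends the map.  Editor's illustration with hypothetical
   data, where nodes 0,4,7 share a label and nodes 2,5 share another:

     sint cm[] = { 0, 4, 7, -1,   2, 5, -1,   -1 };

     local_condense( u, GS_OP_ADD, cm );  // u[0] += u[4] + u[7];  u[2] += u[5]
     local_uncondense( u, cm );           // u[4] = u[7] = u[0];   u[5] = u[2]

   so after condense the first index of each group holds the reduced value,
   and uncondense broadcasts it back to the rest of the group. */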
/*--------------------------------------------------------------------------
   Non-local Execution Phases
  --------------------------------------------------------------------------*/

#ifdef MOAB_HAVE_MPI

void gs_data::nonlocal_info::initialize( uint np, uint count, uint nlabels, uint nulabels, uint maxv )
{
    _target  = NULL;
    _nshared = NULL;
    _sh_ind  = NULL;
    _slabels = NULL;
    _ulabels = NULL;
    _reqs    = NULL;
    _buf     = NULL;
    _np      = np;
    _target  = (uint*)malloc( ( 2 * np + count ) * sizeof( uint ) );
    _nshared = _target + np;
    _sh_ind  = _nshared + np;
    if( 1 < nlabels )
        _slabels = (slong*)malloc( ( ( nlabels - 1 ) * count ) * sizeof( slong ) );
    else
        _slabels = NULL;
    _ulabels = (Ulong*)malloc( ( nulabels * count ) * sizeof( Ulong ) );
    _reqs    = (MPI_Request*)malloc( 2 * np * sizeof( MPI_Request ) );
    _buf     = (realType*)malloc( ( 2 * count * maxv ) * sizeof( realType ) );
    _maxv    = maxv;
}

void gs_data::nonlocal_info::nlinfo_free()
{
    // Free the ptrs
    free( _buf );
    free( _reqs );
    free( _target );
    free( _slabels );
    free( _ulabels );
    // Set to null
    _ulabels = NULL;
    _buf     = NULL;
    _reqs    = NULL;
    _target  = NULL;
    _slabels = NULL;
    _nshared = NULL;
    _sh_ind  = NULL;
}

void gs_data::nonlocal_info::nonlocal( realType* u, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint c = nshared[i];
        start  = buf;
        for( ; c; --c )
            *buf++ = u[*sh_ind++];
        MPI_Isend( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        MPI_Irecv( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nshared[i];
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                        \
    do                                    \
    {                                     \
        for( i = 0; i < np; ++i )         \
        {                                 \
            uint c;                       \
            for( c = nshared[i]; c; --c ) \
            {                             \
                OP( u[*sh_ind], *buf );   \
                ++sh_ind, ++buf;          \
            }                             \
        }                                 \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}
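/* Exchange pattern shared by nonlocal(), nonlocal_vec() and nonlocal_many()
   (editor's note): _buf holds packed sends in its first half and receives
   in its second half.  For each of the _np partner ranks, the values of the
   nshared[i] shared nodes (indexed through _sh_ind) are packed and posted
   with MPI_Isend, while matching MPI_Irecv calls are posted for the
   partner's copies.  Once MPI_Wait has drained all 2 * np requests, the
   received values are folded into u in the same _sh_ind order, so every
   rank ends up with the same reduced value for each shared node. */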
void gs_data::nonlocal_info::nonlocal_vec( realType* u, uint n, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    uint size = n * sizeof( realType );
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint ns = nshared[i], c = ns;
        start   = buf;
        for( ; c; --c )
        {
            memcpy( buf, u + n * ( *sh_ind++ ), size );
            buf += n;
        }
        MPI_Isend( (void*)start, ns * size, MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        int nsn = n * nshared[i];
        /* receive exactly the ns * size bytes sent above */
        MPI_Irecv( (void*)start, nshared[i] * size, MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nsn;
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                                    \
    do                                                \
    {                                                 \
        for( i = 0; i < np; ++i )                     \
        {                                             \
            uint c, j;                                \
            for( c = nshared[i]; c; --c )             \
            {                                         \
                realType* uu = u + n * ( *sh_ind++ ); \
                for( j = n; j; --j )                  \
                {                                     \
                    OP( *uu, *buf );                  \
                    ++uu;                             \
                    ++buf;                            \
                }                                     \
            }                                         \
        }                                             \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

void gs_data::nonlocal_info::nonlocal_many( realType** u, uint n, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint c, j, ns = nshared[i];
        start = buf;
        for( j = 0; j < n; ++j )
        {
            realType* uu = u[j];
            for( c = 0; c < ns; ++c )
                *buf++ = uu[sh_ind[c]];
        }
        sh_ind += ns;
        MPI_Isend( (void*)start, n * ns * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        int nsn = n * nshared[i];
        MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nsn;
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                             \
    do                                         \
    {                                          \
        for( i = 0; i < np; ++i )              \
        {                                      \
            uint c, j, ns = nshared[i];        \
            for( j = 0; j < n; ++j )           \
            {                                  \
                realType* uu = u[j];           \
                for( c = 0; c < ns; ++c )      \
                {                              \
                    OP( uu[sh_ind[c]], *buf ); \
                    ++buf;                     \
                }                              \
            }                                  \
            sh_ind += ns;                      \
        }                                      \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

/*---------------------------------------------------------------------------
   MOAB Crystal Router
  ---------------------------------------------------------------------------*/
gs_data::crystal_data::crystal_data() {}

void gs_data::crystal_data::initialize( MPI_Comm comm )
{
    int num, id;
    buffers[0].buf.buffer_init( 1024 );
    buffers[1].buf.buffer_init( 1024 );
    buffers[2].buf.buffer_init( 1024 );
    all  = &buffers[0];
    keep = &buffers[1];
    send = &buffers[2];
    memcpy( &( this->_comm ), &comm, sizeof( MPI_Comm ) );
    MPI_Comm_rank( comm, &id );
    this->_id = id;
    MPI_Comm_size( comm, &num );
    this->_num = num;
}

void gs_data::crystal_data::reset()
{
    buffers[0].buf.reset();
    buffers[1].buf.reset();
    buffers[2].buf.reset();
    keep = NULL;
    all  = NULL;
    send = NULL;
}
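/* Message layout inside the crystal buffers (editor's note): each chunk is
   ( target, source, len, payload[len] ) stored as uints, which is why
   partition() below walks the buffer with chunk_len = 3 + src[2].
   Hypothetical example with two chunks queued on rank 0:

     all->buf = { 3, 0, 2, a, b,    // target 3, source 0, len 2
                  1, 0, 1, c };     // target 1, source 0, len 1

   partition( cutoff, lo, hi ) copies chunks with target < cutoff to lo and
   the rest to hi, preserving their relative order. */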
// Partition data before sending
void gs_data::crystal_data::partition( uint cutoff, crystal_buf* lo, crystal_buf* hi )
{
    const uint* src = (uint*)all->buf.ptr;
    const uint* end = (uint*)src + all->n;
    uint *target, *lop, *hip;
    lo->n = hi->n = 0;
    lo->buf.buffer_reserve( all->n * sizeof( uint ) );
    hi->buf.buffer_reserve( all->n * sizeof( uint ) );
    lop = (uint*)lo->buf.ptr;
    hip = (uint*)hi->buf.ptr;
    while( src != end )
    {
        uint chunk_len = 3 + src[2];
        if( src[0] < cutoff )
        {
            target = lop;
            lo->n += chunk_len;
            lop += chunk_len;
        }
        else
        {
            target = hip;
            hi->n += chunk_len;
            hip += chunk_len;
        }
        memcpy( target, src, chunk_len * sizeof( uint ) );
        src += chunk_len;
    }
}

// Send message to target process
void gs_data::crystal_data::send_( uint target, int recvn )
{
    MPI_Request req[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
    MPI_Status status[3];
    uint count[2] = { 0, 0 }, sum, *recv[2];
    crystal_buf* t;
    int i;

    (void)VALGRIND_CHECK_MEM_IS_DEFINED( &send->n, sizeof( uint ) );
    MPI_Isend( (void*)&send->n, sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
    for( i = 0; i < recvn; ++i )
        MPI_Irecv( (void*)&count[i], sizeof( uint ), MPI_UNSIGNED_CHAR, target + i, target + i, _comm, &req[i + 1] );
    MPI_Waitall( recvn + 1, req, status );
    sum = keep->n;
    for( i = 0; i < recvn; ++i )
        sum += count[i];
    keep->buf.buffer_reserve( sum * sizeof( uint ) );
    recv[0] = (uint*)keep->buf.ptr;
    recv[0] += keep->n;
    recv[1] = recv[0] + count[0];
    keep->n = sum;

    (void)VALGRIND_CHECK_MEM_IS_DEFINED( send->buf.ptr, send->n * sizeof( uint ) );
    MPI_Isend( (void*)send->buf.ptr, send->n * sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
    if( recvn )
    {
        MPI_Irecv( (void*)recv[0], count[0] * sizeof( uint ), MPI_UNSIGNED_CHAR, target, target, _comm, &req[1] );
        if( recvn == 2 )
            MPI_Irecv( (void*)recv[1], count[1] * sizeof( uint ), MPI_UNSIGNED_CHAR, target + 1, target + 1, _comm,
                       &req[2] );
    }
    MPI_Waitall( recvn + 1, req, status );

    t    = all;
    all  = keep;
    keep = t;
}
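/* crystal_router() below repeatedly halves the active rank range [bl, bl+n):
   each rank in the lower half pairs with one in the upper half, sends the
   chunks destined for the other half, keeps the rest, and the half
   containing this rank becomes the new active range.  Editor's trace of
   the first step for n = 5 ranks (0..4): nl = 2, bh = 2, so
     rank 0 <-> rank 2,  rank 1 <-> rank 3,
     rank 1 additionally receives from rank 4 (recvn == 2, n odd), and
     rank 4 sends to rank 1 but receives nothing (recvn == 0).
   After O(log2 n) rounds every chunk sits on its target rank. */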
void gs_data::crystal_data::crystal_router()
{
    uint bl = 0, bh, n = _num, nl, target;
    int recvn;
    crystal_buf *lo, *hi;
    while( n > 1 )
    {
        nl = n / 2, bh = bl + nl;
        if( _id < bh )
        {
            target = _id + nl;
            recvn  = ( n & 1 && _id == bh - 1 ) ? 2 : 1;
            lo     = keep;
            hi     = send;
        }
        else
        {
            target = _id - nl;
            recvn  = ( target == bh ) ? ( --target, 0 ) : 1;
            hi     = keep;
            lo     = send;
        }
        partition( bh, lo, hi );
        send_( target, recvn );
        if( _id < bh )
            n = nl;
        else
        {
            n -= nl;
            bl = bh;
        }
    }
}

#define UINT_PER_X( X ) ( ( sizeof( X ) + sizeof( uint ) - 1 ) / sizeof( uint ) )
#define UINT_PER_REAL UINT_PER_X( realType )
#define UINT_PER_LONG UINT_PER_X( slong )
#define UINT_PER_ULONG UINT_PER_X( Ulong )

/*-------------------------------------------------------------------------
   Transfer
  -------------------------------------------------------------------------*/
ErrorCode gs_data::crystal_data::gs_transfer( int dynamic, moab::TupleList& tl, unsigned pf )
{
    unsigned mi, ml, mul, mr;
    tl.getTupleSize( mi, ml, mul, mr );

    const unsigned tsize = ( mi - 1 ) + ml * UINT_PER_LONG + mul * UINT_PER_ULONG + mr * UINT_PER_REAL;
    sint p, lp = -1;
    sint* ri;
    slong* rl;
    Ulong* rul;
    realType* rr;
    uint i, j, *buf, *len = 0, *buf_end;

    /* sort to group by target proc */
    if( pf >= mi ) return moab::MB_MEMORY_ALLOCATION_FAILED;
    // fail("pf expected to be in vi array (%d, %d)", pf, mi);
    tl.sort( pf, &all->buf );

    /* pack into buffer for crystal router */
    all->buf.buffer_reserve( ( tl.get_n() * ( 3 + tsize ) ) * sizeof( uint ) );
    all->n = 0;
    buf    = (uint*)all->buf.ptr;

    bool canWrite = tl.get_writeEnabled();
    if( !canWrite ) tl.enableWriteAccess();

    ri  = tl.vi_wr;
    rl  = tl.vl_wr;
    rul = tl.vul_wr;
    rr  = tl.vr_wr;

    for( i = tl.get_n(); i; --i )
    {
        p = ri[pf];
        if( p != lp )
        {
            lp     = p;
            *buf++ = p;   /* target */
            *buf++ = _id; /* source */
            len    = buf++;
            *len   = 0; /* length */
            all->n += 3;
        }
        for( j = 0; j < mi; ++j, ++ri )
            if( j != pf ) *buf++ = *ri;
        for( j = ml; j; --j, ++rl )
        {
            memcpy( buf, rl, sizeof( slong ) );
            buf += UINT_PER_LONG;
        }
        for( j = mul; j; --j, ++rul )
        {
            memcpy( buf, rul, sizeof( Ulong ) );
            buf += UINT_PER_ULONG;
        }
        for( j = mr; j; --j, ++rr )
        {
            memcpy( buf, rr, sizeof( realType ) );
            buf += UINT_PER_REAL;
        }
        *len += tsize, all->n += tsize;
    }

    crystal_router();

    /* unpack */
    buf     = (uint*)all->buf.ptr;
    buf_end = buf + all->n;
    tl.set_n( 0 );
    ri  = tl.vi_wr;
    rl  = tl.vl_wr;
    rul = tl.vul_wr;
    rr  = tl.vr_wr;

    while( buf != buf_end )
    {
        sint llen;
        buf++;         /* target ( == this proc ) */
        p    = *buf++; /* source */
        llen = *buf++; /* length */
        while( llen > 0 )
        {
            if( tl.get_n() == tl.get_max() )
            {
                if( !dynamic )
                {
                    tl.set_n( tl.get_max() + 1 );
                    if( !canWrite ) tl.disableWriteAccess();
                    return moab::MB_SUCCESS;
                }
                ErrorCode rval = tl.resize( tl.get_max() + ( 1 + tl.get_max() ) / 2 + 1 );
                if( rval != moab::MB_SUCCESS )
                {
                    if( !canWrite ) tl.disableWriteAccess();
                    return rval;
                }
                ri  = tl.vi_wr + mi * tl.get_n();
                rl  = tl.vl_wr + ml * tl.get_n();
                rul = tl.vul_wr + mul * tl.get_n();
                rr  = tl.vr_wr + mr * tl.get_n();
            }
            tl.inc_n();
            for( j = 0; j < mi; ++j )
                if( j != pf )
                    *ri++ = *buf++;
                else
                    *ri++ = p;
            for( j = ml; j; --j )
            {
                memcpy( rl++, buf, sizeof( slong ) );
                buf += UINT_PER_LONG;
            }
            for( j = mul; j; --j )
            {
                memcpy( rul++, buf, sizeof( Ulong ) );
                buf += UINT_PER_ULONG;
            }
            for( j = mr; j; --j )
            {
                memcpy( rr++, buf, sizeof( realType ) );
                buf += UINT_PER_REAL;
            }
            llen -= tsize;
        }
    }

    if( !canWrite ) tl.disableWriteAccess();
    return moab::MB_SUCCESS;
}
#endif
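/* gs_transfer() usage sketch (editor's illustration): route each tuple of a
   TupleList to the rank stored in integer field pf.  The field counts and
   the `crystal` instance name here are hypothetical.

   moab::TupleList tl;
   tl.initialize( 2, 0, 0, 1, 100 );  // 2 ints, 1 real per tuple
   tl.enableWriteAccess();
   // tuple 0: send value 3.14, tagged 42, to rank 1
   tl.vi_wr[0] = 1;
   tl.vi_wr[1] = 42;
   tl.vr_wr[0] = 3.14;
   tl.inc_n();

   crystal.gs_transfer( 1, tl, 0 );   // dynamic = 1: grow tl on receive

   // afterwards tl holds the tuples sent *to* this rank, with field pf
   // rewritten to the source rank (see the unpack loop above)
*/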
/*--------------------------------------------------------------------------
   Combined Execution
  --------------------------------------------------------------------------*/

void gs_data::gs_data_op( realType* u, int op )
{
    local_condense( u, op, this->local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal( u, op, _comm );
#endif
    local_uncondense( u, local_cm );
}

void gs_data::gs_data_op_vec( realType* u, uint n, int op )
{
#ifdef MOAB_HAVE_MPI
    if( n > nlinfo->_maxv )
        moab::fail( "%s: initialized with max vec size = %d,"
                    " but called with vec size = %d\n",
                    __FILE__, nlinfo->_maxv, n );
#endif
    local_condense_vec( u, n, op, local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal_vec( u, n, op, _comm );
#endif
    local_uncondense_vec( u, n, local_cm );
}

void gs_data::gs_data_op_many( realType** u, uint n, int op )
{
    uint i;
#ifdef MOAB_HAVE_MPI
    if( n > nlinfo->_maxv )
        moab::fail( "%s: initialized with max vec size = %d,"
                    " but called with vec size = %d\n",
                    __FILE__, nlinfo->_maxv, n );
#endif
    for( i = 0; i < n; ++i )
        local_condense( u[i], op, local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal_many( u, n, op, _comm );
#endif
    for( i = 0; i < n; ++i )
        local_uncondense( u[i], local_cm );
}
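/* Editor's illustration of the _many variant: arrays with the same node
   layout are reduced in a single message exchange instead of one per array
   (names here are hypothetical; requires maxv >= 3 at setup):

   realType* uvw[3] = { ux, uy, uz };          // e.g. velocity components
   gsd.gs_data_op_many( uvw, 3, GS_OP_MAX );
   // same result as calling gs_data_op() on ux, uy and uz separately,
   // but with fewer messages exchanged
*/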
/*--------------------------------------------------------------------------
   Setup
  --------------------------------------------------------------------------*/

ErrorCode gs_data::initialize( uint n,
                               const long* label,
                               const Ulong* ulabel,
                               uint maxv,
                               const unsigned int nlabels,
                               const unsigned int nulabels,
                               crystal_data* crystal )
{
    nlinfo = NULL;
    unsigned int j;
    TupleList nonzero, primary;
    ErrorCode rval;
#ifdef MOAB_HAVE_MPI
    TupleList shared;
#else
    moab::TupleList::buffer buf;
#endif
    (void)VALGRIND_CHECK_MEM_IS_DEFINED( label, n * nlabels * sizeof( long ) );
    (void)VALGRIND_CHECK_MEM_IS_DEFINED( ulabel, n * nulabels * sizeof( Ulong ) );
#ifdef MOAB_HAVE_MPI
    MPI_Comm_dup( crystal->_comm, &this->_comm );
#else
    buf.buffer_init( 1024 );
#endif

    /* construct list of nonzeros: (index ^, label) */
    nonzero.initialize( 1, nlabels, nulabels, 0, n );
    nonzero.enableWriteAccess();
    {
        uint i;
        sint* nzi;
        long* nzl;
        Ulong* nzul;
        nzi  = nonzero.vi_wr;
        nzl  = nonzero.vl_wr;
        nzul = nonzero.vul_wr;
        for( i = 0; i < n; ++i )
            if( label[nlabels * i] != 0 )
            {
                nzi[0] = i;
                for( j = 0; j < nlabels; j++ )
                    nzl[j] = label[nlabels * i + j];
                for( j = 0; j < nulabels; j++ )
                    nzul[j] = ulabel[nulabels * i + j];
                nzi++;
                nzl += nlabels;
                nzul += nulabels;
                nonzero.inc_n();
            }
    }

    /* sort nonzeros by label: (index ^2, label ^1) */
#ifndef MOAB_HAVE_MPI
    nonzero.sort( 1, &buf );
#else
    nonzero.sort( 1, &crystal->all->buf );
#endif

    /* build list of unique labels w/ lowest associated index:
       (index in nonzero ^, primary (lowest) index in label, count, label(s),
       ulabel(s)) */
    primary.initialize( 3, nlabels, nulabels, 0, nonzero.get_n() );
    primary.enableWriteAccess();
    {
        uint i;
        sint *nzi = nonzero.vi_wr, *pi = primary.vi_wr;
        slong *nzl = nonzero.vl_wr, *pl = primary.vl_wr;
        Ulong *nzul = nonzero.vul_wr, *pul = primary.vul_wr;
        slong last = -1;
        for( i = 0; i < nonzero.get_n(); ++i, nzi += 1, nzl += nlabels, nzul += nulabels )
        {
            if( nzl[0] == last )
            {
                ++pi[-1]; /* bump count of the previous unique label */
                continue;
            }
            last  = nzl[0];
            pi[0] = i;
            pi[1] = nzi[0];
            for( j = 0; j < nlabels; j++ )
                pl[j] = nzl[j];
            for( j = 0; j < nulabels; j++ )
                pul[j] = nzul[j];
            pi[2] = 1;
            pi += 3, pl += nlabels;
            pul += nulabels;
            primary.inc_n();
        }
    }

    /* calculate size of local condense map */
    {
        uint i, count = 1;
        sint* pi = primary.vi_wr;
        for( i = primary.get_n(); i; --i, pi += 3 )
            if( pi[2] > 1 ) count += pi[2] + 1;
        this->local_cm = (sint*)malloc( count * sizeof( sint ) );
    }

    /* sort unique labels by primary index:
       (nonzero index ^2, primary index ^1, count, label ^2) */
#ifndef MOAB_HAVE_MPI
    primary.sort( 0, &buf );
    buf.reset();
#else
    primary.sort( 0, &crystal->all->buf );
#endif

    /* construct local condense map */
    {
        uint i, ln;
        sint* pi = primary.vi_wr;
        sint* cm = this->local_cm;
        for( i = primary.get_n(); i > 0; --i, pi += 3 )
            if( ( ln = pi[2] ) > 1 )
            {
                sint* nzi = nonzero.vi_wr + 1 * pi[0];
                for( j = ln; j > 0; --j, nzi += 1 )
                    *cm++ = nzi[0];
                *cm++ = -1;
            }
        *cm++ = -1;
    }
    nonzero.reset();
#ifndef MOAB_HAVE_MPI
    primary.reset();
#else
    /* assign work proc by label modulo np */
    {
        uint i;
        sint* pi  = primary.vi_wr;
        slong* pl = primary.vl_wr;
        for( i = primary.get_n(); i; --i, pi += 3, pl += nlabels )
            pi[0] = pl[0] % crystal->_num;
    }
    rval = crystal->gs_transfer( 1, primary, 0 ); /* transfer to work procs */
    if( rval != MB_SUCCESS ) return rval;
    /* primary: (source proc, index on src, useless, label) */
    /* sort by label */
    primary.sort( 3, &crystal->all->buf );
    /* add sentinel to primary list */
    if( primary.get_n() == primary.get_max() )
        primary.resize( primary.get_max() ? primary.get_max() + ( primary.get_max() + 1 ) / 2 + 1 : 2 );
    primary.vl_wr[nlabels * primary.get_n()] = -1;
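    /* Remainder of setup (editor's summary): the label-mod-np work proc now
       holds every (source proc, source index) tuple for each label assigned
       to it.  The loop below emits one shared tuple per ordered pair of
       procs sharing a label, and gs_transfer() routes each tuple back to
       its first proc; after the two sorts that follow, consecutive runs of
       equal partner procs are counted and copied into the nonlocal_info
       arrays (_target, _nshared, _sh_ind, and the remaining labels). */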
    /* construct shared list: (proc1, proc2, index1, label) */
#ifdef MOAB_HAVE_MPI
    shared.initialize( 3, nlabels, nulabels, 0, primary.get_n() );
    shared.enableWriteAccess();
#endif
    {
        sint *pi1 = primary.vi_wr, *si = shared.vi_wr;
        slong lbl, *pl1 = primary.vl_wr, *sl = shared.vl_wr;
        Ulong *pul1 = primary.vul_wr, *sul = shared.vul_wr;
        for( ; ( lbl = pl1[0] ) != -1; pi1 += 3, pl1 += nlabels, pul1 += nulabels )
        {
            sint* pi2   = pi1 + 3;
            slong* pl2  = pl1 + nlabels;
            Ulong* pul2 = pul1 + nulabels;
            for( ; pl2[0] == lbl; pi2 += 3, pl2 += nlabels, pul2 += nulabels )
            {
                if( shared.get_n() + 2 > shared.get_max() )
                {
                    shared.resize( shared.get_max() ? shared.get_max() + ( shared.get_max() + 1 ) / 2 + 1 : 2 );
                    si = shared.vi_wr + shared.get_n() * 3;
                }
                sl    = shared.vl_wr + shared.get_n() * nlabels;
                sul   = shared.vul_wr + shared.get_n() * nulabels;
                si[0] = pi1[0];
                si[1] = pi2[0];
                si[2] = pi1[1];
                for( j = 0; j < nlabels; j++ )
                    sl[j] = pl2[j];
                for( j = 0; j < nulabels; j++ )
                    sul[j] = pul2[j];
                si += 3;
                sl += nlabels;
                sul += nulabels;
                shared.inc_n();
                si[0] = pi2[0];
                si[1] = pi1[0];
                si[2] = pi2[1];
                for( j = 0; j < nlabels; j++ )
                    sl[j] = pl1[j];
                for( j = 0; j < nulabels; j++ )
                    sul[j] = pul1[j];
                si += 3;
                sl += nlabels;
                sul += nulabels;
                shared.inc_n();
            }
        }
    }
    primary.reset();
    rval = crystal->gs_transfer( 1, shared, 0 ); /* transfer to dest procs */
    if( rval != MB_SUCCESS ) return rval;
    /* shared list: (useless, proc2, index, label) */
    /* sort by label */
    shared.sort( 3, &crystal->all->buf );
    /* sort by partner proc */
    shared.sort( 1, &crystal->all->buf );
    /* count partner procs */
    {
        uint i, count = 0;
        sint proc = -1, *si = shared.vi_wr;
        for( i = shared.get_n(); i; --i, si += 3 )
            if( si[1] != proc )
            {
                ++count;
                proc = si[1];
            }
        this->nlinfo = new nonlocal_info( count, shared.get_n(), nlabels, nulabels, maxv );
    }
    /* construct non-local info */
    {
        uint i;
        sint proc = -1, *si = shared.vi_wr;
        slong* sl      = shared.vl_wr;
        Ulong* ul      = shared.vul_wr;
        uint* target   = this->nlinfo->_target;
        uint* nshared  = this->nlinfo->_nshared;
        uint* sh_ind   = this->nlinfo->_sh_ind;
        slong* slabels = this->nlinfo->_slabels;
        Ulong* ulabels = this->nlinfo->_ulabels;
        for( i = shared.get_n(); i; --i, si += 3 )
        {
            if( si[1] != proc )
            {
                proc       = si[1];
                *target++  = proc;
                *nshared++ = 0;
            }
            ++nshared[-1];
            *sh_ind++ = si[2];
            // don't store 1st slabel
            sl++;
            for( j = 0; j < nlabels - 1; j++ )
                slabels[j] = sl[j];
            for( j = 0; j < nulabels; j++ )
                ulabels[j] = ul[j];
            slabels += nlabels - 1;
            ulabels += nulabels;
            sl += nlabels - 1;
            ul += nulabels;
        }
    }
    shared.reset();
#endif
    return MB_SUCCESS;
}

void gs_data::reset()
{
    free( local_cm );
    local_cm = NULL;
#ifdef MOAB_HAVE_MPI
    if( nlinfo != NULL )
    {
        nlinfo->nlinfo_free();
        delete this->nlinfo;
        MPI_Comm_free( &_comm );
        nlinfo = NULL;
    }
#endif
}

}  // namespace moab