MOAB: Mesh Oriented datABase  (version 5.3.1)
gs.cpp
/* compile-time settings:

 FORTRAN naming convention
 default      cpgs_setup, etc.
 -DUPCASE     CPGS_SETUP, etc.
 -DUNDERSCORE cpgs_setup_, etc.

 -DMPI             parallel version (sequential otherwise)
 -DCRYSTAL_STATIC  avoid some message exchange at the risk of
 crashing b/c of insufficient buffer size

 -DINITIAL_BUFFER_SIZE=expression
 ignored unless CRYSTAL_STATIC is defined.
 arithmetic expression controlling the initial buffer size for the crystal
 router; this needs to be large enough to hold the intermediate messages
 during all stages of the crystal router

 variables that can be used in expression include
 num   - the number of processors
 n     - the length of the global index array

 */

/* default for INITIAL_BUFFER_SIZE */
#ifdef CRYSTAL_STATIC
#ifndef INITIAL_BUFFER_SIZE
#define INITIAL_BUFFER_SIZE 2 * ( 3 * num + n * 9 )
#endif
#endif
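
/* illustrative build line (not part of this file): a parallel build using
 the static crystal router with a custom buffer size might look like

   mpicxx -DMPI -DCRYSTAL_STATIC -DINITIAL_BUFFER_SIZE='4*(3*num+n*9)' -c gs.cpp

 num and n are substituted where the macro is expanded, so any arithmetic
 expression in those two variables is acceptable. */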

/* FORTRAN usage:

 integer hc, np
 call crystal_new(hc,comm,np)  ! get a crystal router handle (see fcrystal.c)

 integer hgs
 integer n, max_vec_dim
 integer*? global_index_array(1:n) ! type corresponding to slong in "types.h"

 call cpgs_setup(hgs,hc,global_index_array,n,max_vec_dim)
 sets hgs to new handle

 !ok to call crystal_done(hc) here, or any later time

 call cpgs_op(hgs, u, op)
 integer op : 1-add, 2-multiply, 3-min, 4-max
 real    u(1:n) - same layout as global_index_array provided to cpgs_setup

 call cpgs_op_vec(hgs, u, d, op)
 integer op : 1-add, 2-multiply, 3-min, 4-max
 integer d    <= max_vec_dim
 real    u(1:d, 1:n) - vector components for each node stored together

 call cpgs_op_many(hgs, u1, u2, u3, u4, u5, u6, d, op)
 integer op : 1-add, 2-multiply, 3-min, 4-max
 integer d : in {1,2,3,4,5,6}, <= max_vec_dim
 real    u1(1:n), u2(1:n), u3(1:n), etc.

 same effect as: call cpgs_op(hgs, u1, op)
 if(d.gt.1) call cpgs_op(hgs, u2, op)
 if(d.gt.2) call cpgs_op(hgs, u3, op)
 etc.
 with possibly some savings as fewer messages are exchanged

 call cpgs_free(hgs)
 */
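
/* rough C++ analogue of the FORTRAN usage above (an illustrative sketch
 against the classes in this file; error handling omitted and the variable
 names are placeholders):

   moab::gs_data::crystal_data cd;
   cd.initialize( MPI_COMM_WORLD );                                 // cf. crystal_new

   moab::gs_data gs;
   gs.initialize( n, label, ulabel, maxv, nlabels, nulabels, &cd ); // cf. cpgs_setup

   gs.gs_data_op( u, GS_OP_ADD );                                   // cf. cpgs_op, op = 1
   gs.gs_data_op_vec( uv, d, GS_OP_MAX );                           // cf. cpgs_op_vec

   gs.reset();                                                      // cf. cpgs_free
   cd.reset();
 */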
00067 
00068 #include <cstdio>
00069 #include <cstdlib>
00070 #include <cstdarg>
00071 #include <cstring>
00072 #include <cmath>
00073 #include "moab/gs.hpp"
00074 #ifdef MOAB_HAVE_MPI
00075 #include "moab_mpi.h"
00076 #endif
00077 
00078 #ifdef MOAB_HAVE_MPI
00079 
00080 #ifdef MOAB_HAVE_VALGRIND
00081 #include <valgrind/memcheck.h>
00082 #elif !defined( VALGRIND_CHECK_MEM_IS_DEFINED )
00083 #define VALGRIND_CHECK_MEM_IS_DEFINED( a, b ) ( (void)0 )
00084 #endif
00085 
00086 #endif
00087 
00088 namespace moab
00089 {
00090 
00091 /*--------------------------------------------------------------------------
00092  Local Execution Phases
00093  --------------------------------------------------------------------------*/
00094 
00095 #define DO_SET( a, b ) b = a
00096 #define DO_ADD( a, b ) a += b
00097 #define DO_MUL( a, b ) a *= b
00098 #define DO_MIN( a, b ) \
00099     if( ( b ) < ( a ) ) ( a ) = b
00100 #define DO_MAX( a, b ) \
00101     if( ( b ) > ( a ) ) ( a ) = b
00102 #define DO_BPR( a, b )         \
00103     do                         \
00104     {                          \
00105         uint a_ = a;           \
00106         uint b_ = b;           \
00107         for( ;; )              \
00108         {                      \
00109             if( a_ < b_ )      \
00110                 b_ >>= 1;      \
00111             else if( b_ < a_ ) \
00112                 a_ >>= 1;      \
00113             else               \
00114                 break;         \
00115         }                      \
00116         ( a ) = a_;            \
00117     } while( 0 )
00118 
00119 #define LOOP( op )                       \
00120     do                                   \
00121     {                                    \
00122         sint i, j;                       \
00123         while( ( i = *cm++ ) != -1 )     \
00124             while( ( j = *cm++ ) != -1 ) \
00125                 op( u[i], u[j] );        \
00126     } while( 0 )
00127 
00128 static void local_condense( realType* u, int op, const sint* cm )
00129 {
00130     switch( op )
00131     {
00132         case GS_OP_ADD:
00133             LOOP( DO_ADD );
00134             break;
00135         case GS_OP_MUL:
00136             LOOP( DO_MUL );
00137             break;
00138         case GS_OP_MIN:
00139             LOOP( DO_MIN );
00140             break;
00141         case GS_OP_MAX:
00142             LOOP( DO_MAX );
00143             break;
00144         case GS_OP_BPR:
00145             LOOP( DO_BPR );
00146             break;
00147     }
00148 }
00149 
00150 static void local_uncondense( realType* u, const sint* cm )
00151 {
00152     LOOP( DO_SET );
00153 }
00154 
00155 #undef LOOP
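
/* the condense map cm encodes groups of entries sharing a label: each group
 lists its indices terminated by -1, with a final -1 closing the map (see
 the construction in gs_data::initialize below).  For example, with

   cm = { 0, 3, 5, -1, 1, 4, -1, -1 }

 local_condense( u, GS_OP_ADD, cm ) computes u[0] += u[3], u[0] += u[5],
 u[1] += u[4], and local_uncondense( u, cm ) then broadcasts the condensed
 values back: u[3] = u[5] = u[0] and u[4] = u[1]. */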

#define LOOP( op )                        \
    do                                    \
    {                                     \
        sint i, j, k;                     \
        while( ( i = *cm++ ) != -1 )      \
        {                                 \
            while( ( j = *cm++ ) != -1 )  \
            {                             \
                realType* pi = u + n * i; \
                realType* pj = u + n * j; \
                for( k = n; k; --k )      \
                {                         \
                    op( *pi, *pj );       \
                    ++pi;                 \
                    ++pj;                 \
                }                         \
            }                             \
        }                                 \
    } while( 0 )

static void local_condense_vec( realType* u, uint n, int op, const sint* cm )
{
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
}

static void local_uncondense_vec( realType* u, uint n, const sint* cm )
{
    LOOP( DO_SET );
}

#undef LOOP
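
/* e.g. with n = 2 components per node and cm = { 0, 3, -1, -1 },
 local_condense_vec( u, 2, GS_OP_ADD, cm ) performs u[0] += u[6] and
 u[1] += u[7], since node 3's components live at u[2*3] and u[2*3+1]. */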

/*--------------------------------------------------------------------------
 Non-local Execution Phases
 --------------------------------------------------------------------------*/

#ifdef MOAB_HAVE_MPI

void gs_data::nonlocal_info::initialize( uint np, uint count, uint nlabels, uint nulabels, uint maxv )
{
    _target  = NULL;
    _nshared = NULL;
    _sh_ind  = NULL;
    _slabels = NULL;
    _ulabels = NULL;
    _reqs    = NULL;
    _buf     = NULL;
    _np      = np;
    _target  = (uint*)malloc( ( 2 * np + count ) * sizeof( uint ) );
    _nshared = _target + np;
    _sh_ind  = _nshared + np;
    if( 1 < nlabels )
        _slabels = (slong*)malloc( ( ( nlabels - 1 ) * count ) * sizeof( slong ) );
    else
        _slabels = NULL;
    _ulabels = (Ulong*)malloc( ( nulabels * count ) * sizeof( Ulong ) );
    _reqs    = (MPI_Request*)malloc( 2 * np * sizeof( MPI_Request ) );
    _buf     = (realType*)malloc( ( 2 * count * maxv ) * sizeof( realType ) );
    _maxv    = maxv;
}
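
/* note: _target, _nshared and _sh_ind above share one allocation of
 2*np + count uints, and _buf is sized to hold both the packed outgoing
 values and the received values (hence the factor of 2 in 2*count*maxv). */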

void gs_data::nonlocal_info::nlinfo_free()
{
    // Free the ptrs
    free( _buf );
    free( _reqs );
    free( _target );
    free( _slabels );
    free( _ulabels );
    // Set to null
    _ulabels = NULL;
    _buf     = NULL;
    _reqs    = NULL;
    _target  = NULL;
    _slabels = NULL;
    _nshared = NULL;
    _sh_ind  = NULL;
}

void gs_data::nonlocal_info::nonlocal( realType* u, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint c = nshared[i];
        start  = buf;
        for( ; c; --c )
            *buf++ = u[*sh_ind++];
        MPI_Isend( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        MPI_Irecv( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nshared[i];
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                        \
    do                                    \
    {                                     \
        for( i = 0; i < np; ++i )         \
        {                                 \
            uint c;                       \
            for( c = nshared[i]; c; --c ) \
            {                             \
                OP( u[*sh_ind], *buf );   \
                ++sh_ind, ++buf;          \
            }                             \
        }                                 \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

void gs_data::nonlocal_info::nonlocal_vec( realType* u, uint n, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    uint size     = n * sizeof( realType );
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint ns = nshared[i], c = ns;
        start = buf;
        for( ; c; --c )
        {
            memcpy( buf, u + n * ( *sh_ind++ ), size );
            buf += n;
        }
        MPI_Isend( (void*)start, ns * size, MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        int nsn = n * nshared[i];
        MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nsn;
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                                    \
    do                                                \
    {                                                 \
        for( i = 0; i < np; ++i )                     \
        {                                             \
            uint c, j;                                \
            for( c = nshared[i]; c; --c )             \
            {                                         \
                realType* uu = u + n * ( *sh_ind++ ); \
                for( j = n; j; --j )                  \
                {                                     \
                    OP( *uu, *buf );                  \
                    ++uu;                             \
                    ++buf;                            \
                }                                     \
            }                                         \
        }                                             \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

void gs_data::nonlocal_info::nonlocal_many( realType** u, uint n, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint c, j, ns = nshared[i];
        start = buf;
        for( j = 0; j < n; ++j )
        {
            realType* uu = u[j];
            for( c = 0; c < ns; ++c )
                *buf++ = uu[sh_ind[c]];
        }
        sh_ind += ns;
        MPI_Isend( (void*)start, n * ns * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        int nsn = n * nshared[i];
        MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nsn;
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                             \
    do                                         \
    {                                          \
        for( i = 0; i < np; ++i )              \
        {                                      \
            uint c, j, ns = nshared[i];        \
            for( j = 0; j < n; ++j )           \
            {                                  \
                realType* uu = u[j];           \
                for( c = 0; c < ns; ++c )      \
                {                              \
                    OP( uu[sh_ind[c]], *buf ); \
                    ++buf;                     \
                }                              \
            }                                  \
            sh_ind += ns;                      \
        }                                      \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

/*---------------------------------------------------------------------------
 MOAB Crystal Router
 ---------------------------------------------------------------------------*/
gs_data::crystal_data::crystal_data() {}

void gs_data::crystal_data::initialize( MPI_Comm comm )
{
    int num, id;
    buffers[0].buf.buffer_init( 1024 );
    buffers[1].buf.buffer_init( 1024 );
    buffers[2].buf.buffer_init( 1024 );
    all  = &buffers[0];
    keep = &buffers[1];
    send = &buffers[2];
    memcpy( &( this->_comm ), &comm, sizeof( MPI_Comm ) );
    MPI_Comm_rank( comm, &id );
    this->_id = id;
    MPI_Comm_size( comm, &num );
    this->_num = num;
}

void gs_data::crystal_data::reset()
{
    buffers[0].buf.reset();
    buffers[1].buf.reset();
    buffers[2].buf.reset();
    keep = NULL;
    all  = NULL;
    send = NULL;
}

// Partition data before sending
void gs_data::crystal_data::partition( uint cutoff, crystal_buf* lo, crystal_buf* hi )
{
    const uint* src = (uint*)all->buf.ptr;
    const uint* end = (uint*)src + all->n;
    uint *target, *lop, *hip;
    lo->n = hi->n = 0;
    lo->buf.buffer_reserve( all->n * sizeof( uint ) );
    hi->buf.buffer_reserve( all->n * sizeof( uint ) );
    lop = (uint*)lo->buf.ptr;
    hip = (uint*)hi->buf.ptr;
    while( src != end )
    {
        uint chunk_len = 3 + src[2];
        if( src[0] < cutoff )
        {
            target = lop;
            lo->n += chunk_len;
            lop += chunk_len;
        }
        else
        {
            target = hip;
            hi->n += chunk_len;
            hip += chunk_len;
        }
        memcpy( target, src, chunk_len * sizeof( uint ) );
        src += chunk_len;
    }
}
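
/* each chunk in the crystal buffers has the layout

   [ target proc, source proc, payload length t, t uints of payload ]

 so chunk_len = 3 + src[2] above; partition() routes whole chunks by
 comparing the target proc against the cutoff, never touching the payload.
 (the layout is established in gs_transfer() below.) */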

// Send message to target process
void gs_data::crystal_data::send_( uint target, int recvn )
{
    MPI_Request req[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
    MPI_Status status[3];
    uint count[2] = { 0, 0 }, sum, *recv[2];
    crystal_buf* t;
    int i;

    (void)VALGRIND_CHECK_MEM_IS_DEFINED( &send->n, sizeof( uint ) );
    MPI_Isend( (void*)&send->n, sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
    for( i = 0; i < recvn; ++i )
        MPI_Irecv( (void*)&count[i], sizeof( uint ), MPI_UNSIGNED_CHAR, target + i, target + i, _comm, &req[i + 1] );
    MPI_Waitall( recvn + 1, req, status );
    sum = keep->n;
    for( i = 0; i < recvn; ++i )
        sum += count[i];
    keep->buf.buffer_reserve( sum * sizeof( uint ) );
    recv[0] = (uint*)keep->buf.ptr;
    recv[0] += keep->n;
    recv[1] = recv[0] + count[0];
    keep->n = sum;

    (void)VALGRIND_CHECK_MEM_IS_DEFINED( send->buf.ptr, send->n * sizeof( uint ) );
    MPI_Isend( (void*)send->buf.ptr, send->n * sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
    if( recvn )
    {
        MPI_Irecv( (void*)recv[0], count[0] * sizeof( uint ), MPI_UNSIGNED_CHAR, target, target, _comm, &req[1] );
        if( recvn == 2 )
            MPI_Irecv( (void*)recv[1], count[1] * sizeof( uint ), MPI_UNSIGNED_CHAR, target + 1, target + 1, _comm,
                       &req[2] );
    }
    MPI_Waitall( recvn + 1, req, status );

    t    = all;
    all  = keep;
    keep = t;
}

void gs_data::crystal_data::crystal_router()
{
    uint bl = 0, bh, n = _num, nl, target;
    int recvn;
    crystal_buf *lo, *hi;
    while( n > 1 )
    {
        nl = n / 2, bh = bl + nl;
        if( _id < bh )
        {
            target = _id + nl;
            recvn  = ( n & 1 && _id == bh - 1 ) ? 2 : 1;
            lo     = keep;
            hi     = send;
        }
        else
        {
            target = _id - nl;
            recvn  = ( target == bh ) ? ( --target, 0 ) : 1;
            hi     = keep;
            lo     = send;
        }
        partition( bh, lo, hi );
        send_( target, recvn );
        if( _id < bh )
            n = nl;
        else
        {
            n -= nl;
            bl = bh;
        }
    }
}
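
/* the router repeatedly halves the active rank range [bl, bl+n): each rank
 in the lower half exchanges with the partner nl away in the upper half,
 keeping the chunks destined for its own half and sending the rest.  For
 example, with n = 5 ranks (bl = 0, nl = 2, bh = 2): rank 0 pairs with 2,
 rank 1 pairs with 3 and also receives from the odd rank 4 (recvn = 2),
 and rank 4 sends to rank 1 without receiving (recvn = 0).  After O(log P)
 rounds every chunk has reached its target process. */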

#define UINT_PER_X( X ) ( ( sizeof( X ) + sizeof( uint ) - 1 ) / sizeof( uint ) )
#define UINT_PER_REAL   UINT_PER_X( realType )
#define UINT_PER_LONG   UINT_PER_X( slong )
#define UINT_PER_ULONG  UINT_PER_X( Ulong )
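
/* for example, with 4-byte uint and 8-byte realType, slong and Ulong (a
 typical LP64 build), UINT_PER_REAL = UINT_PER_LONG = UINT_PER_ULONG =
 (8+4-1)/4 = 2: payload fields are copied into the message stream in whole
 uints. */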

/*-------------------------------------------------------------------------
 Transfer
 -------------------------------------------------------------------------*/
ErrorCode gs_data::crystal_data::gs_transfer( int dynamic, moab::TupleList& tl, unsigned pf )
{

    unsigned mi, ml, mul, mr;
    tl.getTupleSize( mi, ml, mul, mr );

    const unsigned tsize = ( mi - 1 ) + ml * UINT_PER_LONG + mul * UINT_PER_ULONG + mr * UINT_PER_REAL;
    sint p, lp = -1;
    sint* ri;
    slong* rl;
    Ulong* rul;
    realType* rr;
    uint i, j, *buf, *len = 0, *buf_end;

    /* sort to group by target proc */
    if( pf >= mi ) return moab::MB_MEMORY_ALLOCATION_FAILED;
    // fail("pf expected to be in vi array (%d, %d)", pf, mi);
    tl.sort( pf, &all->buf );

    /* pack into buffer for crystal router */
    all->buf.buffer_reserve( ( tl.get_n() * ( 3 + tsize ) ) * sizeof( uint ) );
    all->n = 0;
    buf    = (uint*)all->buf.ptr;

    bool canWrite = tl.get_writeEnabled();
    if( !canWrite ) tl.enableWriteAccess();

    ri  = tl.vi_wr;
    rl  = tl.vl_wr;
    rul = tl.vul_wr;
    rr  = tl.vr_wr;

    for( i = tl.get_n(); i; --i )
    {
        p = ri[pf];
        if( p != lp )
        {
            lp     = p;
            *buf++ = p;   /* target */
            *buf++ = _id; /* source */
            len    = buf++;
            *len   = 0; /* length */
            all->n += 3;
        }
        for( j = 0; j < mi; ++j, ++ri )
            if( j != pf ) *buf++ = *ri;
        for( j = ml; j; --j, ++rl )
        {
            memcpy( buf, rl, sizeof( slong ) );
            buf += UINT_PER_LONG;
        }
        for( j = mul; j; --j, ++rul )
        {
            memcpy( buf, rul, sizeof( Ulong ) );
            buf += UINT_PER_ULONG;
        }
        for( j = mr; j; --j, ++rr )
        {
            memcpy( buf, rr, sizeof( realType ) );
            buf += UINT_PER_REAL;
        }
        *len += tsize, all->n += tsize;
    }

    crystal_router();

    /* unpack */
    buf     = (uint*)all->buf.ptr;
    buf_end = buf + all->n;
    tl.set_n( 0 );
    ri  = tl.vi_wr;
    rl  = tl.vl_wr;
    rul = tl.vul_wr;
    rr  = tl.vr_wr;

    while( buf != buf_end )
    {
        sint llen;
        buf++;         /* target ( == this proc ) */
        p    = *buf++; /* source */
        llen = *buf++; /* length */
        while( llen > 0 )
        {
            if( tl.get_n() == tl.get_max() )
            {
                if( !dynamic )
                {
                    tl.set_n( tl.get_max() + 1 );
                    if( !canWrite ) tl.disableWriteAccess();
                    return moab::MB_SUCCESS;
                }
                ErrorCode rval = tl.resize( tl.get_max() + ( 1 + tl.get_max() ) / 2 + 1 );
                if( rval != moab::MB_SUCCESS )
                {
                    if( !canWrite ) tl.disableWriteAccess();
                    return rval;
                }
                ri  = tl.vi_wr + mi * tl.get_n();
                rl  = tl.vl_wr + ml * tl.get_n();
                rul = tl.vul_wr + mul * tl.get_n();
                rr  = tl.vr_wr + mr * tl.get_n();
            }
            tl.inc_n();
            for( j = 0; j < mi; ++j )
                if( j != pf )
                    *ri++ = *buf++;
                else
                    *ri++ = p;
            for( j = ml; j; --j )
            {
                memcpy( rl++, buf, sizeof( slong ) );
                buf += UINT_PER_LONG;
            }
            for( j = mul; j; --j )
            {
                memcpy( rul++, buf, sizeof( Ulong ) );
                buf += UINT_PER_ULONG;
            }
            for( j = mr; j; --j )
            {
                memcpy( rr++, buf, sizeof( realType ) );
                buf += UINT_PER_REAL;
            }
            llen -= tsize;
        }
    }

    if( !canWrite ) tl.disableWriteAccess();
    return moab::MB_SUCCESS;
}
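
/* usage sketch (illustrative, not part of this file): route tuples to the
 proc named in integer field pf, here field 0, where crystal is an
 initialized crystal_data instance

   moab::TupleList tl;
   tl.initialize( 1, 0, 0, 1, 100 );   // 1 int + 1 real per tuple
   tl.enableWriteAccess();
   // fill tl.vi_wr[k] with the destination proc and tl.vr_wr[k] with a
   // value, calling tl.inc_n() once per tuple, then:
   crystal.gs_transfer( 1, tl, 0 );    // dynamic=1: grow tl on receive

 on return tl holds the tuples that arrived on this proc, and field pf has
 been rewritten from target proc to source proc (see the unpack loop). */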
#endif

/*--------------------------------------------------------------------------
 Combined Execution
 --------------------------------------------------------------------------*/

void gs_data::gs_data_op( realType* u, int op )
{
    local_condense( u, op, this->local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal( u, op, _comm );
#endif
    local_uncondense( u, local_cm );
}

void gs_data::gs_data_op_vec( realType* u, uint n, int op )
{
#ifdef MOAB_HAVE_MPI
    if( n > nlinfo->_maxv )
        moab::fail( "%s: initialized with max vec size = %d,"
                    " but called with vec size = %d\n",
                    __FILE__, nlinfo->_maxv, n );
#endif
    local_condense_vec( u, n, op, local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal_vec( u, n, op, _comm );
#endif
    local_uncondense_vec( u, n, local_cm );
}

void gs_data::gs_data_op_many( realType** u, uint n, int op )
{
    uint i;
#ifdef MOAB_HAVE_MPI
    if( n > nlinfo->_maxv )
        moab::fail( "%s: initialized with max vec size = %d,"
                    " but called with vec size = %d\n",
                    __FILE__, nlinfo->_maxv, n );
#endif
    for( i = 0; i < n; ++i )
        local_condense( u[i], op, local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal_many( u, n, op, _comm );
#endif
    for( i = 0; i < n; ++i )
        local_uncondense( u[i], local_cm );
}

/*--------------------------------------------------------------------------
 Setup
 --------------------------------------------------------------------------*/

ErrorCode gs_data::initialize( uint n, const long* label, const Ulong* ulabel, uint maxv, const unsigned int nlabels,
                               const unsigned int nulabels, crystal_data* crystal )
{
    nlinfo = NULL;
    unsigned int j;
    TupleList nonzero, primary;
    ErrorCode rval;
#ifdef MOAB_HAVE_MPI
    TupleList shared;
#else
    moab::TupleList::buffer buf;
#endif
    (void)VALGRIND_CHECK_MEM_IS_DEFINED( label, nlabels * sizeof( long ) );
    (void)VALGRIND_CHECK_MEM_IS_DEFINED( ulabel, nulabels * sizeof( Ulong ) );
#ifdef MOAB_HAVE_MPI
    MPI_Comm_dup( crystal->_comm, &this->_comm );
#else
    buf.buffer_init( 1024 );
#endif

    /* construct list of nonzeros: (index ^, label) */
    nonzero.initialize( 1, nlabels, nulabels, 0, n );
    nonzero.enableWriteAccess();
    {
        uint i;
        sint* nzi;
        long* nzl;
        Ulong* nzul;
        nzi  = nonzero.vi_wr;
        nzl  = nonzero.vl_wr;
        nzul = nonzero.vul_wr;
        for( i = 0; i < n; ++i )
            if( label[nlabels * i] != 0 )
            {
                nzi[0] = i;
                for( j = 0; j < nlabels; j++ )
                    nzl[j] = label[nlabels * i + j];
                for( j = 0; j < nulabels; j++ )
                    nzul[j] = ulabel[nulabels * i + j];
                nzi++;
                nzl += nlabels;
                nzul += nulabels;
                nonzero.inc_n();
            }
    }

    /* sort nonzeros by label: (index ^2, label ^1) */
#ifndef MOAB_HAVE_MPI
    nonzero.sort( 1, &buf );
#else
    nonzero.sort( 1, &crystal->all->buf );
#endif

    /* build list of unique labels w/ lowest associated index:
     (index in nonzero ^, primary (lowest) index in label, count, label(s),
     ulabel(s)) */
    primary.initialize( 3, nlabels, nulabels, 0, nonzero.get_n() );
    primary.enableWriteAccess();
    {
        uint i;
        sint *nzi = nonzero.vi_wr, *pi = primary.vi_wr;
        slong *nzl = nonzero.vl_wr, *pl = primary.vl_wr;
        Ulong *nzul = nonzero.vul_wr, *pul = primary.vul_wr;
        slong last = -1;
        for( i = 0; i < nonzero.get_n(); ++i, nzi += 1, nzl += nlabels, nzul += nulabels )
        {
            if( nzl[0] == last )
            {
                ++pi[-1];
                continue;
            }
            last  = nzl[0];
            pi[0] = i;
            pi[1] = nzi[0];
            for( j = 0; j < nlabels; j++ )
                pl[j] = nzl[j];
            for( j = 0; j < nulabels; j++ )
                pul[j] = nzul[j];
            pi[2] = 1;
            pi += 3, pl += nlabels;
            pul += nulabels;
            primary.inc_n();
        }
    }

    /* calculate size of local condense map */
    {
        uint i, count = 1;
        sint* pi = primary.vi_wr;
        for( i = primary.get_n(); i; --i, pi += 3 )
            if( pi[2] > 1 ) count += pi[2] + 1;
        this->local_cm = (sint*)malloc( count * sizeof( sint ) );
    }

    /* sort unique labels by primary index:
     (nonzero index ^2, primary index ^1, count, label ^2) */
#ifndef MOAB_HAVE_MPI
    primary.sort( 0, &buf );
    buf.reset();
    // buffer_free(&buf);
#else
    primary.sort( 0, &crystal->all->buf );
#endif

    /* construct local condense map */
    {
        uint i, ln;
        sint* pi = primary.vi_wr;
        sint* cm = this->local_cm;
        for( i = primary.get_n(); i > 0; --i, pi += 3 )
            if( ( ln = pi[2] ) > 1 )
            {
                sint* nzi = nonzero.vi_wr + 1 * pi[0];
                for( j = ln; j > 0; --j, nzi += 1 )
                    *cm++ = nzi[0];
                *cm++ = -1;
            }
        *cm++ = -1;
    }
    nonzero.reset();
#ifndef MOAB_HAVE_MPI
    primary.reset();
#else
    /* assign work proc by label modulo np */
    {
        uint i;
        sint* pi  = primary.vi_wr;
        slong* pl = primary.vl_wr;
        for( i = primary.get_n(); i; --i, pi += 3, pl += nlabels )
            pi[0] = pl[0] % crystal->_num;
    }
    rval = crystal->gs_transfer( 1, primary, 0 ); /* transfer to work procs */
    if( rval != MB_SUCCESS ) return rval;
    /* primary: (source proc, index on src, useless, label) */
    /* sort by label */
    primary.sort( 3, &crystal->all->buf );
    /* add sentinel to primary list */
    if( primary.get_n() == primary.get_max() )
        primary.resize( ( primary.get_max() ? primary.get_max() + ( primary.get_max() + 1 ) / 2 + 1 : 2 ) );
    primary.vl_wr[nlabels * primary.get_n()] = -1;
    /* construct shared list: (proc1, proc2, index1, label) */
#ifdef MOAB_HAVE_MPI
    shared.initialize( 3, nlabels, nulabels, 0, primary.get_n() );
    shared.enableWriteAccess();
#endif
    {
        sint *pi1 = primary.vi_wr, *si = shared.vi_wr;
        slong lbl, *pl1 = primary.vl_wr, *sl = shared.vl_wr;
        Ulong *pul1 = primary.vul_wr, *sul = shared.vul_wr;
        for( ; ( lbl = pl1[0] ) != -1; pi1 += 3, pl1 += nlabels, pul1 += nulabels )
        {
            sint* pi2   = pi1 + 3;
            slong* pl2  = pl1 + nlabels;
            Ulong* pul2 = pul1 + nulabels;
            for( ; pl2[0] == lbl; pi2 += 3, pl2 += nlabels, pul2 += nulabels )
            {
                if( shared.get_n() + 2 > shared.get_max() )
                    shared.resize( ( shared.get_max() ? shared.get_max() + ( shared.get_max() + 1 ) / 2 + 1 : 2 ) ),
                        si = shared.vi_wr + shared.get_n() * 3;
                sl    = shared.vl_wr + shared.get_n() * nlabels;
                sul   = shared.vul_wr + shared.get_n() * nulabels;
                si[0] = pi1[0];
                si[1] = pi2[0];
                si[2] = pi1[1];
                for( j = 0; j < nlabels; j++ )
                    sl[j] = pl2[j];
                for( j = 0; j < nulabels; j++ )
                    sul[j] = pul2[j];
                si += 3;
                sl += nlabels;
                sul += nulabels;
                shared.inc_n();
                si[0] = pi2[0];
                si[1] = pi1[0];
                si[2] = pi2[1];
                for( j = 0; j < nlabels; j++ )
                    sl[j] = pl1[j];
                for( j = 0; j < nulabels; j++ )
                    sul[j] = pul1[j];
                si += 3;
                sl += nlabels;
                sul += nulabels;
                shared.inc_n();
            }
        }
    }
    primary.reset();
    rval = crystal->gs_transfer( 1, shared, 0 ); /* transfer to dest procs */
    if( rval != MB_SUCCESS ) return rval;
    /* shared list: (useless, proc2, index, label) */
    /* sort by label */
    shared.sort( 3, &crystal->all->buf );
    /* sort by partner proc */
    shared.sort( 1, &crystal->all->buf );
    /* count partner procs */
    {
        uint i, count = 0;
        sint proc = -1, *si = shared.vi_wr;
        for( i = shared.get_n(); i; --i, si += 3 )
            if( si[1] != proc )
            {
                ++count;
                proc = si[1];
            }
        // this->nlinfo = new nonlocal_info();
        // this->nlinfo->initialize(count,shared.get_n(),
        //                          nlabels, nulabels, maxv);
        this->nlinfo = new nonlocal_info( count, shared.get_n(), nlabels, nulabels, maxv );
    }
    /* construct non-local info */
    {
        uint i;
        sint proc = -1, *si = shared.vi_wr;
        slong* sl      = shared.vl_wr;
        Ulong* ul      = shared.vul_wr;
        uint* target   = this->nlinfo->_target;
        uint* nshared  = this->nlinfo->_nshared;
        uint* sh_ind   = this->nlinfo->_sh_ind;
        slong* slabels = this->nlinfo->_slabels;
        Ulong* ulabels = this->nlinfo->_ulabels;
        for( i = shared.get_n(); i; --i, si += 3 )
        {
            if( si[1] != proc )
            {
                proc       = si[1];
                *target++  = proc;
                *nshared++ = 0;
            }
            ++nshared[-1];
            *sh_ind++ = si[2];
            // don't store 1st slabel
            sl++;
            for( j = 0; j < nlabels - 1; j++ )
                slabels[j] = sl[j];
            for( j = 0; j < nulabels; j++ )
                ulabels[j] = ul[j];
            slabels += nlabels - 1;
            ulabels += nulabels;
            sl += nlabels - 1;
            ul += nulabels;
        }
    }
    shared.reset();
#endif
    return MB_SUCCESS;
}

void gs_data::reset()
{
    free( local_cm );
    local_cm = NULL;
#ifdef MOAB_HAVE_MPI
    if( nlinfo != NULL )
    {
        nlinfo->nlinfo_free();
        delete this->nlinfo;
        MPI_Comm_free( &_comm );
        nlinfo = NULL;
    }
#endif
}

}  // namespace moab