Mesh Oriented datABase (MOAB), version 5.4.1
Array-based unstructured mesh data structure
00001 /* compile-time settings:
00002
00003 FORTRAN naming convention
00004 default cpgs_setup, etc.
00005 -DUPCASE CPGS_SETUP, etc.
00006 -DUNDERSCORE cpgs_setup_, etc.
00007
00008 -DMPI parallel version (sequential otherwise)
00009 -DCRYSTAL_STATIC avoids some message exchange, at the risk of
00010 crashing because of an insufficient buffer size
00011
00012 -DINITIAL_BUFFER_SIZE=expression
00013 ignored unless CRYSTAL_STATIC is defined.
00014 arithmetic expression controlling the initial buffer size for the crystal
00015 router; this needs to be large enough to hold the intermediate messages
00016 during all stages of the crystal router
00017
00018 variables that can be used in the expression include
00019 num - the number of processors
00020 n - the length of the global index array
00021
00022 */
00023
00024 /* default for INITIAL_BUFFER_SIZE */
00025 #ifdef CRYSTAL_STATIC
00026 #ifndef INITIAL_BUFFER_SIZE
00027 #define INITIAL_BUFFER_SIZE 2 * ( 3 * num + n * 9 )
00028 #endif
00029 #endif
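/* Worked example (illustrative only): with this default, a run on num = 64
   processors with a global index array of length n = 100000 starts the
   crystal router with a buffer of 2 * ( 3*64 + 100000*9 ) = 1800384 uints,
   roughly 7 MB with 4-byte uints. */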
00030
00031 /* FORTRAN usage:
00032
00033 integer hc, np
00034 call crystal_new(hc,comm,np) ! get a crystal router handle (see fcrystal.c)
00035
00036 integer hgs
00037 integer n, max_vec_dim
00038 integer*? global_index_array(1:n) ! type corresponding to slong in "types.h"
00039
00040 call cpgs_setup(hgs,hc,global_index_array,n,max_vec_dim)
00041 sets hgs to new handle
00042
00043 !ok to call crystal_done(hc) here, or any later time
00044
00045 call cpgs_op(hgs, u, op)
00046 integer handle, op : 1-add, 2-multiply, 3-min, 4-max
00047 real u(1:n) - same layout as global_index_array provided to cpgs_setup
00048
00049 call cpgs_op_vec(hgs, u, d, op)
00050 integer op : 1-add, 2-multiply, 3-min, 4-max
00051 integer d <= max_vec_dim
00052 real u(1:d, 1:n) - vector components for each node stored together
00053
00054 call cpgs_op_many(hgs, u1, u2, u3, u4, u5, u6, d, op)
00055 integer op : 1-add, 2-multiply, 3-min, 4-max
00056 integer d : in {1,2,3,4,5,6}, <= max_vec_dim
00057 real u1(1:n), u2(1:n), u3(1:n), etc.
00058
00059 same effect as: call cpgs_op(hgs, u1, op)
00060 if(d.gt.1) call cpgs_op(hgs, u2, op)
00061 if(d.gt.2) call cpgs_op(hgs, u3, op)
00062 etc.
00063 with possibly some savings as fewer messages are exchanged
00064
00065 call cpgs_free(hgs)
00066 */
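/* A rough C++ analogue of the sequence above, using the classes defined in
   this file (illustrative sketch only; the exact constructors and the
   GS_OP_* definitions come from "moab/gs.hpp" and the surrounding headers,
   and a real caller would check the returned ErrorCode):

   #include "moab/gs.hpp"
   #include "moab_mpi.h"
   using namespace moab;

   void sum_shared( MPI_Comm comm, long* gids, Ulong* handles,
                    realType* u, uint n )
   {
       gs_data::crystal_data crystal;      // crystal router state
       crystal.initialize( comm );

       gs_data gsd;
       // one signed label + one unsigned label per entry, max vector dim 1
       gsd.initialize( n, gids, handles, 1, 1, 1, &crystal );

       gsd.gs_data_op( u, GS_OP_ADD );     // combine values sharing a label
       gsd.reset();                        // analogous to cpgs_free
       crystal.reset();                    // analogous to crystal_done
   }
*/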
00067
00068 #include <stdio.h>
00069 #include <stdlib.h>
00070 #include <stdarg.h>
00071 #include <string.h>
00072 #include <math.h>
00073 #include "moab/gs.hpp"
00074 #ifdef MOAB_HAVE_MPI
00075 #include "moab_mpi.h"
00076 #endif
00077
00078 #ifdef MOAB_HAVE_MPI
00079
00080 #ifdef MOAB_HAVE_VALGRIND
00081 #include <valgrind/memcheck.h>
00082 #elif !defined( VALGRIND_CHECK_MEM_IS_DEFINED )
00083 #define VALGRIND_CHECK_MEM_IS_DEFINED( a, b ) ( (void)0 )
00084 #endif
00085
00086 #endif
00087
00088 namespace moab
00089 {
00090
00091 /*--------------------------------------------------------------------------
00092 Local Execution Phases
00093 --------------------------------------------------------------------------*/
00094
00095 #define DO_SET( a, b ) b = a
00096 #define DO_ADD( a, b ) a += b
00097 #define DO_MUL( a, b ) a *= b
00098 #define DO_MIN( a, b ) \
00099 if( ( b ) < ( a ) ) ( a ) = b
00100 #define DO_MAX( a, b ) \
00101 if( ( b ) > ( a ) ) ( a ) = b
00102 #define DO_BPR( a, b ) \
00103 do \
00104 { \
00105 uint a_ = a; \
00106 uint b_ = b; \
00107 for( ;; ) \
00108 { \
00109 if( a_ < b_ ) \
00110 b_ >>= 1; \
00111 else if( b_ < a_ ) \
00112 a_ >>= 1; \
00113 else \
00114 break; \
00115 } \
00116 ( a ) = a_; \
00117 } while( 0 )
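/* Illustration (not part of the original source): DO_BPR reduces both
   arguments to their longest common leading-bit pattern.  For example, with
   a = 12 (binary 1100) and b = 13 (binary 1101) the larger value is halved
   until the two agree, so both become 6 (binary 110) and a ends up as 6. */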
00118
00119 #define LOOP( op ) \
00120 do \
00121 { \
00122 sint i, j; \
00123 while( ( i = *cm++ ) != -1 ) \
00124 while( ( j = *cm++ ) != -1 ) \
00125 op( u[i], u[j] ); \
00126 } while( 0 )
00127
00128 static void local_condense( realType* u, int op, const sint* cm )
00129 {
00130 switch( op )
00131 {
00132 case GS_OP_ADD:
00133 LOOP( DO_ADD );
00134 break;
00135 case GS_OP_MUL:
00136 LOOP( DO_MUL );
00137 break;
00138 case GS_OP_MIN:
00139 LOOP( DO_MIN );
00140 break;
00141 case GS_OP_MAX:
00142 LOOP( DO_MAX );
00143 break;
00144 case GS_OP_BPR:
00145 LOOP( DO_BPR );
00146 break;
00147 }
00148 }
00149
00150 static void local_uncondense( realType* u, const sint* cm )
00151 {
00152 LOOP( DO_SET );
00153 }
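/* Example (illustrative): a condense map is a list of index groups, each
   terminated by -1, with one extra -1 closing the whole map.  Given
       cm = { 0, 3, 5, -1,   2, 4, -1,   -1 }
   and op == GS_OP_ADD, local_condense accumulates u[3] and u[5] into u[0]
   and u[4] into u[2]; local_uncondense then copies u[0] back into u[3] and
   u[5], and u[2] back into u[4], so every entry of a group ends up with the
   combined value. */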
00154
00155 #undef LOOP
00156
00157 #define LOOP( op ) \
00158 do \
00159 { \
00160 sint i, j, k; \
00161 while( ( i = *cm++ ) != -1 ) \
00162 { \
00163 realType* pi = u + n * i; \
00164 while( ( j = *cm++ ) != -1 ) \
00165 { \
00166 realType* pj = u + n * j; \
00167 for( k = n; k; --k ) \
00168 { \
00169 op( *pi, *pj ); \
00170 ++pi; \
00171 ++pj; \
00172 } \
00173 } \
00174 } \
00175 } while( 0 )
00176
00177 static void local_condense_vec( realType* u, uint n, int op, const sint* cm )
00178 {
00179 switch( op )
00180 {
00181 case GS_OP_ADD:
00182 LOOP( DO_ADD );
00183 break;
00184 case GS_OP_MUL:
00185 LOOP( DO_MUL );
00186 break;
00187 case GS_OP_MIN:
00188 LOOP( DO_MIN );
00189 break;
00190 case GS_OP_MAX:
00191 LOOP( DO_MAX );
00192 break;
00193 case GS_OP_BPR:
00194 LOOP( DO_BPR );
00195 break;
00196 }
00197 }
00198
00199 static void local_uncondense_vec( realType* u, uint n, const sint* cm )
00200 {
00201 LOOP( DO_SET );
00202 }
00203
00204 #undef LOOP
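/* Illustration: the _vec variants apply the same condense map componentwise.
   With n = 3 components, entry i occupies u[3*i .. 3*i+2], so a group
   { 0, 4, -1 } combines u[0..2] with u[12..14] component by component. */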
00205
00206 /*--------------------------------------------------------------------------
00207 Non-local Execution Phases
00208 --------------------------------------------------------------------------*/
00209
00210 #ifdef MOAB_HAVE_MPI
00211
00212 void gs_data::nonlocal_info::initialize( uint np, uint count, uint nlabels, uint nulabels, uint maxv )
00213 {
00214 _target = NULL;
00215 _nshared = NULL;
00216 _sh_ind = NULL;
00217 _slabels = NULL;
00218 _ulabels = NULL;
00219 _reqs = NULL;
00220 _buf = NULL;
00221 _np = np;
00222 _target = (uint*)malloc( ( 2 * np + count ) * sizeof( uint ) );
00223 _nshared = _target + np;
00224 _sh_ind = _nshared + np;
00225 if( 1 < nlabels )
00226 _slabels = (slong*)malloc( ( ( nlabels - 1 ) * count ) * sizeof( slong ) );
00227 else
00228 _slabels = NULL;
00229 _ulabels = (Ulong*)malloc( ( nulabels * count ) * sizeof( Ulong ) );
00230 _reqs = (MPI_Request*)malloc( 2 * np * sizeof( MPI_Request ) );
00231 _buf = (realType*)malloc( ( 2 * count * maxv ) * sizeof( realType ) );
00232 _maxv = maxv;
00233 }
00234
00235 void gs_data::nonlocal_info::nlinfo_free()
00236 {
00237 // Free the ptrs
00238 free( _buf );
00239 free( _reqs );
00240 free( _target );
00241 free( _slabels );
00242 free( _ulabels );
00243 // Set to null
00244 _ulabels = NULL;
00245 _buf = NULL;
00246 _reqs = NULL;
00247 _target = NULL;
00248 _slabels = NULL;
00249 _nshared = NULL;
00250 _sh_ind = NULL;
00251 }
00252
00253 void gs_data::nonlocal_info::nonlocal( realType* u, int op, MPI_Comm comm )
00254 {
00255 MPI_Status status;
00256 uint np = this->_np;
00257 MPI_Request* reqs = this->_reqs;
00258 uint* targ = this->_target;
00259 uint* nshared = this->_nshared;
00260 uint* sh_ind = this->_sh_ind;
00261 uint id;
00262 realType *buf = this->_buf, *start;
00263 unsigned int i;
00264 {
00265 MPI_Comm_rank( comm, (int*)&i );
00266 id = i;
00267 }
00268 for( i = 0; i < np; ++i )
00269 {
00270 uint c = nshared[i];
00271 start = buf;
00272 for( ; c; --c )
00273 *buf++ = u[*sh_ind++];
00274 MPI_Isend( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
00275 }
00276 start = buf;
00277 for( i = 0; i < np; ++i )
00278 {
00279 MPI_Irecv( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
00280 start += nshared[i];
00281 }
00282 for( reqs = this->_reqs, i = np * 2; i; --i )
00283 MPI_Wait( reqs++, &status );
00284 sh_ind = this->_sh_ind;
00285 #define LOOP( OP ) \
00286 do \
00287 { \
00288 for( i = 0; i < np; ++i ) \
00289 { \
00290 uint c; \
00291 for( c = nshared[i]; c; --c ) \
00292 { \
00293 OP( u[*sh_ind], *buf ); \
00294 ++sh_ind, ++buf; \
00295 } \
00296 } \
00297 } while( 0 )
00298 switch( op )
00299 {
00300 case GS_OP_ADD:
00301 LOOP( DO_ADD );
00302 break;
00303 case GS_OP_MUL:
00304 LOOP( DO_MUL );
00305 break;
00306 case GS_OP_MIN:
00307 LOOP( DO_MIN );
00308 break;
00309 case GS_OP_MAX:
00310 LOOP( DO_MAX );
00311 break;
00312 case GS_OP_BPR:
00313 LOOP( DO_BPR );
00314 break;
00315 }
00316 #undef LOOP
00317 }
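/* Illustration of the layout used above (not from the original source): if
   this rank shares 2 entries with rank 3 and 1 entry with rank 7, then
   _np = 2, _target = { 3, 7 }, _nshared = { 2, 1 } and _sh_ind holds the
   three local indices of those entries.  nonlocal() packs the outgoing
   values into the first part of _buf, receives the partners' values right
   after them, and the switch above folds the received values back into u
   through the same _sh_ind ordering. */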
00318
00319 void gs_data::nonlocal_info::nonlocal_vec( realType* u, uint n, int op, MPI_Comm comm )
00320 {
00321 MPI_Status status;
00322 uint np = this->_np;
00323 MPI_Request* reqs = this->_reqs;
00324 uint* targ = this->_target;
00325 uint* nshared = this->_nshared;
00326 uint* sh_ind = this->_sh_ind;
00327 uint id;
00328 realType *buf = this->_buf, *start;
00329 uint size = n * sizeof( realType );
00330 unsigned int i;
00331 {
00332 MPI_Comm_rank( comm, (int*)&i );
00333 id = i;
00334 }
00335 for( i = 0; i < np; ++i )
00336 {
00337 uint ns = nshared[i], c = ns;
00338 start = buf;
00339 for( ; c; --c )
00340 {
00341 memcpy( buf, u + n * ( *sh_ind++ ), size );
00342 buf += n;
00343 }
00344 MPI_Isend( (void*)start, ns * size, MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
00345 }
00346 start = buf;
00347 for( i = 0; i < np; ++i )
00348 {
00349 int nsn = n * nshared[i];
00350 MPI_Irecv( (void*)start, nshared[i] * size, MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
00351 start += nsn;
00352 }
00353 for( reqs = this->_reqs, i = np * 2; i; --i )
00354 MPI_Wait( reqs++, &status );
00355 sh_ind = this->_sh_ind;
00356 #define LOOP( OP ) \
00357 do \
00358 { \
00359 for( i = 0; i < np; ++i ) \
00360 { \
00361 uint c, j; \
00362 for( c = nshared[i]; c; --c ) \
00363 { \
00364 realType* uu = u + n * ( *sh_ind++ ); \
00365 for( j = n; j; --j ) \
00366 { \
00367 OP( *uu, *buf ); \
00368 ++uu; \
00369 ++buf; \
00370 } \
00371 } \
00372 } \
00373 } while( 0 )
00374 switch( op )
00375 {
00376 case GS_OP_ADD:
00377 LOOP( DO_ADD );
00378 break;
00379 case GS_OP_MUL:
00380 LOOP( DO_MUL );
00381 break;
00382 case GS_OP_MIN:
00383 LOOP( DO_MIN );
00384 break;
00385 case GS_OP_MAX:
00386 LOOP( DO_MAX );
00387 break;
00388 case GS_OP_BPR:
00389 LOOP( DO_BPR );
00390 break;
00391 }
00392 #undef LOOP
00393 }
00394
00395 void gs_data::nonlocal_info::nonlocal_many( realType** u, uint n, int op, MPI_Comm comm )
00396 {
00397 MPI_Status status;
00398 uint np = this->_np;
00399 MPI_Request* reqs = this->_reqs;
00400 uint* targ = this->_target;
00401 uint* nshared = this->_nshared;
00402 uint* sh_ind = this->_sh_ind;
00403 uint id;
00404 realType *buf = this->_buf, *start;
00405 unsigned int i;
00406 {
00407 MPI_Comm_rank( comm, (int*)&i );
00408 id = i;
00409 }
00410 for( i = 0; i < np; ++i )
00411 {
00412 uint c, j, ns = nshared[i];
00413 start = buf;
00414 for( j = 0; j < n; ++j )
00415 {
00416 realType* uu = u[j];
00417 for( c = 0; c < ns; ++c )
00418 *buf++ = uu[sh_ind[c]];
00419 }
00420 sh_ind += ns;
00421 MPI_Isend( (void*)start, n * ns * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
00422 }
00423 start = buf;
00424 for( i = 0; i < np; ++i )
00425 {
00426 int nsn = n * nshared[i];
00427 MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
00428 start += nsn;
00429 }
00430 for( reqs = this->_reqs, i = np * 2; i; --i )
00431 MPI_Wait( reqs++, &status );
00432 sh_ind = this->_sh_ind;
00433 #define LOOP( OP ) \
00434 do \
00435 { \
00436 for( i = 0; i < np; ++i ) \
00437 { \
00438 uint c, j, ns = nshared[i]; \
00439 for( j = 0; j < n; ++j ) \
00440 { \
00441 realType* uu = u[j]; \
00442 for( c = 0; c < ns; ++c ) \
00443 { \
00444 OP( uu[sh_ind[c]], *buf ); \
00445 ++buf; \
00446 } \
00447 } \
00448 sh_ind += ns; \
00449 } \
00450 } while( 0 )
00451 switch( op )
00452 {
00453 case GS_OP_ADD:
00454 LOOP( DO_ADD );
00455 break;
00456 case GS_OP_MUL:
00457 LOOP( DO_MUL );
00458 break;
00459 case GS_OP_MIN:
00460 LOOP( DO_MIN );
00461 break;
00462 case GS_OP_MAX:
00463 LOOP( DO_MAX );
00464 break;
00465 case GS_OP_BPR:
00466 LOOP( DO_BPR );
00467 break;
00468 }
00469 #undef LOOP
00470 }
00471
00472 /*---------------------------------------------------------------------------
00473 MOAB Crystal Router
00474 ---------------------------------------------------------------------------*/
00475 gs_data::crystal_data::crystal_data() {}
00476
00477 void gs_data::crystal_data::initialize( MPI_Comm comm )
00478 {
00479 int num, id;
00480 buffers[0].buf.buffer_init( 1024 );
00481 buffers[1].buf.buffer_init( 1024 );
00482 buffers[2].buf.buffer_init( 1024 );
00483 all = &buffers[0];
00484 keep = &buffers[1];
00485 send = &buffers[2];
00486 memcpy( &( this->_comm ), &comm, sizeof( MPI_Comm ) );
00487 MPI_Comm_rank( comm, &id );
00488 this->_id = id;
00489 MPI_Comm_size( comm, &num );
00490 this->_num = num;
00491 }
00492
00493 void gs_data::crystal_data::reset()
00494 {
00495 buffers[0].buf.reset();
00496 buffers[1].buf.reset();
00497 buffers[2].buf.reset();
00498 keep = NULL;
00499 all = NULL;
00500 send = NULL;
00501 }
00502
00503 // Partition data before sending
00504 void gs_data::crystal_data::partition( uint cutoff, crystal_buf* lo, crystal_buf* hi )
00505 {
00506 const uint* src = (uint*)all->buf.ptr;
00507 const uint* end = (uint*)src + all->n;
00508 uint *target, *lop, *hip;
00509 lo->n = hi->n = 0;
00510 lo->buf.buffer_reserve( all->n * sizeof( uint ) );
00511 hi->buf.buffer_reserve( all->n * sizeof( uint ) );
00512 lop = (uint*)lo->buf.ptr;
00513 hip = (uint*)hi->buf.ptr;
00514 while( src != end )
00515 {
00516 uint chunk_len = 3 + src[2];
00517 if( src[0] < cutoff )
00518 {
00519 target = lop;
00520 lo->n += chunk_len;
00521 lop += chunk_len;
00522 }
00523 else
00524 {
00525 target = hip;
00526 hi->n += chunk_len;
00527 hip += chunk_len;
00528 }
00529 memcpy( target, src, chunk_len * sizeof( uint ) );
00530 src += chunk_len;
00531 }
00532 }
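/* Illustration: every message in the "all" buffer is a chunk of
   ( target proc, source proc, payload length, payload... ), hence
   chunk_len = 3 + src[2].  partition() routes chunks whose target id lies
   below the cutoff into lo and the rest into hi without unpacking them. */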
00533
00534 // Send message to target process
00535 void gs_data::crystal_data::send_( uint target, int recvn )
00536 {
00537 MPI_Request req[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
00538 MPI_Status status[3];
00539 uint count[2] = { 0, 0 }, sum, *recv[2];
00540 crystal_buf* t;
00541 int i;
00542
00543 (void)VALGRIND_CHECK_MEM_IS_DEFINED( &send->n, sizeof( uint ) );
00544 MPI_Isend( (void*)&send->n, sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
00545 for( i = 0; i < recvn; ++i )
00546 MPI_Irecv( (void*)&count[i], sizeof( uint ), MPI_UNSIGNED_CHAR, target + i, target + i, _comm, &req[i + 1] );
00547 MPI_Waitall( recvn + 1, req, status );
00548 sum = keep->n;
00549 for( i = 0; i < recvn; ++i )
00550 sum += count[i];
00551 keep->buf.buffer_reserve( sum * sizeof( uint ) );
00552 recv[0] = (uint*)keep->buf.ptr;
00553 recv[0] += keep->n;
00554 recv[1] = recv[0] + count[0];
00555 keep->n = sum;
00556
00557 (void)VALGRIND_CHECK_MEM_IS_DEFINED( send->buf.ptr, send->n * sizeof( uint ) );
00558 MPI_Isend( (void*)send->buf.ptr, send->n * sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
00559 if( recvn )
00560 {
00561 MPI_Irecv( (void*)recv[0], count[0] * sizeof( uint ), MPI_UNSIGNED_CHAR, target, target, _comm, &req[1] );
00562 if( recvn == 2 )
00563 MPI_Irecv( (void*)recv[1], count[1] * sizeof( uint ), MPI_UNSIGNED_CHAR, target + 1, target + 1, _comm,
00564 &req[2] );
00565 }
00566 MPI_Waitall( recvn + 1, req, status );
00567
00568 t = all;
00569 all = keep;
00570 keep = t;
00571 }
00572
00573 void gs_data::crystal_data::crystal_router()
00574 {
00575 uint bl = 0, bh, n = _num, nl, target;
00576 int recvn;
00577 crystal_buf *lo, *hi;
00578 while( n > 1 )
00579 {
00580 nl = n / 2, bh = bl + nl;
00581 if( _id < bh )
00582 {
00583 target = _id + nl;
00584 recvn = ( n & 1 && _id == bh - 1 ) ? 2 : 1;
00585 lo = keep;
00586 hi = send;
00587 }
00588 else
00589 {
00590 target = _id - nl;
00591 recvn = ( target == bh ) ? ( --target, 0 ) : 1;
00592 hi = keep;
00593 lo = send;
00594 }
00595 partition( bh, lo, hi );
00596 send_( target, recvn );
00597 if( _id < bh )
00598 n = nl;
00599 else
00600 {
00601 n -= nl;
00602 bl = bh;
00603 }
00604 }
00605 }
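/* Worked example (illustrative): with _num = 5 ranks the first pass uses
   nl = 2 and bh = 2.  Ranks 0 and 1 keep chunks destined for ranks < 2 and
   send the rest up (0->2, 1->3); ranks 2, 3 and 4 keep chunks for ranks >= 2
   and send the rest down (2->0, 3->1, 4->1).  Because n is odd, rank 1 posts
   two receives (from 3 and 4) and rank 4 expects none.  The low half then
   recurses on {0,1} and the high half on {2,3,4} until each rank holds only
   its own chunks. */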
00606
00607 #define UINT_PER_X( X ) ( ( sizeof( X ) + sizeof( uint ) - 1 ) / sizeof( uint ) )
00608 #define UINT_PER_REAL UINT_PER_X( realType )
00609 #define UINT_PER_LONG UINT_PER_X( slong )
00610 #define UINT_PER_ULONG UINT_PER_X( Ulong )
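/* For example, with a 4-byte uint and 8-byte realType/slong/Ulong these all
   evaluate to ( 8 + 4 - 1 ) / 4 = 2, i.e. each real or long value occupies
   two uints in the packed crystal router buffers. */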
00611
00612 /*-------------------------------------------------------------------------
00613 Transfer
00614 -------------------------------------------------------------------------*/
00615 ErrorCode gs_data::crystal_data::gs_transfer( int dynamic, moab::TupleList& tl, unsigned pf )
00616 {
00617
00618 unsigned mi, ml, mul, mr;
00619 tl.getTupleSize( mi, ml, mul, mr );
00620
00621 const unsigned tsize = ( mi - 1 ) + ml * UINT_PER_LONG + mul * UINT_PER_ULONG + mr * UINT_PER_REAL;
00622 sint p, lp = -1;
00623 sint* ri;
00624 slong* rl;
00625 Ulong* rul;
00626 realType* rr;
00627 uint i, j, *buf, *len = 0, *buf_end;
00628
00629 /* sort to group by target proc */
00630 if( pf >= mi ) return moab::MB_MEMORY_ALLOCATION_FAILED;
00631 // fail("pf expected to be in vi array (%d, %d)", pf, mi);
00632 tl.sort( pf, &all->buf );
00633
00634 /* pack into buffer for crystal router */
00635 all->buf.buffer_reserve( ( tl.get_n() * ( 3 + tsize ) ) * sizeof( uint ) );
00636 all->n = 0;
00637 buf = (uint*)all->buf.ptr;
00638
00639 bool canWrite = tl.get_writeEnabled();
00640 if( !canWrite ) tl.enableWriteAccess();
00641
00642 ri = tl.vi_wr;
00643 rl = tl.vl_wr;
00644 rul = tl.vul_wr;
00645 rr = tl.vr_wr;
00646
00647 for( i = tl.get_n(); i; --i )
00648 {
00649 p = ri[pf];
00650 if( p != lp )
00651 {
00652 lp = p;
00653 *buf++ = p; /* target */
00654 *buf++ = _id; /* source */
00655 len = buf++;
00656 *len = 0; /* length */
00657 all->n += 3;
00658 }
00659 for( j = 0; j < mi; ++j, ++ri )
00660 if( j != pf ) *buf++ = *ri;
00661 for( j = ml; j; --j, ++rl )
00662 {
00663 memcpy( buf, rl, sizeof( slong ) );
00664 buf += UINT_PER_LONG;
00665 }
00666 for( j = mul; j; --j, ++rul )
00667 {
00668 memcpy( buf, rul, sizeof( Ulong ) );
00669 buf += UINT_PER_ULONG;
00670 }
00671 for( j = mr; j; --j, ++rr )
00672 {
00673 memcpy( buf, rr, sizeof( realType ) );
00674 buf += UINT_PER_REAL;
00675 }
00676 *len += tsize, all->n += tsize;
00677 }
00678
00679 crystal_router();
00680
00681 /* unpack */
00682 buf = (uint*)all->buf.ptr;
00683 buf_end = buf + all->n;
00684 tl.set_n( 0 );
00685 ri = tl.vi_wr;
00686 rl = tl.vl_wr;
00687 rul = tl.vul_wr;
00688 rr = tl.vr_wr;
00689
00690 while( buf != buf_end )
00691 {
00692 sint llen;
00693 buf++; /* target ( == this proc ) */
00694 p = *buf++; /* source */
00695 llen = *buf++; /* length */
00696 while( llen > 0 )
00697 {
00698 if( tl.get_n() == tl.get_max() )
00699 {
00700 if( !dynamic )
00701 {
00702 tl.set_n( tl.get_max() + 1 );
00703 if( !canWrite ) tl.disableWriteAccess();
00704 return moab::MB_SUCCESS;
00705 }
00706 ErrorCode rval = tl.resize( tl.get_max() + ( 1 + tl.get_max() ) / 2 + 1 );
00707 if( rval != moab::MB_SUCCESS )
00708 {
00709 if( !canWrite ) tl.disableWriteAccess();
00710 return rval;
00711 }
00712 ri = tl.vi_wr + mi * tl.get_n();
00713 rl = tl.vl_wr + ml * tl.get_n();
00714 rul = tl.vul_wr + mul * tl.get_n();
00715 rr = tl.vr_wr + mr * tl.get_n();
00716 }
00717 tl.inc_n();
00718 for( j = 0; j < mi; ++j )
00719 if( j != pf )
00720 *ri++ = *buf++;
00721 else
00722 *ri++ = p;
00723 for( j = ml; j; --j )
00724 {
00725 memcpy( rl++, buf, sizeof( slong ) );
00726 buf += UINT_PER_LONG;
00727 }
00728 for( j = mul; j; --j )
00729 {
00730 memcpy( rul++, buf, sizeof( Ulong ) );
00731 buf += UINT_PER_ULONG;
00732 }
00733 for( j = mr; j; --j )
00734 {
00735 memcpy( rr++, buf, sizeof( realType ) );
00736 buf += UINT_PER_REAL;
00737 }
00738 llen -= tsize;
00739 }
00740 }
00741
00742 if( !canWrite ) tl.disableWriteAccess();
00743 return moab::MB_SUCCESS;
00744 }
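/* Summary of the packed format used above (descriptive, not from the
   original source): for each destination proc the buffer holds a header
   ( target, source, length ) followed by the tuples themselves, each taking
   tsize uints: the mi-1 integer fields other than pf, then ml longs,
   mul Ulongs and mr reals padded to uint boundaries by the UINT_PER_*
   macros.  The unpacking loop walks the same layout in reverse and rewrites
   the pf field of every tuple to the source proc. */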
00745 #endif
00746
00747 /*--------------------------------------------------------------------------
00748 Combined Execution
00749 --------------------------------------------------------------------------*/
00750
00751 void gs_data::gs_data_op( realType* u, int op )
00752 {
00753 local_condense( u, op, this->local_cm );
00754 #ifdef MOAB_HAVE_MPI
00755 this->nlinfo->nonlocal( u, op, _comm );
00756 #endif
00757 local_uncondense( u, local_cm );
00758 }
00759
00760 void gs_data::gs_data_op_vec( realType* u, uint n, int op )
00761 {
00762 #ifdef MOAB_HAVE_MPI
00763 if( n > nlinfo->_maxv )
00764 moab::fail( "%s: initialized with max vec size = %d,"
00765 " but called with vec size = %d\n",
00766 __FILE__, nlinfo->_maxv, n );
00767 #endif
00768 local_condense_vec( u, n, op, local_cm );
00769 #ifdef MOAB_HAVE_MPI
00770 this->nlinfo->nonlocal_vec( u, n, op, _comm );
00771 #endif
00772 local_uncondense_vec( u, n, local_cm );
00773 }
00774
00775 void gs_data::gs_data_op_many( realType** u, uint n, int op )
00776 {
00777 uint i;
00778 #ifdef MOAB_HAVE_MPI
00779 if( n > nlinfo->_maxv )
00780 moab::fail( "%s: initialized with max vec size = %d,"
00781 " but called with vec size = %d\n",
00782 __FILE__, nlinfo->_maxv, n );
00783 #endif
00784 for( i = 0; i < n; ++i )
00785 local_condense( u[i], op, local_cm );
00786
00787 if( n > 6 ) /* hard upper limit on the number of vectors; see cpgs_op_many above */
00788 moab::fail( "%s: initialized with max vec size = %d,"
00789 " but called with vec size = %d\n",
00790 __FILE__, 6, n );
00791 #ifdef MOAB_HAVE_MPI
00792 this->nlinfo->nonlocal_many( u, n, op, _comm );
00793 #endif
00794 for( i = 0; i < n; ++i )
00795 local_uncondense( u[i], local_cm );
00796 }
00797
00798 /*--------------------------------------------------------------------------
00799 Setup
00800 --------------------------------------------------------------------------*/
00801
00802 ErrorCode gs_data::initialize( uint n,
00803 const long* label,
00804 const Ulong* ulabel,
00805 uint maxv,
00806 const unsigned int nlabels,
00807 const unsigned int nulabels,
00808 crystal_data* crystal )
00809 {
00810 nlinfo = NULL;
00811 unsigned int j;
00812 TupleList nonzero, primary;
00813 ErrorCode rval;
00814 #ifdef MOAB_HAVE_MPI
00815 TupleList shared;
00816 #else
00817 moab::TupleList::buffer buf;
00818 #endif
00819 (void)VALGRIND_CHECK_MEM_IS_DEFINED( label, nlabels * sizeof( long ) );
00820 (void)VALGRIND_CHECK_MEM_IS_DEFINED( ulabel, nlabels * sizeof( Ulong ) );
00821 #ifdef MOAB_HAVE_MPI
00822 MPI_Comm_dup( crystal->_comm, &this->_comm );
00823 #else
00824 buf.buffer_init( 1024 );
00825 #endif
00826
00827 /* construct list of nonzeros: (index ^, label) */
00828 nonzero.initialize( 1, nlabels, nulabels, 0, n );
00829 nonzero.enableWriteAccess();
00830 {
00831 uint i;
00832 sint* nzi;
00833 long* nzl;
00834 Ulong* nzul;
00835 nzi = nonzero.vi_wr;
00836 nzl = nonzero.vl_wr;
00837 nzul = nonzero.vul_wr;
00838 for( i = 0; i < n; ++i )
00839 if( label[nlabels * i] != 0 )
00840 {
00841 nzi[0] = i;
00842 for( j = 0; j < nlabels; j++ )
00843 nzl[j] = label[nlabels * i + j];
00844 for( j = 0; j < nulabels; j++ )
00845 nzul[j] = ulabel[nulabels * i + j];
00846 nzi++;
00847 nzl += nlabels;
00848 nzul += nulabels;
00849 nonzero.inc_n();
00850 }
00851 }
00852
00853 /* sort nonzeros by label: (index ^2, label ^1) */
00854 #ifndef MOAB_HAVE_MPI
00855 nonzero.sort( 1, &buf );
00856 #else
00857 nonzero.sort( 1, &crystal->all->buf );
00858 #endif
00859
00860 /* build list of unique labels w/ lowest associated index:
00861 (index in nonzero ^, primary (lowest) index in label, count, label(s),
00862 ulabel(s)) */
00863 primary.initialize( 3, nlabels, nulabels, 0, nonzero.get_n() );
00864 primary.enableWriteAccess();
00865 {
00866 uint i;
00867 sint *nzi = nonzero.vi_wr, *pi = primary.vi_wr;
00868 slong *nzl = nonzero.vl_wr, *pl = primary.vl_wr;
00869 Ulong *nzul = nonzero.vul_wr, *pul = primary.vul_wr;
00870 slong last = -1;
00871 for( i = 0; i < nonzero.get_n(); ++i, nzi += 1, nzl += nlabels, nzul += nulabels )
00872 {
00873 if( nzl[0] == last )
00874 {
00875 ++pi[-1];
00876 continue;
00877 }
00878 last = nzl[0];
00879 pi[0] = i;
00880 pi[1] = nzi[0];
00881 for( j = 0; j < nlabels; j++ )
00882 pl[j] = nzl[j];
00883 for( j = 0; j < nulabels; j++ )
00884 pul[j] = nzul[j];
00885 pi[2] = 1;
00886 pi += 3, pl += nlabels;
00887 pul += nulabels;
00888 primary.inc_n();
00889 }
00890 }
00891
00892 /* calculate size of local condense map */
00893 {
00894 uint i, count = 1;
00895 sint* pi = primary.vi_wr;
00896 for( i = primary.get_n(); i; --i, pi += 3 )
00897 if( pi[2] > 1 ) count += pi[2] + 1;
00898 this->local_cm = (sint*)malloc( count * sizeof( sint ) );
00899 }
00900
00901 /* sort unique labels by primary index:
00902 (nonzero index ^2, primary index ^1, count, label ^2) */
00903 #ifndef MOAB_HAVE_MPI
00904 primary.sort( 0, &buf );
00905 buf.reset();
00906 // buffer_free(&buf);
00907 #else
00908 primary.sort( 0, &crystal->all->buf );
00909 #endif
00910
00911 /* construct local condense map */
00912 {
00913 uint i, ln;
00914 sint* pi = primary.vi_wr;
00915 sint* cm = this->local_cm;
00916 for( i = primary.get_n(); i > 0; --i, pi += 3 )
00917 if( ( ln = pi[2] ) > 1 )
00918 {
00919 sint* nzi = nonzero.vi_wr + 1 * pi[0];
00920 for( j = ln; j > 0; --j, nzi += 1 )
00921 *cm++ = nzi[0];
00922 *cm++ = -1;
00923 }
00924 *cm++ = -1;
00925 }
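/* Worked example (illustrative): with n = 5, nlabels = 1 and labels
   { 10, 22, 10, 0, 22 }, the nonzero list is (0,10),(1,22),(2,10),(4,22);
   after the sort by label, primary records label 10 (count 2) and label 22
   (count 2), and the loop above emits
       local_cm = { 0, 2, -1,   1, 4, -1,   -1 }
   so indices 0 and 2, and indices 1 and 4, are condensed together. */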
00926 nonzero.reset();
00927 #ifndef MOAB_HAVE_MPI
00928 primary.reset();
00929 #else
00930 /* assign work proc by label modulo np */
00931 {
00932 uint i;
00933 sint* pi = primary.vi_wr;
00934 slong* pl = primary.vl_wr;
00935 for( i = primary.get_n(); i; --i, pi += 3, pl += nlabels )
00936 pi[0] = pl[0] % crystal->_num;
00937 }
00938 rval = crystal->gs_transfer( 1, primary, 0 ); /* transfer to work procs */
00939 if( rval != MB_SUCCESS ) return rval;
00940 /* primary: (source proc, index on src, useless, label) */
00941 /* sort by label */
00942 primary.sort( 3, &crystal->all->buf );
00943 /* add sentinel to primary list */
00944 if( primary.get_n() == primary.get_max() )
00945 primary.resize( ( primary.get_max() ? primary.get_max() + ( primary.get_max() + 1 ) / 2 + 1 : 2 ) );
00946 primary.vl_wr[nlabels * primary.get_n()] = -1;
00947 /* construct shared list: (proc1, proc2, index1, label) */
00948 #ifdef MOAB_HAVE_MPI
00949 shared.initialize( 3, nlabels, nulabels, 0, primary.get_n() );
00950 shared.enableWriteAccess();
00951 #endif
00952 {
00953 sint *pi1 = primary.vi_wr, *si = shared.vi_wr;
00954 slong lbl, *pl1 = primary.vl_wr, *sl = shared.vl_wr;
00955 Ulong *pul1 = primary.vul_wr, *sul = shared.vul_wr;
00956 for( ; ( lbl = pl1[0] ) != -1; pi1 += 3, pl1 += nlabels, pul1 += nulabels )
00957 {
00958 sint* pi2 = pi1 + 3;
00959 slong* pl2 = pl1 + nlabels;
00960 Ulong* pul2 = pul1 + nulabels;
00961 for( ; pl2[0] == lbl; pi2 += 3, pl2 += nlabels, pul2 += nulabels )
00962 {
00963 if( shared.get_n() + 2 > shared.get_max() )
00964 shared.resize( ( shared.get_max() ? shared.get_max() + ( shared.get_max() + 1 ) / 2 + 1 : 2 ) ),
00965 si = shared.vi_wr + shared.get_n() * 3;
00966 sl = shared.vl_wr + shared.get_n() * nlabels;
00967 sul = shared.vul_wr + shared.get_n() * nulabels;
00968 si[0] = pi1[0];
00969 si[1] = pi2[0];
00970 si[2] = pi1[1];
00971 for( j = 0; j < nlabels; j++ )
00972 sl[j] = pl2[j];
00973 for( j = 0; j < nulabels; j++ )
00974 sul[j] = pul2[j];
00975 si += 3;
00976 sl += nlabels;
00977 sul += nulabels;
00978 shared.inc_n();
00979 si[0] = pi2[0];
00980 si[1] = pi1[0];
00981 si[2] = pi2[1];
00982 for( j = 0; j < nlabels; j++ )
00983 sl[j] = pl1[j];
00984 for( j = 0; j < nulabels; j++ )
00985 sul[j] = pul1[j];
00986 si += 3;
00987 sl += nlabels;
00988 sul += nulabels;
00989 shared.inc_n();
00990 }
00991 }
00992 }
00993 primary.reset();
00994 rval = crystal->gs_transfer( 1, shared, 0 ); /* transfer to dest procs */
00995 if( rval != MB_SUCCESS ) return rval;
00996 /* shared list: (useless, proc2, index, label) */
00997 /* sort by label */
00998 shared.sort( 3, &crystal->all->buf );
00999 /* sort by partner proc */
01000 shared.sort( 1, &crystal->all->buf );
01001 /* count partner procs */
01002 {
01003 uint i, count = 0;
01004 sint proc = -1, *si = shared.vi_wr;
01005 for( i = shared.get_n(); i; --i, si += 3 )
01006 if( si[1] != proc )
01007 {
01008 ++count;
01009 proc = si[1];
01010 }
01011 // this->nlinfo = new nonlocal_info();
01012 // this->nlinfo->initialize(count,shared.get_n(),
01013 // nlabels, nulabels, maxv);
01014 this->nlinfo = new nonlocal_info( count, shared.get_n(), nlabels, nulabels, maxv );
01015 }
01016 /* construct non-local info */
01017 {
01018 uint i;
01019 sint proc = -1, *si = shared.vi_wr;
01020 slong* sl = shared.vl_wr;
01021 Ulong* ul = shared.vul_wr;
01022 uint* target = this->nlinfo->_target;
01023 uint* nshared = this->nlinfo->_nshared;
01024 uint* sh_ind = this->nlinfo->_sh_ind;
01025 slong* slabels = this->nlinfo->_slabels;
01026 Ulong* ulabels = this->nlinfo->_ulabels;
01027 for( i = shared.get_n(); i; --i, si += 3 )
01028 {
01029 if( si[1] != proc )
01030 {
01031 proc = si[1];
01032 *target++ = proc;
01033 *nshared++ = 0;
01034 }
01035 ++nshared[-1];
01036 *sh_ind++ = si[2];
01037 // don't store 1st slabel
01038 sl++;
01039 for( j = 0; j < nlabels - 1; j++ )
01040 slabels[j] = sl[j];
01041 for( j = 0; j < nulabels; j++ )
01042 ulabels[j] = ul[j];
01043 slabels += nlabels - 1;
01044 ulabels += nulabels;
01045 sl += nlabels - 1;
01046 ul += nulabels;
01047 }
01048 }
01049 shared.reset();
01050 #endif
01051 return MB_SUCCESS;
01052 }
01053
01054 void gs_data::reset()
01055 {
01056 free( local_cm );
01057 local_cm = NULL;
01058 #ifdef MOAB_HAVE_MPI
01059 if( nlinfo != NULL )
01060 {
01061 nlinfo->nlinfo_free();
01062 delete this->nlinfo;
01063 MPI_Comm_free( &_comm );
01064 nlinfo = NULL;
01065 }
01066 #endif
01067 }
01068
01069 } // namespace moab