Mesh Oriented datABase
(version 5.4.1)
Array-based unstructured mesh data structure
gs.cpp
/* compile-time settings:

   FORTRAN naming convention
     default                      cpgs_setup, etc.
     -DUPCASE                     CPGS_SETUP, etc.
     -DUNDERSCORE                 cpgs_setup_, etc.

   -DMPI             parallel version (sequential otherwise)
   -DCRYSTAL_STATIC  avoid some message exchange at the risk of
                     crashing b/c of insufficient buffer size

   -DINITIAL_BUFFER_SIZE=expression
      ignored unless CRYSTAL_STATIC is defined.
      arithmetic expression controlling the initial buffer size for the crystal
      router; this needs to be large enough to hold the intermediate messages
      during all stages of the crystal router

      variables that can be used in expression include
         num - the number of processors
         n   - the length of the global index array

*/

/* default for INITIAL_BUFFER_SIZE */
#ifdef CRYSTAL_STATIC
#ifndef INITIAL_BUFFER_SIZE
#define INITIAL_BUFFER_SIZE 2 * ( 3 * num + n * 9 )
#endif
#endif

/* FORTRAN usage:

   integer hc, np
   call crystal_new(hc,comm,np)  ! get a crystal router handle (see fcrystal.c)

   integer hgs
   integer n, max_vec_dim
   integer*? global_index_array(1:n) ! type corresponding to slong in "types.h"

   call cpgs_setup(hgs,hc,global_index_array,n,max_vec_dim)
     sets hgs to new handle

   !ok to call crystal_done(hc) here, or any later time

   call cpgs_op(hgs, u, op)
     integer handle, op : 1-add, 2-multiply, 3-min, 4-max
     real u(1:n) - same layout as global_index_array provided to cpgs_setup

   call cpgs_op_vec(hgs, u, d, op)
     integer op : 1-add, 2-multiply, 3-min, 4-max
     integer d <= max_vec_dim
     real u(1:d, 1:n) - vector components for each node stored together

   call cpgs_op_many(hgs, u1, u2, u3, u4, u5, u6, d, op)
     integer op : 1-add, 2-multiply, 3-min, 4-max
     integer d : in {1,2,3,4,5,6}, <= max_vec_dim
     real u1(1:n), u2(1:n), u3(1:n), etc.

     same effect as: call cpgs_op(hgs, u1, op)
                     if(d.gt.1) call cpgs_op(hgs, u2, op)
                     if(d.gt.2) call cpgs_op(hgs, u3, op)
                     etc.
     with possibly some savings as fewer messages are exchanged

   call cpgs_free(hgs)
*/
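/* C++ usage sketch (editor's illustration, not part of the original docs;
   local names here are hypothetical and the exact constructor signatures
   live in moab/gs.hpp):

   moab::gs_data::crystal_data crystal;
   crystal.initialize( MPI_COMM_WORLD );      // wrap the communicator

   // one global label per local node; a label of 0 means the node is
   // ignored by setup (see gs_data::initialize below)
   std::vector< long >  label( n );
   std::vector< Ulong > ulabel( n );

   moab::gs_data gsd;
   moab::ErrorCode rval = gsd.initialize( n, label.data(), ulabel.data(),
                                          maxv, 1, 1, &crystal );

   gsd.gs_data_op( u, GS_OP_ADD );  // sum u over all nodes sharing a label
   gsd.reset();                     // free condense map and non-local info
   crystal.reset();
*/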
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <cstring>
#include <cmath>
#include "moab/gs.hpp"
#ifdef MOAB_HAVE_MPI
#include "moab_mpi.h"
#endif

/* no-op fallback so builds without valgrind headers still compile */
#ifdef MOAB_HAVE_VALGRIND
#include <valgrind/memcheck.h>
#elif !defined( VALGRIND_CHECK_MEM_IS_DEFINED )
#define VALGRIND_CHECK_MEM_IS_DEFINED( a, b ) ( (void)0 )
#endif

namespace moab
{

/*--------------------------------------------------------------------------
   Local Execution Phases
  --------------------------------------------------------------------------*/

#define DO_SET( a, b ) b = a
#define DO_ADD( a, b ) a += b
#define DO_MUL( a, b ) a *= b
#define DO_MIN( a, b ) \
    if( ( b ) < ( a ) ) ( a ) = b
#define DO_MAX( a, b ) \
    if( ( b ) > ( a ) ) ( a ) = b
/* reduce two labels to their common leading bit pattern (binary prefix) */
#define DO_BPR( a, b )         \
    do                         \
    {                          \
        uint a_ = a;           \
        uint b_ = b;           \
        for( ;; )              \
        {                      \
            if( a_ < b_ )      \
                b_ >>= 1;      \
            else if( b_ < a_ ) \
                a_ >>= 1;      \
            else               \
                break;         \
        }                      \
        ( a ) = a_;            \
    } while( 0 )

#define LOOP( op )                           \
    do                                       \
    {                                        \
        sint i, j;                           \
        while( ( i = *cm++ ) != -1 )         \
            while( ( j = *cm++ ) != -1 )     \
                op( u[i], u[j] );            \
    } while( 0 )

static void local_condense( realType* u, int op, const sint* cm )
{
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
}

static void local_uncondense( realType* u, const sint* cm )
{
    LOOP( DO_SET );
}

#undef LOOP

#define LOOP( op )                                              \
    do                                                          \
    {                                                           \
        sint i, j, k;                                           \
        while( ( i = *cm++ ) != -1 )                            \
        {                                                       \
            while( ( j = *cm++ ) != -1 )                        \
            {                                                   \
                /* restart at node i's components for each j */ \
                realType* pi = u + n * i;                       \
                realType* pj = u + n * j;                       \
                for( k = n; k; --k )                            \
                {                                               \
                    op( *pi, *pj );                             \
                    ++pi;                                       \
                    ++pj;                                       \
                }                                               \
            }                                                   \
        }                                                       \
    } while( 0 )

static void local_condense_vec( realType* u, uint n, int op, const sint* cm )
{
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
}

static void local_uncondense_vec( realType* u, uint n, const sint* cm )
{
    LOOP( DO_SET );
}

#undef LOOP
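/* The condense map `cm` used above is a flat array of index groups: each
   group lists the node indices sharing one label, terminated by -1, and a
   final extra -1 ends the map.  Editor's illustration with hypothetical
   data, where nodes 0,4,7 share a label and nodes 2,5 share another:

     sint cm[] = { 0, 4, 7, -1,   2, 5, -1,   -1 };

     local_condense( u, GS_OP_ADD, cm );  // u[0] += u[4] + u[7];  u[2] += u[5]
     local_uncondense( u, cm );           // u[4] = u[7] = u[0];   u[5] = u[2]

   so after condense the first index of each group holds the reduced value,
   and uncondense broadcasts it back to the rest of the group. */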
/*--------------------------------------------------------------------------
   Non-local Execution Phases
  --------------------------------------------------------------------------*/

#ifdef MOAB_HAVE_MPI

void gs_data::nonlocal_info::initialize( uint np, uint count, uint nlabels, uint nulabels, uint maxv )
{
    _target  = NULL;
    _nshared = NULL;
    _sh_ind  = NULL;
    _slabels = NULL;
    _ulabels = NULL;
    _reqs    = NULL;
    _buf     = NULL;
    _np      = np;
    _target  = (uint*)malloc( ( 2 * np + count ) * sizeof( uint ) );
    _nshared = _target + np;
    _sh_ind  = _nshared + np;
    if( 1 < nlabels )
        _slabels = (slong*)malloc( ( ( nlabels - 1 ) * count ) * sizeof( slong ) );
    else
        _slabels = NULL;
    _ulabels = (Ulong*)malloc( ( nulabels * count ) * sizeof( Ulong ) );
    _reqs    = (MPI_Request*)malloc( 2 * np * sizeof( MPI_Request ) );
    _buf     = (realType*)malloc( ( 2 * count * maxv ) * sizeof( realType ) );
    _maxv    = maxv;
}

void gs_data::nonlocal_info::nlinfo_free()
{
    // Free the ptrs
    free( _buf );
    free( _reqs );
    free( _target );
    free( _slabels );
    free( _ulabels );
    // Set to null
    _ulabels = NULL;
    _buf     = NULL;
    _reqs    = NULL;
    _target  = NULL;
    _slabels = NULL;
    _nshared = NULL;
    _sh_ind  = NULL;
}

void gs_data::nonlocal_info::nonlocal( realType* u, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint c = nshared[i];
        start  = buf;
        for( ; c; --c )
            *buf++ = u[*sh_ind++];
        MPI_Isend( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        MPI_Irecv( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nshared[i];
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                        \
    do                                    \
    {                                     \
        for( i = 0; i < np; ++i )         \
        {                                 \
            uint c;                       \
            for( c = nshared[i]; c; --c ) \
            {                             \
                OP( u[*sh_ind], *buf );   \
                ++sh_ind, ++buf;          \
            }                             \
        }                                 \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}
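/* Exchange pattern shared by nonlocal(), nonlocal_vec() and nonlocal_many()
   (editor's note): _buf holds packed sends in its first half and receives
   in its second half.  For each of the _np partner ranks, the values of the
   nshared[i] shared nodes (indexed through _sh_ind) are packed and posted
   with MPI_Isend, while matching MPI_Irecv calls are posted for the
   partner's copies.  Once MPI_Wait has drained all 2 * np requests, the
   received values are folded into u in the same _sh_ind order, so every
   rank ends up with the same reduced value for each shared node. */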
void gs_data::nonlocal_info::nonlocal_vec( realType* u, uint n, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    uint size = n * sizeof( realType );
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint ns = nshared[i], c = ns;
        start   = buf;
        for( ; c; --c )
        {
            memcpy( buf, u + n * ( *sh_ind++ ), size );
            buf += n;
        }
        MPI_Isend( (void*)start, ns * size, MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        int nsn = n * nshared[i];
        /* receive exactly the ns * size bytes sent above */
        MPI_Irecv( (void*)start, nshared[i] * size, MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nsn;
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                                    \
    do                                                \
    {                                                 \
        for( i = 0; i < np; ++i )                     \
        {                                             \
            uint c, j;                                \
            for( c = nshared[i]; c; --c )             \
            {                                         \
                realType* uu = u + n * ( *sh_ind++ ); \
                for( j = n; j; --j )                  \
                {                                     \
                    OP( *uu, *buf );                  \
                    ++uu;                             \
                    ++buf;                            \
                }                                     \
            }                                         \
        }                                             \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

void gs_data::nonlocal_info::nonlocal_many( realType** u, uint n, int op, MPI_Comm comm )
{
    MPI_Status status;
    uint np           = this->_np;
    MPI_Request* reqs = this->_reqs;
    uint* targ        = this->_target;
    uint* nshared     = this->_nshared;
    uint* sh_ind      = this->_sh_ind;
    uint id;
    realType *buf = this->_buf, *start;
    unsigned int i;
    {
        MPI_Comm_rank( comm, (int*)&i );
        id = i;
    }
    for( i = 0; i < np; ++i )
    {
        uint c, j, ns = nshared[i];
        start = buf;
        for( j = 0; j < n; ++j )
        {
            realType* uu = u[j];
            for( c = 0; c < ns; ++c )
                *buf++ = uu[sh_ind[c]];
        }
        sh_ind += ns;
        MPI_Isend( (void*)start, n * ns * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
    }
    start = buf;
    for( i = 0; i < np; ++i )
    {
        int nsn = n * nshared[i];
        MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
        start += nsn;
    }
    for( reqs = this->_reqs, i = np * 2; i; --i )
        MPI_Wait( reqs++, &status );
    sh_ind = this->_sh_ind;
#define LOOP( OP )                             \
    do                                         \
    {                                          \
        for( i = 0; i < np; ++i )              \
        {                                      \
            uint c, j, ns = nshared[i];        \
            for( j = 0; j < n; ++j )           \
            {                                  \
                realType* uu = u[j];           \
                for( c = 0; c < ns; ++c )      \
                {                              \
                    OP( uu[sh_ind[c]], *buf ); \
                    ++buf;                     \
                }                              \
            }                                  \
            sh_ind += ns;                      \
        }                                      \
    } while( 0 )
    switch( op )
    {
        case GS_OP_ADD:
            LOOP( DO_ADD );
            break;
        case GS_OP_MUL:
            LOOP( DO_MUL );
            break;
        case GS_OP_MIN:
            LOOP( DO_MIN );
            break;
        case GS_OP_MAX:
            LOOP( DO_MAX );
            break;
        case GS_OP_BPR:
            LOOP( DO_BPR );
            break;
    }
#undef LOOP
}

/*---------------------------------------------------------------------------
   MOAB Crystal Router
  ---------------------------------------------------------------------------*/
gs_data::crystal_data::crystal_data() {}

void gs_data::crystal_data::initialize( MPI_Comm comm )
{
    int num, id;
    buffers[0].buf.buffer_init( 1024 );
    buffers[1].buf.buffer_init( 1024 );
    buffers[2].buf.buffer_init( 1024 );
    all  = &buffers[0];
    keep = &buffers[1];
    send = &buffers[2];
    memcpy( &( this->_comm ), &comm, sizeof( MPI_Comm ) );
    MPI_Comm_rank( comm, &id );
    this->_id = id;
    MPI_Comm_size( comm, &num );
    this->_num = num;
}

void gs_data::crystal_data::reset()
{
    buffers[0].buf.reset();
    buffers[1].buf.reset();
    buffers[2].buf.reset();
    keep = NULL;
    all  = NULL;
    send = NULL;
}
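/* Message layout inside the crystal buffers (editor's note): each chunk is
   ( target, source, len, payload[len] ) stored as uints, which is why
   partition() below walks the buffer with chunk_len = 3 + src[2].
   Hypothetical example with two chunks queued on rank 0:

     all->buf = { 3, 0, 2, a, b,    // target 3, source 0, len 2
                  1, 0, 1, c };     // target 1, source 0, len 1

   partition( cutoff, lo, hi ) copies chunks with target < cutoff to lo and
   the rest to hi, preserving their relative order. */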
// Partition data before sending
void gs_data::crystal_data::partition( uint cutoff, crystal_buf* lo, crystal_buf* hi )
{
    const uint* src = (uint*)all->buf.ptr;
    const uint* end = (uint*)src + all->n;
    uint *target, *lop, *hip;
    lo->n = hi->n = 0;
    lo->buf.buffer_reserve( all->n * sizeof( uint ) );
    hi->buf.buffer_reserve( all->n * sizeof( uint ) );
    lop = (uint*)lo->buf.ptr;
    hip = (uint*)hi->buf.ptr;
    while( src != end )
    {
        uint chunk_len = 3 + src[2];
        if( src[0] < cutoff )
        {
            target = lop;
            lo->n += chunk_len;
            lop += chunk_len;
        }
        else
        {
            target = hip;
            hi->n += chunk_len;
            hip += chunk_len;
        }
        memcpy( target, src, chunk_len * sizeof( uint ) );
        src += chunk_len;
    }
}

// Send message to target process
void gs_data::crystal_data::send_( uint target, int recvn )
{
    MPI_Request req[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
    MPI_Status status[3];
    uint count[2] = { 0, 0 }, sum, *recv[2];
    crystal_buf* t;
    int i;

    (void)VALGRIND_CHECK_MEM_IS_DEFINED( &send->n, sizeof( uint ) );
    MPI_Isend( (void*)&send->n, sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
    for( i = 0; i < recvn; ++i )
        MPI_Irecv( (void*)&count[i], sizeof( uint ), MPI_UNSIGNED_CHAR, target + i, target + i, _comm, &req[i + 1] );
    MPI_Waitall( recvn + 1, req, status );
    sum = keep->n;
    for( i = 0; i < recvn; ++i )
        sum += count[i];
    keep->buf.buffer_reserve( sum * sizeof( uint ) );
    recv[0] = (uint*)keep->buf.ptr;
    recv[0] += keep->n;
    recv[1] = recv[0] + count[0];
    keep->n = sum;

    (void)VALGRIND_CHECK_MEM_IS_DEFINED( send->buf.ptr, send->n * sizeof( uint ) );
    MPI_Isend( (void*)send->buf.ptr, send->n * sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
    if( recvn )
    {
        MPI_Irecv( (void*)recv[0], count[0] * sizeof( uint ), MPI_UNSIGNED_CHAR, target, target, _comm, &req[1] );
        if( recvn == 2 )
            MPI_Irecv( (void*)recv[1], count[1] * sizeof( uint ), MPI_UNSIGNED_CHAR, target + 1, target + 1, _comm,
                       &req[2] );
    }
    MPI_Waitall( recvn + 1, req, status );

    t    = all;
    all  = keep;
    keep = t;
}
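/* crystal_router() below repeatedly halves the active rank range [bl, bl+n):
   each rank in the lower half pairs with one in the upper half, sends the
   chunks destined for the other half, keeps the rest, and the half
   containing this rank becomes the new active range.  Editor's trace of
   the first step for n = 5 ranks (0..4): nl = 2, bh = 2, so
     rank 0 <-> rank 2,  rank 1 <-> rank 3,
     rank 1 additionally receives from rank 4 (recvn == 2, n odd), and
     rank 4 sends to rank 1 but receives nothing (recvn == 0).
   After O(log2 n) rounds every chunk sits on its target rank. */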
void gs_data::crystal_data::crystal_router()
{
    uint bl = 0, bh, n = _num, nl, target;
    int recvn;
    crystal_buf *lo, *hi;
    while( n > 1 )
    {
        nl = n / 2, bh = bl + nl;
        if( _id < bh )
        {
            target = _id + nl;
            recvn  = ( n & 1 && _id == bh - 1 ) ? 2 : 1;
            lo     = keep;
            hi     = send;
        }
        else
        {
            target = _id - nl;
            recvn  = ( target == bh ) ? ( --target, 0 ) : 1;
            hi     = keep;
            lo     = send;
        }
        partition( bh, lo, hi );
        send_( target, recvn );
        if( _id < bh )
            n = nl;
        else
        {
            n -= nl;
            bl = bh;
        }
    }
}

#define UINT_PER_X( X ) ( ( sizeof( X ) + sizeof( uint ) - 1 ) / sizeof( uint ) )
#define UINT_PER_REAL UINT_PER_X( realType )
#define UINT_PER_LONG UINT_PER_X( slong )
#define UINT_PER_ULONG UINT_PER_X( Ulong )

/*-------------------------------------------------------------------------
   Transfer
  -------------------------------------------------------------------------*/
ErrorCode gs_data::crystal_data::gs_transfer( int dynamic, moab::TupleList& tl, unsigned pf )
{
    unsigned mi, ml, mul, mr;
    tl.getTupleSize( mi, ml, mul, mr );

    const unsigned tsize = ( mi - 1 ) + ml * UINT_PER_LONG + mul * UINT_PER_ULONG + mr * UINT_PER_REAL;
    sint p, lp = -1;
    sint* ri;
    slong* rl;
    Ulong* rul;
    realType* rr;
    uint i, j, *buf, *len = 0, *buf_end;

    /* sort to group by target proc */
    if( pf >= mi ) return moab::MB_MEMORY_ALLOCATION_FAILED;
    // fail("pf expected to be in vi array (%d, %d)", pf, mi);
    tl.sort( pf, &all->buf );

    /* pack into buffer for crystal router */
    all->buf.buffer_reserve( ( tl.get_n() * ( 3 + tsize ) ) * sizeof( uint ) );
    all->n = 0;
    buf    = (uint*)all->buf.ptr;

    bool canWrite = tl.get_writeEnabled();
    if( !canWrite ) tl.enableWriteAccess();

    ri  = tl.vi_wr;
    rl  = tl.vl_wr;
    rul = tl.vul_wr;
    rr  = tl.vr_wr;

    for( i = tl.get_n(); i; --i )
    {
        p = ri[pf];
        if( p != lp )
        {
            lp     = p;
            *buf++ = p;   /* target */
            *buf++ = _id; /* source */
            len    = buf++;
            *len   = 0; /* length */
            all->n += 3;
        }
        for( j = 0; j < mi; ++j, ++ri )
            if( j != pf ) *buf++ = *ri;
        for( j = ml; j; --j, ++rl )
        {
            memcpy( buf, rl, sizeof( slong ) );
            buf += UINT_PER_LONG;
        }
        for( j = mul; j; --j, ++rul )
        {
            memcpy( buf, rul, sizeof( Ulong ) );
            buf += UINT_PER_ULONG;
        }
        for( j = mr; j; --j, ++rr )
        {
            memcpy( buf, rr, sizeof( realType ) );
            buf += UINT_PER_REAL;
        }
        *len += tsize, all->n += tsize;
    }

    crystal_router();

    /* unpack */
    buf     = (uint*)all->buf.ptr;
    buf_end = buf + all->n;
    tl.set_n( 0 );
    ri  = tl.vi_wr;
    rl  = tl.vl_wr;
    rul = tl.vul_wr;
    rr  = tl.vr_wr;

    while( buf != buf_end )
    {
        sint llen;
        buf++;         /* target ( == this proc ) */
        p    = *buf++; /* source */
        llen = *buf++; /* length */
        while( llen > 0 )
        {
            if( tl.get_n() == tl.get_max() )
            {
                if( !dynamic )
                {
                    tl.set_n( tl.get_max() + 1 );
                    if( !canWrite ) tl.disableWriteAccess();
                    return moab::MB_SUCCESS;
                }
                ErrorCode rval = tl.resize( tl.get_max() + ( 1 + tl.get_max() ) / 2 + 1 );
                if( rval != moab::MB_SUCCESS )
                {
                    if( !canWrite ) tl.disableWriteAccess();
                    return rval;
                }
                ri  = tl.vi_wr + mi * tl.get_n();
                rl  = tl.vl_wr + ml * tl.get_n();
                rul = tl.vul_wr + mul * tl.get_n();
                rr  = tl.vr_wr + mr * tl.get_n();
            }
            tl.inc_n();
            for( j = 0; j < mi; ++j )
                if( j != pf )
                    *ri++ = *buf++;
                else
                    *ri++ = p;
            for( j = ml; j; --j )
            {
                memcpy( rl++, buf, sizeof( slong ) );
                buf += UINT_PER_LONG;
            }
            for( j = mul; j; --j )
            {
                memcpy( rul++, buf, sizeof( Ulong ) );
                buf += UINT_PER_ULONG;
            }
            for( j = mr; j; --j )
            {
                memcpy( rr++, buf, sizeof( realType ) );
                buf += UINT_PER_REAL;
            }
            llen -= tsize;
        }
    }

    if( !canWrite ) tl.disableWriteAccess();
    return moab::MB_SUCCESS;
}
#endif
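/* gs_transfer() usage sketch (editor's illustration): route each tuple of a
   TupleList to the rank stored in integer field pf.  The field counts and
   the `crystal` instance name here are hypothetical.

   moab::TupleList tl;
   tl.initialize( 2, 0, 0, 1, 100 );  // 2 ints, 1 real per tuple
   tl.enableWriteAccess();
   // tuple 0: send value 3.14, tagged 42, to rank 1
   tl.vi_wr[0] = 1;
   tl.vi_wr[1] = 42;
   tl.vr_wr[0] = 3.14;
   tl.inc_n();

   crystal.gs_transfer( 1, tl, 0 );   // dynamic = 1: grow tl on receive

   // afterwards tl holds the tuples sent *to* this rank, with field pf
   // rewritten to the source rank (see the unpack loop above)
*/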
/*--------------------------------------------------------------------------
   Combined Execution
  --------------------------------------------------------------------------*/

void gs_data::gs_data_op( realType* u, int op )
{
    local_condense( u, op, this->local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal( u, op, _comm );
#endif
    local_uncondense( u, local_cm );
}

void gs_data::gs_data_op_vec( realType* u, uint n, int op )
{
#ifdef MOAB_HAVE_MPI
    if( n > nlinfo->_maxv )
        moab::fail( "%s: initialized with max vec size = %d,"
                    " but called with vec size = %d\n",
                    __FILE__, nlinfo->_maxv, n );
#endif
    local_condense_vec( u, n, op, local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal_vec( u, n, op, _comm );
#endif
    local_uncondense_vec( u, n, local_cm );
}

void gs_data::gs_data_op_many( realType** u, uint n, int op )
{
    uint i;
#ifdef MOAB_HAVE_MPI
    if( n > nlinfo->_maxv )
        moab::fail( "%s: initialized with max vec size = %d,"
                    " but called with vec size = %d\n",
                    __FILE__, nlinfo->_maxv, n );
#endif
    for( i = 0; i < n; ++i )
        local_condense( u[i], op, local_cm );
#ifdef MOAB_HAVE_MPI
    this->nlinfo->nonlocal_many( u, n, op, _comm );
#endif
    for( i = 0; i < n; ++i )
        local_uncondense( u[i], local_cm );
}
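/* Editor's illustration of the _many variant: arrays with the same node
   layout are reduced in a single message exchange instead of one per array
   (names here are hypothetical; requires maxv >= 3 at setup):

   realType* uvw[3] = { ux, uy, uz };          // e.g. velocity components
   gsd.gs_data_op_many( uvw, 3, GS_OP_MAX );
   // same result as calling gs_data_op() on ux, uy and uz separately,
   // but with fewer messages exchanged
*/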
/*--------------------------------------------------------------------------
   Setup
  --------------------------------------------------------------------------*/

ErrorCode gs_data::initialize( uint n,
                               const long* label,
                               const Ulong* ulabel,
                               uint maxv,
                               const unsigned int nlabels,
                               const unsigned int nulabels,
                               crystal_data* crystal )
{
    nlinfo = NULL;
    unsigned int j;
    TupleList nonzero, primary;
    ErrorCode rval;
#ifdef MOAB_HAVE_MPI
    TupleList shared;
#else
    moab::TupleList::buffer buf;
#endif
    (void)VALGRIND_CHECK_MEM_IS_DEFINED( label, n * nlabels * sizeof( long ) );
    (void)VALGRIND_CHECK_MEM_IS_DEFINED( ulabel, n * nulabels * sizeof( Ulong ) );
#ifdef MOAB_HAVE_MPI
    MPI_Comm_dup( crystal->_comm, &this->_comm );
#else
    buf.buffer_init( 1024 );
#endif

    /* construct list of nonzeros: (index ^, label) */
    nonzero.initialize( 1, nlabels, nulabels, 0, n );
    nonzero.enableWriteAccess();
    {
        uint i;
        sint* nzi;
        long* nzl;
        Ulong* nzul;
        nzi  = nonzero.vi_wr;
        nzl  = nonzero.vl_wr;
        nzul = nonzero.vul_wr;
        for( i = 0; i < n; ++i )
            if( label[nlabels * i] != 0 )
            {
                nzi[0] = i;
                for( j = 0; j < nlabels; j++ )
                    nzl[j] = label[nlabels * i + j];
                for( j = 0; j < nulabels; j++ )
                    nzul[j] = ulabel[nulabels * i + j];
                nzi++;
                nzl += nlabels;
                nzul += nulabels;
                nonzero.inc_n();
            }
    }

    /* sort nonzeros by label: (index ^2, label ^1) */
#ifndef MOAB_HAVE_MPI
    nonzero.sort( 1, &buf );
#else
    nonzero.sort( 1, &crystal->all->buf );
#endif

    /* build list of unique labels w/ lowest associated index:
       (index in nonzero ^, primary (lowest) index in label, count, label(s),
       ulabel(s)) */
    primary.initialize( 3, nlabels, nulabels, 0, nonzero.get_n() );
    primary.enableWriteAccess();
    {
        uint i;
        sint *nzi = nonzero.vi_wr, *pi = primary.vi_wr;
        slong *nzl = nonzero.vl_wr, *pl = primary.vl_wr;
        Ulong *nzul = nonzero.vul_wr, *pul = primary.vul_wr;
        slong last = -1;
        for( i = 0; i < nonzero.get_n(); ++i, nzi += 1, nzl += nlabels, nzul += nulabels )
        {
            if( nzl[0] == last )
            {
                ++pi[-1]; /* bump count of the previous unique label */
                continue;
            }
            last  = nzl[0];
            pi[0] = i;
            pi[1] = nzi[0];
            for( j = 0; j < nlabels; j++ )
                pl[j] = nzl[j];
            for( j = 0; j < nulabels; j++ )
                pul[j] = nzul[j];
            pi[2] = 1;
            pi += 3, pl += nlabels;
            pul += nulabels;
            primary.inc_n();
        }
    }

    /* calculate size of local condense map */
    {
        uint i, count = 1;
        sint* pi = primary.vi_wr;
        for( i = primary.get_n(); i; --i, pi += 3 )
            if( pi[2] > 1 ) count += pi[2] + 1;
        this->local_cm = (sint*)malloc( count * sizeof( sint ) );
    }

    /* sort unique labels by primary index:
       (nonzero index ^2, primary index ^1, count, label ^2) */
#ifndef MOAB_HAVE_MPI
    primary.sort( 0, &buf );
    buf.reset();
#else
    primary.sort( 0, &crystal->all->buf );
#endif

    /* construct local condense map */
    {
        uint i, ln;
        sint* pi = primary.vi_wr;
        sint* cm = this->local_cm;
        for( i = primary.get_n(); i > 0; --i, pi += 3 )
            if( ( ln = pi[2] ) > 1 )
            {
                sint* nzi = nonzero.vi_wr + 1 * pi[0];
                for( j = ln; j > 0; --j, nzi += 1 )
                    *cm++ = nzi[0];
                *cm++ = -1;
            }
        *cm++ = -1;
    }
    nonzero.reset();
#ifndef MOAB_HAVE_MPI
    primary.reset();
#else
    /* assign work proc by label modulo np */
    {
        uint i;
        sint* pi  = primary.vi_wr;
        slong* pl = primary.vl_wr;
        for( i = primary.get_n(); i; --i, pi += 3, pl += nlabels )
            pi[0] = pl[0] % crystal->_num;
    }
    rval = crystal->gs_transfer( 1, primary, 0 ); /* transfer to work procs */
    if( rval != MB_SUCCESS ) return rval;
    /* primary: (source proc, index on src, useless, label) */
    /* sort by label */
    primary.sort( 3, &crystal->all->buf );
    /* add sentinel to primary list */
    if( primary.get_n() == primary.get_max() )
        primary.resize( primary.get_max() ? primary.get_max() + ( primary.get_max() + 1 ) / 2 + 1 : 2 );
    primary.vl_wr[nlabels * primary.get_n()] = -1;
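    /* Remainder of setup (editor's summary): the label-mod-np work proc now
       holds every (source proc, source index) tuple for each label assigned
       to it.  The loop below emits one shared tuple per ordered pair of
       procs sharing a label, and gs_transfer() routes each tuple back to
       its first proc; after the two sorts that follow, consecutive runs of
       equal partner procs are counted and copied into the nonlocal_info
       arrays (_target, _nshared, _sh_ind, and the remaining labels). */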
    /* construct shared list: (proc1, proc2, index1, label) */
#ifdef MOAB_HAVE_MPI
    shared.initialize( 3, nlabels, nulabels, 0, primary.get_n() );
    shared.enableWriteAccess();
#endif
    {
        sint *pi1 = primary.vi_wr, *si = shared.vi_wr;
        slong lbl, *pl1 = primary.vl_wr, *sl = shared.vl_wr;
        Ulong *pul1 = primary.vul_wr, *sul = shared.vul_wr;
        for( ; ( lbl = pl1[0] ) != -1; pi1 += 3, pl1 += nlabels, pul1 += nulabels )
        {
            sint* pi2   = pi1 + 3;
            slong* pl2  = pl1 + nlabels;
            Ulong* pul2 = pul1 + nulabels;
            for( ; pl2[0] == lbl; pi2 += 3, pl2 += nlabels, pul2 += nulabels )
            {
                if( shared.get_n() + 2 > shared.get_max() )
                {
                    shared.resize( shared.get_max() ? shared.get_max() + ( shared.get_max() + 1 ) / 2 + 1 : 2 );
                    si = shared.vi_wr + shared.get_n() * 3;
                }
                sl    = shared.vl_wr + shared.get_n() * nlabels;
                sul   = shared.vul_wr + shared.get_n() * nulabels;
                si[0] = pi1[0];
                si[1] = pi2[0];
                si[2] = pi1[1];
                for( j = 0; j < nlabels; j++ )
                    sl[j] = pl2[j];
                for( j = 0; j < nulabels; j++ )
                    sul[j] = pul2[j];
                si += 3;
                sl += nlabels;
                sul += nulabels;
                shared.inc_n();
                si[0] = pi2[0];
                si[1] = pi1[0];
                si[2] = pi2[1];
                for( j = 0; j < nlabels; j++ )
                    sl[j] = pl1[j];
                for( j = 0; j < nulabels; j++ )
                    sul[j] = pul1[j];
                si += 3;
                sl += nlabels;
                sul += nulabels;
                shared.inc_n();
            }
        }
    }
    primary.reset();
    rval = crystal->gs_transfer( 1, shared, 0 ); /* transfer to dest procs */
    if( rval != MB_SUCCESS ) return rval;
    /* shared list: (useless, proc2, index, label) */
    /* sort by label */
    shared.sort( 3, &crystal->all->buf );
    /* sort by partner proc */
    shared.sort( 1, &crystal->all->buf );
    /* count partner procs */
    {
        uint i, count = 0;
        sint proc = -1, *si = shared.vi_wr;
        for( i = shared.get_n(); i; --i, si += 3 )
            if( si[1] != proc )
            {
                ++count;
                proc = si[1];
            }
        this->nlinfo = new nonlocal_info( count, shared.get_n(), nlabels, nulabels, maxv );
    }
    /* construct non-local info */
    {
        uint i;
        sint proc = -1, *si = shared.vi_wr;
        slong* sl      = shared.vl_wr;
        Ulong* ul      = shared.vul_wr;
        uint* target   = this->nlinfo->_target;
        uint* nshared  = this->nlinfo->_nshared;
        uint* sh_ind   = this->nlinfo->_sh_ind;
        slong* slabels = this->nlinfo->_slabels;
        Ulong* ulabels = this->nlinfo->_ulabels;
        for( i = shared.get_n(); i; --i, si += 3 )
        {
            if( si[1] != proc )
            {
                proc       = si[1];
                *target++  = proc;
                *nshared++ = 0;
            }
            ++nshared[-1];
            *sh_ind++ = si[2];
            // don't store 1st slabel
            sl++;
            for( j = 0; j < nlabels - 1; j++ )
                slabels[j] = sl[j];
            for( j = 0; j < nulabels; j++ )
                ulabels[j] = ul[j];
            slabels += nlabels - 1;
            ulabels += nulabels;
            sl += nlabels - 1;
            ul += nulabels;
        }
    }
    shared.reset();
#endif
    return MB_SUCCESS;
}

void gs_data::reset()
{
    free( local_cm );
    local_cm = NULL;
#ifdef MOAB_HAVE_MPI
    if( nlinfo != NULL )
    {
        nlinfo->nlinfo_free();
        delete this->nlinfo;
        MPI_Comm_free( &_comm );
        nlinfo = NULL;
    }
#endif
}

}  // namespace moab