MOAB: Mesh Oriented datABase
(version 5.4.1)
|
00001 #include "ParallelHelper.hpp" 00002 00003 #include <mpi.h> 00004 00005 #include <cstdio> 00006 #include <cassert> 00007 #include <iostream> 00008 #include <cstdlib> 00009 #include <algorithm> 00010 #include <ctime> 00011 00012 #include "MsqVertex.hpp" 00013 #include "MsqError.hpp" 00014 00015 #define VERTEX_HEADER 1 00016 #define VERTEX_BLOCK 1000 00017 #define GHOST_NODE_INFO 2000 00018 #define GHOST_NODE_VERTICES_WANTED 2001 00019 #define GHOST_NODE_VERTEX_GIDS 2002 00020 #define GHOST_NODE_VERTEX_UPDATES 2003 00021 00022 #define ARRPTR( x ) arrptr( x, true ) 00023 00024 namespace MBMesquite 00025 { 00026 00027 void parallel_barrier() 00028 { 00029 int is_init = 0; 00030 int err = MPI_Initialized( &is_init ); 00031 if( MPI_SUCCESS != err ) return; 00032 if( is_init ) MPI_Barrier( MPI_COMM_WORLD ); 00033 } 00034 00035 int get_parallel_rank() 00036 { 00037 int rank = 0; 00038 int is_init = 0; 00039 int err = MPI_Initialized( &is_init ); 00040 if( MPI_SUCCESS != err ) return 0; 00041 if( is_init ) MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 00042 return rank; 00043 } 00044 00045 int get_parallel_size() 00046 { 00047 int nprocs = 0; 00048 int is_init = 0; 00049 int err = MPI_Initialized( &is_init ); 00050 if( MPI_SUCCESS != err ) return 0; 00051 if( is_init ) MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 00052 return nprocs; 00053 } 00054 00055 double reduce_parallel_max( double value ) 00056 { 00057 int is_init = 0; 00058 int err = MPI_Initialized( &is_init ); 00059 if( MPI_SUCCESS != err ) return value; 00060 if( !is_init ) return value; 00061 00062 double d_max[1]; 00063 double d_max_recv[1]; 00064 d_max[0] = value; 00065 int rval = MPI_Allreduce( d_max, d_max_recv, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD ); 00066 if( MPI_SUCCESS != rval ) return value; 00067 00068 return d_max_recv[0]; 00069 } 00070 00071 static const char* mpi_err_string( int error_code ) 00072 { 00073 static char buffer[128]; 00074 int len = sizeof( buffer ); 00075 int e = MPI_Error_string( error_code, buffer, &len ); 00076 if( MPI_SUCCESS != e ) len = 0; 00077 00078 if( len >= (int)sizeof( buffer ) ) len = sizeof( buffer ) - 1; 00079 buffer[len] = '\0'; 00080 return buffer; 00081 } 00082 00083 #define CHECK_MPI( RVAL, ERR ) \ 00084 do \ 00085 { \ 00086 if( MPI_SUCCESS != ( RVAL ) ) \ 00087 { \ 00088 MSQ_SETERR( err ) \ 00089 ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \ 00090 return; \ 00091 } \ 00092 } while( false ) 00093 00094 #define CHECK_MPI_RZERO( RVAL, ERR ) \ 00095 do \ 00096 { \ 00097 if( MPI_SUCCESS != ( RVAL ) ) \ 00098 { \ 00099 MSQ_SETERR( err ) \ 00100 ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \ 00101 return 0; \ 00102 } \ 00103 } while( false ) 00104 00105 static bool vertex_map_insert( VertexIdMap& map, size_t glob_id, int proc_id, int value ) 00106 { 00107 VertexIdMapKey vid; 00108 vid.glob_id = glob_id; 00109 vid.proc_id = proc_id; 00110 return map.insert( VertexIdMap::value_type( vid, value ) ).second; 00111 } 00112 00113 static int vertex_map_find( const VertexIdMap& map, size_t glob_id, int proc_id ) 00114 { 00115 VertexIdMapKey vid; 00116 vid.glob_id = glob_id; 00117 vid.proc_id = proc_id; 00118 VertexIdMap::const_iterator map_element = map.find( vid ); 00119 if( map_element == map.end() ) 00120 { 00121 return 0; 00122 } 00123 else 00124 { 00125 return map_element->second; 00126 } 00127 } 00128 00129 static void my_quicksort( int* a, size_t* b, MBMesquite::Mesh::VertexHandle* c, int i, int j ) 00130 { 
00131 int in_i = i; 00132 int in_j = j; 00133 int wa; 00134 size_t wb; 00135 MBMesquite::Mesh::VertexHandle w1; 00136 int key = a[( i + j ) / 2]; 00137 do 00138 { 00139 while( a[i] < key ) 00140 i++; 00141 while( a[j] > key ) 00142 j--; 00143 if( i < j ) 00144 { 00145 wa = a[i]; 00146 a[i] = a[j]; 00147 a[j] = wa; 00148 wb = b[i]; 00149 b[i] = b[j]; 00150 b[j] = wb; 00151 w1 = c[i]; 00152 c[i] = c[j]; 00153 c[j] = w1; 00154 } 00155 } while( ++i <= --j ); 00156 if( i == j + 3 ) 00157 { 00158 i--; 00159 j++; 00160 } 00161 if( j > in_i ) my_quicksort( a, b, c, in_i, j ); 00162 if( i < in_j ) my_quicksort( a, b, c, i, in_j ); 00163 } 00164 00165 static int hash6432shift( unsigned long long key ) 00166 { 00167 key = ( ~key ) + ( key << 18 ); // key = (key << 18) - key - 1; 00168 key = key ^ ( key >> 31 ); 00169 key = key * 21; // key = (key + (key << 2)) + (key << 4); 00170 key = key ^ ( key >> 11 ); 00171 key = key + ( key << 6 ); 00172 key = key ^ ( key >> 22 ); 00173 return (int)key; 00174 } 00175 00176 static unsigned long long hash64shift( unsigned long long key ) 00177 { 00178 key = ( ~key ) + ( key << 21 ); // key = (key << 21) - key - 1; 00179 key = key ^ ( key >> 24 ); 00180 key = ( key + ( key << 3 ) ) + ( key << 8 ); // key * 265 00181 key = key ^ ( key >> 14 ); 00182 key = ( key + ( key << 2 ) ) + ( key << 4 ); // key * 21 00183 key = key ^ ( key >> 28 ); 00184 key = key + ( key << 31 ); 00185 return key; 00186 } 00187 00188 static double generate_random_number( int generate_random_numbers, int proc_id, size_t glob_id ) 00189 { 00190 int gid; 00191 00192 if( sizeof( size_t ) == sizeof( unsigned long long ) ) 00193 { 00194 gid = hash6432shift( (unsigned long long)glob_id ); 00195 } 00196 else 00197 { 00198 gid = (int)glob_id; 00199 } 00200 00201 if( generate_random_numbers == 1 ) 00202 { 00203 // count number of on bits 00204 int on = 0; 00205 unsigned int mist = (unsigned int)gid; 00206 while( mist ) 00207 { 00208 if( mist & 1 ) on++; 00209 mist = mist >> 1; 00210 } 00211 unsigned short xsubi[3]; 00212 if( on & 1 ) 00213 mist = (unsigned int)gid; 00214 else 00215 mist = (unsigned int)( -gid ); 00216 if( on & 2 ) 00217 { 00218 xsubi[0] = (unsigned short)( 65535 & mist ); 00219 xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00220 } 00221 else 00222 { 00223 xsubi[1] = (unsigned short)( 65535 & mist ); 00224 xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00225 } 00226 xsubi[2] = proc_id; 00227 00228 #if( defined WIN32 || defined WIN64 ) 00229 time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 ); 00230 srand( (int)seed ); 00231 return rand() / ( RAND_MAX + 1.0 ); 00232 #else 00233 return erand48( xsubi ); 00234 #endif 00235 } 00236 else if( generate_random_numbers == 2 ) 00237 { 00238 // count number of on bits 00239 int on = 0; 00240 unsigned int mist = (unsigned int)gid; 00241 while( mist ) 00242 { 00243 if( mist & 1 ) on++; 00244 mist = mist >> 1; 00245 } 00246 unsigned short xsubi[3]; 00247 if( on & 1 ) 00248 mist = (unsigned int)gid; 00249 else 00250 mist = (unsigned int)( -gid ); 00251 if( on & 2 ) 00252 { 00253 xsubi[0] = (unsigned short)( 65535 & mist ); 00254 xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00255 } 00256 else 00257 { 00258 xsubi[1] = (unsigned short)( 65535 & mist ); 00259 xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00260 } 00261 xsubi[2] = proc_id ^ xsubi[1]; 00262 00263 #if( defined WIN32 || defined WIN64 ) 00264 time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 ); 00265 srand( (int)seed ); 00266 return 
rand() / ( RAND_MAX + 1.0 ); 00267 #else 00268 return erand48( xsubi ); 00269 #endif 00270 } 00271 else if( generate_random_numbers == 3 ) 00272 { 00273 unsigned long long key = (unsigned long long)gid; 00274 key = key << 32; 00275 key = key | proc_id; 00276 return (double)hash64shift( key ); 00277 } 00278 else if( generate_random_numbers == 4 ) 00279 { 00280 unsigned long long key = (unsigned long long)gid; 00281 key = key << 32; 00282 key = key | proc_id; 00283 key = hash64shift( key ); 00284 unsigned short xsubi[3]; 00285 xsubi[0] = (unsigned short)( key >> 48 ); 00286 xsubi[1] = (unsigned short)( key >> 32 ); 00287 xsubi[2] = (unsigned short)( key >> 16 ); 00288 00289 #if( defined WIN32 || defined WIN64 ) 00290 time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 ); 00291 srand( (int)seed ); 00292 return rand() / ( RAND_MAX + 1.0 ); 00293 #else 00294 return erand48( xsubi ); 00295 #endif 00296 } 00297 else 00298 { 00299 return -1; 00300 } 00301 } 00302 00303 ParallelHelperImpl::ParallelHelperImpl() 00304 { 00305 mesh = 0; 00306 set_communicator( MPI_COMM_WORLD ); 00307 communication_model = TrulyNonBlockingAvoidAllReduce; 00308 00309 MPI_Comm_rank( (MPI_Comm)communicator, &rank ); 00310 MPI_Comm_size( (MPI_Comm)communicator, &nprocs ); 00311 00312 // variable defaults for VertexMover::loop_over_mesh() 00313 generate_random_numbers = 2; 00314 00315 // memory chunks referenced for VertexMover::loop_over_mesh() 00316 vertices.clear(); 00317 vtx_in_partition_boundary.clear(); 00318 part_vertices.clear(); 00319 unghost_vertices.clear(); 00320 part_proc_owner.clear(); 00321 part_gid.clear(); 00322 part_smoothed_flag.clear(); 00323 part_rand_number.clear(); 00324 exportVtxGIDs.clear(); 00325 exportVtxLIDs.clear(); 00326 exportProc.clear(); 00327 in_independent_set.clear(); 00328 vid_map.clear(); 00329 neighbourProcSend.clear(); 00330 neighbourProcRecv.clear(); 00331 neighbourProcSendRemain.clear(); 00332 neighbourProcRecvRemain.clear(); 00333 vtx_off_proc_list.clear(); 00334 neighbourProc.clear(); 00335 } 00336 00337 ParallelHelperImpl::~ParallelHelperImpl() {} 00338 00339 void ParallelHelperImpl::set_parallel_mesh( ParallelMesh* pmesh ) 00340 { 00341 this->mesh = pmesh; 00342 } 00343 00344 void ParallelHelperImpl::set_communicator( size_t comm ) 00345 { 00346 communicator = comm; 00347 MPI_Comm_rank( (MPI_Comm)communicator, &rank ); 00348 MPI_Comm_size( (MPI_Comm)communicator, &nprocs ); 00349 } 00350 00351 void ParallelHelperImpl::set_communication_model( int model, MsqError& ) 00352 { 00353 communication_model = model; 00354 } 00355 00356 void ParallelHelperImpl::set_generate_random_numbers( int grn, MsqError& ) 00357 { 00358 this->generate_random_numbers = grn; 00359 } 00360 00361 void ParallelHelperImpl::smoothing_init( MsqError& err ) 00362 { 00363 00364 int i, j, k, rval; 00365 size_t l; 00366 00367 if( !mesh ) 00368 { 00369 MSQ_SETERR( err )( MsqError::INVALID_STATE ); 00370 return; 00371 } 00372 if( nprocs == 1 ) return; 00373 00374 /* get the vertices */ 00375 mesh->get_all_vertices( vertices, err );MSQ_ERRRTN( err ); 00376 num_vertex = vertices.size(); 00377 00378 /* allocate the data arrays we'll use for smoothing */ 00379 std::vector< size_t > gid( num_vertex ); 00380 std::vector< int > proc_owner( num_vertex ); 00381 std::vector< unsigned char > app_fixed( num_vertex ); 00382 00383 /* get the data from the mesquite mesh */ 00384 mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err ); 00385 mesh->vertices_get_byte( ARRPTR( 
vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err ); 00386 mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err ); 00387 if( false ) 00388 { 00389 int ncull = 0; 00390 for( i = 0; i < num_vertex; ++i ) 00391 { 00392 if( app_fixed[i] & MsqVertex::MSQ_CULLED ) 00393 { 00394 ++ncull; 00395 } 00396 } 00397 std::cout << "P[" << rank << "] smoothing_init ncull= " << ncull << " num_vertex= " << num_vertex << std::endl; 00398 } 00399 00400 /* only interested in fixed flag from vertex byte? Clear others. */ 00401 // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed 00402 for( i = 0; i < num_vertex; ++i ) 00403 app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED; 00404 00405 /* create temporary Tag for the local IDs */ 00406 std::vector< int > lid( num_vertex ); 00407 for( i = 0; i < num_vertex; i++ ) 00408 lid[i] = i; 00409 const char LOCAL_ID_NAME[] = "LOCAL_ID"; 00410 TagHandle lid_tag = mesh->tag_create( LOCAL_ID_NAME, Mesh::INT, 1, NULL, err );MSQ_ERRRTN( err ); 00411 mesh->tag_set_vertex_data( lid_tag, num_vertex, ARRPTR( vertices ), ARRPTR( lid ), err );MSQ_ERRRTN( err ); 00412 00413 if( false ) printf( "[%d] set local tags on %d vertices\n", rank, num_vertex ); 00414 00415 /* get the elements */ 00416 std::vector< MBMesquite::Mesh::ElementHandle > elements; 00417 mesh->get_all_elements( elements, err );MSQ_ERRRTN( err ); 00418 int num_elems = elements.size(); 00419 00420 /**************************************************************** 00421 PARTITION BOUNDARY VERTEX DETERMINATION 00422 ***********************************************************************/ 00423 00424 /* initialize the vertex partition boundary array */ 00425 vtx_in_partition_boundary.clear(); 00426 vtx_in_partition_boundary.resize( num_vertex, 0 ); 00427 int incident_vtx, vtx_off_proc, vtx_on_proc; 00428 00429 /* get the array that contains the adjacent vertices for each mesh element */ 00430 std::vector< MBMesquite::Mesh::VertexHandle > adj_vertices; 00431 std::vector< size_t > vtx_offsets; 00432 mesh->elements_get_attached_vertices( ARRPTR( elements ), num_elems, adj_vertices, vtx_offsets, err ); 00433 std::vector< int > adj_vertices_lid( adj_vertices.size() ); 00434 mesh->tag_get_vertex_data( lid_tag, adj_vertices.size(), ARRPTR( adj_vertices ), ARRPTR( adj_vertices_lid ), err ); 00435 00436 if( false ) printf( "[%d] gotten adjacent elements for %d elements\n", rank, num_elems ); 00437 00438 /* determine which vertices are smoothed as part of the boundary (and which are unused ghost 00439 * vertices) */ 00440 num_vtx_partition_boundary_local = 0; 00441 num_vtx_partition_boundary_remote = 0; 00442 for( i = 0; i < num_elems; i++ ) 00443 { 00444 /* count how many vertices of the current element are on/off a different processor */ 00445 vtx_off_proc = 0; 00446 vtx_on_proc = 0; 00447 for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ ) 00448 { 00449 incident_vtx = adj_vertices_lid[j]; 00450 /* obviously the vertex only counts if it is not app_fixed */ 00451 if( !app_fixed[incident_vtx] ) 00452 { 00453 if( proc_owner[incident_vtx] != rank ) 00454 { 00455 vtx_off_proc++; 00456 } 00457 else 00458 { 00459 vtx_on_proc++; 00460 } 00461 } 00462 } 00463 00464 /* if vertices are on different processors mark all local vertices as boundary1 and all 00465 * remote vertices as boundary2 */ 00466 if( vtx_off_proc > 0 && vtx_on_proc > 0 ) 00467 { 00468 /* collect stats */ 00469 // smooth_stats.num_part_bndy_elem++; 00470 /* mark 
the vertices */ 00471 for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ ) 00472 { 00473 incident_vtx = adj_vertices_lid[j]; 00474 /* obviously the vertex does not need to be marked if it was already marked or if it 00475 * is app_fixed*/ 00476 if( vtx_in_partition_boundary[incident_vtx] <= 0 && app_fixed[incident_vtx] == 0 ) 00477 { 00478 /* mark and count the vertex */ 00479 if( proc_owner[incident_vtx] != rank ) 00480 { 00481 vtx_in_partition_boundary[incident_vtx] = 2; 00482 num_vtx_partition_boundary_remote++; 00483 } 00484 else 00485 { 00486 vtx_in_partition_boundary[incident_vtx] = 1; 00487 num_vtx_partition_boundary_local++; 00488 } 00489 } 00490 } 00491 } 00492 else if( vtx_off_proc > 0 ) 00493 { 00494 /* mark the vertices as boundary-1 (aka unused ghost) if the element has only 00495 * off-processor vertices */ 00496 for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ ) 00497 { 00498 incident_vtx = adj_vertices_lid[j]; 00499 /* obviously the vertex is not marked if it was already marked or if it is 00500 * app_fixed*/ 00501 if( vtx_in_partition_boundary[incident_vtx] == 0 && app_fixed[incident_vtx] == 0 ) 00502 { 00503 vtx_in_partition_boundary[incident_vtx] = -1; 00504 } 00505 } 00506 } 00507 } 00508 00509 if( false ) 00510 { 00511 printf( "[%d]i%d local %d remote %d ", rank, iteration, num_vtx_partition_boundary_local, 00512 num_vtx_partition_boundary_remote ); 00513 printf( "[%d]i%d pb1 ", rank, iteration ); 00514 for( i = 0; i < num_vertex; i++ ) 00515 if( vtx_in_partition_boundary[i] == 1 ) printf( "%d,%lu ", i, gid[i] ); 00516 printf( "\n" ); 00517 printf( "[%d]i%d pb2 ", rank, iteration ); 00518 for( i = 0; i < num_vertex; i++ ) 00519 if( vtx_in_partition_boundary[i] == 2 ) printf( "%d,%lu ", i, gid[i] ); 00520 printf( "\n" ); 00521 fflush( NULL ); 00522 } 00523 00524 num_vtx_partition_boundary = num_vtx_partition_boundary_local + num_vtx_partition_boundary_remote; 00525 00526 /******************************************************************** 00527 COLLECT THE PARTITION BOUNDARY VERTICES AND THE UNUSED GHOST VERTICES 00528 ********************************************************************/ 00529 00530 /* create the vectors to store the partition boundary vertex data */ 00531 part_vertices.resize( num_vtx_partition_boundary ); 00532 part_proc_owner.resize( num_vtx_partition_boundary ); 00533 part_gid.resize( num_vtx_partition_boundary ); 00534 part_smoothed_flag.resize( num_vtx_partition_boundary ); 00535 part_rand_number.resize( num_vtx_partition_boundary ); 00536 00537 /* create the partition boundary map and its inverse */ 00538 std::vector< int > vtx_partition_boundary_map_inverse( num_vertex ); 00539 00540 j = 0; 00541 /* first we map the partition boundary vertices that we will smooth on this processor */ 00542 for( i = 0; i < num_vertex; i++ ) 00543 { 00544 if( vtx_in_partition_boundary[i] == 1 ) 00545 { 00546 part_vertices[j] = vertices[i]; 00547 part_proc_owner[j] = rank; 00548 assert( proc_owner[i] == rank ); 00549 part_gid[j] = gid[i]; 00550 vtx_partition_boundary_map_inverse[i] = j; 00551 j++; 00552 } 00553 } 00554 00555 vid_map.clear(); 00556 00557 /* then we map the ghost vertices that will be smoothed on other processors */ 00558 for( i = 0; i < num_vertex; i++ ) 00559 { 00560 if( vtx_in_partition_boundary[i] == 2 ) 00561 { 00562 part_vertices[j] = vertices[i]; 00563 part_proc_owner[j] = proc_owner[i]; 00564 assert( proc_owner[i] != rank ); 00565 part_gid[j] = gid[i]; 00566 vtx_partition_boundary_map_inverse[i] = j; 00567 /* only 
insert those vertices in the map that are smoothed on other processors */ 00568 vertex_map_insert( vid_map, part_gid[j], part_proc_owner[j], j ); 00569 // printf("[%d] inserting vertex with gid %u and pid %d \n", rank, part_gid[j], 00570 // part_proc_owner[j]); 00571 j++; 00572 } 00573 } 00574 00575 /* create our own 'very pseudo random' numbers */ 00576 for( i = 0; i < num_vtx_partition_boundary; i++ ) 00577 part_rand_number[i] = generate_random_number( generate_random_numbers, part_proc_owner[i], part_gid[i] ); 00578 00579 /* count the number of unused ghost vertices */ 00580 unghost_num_vtx = 0; 00581 for( i = 0; i < num_vertex; i++ ) 00582 { 00583 if( vtx_in_partition_boundary[i] == -1 ) 00584 { 00585 unghost_num_vtx++; 00586 } 00587 } 00588 // printf("[%d] found %d unused ghost vertices (local %d remote %d)\n",rank, unghost_num_vtx, 00589 // num_vtx_partition_boundary_local,num_vtx_partition_boundary_remote); 00590 00591 /* create the vectors to store the unused ghost vertices */ 00592 unghost_vertices.resize( unghost_num_vtx ); 00593 std::vector< int > unghost_proc_owner( unghost_num_vtx ); 00594 std::vector< size_t > unghost_gid( unghost_num_vtx ); 00595 00596 /* store the unused ghost vertices that are copies of vertices from other processors and will 00597 * need to be received */ 00598 j = 0; 00599 for( i = 0; i < num_vertex; i++ ) 00600 { 00601 if( vtx_in_partition_boundary[i] == -1 ) 00602 { 00603 unghost_vertices[j] = vertices[i]; 00604 unghost_proc_owner[j] = proc_owner[i]; 00605 assert( proc_owner[i] != rank ); 00606 unghost_gid[j] = gid[i]; 00607 // printf(" %d", unghost_gid[j]); 00608 j++; 00609 } 00610 } 00611 00612 /* no longer needed */ 00613 // delete [] gid; gid = 0; 00614 // delete [] proc_owner; proc_owner = 0; 00615 00616 unghost_num_procs = 0; 00617 unghost_procs.clear(); 00618 unghost_procs_offset.clear(); 00619 unghost_procs_num_vtx.clear(); 00620 if( unghost_num_vtx ) 00621 { 00622 /* sort the unused ghost vertices by processor */ 00623 my_quicksort( ARRPTR( unghost_proc_owner ), ARRPTR( unghost_gid ), &( unghost_vertices[0] ), 0, 00624 unghost_num_vtx - 1 ); 00625 00626 /* count the number of processors we have unused ghost data from that we want to get updated 00627 */ 00628 unghost_num_procs = 1; 00629 for( i = 1; i < unghost_num_vtx; i++ ) 00630 { 00631 if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) unghost_num_procs++; 00632 } 00633 00634 /* get the ids of those processors and the number of vertices we want from each */ 00635 unghost_procs.resize( unghost_num_procs ); 00636 unghost_procs_offset.resize( unghost_num_procs + 1 ); 00637 unghost_procs_num_vtx.resize( unghost_num_procs ); 00638 unghost_procs[0] = unghost_proc_owner[0]; 00639 unghost_procs_offset[0] = 0; 00640 for( i = 1, j = 1; i < unghost_num_vtx; i++ ) 00641 { 00642 if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) 00643 { 00644 unghost_procs[j] = unghost_proc_owner[i]; 00645 unghost_procs_offset[j] = i; 00646 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1]; 00647 assert( unghost_procs_num_vtx[j - 1] > 0 ); 00648 j++; 00649 } 00650 } 00651 unghost_procs_offset[j] = i; 00652 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1]; 00653 assert( unghost_procs_num_vtx[j - 1] > 0 ); 00654 assert( j == unghost_num_procs ); 00655 00656 // delete [] unghost_proc_owner; unghost_proc_owner = 0; 00657 00658 // printf("[%d] have ugns from %d processor(s) (%d,%d,%d)\n", rank, unghost_num_procs, 00659 // unghost_procs[0], 
(unghost_num_procs>1) ? unghost_procs[1] : -1, (unghost_num_procs>2) ? 00660 // unghost_procs[1] : -1); 00661 } 00662 00663 /* this will eventually store to how many processors each processor needs to send unused ghost 00664 * data to */ 00665 std::vector< int > num_sends_of_unghost; 00666 00667 /* gather the information about how many processors need ghost data */ 00668 if( rank == 0 ) 00669 { 00670 /* this will eventually store to how many processors each processor needs to send unused 00671 * ghost data to */ 00672 num_sends_of_unghost.resize( nprocs ); 00673 } 00674 /* temporary used for the initial gather in which each proc tells the root from how many procs 00675 * it wants unused ghost data updates */ 00676 rval = MPI_Gather( &unghost_num_procs, 1, MPI_INT, ARRPTR( num_sends_of_unghost ), 1, MPI_INT, 0, 00677 (MPI_Comm)communicator ); 00678 CHECK_MPI( rval, err ); 00679 00680 /* now each processor tells the root node from which processors they want unused ghost nodes 00681 * information */ 00682 if( rank == 0 ) 00683 { 00684 int procs_num = 0; 00685 int procs_max = 0; 00686 /* first count how many processors will send unused ghost node info and find the maximum 00687 * they will send */ 00688 for( i = 1; i < nprocs; i++ ) 00689 { 00690 if( num_sends_of_unghost[i] ) 00691 { 00692 procs_num++; 00693 if( num_sends_of_unghost[i] > procs_max ) procs_max = num_sends_of_unghost[i]; 00694 } 00695 } 00696 /* clean the temporary used array */ 00697 for( i = 0; i < nprocs; i++ ) 00698 num_sends_of_unghost[i] = 0; 00699 /* process rank 0's unused ghost nodes procs first */ 00700 for( j = 0; j < unghost_num_procs; j++ ) 00701 { 00702 num_sends_of_unghost[unghost_procs[j]]++; 00703 } 00704 /* now we process messages from all other processors that want unused ghost node information 00705 */ 00706 int* unghost_procs_array = new int[procs_max]; 00707 for( i = 0; i < procs_num; i++ ) 00708 { 00709 MPI_Status status; 00710 rval = MPI_Recv( unghost_procs_array, procs_max, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_INFO, 00711 (MPI_Comm)communicator, &status ); 00712 CHECK_MPI( rval, err ); 00713 int count; 00714 MPI_Get_count( &status, MPI_INT, &count ); 00715 for( j = 0; j < count; j++ ) 00716 { 00717 num_sends_of_unghost[unghost_procs_array[j]]++; 00718 } 00719 } 00720 delete[] unghost_procs_array; 00721 } 00722 else 00723 { 00724 if( unghost_num_vtx ) 00725 { 00726 rval = MPI_Send( ARRPTR( unghost_procs ), unghost_num_procs, MPI_INT, 0, GHOST_NODE_INFO, 00727 (MPI_Comm)communicator ); 00728 CHECK_MPI( rval, err ); 00729 } 00730 } 00731 00732 /* now the root node knows for each processor on how many other processors they have ghost nodes 00733 * (which need updating) */ 00734 /* the scatter distributes this information to each processor */ 00735 rval = MPI_Scatter( ARRPTR( num_sends_of_unghost ), 1, MPI_INT, &update_num_procs, 1, MPI_INT, 0, 00736 (MPI_Comm)communicator ); 00737 CHECK_MPI( rval, err ); 00738 00739 // if (rank == 0) delete [] num_sends_of_unghost; 00740 00741 // printf("[%d] i have unused ghost nodes from %d procs and i need to send updates to %d 00742 // procs\n", rank, unghost_num_procs, update_num_procs); 00743 00744 /* now the processors can negotiate amongst themselves: */ 00745 00746 /* first tell each processor the number of unused ghost nodes we want from them */ 00747 std::vector< MPI_Request > requests_unghost( unghost_num_procs ); 00748 for( j = 0; j < unghost_num_procs; j++ ) 00749 { 00750 rval = MPI_Isend( &( unghost_procs_num_vtx[j] ), 1, MPI_INT, unghost_procs[j], 
GHOST_NODE_VERTICES_WANTED, 00751 (MPI_Comm)communicator, &( requests_unghost[j] ) ); 00752 CHECK_MPI( rval, err ); 00753 } 00754 00755 /* then listen to as many processors as there are that want updates from us */ 00756 std::vector< MPI_Request > requests_updates( update_num_procs ); 00757 update_procs_num_vtx.resize( update_num_procs ); 00758 for( j = 0; j < update_num_procs; j++ ) 00759 { 00760 rval = MPI_Irecv( &( update_procs_num_vtx[j] ), 1, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_VERTICES_WANTED, 00761 (MPI_Comm)communicator, &( requests_updates[j] ) ); 00762 CHECK_MPI( rval, err ); 00763 } 00764 00765 /* wait until we have heard from all processors how many ghost nodes updates they want from us 00766 */ 00767 std::vector< MPI_Status > status_update( update_num_procs ); 00768 update_procs.resize( update_num_procs ); 00769 rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) ); 00770 CHECK_MPI( rval, err ); 00771 for( j = 0; j < update_num_procs; j++ ) 00772 { 00773 update_procs[j] = status_update[j].MPI_SOURCE; 00774 // printf("[%d] i have to send %d vertices to processor %d\n", rank, 00775 // update_procs_num_vtx[j], update_procs[j]); 00776 } 00777 00778 /* count the total number of vertices that we need to update elsewhere */ 00779 update_procs_offset.resize( update_num_procs + 1 ); 00780 update_num_vtx = 0; 00781 update_procs_offset[0] = 0; 00782 for( j = 0; j < update_num_procs; j++ ) 00783 { 00784 update_num_vtx += update_procs_num_vtx[j]; 00785 update_procs_offset[j + 1] = update_num_vtx; 00786 } 00787 00788 /* create enough space to receive all the vertex indices */ 00789 update_gid.resize( update_num_vtx ); 00790 00791 /* tell each processor which vertices we want from them */ 00792 for( j = 0; j < unghost_num_procs; j++ ) 00793 { 00794 rval = MPI_Isend( &( unghost_gid[unghost_procs_offset[j]] ), unghost_procs_num_vtx[j], 00795 sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_GIDS, 00796 (MPI_Comm)communicator, &( requests_unghost[j] ) ); 00797 CHECK_MPI( rval, err ); 00798 } 00799 00800 /* receive from each processor the info which vertices they want from us */ 00801 for( j = 0; j < update_num_procs; j++ ) 00802 { 00803 rval = MPI_Irecv( &( update_gid[update_procs_offset[j]] ), update_procs_num_vtx[j], 00804 sizeof( size_t ) == 4 ? 
MPI_INT : MPI_DOUBLE, update_procs[j], GHOST_NODE_VERTEX_GIDS, 00805 (MPI_Comm)communicator, &( requests_updates[j] ) ); 00806 CHECK_MPI( rval, err ); 00807 } 00808 00809 /* wait until we have heard from all processors which vertices they want from us */ 00810 rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) ); 00811 CHECK_MPI( rval, err ); 00812 00813 /* wait until we have sent to all processors which vertices we want from them */ 00814 std::vector< MPI_Status > status_unghost( unghost_num_procs ); 00815 rval = MPI_Waitall( unghost_num_procs, ARRPTR( requests_unghost ), ARRPTR( status_unghost ) ); 00816 CHECK_MPI( rval, err ); 00817 00818 /* 00819 for (j = 0; j < update_num_procs; j++) 00820 { 00821 printf("[%d] will send to proc %d:", rank, update_procs[j]); 00822 for (i = update_procs_offset[j]; i < update_procs_offset[j+1]; i++) printf(" %d", 00823 update_gid[i]); printf("\n"); 00824 } 00825 */ 00826 00827 /*********************************************************************** 00828 COMPUTE THE SET OF NEIGHBORS THAT OUR VERTICES HAVE ON OTHER PROCESSORS 00829 COMPUTE THE SET OF NEIGHBORS PROCESSORS 00830 ***********************************************************************/ 00831 00832 if( num_vtx_partition_boundary_local == 0 ) 00833 { 00834 /* this processor does not partake in the boundary smoothing */ 00835 neighbourProc.clear(); 00836 mesh->tag_delete( lid_tag, err ); 00837 vtx_partition_boundary_map_inverse.clear(); 00838 return; 00839 } 00840 00841 /* init the neighbour processor list */ 00842 neighbourProc.clear(); 00843 00844 /* init the neighbour lists */ 00845 00846 vtx_off_proc_list.clear(); 00847 vtx_off_proc_list.resize( num_vtx_partition_boundary_local ); 00848 00849 /* get the adjacency arrays that we need */ 00850 std::vector< MBMesquite::Mesh::ElementHandle > adj_elements; 00851 std::vector< MBMesquite::Mesh::VertexHandle > adj_adj_vertices; 00852 std::vector< size_t > elem_offsets; 00853 std::vector< size_t > adj_vtx_offsets; 00854 mesh->vertices_get_attached_elements( ARRPTR( part_vertices ), num_vtx_partition_boundary_local, adj_elements, 00855 elem_offsets, err ); 00856 mesh->elements_get_attached_vertices( ARRPTR( adj_elements ), adj_elements.size(), adj_adj_vertices, 00857 adj_vtx_offsets, err ); 00858 // delete adj_elements; adj_elements = 0; 00859 std::vector< int > adj_adj_vertices_lid( adj_adj_vertices.size() ); 00860 mesh->tag_get_vertex_data( lid_tag, adj_adj_vertices.size(), ARRPTR( adj_adj_vertices ), 00861 ARRPTR( adj_adj_vertices_lid ), err ); 00862 // delete adj_adj_vertices; adj_adj_vertices = 0; 00863 mesh->tag_delete( lid_tag, err ); 00864 00865 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 00866 { 00867 /* loop over the elements surrounding that vertex */ 00868 for( j = elem_offsets[i]; j < (int)( elem_offsets[i + 1] ); j++ ) 00869 { 00870 /* loop over the neighbors of the considered vertex (i.e. the vertices of these element) 00871 */ 00872 for( k = adj_vtx_offsets[j]; k < (int)( adj_vtx_offsets[j + 1] ); k++ ) 00873 { 00874 /* get the next neighbour */ 00875 incident_vtx = adj_adj_vertices_lid[k]; 00876 /* if this neighbour is a vertex that is smoothed on a different processor */ 00877 if( vtx_in_partition_boundary[incident_vtx] == 2 ) 00878 { 00879 /* then map it into our domain */ 00880 incident_vtx = vtx_partition_boundary_map_inverse[incident_vtx]; 00881 /* is this vertex already in our neighbour list ? 
*/ 00882 if( std::find( vtx_off_proc_list[i].begin(), vtx_off_proc_list[i].end(), incident_vtx ) == 00883 vtx_off_proc_list[i].end() ) 00884 { 00885 /* if the vertex is not in the list yet ... add it */ 00886 vtx_off_proc_list[i].push_back( incident_vtx ); 00887 /* is the processor of this vertex already in the processor list */ 00888 incident_vtx = part_proc_owner[incident_vtx]; 00889 /* check by scanning the list for this processor */ 00890 if( std::find( neighbourProc.begin(), neighbourProc.end(), incident_vtx ) == 00891 neighbourProc.end() ) 00892 { 00893 /* the processor is not in the list yet ... add it */ 00894 neighbourProc.push_back( incident_vtx ); 00895 } 00896 } 00897 } 00898 } 00899 } 00900 } 00901 00902 /* sort the list of neighbour processors */ 00903 00904 std::sort( neighbourProc.begin(), neighbourProc.end() ); 00905 00906 /*********************************************************************** 00907 COMPUTE HOW MANY VERTICES WE NEED TO SEND/RECV FROM EACH PROCESSOR 00908 ***********************************************************************/ 00909 neighbourProcSend.clear(); 00910 neighbourProcRecv.clear(); 00911 neighbourProcSendRemain.clear(); 00912 neighbourProcRecvRemain.clear(); 00913 00914 if( communication_model & 1 ) // AVOID_ALL_REDUCE 00915 { 00916 total_num_vertices_to_smooth = num_vtx_partition_boundary_local; 00917 total_num_vertices_to_recv = num_vtx_partition_boundary_remote; 00918 neighbourProcSend.resize( neighbourProc.size(), 0 ); 00919 neighbourProcRecv.resize( neighbourProc.size(), 0 ); 00920 00921 /* for each vertex we smooth find the processors we need to send it too */ 00922 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 00923 { 00924 /* loop over its adjacent off-processor vertices */ 00925 for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ ) 00926 { 00927 /* get the processor id of these vertices */ 00928 incident_vtx = part_proc_owner[vtx_off_proc_list[i][j]]; 00929 /* check if we got this processor id before */ 00930 for( k = 0; k < j; k++ ) 00931 { 00932 if( incident_vtx == part_proc_owner[vtx_off_proc_list[i][k]] ) 00933 { 00934 /* if we have has this procesor id already we do not need to count it again 00935 */ 00936 incident_vtx = -1; 00937 break; 00938 } 00939 } 00940 /* if this was a new processor id */ 00941 if( incident_vtx != -1 ) 00942 { 00943 /* find the processor in the list and increment its counter */ 00944 for( l = 0; l < neighbourProc.size(); l++ ) 00945 { 00946 if( neighbourProc[l] == incident_vtx ) 00947 { 00948 neighbourProcSend[l]++; 00949 break; 00950 } 00951 } 00952 } 00953 } 00954 } 00955 for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ ) 00956 { 00957 incident_vtx = part_proc_owner[i]; 00958 for( l = 0; l < neighbourProc.size(); l++ ) 00959 { 00960 if( neighbourProc[l] == incident_vtx ) 00961 { 00962 neighbourProcRecv[l]++; 00963 break; 00964 } 00965 } 00966 } 00967 neighbourProcSendRemain.resize( neighbourProc.size() ); 00968 neighbourProcRecvRemain.resize( neighbourProc.size() ); 00969 } 00970 00971 exportVtxGIDs.resize( num_vtx_partition_boundary ); 00972 exportVtxLIDs.resize( num_vtx_partition_boundary ); 00973 exportProc.resize( num_vtx_partition_boundary ); 00974 in_independent_set.resize( num_vtx_partition_boundary_local ); 00975 } 00976 00977 void ParallelHelperImpl::compute_first_independent_set( std::vector< Mesh::VertexHandle >& fixed_vertices ) 00978 { 00979 if( nprocs == 1 ) return; 00980 00981 int i; 00982 00983 // to avoid all reduce we need to know how many 
vertices we send & receive 00984 if( communication_model & 1 ) // AVOID_ALL_REDUCE 00985 { 00986 for( i = 0; i < (long)neighbourProc.size(); i++ ) 00987 { 00988 neighbourProcSendRemain[i] = neighbourProcSend[i]; 00989 neighbourProcRecvRemain[i] = neighbourProcRecv[i]; 00990 } 00991 } 00992 00993 // this is iteration zero of the bounday smooting process 00994 iteration = 0; 00995 00996 // mark all boundary partition vertices as not smoothed 00997 for( i = 0; i < num_vtx_partition_boundary; i++ ) 00998 { 00999 part_smoothed_flag[i] = 0; 01000 } 01001 01002 // populates the in_independent_set and the vertex export arrays 01003 compute_independent_set(); 01004 01005 // counts how many vertices are already smoothed 01006 num_already_smoothed_vertices = 0; 01007 01008 fixed_vertices.clear(); 01009 01010 /* mark which local boundary partition vertices are already smoothed (i.e. they are in the 1st 01011 * independent set) */ 01012 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 01013 { 01014 if( in_independent_set[i] ) 01015 { 01016 part_smoothed_flag[i] = 1; 01017 num_already_smoothed_vertices++; 01018 } 01019 else 01020 { 01021 fixed_vertices.push_back( part_vertices[i] ); // fix vertices *not* in the independent set 01022 } 01023 } 01024 01025 if( false ) 01026 { 01027 printf( "[%d]i%d after first we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices, 01028 num_vtx_partition_boundary_local ); 01029 fflush( NULL ); 01030 } 01031 01032 // fix the ghost vertices that are smoothed on another processor 01033 for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ ) 01034 { 01035 fixed_vertices.push_back( part_vertices[i] ); 01036 } 01037 01038 // fix the ghost vertices that are unused 01039 for( i = 0; i < (int)( unghost_vertices.size() ); i++ ) 01040 { 01041 fixed_vertices.push_back( unghost_vertices[i] ); 01042 } 01043 } 01044 01045 void ParallelHelperImpl::communicate_first_independent_set( MsqError& err ) 01046 { 01047 if( nprocs == 1 ) return; 01048 01049 switch( communication_model ) 01050 { 01051 case TrulyNonBlocking: 01052 num_already_recv_vertices = comm_smoothed_vtx_tnb( err ); 01053 break; 01054 case TrulyNonBlockingAvoidAllReduce: 01055 num_already_recv_vertices = comm_smoothed_vtx_tnb_no_all( err ); 01056 break; 01057 case NonBlocking: 01058 num_already_recv_vertices = comm_smoothed_vtx_nb( err ); 01059 break; 01060 case NonBlockingAvoidAllReduce: 01061 num_already_recv_vertices = comm_smoothed_vtx_nb_no_all( err ); 01062 break; 01063 case Blocking: 01064 num_already_recv_vertices = comm_smoothed_vtx_b( err ); 01065 break; 01066 case BlockingAvoidAllReduce: 01067 num_already_recv_vertices = comm_smoothed_vtx_b_no_all( err ); 01068 break; 01069 } 01070 global_work_remains = ( neighbourProc.size() ? 
1 : 0 );MSQ_CHKERR( err ); 01071 } 01072 01073 bool ParallelHelperImpl::compute_next_independent_set() 01074 { 01075 if( nprocs == 1 ) return false; 01076 01077 if( global_work_remains && ( iteration < 20 ) ) 01078 { 01079 iteration++; 01080 if( false ) printf( "[%d] work remains %d after %d iterations\n", rank, global_work_remains, iteration ); 01081 compute_independent_set(); 01082 next_vtx_partition_boundary = 0; 01083 return true; 01084 } 01085 else 01086 { 01087 if( global_work_remains ) 01088 { 01089 printf( "WARNING: global work remains %d after %d iterations\n", global_work_remains, iteration ); 01090 } 01091 return false; 01092 } 01093 } 01094 01095 bool ParallelHelperImpl::get_next_partition_boundary_vertex( MBMesquite::Mesh::VertexHandle& vertex_handle ) 01096 { 01097 while( next_vtx_partition_boundary < num_vtx_partition_boundary_local ) 01098 { 01099 if( in_independent_set[next_vtx_partition_boundary] ) 01100 { 01101 vertex_handle = part_vertices[next_vtx_partition_boundary]; 01102 num_already_smoothed_vertices++; 01103 assert( part_smoothed_flag[next_vtx_partition_boundary] == 0 ); 01104 part_smoothed_flag[next_vtx_partition_boundary] = 1; 01105 next_vtx_partition_boundary++; 01106 return true; 01107 } 01108 next_vtx_partition_boundary++; 01109 } 01110 if( false ) 01111 { 01112 printf( "[%d]i%d after next we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices, 01113 num_vtx_partition_boundary_local ); 01114 fflush( NULL ); 01115 } 01116 return false; 01117 } 01118 01119 void ParallelHelperImpl::communicate_next_independent_set( MsqError& err ) 01120 { 01121 switch( communication_model ) 01122 { 01123 case TrulyNonBlocking: 01124 num_already_recv_vertices += comm_smoothed_vtx_tnb( err ); 01125 break; 01126 case TrulyNonBlockingAvoidAllReduce: 01127 num_already_recv_vertices += comm_smoothed_vtx_tnb_no_all( err ); 01128 break; 01129 case NonBlocking: 01130 num_already_recv_vertices += comm_smoothed_vtx_nb( err ); 01131 break; 01132 case NonBlockingAvoidAllReduce: 01133 num_already_recv_vertices += comm_smoothed_vtx_nb_no_all( err ); 01134 break; 01135 case Blocking: 01136 num_already_recv_vertices += comm_smoothed_vtx_b( err ); 01137 break; 01138 case BlockingAvoidAllReduce: 01139 num_already_recv_vertices += comm_smoothed_vtx_b_no_all( err ); 01140 break; 01141 } 01142 01143 if( communication_model & 1 ) // AVOID_ALL_REDUCE 01144 { 01145 global_work_remains = ( total_num_vertices_to_smooth - num_already_smoothed_vertices ) + 01146 ( total_num_vertices_to_recv - num_already_recv_vertices ); 01147 if( false ) 01148 { 01149 printf( "[%d]i%d %d - %d + %d - %d = %d \n", rank, iteration, total_num_vertices_to_smooth, 01150 num_already_smoothed_vertices, total_num_vertices_to_recv, num_already_recv_vertices, 01151 global_work_remains ); 01152 fflush( NULL ); 01153 } 01154 } 01155 else 01156 { 01157 int i, work_remains = 0; 01158 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 01159 { 01160 if( part_smoothed_flag[i] == 0 ) 01161 { 01162 work_remains++; 01163 } 01164 } 01165 int rval = MPI_Allreduce( &work_remains, &global_work_remains, 1, MPI_INT, MPI_SUM, (MPI_Comm)communicator ); 01166 CHECK_MPI( rval, err ); 01167 } 01168 } 01169 01170 void ParallelHelperImpl::smoothing_close( MsqError& err ) 01171 { 01172 int i, j, rval; 01173 01174 if( nprocs == 1 ) return; 01175 01176 // printf("[%d] used %d iterations\n", rank, iteration); 01177 01178 // communicate unused ghost nodes 01179 01180 std::vector< double > update_updates; 01181 std::vector< MPI_Request > 
update_requests; 01182 01183 if( update_num_procs ) 01184 { 01185 /* get the tags so we can find the requested vertices */ 01186 std::vector< size_t > gid( num_vertex ); 01187 mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err ); 01188 std::vector< unsigned char > app_fixed( num_vertex ); 01189 mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err ); 01190 std::vector< int > proc_owner( num_vertex ); 01191 mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err ); 01192 01193 if( false ) 01194 { 01195 int ncull = 0; 01196 for( i = 0; i < num_vertex; ++i ) 01197 { 01198 if( app_fixed[i] & MsqVertex::MSQ_CULLED ) 01199 { 01200 ++ncull; 01201 } 01202 } 01203 std::cout << "P[" << rank << "] ncull= " << ncull << " num_vertex= " << num_vertex << std::endl; 01204 } 01205 01206 /* only interested in fixed flag from vertex byte? Clear others. */ 01207 // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed 01208 for( i = 0; i < num_vertex; ++i ) 01209 app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED; 01210 01211 /* insert all our unfixed vertices into a map so we can find the requested vertices 01212 * efficiently */ 01213 VertexIdMap temp_vid_map; 01214 for( j = 0; j < num_vertex; j++ ) 01215 { 01216 if( proc_owner[j] == rank && app_fixed[j] == false ) 01217 { 01218 vertex_map_insert( temp_vid_map, gid[j], rank, j ); 01219 } 01220 } 01221 01222 /* deallocate the tags */ 01223 // delete [] gid; gid = 0; 01224 // delete [] app_fixed; app_fixed = 0; 01225 // delete [] proc_owner; proc_owner = 0; 01226 01227 /* find the requested updates and collect them into an array */ 01228 MBMesquite::MsqVertex coordinates; 01229 update_updates.resize( update_num_vtx * 3 ); 01230 for( i = 0; i < update_num_vtx; i++ ) 01231 { 01232 j = vertex_map_find( temp_vid_map, update_gid[i], rank ); 01233 mesh->vertices_get_coordinates( &( vertices[j] ), &coordinates, 1, err );MSQ_ERRRTN( err ); 01234 update_updates[3 * i + 0] = coordinates[0]; 01235 update_updates[3 * i + 1] = coordinates[1]; 01236 update_updates[3 * i + 2] = coordinates[2]; 01237 // printf("[%d] send gid %d with %g %g %g\n", rank, update_gid[i], coordinates[0], 01238 // coordinates[1], coordinates[2]); 01239 } 01240 01241 /* deallocate the map and the gid array */ 01242 // delete temp_vid_map; temp_vid_map = 0; 01243 // delete [] update_gid; update_gid = 0; 01244 01245 update_requests.resize( update_num_procs ); 01246 /* send each processor the unused ghost node updates that they requested */ 01247 for( j = 0; j < update_num_procs; j++ ) 01248 { 01249 rval = MPI_Isend( &( update_updates[update_procs_offset[j] * 3] ), update_procs_num_vtx[j] * 3, MPI_DOUBLE, 01250 update_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator, 01251 &( update_requests[j] ) ); 01252 // printf("[%d] sending %d of %d from %d with offset %d \n", rank, 01253 // update_procs_num_vtx[j], update_num_vtx, update_procs[j], 01254 // update_procs_offset[j]); 01255 CHECK_MPI( rval, err ); 01256 } 01257 01258 /* deallocate more arrays that we no longer need */ 01259 // delete [] update_procs_offset; update_procs_offset = 0; 01260 // delete [] update_procs_num_vtx; update_procs_num_vtx = 0; 01261 // delete [] update_procs; update_procs = 0; 01262 } 01263 01264 if( unghost_num_procs ) 01265 { 01266 std::vector< MPI_Request > unghost_requests( unghost_num_procs ); 01267 /* receive from each processor the unused ghost 
nodes updates i want from them */ 01268 std::vector< double > unghost_updates( unghost_num_vtx * 3 ); 01269 for( j = 0; j < unghost_num_procs; j++ ) 01270 { 01271 rval = MPI_Irecv( &( unghost_updates[unghost_procs_offset[j] * 3] ), unghost_procs_num_vtx[j] * 3, 01272 MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator, 01273 &( unghost_requests[j] ) ); 01274 // printf("[%d] receiving %d of %d from %d with offset %d \n", rank, 01275 // unghost_procs_num_vtx[j], unghost_num_vtx, unghost_procs[j], 01276 // unghost_procs_offset[j]); 01277 CHECK_MPI( rval, err ); 01278 } 01279 01280 /* deallocate more arrays that we no longer need */ 01281 // delete [] unghost_procs_offset; unghost_procs_offset = 0; 01282 // delete [] unghost_procs_num_vtx; unghost_procs_num_vtx = 0; 01283 // delete [] unghost_procs; unghost_procs = 0; 01284 01285 std::vector< MPI_Status > status( unghost_num_procs ); 01286 rval = MPI_Waitall( unghost_num_procs, ARRPTR( unghost_requests ), ARRPTR( status ) ); 01287 CHECK_MPI( rval, err ); 01288 01289 /* apply the received updates for the unused ghost vertices */ 01290 for( i = 0; i < unghost_num_vtx; i++ ) 01291 { 01292 MBMesquite::Vector3D coordinates; 01293 coordinates[0] = unghost_updates[3 * i + 0]; 01294 coordinates[1] = unghost_updates[3 * i + 1]; 01295 coordinates[2] = unghost_updates[3 * i + 2]; 01296 // printf("[%d] recv %g %g %g\n", rank, coordinates[0], coordinates[1], 01297 // coordinates[2]); 01298 mesh->vertex_set_coordinates( unghost_vertices[i], coordinates, err );MSQ_ERRRTN( err ); 01299 } 01300 01301 /* deallocate more arrays that we no longer need */ 01302 // delete unghost_vertices; unghost_vertices = 0; 01303 // delete [] unghost_updates; unghost_updates = 0; 01304 // delete [] unghost_requests; unghost_requests = 0; 01305 } 01306 01307 // if (update_num_procs) 01308 //{ 01309 // delete [] update_updates; update_updates = 0; 01310 // delete [] update_requests; update_requests = 0; 01311 //} 01312 01313 // if (vertices) delete vertices; vertices = 0; 01314 // if (vtx_in_partition_boundary) delete [] vtx_in_partition_boundary; vtx_in_partition_boundary 01315 // = 0; if (part_vertices) delete part_vertices; part_vertices = 0; if (unghost_vertices) delete 01316 // unghost_vertices; unghost_vertices = 0; if (part_proc_owner) delete [] part_proc_owner; 01317 // part_proc_owner = 0; if (part_gid) delete [] part_gid; part_gid = 0; if (part_smoothed_flag) 01318 // delete [] part_smoothed_flag; part_smoothed_flag = 0; if (part_rand_number) delete [] 01319 // part_rand_number; part_rand_number = 0; if (exportVtxGIDs) delete [] exportVtxGIDs; 01320 // exportVtxGIDs = 0; if (exportVtxLIDs) delete [] exportVtxLIDs; exportVtxLIDs = 0; if 01321 // (exportProc) delete [] exportProc; exportProc = 0; if (in_independent_set) delete [] 01322 // in_independent_set; in_independent_set = 0; if (vid_map) delete vid_map; vid_map = 0; if 01323 // (neighbourProcSend) delete [] neighbourProcSend; neighbourProcSend = 0; if (neighbourProcRecv) 01324 // delete [] neighbourProcRecv; neighbourProcRecv = 0; if (neighbourProcSendRemain) delete [] 01325 // neighbourProcSendRemain; neighbourProcSendRemain = 0; if (neighbourProcRecvRemain) delete [] 01326 // neighbourProcRecvRemain; neighbourProcRecvRemain = 0; if (vtx_off_proc_list_size) delete [] 01327 // vtx_off_proc_list_size; vtx_off_proc_list_size = 0; if (vtx_off_proc_list) { 01328 // for (i = 0; i < num_vtx_partition_boundary_local; i++) free(vtx_off_proc_list[i]); 01329 // delete [] vtx_off_proc_list; 
vtx_off_proc_list = 0; 01330 //} 01331 // if (neighbourProc) free(neighbourProc); neighbourProc = 0; 01332 } 01333 01334 typedef struct VertexPack 01335 { 01336 double x; 01337 double y; 01338 double z; 01339 union 01340 { 01341 double mist; 01342 size_t glob_id; 01343 }; 01344 } VertexPack; 01345 01346 int ParallelHelperImpl::comm_smoothed_vtx_tnb( MsqError& err ) 01347 { 01348 int i, j, k, rval; 01349 01350 // printf("[%d]i%d truly non blocking\n",rank, iteration);fflush(NULL); 01351 01352 /* compute how many vertices we send to each processor */ 01353 01354 std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 ); 01355 std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 ); 01356 for( i = 0; i < num_exportVtx; i++ ) 01357 { 01358 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01359 { 01360 if( exportProc[i] == neighbourProc[j] ) 01361 { 01362 /* increment count */ 01363 numVtxPerProcSend[j]++; 01364 /* end loop */ 01365 break; 01366 } 01367 } 01368 /* did loop end without finding the processor */ 01369 if( j == (long)neighbourProc.size() ) 01370 { 01371 printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration, 01372 i, exportProc[i], (unsigned long)neighbourProc.size() ); 01373 } 01374 } 01375 01376 /* tell each processor how many vertices they can expect from us */ 01377 /* also ask each processor how many vertices we can expect from them */ 01378 01379 int num_neighbourProcSend = 0; 01380 int num_neighbourProcRecv = 0; 01381 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 01382 std::vector< MPI_Request > requests_recv( neighbourProc.size() ); 01383 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01384 { 01385 /* send the vertex count to this processor */ 01386 if( 0 ) 01387 { 01388 printf( "[%d]i%d Announce send %d vertices from proc %d\n", rank, iteration, numVtxPerProcSend[j], 01389 neighbourProc[j] ); 01390 fflush( NULL ); 01391 } 01392 rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01393 (MPI_Comm)communicator, &( requests_send[j] ) ); 01394 CHECK_MPI_RZERO( rval, err ); 01395 num_neighbourProcSend++; 01396 01397 /* recv the vertex count for this processor */ 01398 if( 0 ) 01399 { 01400 printf( "[%d]i%d Listen recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[j], 01401 neighbourProc[j] ); 01402 fflush( NULL ); 01403 } 01404 rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01405 (MPI_Comm)communicator, &( requests_recv[j] ) ); 01406 CHECK_MPI_RZERO( rval, err ); 01407 num_neighbourProcRecv++; 01408 } 01409 01410 /* set up memory for the outgoing vertex data blocks */ 01411 01412 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 01413 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 01414 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 01415 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01416 { 01417 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 01418 } 01419 01420 /* place vertex data going to the same processor into consecutive memory space */ 01421 01422 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size() ); 01423 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01424 { 01425 numVtxPerProcSendPACKED[i] = 0; 01426 } 01427 for( i = 0; i < num_exportVtx; i++ ) 01428 { 01429 for( j = 0; j < 
(long)neighbourProc.size(); j++ ) 01430 { 01431 if( exportProc[i] == neighbourProc[j] ) 01432 { 01433 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 01434 numVtxPerProcSendPACKED[j]++; 01435 MBMesquite::MsqVertex coordinates; 01436 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 01437 MSQ_ERRZERO( err ); 01438 packing_vertex->x = coordinates[0]; 01439 packing_vertex->y = coordinates[1]; 01440 packing_vertex->z = coordinates[2]; 01441 packing_vertex->glob_id = exportVtxGIDs[i]; 01442 } 01443 } 01444 } 01445 01446 /* wait until we have heard from all processors how many vertices we will receive from them */ 01447 01448 std::vector< MPI_Status > status( neighbourProc.size() ); 01449 01450 if( num_neighbourProcRecv ) 01451 { 01452 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) ); 01453 CHECK_MPI_RZERO( rval, err ); 01454 } 01455 01456 /* how many vertices will we receive */ 01457 01458 int numVtxImport = 0; 01459 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01460 { 01461 numVtxImport += numVtxPerProcRecv[i]; 01462 } 01463 01464 /* set up memory for the incoming vertex data blocks */ 01465 01466 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 01467 std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() ); 01468 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 01469 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01470 { 01471 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1]; 01472 } 01473 01474 /* post receives for all processors that have something for us */ 01475 /* post sends for all processors that we have something for */ 01476 01477 num_neighbourProcRecv = 0; 01478 01479 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01480 { 01481 if( numVtxPerProcRecv[i] ) 01482 { 01483 if( 0 ) 01484 { 01485 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 01486 neighbourProc[i] ); 01487 fflush( NULL ); 01488 } 01489 rval = MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE, neighbourProc[i], 01490 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) ); 01491 CHECK_MPI_RZERO( rval, err ); 01492 num_neighbourProcRecv++; 01493 } 01494 else 01495 { 01496 requests_recv[i] = MPI_REQUEST_NULL; 01497 } 01498 if( numVtxPerProcSend[i] ) 01499 { 01500 if( 0 ) 01501 { 01502 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i], 01503 neighbourProc[i] ); 01504 fflush( NULL ); 01505 } 01506 rval = MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE, neighbourProc[i], 01507 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) ); 01508 CHECK_MPI_RZERO( rval, err ); 01509 } 01510 else 01511 { 01512 requests_send[i] = MPI_REQUEST_NULL; 01513 } 01514 } 01515 01516 /* wait for some receive to arrive */ 01517 01518 int local_id; 01519 while( num_neighbourProcRecv ) 01520 { 01521 rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) ); 01522 CHECK_MPI_RZERO( rval, err ); 01523 /* unpack all vertices */ 01524 for( i = 0; i < numVtxPerProcRecv[k]; i++ ) 01525 { 01526 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01527 if( local_id ) 01528 { 01529 MBMesquite::Vector3D coordinates; 01530 coordinates.set( 
packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 01531 packed_vertices_import[k][i].z ); 01532 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 01533 MSQ_ERRZERO( err ); 01534 assert( part_smoothed_flag[local_id] == 0 ); 01535 part_smoothed_flag[local_id] = 1; 01536 } 01537 else 01538 { 01539 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 01540 packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01541 } 01542 } 01543 num_neighbourProcRecv--; 01544 } 01545 01546 /* all receives have completed. it is save to release the memory */ 01547 // free(vertex_pack_import); 01548 01549 /* wait until the sends have completed */ 01550 01551 if( num_neighbourProcSend ) 01552 { 01553 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) ); 01554 CHECK_MPI_RZERO( rval, err ); 01555 } 01556 01557 /* all sends have completed. it is save to release the memory */ 01558 // free(vertex_pack_export); 01559 01560 return numVtxImport; 01561 } 01562 01563 int ParallelHelperImpl::comm_smoothed_vtx_tnb_no_all( MsqError& err ) 01564 { 01565 int i, j, k, rval; 01566 01567 // printf("[%d]i%d truly non blocking avoid reduce all\n", rank, iteration); fflush(NULL); 01568 01569 /* compute how many vertices we send to each processor */ 01570 01571 std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 ); 01572 std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 ); 01573 for( i = 0; i < num_exportVtx; i++ ) 01574 { 01575 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01576 { 01577 if( exportProc[i] == neighbourProc[j] ) 01578 { 01579 /* increment count */ 01580 numVtxPerProcSend[j]++; 01581 /* end loop */ 01582 break; 01583 } 01584 } 01585 /* did loop end without finding the processor */ 01586 if( j == (long)neighbourProc.size() ) 01587 { 01588 printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration, 01589 i, exportProc[i], (unsigned long)neighbourProc.size() ); 01590 } 01591 } 01592 01593 /* tell each processor how many vertices they can expect from us */ 01594 /* also ask each processor how many vertices we can expect from them */ 01595 01596 int num_neighbourProcSend = 0; 01597 int num_neighbourProcRecv = 0; 01598 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 01599 std::vector< MPI_Request > requests_recv( neighbourProc.size() ); 01600 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01601 { 01602 if( neighbourProcSendRemain[j] ) 01603 { 01604 /* send the vertex count to this processor */ 01605 if( 0 ) 01606 { 01607 printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j], 01608 neighbourProc[j] ); 01609 fflush( NULL ); 01610 } 01611 rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01612 (MPI_Comm)communicator, &( requests_send[j] ) ); 01613 CHECK_MPI_RZERO( rval, err ); 01614 num_neighbourProcSend++; 01615 } 01616 else 01617 { 01618 requests_send[j] = MPI_REQUEST_NULL; 01619 } 01620 if( neighbourProcRecvRemain[j] ) 01621 { 01622 /* recv the vertex count for this processor */ 01623 if( 0 ) 01624 { 01625 printf( "[%d]i%d Listen recv xx vertices from proc %d\n", rank, iteration, neighbourProc[j] ); 01626 fflush( NULL ); 01627 } 01628 rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01629 (MPI_Comm)communicator, &( requests_recv[j] ) ); 01630 CHECK_MPI_RZERO( rval, err ); 01631 
num_neighbourProcRecv++; 01632 } 01633 else 01634 { 01635 requests_recv[j] = MPI_REQUEST_NULL; 01636 } 01637 } 01638 01639 /* set up memory for the outgoing vertex data blocks */ 01640 01641 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 01642 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 01643 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 01644 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01645 { 01646 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 01647 } 01648 01649 /* place vertex data going to the same processor into consecutive memory space */ 01650 01651 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 ); 01652 for( i = 0; i < num_exportVtx; i++ ) 01653 { 01654 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01655 { 01656 if( exportProc[i] == neighbourProc[j] ) 01657 { 01658 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 01659 numVtxPerProcSendPACKED[j]++; 01660 MBMesquite::MsqVertex coordinates; 01661 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 01662 MSQ_ERRZERO( err ); 01663 packing_vertex->x = coordinates[0]; 01664 packing_vertex->y = coordinates[1]; 01665 packing_vertex->z = coordinates[2]; 01666 packing_vertex->glob_id = exportVtxGIDs[i]; 01667 if( 0 ) 01668 printf( "[%d]i%d vertex %lu packed %g %g %g\n", rank, iteration, (unsigned long)exportVtxGIDs[i], 01669 packing_vertex->x, packing_vertex->y, packing_vertex->z ); 01670 } 01671 } 01672 } 01673 01674 /* wait until we have heard from all processors how many vertices we will receive from them */ 01675 01676 std::vector< MPI_Status > status( neighbourProc.size() ); 01677 01678 if( num_neighbourProcRecv ) 01679 { 01680 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) ); 01681 CHECK_MPI_RZERO( rval, err ); 01682 } 01683 01684 /* how many vertices will we receive */ 01685 01686 int numVtxImport = 0; 01687 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01688 { 01689 numVtxImport += numVtxPerProcRecv[i]; 01690 neighbourProcRecvRemain[i] -= numVtxPerProcRecv[i]; 01691 neighbourProcSendRemain[i] -= numVtxPerProcSend[i]; 01692 } 01693 01694 /* set up memory for the incoming vertex data blocks */ 01695 01696 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 01697 std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() ); 01698 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 01699 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01700 { 01701 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1]; 01702 } 01703 01704 /* post receives for all processors that have something for us */ 01705 /* post sends for all processors that we have something for */ 01706 01707 num_neighbourProcRecv = 0; 01708 01709 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01710 { 01711 if( 0 ) 01712 { 01713 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 01714 neighbourProc[i] ); 01715 fflush( NULL ); 01716 } 01717 if( numVtxPerProcRecv[i] ) 01718 { 01719 rval = MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE, neighbourProc[i], 01720 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) ); 01721 CHECK_MPI_RZERO( rval, err ); 01722 
num_neighbourProcRecv++; 01723 } 01724 else 01725 { 01726 requests_recv[i] = MPI_REQUEST_NULL; 01727 } 01728 if( 0 ) 01729 { 01730 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i], 01731 neighbourProc[i] ); 01732 fflush( NULL ); 01733 } 01734 if( numVtxPerProcSend[i] ) 01735 { 01736 rval = MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE, neighbourProc[i], 01737 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) ); 01738 CHECK_MPI_RZERO( rval, err ); 01739 } 01740 else 01741 { 01742 requests_send[i] = MPI_REQUEST_NULL; 01743 } 01744 } 01745 01746 /* wait for some receive to arrive */ 01747 01748 int local_id; 01749 while( num_neighbourProcRecv ) 01750 { 01751 rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) ); 01752 CHECK_MPI_RZERO( rval, err ); 01753 /* unpack all vertices */ 01754 for( i = 0; i < numVtxPerProcRecv[k]; i++ ) 01755 { 01756 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01757 if( local_id ) 01758 { 01759 MBMesquite::Vector3D coordinates; 01760 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 01761 packed_vertices_import[k][i].z ); 01762 if( 0 ) 01763 printf( "[%d]i%d vertex %d becomes %g %g %g\n", rank, iteration, local_id, 01764 packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 01765 packed_vertices_import[k][i].z ); 01766 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 01767 MSQ_ERRZERO( err ); 01768 assert( part_smoothed_flag[local_id] == 0 ); 01769 part_smoothed_flag[local_id] = 1; 01770 } 01771 else 01772 { 01773 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 01774 packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01775 } 01776 } 01777 num_neighbourProcRecv--; 01778 } 01779 01780 /* all receives have completed. it is save to release the memory */ 01781 // free(vertex_pack_import); 01782 01783 /* wait until the sends have completed */ 01784 01785 if( num_neighbourProcSend ) 01786 { 01787 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) ); 01788 CHECK_MPI_RZERO( rval, err ); 01789 } 01790 01791 /* all sends have completed. 
it is save to release the memory */ 01792 // free(vertex_pack_export); 01793 01794 return numVtxImport; 01795 } 01796 01797 int ParallelHelperImpl::comm_smoothed_vtx_nb( MsqError& err ) 01798 { 01799 int i, j, k, rval; 01800 01801 // printf("[%d] %d %d non blocking\n",rank, iteration, pass);fflush(NULL); 01802 01803 /* how many vertices will we receive */ 01804 01805 std::vector< int > numVtxPerProcSend( neighbourProc.size() ); 01806 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01807 { 01808 numVtxPerProcSend[i] = 0; 01809 } 01810 for( i = 0; i < num_exportVtx; i++ ) 01811 { 01812 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01813 { 01814 if( exportProc[i] == neighbourProc[j] ) 01815 { 01816 /* increment count */ 01817 numVtxPerProcSend[j]++; 01818 /* end loop */ 01819 break; 01820 } 01821 } 01822 /* assert loop did not end without finding processor */ 01823 assert( j != (long)neighbourProc.size() ); 01824 } 01825 01826 /* tell each processor how many vertices to expect */ 01827 01828 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01829 { 01830 rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01831 (MPI_Comm)communicator ); 01832 CHECK_MPI_RZERO( rval, err ); 01833 // printf("[%d]i%d Announcing %d vertices to proc 01834 // %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL); 01835 } 01836 01837 /* place vertex data going to the same processor into consecutive memory space */ 01838 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 01839 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 01840 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 01841 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01842 { 01843 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 01844 } 01845 01846 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 ); 01847 for( i = 0; i < num_exportVtx; i++ ) 01848 { 01849 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01850 { 01851 if( exportProc[i] == neighbourProc[j] ) 01852 { 01853 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 01854 numVtxPerProcSendPACKED[j]++; 01855 MBMesquite::MsqVertex coordinates; 01856 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 01857 MSQ_ERRZERO( err ); 01858 packing_vertex->x = coordinates[0]; 01859 packing_vertex->y = coordinates[1]; 01860 packing_vertex->z = coordinates[2]; 01861 packing_vertex->glob_id = exportVtxGIDs[i]; 01862 } 01863 } 01864 } 01865 // delete [] numVtxPerProcSendPACKED; 01866 01867 /* now ask each processor how many vertices to expect */ 01868 01869 int num; 01870 int proc; 01871 int numVtxImport = 0; 01872 int num_neighbourProcRecv = 0; 01873 std::vector< int > numVtxPerProcRecv( neighbourProc.size() ); 01874 MPI_Status status; 01875 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01876 { 01877 numVtxPerProcRecv[j] = 0; 01878 } 01879 01880 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01881 { 01882 /* get the vertex count for some processor */ 01883 rval = MPI_Recv( &num, /* message buffer */ 01884 1, /* one data item */ 01885 MPI_INT, /* of type int */ 01886 MPI_ANY_SOURCE, /* receive from any sender */ 01887 VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */ 01888 (MPI_Comm)communicator, /* default communicator */ 01889 &status ); /* info about the 
received message */ 01890 CHECK_MPI_RZERO( rval, err ); 01891 proc = status.MPI_SOURCE; 01892 /* will we import vertices from this processor */ 01893 01894 // printf("[%d]i%d Heard we will receive %d vertices from proc 01895 // %d\n",rank,iteration,num,proc); fflush(NULL); 01896 01897 if( num ) 01898 { 01899 /* increase number of processors we will receive from */ 01900 num_neighbourProcRecv++; 01901 /* add number of vertices we will receive to the import total */ 01902 numVtxImport += num; 01903 } 01904 /* find this processor in the list */ 01905 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01906 { 01907 if( neighbourProc[i] == proc ) 01908 { 01909 numVtxPerProcRecv[i] = num; 01910 break; 01911 } 01912 } 01913 /* assert loop did not end without finding processor */ 01914 assert( i != (long)neighbourProc.size() ); 01915 if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc ); 01916 fflush( NULL ); 01917 } 01918 01919 /* create list of processors we receive from */ 01920 std::vector< int > neighbourProcRecv( num_neighbourProcRecv ); 01921 std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv ); 01922 for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ ) 01923 { 01924 if( 0 ) 01925 printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 01926 neighbourProc[i] ); 01927 fflush( NULL ); 01928 if( numVtxPerProcRecv[i] ) 01929 { 01930 neighbourProcRecv[k] = neighbourProc[i]; 01931 numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i]; 01932 k++; 01933 } 01934 } 01935 01936 /* set up memory for the incoming vertex data blocks */ 01937 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 01938 std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv ); 01939 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 01940 for( i = 1; i < num_neighbourProcRecv; i++ ) 01941 { 01942 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1]; 01943 } 01944 01945 /* receive from all processors that have something for us */ 01946 std::vector< MPI_Request > request( num_neighbourProcRecv ); 01947 for( j = 0; j < num_neighbourProcRecv; j++ ) 01948 { 01949 rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE, neighbourProcRecv[j], 01950 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) ); 01951 CHECK_MPI_RZERO( rval, err ); 01952 if( false ) 01953 { 01954 printf( "[%d]i%d Scheduling receipt of %d vertices to proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j], 01955 neighbourProcRecv[j] ); 01956 fflush( NULL ); 01957 } 01958 } 01959 01960 /* now send the data blocks */ 01961 01962 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 01963 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01964 { 01965 if( numVtxPerProcSend[j] ) 01966 { 01967 rval = MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE, neighbourProc[j], 01968 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) ); 01969 CHECK_MPI_RZERO( rval, err ); 01970 if( 0 ) 01971 { 01972 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j], 01973 neighbourProc[j] ); 01974 fflush( NULL ); 01975 } 01976 } 01977 else 01978 { 01979 requests_send[j] = MPI_REQUEST_NULL; 01980 } 01981 } 01982 01983 /* process messages as they arrive */ 01984 01985 int local_id; 01986 for( j = 0; j < 
num_neighbourProcRecv; j++ ) 01987 { 01988 rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status ); 01989 CHECK_MPI_RZERO( rval, err ); 01990 01991 /* unpack messages */ 01992 proc = status.MPI_SOURCE; 01993 int count; 01994 MPI_Get_count( &status, MPI_INT, &count ); 01995 if( 0 ) 01996 printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k], 01997 count, neighbourProcRecv[k], proc ); 01998 fflush( NULL ); 01999 for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ ) 02000 { 02001 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 02002 if( local_id ) 02003 { 02004 MBMesquite::Vector3D coordinates; 02005 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 02006 packed_vertices_import[k][i].z ); 02007 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 02008 MSQ_ERRZERO( err ); 02009 assert( part_smoothed_flag[local_id] == 0 ); 02010 part_smoothed_flag[local_id] = 1; 02011 if( 0 ) 02012 printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration, 02013 (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x, 02014 packed_vertices_import[k][i].y, packed_vertices_import[k][i].z ); 02015 } 02016 else 02017 { 02018 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 02019 packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 02020 } 02021 } 02022 } 02023 02024 /* all receives have completed. it is save to release the memory */ 02025 // free(vertex_pack_import); 02026 /* wait until the sends have completed */ 02027 std::vector< MPI_Status > stati( neighbourProc.size() ); 02028 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) ); 02029 CHECK_MPI_RZERO( rval, err ); 02030 /* all sends have completed. 
it is save to release the memory */ 02031 // free(vertex_pack_export); 02032 02033 return numVtxImport; 02034 } 02035 02036 int ParallelHelperImpl::comm_smoothed_vtx_nb_no_all( MsqError& err ) 02037 { 02038 int i, j, k, rval; 02039 02040 // printf("[%d] %d %d non blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL); 02041 02042 /* how many vertices will we receive */ 02043 02044 std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 ); 02045 for( i = 0; i < num_exportVtx; i++ ) 02046 { 02047 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02048 { 02049 if( exportProc[i] == neighbourProc[j] ) 02050 { 02051 /* increment count */ 02052 numVtxPerProcSend[j]++; 02053 /* end loop */ 02054 break; 02055 } 02056 } 02057 /* assert loop did not end without finding processor */ 02058 assert( j != (long)neighbourProc.size() ); 02059 } 02060 02061 /* tell each processor how many vertices to expect */ 02062 02063 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02064 { 02065 if( neighbourProcSendRemain[j] ) 02066 { 02067 assert( neighbourProcSendRemain[j] >= numVtxPerProcSend[j] ); 02068 neighbourProcSendRemain[j] -= numVtxPerProcSend[j]; 02069 rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 02070 (MPI_Comm)communicator ); 02071 CHECK_MPI_RZERO( rval, err ); 02072 // printf("[%d]i%d Announcing %d vertices to proc 02073 // %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL); 02074 } 02075 else 02076 { 02077 assert( numVtxPerProcSend[j] == 0 ); 02078 } 02079 } 02080 02081 /* place vertex data going to the same processor into consecutive memory space */ 02082 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 02083 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 02084 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 02085 for( i = 1; i < (long)neighbourProc.size(); i++ ) 02086 { 02087 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 02088 } 02089 02090 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 ); 02091 for( i = 0; i < num_exportVtx; i++ ) 02092 { 02093 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02094 { 02095 if( exportProc[i] == neighbourProc[j] ) 02096 { 02097 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 02098 numVtxPerProcSendPACKED[j]++; 02099 MBMesquite::MsqVertex coordinates; 02100 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 02101 MSQ_ERRZERO( err ); 02102 packing_vertex->x = coordinates[0]; 02103 packing_vertex->y = coordinates[1]; 02104 packing_vertex->z = coordinates[2]; 02105 packing_vertex->glob_id = exportVtxGIDs[i]; 02106 } 02107 } 02108 } 02109 // delete [] numVtxPerProcSendPACKED; 02110 02111 /* now ask each processor how many vertices to expect */ 02112 02113 int num; 02114 int proc; 02115 int numVtxImport = 0; 02116 int num_neighbourProcRecv = 0; 02117 std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 ); 02118 MPI_Status status; 02119 02120 int num_neighbourProcRecvRemain = 0; 02121 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02122 { 02123 if( neighbourProcRecvRemain[j] ) 02124 { 02125 num_neighbourProcRecvRemain++; 02126 } 02127 } 02128 for( j = 0; j < num_neighbourProcRecvRemain; j++ ) 02129 { 02130 /* get the vertex count for some processor */ 02131 rval = MPI_Recv( &num, /* message buffer */ 02132 1, /* one 
data item */ 02133 MPI_INT, /* of type int */ 02134 MPI_ANY_SOURCE, /* receive from any sender */ 02135 VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */ 02136 (MPI_Comm)communicator, /* default communicator */ 02137 &status ); /* info about the received message */ 02138 CHECK_MPI_RZERO( rval, err ); 02139 proc = status.MPI_SOURCE; 02140 /* will we import vertices from this processor */ 02141 02142 // printf("[%d]i%d Heard we will receive %d vertices from proc 02143 // %d\n",rank,iteration,num,proc); fflush(NULL); 02144 02145 if( num ) 02146 { 02147 /* increase number of processors we will receive from */ 02148 num_neighbourProcRecv++; 02149 /* add number of vertices we will receive to the import total */ 02150 numVtxImport += num; 02151 } 02152 /* find this processor in the list */ 02153 for( i = 0; i < (long)neighbourProc.size(); i++ ) 02154 { 02155 if( neighbourProc[i] == proc ) 02156 { 02157 numVtxPerProcRecv[i] = num; 02158 assert( neighbourProcRecvRemain[i] >= num ); 02159 neighbourProcRecvRemain[i] -= num; 02160 break; 02161 } 02162 } 02163 /* assert loop did not end without finding processor */ 02164 assert( i != (long)neighbourProc.size() ); 02165 if( false ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc ); 02166 fflush( NULL ); 02167 } 02168 02169 /* create list of processors we receive from */ 02170 std::vector< int > neighbourProcRecv( num_neighbourProcRecv ); 02171 std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv ); 02172 for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ ) 02173 { 02174 if( 0 ) 02175 printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 02176 neighbourProc[i] ); 02177 fflush( NULL ); 02178 if( numVtxPerProcRecv[i] ) 02179 { 02180 neighbourProcRecv[k] = neighbourProc[i]; 02181 numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i]; 02182 k++; 02183 } 02184 } 02185 02186 /* set up memory for the incoming vertex data blocks */ 02187 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 02188 std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv ); 02189 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 02190 for( i = 1; i < num_neighbourProcRecv; i++ ) 02191 { 02192 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1]; 02193 } 02194 02195 /* receive from all processors that have something for us */ 02196 std::vector< MPI_Request > request( num_neighbourProcRecv ); 02197 for( j = 0; j < num_neighbourProcRecv; j++ ) 02198 { 02199 rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE, neighbourProcRecv[j], 02200 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) ); 02201 CHECK_MPI_RZERO( rval, err ); 02202 if( false ) 02203 { 02204 printf( "[%d]i%d Scheduling receipt of %d vertices to proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j], 02205 neighbourProcRecv[j] ); 02206 fflush( NULL ); 02207 } 02208 } 02209 02210 /* now send the data blocks */ 02211 02212 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 02213 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02214 { 02215 if( numVtxPerProcSend[j] ) 02216 { 02217 rval = MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE, neighbourProc[j], 02218 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) ); 02219 CHECK_MPI_RZERO( rval, err ); 02220 if( 0 
) 02221 { 02222 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j], 02223 neighbourProc[j] ); 02224 fflush( NULL ); 02225 } 02226 } 02227 else 02228 { 02229 requests_send[j] = MPI_REQUEST_NULL; 02230 } 02231 } 02232 02233 /* process messages as they arrive */ 02234 02235 int local_id; 02236 for( j = 0; j < num_neighbourProcRecv; j++ ) 02237 { 02238 rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status ); 02239 CHECK_MPI_RZERO( rval, err ); 02240 02241 /* unpack messages */ 02242 proc = status.MPI_SOURCE; 02243 int count; 02244 MPI_Get_count( &status, MPI_INT, &count ); 02245 if( 0 ) 02246 printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k], 02247 count, neighbourProcRecv[k], proc ); 02248 fflush( NULL ); 02249 for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ ) 02250 { 02251 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 02252 if( local_id ) 02253 { 02254 MBMesquite::Vector3D coordinates; 02255 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 02256 packed_vertices_import[k][i].z ); 02257 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 02258 MSQ_ERRZERO( err ); 02259 assert( part_smoothed_flag[local_id] == 0 ); 02260 part_smoothed_flag[local_id] = 1; 02261 if( 0 ) 02262 printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration, 02263 (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x, 02264 packed_vertices_import[k][i].y, packed_vertices_import[k][i].z ); 02265 } 02266 else 02267 { 02268 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 02269 packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 02270 } 02271 } 02272 } 02273 02274 /* all receives have completed. it is save to release the memory */ 02275 // free(vertex_pack_import); 02276 /* wait until the sends have completed */ 02277 std::vector< MPI_Status > stati( neighbourProc.size() ); 02278 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) ); 02279 CHECK_MPI_RZERO( rval, err ); 02280 /* all sends have completed. 
   it is safe to release the memory */
    // free(vertex_pack_export);

    return numVtxImport;
}

int ParallelHelperImpl::comm_smoothed_vtx_b( MsqError& err )
{
    int i, j, rval;

    // printf("[%d] %d %d blocking\n",rank, iteration, pass);fflush(NULL);

    /* how many vertices per processor */

    std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
    for( i = 0; i < num_exportVtx; i++ )
    {
        for( j = 0; j < (long)neighbourProc.size(); j++ )
        {
            if( exportProc[i] == neighbourProc[j] )
            {
                /* increment count */
                numVtxPerProc[j]++;
                /* end loop */
                break;
            }
        }
        /* assert loop did not end without finding the processor */
        assert( j != (long)neighbourProc.size() );
    }

    /* place vertices going to the same processor into consecutive memory space */

    std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
    std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
    VertexPack* packing_vertex;
    if( neighbourProc.size() ) packed_vertices[0] = ARRPTR( vertex_pack );
    for( i = 1; i < (long)neighbourProc.size(); i++ )
    {
        packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
    }

    std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );

    for( i = 0; i < num_exportVtx; i++ )
    {
        for( j = 0; j < (long)neighbourProc.size(); j++ )
        {
            if( exportProc[i] == neighbourProc[j] )
            {
                packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
                numVtxPackedPerProc[j]++;
                MBMesquite::MsqVertex coordinates;
                mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
                MSQ_ERRZERO( err );
                packing_vertex->x = coordinates[0];
                packing_vertex->y = coordinates[1];
                packing_vertex->z = coordinates[2];
                packing_vertex->glob_id = exportVtxGIDs[i];
            }
        }
    }

    // delete [] numVtxPackedPerProc;

    /* send each block to the corresponding processor, preceded by the number of vertices */

    for( j = 0; j < (long)neighbourProc.size(); j++ )
    {

        // printf("[%d]i%dp%d Announcing %d vertices to proc
        // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);

        rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
                         (MPI_Comm)communicator );
        CHECK_MPI_RZERO( rval, err );
        // printf("[%d]i%dp%d Sending %d vertices to proc
        // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);

        /* is there any vertex data to be sent */

        if( numVtxPerProc[j] )
        {
            rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE, neighbourProc[j],
                             VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
            CHECK_MPI_RZERO( rval, err );
            // printf("[%d]i%dp%d Sent %d vertices to proc
            // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
        }
    }

    int num;
    int proc;
    // int tag;
    int count;
    int numVtxImport = 0;
    MPI_Status status;
    int local_id;

    // printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
    // \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);

    /* receiving blocks from other processors */

    for( j = 0; j < (long)neighbourProc.size(); j++ )
    {
        rval = MPI_Recv( &num,                      /* message buffer */
                         1,                         /* one data item */
                         MPI_INT,                   /* of type int */
                         MPI_ANY_SOURCE,            /* receive from any sender */
                         VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
                         (MPI_Comm)communicator,    /* default communicator */
                         &status );                 /* info about the received message */
        CHECK_MPI_RZERO( rval, err );
        proc = status.MPI_SOURCE;
        // tag = status.MPI_TAG;
        MPI_Get_count( &status, MPI_INT, &count );

        // printf("[%d]i%dp%d Receiving %d vertices from proc
        // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);

        /* is there any vertex data to be received */

        if( num )
        {

            numVtxImport += num;

            /* do we have enough space allocated */

            if( num_exportVtx + 10 < num )
            {
                // if (vertex_pack) free(vertex_pack);
                num_exportVtx = num;
                vertex_pack.resize( num_exportVtx + 10 );
                // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
            }

            rval = MPI_Recv( ARRPTR( vertex_pack ),     /* message buffer */
                             4 * num,                   /* num items with 4 doubles each */
                             MPI_DOUBLE,                /* of type double */
                             proc,                      /* receive from this processor only */
                             VERTEX_BLOCK + iteration,  /* receive only VERTEX BLOCKs */
                             (MPI_Comm)communicator,    /* default communicator */
                             &status );                 /* info about the received message */
            CHECK_MPI_RZERO( rval, err );

            proc = status.MPI_SOURCE;
            // tag = status.MPI_TAG;
            MPI_Get_count( &status, MPI_DOUBLE, &count );

            if( count != 4 * num )
                printf( "[%d]i%d WARNING: expected %d vertices = %d doubles from proc %d but only "
                        "got %d doubles\n",
                        rank, iteration, num, num * 4, proc, count );
            fflush( NULL );

            // printf("[%d]i%d Received %d vertices from proc
            // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);

            /* update the received vertices in our boundary mesh */
            for( i = 0; i < num; i++ )
            {
                /* printf("[%d]i%d updating vertex %d with global_id
                 * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
                local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
                if( local_id )
                {
                    MBMesquite::Vector3D coordinates;
                    coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
                    mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
                    MSQ_ERRZERO( err );
                    assert( part_smoothed_flag[local_id] == 0 );
                    part_smoothed_flag[local_id] = 1;
                    if( 0 )
                        printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
                                (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
                }
                else
                {
                    printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
                            vertex_pack[i].glob_id, proc );
                }
            }
        }
    }
    // if (vertex_pack) free(vertex_pack);

    return numVtxImport;
}
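
/* Illustrative sketch (not part of the original ParallelHelper.cpp): comm_smoothed_vtx_b above
 * uses a "count header, then payload" protocol over plain blocking MPI_Send/MPI_Recv, matching
 * headers by tag and payloads by (source, tag).  The standalone helper below shows the same
 * pattern in isolation for one double-valued block per neighbour; the function name and the tag
 * values are hypothetical and exist only for this example.  Like the blocking variant above it
 * posts all sends before any receive, so it relies on MPI buffering of the outgoing messages. */
static void example_counted_block_exchange( MPI_Comm comm,
                                            const std::vector< int >& neighbours,
                                            const std::vector< std::vector< double > >& outgoing,
                                            std::vector< std::vector< double > >& incoming )
{
    const int TAG_COUNT = 100;
    const int TAG_DATA  = 101;

    /* announce the payload size to every neighbour, then send the payload itself */
    for( size_t j = 0; j < neighbours.size(); ++j )
    {
        int n = (int)outgoing[j].size();
        MPI_Send( &n, 1, MPI_INT, neighbours[j], TAG_COUNT, comm );
        if( n ) MPI_Send( (void*)&outgoing[j][0], n, MPI_DOUBLE, neighbours[j], TAG_DATA, comm );
    }

    /* accept one header from each neighbour in arrival order, then fetch that
     * neighbour's payload with a receive restricted to the announcing source */
    incoming.assign( neighbours.size(), std::vector< double >() );
    for( size_t j = 0; j < neighbours.size(); ++j )
    {
        MPI_Status status;
        int n = 0;
        MPI_Recv( &n, 1, MPI_INT, MPI_ANY_SOURCE, TAG_COUNT, comm, &status );
        const int proc = status.MPI_SOURCE;
        for( size_t i = 0; i < neighbours.size(); ++i )
        {
            if( neighbours[i] != proc ) continue;
            incoming[i].resize( n );
            if( n ) MPI_Recv( &incoming[i][0], n, MPI_DOUBLE, proc, TAG_DATA, comm, &status );
            break;
        }
    }
}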
int ParallelHelperImpl::comm_smoothed_vtx_b_no_all( MsqError& err )
{
    int i, j, rval;

    // printf("[%d] %d %d blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);

    /* how many vertices per processor */

    std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
    for( i = 0; i < num_exportVtx; i++ )
    {
        for( j = 0; j < (long)neighbourProc.size(); j++ )
        {
            if( exportProc[i] == neighbourProc[j] )
            {
                /* increment count */
                numVtxPerProc[j]++;
                /* end loop */
                break;
            }
        }
        /* assert loop did not end without finding the processor */
        assert( j != (long)neighbourProc.size() );
    }

    /* place vertices going to the same processor into consecutive memory space */

    std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
    std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
    VertexPack* packing_vertex;
    if( neighbourProc.size() ) packed_vertices[0] = ARRPTR( vertex_pack );
    for( i = 1; i < (long)neighbourProc.size(); i++ )
    {
        packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
    }

    std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );

    for( i = 0; i < num_exportVtx; i++ )
    {
        for( j = 0; j < (long)neighbourProc.size(); j++ )
        {
            if( exportProc[i] == neighbourProc[j] )
            {
                packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
                numVtxPackedPerProc[j]++;
                MBMesquite::MsqVertex coordinates;
                mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
                MSQ_ERRZERO( err );
                packing_vertex->x = coordinates[0];
                packing_vertex->y = coordinates[1];
                packing_vertex->z = coordinates[2];
                packing_vertex->glob_id = exportVtxGIDs[i];
            }
        }
    }

    // delete [] numVtxPackedPerProc;

    /* send each block to the corresponding processor, preceded by the number of vertices */

    for( j = 0; j < (long)neighbourProc.size(); j++ )
    {

        if( neighbourProcSendRemain[j] )
        {
            assert( neighbourProcSendRemain[j] >= numVtxPerProc[j] );
            neighbourProcSendRemain[j] -= numVtxPerProc[j];

            // printf("[%d]i%dp%d Announcing %d vertices to proc
            // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);

            rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
                             (MPI_Comm)communicator );
            CHECK_MPI_RZERO( rval, err );

            // printf("[%d]i%dp%d Sending %d vertices to proc
            // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);

            /* is there any vertex data to be sent */

            if( numVtxPerProc[j] )
            {
                rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE, neighbourProc[j],
                                 VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
                CHECK_MPI_RZERO( rval, err );

                // printf("[%d]i%dp%d Sent %d vertices to proc
                // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
            }
        }
        else
        {
            // printf("[%d]i%dp%d no longer sending to %d\n",rank,iteration,neighbourProc[j]);
            // fflush(NULL);
        }
    }

    int num;
    int proc;
    // int tag;
    int count;
    int numVtxImport = 0;
    MPI_Status status;
    int local_id;

    // printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
    // \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);

    /* receiving blocks in any order from other processors */

    int num_neighbourProcRecvRemain = 0;
    for( j = 0; j < (long)neighbourProc.size(); j++ )
    {
        if( neighbourProcRecvRemain[j] )
        {
            num_neighbourProcRecvRemain++;
        }
    }
    for( j = 0; j < num_neighbourProcRecvRemain; j++ )
    {
        rval = MPI_Recv( &num,                      /* message buffer */
                         1,                         /* one data item */
                         MPI_INT,                   /* of type int */
                         MPI_ANY_SOURCE,            /* receive from any sender */
                         VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
                         (MPI_Comm)communicator,    /* default communicator */
                         &status );                 /* info about the received message */
        CHECK_MPI_RZERO( rval, err );
        proc = status.MPI_SOURCE;
        // tag = status.MPI_TAG;
        MPI_Get_count( &status, MPI_INT, &count );

        // printf("[%d]i%dp%d Receiving %d vertices from proc
        // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);

        /* is there any vertex data to be received */

        if( num )
        {

            numVtxImport += num;

            /* do we have enough space allocated */

            if( num_exportVtx + 10 < num )
            {
                // if (vertex_pack) free(vertex_pack);
                num_exportVtx = num;
                // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
                vertex_pack.resize( num_exportVtx + 10 );
            }

            rval = MPI_Recv( ARRPTR( vertex_pack ),     /* message buffer */
                             4 * num,                   /* num items with 4 doubles each */
                             MPI_DOUBLE,                /* of type double */
                             proc,                      /* receive from this processor only */
                             VERTEX_BLOCK + iteration,  /* receive only VERTEX BLOCKs */
                             (MPI_Comm)communicator,    /* default communicator */
                             &status );                 /* info about the received message */
            CHECK_MPI_RZERO( rval, err );

            proc = status.MPI_SOURCE;
            // tag = status.MPI_TAG;
            MPI_Get_count( &status, MPI_DOUBLE, &count );

            if( count != 4 * num )
                printf( "[%d]i%d WARNING: expected %d vertices = %d doubles from proc %d but only "
                        "got %d doubles\n",
                        rank, iteration, num, num * 4, proc, count );
            fflush( NULL );

            // printf("[%d]i%d Received %d vertices from proc
            // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);

            /* update the received vertices in our boundary mesh */
            for( i = 0; i < num; i++ )
            {
                /* printf("[%d]i%d updating vertex %d with global_id
                 * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
                local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
                if( local_id )
                {
                    MBMesquite::Vector3D coordinates;
                    coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
                    mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
                    MSQ_ERRZERO( err );
                    assert( part_smoothed_flag[local_id] == 0 );
                    part_smoothed_flag[local_id] = 1;
                    if( 0 && rank == 1 )
                        printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
                                (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
                }
                else
                {
                    printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
                            vertex_pack[i].glob_id, proc );
                }
            }
        }
        for( i = 0; i < (long)neighbourProc.size(); i++ )
        {
            if( proc == neighbourProc[i] )
            {
                assert( neighbourProcRecvRemain[i] >= num );
                neighbourProcRecvRemain[i] -= num;
                break;
            }
        }
    }
    // if (vertex_pack) free(vertex_pack);

    return numVtxImport;
}
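
/* Illustrative sketch (not part of the original ParallelHelper.cpp): the *_no_all variants avoid
 * the global reduction otherwise needed to detect termination.  Each rank starts from a
 * precomputed total of vertices it will ever send to / receive from each neighbour
 * (neighbourProcSendRemain / neighbourProcRecvRemain in the code above), decrements those totals
 * every pass, and only listens to a neighbour while its remaining count is positive.  The struct
 * below is a minimal, hypothetical rendering of that bookkeeping. */
struct ExampleRemainingCounts
{
    std::vector< int > send_remain;  // vertices still owed to each neighbour
    std::vector< int > recv_remain;  // vertices still expected from each neighbour

    /* record one pass worth of traffic */
    void update( const std::vector< int >& sent, const std::vector< int >& received )
    {
        for( size_t j = 0; j < send_remain.size(); ++j )
        {
            assert( send_remain[j] >= sent[j] && recv_remain[j] >= received[j] );
            send_remain[j] -= sent[j];
            recv_remain[j] -= received[j];
        }
    }

    /* how many neighbours still have headers coming, i.e. how many header receives the
     * next pass must post - no MPI_Allreduce is required to know this */
    int active_recv_partners() const
    {
        int n = 0;
        for( size_t j = 0; j < recv_remain.size(); ++j )
            if( recv_remain[j] ) ++n;
        return n;
    }
};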
/***********************************************************************
COMPUTING THE INDEPENDENT SET AND EXPORT INFORMATION
***********************************************************************/
/* the only thing that can prevent a vertex from being in the independent
   set is that it has a neighbouring vertex on a different processor that
   has not been smoothed yet and that has a larger random number */

void ParallelHelperImpl::compute_independent_set()
{
    int i, j, k, l;
    int incident_vtx;
    bool done;

    for( i = 0; i < num_vtx_partition_boundary_local; i++ )
        in_independent_set[i] = false;

    bool found_more = true;
    int found_iter  = 0;

    num_exportVtx = 0;

    while( found_more )
    {
        found_iter++;
        found_more = false;
        for( i = 0; i < num_vtx_partition_boundary_local; i++ )
        {
            /* if this vertex could become part of the independent set */
            if( part_smoothed_flag[i] == 0 && in_independent_set[i] == false )
            {
                /* assume it's in the independent set */
                done = false;
                /* then loop over the neighbors it has on other processors */
                for( j = 0; !done && j < (long)vtx_off_proc_list[i].size(); j++ )
                {
                    incident_vtx = vtx_off_proc_list[i][j];
                    /* if this neighbour has not yet been smoothed and is not covered and ... */
                    if( part_smoothed_flag[incident_vtx] == 0 )
                    {
                        /* ... has a higher rand_number than me */
                        if( part_rand_number[i] < part_rand_number[incident_vtx] )
                        {
                            /* then I am not in the independent set */
                            done = true;
                            break;
                        }
                        /* ... or has the same rand_number as me but a higher processor id */
                        else if( ( part_rand_number[i] == part_rand_number[incident_vtx] ) &&
                                 ( rank < part_proc_owner[incident_vtx] ) )
                        {
                            /* then I am not in the independent set */
                            done = true;
                            break;
                        }
                    }
                }
                /* if the vertex is in the independent set, add it to the export list */
                if( !done )
                {
                    found_more = true;
                    // if (found_iter > 1) printf("[%d]i%d found another one %d in iteration
                    //%d\n",rank,iteration,i, found_iter);
                    in_independent_set[i] = true;
                    /* mark vertices with lower random numbers as covered */
                    for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
                    {
                        incident_vtx = vtx_off_proc_list[i][j];
                        /* if this neighbour has not yet been smoothed or covered, mark it as covered */
                        if( part_smoothed_flag[incident_vtx] == 0 )
                        {
                            part_smoothed_flag[incident_vtx] = 2;
                        }
                    }
                    k = num_exportVtx;
                    /* then loop over the neighbors it has on other processors */
                    for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
                    {
                        incident_vtx = vtx_off_proc_list[i][j];
                        /* check whether this processor is already on the list */
                        done = false;
                        for( l = k; l < num_exportVtx && !done; l++ )
                        {
                            if( exportProc[l] == part_proc_owner[incident_vtx] )
                            {
                                done = true;
                            }
                        }
                        /* if it's not on the list add it */
                        if( !done )
                        {
                            exportVtxLIDs[num_exportVtx] = i;
                            exportVtxGIDs[num_exportVtx] = part_gid[i];
                            exportProc[num_exportVtx]    = part_proc_owner[incident_vtx];
                            num_exportVtx++;
                        }
                    }
                }
            }
        }
    }

    if( false )
    {
        int in_set = 0;
        for( i = 0; i < num_vtx_partition_boundary_local; i++ )
            if( in_independent_set[i] ) in_set++;
        printf( "[%d]i%d independent set has %d of %d vertices sent out %d times\n", rank, iteration, in_set,
                num_vtx_partition_boundary_local, num_exportVtx );
        fflush( NULL );
    }

    /* unmark the vertices that have been marked as covered */
    for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
        if( part_smoothed_flag[i] == 2 ) part_smoothed_flag[i] = 0;
}

int ParallelHelperImpl::get_rank() const
{
    return rank;
}

int ParallelHelperImpl::get_nprocs() const
{
    return nprocs;
}

bool ParallelHelperImpl::is_our_element( MBMesquite::Mesh::ElementHandle element_handle, MsqError& err ) const
{
    int i;
    std::vector< Mesh::VertexHandle > pvertices;
    std::vector< size_t > junk;
    mesh->elements_get_attached_vertices( &element_handle, 1, pvertices, junk, err );
    MSQ_ERRZERO( err );
    int num_verts = pvertices.size();
    std::vector< int > proc_ids( num_verts );
    mesh->vertices_get_processor_id( ARRPTR( pvertices ), ARRPTR( proc_ids ), num_verts, err );
    MSQ_ERRZERO( err );
    int max_proc_id = proc_ids[0];
    for( i = 1; i < num_verts; i++ )
        if( max_proc_id < proc_ids[i] ) max_proc_id = proc_ids[i];
    return ( max_proc_id == rank );
}

bool ParallelHelperImpl::is_our_vertex( MBMesquite::Mesh::VertexHandle vertex_handle, MsqError& err ) const
{
    int proc_id;
    mesh->vertices_get_processor_id( &vertex_handle, &proc_id, 1, err );
    return !MSQ_CHKERR( err ) && ( proc_id == rank );
}
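
/* Illustrative sketch (not part of the original ParallelHelper.cpp): compute_independent_set above
 * keeps a boundary vertex out of the independent set only if some not-yet-smoothed off-processor
 * neighbour "wins" against it.  The winner is decided by the random numbers assigned to the
 * vertices, with the owning processor id breaking ties.  The free function below is a hypothetical
 * restatement of that per-neighbour predicate; it mirrors the exact comparisons used in the loop
 * above, including the floating-point equality test on the random numbers. */
static bool example_neighbour_blocks_vertex( double my_rand,
                                             int my_rank,
                                             double neighbour_rand,
                                             int neighbour_rank,
                                             int neighbour_smoothed_flag )
{
    /* an already smoothed (or covered) neighbour can no longer block this vertex */
    if( neighbour_smoothed_flag != 0 ) return false;
    /* a strictly larger random number wins outright */
    if( my_rand < neighbour_rand ) return true;
    /* equal random numbers: the higher processor id wins the tie */
    if( my_rand == neighbour_rand && my_rank < neighbour_rank ) return true;
    return false;
}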
void ParallelHelperImpl::communicate_min_max_to_all( double* minimum, double* maximum, MsqError& err ) const
{
    double d_min[2];
    double d_min_recv[2];
    d_min[0] = -( *maximum );
    d_min[1] = *minimum;
    int rval = MPI_Allreduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, (MPI_Comm)communicator );
    CHECK_MPI( rval, err );
    *maximum = -d_min_recv[0];
    *minimum = d_min_recv[1];
}

void ParallelHelperImpl::communicate_min_max_to_zero( double* minimum, double* maximum, MsqError& err ) const
{
    double d_min[2];
    double d_min_recv[2];
    d_min[0] = -( *maximum );
    d_min[1] = *minimum;
    int rval = MPI_Reduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, 0, (MPI_Comm)communicator );
    CHECK_MPI( rval, err );
    if( rank == 0 )
    {
        *maximum = -d_min_recv[0];
        *minimum = d_min_recv[1];
    }
}

void ParallelHelperImpl::communicate_sums_to_zero( size_t* freeElementCount,
                                                   int* invertedElementCount,
                                                   size_t* elementCount,
                                                   int* invertedSampleCount,
                                                   size_t* sampleCount,
                                                   long unsigned int* count,
                                                   long unsigned int* invalid,
                                                   double* sum,
                                                   double* sqrSum,
                                                   MsqError& err ) const
{
    double d_sum[9];
    double d_sum_recv[9];

    d_sum[0] = (double)( *freeElementCount );
    d_sum[1] = (double)( *invertedElementCount );
    d_sum[2] = (double)( *elementCount );
    d_sum[3] = (double)( *invertedSampleCount );
    d_sum[4] = (double)( *sampleCount );
    d_sum[5] = (double)( *count );
    d_sum[6] = (double)( *invalid );
    d_sum[7] = *sum;
    d_sum[8] = *sqrSum;

    int rval = MPI_Reduce( d_sum, d_sum_recv, 9, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
    CHECK_MPI( rval, err );

    if( rank == 0 )
    {
        *freeElementCount     = (size_t)d_sum_recv[0];
        *invertedElementCount = (int)d_sum_recv[1];
        *elementCount         = (size_t)d_sum_recv[2];
        *invertedSampleCount  = (int)d_sum_recv[3];
        *sampleCount          = (size_t)d_sum_recv[4];
        *count                = (long unsigned int)d_sum_recv[5];
        *invalid              = (long unsigned int)d_sum_recv[6];
        *sum                  = d_sum_recv[7];
        *sqrSum               = d_sum_recv[8];
    }
}

void ParallelHelperImpl::communicate_power_sum_to_zero( double* pMean, MsqError& err ) const
{
    double result;
    int rval = MPI_Reduce( pMean, &result, 1, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
    CHECK_MPI( rval, err );
    if( rank == 0 ) *pMean = result;
}

void ParallelHelperImpl::communicate_histogram_to_zero( std::vector< int >& histogram, MsqError& err ) const
{
    std::vector< int > histogram_recv( histogram.size() );
    int rval = MPI_Reduce( &( histogram[0] ), &( histogram_recv[0] ), histogram.size(), MPI_INT, MPI_SUM, 0,
                           (MPI_Comm)communicator );
    CHECK_MPI( rval, err );
    if( rank == 0 )
    {
        histogram.swap( histogram_recv );
    }
}

/// if any proc has a true input @param value, return true to all
void ParallelHelperImpl::communicate_any_true( bool& value, MsqError& err ) const
{
    int byte_out = value, byte_in;
    int rval = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MAX, (MPI_Comm)communicator );
    CHECK_MPI( rval, err );
    value = ( byte_in != 0 );
}

/// srkenno AT sandia.gov 1/19/12: bug fix: changed from MPI_MAX to MPI_MIN - this makes the name
/// of the function correct: it returns true only if all procs' values are true.
/// @see communicate_any_true above

/// return true in @param value if all procs have value=true; if any proc has it false, return
/// false
void ParallelHelperImpl::communicate_all_true( bool& value, MsqError& err ) const
{
    int byte_out = value, byte_in;
    int rval = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MIN, (MPI_Comm)communicator );
    CHECK_MPI( rval, err );
    value = ( byte_in != 0 );
}

}  // namespace MBMesquite
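
/* Illustrative sketch (not part of the original ParallelHelper.cpp): communicate_any_true and
 * communicate_all_true above encode a bool as an int (0 or 1) and reduce it with MPI_MAX or
 * MPI_MIN respectively - the maximum over 0/1 values is 1 exactly when any rank contributed true,
 * and the minimum is 1 exactly when every rank did.  The standalone helpers below show the same
 * reductions outside the ParallelHelperImpl class, reusing the <mpi.h> included at the top of this
 * file; the function names are hypothetical. */
static bool example_any_true( bool value, MPI_Comm comm )
{
    int in = value ? 1 : 0, out = 0;
    MPI_Allreduce( &in, &out, 1, MPI_INT, MPI_MAX, comm );  // 1 if at least one rank passed true
    return out != 0;
}

static bool example_all_true( bool value, MPI_Comm comm )
{
    int in = value ? 1 : 0, out = 0;
    MPI_Allreduce( &in, &out, 1, MPI_INT, MPI_MIN, comm );  // 1 only if every rank passed true
    return out != 0;
}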