MOAB: Mesh Oriented datABase
(version 5.2.1)
|
00001 #include "ParallelHelper.hpp" 00002 00003 #include <mpi.h> 00004 00005 #include <stdio.h> 00006 #include <assert.h> 00007 #include <iostream> 00008 #include <stdlib.h> 00009 #include <algorithm> 00010 #include <time.h> 00011 00012 #include "MsqVertex.hpp" 00013 #include "MsqError.hpp" 00014 00015 #define VERTEX_HEADER 1 00016 #define VERTEX_BLOCK 1000 00017 #define GHOST_NODE_INFO 2000 00018 #define GHOST_NODE_VERTICES_WANTED 2001 00019 #define GHOST_NODE_VERTEX_GIDS 2002 00020 #define GHOST_NODE_VERTEX_UPDATES 2003 00021 00022 #define ARRPTR( x ) arrptr( x, true ) 00023 00024 namespace MBMesquite 00025 { 00026 00027 void parallel_barrier() 00028 { 00029 int is_init = 0; 00030 int err = MPI_Initialized( &is_init ); 00031 if( MPI_SUCCESS != err ) return; 00032 if( is_init ) MPI_Barrier( MPI_COMM_WORLD ); 00033 } 00034 00035 int get_parallel_rank() 00036 { 00037 int rank = 0; 00038 int is_init = 0; 00039 int err = MPI_Initialized( &is_init ); 00040 if( MPI_SUCCESS != err ) return 0; 00041 if( is_init ) MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 00042 return rank; 00043 } 00044 00045 int get_parallel_size() 00046 { 00047 int nprocs = 0; 00048 int is_init = 0; 00049 int err = MPI_Initialized( &is_init ); 00050 if( MPI_SUCCESS != err ) return 0; 00051 if( is_init ) MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 00052 return nprocs; 00053 } 00054 00055 double reduce_parallel_max( double value ) 00056 { 00057 int is_init = 0; 00058 int err = MPI_Initialized( &is_init ); 00059 if( MPI_SUCCESS != err ) return value; 00060 if( !is_init ) return value; 00061 00062 double d_max[1]; 00063 double d_max_recv[1]; 00064 d_max[0] = value; 00065 int rval = MPI_Allreduce( d_max, d_max_recv, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD ); 00066 if( MPI_SUCCESS != rval ) return value; 00067 00068 return d_max_recv[0]; 00069 } 00070 00071 static const char* mpi_err_string( int error_code ) 00072 { 00073 static char buffer[128]; 00074 int len = sizeof( buffer ); 00075 int e = MPI_Error_string( error_code, buffer, &len ); 00076 if( MPI_SUCCESS != e ) len = 0; 00077 00078 if( len >= (int)sizeof( buffer ) ) len = sizeof( buffer ) - 1; 00079 buffer[len] = '\0'; 00080 return buffer; 00081 } 00082 00083 #define CHECK_MPI( RVAL, ERR ) \ 00084 do \ 00085 { \ 00086 if( MPI_SUCCESS != ( RVAL ) ) \ 00087 { \ 00088 MSQ_SETERR( err ) \ 00089 ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \ 00090 return; \ 00091 } \ 00092 } while( false ) 00093 00094 #define CHECK_MPI_RZERO( RVAL, ERR ) \ 00095 do \ 00096 { \ 00097 if( MPI_SUCCESS != ( RVAL ) ) \ 00098 { \ 00099 MSQ_SETERR( err ) \ 00100 ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \ 00101 return 0; \ 00102 } \ 00103 } while( false ) 00104 00105 static bool vertex_map_insert( VertexIdMap& map, size_t glob_id, int proc_id, int value ) 00106 { 00107 VertexIdMapKey vid; 00108 vid.glob_id = glob_id; 00109 vid.proc_id = proc_id; 00110 return map.insert( VertexIdMap::value_type( vid, value ) ).second; 00111 } 00112 00113 static int vertex_map_find( const VertexIdMap& map, size_t glob_id, int proc_id ) 00114 { 00115 VertexIdMapKey vid; 00116 vid.glob_id = glob_id; 00117 vid.proc_id = proc_id; 00118 VertexIdMap::const_iterator map_element = map.find( vid ); 00119 if( map_element == map.end() ) { return 0; } 00120 else 00121 { 00122 return map_element->second; 00123 } 00124 } 00125 00126 static void my_quicksort( int* a, size_t* b, MBMesquite::Mesh::VertexHandle* c, int i, int j ) 00127 { 00128 int in_i 
= i; 00129 int in_j = j; 00130 int wa; 00131 size_t wb; 00132 MBMesquite::Mesh::VertexHandle w1; 00133 int key = a[( i + j ) / 2]; 00134 do 00135 { 00136 while( a[i] < key ) 00137 i++; 00138 while( a[j] > key ) 00139 j--; 00140 if( i < j ) 00141 { 00142 wa = a[i]; 00143 a[i] = a[j]; 00144 a[j] = wa; 00145 wb = b[i]; 00146 b[i] = b[j]; 00147 b[j] = wb; 00148 w1 = c[i]; 00149 c[i] = c[j]; 00150 c[j] = w1; 00151 } 00152 } while( ++i <= --j ); 00153 if( i == j + 3 ) 00154 { 00155 i--; 00156 j++; 00157 } 00158 if( j > in_i ) my_quicksort( a, b, c, in_i, j ); 00159 if( i < in_j ) my_quicksort( a, b, c, i, in_j ); 00160 } 00161 00162 static int hash6432shift( unsigned long long key ) 00163 { 00164 key = ( ~key ) + ( key << 18 ); // key = (key << 18) - key - 1; 00165 key = key ^ ( key >> 31 ); 00166 key = key * 21; // key = (key + (key << 2)) + (key << 4); 00167 key = key ^ ( key >> 11 ); 00168 key = key + ( key << 6 ); 00169 key = key ^ ( key >> 22 ); 00170 return (int)key; 00171 } 00172 00173 static unsigned long long hash64shift( unsigned long long key ) 00174 { 00175 key = ( ~key ) + ( key << 21 ); // key = (key << 21) - key - 1; 00176 key = key ^ ( key >> 24 ); 00177 key = ( key + ( key << 3 ) ) + ( key << 8 ); // key * 265 00178 key = key ^ ( key >> 14 ); 00179 key = ( key + ( key << 2 ) ) + ( key << 4 ); // key * 21 00180 key = key ^ ( key >> 28 ); 00181 key = key + ( key << 31 ); 00182 return key; 00183 } 00184 00185 static double generate_random_number( int generate_random_numbers, int proc_id, size_t glob_id ) 00186 { 00187 int gid; 00188 00189 if( sizeof( size_t ) == sizeof( unsigned long long ) ) { gid = hash6432shift( (unsigned long long)glob_id ); } 00190 else 00191 { 00192 gid = (int)glob_id; 00193 } 00194 00195 if( generate_random_numbers == 1 ) 00196 { 00197 // count number of on bits 00198 int on = 0; 00199 unsigned int mist = (unsigned int)gid; 00200 while( mist ) 00201 { 00202 if( mist & 1 ) on++; 00203 mist = mist >> 1; 00204 } 00205 unsigned short xsubi[3]; 00206 if( on & 1 ) 00207 mist = (unsigned int)gid; 00208 else 00209 mist = (unsigned int)( -gid ); 00210 if( on & 2 ) 00211 { 00212 xsubi[0] = (unsigned short)( 65535 & mist ); 00213 xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00214 } 00215 else 00216 { 00217 xsubi[1] = (unsigned short)( 65535 & mist ); 00218 xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00219 } 00220 xsubi[2] = proc_id; 00221 00222 #if( defined WIN32 || defined WIN64 ) 00223 time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 ); 00224 srand( (int)seed ); 00225 return rand() / ( RAND_MAX + 1.0 ); 00226 #else 00227 return erand48( xsubi ); 00228 #endif 00229 } 00230 else if( generate_random_numbers == 2 ) 00231 { 00232 // count number of on bits 00233 int on = 0; 00234 unsigned int mist = (unsigned int)gid; 00235 while( mist ) 00236 { 00237 if( mist & 1 ) on++; 00238 mist = mist >> 1; 00239 } 00240 unsigned short xsubi[3]; 00241 if( on & 1 ) 00242 mist = (unsigned int)gid; 00243 else 00244 mist = (unsigned int)( -gid ); 00245 if( on & 2 ) 00246 { 00247 xsubi[0] = (unsigned short)( 65535 & mist ); 00248 xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00249 } 00250 else 00251 { 00252 xsubi[1] = (unsigned short)( 65535 & mist ); 00253 xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) ); 00254 } 00255 xsubi[2] = proc_id ^ xsubi[1]; 00256 00257 #if( defined WIN32 || defined WIN64 ) 00258 time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 ); 00259 srand( (int)seed ); 00260 return rand() / ( RAND_MAX + 1.0 ); 
00261 #else 00262 return erand48( xsubi ); 00263 #endif 00264 } 00265 else if( generate_random_numbers == 3 ) 00266 { 00267 unsigned long long key = (unsigned long long)gid; 00268 key = key << 32; 00269 key = key | proc_id; 00270 return (double)hash64shift( key ); 00271 } 00272 else if( generate_random_numbers == 4 ) 00273 { 00274 unsigned long long key = (unsigned long long)gid; 00275 key = key << 32; 00276 key = key | proc_id; 00277 key = hash64shift( key ); 00278 unsigned short xsubi[3]; 00279 xsubi[0] = (unsigned short)( key >> 48 ); 00280 xsubi[1] = (unsigned short)( key >> 32 ); 00281 xsubi[2] = (unsigned short)( key >> 16 ); 00282 00283 #if( defined WIN32 || defined WIN64 ) 00284 time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 ); 00285 srand( (int)seed ); 00286 return rand() / ( RAND_MAX + 1.0 ); 00287 #else 00288 return erand48( xsubi ); 00289 #endif 00290 } 00291 else 00292 { 00293 return -1; 00294 } 00295 } 00296 00297 ParallelHelperImpl::ParallelHelperImpl() 00298 { 00299 mesh = 0; 00300 set_communicator( MPI_COMM_WORLD ); 00301 communication_model = TrulyNonBlockingAvoidAllReduce; 00302 00303 MPI_Comm_rank( (MPI_Comm)communicator, &rank ); 00304 MPI_Comm_size( (MPI_Comm)communicator, &nprocs ); 00305 00306 // variable defaults for VertexMover::loop_over_mesh() 00307 generate_random_numbers = 2; 00308 00309 // memory chunks referenced for VertexMover::loop_over_mesh() 00310 vertices.clear(); 00311 vtx_in_partition_boundary.clear(); 00312 part_vertices.clear(); 00313 unghost_vertices.clear(); 00314 part_proc_owner.clear(); 00315 part_gid.clear(); 00316 part_smoothed_flag.clear(); 00317 part_rand_number.clear(); 00318 exportVtxGIDs.clear(); 00319 exportVtxLIDs.clear(); 00320 exportProc.clear(); 00321 in_independent_set.clear(); 00322 vid_map.clear(); 00323 neighbourProcSend.clear(); 00324 neighbourProcRecv.clear(); 00325 neighbourProcSendRemain.clear(); 00326 neighbourProcRecvRemain.clear(); 00327 vtx_off_proc_list.clear(); 00328 neighbourProc.clear(); 00329 } 00330 00331 ParallelHelperImpl::~ParallelHelperImpl() {} 00332 00333 void ParallelHelperImpl::set_parallel_mesh( ParallelMesh* pmesh ) 00334 { 00335 this->mesh = pmesh; 00336 } 00337 00338 void ParallelHelperImpl::set_communicator( size_t comm ) 00339 { 00340 communicator = comm; 00341 MPI_Comm_rank( (MPI_Comm)communicator, &rank ); 00342 MPI_Comm_size( (MPI_Comm)communicator, &nprocs ); 00343 } 00344 00345 void ParallelHelperImpl::set_communication_model( int model, MsqError& ) 00346 { 00347 communication_model = model; 00348 } 00349 00350 void ParallelHelperImpl::set_generate_random_numbers( int grn, MsqError& ) 00351 { 00352 this->generate_random_numbers = grn; 00353 } 00354 00355 void ParallelHelperImpl::smoothing_init( MsqError& err ) 00356 { 00357 00358 int i, j, k, rval; 00359 size_t l; 00360 00361 if( !mesh ) 00362 { 00363 MSQ_SETERR( err )( MsqError::INVALID_STATE ); 00364 return; 00365 } 00366 if( nprocs == 1 ) return; 00367 00368 /* get the vertices */ 00369 mesh->get_all_vertices( vertices, err );MSQ_ERRRTN( err ); 00370 num_vertex = vertices.size(); 00371 00372 /* allocate the data arrays we'll use for smoothing */ 00373 std::vector< size_t > gid( num_vertex ); 00374 std::vector< int > proc_owner( num_vertex ); 00375 std::vector< unsigned char > app_fixed( num_vertex ); 00376 00377 /* get the data from the mesquite mesh */ 00378 mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err ); 00379 mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed 
), num_vertex, err );MSQ_ERRRTN( err ); 00380 mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err ); 00381 if( 0 ) 00382 { 00383 int ncull = 0; 00384 for( i = 0; i < num_vertex; ++i ) 00385 { 00386 if( app_fixed[i] & MsqVertex::MSQ_CULLED ) { ++ncull; } 00387 } 00388 std::cout << "P[" << rank << "] smoothing_init ncull= " << ncull << " num_vertex= " << num_vertex << std::endl; 00389 } 00390 00391 /* only interested in fixed flag from vertex byte? Clear others. */ 00392 // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed 00393 for( i = 0; i < num_vertex; ++i ) 00394 app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED; 00395 00396 /* create temporary Tag for the local IDs */ 00397 std::vector< int > lid( num_vertex ); 00398 for( i = 0; i < num_vertex; i++ ) 00399 lid[i] = i; 00400 const char LOCAL_ID_NAME[] = "LOCAL_ID"; 00401 TagHandle lid_tag = mesh->tag_create( LOCAL_ID_NAME, Mesh::INT, 1, NULL, err );MSQ_ERRRTN( err ); 00402 mesh->tag_set_vertex_data( lid_tag, num_vertex, ARRPTR( vertices ), ARRPTR( lid ), err );MSQ_ERRRTN( err ); 00403 00404 if( 0 ) printf( "[%d] set local tags on %d vertices\n", rank, num_vertex ); 00405 00406 /* get the elements */ 00407 std::vector< MBMesquite::Mesh::ElementHandle > elements; 00408 mesh->get_all_elements( elements, err );MSQ_ERRRTN( err ); 00409 int num_elems = elements.size(); 00410 00411 /**************************************************************** 00412 PARTITION BOUNDARY VERTEX DETERMINATION 00413 ***********************************************************************/ 00414 00415 /* initialize the vertex partition boundary array */ 00416 vtx_in_partition_boundary.clear(); 00417 vtx_in_partition_boundary.resize( num_vertex, 0 ); 00418 int incident_vtx, vtx_off_proc, vtx_on_proc; 00419 00420 /* get the array that contains the adjacent vertices for each mesh element */ 00421 std::vector< MBMesquite::Mesh::VertexHandle > adj_vertices; 00422 std::vector< size_t > vtx_offsets; 00423 mesh->elements_get_attached_vertices( ARRPTR( elements ), num_elems, adj_vertices, vtx_offsets, err ); 00424 std::vector< int > adj_vertices_lid( adj_vertices.size() ); 00425 mesh->tag_get_vertex_data( lid_tag, adj_vertices.size(), ARRPTR( adj_vertices ), ARRPTR( adj_vertices_lid ), err ); 00426 00427 if( 0 ) printf( "[%d] gotten adjacent elements for %d elements\n", rank, num_elems ); 00428 00429 /* determine which vertices are smoothed as part of the boundary (and which are unused ghost 00430 * vertices) */ 00431 num_vtx_partition_boundary_local = 0; 00432 num_vtx_partition_boundary_remote = 0; 00433 for( i = 0; i < num_elems; i++ ) 00434 { 00435 /* count how many vertices of the current element are on/off a different processor */ 00436 vtx_off_proc = 0; 00437 vtx_on_proc = 0; 00438 for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ ) 00439 { 00440 incident_vtx = adj_vertices_lid[j]; 00441 /* obviously the vertex only counts if it is not app_fixed */ 00442 if( !app_fixed[incident_vtx] ) 00443 { 00444 if( proc_owner[incident_vtx] != rank ) { vtx_off_proc++; } 00445 else 00446 { 00447 vtx_on_proc++; 00448 } 00449 } 00450 } 00451 00452 /* if vertices are on different processors mark all local vertices as boundary1 and all 00453 * remote vertices as boundary2 */ 00454 if( vtx_off_proc > 0 && vtx_on_proc > 0 ) 00455 { 00456 /* collect stats */ 00457 // smooth_stats.num_part_bndy_elem++; 00458 /* mark the vertices */ 00459 for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] 
); j++ ) 00460 { 00461 incident_vtx = adj_vertices_lid[j]; 00462 /* obviously the vertex does not need to be marked if it was already marked or if it 00463 * is app_fixed*/ 00464 if( vtx_in_partition_boundary[incident_vtx] <= 0 && app_fixed[incident_vtx] == 0 ) 00465 { 00466 /* mark and count the vertex */ 00467 if( proc_owner[incident_vtx] != rank ) 00468 { 00469 vtx_in_partition_boundary[incident_vtx] = 2; 00470 num_vtx_partition_boundary_remote++; 00471 } 00472 else 00473 { 00474 vtx_in_partition_boundary[incident_vtx] = 1; 00475 num_vtx_partition_boundary_local++; 00476 } 00477 } 00478 } 00479 } 00480 else if( vtx_off_proc > 0 ) 00481 { 00482 /* mark the vertices as boundary-1 (aka unused ghost) if the element has only 00483 * off-processor vertices */ 00484 for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ ) 00485 { 00486 incident_vtx = adj_vertices_lid[j]; 00487 /* obviously the vertex is not marked if it was already marked or if it is 00488 * app_fixed*/ 00489 if( vtx_in_partition_boundary[incident_vtx] == 0 && app_fixed[incident_vtx] == 0 ) 00490 { vtx_in_partition_boundary[incident_vtx] = -1; } 00491 } 00492 } 00493 } 00494 00495 if( 0 ) 00496 { 00497 printf( "[%d]i%d local %d remote %d ", rank, iteration, num_vtx_partition_boundary_local, 00498 num_vtx_partition_boundary_remote ); 00499 printf( "[%d]i%d pb1 ", rank, iteration ); 00500 for( i = 0; i < num_vertex; i++ ) 00501 if( vtx_in_partition_boundary[i] == 1 ) printf( "%d,%lu ", i, gid[i] ); 00502 printf( "\n" ); 00503 printf( "[%d]i%d pb2 ", rank, iteration ); 00504 for( i = 0; i < num_vertex; i++ ) 00505 if( vtx_in_partition_boundary[i] == 2 ) printf( "%d,%lu ", i, gid[i] ); 00506 printf( "\n" ); 00507 fflush( NULL ); 00508 } 00509 00510 num_vtx_partition_boundary = num_vtx_partition_boundary_local + num_vtx_partition_boundary_remote; 00511 00512 /******************************************************************** 00513 COLLECT THE PARTITION BOUNDARY VERTICES AND THE UNUSED GHOST VERTICES 00514 ********************************************************************/ 00515 00516 /* create the vectors to store the partition boundary vertex data */ 00517 part_vertices.resize( num_vtx_partition_boundary ); 00518 part_proc_owner.resize( num_vtx_partition_boundary ); 00519 part_gid.resize( num_vtx_partition_boundary ); 00520 part_smoothed_flag.resize( num_vtx_partition_boundary ); 00521 part_rand_number.resize( num_vtx_partition_boundary ); 00522 00523 /* create the partition boundary map and its inverse */ 00524 std::vector< int > vtx_partition_boundary_map_inverse( num_vertex ); 00525 00526 j = 0; 00527 /* first we map the partition boundary vertices that we will smooth on this processor */ 00528 for( i = 0; i < num_vertex; i++ ) 00529 { 00530 if( vtx_in_partition_boundary[i] == 1 ) 00531 { 00532 part_vertices[j] = vertices[i]; 00533 part_proc_owner[j] = rank; 00534 assert( proc_owner[i] == rank ); 00535 part_gid[j] = gid[i]; 00536 vtx_partition_boundary_map_inverse[i] = j; 00537 j++; 00538 } 00539 } 00540 00541 vid_map.clear(); 00542 00543 /* then we map the ghost vertices that will be smoothed on other processors */ 00544 for( i = 0; i < num_vertex; i++ ) 00545 { 00546 if( vtx_in_partition_boundary[i] == 2 ) 00547 { 00548 part_vertices[j] = vertices[i]; 00549 part_proc_owner[j] = proc_owner[i]; 00550 assert( proc_owner[i] != rank ); 00551 part_gid[j] = gid[i]; 00552 vtx_partition_boundary_map_inverse[i] = j; 00553 /* only insert those vertices in the map that are smoothed on other processors */ 00554 
vertex_map_insert( vid_map, part_gid[j], part_proc_owner[j], j ); 00555 // printf("[%d] inserting vertex with gid %u and pid %d \n", rank, part_gid[j], 00556 // part_proc_owner[j]); 00557 j++; 00558 } 00559 } 00560 00561 /* create our own 'very pseudo random' numbers */ 00562 for( i = 0; i < num_vtx_partition_boundary; i++ ) 00563 part_rand_number[i] = generate_random_number( generate_random_numbers, part_proc_owner[i], part_gid[i] ); 00564 00565 /* count the number of unused ghost vertices */ 00566 unghost_num_vtx = 0; 00567 for( i = 0; i < num_vertex; i++ ) 00568 { 00569 if( vtx_in_partition_boundary[i] == -1 ) { unghost_num_vtx++; } 00570 } 00571 // printf("[%d] found %d unused ghost vertices (local %d remote %d)\n",rank, unghost_num_vtx, 00572 // num_vtx_partition_boundary_local,num_vtx_partition_boundary_remote); 00573 00574 /* create the vectors to store the unused ghost vertices */ 00575 unghost_vertices.resize( unghost_num_vtx ); 00576 std::vector< int > unghost_proc_owner( unghost_num_vtx ); 00577 std::vector< size_t > unghost_gid( unghost_num_vtx ); 00578 00579 /* store the unused ghost vertices that are copies of vertices from other processors and will 00580 * need to be received */ 00581 j = 0; 00582 for( i = 0; i < num_vertex; i++ ) 00583 { 00584 if( vtx_in_partition_boundary[i] == -1 ) 00585 { 00586 unghost_vertices[j] = vertices[i]; 00587 unghost_proc_owner[j] = proc_owner[i]; 00588 assert( proc_owner[i] != rank ); 00589 unghost_gid[j] = gid[i]; 00590 // printf(" %d", unghost_gid[j]); 00591 j++; 00592 } 00593 } 00594 00595 /* no longer needed */ 00596 // delete [] gid; gid = 0; 00597 // delete [] proc_owner; proc_owner = 0; 00598 00599 unghost_num_procs = 0; 00600 unghost_procs.clear(); 00601 unghost_procs_offset.clear(); 00602 unghost_procs_num_vtx.clear(); 00603 if( unghost_num_vtx ) 00604 { 00605 /* sort the unused ghost vertices by processor */ 00606 my_quicksort( ARRPTR( unghost_proc_owner ), ARRPTR( unghost_gid ), &( unghost_vertices[0] ), 0, 00607 unghost_num_vtx - 1 ); 00608 00609 /* count the number of processors we have unused ghost data from that we want to get updated 00610 */ 00611 unghost_num_procs = 1; 00612 for( i = 1; i < unghost_num_vtx; i++ ) 00613 { 00614 if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) unghost_num_procs++; 00615 } 00616 00617 /* get the ids of those processors and the number of vertices we want from each */ 00618 unghost_procs.resize( unghost_num_procs ); 00619 unghost_procs_offset.resize( unghost_num_procs + 1 ); 00620 unghost_procs_num_vtx.resize( unghost_num_procs ); 00621 unghost_procs[0] = unghost_proc_owner[0]; 00622 unghost_procs_offset[0] = 0; 00623 for( i = 1, j = 1; i < unghost_num_vtx; i++ ) 00624 { 00625 if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) 00626 { 00627 unghost_procs[j] = unghost_proc_owner[i]; 00628 unghost_procs_offset[j] = i; 00629 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1]; 00630 assert( unghost_procs_num_vtx[j - 1] > 0 ); 00631 j++; 00632 } 00633 } 00634 unghost_procs_offset[j] = i; 00635 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1]; 00636 assert( unghost_procs_num_vtx[j - 1] > 0 ); 00637 assert( j == unghost_num_procs ); 00638 00639 // delete [] unghost_proc_owner; unghost_proc_owner = 0; 00640 00641 // printf("[%d] have ugns from %d processor(s) (%d,%d,%d)\n", rank, unghost_num_procs, 00642 // unghost_procs[0], (unghost_num_procs>1) ? unghost_procs[1] : -1, (unghost_num_procs>2) ? 
00643 // unghost_procs[1] : -1); 00644 } 00645 00646 /* this will eventually store to how many processors each processor needs to send unused ghost 00647 * data to */ 00648 std::vector< int > num_sends_of_unghost; 00649 00650 /* gather the information about how many processors need ghost data */ 00651 if( rank == 0 ) 00652 { 00653 /* this will eventually store to how many processors each processor needs to send unused 00654 * ghost data to */ 00655 num_sends_of_unghost.resize( nprocs ); 00656 } 00657 /* temporary used for the initial gather in which each proc tells the root from how many procs 00658 * it wants unused ghost data updates */ 00659 rval = MPI_Gather( &unghost_num_procs, 1, MPI_INT, ARRPTR( num_sends_of_unghost ), 1, MPI_INT, 0, 00660 (MPI_Comm)communicator ); 00661 CHECK_MPI( rval, err ); 00662 00663 /* now each processor tells the root node from which processors they want unused ghost nodes 00664 * information */ 00665 if( rank == 0 ) 00666 { 00667 int procs_num = 0; 00668 int procs_max = 0; 00669 /* first count how many processors will send unused ghost node info and find the maximum 00670 * they will send */ 00671 for( i = 1; i < nprocs; i++ ) 00672 { 00673 if( num_sends_of_unghost[i] ) 00674 { 00675 procs_num++; 00676 if( num_sends_of_unghost[i] > procs_max ) procs_max = num_sends_of_unghost[i]; 00677 } 00678 } 00679 /* clean the temporary used array */ 00680 for( i = 0; i < nprocs; i++ ) 00681 num_sends_of_unghost[i] = 0; 00682 /* process rank 0's unused ghost nodes procs first */ 00683 for( j = 0; j < unghost_num_procs; j++ ) 00684 { 00685 num_sends_of_unghost[unghost_procs[j]]++; 00686 } 00687 /* now we process messages from all other processors that want unused ghost node information 00688 */ 00689 int* unghost_procs_array = new int[procs_max]; 00690 for( i = 0; i < procs_num; i++ ) 00691 { 00692 MPI_Status status; 00693 rval = MPI_Recv( unghost_procs_array, procs_max, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_INFO, 00694 (MPI_Comm)communicator, &status ); 00695 CHECK_MPI( rval, err ); 00696 int count; 00697 MPI_Get_count( &status, MPI_INT, &count ); 00698 for( j = 0; j < count; j++ ) 00699 { 00700 num_sends_of_unghost[unghost_procs_array[j]]++; 00701 } 00702 } 00703 delete[] unghost_procs_array; 00704 } 00705 else 00706 { 00707 if( unghost_num_vtx ) 00708 { 00709 rval = MPI_Send( ARRPTR( unghost_procs ), unghost_num_procs, MPI_INT, 0, GHOST_NODE_INFO, 00710 (MPI_Comm)communicator ); 00711 CHECK_MPI( rval, err ); 00712 } 00713 } 00714 00715 /* now the root node knows for each processor on how many other processors they have ghost nodes 00716 * (which need updating) */ 00717 /* the scatter distributes this information to each processor */ 00718 rval = MPI_Scatter( ARRPTR( num_sends_of_unghost ), 1, MPI_INT, &update_num_procs, 1, MPI_INT, 0, 00719 (MPI_Comm)communicator ); 00720 CHECK_MPI( rval, err ); 00721 00722 // if (rank == 0) delete [] num_sends_of_unghost; 00723 00724 // printf("[%d] i have unused ghost nodes from %d procs and i need to send updates to %d 00725 // procs\n", rank, unghost_num_procs, update_num_procs); 00726 00727 /* now the processors can negotiate amongst themselves: */ 00728 00729 /* first tell each processor the number of unused ghost nodes we want from them */ 00730 std::vector< MPI_Request > requests_unghost( unghost_num_procs ); 00731 for( j = 0; j < unghost_num_procs; j++ ) 00732 { 00733 rval = MPI_Isend( &( unghost_procs_num_vtx[j] ), 1, MPI_INT, unghost_procs[j], GHOST_NODE_VERTICES_WANTED, 00734 (MPI_Comm)communicator, &( requests_unghost[j] ) 
); 00735 CHECK_MPI( rval, err ); 00736 } 00737 00738 /* then listen to as many processors as there are that want updates from us */ 00739 std::vector< MPI_Request > requests_updates( update_num_procs ); 00740 update_procs_num_vtx.resize( update_num_procs ); 00741 for( j = 0; j < update_num_procs; j++ ) 00742 { 00743 rval = MPI_Irecv( &( update_procs_num_vtx[j] ), 1, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_VERTICES_WANTED, 00744 (MPI_Comm)communicator, &( requests_updates[j] ) ); 00745 CHECK_MPI( rval, err ); 00746 } 00747 00748 /* wait until we have heard from all processors how many ghost nodes updates they want from us 00749 */ 00750 std::vector< MPI_Status > status_update( update_num_procs ); 00751 update_procs.resize( update_num_procs ); 00752 rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) ); 00753 CHECK_MPI( rval, err ); 00754 for( j = 0; j < update_num_procs; j++ ) 00755 { 00756 update_procs[j] = status_update[j].MPI_SOURCE; 00757 // printf("[%d] i have to send %d vertices to processor %d\n", rank, 00758 // update_procs_num_vtx[j], update_procs[j]); 00759 } 00760 00761 /* count the total number of vertices that we need to update elsewhere */ 00762 update_procs_offset.resize( update_num_procs + 1 ); 00763 update_num_vtx = 0; 00764 update_procs_offset[0] = 0; 00765 for( j = 0; j < update_num_procs; j++ ) 00766 { 00767 update_num_vtx += update_procs_num_vtx[j]; 00768 update_procs_offset[j + 1] = update_num_vtx; 00769 } 00770 00771 /* create enough space to receive all the vertex indices */ 00772 update_gid.resize( update_num_vtx ); 00773 00774 /* tell each processor which vertices we want from them */ 00775 for( j = 0; j < unghost_num_procs; j++ ) 00776 { 00777 rval = MPI_Isend( &( unghost_gid[unghost_procs_offset[j]] ), unghost_procs_num_vtx[j], 00778 sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_GIDS, 00779 (MPI_Comm)communicator, &( requests_unghost[j] ) ); 00780 CHECK_MPI( rval, err ); 00781 } 00782 00783 /* receive from each processor the info which vertices they want from us */ 00784 for( j = 0; j < update_num_procs; j++ ) 00785 { 00786 rval = MPI_Irecv( &( update_gid[update_procs_offset[j]] ), update_procs_num_vtx[j], 00787 sizeof( size_t ) == 4 ? 
MPI_INT : MPI_DOUBLE, update_procs[j], GHOST_NODE_VERTEX_GIDS, 00788 (MPI_Comm)communicator, &( requests_updates[j] ) ); 00789 CHECK_MPI( rval, err ); 00790 } 00791 00792 /* wait until we have heard from all processors which vertices they want from us */ 00793 rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) ); 00794 CHECK_MPI( rval, err ); 00795 00796 /* wait until we have sent to all processors which vertices we want from them */ 00797 std::vector< MPI_Status > status_unghost( unghost_num_procs ); 00798 rval = MPI_Waitall( unghost_num_procs, ARRPTR( requests_unghost ), ARRPTR( status_unghost ) ); 00799 CHECK_MPI( rval, err ); 00800 00801 /* 00802 for (j = 0; j < update_num_procs; j++) 00803 { 00804 printf("[%d] will send to proc %d:", rank, update_procs[j]); 00805 for (i = update_procs_offset[j]; i < update_procs_offset[j+1]; i++) printf(" %d", 00806 update_gid[i]); printf("\n"); 00807 } 00808 */ 00809 00810 /*********************************************************************** 00811 COMPUTE THE SET OF NEIGHBORS THAT OUR VERTICES HAVE ON OTHER PROCESSORS 00812 COMPUTE THE SET OF NEIGHBORS PROCESSORS 00813 ***********************************************************************/ 00814 00815 if( num_vtx_partition_boundary_local == 0 ) 00816 { 00817 /* this processor does not partake in the boundary smoothing */ 00818 neighbourProc.clear(); 00819 mesh->tag_delete( lid_tag, err ); 00820 vtx_partition_boundary_map_inverse.clear(); 00821 return; 00822 } 00823 00824 /* init the neighbour processor list */ 00825 neighbourProc.clear(); 00826 00827 /* init the neighbour lists */ 00828 00829 vtx_off_proc_list.clear(); 00830 vtx_off_proc_list.resize( num_vtx_partition_boundary_local ); 00831 00832 /* get the adjacency arrays that we need */ 00833 std::vector< MBMesquite::Mesh::ElementHandle > adj_elements; 00834 std::vector< MBMesquite::Mesh::VertexHandle > adj_adj_vertices; 00835 std::vector< size_t > elem_offsets; 00836 std::vector< size_t > adj_vtx_offsets; 00837 mesh->vertices_get_attached_elements( ARRPTR( part_vertices ), num_vtx_partition_boundary_local, adj_elements, 00838 elem_offsets, err ); 00839 mesh->elements_get_attached_vertices( ARRPTR( adj_elements ), adj_elements.size(), adj_adj_vertices, 00840 adj_vtx_offsets, err ); 00841 // delete adj_elements; adj_elements = 0; 00842 std::vector< int > adj_adj_vertices_lid( adj_adj_vertices.size() ); 00843 mesh->tag_get_vertex_data( lid_tag, adj_adj_vertices.size(), ARRPTR( adj_adj_vertices ), 00844 ARRPTR( adj_adj_vertices_lid ), err ); 00845 // delete adj_adj_vertices; adj_adj_vertices = 0; 00846 mesh->tag_delete( lid_tag, err ); 00847 00848 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 00849 { 00850 /* loop over the elements surrounding that vertex */ 00851 for( j = elem_offsets[i]; j < (int)( elem_offsets[i + 1] ); j++ ) 00852 { 00853 /* loop over the neighbors of the considered vertex (i.e. the vertices of these element) 00854 */ 00855 for( k = adj_vtx_offsets[j]; k < (int)( adj_vtx_offsets[j + 1] ); k++ ) 00856 { 00857 /* get the next neighbour */ 00858 incident_vtx = adj_adj_vertices_lid[k]; 00859 /* if this neighbour is a vertex that is smoothed on a different processor */ 00860 if( vtx_in_partition_boundary[incident_vtx] == 2 ) 00861 { 00862 /* then map it into our domain */ 00863 incident_vtx = vtx_partition_boundary_map_inverse[incident_vtx]; 00864 /* is this vertex already in our neighbour list ? 
*/ 00865 if( std::find( vtx_off_proc_list[i].begin(), vtx_off_proc_list[i].end(), incident_vtx ) == 00866 vtx_off_proc_list[i].end() ) 00867 { 00868 /* if the vertex is not in the list yet ... add it */ 00869 vtx_off_proc_list[i].push_back( incident_vtx ); 00870 /* is the processor of this vertex already in the processor list */ 00871 incident_vtx = part_proc_owner[incident_vtx]; 00872 /* check by scanning the list for this processor */ 00873 if( std::find( neighbourProc.begin(), neighbourProc.end(), incident_vtx ) == 00874 neighbourProc.end() ) 00875 { 00876 /* the processor is not in the list yet ... add it */ 00877 neighbourProc.push_back( incident_vtx ); 00878 } 00879 } 00880 } 00881 } 00882 } 00883 } 00884 00885 /* sort the list of neighbour processors */ 00886 00887 std::sort( neighbourProc.begin(), neighbourProc.end() ); 00888 00889 /*********************************************************************** 00890 COMPUTE HOW MANY VERTICES WE NEED TO SEND/RECV FROM EACH PROCESSOR 00891 ***********************************************************************/ 00892 neighbourProcSend.clear(); 00893 neighbourProcRecv.clear(); 00894 neighbourProcSendRemain.clear(); 00895 neighbourProcRecvRemain.clear(); 00896 00897 if( communication_model & 1 ) // AVOID_ALL_REDUCE 00898 { 00899 total_num_vertices_to_smooth = num_vtx_partition_boundary_local; 00900 total_num_vertices_to_recv = num_vtx_partition_boundary_remote; 00901 neighbourProcSend.resize( neighbourProc.size(), 0 ); 00902 neighbourProcRecv.resize( neighbourProc.size(), 0 ); 00903 00904 /* for each vertex we smooth find the processors we need to send it too */ 00905 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 00906 { 00907 /* loop over its adjacent off-processor vertices */ 00908 for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ ) 00909 { 00910 /* get the processor id of these vertices */ 00911 incident_vtx = part_proc_owner[vtx_off_proc_list[i][j]]; 00912 /* check if we got this processor id before */ 00913 for( k = 0; k < j; k++ ) 00914 { 00915 if( incident_vtx == part_proc_owner[vtx_off_proc_list[i][k]] ) 00916 { 00917 /* if we have has this procesor id already we do not need to count it again 00918 */ 00919 incident_vtx = -1; 00920 break; 00921 } 00922 } 00923 /* if this was a new processor id */ 00924 if( incident_vtx != -1 ) 00925 { 00926 /* find the processor in the list and increment its counter */ 00927 for( l = 0; l < neighbourProc.size(); l++ ) 00928 { 00929 if( neighbourProc[l] == incident_vtx ) 00930 { 00931 neighbourProcSend[l]++; 00932 break; 00933 } 00934 } 00935 } 00936 } 00937 } 00938 for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ ) 00939 { 00940 incident_vtx = part_proc_owner[i]; 00941 for( l = 0; l < neighbourProc.size(); l++ ) 00942 { 00943 if( neighbourProc[l] == incident_vtx ) 00944 { 00945 neighbourProcRecv[l]++; 00946 break; 00947 } 00948 } 00949 } 00950 neighbourProcSendRemain.resize( neighbourProc.size() ); 00951 neighbourProcRecvRemain.resize( neighbourProc.size() ); 00952 } 00953 00954 exportVtxGIDs.resize( num_vtx_partition_boundary ); 00955 exportVtxLIDs.resize( num_vtx_partition_boundary ); 00956 exportProc.resize( num_vtx_partition_boundary ); 00957 in_independent_set.resize( num_vtx_partition_boundary_local ); 00958 } 00959 00960 void ParallelHelperImpl::compute_first_independent_set( std::vector< Mesh::VertexHandle >& fixed_vertices ) 00961 { 00962 if( nprocs == 1 ) return; 00963 00964 int i; 00965 00966 // to avoid all reduce we need to know how many 
vertices we send & receive 00967 if( communication_model & 1 ) // AVOID_ALL_REDUCE 00968 { 00969 for( i = 0; i < (long)neighbourProc.size(); i++ ) 00970 { 00971 neighbourProcSendRemain[i] = neighbourProcSend[i]; 00972 neighbourProcRecvRemain[i] = neighbourProcRecv[i]; 00973 } 00974 } 00975 00976 // this is iteration zero of the bounday smooting process 00977 iteration = 0; 00978 00979 // mark all boundary partition vertices as not smoothed 00980 for( i = 0; i < num_vtx_partition_boundary; i++ ) 00981 { 00982 part_smoothed_flag[i] = 0; 00983 } 00984 00985 // populates the in_independent_set and the vertex export arrays 00986 compute_independent_set(); 00987 00988 // counts how many vertices are already smoothed 00989 num_already_smoothed_vertices = 0; 00990 00991 fixed_vertices.clear(); 00992 00993 /* mark which local boundary partition vertices are already smoothed (i.e. they are in the 1st 00994 * independent set) */ 00995 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 00996 { 00997 if( in_independent_set[i] ) 00998 { 00999 part_smoothed_flag[i] = 1; 01000 num_already_smoothed_vertices++; 01001 } 01002 else 01003 { 01004 fixed_vertices.push_back( part_vertices[i] ); // fix vertices *not* in the independent set 01005 } 01006 } 01007 01008 if( 0 ) 01009 { 01010 printf( "[%d]i%d after first we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices, 01011 num_vtx_partition_boundary_local ); 01012 fflush( NULL ); 01013 } 01014 01015 // fix the ghost vertices that are smoothed on another processor 01016 for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ ) 01017 { 01018 fixed_vertices.push_back( part_vertices[i] ); 01019 } 01020 01021 // fix the ghost vertices that are unused 01022 for( i = 0; i < (int)( unghost_vertices.size() ); i++ ) 01023 { 01024 fixed_vertices.push_back( unghost_vertices[i] ); 01025 } 01026 } 01027 01028 void ParallelHelperImpl::communicate_first_independent_set( MsqError& err ) 01029 { 01030 if( nprocs == 1 ) return; 01031 01032 switch( communication_model ) 01033 { 01034 case TrulyNonBlocking: 01035 num_already_recv_vertices = comm_smoothed_vtx_tnb( err ); 01036 break; 01037 case TrulyNonBlockingAvoidAllReduce: 01038 num_already_recv_vertices = comm_smoothed_vtx_tnb_no_all( err ); 01039 break; 01040 case NonBlocking: 01041 num_already_recv_vertices = comm_smoothed_vtx_nb( err ); 01042 break; 01043 case NonBlockingAvoidAllReduce: 01044 num_already_recv_vertices = comm_smoothed_vtx_nb_no_all( err ); 01045 break; 01046 case Blocking: 01047 num_already_recv_vertices = comm_smoothed_vtx_b( err ); 01048 break; 01049 case BlockingAvoidAllReduce: 01050 num_already_recv_vertices = comm_smoothed_vtx_b_no_all( err ); 01051 break; 01052 } 01053 global_work_remains = ( neighbourProc.size() ? 
1 : 0 );MSQ_CHKERR( err ); 01054 } 01055 01056 bool ParallelHelperImpl::compute_next_independent_set() 01057 { 01058 if( nprocs == 1 ) return false; 01059 01060 if( global_work_remains && ( iteration < 20 ) ) 01061 { 01062 iteration++; 01063 if( 0 ) printf( "[%d] work remains %d after %d iterations\n", rank, global_work_remains, iteration ); 01064 compute_independent_set(); 01065 next_vtx_partition_boundary = 0; 01066 return true; 01067 } 01068 else 01069 { 01070 if( global_work_remains ) 01071 { printf( "WARNING: global work remains %d after %d iterations\n", global_work_remains, iteration ); } 01072 return false; 01073 } 01074 } 01075 01076 bool ParallelHelperImpl::get_next_partition_boundary_vertex( MBMesquite::Mesh::VertexHandle& vertex_handle ) 01077 { 01078 while( next_vtx_partition_boundary < num_vtx_partition_boundary_local ) 01079 { 01080 if( in_independent_set[next_vtx_partition_boundary] ) 01081 { 01082 vertex_handle = part_vertices[next_vtx_partition_boundary]; 01083 num_already_smoothed_vertices++; 01084 assert( part_smoothed_flag[next_vtx_partition_boundary] == 0 ); 01085 part_smoothed_flag[next_vtx_partition_boundary] = 1; 01086 next_vtx_partition_boundary++; 01087 return true; 01088 } 01089 next_vtx_partition_boundary++; 01090 } 01091 if( 0 ) 01092 { 01093 printf( "[%d]i%d after next we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices, 01094 num_vtx_partition_boundary_local ); 01095 fflush( NULL ); 01096 } 01097 return false; 01098 } 01099 01100 void ParallelHelperImpl::communicate_next_independent_set( MsqError& err ) 01101 { 01102 switch( communication_model ) 01103 { 01104 case TrulyNonBlocking: 01105 num_already_recv_vertices += comm_smoothed_vtx_tnb( err ); 01106 break; 01107 case TrulyNonBlockingAvoidAllReduce: 01108 num_already_recv_vertices += comm_smoothed_vtx_tnb_no_all( err ); 01109 break; 01110 case NonBlocking: 01111 num_already_recv_vertices += comm_smoothed_vtx_nb( err ); 01112 break; 01113 case NonBlockingAvoidAllReduce: 01114 num_already_recv_vertices += comm_smoothed_vtx_nb_no_all( err ); 01115 break; 01116 case Blocking: 01117 num_already_recv_vertices += comm_smoothed_vtx_b( err ); 01118 break; 01119 case BlockingAvoidAllReduce: 01120 num_already_recv_vertices += comm_smoothed_vtx_b_no_all( err ); 01121 break; 01122 } 01123 01124 if( communication_model & 1 ) // AVOID_ALL_REDUCE 01125 { 01126 global_work_remains = ( total_num_vertices_to_smooth - num_already_smoothed_vertices ) + 01127 ( total_num_vertices_to_recv - num_already_recv_vertices ); 01128 if( 0 ) 01129 { 01130 printf( "[%d]i%d %d - %d + %d - %d = %d \n", rank, iteration, total_num_vertices_to_smooth, 01131 num_already_smoothed_vertices, total_num_vertices_to_recv, num_already_recv_vertices, 01132 global_work_remains ); 01133 fflush( NULL ); 01134 } 01135 } 01136 else 01137 { 01138 int i, work_remains = 0; 01139 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 01140 { 01141 if( part_smoothed_flag[i] == 0 ) { work_remains++; } 01142 } 01143 int rval = MPI_Allreduce( &work_remains, &global_work_remains, 1, MPI_INT, MPI_SUM, (MPI_Comm)communicator ); 01144 CHECK_MPI( rval, err ); 01145 } 01146 } 01147 01148 void ParallelHelperImpl::smoothing_close( MsqError& err ) 01149 { 01150 int i, j, rval; 01151 01152 if( nprocs == 1 ) return; 01153 01154 // printf("[%d] used %d iterations\n", rank, iteration); 01155 01156 // communicate unused ghost nodes 01157 01158 std::vector< double > update_updates; 01159 std::vector< MPI_Request > update_requests; 01160 01161 if( 
update_num_procs ) 01162 { 01163 /* get the tags so we can find the requested vertices */ 01164 std::vector< size_t > gid( num_vertex ); 01165 mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err ); 01166 std::vector< unsigned char > app_fixed( num_vertex ); 01167 mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err ); 01168 std::vector< int > proc_owner( num_vertex ); 01169 mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err ); 01170 01171 if( 0 ) 01172 { 01173 int ncull = 0; 01174 for( i = 0; i < num_vertex; ++i ) 01175 { 01176 if( app_fixed[i] & MsqVertex::MSQ_CULLED ) { ++ncull; } 01177 } 01178 std::cout << "P[" << rank << "] ncull= " << ncull << " num_vertex= " << num_vertex << std::endl; 01179 } 01180 01181 /* only interested in fixed flag from vertex byte? Clear others. */ 01182 // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed 01183 for( i = 0; i < num_vertex; ++i ) 01184 app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED; 01185 01186 /* insert all our unfixed vertices into a map so we can find the requested vertices 01187 * efficiently */ 01188 VertexIdMap temp_vid_map; 01189 for( j = 0; j < num_vertex; j++ ) 01190 { 01191 if( proc_owner[j] == rank && app_fixed[j] == false ) { vertex_map_insert( temp_vid_map, gid[j], rank, j ); } 01192 } 01193 01194 /* deallocate the tags */ 01195 // delete [] gid; gid = 0; 01196 // delete [] app_fixed; app_fixed = 0; 01197 // delete [] proc_owner; proc_owner = 0; 01198 01199 /* find the requested updates and collect them into an array */ 01200 MBMesquite::MsqVertex coordinates; 01201 update_updates.resize( update_num_vtx * 3 ); 01202 for( i = 0; i < update_num_vtx; i++ ) 01203 { 01204 j = vertex_map_find( temp_vid_map, update_gid[i], rank ); 01205 mesh->vertices_get_coordinates( &( vertices[j] ), &coordinates, 1, err );MSQ_ERRRTN( err ); 01206 update_updates[3 * i + 0] = coordinates[0]; 01207 update_updates[3 * i + 1] = coordinates[1]; 01208 update_updates[3 * i + 2] = coordinates[2]; 01209 // printf("[%d] send gid %d with %g %g %g\n", rank, update_gid[i], coordinates[0], 01210 // coordinates[1], coordinates[2]); 01211 } 01212 01213 /* deallocate the map and the gid array */ 01214 // delete temp_vid_map; temp_vid_map = 0; 01215 // delete [] update_gid; update_gid = 0; 01216 01217 update_requests.resize( update_num_procs ); 01218 /* send each processor the unused ghost node updates that they requested */ 01219 for( j = 0; j < update_num_procs; j++ ) 01220 { 01221 rval = MPI_Isend( &( update_updates[update_procs_offset[j] * 3] ), update_procs_num_vtx[j] * 3, MPI_DOUBLE, 01222 update_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator, 01223 &( update_requests[j] ) ); 01224 // printf("[%d] sending %d of %d from %d with offset %d \n", rank, 01225 // update_procs_num_vtx[j], update_num_vtx, update_procs[j], 01226 // update_procs_offset[j]); 01227 CHECK_MPI( rval, err ); 01228 } 01229 01230 /* deallocate more arrays that we no longer need */ 01231 // delete [] update_procs_offset; update_procs_offset = 0; 01232 // delete [] update_procs_num_vtx; update_procs_num_vtx = 0; 01233 // delete [] update_procs; update_procs = 0; 01234 } 01235 01236 if( unghost_num_procs ) 01237 { 01238 std::vector< MPI_Request > unghost_requests( unghost_num_procs ); 01239 /* receive from each processor the unused ghost nodes updates i want from them */ 01240 std::vector< double > 
unghost_updates( unghost_num_vtx * 3 ); 01241 for( j = 0; j < unghost_num_procs; j++ ) 01242 { 01243 rval = MPI_Irecv( &( unghost_updates[unghost_procs_offset[j] * 3] ), unghost_procs_num_vtx[j] * 3, 01244 MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator, 01245 &( unghost_requests[j] ) ); 01246 // printf("[%d] receiving %d of %d from %d with offset %d \n", rank, 01247 // unghost_procs_num_vtx[j], unghost_num_vtx, unghost_procs[j], 01248 // unghost_procs_offset[j]); 01249 CHECK_MPI( rval, err ); 01250 } 01251 01252 /* deallocate more arrays that we no longer need */ 01253 // delete [] unghost_procs_offset; unghost_procs_offset = 0; 01254 // delete [] unghost_procs_num_vtx; unghost_procs_num_vtx = 0; 01255 // delete [] unghost_procs; unghost_procs = 0; 01256 01257 std::vector< MPI_Status > status( unghost_num_procs ); 01258 rval = MPI_Waitall( unghost_num_procs, ARRPTR( unghost_requests ), ARRPTR( status ) ); 01259 CHECK_MPI( rval, err ); 01260 01261 /* apply the received updates for the unused ghost vertices */ 01262 for( i = 0; i < unghost_num_vtx; i++ ) 01263 { 01264 MBMesquite::Vector3D coordinates; 01265 coordinates[0] = unghost_updates[3 * i + 0]; 01266 coordinates[1] = unghost_updates[3 * i + 1]; 01267 coordinates[2] = unghost_updates[3 * i + 2]; 01268 // printf("[%d] recv %g %g %g\n", rank, coordinates[0], coordinates[1], 01269 // coordinates[2]); 01270 mesh->vertex_set_coordinates( unghost_vertices[i], coordinates, err );MSQ_ERRRTN( err ); 01271 } 01272 01273 /* deallocate more arrays that we no longer need */ 01274 // delete unghost_vertices; unghost_vertices = 0; 01275 // delete [] unghost_updates; unghost_updates = 0; 01276 // delete [] unghost_requests; unghost_requests = 0; 01277 } 01278 01279 // if (update_num_procs) 01280 //{ 01281 // delete [] update_updates; update_updates = 0; 01282 // delete [] update_requests; update_requests = 0; 01283 //} 01284 01285 // if (vertices) delete vertices; vertices = 0; 01286 // if (vtx_in_partition_boundary) delete [] vtx_in_partition_boundary; vtx_in_partition_boundary 01287 // = 0; if (part_vertices) delete part_vertices; part_vertices = 0; if (unghost_vertices) delete 01288 // unghost_vertices; unghost_vertices = 0; if (part_proc_owner) delete [] part_proc_owner; 01289 // part_proc_owner = 0; if (part_gid) delete [] part_gid; part_gid = 0; if (part_smoothed_flag) 01290 // delete [] part_smoothed_flag; part_smoothed_flag = 0; if (part_rand_number) delete [] 01291 // part_rand_number; part_rand_number = 0; if (exportVtxGIDs) delete [] exportVtxGIDs; 01292 // exportVtxGIDs = 0; if (exportVtxLIDs) delete [] exportVtxLIDs; exportVtxLIDs = 0; if 01293 // (exportProc) delete [] exportProc; exportProc = 0; if (in_independent_set) delete [] 01294 // in_independent_set; in_independent_set = 0; if (vid_map) delete vid_map; vid_map = 0; if 01295 // (neighbourProcSend) delete [] neighbourProcSend; neighbourProcSend = 0; if (neighbourProcRecv) 01296 // delete [] neighbourProcRecv; neighbourProcRecv = 0; if (neighbourProcSendRemain) delete [] 01297 // neighbourProcSendRemain; neighbourProcSendRemain = 0; if (neighbourProcRecvRemain) delete [] 01298 // neighbourProcRecvRemain; neighbourProcRecvRemain = 0; if (vtx_off_proc_list_size) delete [] 01299 // vtx_off_proc_list_size; vtx_off_proc_list_size = 0; if (vtx_off_proc_list) { 01300 // for (i = 0; i < num_vtx_partition_boundary_local; i++) free(vtx_off_proc_list[i]); 01301 // delete [] vtx_off_proc_list; vtx_off_proc_list = 0; 01302 //} 01303 // if (neighbourProc) 
free(neighbourProc); neighbourProc = 0; 01304 } 01305 01306 typedef struct VertexPack 01307 { 01308 double x; 01309 double y; 01310 double z; 01311 union 01312 { 01313 double mist; 01314 size_t glob_id; 01315 }; 01316 } VertexPack; 01317 01318 int ParallelHelperImpl::comm_smoothed_vtx_tnb( MsqError& err ) 01319 { 01320 int i, j, k, rval; 01321 01322 // printf("[%d]i%d truly non blocking\n",rank, iteration);fflush(NULL); 01323 01324 /* compute how many vertices we send to each processor */ 01325 01326 std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 ); 01327 std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 ); 01328 for( i = 0; i < num_exportVtx; i++ ) 01329 { 01330 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01331 { 01332 if( exportProc[i] == neighbourProc[j] ) 01333 { 01334 /* increment count */ 01335 numVtxPerProcSend[j]++; 01336 /* end loop */ 01337 break; 01338 } 01339 } 01340 /* did loop end without finding the processor */ 01341 if( j == (long)neighbourProc.size() ) 01342 { 01343 printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration, 01344 i, exportProc[i], (unsigned long)neighbourProc.size() ); 01345 } 01346 } 01347 01348 /* tell each processor how many vertices they can expect from us */ 01349 /* also ask each processor how many vertices we can expect from them */ 01350 01351 int num_neighbourProcSend = 0; 01352 int num_neighbourProcRecv = 0; 01353 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 01354 std::vector< MPI_Request > requests_recv( neighbourProc.size() ); 01355 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01356 { 01357 /* send the vertex count to this processor */ 01358 if( 0 ) 01359 { 01360 printf( "[%d]i%d Announce send %d vertices from proc %d\n", rank, iteration, numVtxPerProcSend[j], 01361 neighbourProc[j] ); 01362 fflush( NULL ); 01363 } 01364 rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01365 (MPI_Comm)communicator, &( requests_send[j] ) ); 01366 CHECK_MPI_RZERO( rval, err ); 01367 num_neighbourProcSend++; 01368 01369 /* recv the vertex count for this processor */ 01370 if( 0 ) 01371 { 01372 printf( "[%d]i%d Listen recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[j], 01373 neighbourProc[j] ); 01374 fflush( NULL ); 01375 } 01376 rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01377 (MPI_Comm)communicator, &( requests_recv[j] ) ); 01378 CHECK_MPI_RZERO( rval, err ); 01379 num_neighbourProcRecv++; 01380 } 01381 01382 /* set up memory for the outgoing vertex data blocks */ 01383 01384 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 01385 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 01386 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 01387 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01388 { 01389 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 01390 } 01391 01392 /* place vertex data going to the same processor into consecutive memory space */ 01393 01394 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size() ); 01395 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01396 { 01397 numVtxPerProcSendPACKED[i] = 0; 01398 } 01399 for( i = 0; i < num_exportVtx; i++ ) 01400 { 01401 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01402 { 01403 if( exportProc[i] == 
neighbourProc[j] ) 01404 { 01405 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 01406 numVtxPerProcSendPACKED[j]++; 01407 MBMesquite::MsqVertex coordinates; 01408 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 01409 MSQ_ERRZERO( err ); 01410 packing_vertex->x = coordinates[0]; 01411 packing_vertex->y = coordinates[1]; 01412 packing_vertex->z = coordinates[2]; 01413 packing_vertex->glob_id = exportVtxGIDs[i]; 01414 } 01415 } 01416 } 01417 01418 /* wait until we have heard from all processors how many vertices we will receive from them */ 01419 01420 std::vector< MPI_Status > status( neighbourProc.size() ); 01421 01422 if( num_neighbourProcRecv ) 01423 { 01424 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) ); 01425 CHECK_MPI_RZERO( rval, err ); 01426 } 01427 01428 /* how many vertices will we receive */ 01429 01430 int numVtxImport = 0; 01431 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01432 { 01433 numVtxImport += numVtxPerProcRecv[i]; 01434 } 01435 01436 /* set up memory for the incoming vertex data blocks */ 01437 01438 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 01439 std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() ); 01440 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 01441 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01442 { 01443 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1]; 01444 } 01445 01446 /* post receives for all processors that have something for us */ 01447 /* post sends for all processors that we have something for */ 01448 01449 num_neighbourProcRecv = 0; 01450 01451 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01452 { 01453 if( numVtxPerProcRecv[i] ) 01454 { 01455 if( 0 ) 01456 { 01457 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 01458 neighbourProc[i] ); 01459 fflush( NULL ); 01460 } 01461 rval = 01462 MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE_PRECISION, neighbourProc[i], 01463 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) ); 01464 CHECK_MPI_RZERO( rval, err ); 01465 num_neighbourProcRecv++; 01466 } 01467 else 01468 { 01469 requests_recv[i] = MPI_REQUEST_NULL; 01470 } 01471 if( numVtxPerProcSend[i] ) 01472 { 01473 if( 0 ) 01474 { 01475 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i], 01476 neighbourProc[i] ); 01477 fflush( NULL ); 01478 } 01479 rval = 01480 MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE_PRECISION, neighbourProc[i], 01481 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) ); 01482 CHECK_MPI_RZERO( rval, err ); 01483 } 01484 else 01485 { 01486 requests_send[i] = MPI_REQUEST_NULL; 01487 } 01488 } 01489 01490 /* wait for some receive to arrive */ 01491 01492 int local_id; 01493 while( num_neighbourProcRecv ) 01494 { 01495 rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) ); 01496 CHECK_MPI_RZERO( rval, err ); 01497 /* unpack all vertices */ 01498 for( i = 0; i < numVtxPerProcRecv[k]; i++ ) 01499 { 01500 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01501 if( local_id ) 01502 { 01503 MBMesquite::Vector3D coordinates; 01504 coordinates.set( packed_vertices_import[k][i].x, 
packed_vertices_import[k][i].y, 01505 packed_vertices_import[k][i].z ); 01506 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 01507 MSQ_ERRZERO( err ); 01508 assert( part_smoothed_flag[local_id] == 0 ); 01509 part_smoothed_flag[local_id] = 1; 01510 } 01511 else 01512 { 01513 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 01514 packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01515 } 01516 } 01517 num_neighbourProcRecv--; 01518 } 01519 01520 /* all receives have completed. it is save to release the memory */ 01521 // free(vertex_pack_import); 01522 01523 /* wait until the sends have completed */ 01524 01525 if( num_neighbourProcSend ) 01526 { 01527 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) ); 01528 CHECK_MPI_RZERO( rval, err ); 01529 } 01530 01531 /* all sends have completed. it is save to release the memory */ 01532 // free(vertex_pack_export); 01533 01534 return numVtxImport; 01535 } 01536 01537 int ParallelHelperImpl::comm_smoothed_vtx_tnb_no_all( MsqError& err ) 01538 { 01539 int i, j, k, rval; 01540 01541 // printf("[%d]i%d truly non blocking avoid reduce all\n", rank, iteration); fflush(NULL); 01542 01543 /* compute how many vertices we send to each processor */ 01544 01545 std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 ); 01546 std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 ); 01547 for( i = 0; i < num_exportVtx; i++ ) 01548 { 01549 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01550 { 01551 if( exportProc[i] == neighbourProc[j] ) 01552 { 01553 /* increment count */ 01554 numVtxPerProcSend[j]++; 01555 /* end loop */ 01556 break; 01557 } 01558 } 01559 /* did loop end without finding the processor */ 01560 if( j == (long)neighbourProc.size() ) 01561 { 01562 printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration, 01563 i, exportProc[i], (unsigned long)neighbourProc.size() ); 01564 } 01565 } 01566 01567 /* tell each processor how many vertices they can expect from us */ 01568 /* also ask each processor how many vertices we can expect from them */ 01569 01570 int num_neighbourProcSend = 0; 01571 int num_neighbourProcRecv = 0; 01572 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 01573 std::vector< MPI_Request > requests_recv( neighbourProc.size() ); 01574 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01575 { 01576 if( neighbourProcSendRemain[j] ) 01577 { 01578 /* send the vertex count to this processor */ 01579 if( 0 ) 01580 { 01581 printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j], 01582 neighbourProc[j] ); 01583 fflush( NULL ); 01584 } 01585 rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01586 (MPI_Comm)communicator, &( requests_send[j] ) ); 01587 CHECK_MPI_RZERO( rval, err ); 01588 num_neighbourProcSend++; 01589 } 01590 else 01591 { 01592 requests_send[j] = MPI_REQUEST_NULL; 01593 } 01594 if( neighbourProcRecvRemain[j] ) 01595 { 01596 /* recv the vertex count for this processor */ 01597 if( 0 ) 01598 { 01599 printf( "[%d]i%d Listen recv xx vertices from proc %d\n", rank, iteration, neighbourProc[j] ); 01600 fflush( NULL ); 01601 } 01602 rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01603 (MPI_Comm)communicator, &( requests_recv[j] ) ); 01604 CHECK_MPI_RZERO( rval, err ); 01605 num_neighbourProcRecv++; 01606 } 01607 
else 01608 { 01609 requests_recv[j] = MPI_REQUEST_NULL; 01610 } 01611 } 01612 01613 /* set up memory for the outgoing vertex data blocks */ 01614 01615 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 01616 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 01617 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 01618 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01619 { 01620 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 01621 } 01622 01623 /* place vertex data going to the same processor into consecutive memory space */ 01624 01625 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 ); 01626 for( i = 0; i < num_exportVtx; i++ ) 01627 { 01628 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01629 { 01630 if( exportProc[i] == neighbourProc[j] ) 01631 { 01632 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 01633 numVtxPerProcSendPACKED[j]++; 01634 MBMesquite::MsqVertex coordinates; 01635 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 01636 MSQ_ERRZERO( err ); 01637 packing_vertex->x = coordinates[0]; 01638 packing_vertex->y = coordinates[1]; 01639 packing_vertex->z = coordinates[2]; 01640 packing_vertex->glob_id = exportVtxGIDs[i]; 01641 if( 0 ) 01642 printf( "[%d]i%d vertex %lu packed %g %g %g\n", rank, iteration, (unsigned long)exportVtxGIDs[i], 01643 packing_vertex->x, packing_vertex->y, packing_vertex->z ); 01644 } 01645 } 01646 } 01647 01648 /* wait until we have heard from all processors how many vertices we will receive from them */ 01649 01650 std::vector< MPI_Status > status( neighbourProc.size() ); 01651 01652 if( num_neighbourProcRecv ) 01653 { 01654 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) ); 01655 CHECK_MPI_RZERO( rval, err ); 01656 } 01657 01658 /* how many vertices will we receive */ 01659 01660 int numVtxImport = 0; 01661 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01662 { 01663 numVtxImport += numVtxPerProcRecv[i]; 01664 neighbourProcRecvRemain[i] -= numVtxPerProcRecv[i]; 01665 neighbourProcSendRemain[i] -= numVtxPerProcSend[i]; 01666 } 01667 01668 /* set up memory for the incoming vertex data blocks */ 01669 01670 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 01671 std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() ); 01672 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 01673 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01674 { 01675 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1]; 01676 } 01677 01678 /* post receives for all processors that have something for us */ 01679 /* post sends for all processors that we have something for */ 01680 01681 num_neighbourProcRecv = 0; 01682 01683 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01684 { 01685 if( 0 ) 01686 { 01687 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 01688 neighbourProc[i] ); 01689 fflush( NULL ); 01690 } 01691 if( numVtxPerProcRecv[i] ) 01692 { 01693 rval = 01694 MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE_PRECISION, neighbourProc[i], 01695 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) ); 01696 CHECK_MPI_RZERO( rval, err ); 01697 num_neighbourProcRecv++; 
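/* Editorial note (added comment, not in the original source): the receive above and the send below are sized as 4 * count MPI_DOUBLE_PRECISION values because each VertexPack appears to occupy four doubles per vertex (x, y, z and the packed global id). */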
01698 } 01699 else 01700 { 01701 requests_recv[i] = MPI_REQUEST_NULL; 01702 } 01703 if( 0 ) 01704 { 01705 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i], 01706 neighbourProc[i] ); 01707 fflush( NULL ); 01708 } 01709 if( numVtxPerProcSend[i] ) 01710 { 01711 rval = 01712 MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE_PRECISION, neighbourProc[i], 01713 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) ); 01714 CHECK_MPI_RZERO( rval, err ); 01715 } 01716 else 01717 { 01718 requests_send[i] = MPI_REQUEST_NULL; 01719 } 01720 } 01721 01722 /* wait for some receive to arrive */ 01723 01724 int local_id; 01725 while( num_neighbourProcRecv ) 01726 { 01727 rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) ); 01728 CHECK_MPI_RZERO( rval, err ); 01729 /* unpack all vertices */ 01730 for( i = 0; i < numVtxPerProcRecv[k]; i++ ) 01731 { 01732 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01733 if( local_id ) 01734 { 01735 MBMesquite::Vector3D coordinates; 01736 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 01737 packed_vertices_import[k][i].z ); 01738 if( 0 ) 01739 printf( "[%d]i%d vertex %d becomes %g %g %g\n", rank, iteration, local_id, 01740 packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 01741 packed_vertices_import[k][i].z ); 01742 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 01743 MSQ_ERRZERO( err ); 01744 assert( part_smoothed_flag[local_id] == 0 ); 01745 part_smoothed_flag[local_id] = 1; 01746 } 01747 else 01748 { 01749 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 01750 packed_vertices_import[k][i].glob_id, neighbourProc[k] ); 01751 } 01752 } 01753 num_neighbourProcRecv--; 01754 } 01755 01756 /* all receives have completed. it is save to release the memory */ 01757 // free(vertex_pack_import); 01758 01759 /* wait until the sends have completed */ 01760 01761 if( num_neighbourProcSend ) 01762 { 01763 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) ); 01764 CHECK_MPI_RZERO( rval, err ); 01765 } 01766 01767 /* all sends have completed. 
it is save to release the memory */ 01768 // free(vertex_pack_export); 01769 01770 return numVtxImport; 01771 } 01772 01773 int ParallelHelperImpl::comm_smoothed_vtx_nb( MsqError& err ) 01774 { 01775 int i, j, k, rval; 01776 01777 // printf("[%d] %d %d non blocking\n",rank, iteration, pass);fflush(NULL); 01778 01779 /* how many vertices will we receive */ 01780 01781 std::vector< int > numVtxPerProcSend( neighbourProc.size() ); 01782 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01783 { 01784 numVtxPerProcSend[i] = 0; 01785 } 01786 for( i = 0; i < num_exportVtx; i++ ) 01787 { 01788 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01789 { 01790 if( exportProc[i] == neighbourProc[j] ) 01791 { 01792 /* increment count */ 01793 numVtxPerProcSend[j]++; 01794 /* end loop */ 01795 break; 01796 } 01797 } 01798 /* assert loop did not end without finding processor */ 01799 assert( j != (long)neighbourProc.size() ); 01800 } 01801 01802 /* tell each processor how many vertices to expect */ 01803 01804 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01805 { 01806 rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 01807 (MPI_Comm)communicator ); 01808 CHECK_MPI_RZERO( rval, err ); 01809 // printf("[%d]i%d Announcing %d vertices to proc 01810 // %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL); 01811 } 01812 01813 /* place vertex data going to the same processor into consecutive memory space */ 01814 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 01815 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 01816 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 01817 for( i = 1; i < (long)neighbourProc.size(); i++ ) 01818 { 01819 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 01820 } 01821 01822 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 ); 01823 for( i = 0; i < num_exportVtx; i++ ) 01824 { 01825 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01826 { 01827 if( exportProc[i] == neighbourProc[j] ) 01828 { 01829 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 01830 numVtxPerProcSendPACKED[j]++; 01831 MBMesquite::MsqVertex coordinates; 01832 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 01833 MSQ_ERRZERO( err ); 01834 packing_vertex->x = coordinates[0]; 01835 packing_vertex->y = coordinates[1]; 01836 packing_vertex->z = coordinates[2]; 01837 packing_vertex->glob_id = exportVtxGIDs[i]; 01838 } 01839 } 01840 } 01841 // delete [] numVtxPerProcSendPACKED; 01842 01843 /* now ask each processor how many vertices to expect */ 01844 01845 int num; 01846 int proc; 01847 int numVtxImport = 0; 01848 int num_neighbourProcRecv = 0; 01849 std::vector< int > numVtxPerProcRecv( neighbourProc.size() ); 01850 MPI_Status status; 01851 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01852 { 01853 numVtxPerProcRecv[j] = 0; 01854 } 01855 01856 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01857 { 01858 /* get the vertex count for some processor */ 01859 rval = MPI_Recv( &num, /* message buffer */ 01860 1, /* one data item */ 01861 MPI_INT, /* of type int */ 01862 MPI_ANY_SOURCE, /* receive from any sender */ 01863 VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */ 01864 (MPI_Comm)communicator, /* default communicator */ 01865 &status ); /* info about the 
received message */ 01866 CHECK_MPI_RZERO( rval, err ); 01867 proc = status.MPI_SOURCE; 01868 /* will we import vertices from this processor */ 01869 01870 // printf("[%d]i%d Heard we will receive %d vertices from proc 01871 // %d\n",rank,iteration,num,proc); fflush(NULL); 01872 01873 if( num ) 01874 { 01875 /* increase number of processors we will receive from */ 01876 num_neighbourProcRecv++; 01877 /* add number of vertices we will receive to the import total */ 01878 numVtxImport += num; 01879 } 01880 /* find this processor in the list */ 01881 for( i = 0; i < (long)neighbourProc.size(); i++ ) 01882 { 01883 if( neighbourProc[i] == proc ) 01884 { 01885 numVtxPerProcRecv[i] = num; 01886 break; 01887 } 01888 } 01889 /* assert loop did not end without finding processor */ 01890 assert( i != (long)neighbourProc.size() ); 01891 if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc ); 01892 fflush( NULL ); 01893 } 01894 01895 /* create list of processors we receive from */ 01896 std::vector< int > neighbourProcRecv( num_neighbourProcRecv ); 01897 std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv ); 01898 for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ ) 01899 { 01900 if( 0 ) 01901 printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 01902 neighbourProc[i] ); 01903 fflush( NULL ); 01904 if( numVtxPerProcRecv[i] ) 01905 { 01906 neighbourProcRecv[k] = neighbourProc[i]; 01907 numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i]; 01908 k++; 01909 } 01910 } 01911 01912 /* set up memory for the incoming vertex data blocks */ 01913 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 01914 std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv ); 01915 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 01916 for( i = 1; i < num_neighbourProcRecv; i++ ) 01917 { 01918 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1]; 01919 } 01920 01921 /* receive from all processors that have something for us */ 01922 std::vector< MPI_Request > request( num_neighbourProcRecv ); 01923 for( j = 0; j < num_neighbourProcRecv; j++ ) 01924 { 01925 rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE_PRECISION, 01926 neighbourProcRecv[j], VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) ); 01927 CHECK_MPI_RZERO( rval, err ); 01928 if( 0 ) 01929 { 01930 printf( "[%d]i%d Scheduling receipt of %d vertices to proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j], 01931 neighbourProcRecv[j] ); 01932 fflush( NULL ); 01933 } 01934 } 01935 01936 /* now send the data blocks */ 01937 01938 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 01939 for( j = 0; j < (long)neighbourProc.size(); j++ ) 01940 { 01941 if( numVtxPerProcSend[j] ) 01942 { 01943 rval = 01944 MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE_PRECISION, neighbourProc[j], 01945 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) ); 01946 CHECK_MPI_RZERO( rval, err ); 01947 if( 0 ) 01948 { 01949 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j], 01950 neighbourProc[j] ); 01951 fflush( NULL ); 01952 } 01953 } 01954 else 01955 { 01956 requests_send[j] = MPI_REQUEST_NULL; 01957 } 01958 } 01959 01960 /* process messages as they arrive */ 01961 01962 int local_id; 01963 for( j = 
0; j < num_neighbourProcRecv; j++ ) 01964 { 01965 rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status ); 01966 CHECK_MPI_RZERO( rval, err ); 01967 01968 /* unpack messages */ 01969 proc = status.MPI_SOURCE; 01970 int count; 01971 MPI_Get_count( &status, MPI_INT, &count ); 01972 if( 0 ) 01973 printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k], 01974 count, neighbourProcRecv[k], proc ); 01975 fflush( NULL ); 01976 for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ ) 01977 { 01978 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 01979 if( local_id ) 01980 { 01981 MBMesquite::Vector3D coordinates; 01982 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 01983 packed_vertices_import[k][i].z ); 01984 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 01985 MSQ_ERRZERO( err ); 01986 assert( part_smoothed_flag[local_id] == 0 ); 01987 part_smoothed_flag[local_id] = 1; 01988 if( 0 ) 01989 printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration, 01990 (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x, 01991 packed_vertices_import[k][i].y, packed_vertices_import[k][i].z ); 01992 } 01993 else 01994 { 01995 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 01996 packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 01997 } 01998 } 01999 } 02000 02001 /* all receives have completed. it is safe to release the memory */ 02002 // free(vertex_pack_import); 02003 /* wait until the sends have completed */ 02004 std::vector< MPI_Status > stati( neighbourProc.size() ); 02005 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) ); 02006 CHECK_MPI_RZERO( rval, err ); 02007 /* all sends have completed.
it is save to release the memory */ 02008 // free(vertex_pack_export); 02009 02010 return numVtxImport; 02011 } 02012 02013 int ParallelHelperImpl::comm_smoothed_vtx_nb_no_all( MsqError& err ) 02014 { 02015 int i, j, k, rval; 02016 02017 // printf("[%d] %d %d non blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL); 02018 02019 /* how many vertices will we receive */ 02020 02021 std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 ); 02022 for( i = 0; i < num_exportVtx; i++ ) 02023 { 02024 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02025 { 02026 if( exportProc[i] == neighbourProc[j] ) 02027 { 02028 /* increment count */ 02029 numVtxPerProcSend[j]++; 02030 /* end loop */ 02031 break; 02032 } 02033 } 02034 /* assert loop did not end without finding processor */ 02035 assert( j != (long)neighbourProc.size() ); 02036 } 02037 02038 /* tell each processor how many vertices to expect */ 02039 02040 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02041 { 02042 if( neighbourProcSendRemain[j] ) 02043 { 02044 assert( neighbourProcSendRemain[j] >= numVtxPerProcSend[j] ); 02045 neighbourProcSendRemain[j] -= numVtxPerProcSend[j]; 02046 rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 02047 (MPI_Comm)communicator ); 02048 CHECK_MPI_RZERO( rval, err ); 02049 // printf("[%d]i%d Announcing %d vertices to proc 02050 // %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL); 02051 } 02052 else 02053 { 02054 assert( numVtxPerProcSend[j] == 0 ); 02055 } 02056 } 02057 02058 /* place vertex data going to the same processor into consecutive memory space */ 02059 std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */ 02060 std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() ); 02061 if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export ); 02062 for( i = 1; i < (long)neighbourProc.size(); i++ ) 02063 { 02064 packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1]; 02065 } 02066 02067 std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 ); 02068 for( i = 0; i < num_exportVtx; i++ ) 02069 { 02070 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02071 { 02072 if( exportProc[i] == neighbourProc[j] ) 02073 { 02074 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j]; 02075 numVtxPerProcSendPACKED[j]++; 02076 MBMesquite::MsqVertex coordinates; 02077 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 02078 MSQ_ERRZERO( err ); 02079 packing_vertex->x = coordinates[0]; 02080 packing_vertex->y = coordinates[1]; 02081 packing_vertex->z = coordinates[2]; 02082 packing_vertex->glob_id = exportVtxGIDs[i]; 02083 } 02084 } 02085 } 02086 // delete [] numVtxPerProcSendPACKED; 02087 02088 /* now ask each processor how many vertices to expect */ 02089 02090 int num; 02091 int proc; 02092 int numVtxImport = 0; 02093 int num_neighbourProcRecv = 0; 02094 std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 ); 02095 MPI_Status status; 02096 02097 int num_neighbourProcRecvRemain = 0; 02098 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02099 { 02100 if( neighbourProcRecvRemain[j] ) { num_neighbourProcRecvRemain++; } 02101 } 02102 for( j = 0; j < num_neighbourProcRecvRemain; j++ ) 02103 { 02104 /* get the vertex count for some processor */ 02105 rval = MPI_Recv( &num, /* message buffer */ 02106 1, /* one data item */ 02107 
MPI_INT, /* of type int */ 02108 MPI_ANY_SOURCE, /* receive from any sender */ 02109 VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */ 02110 (MPI_Comm)communicator, /* default communicator */ 02111 &status ); /* info about the received message */ 02112 CHECK_MPI_RZERO( rval, err ); 02113 proc = status.MPI_SOURCE; 02114 /* will we import vertices from this processor */ 02115 02116 // printf("[%d]i%d Heard we will receive %d vertices from proc 02117 // %d\n",rank,iteration,num,proc); fflush(NULL); 02118 02119 if( num ) 02120 { 02121 /* increase number of processors we will receive from */ 02122 num_neighbourProcRecv++; 02123 /* add number of vertices we will receive to the import total */ 02124 numVtxImport += num; 02125 } 02126 /* find this processor in the list */ 02127 for( i = 0; i < (long)neighbourProc.size(); i++ ) 02128 { 02129 if( neighbourProc[i] == proc ) 02130 { 02131 numVtxPerProcRecv[i] = num; 02132 assert( neighbourProcRecvRemain[i] >= num ); 02133 neighbourProcRecvRemain[i] -= num; 02134 break; 02135 } 02136 } 02137 /* assert loop did not end without finding processor */ 02138 assert( i != (long)neighbourProc.size() ); 02139 if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc ); 02140 fflush( NULL ); 02141 } 02142 02143 /* create list of processors we receive from */ 02144 std::vector< int > neighbourProcRecv( num_neighbourProcRecv ); 02145 std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv ); 02146 for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ ) 02147 { 02148 if( 0 ) 02149 printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i], 02150 neighbourProc[i] ); 02151 fflush( NULL ); 02152 if( numVtxPerProcRecv[i] ) 02153 { 02154 neighbourProcRecv[k] = neighbourProc[i]; 02155 numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i]; 02156 k++; 02157 } 02158 } 02159 02160 /* set up memory for the incoming vertex data blocks */ 02161 std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */ 02162 std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv ); 02163 if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import ); 02164 for( i = 1; i < num_neighbourProcRecv; i++ ) 02165 { 02166 packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1]; 02167 } 02168 02169 /* receive from all processors that have something for us */ 02170 std::vector< MPI_Request > request( num_neighbourProcRecv ); 02171 for( j = 0; j < num_neighbourProcRecv; j++ ) 02172 { 02173 rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE_PRECISION, 02174 neighbourProcRecv[j], VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) ); 02175 CHECK_MPI_RZERO( rval, err ); 02176 if( 0 ) 02177 { 02178 printf( "[%d]i%d Scheduling receipt of %d vertices to proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j], 02179 neighbourProcRecv[j] ); 02180 fflush( NULL ); 02181 } 02182 } 02183 02184 /* now send the data blocks */ 02185 02186 std::vector< MPI_Request > requests_send( neighbourProc.size() ); 02187 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02188 { 02189 if( numVtxPerProcSend[j] ) 02190 { 02191 rval = 02192 MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE_PRECISION, neighbourProc[j], 02193 VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) ); 02194 CHECK_MPI_RZERO( rval, err ); 02195 if( 0 ) 
02196 { 02197 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j], 02198 neighbourProc[j] ); 02199 fflush( NULL ); 02200 } 02201 } 02202 else 02203 { 02204 requests_send[j] = MPI_REQUEST_NULL; 02205 } 02206 } 02207 02208 /* process messages as they arrive */ 02209 02210 int local_id; 02211 for( j = 0; j < num_neighbourProcRecv; j++ ) 02212 { 02213 rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status ); 02214 CHECK_MPI_RZERO( rval, err ); 02215 02216 /* unpack messages */ 02217 proc = status.MPI_SOURCE; 02218 int count; 02219 MPI_Get_count( &status, MPI_INT, &count ); 02220 if( 0 ) 02221 printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k], 02222 count, neighbourProcRecv[k], proc ); 02223 fflush( NULL ); 02224 for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ ) 02225 { 02226 local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 02227 if( local_id ) 02228 { 02229 MBMesquite::Vector3D coordinates; 02230 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y, 02231 packed_vertices_import[k][i].z ); 02232 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 02233 MSQ_ERRZERO( err ); 02234 assert( part_smoothed_flag[local_id] == 0 ); 02235 part_smoothed_flag[local_id] = 1; 02236 if( 0 ) 02237 printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration, 02238 (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x, 02239 packed_vertices_import[k][i].y, packed_vertices_import[k][i].z ); 02240 } 02241 else 02242 { 02243 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 02244 packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] ); 02245 } 02246 } 02247 } 02248 02249 /* all receives have completed. it is save to release the memory */ 02250 // free(vertex_pack_import); 02251 /* wait until the sends have completed */ 02252 std::vector< MPI_Status > stati( neighbourProc.size() ); 02253 rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) ); 02254 CHECK_MPI_RZERO( rval, err ); 02255 /* all sends have completed. 
it is save to release the memory */ 02256 // free(vertex_pack_export); 02257 02258 return numVtxImport; 02259 } 02260 02261 int ParallelHelperImpl::comm_smoothed_vtx_b( MsqError& err ) 02262 { 02263 int i, j, rval; 02264 02265 // printf("[%d] %d %d blocking\n",rank, iteration, pass);fflush(NULL); 02266 02267 /* how many vertices per processor */ 02268 02269 std::vector< int > numVtxPerProc( neighbourProc.size(), 0 ); 02270 for( i = 0; i < num_exportVtx; i++ ) 02271 { 02272 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02273 { 02274 if( exportProc[i] == neighbourProc[j] ) 02275 { 02276 /* increment count */ 02277 numVtxPerProc[j]++; 02278 /* end loop */ 02279 break; 02280 } 02281 /* assert loop did not end without finding the processor */ 02282 assert( j != (long)neighbourProc.size() ); 02283 } 02284 } 02285 02286 /* place vertices going to the same processor into consecutive memory space */ 02287 02288 std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */ 02289 std::vector< VertexPack* > packed_vertices( neighbourProc.size() ); 02290 VertexPack* packing_vertex; 02291 packed_vertices[0] = ARRPTR( vertex_pack ); 02292 for( i = 1; i < (long)neighbourProc.size(); i++ ) 02293 { 02294 packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1]; 02295 } 02296 02297 std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 ); 02298 02299 for( i = 0; i < num_exportVtx; i++ ) 02300 { 02301 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02302 { 02303 if( exportProc[i] == neighbourProc[j] ) 02304 { 02305 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j]; 02306 numVtxPackedPerProc[j]++; 02307 MBMesquite::MsqVertex coordinates; 02308 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 02309 MSQ_ERRZERO( err ); 02310 packing_vertex->x = coordinates[0]; 02311 packing_vertex->y = coordinates[1]; 02312 packing_vertex->z = coordinates[2]; 02313 packing_vertex->glob_id = exportVtxGIDs[i]; 02314 } 02315 } 02316 } 02317 02318 // delete [] numVtxPackedPerProc; 02319 02320 /* send each block so the corresponding processor preceeded by the number of vertices */ 02321 02322 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02323 { 02324 02325 // printf("[%d]i%dp%d Announcing %d vertices to proc 02326 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL); 02327 02328 rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 02329 (MPI_Comm)communicator ); 02330 CHECK_MPI_RZERO( rval, err ); 02331 // printf("[%d]i%dp%d Sending %d vertices to proc 02332 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL); 02333 02334 /* is there any vertex data to be sent */ 02335 02336 if( numVtxPerProc[j] ) 02337 { 02338 rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE_PRECISION, neighbourProc[j], 02339 VERTEX_BLOCK + iteration, (MPI_Comm)communicator ); 02340 CHECK_MPI_RZERO( rval, err ); 02341 // printf("[%d]i%dp%d Sent %d vertices to proc 02342 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL); 02343 } 02344 } 02345 02346 int num; 02347 int proc; 02348 // int tag; 02349 int count; 02350 int numVtxImport = 0; 02351 MPI_Status status; 02352 int local_id; 02353 02354 // printf("[%d]i%dp%d Waiting to receive vertices from %d processors ... 
02355 // \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL); 02356 02357 /* receiving blocks from other processors */ 02358 02359 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02360 { 02361 rval = MPI_Recv( &num, /* message buffer */ 02362 1, /* one data item */ 02363 MPI_INT, /* of type int */ 02364 MPI_ANY_SOURCE, /* receive from any sender */ 02365 VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */ 02366 (MPI_Comm)communicator, /* default communicator */ 02367 &status ); /* info about the received message */ 02368 CHECK_MPI_RZERO( rval, err ); 02369 proc = status.MPI_SOURCE; 02370 // tag = status.MPI_TAG; 02371 MPI_Get_count( &status, MPI_INT, &count ); 02372 02373 // printf("[%d]i%dp%d Receiving %d vertices from proc 02374 // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL); 02375 02376 /* is there any vertex data to be received */ 02377 02378 if( num ) 02379 { 02380 02381 numVtxImport += num; 02382 02383 /* do we have enough space allocated */ 02384 02385 if( num_exportVtx + 10 < num ) 02386 { 02387 // if (vertex_pack) free(vertex_pack); 02388 num_exportVtx = num; 02389 vertex_pack.resize( num_exportVtx + 10 ); 02390 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10)); 02391 } 02392 02393 rval = MPI_Recv( ARRPTR( vertex_pack ), /* message buffer */ 02394 4 * num, /* num data's item with 4 doubles each */ 02395 MPI_DOUBLE_PRECISION, /* of type double */ 02396 proc, /* receive from this procesor only */ 02397 VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */ 02398 (MPI_Comm)communicator, /* default communicator */ 02399 &status ); /* info about the received message */ 02400 CHECK_MPI_RZERO( rval, err ); 02401 02402 proc = status.MPI_SOURCE; 02403 // tag = status.MPI_TAG; 02404 MPI_Get_count( &status, MPI_DOUBLE_PRECISION, &count ); 02405 02406 if( count != 4 * num ) 02407 printf( "[%d]i%d WARNING: expected %d vertices = %d bytes from proc %d but only " 02408 "got %d bytes\n", 02409 rank, iteration, num, num * 4, proc, count ); 02410 fflush( NULL ); 02411 02412 // printf("[%d]i%d Received %d vertices from proc 02413 // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL); 02414 02415 /* update the received vertices in our boundary mesh */ 02416 for( i = 0; i < num; i++ ) 02417 { 02418 /* printf("[%d]i%d updating vertex %d with global_id 02419 * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */ 02420 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc ); 02421 if( local_id ) 02422 { 02423 MBMesquite::Vector3D coordinates; 02424 coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z ); 02425 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 02426 MSQ_ERRZERO( err ); 02427 assert( part_smoothed_flag[local_id] == 0 ); 02428 part_smoothed_flag[local_id] = 1; 02429 if( 0 ) 02430 printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration, 02431 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z ); 02432 } 02433 else 02434 { 02435 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 02436 vertex_pack[i].glob_id, proc ); 02437 } 02438 } 02439 } 02440 } 02441 // if (vertex_pack) free(vertex_pack); 02442 02443 return numVtxImport; 02444 } 02445 02446 int ParallelHelperImpl::comm_smoothed_vtx_b_no_all( MsqError& err ) 02447 { 02448 int i, j, rval; 02449 02450 // printf("[%d] %d %d blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL); 02451 
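/* Editorial summary (added comment, not in the original source): this blocking variant sends, to every neighbour that still has a non-zero send budget (neighbourProcSendRemain), first the vertex count (tag VERTEX_HEADER + iteration) and then, if the count is non-zero, the packed coordinates (tag VERTEX_BLOCK + iteration); on the receive side one header is expected from every neighbour with a non-zero receive budget, so, as the commented-out trace above suggests, completion can be detected without a global reduction. */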
02452 /* how many vertices per processor */ 02453 02454 std::vector< int > numVtxPerProc( neighbourProc.size(), 0 ); 02455 for( i = 0; i < num_exportVtx; i++ ) 02456 { 02457 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02458 { 02459 if( exportProc[i] == neighbourProc[j] ) 02460 { 02461 /* increment count */ 02462 numVtxPerProc[j]++; 02463 /* end loop */ 02464 break; 02465 } 02466 /* assert loop did not end without finding the processor */ 02467 assert( j != (long)neighbourProc.size() ); 02468 } 02469 } 02470 02471 /* place vertices going to the same processor into consecutive memory space */ 02472 02473 std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */ 02474 std::vector< VertexPack* > packed_vertices( neighbourProc.size() ); 02475 VertexPack* packing_vertex; 02476 packed_vertices[0] = ARRPTR( vertex_pack ); 02477 for( i = 1; i < (long)neighbourProc.size(); i++ ) 02478 { 02479 packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1]; 02480 } 02481 02482 std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 ); 02483 02484 for( i = 0; i < num_exportVtx; i++ ) 02485 { 02486 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02487 { 02488 if( exportProc[i] == neighbourProc[j] ) 02489 { 02490 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j]; 02491 numVtxPackedPerProc[j]++; 02492 MBMesquite::MsqVertex coordinates; 02493 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err ); 02494 MSQ_ERRZERO( err ); 02495 packing_vertex->x = coordinates[0]; 02496 packing_vertex->y = coordinates[1]; 02497 packing_vertex->z = coordinates[2]; 02498 packing_vertex->glob_id = exportVtxGIDs[i]; 02499 } 02500 } 02501 } 02502 02503 // delete [] numVtxPackedPerProc; 02504 02505 /* send each block so the corresponding processor preceeded by the number of vertices */ 02506 02507 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02508 { 02509 02510 if( neighbourProcSendRemain[j] ) 02511 { 02512 assert( neighbourProcSendRemain[j] >= numVtxPerProc[j] ); 02513 neighbourProcSendRemain[j] -= numVtxPerProc[j]; 02514 02515 // printf("[%d]i%dp%d Announcing %d vertices to proc 02516 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL); 02517 02518 rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration, 02519 (MPI_Comm)communicator ); 02520 CHECK_MPI_RZERO( rval, err ); 02521 02522 // printf("[%d]i%dp%d Sending %d vertices to proc 02523 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL); 02524 02525 /* is there any vertex data to be sent */ 02526 02527 if( numVtxPerProc[j] ) 02528 { 02529 rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE_PRECISION, neighbourProc[j], 02530 VERTEX_BLOCK + iteration, (MPI_Comm)communicator ); 02531 CHECK_MPI_RZERO( rval, err ); 02532 02533 // printf("[%d]i%dp%d Sent %d vertices to proc 02534 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL); 02535 } 02536 } 02537 else 02538 { 02539 // printf("[%d]i%dp%d no longer sending to %d\n",rank,iteration,neighbourProc[j]); 02540 // fflush(NULL); 02541 } 02542 } 02543 02544 int num; 02545 int proc; 02546 // int tag; 02547 int count; 02548 int numVtxImport = 0; 02549 MPI_Status status; 02550 int local_id; 02551 02552 // printf("[%d]i%dp%d Waiting to receive vertices from %d processors ... 
02553 // \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL); 02554 02555 /* receiving blocks in any order from other processors */ 02556 02557 int num_neighbourProcRecvRemain = 0; 02558 for( j = 0; j < (long)neighbourProc.size(); j++ ) 02559 { 02560 if( neighbourProcRecvRemain[j] ) { num_neighbourProcRecvRemain++; } 02561 } 02562 for( j = 0; j < num_neighbourProcRecvRemain; j++ ) 02563 { 02564 rval = MPI_Recv( &num, /* message buffer */ 02565 1, /* one data item */ 02566 MPI_INT, /* of type int */ 02567 MPI_ANY_SOURCE, /* receive from any sender */ 02568 VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */ 02569 (MPI_Comm)communicator, /* default communicator */ 02570 &status ); /* info about the received message */ 02571 CHECK_MPI_RZERO( rval, err ); 02572 proc = status.MPI_SOURCE; 02573 // tag = status.MPI_TAG; 02574 MPI_Get_count( &status, MPI_INT, &count ); 02575 02576 // printf("[%d]i%dp%d Receiving %d vertices from proc 02577 // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL); 02578 02579 /* is there any vertex data to be received */ 02580 02581 if( num ) 02582 { 02583 02584 numVtxImport += num; 02585 02586 /* do we have enough space allocated */ 02587 02588 if( num_exportVtx + 10 < num ) 02589 { 02590 // if (vertex_pack) free(vertex_pack); 02591 num_exportVtx = num; 02592 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10)); 02593 vertex_pack.resize( num_exportVtx + 10 ); 02594 } 02595 02596 rval = MPI_Recv( ARRPTR( vertex_pack ), /* message buffer */ 02597 4 * num, /* num data's item with 4 doubles each */ 02598 MPI_DOUBLE_PRECISION, /* of type double */ 02599 proc, /* receive from this procesor only */ 02600 VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */ 02601 (MPI_Comm)communicator, /* default communicator */ 02602 &status ); /* info about the received message */ 02603 CHECK_MPI_RZERO( rval, err ); 02604 02605 proc = status.MPI_SOURCE; 02606 // tag = status.MPI_TAG; 02607 MPI_Get_count( &status, MPI_DOUBLE_PRECISION, &count ); 02608 02609 if( count != 4 * num ) 02610 printf( "[%d]i%d WARNING: expected %d vertices = %d bytes from proc %d but only " 02611 "got %d bytes\n", 02612 rank, iteration, num, num * 4, proc, count ); 02613 fflush( NULL ); 02614 02615 // printf("[%d]i%d Received %d vertices from proc 02616 // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL); 02617 02618 /* update the received vertices in our boundary mesh */ 02619 for( i = 0; i < num; i++ ) 02620 { 02621 /* printf("[%d]i%d updating vertex %d with global_id 02622 * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */ 02623 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc ); 02624 if( local_id ) 02625 { 02626 MBMesquite::Vector3D coordinates; 02627 coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z ); 02628 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err ); 02629 MSQ_ERRZERO( err ); 02630 assert( part_smoothed_flag[local_id] == 0 ); 02631 part_smoothed_flag[local_id] = 1; 02632 if( 0 && rank == 1 ) 02633 printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration, 02634 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z ); 02635 } 02636 else 02637 { 02638 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration, 02639 vertex_pack[i].glob_id, proc ); 02640 } 02641 } 02642 } 02643 for( i = 0; i < (long)neighbourProc.size(); i++ ) 02644 { 02645 if( proc == 
neighbourProc[i] ) 02646 { 02647 assert( neighbourProcRecvRemain[i] >= num ); 02648 neighbourProcRecvRemain[i] -= num; 02649 break; 02650 } 02651 } 02652 } 02653 // if (vertex_pack) free(vertex_pack); 02654 02655 return numVtxImport; 02656 } 02657 02658 /*********************************************************************** 02659 COMPUTING THE INDEPENDENT SET AND EXPORT INFORMATION 02660 ***********************************************************************/ 02661 /* the only thing that can prevent a vertex from being in the independent 02662 set is that it has a neighbouring vertex on a different processor that 02663 has not been smoothed yet and that has a larger random number */ 02664 02665 void ParallelHelperImpl::compute_independent_set() 02666 { 02667 int i, j, k, l; 02668 int incident_vtx; 02669 bool done; 02670 02671 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 02672 in_independent_set[i] = false; 02673 02674 bool found_more = true; 02675 int found_iter = 0; 02676 02677 num_exportVtx = 0; 02678 02679 while( found_more ) 02680 { 02681 found_iter++; 02682 found_more = false; 02683 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 02684 { 02685 /* if this vertex could become part of the independent set */ 02686 if( part_smoothed_flag[i] == 0 && in_independent_set[i] == false ) 02687 { 02688 /* assume it's in the independent set */ 02689 done = false; 02690 /* then loop over the neighbors it has on other processors */ 02691 for( j = 0; !done && j < (long)vtx_off_proc_list[i].size(); j++ ) 02692 { 02693 incident_vtx = vtx_off_proc_list[i][j]; 02694 /* if this neighbour has not yet been smoothed and is not covered and ... */ 02695 if( part_smoothed_flag[incident_vtx] == 0 ) 02696 { 02697 /* ... has a higher rand_number than me */ 02698 if( part_rand_number[i] < part_rand_number[incident_vtx] ) 02699 { 02700 /* then I am not in the independent set */ 02701 done = true; 02702 break; 02703 } 02704 /* ...
or has the same rand_number than me but a higher processor id */ 02705 else if( ( part_rand_number[i] == part_rand_number[incident_vtx] ) && 02706 ( rank < part_proc_owner[incident_vtx] ) ) 02707 { 02708 /* then I am not in the independent set */ 02709 done = true; 02710 break; 02711 } 02712 } 02713 } 02714 /* if the vertex is in the independent set, add it to the export list */ 02715 if( !done ) 02716 { 02717 found_more = true; 02718 // if (found_iter > 1) printf("[%d]i%d found another one %d in iteration 02719 //%d\n",rank,iteration,i, found_iter); 02720 in_independent_set[i] = true; 02721 /* mark vertices with lower random numbers as covered */ 02722 for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ ) 02723 { 02724 incident_vtx = vtx_off_proc_list[i][j]; 02725 /* if this neighbour has not yet been smoothed or covered mark it as covered 02726 */ 02727 if( part_smoothed_flag[incident_vtx] == 0 ) { part_smoothed_flag[incident_vtx] = 2; } 02728 } 02729 k = num_exportVtx; 02730 /* then loop over the neighbors it has on other processors */ 02731 for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ ) 02732 { 02733 incident_vtx = vtx_off_proc_list[i][j]; 02734 /* check to see if this processor already on the list */ 02735 done = false; 02736 for( l = k; l < num_exportVtx && !done; l++ ) 02737 { 02738 if( exportProc[l] == part_proc_owner[incident_vtx] ) { done = true; } 02739 } 02740 /* if it's not on the list add it */ 02741 if( !done ) 02742 { 02743 exportVtxLIDs[num_exportVtx] = i; 02744 exportVtxGIDs[num_exportVtx] = part_gid[i]; 02745 exportProc[num_exportVtx] = part_proc_owner[incident_vtx]; 02746 num_exportVtx++; 02747 } 02748 } 02749 } 02750 } 02751 } 02752 } 02753 02754 if( 0 ) 02755 { 02756 int in_set = 0; 02757 for( i = 0; i < num_vtx_partition_boundary_local; i++ ) 02758 if( in_independent_set[i] ) in_set++; 02759 ; 02760 printf( "[%d]i%d independent set has %d of %d vertices sent out %d times\n", rank, iteration, in_set, 02761 num_vtx_partition_boundary_local, num_exportVtx ); 02762 fflush( NULL ); 02763 } 02764 02765 /* unmark the vertices that have been marked as covered */ 02766 for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ ) 02767 if( part_smoothed_flag[i] == 2 ) part_smoothed_flag[i] = 0; 02768 } 02769 02770 int ParallelHelperImpl::get_rank() const 02771 { 02772 return rank; 02773 } 02774 02775 int ParallelHelperImpl::get_nprocs() const 02776 { 02777 return nprocs; 02778 } 02779 02780 bool ParallelHelperImpl::is_our_element( MBMesquite::Mesh::ElementHandle element_handle, MsqError& err ) const 02781 { 02782 int i; 02783 std::vector< Mesh::VertexHandle > pvertices; 02784 std::vector< size_t > junk; 02785 mesh->elements_get_attached_vertices( &element_handle, 1, pvertices, junk, err ); 02786 MSQ_ERRZERO( err ); 02787 int num_verts = pvertices.size(); 02788 std::vector< int > proc_ids( num_verts ); 02789 mesh->vertices_get_processor_id( ARRPTR( pvertices ), ARRPTR( proc_ids ), num_verts, err ); 02790 MSQ_ERRZERO( err ); 02791 int max_proc_id = proc_ids[0]; 02792 for( i = 1; i < num_verts; i++ ) 02793 if( max_proc_id < proc_ids[i] ) max_proc_id = proc_ids[i]; 02794 return ( max_proc_id == rank ); 02795 } 02796 02797 bool ParallelHelperImpl::is_our_vertex( MBMesquite::Mesh::VertexHandle vertex_handle, MsqError& err ) const 02798 { 02799 int proc_id; 02800 mesh->vertices_get_processor_id( &vertex_handle, &proc_id, 1, err ); 02801 return !MSQ_CHKERR( err ) && ( proc_id == rank ); 02802 } 02803 02804 void 
ParallelHelperImpl::communicate_min_max_to_all( double* minimum, double* maximum, MsqError& err ) const 02805 { 02806 double d_min[2]; 02807 double d_min_recv[2]; 02808 d_min[0] = -( *maximum ); 02809 d_min[1] = *minimum; 02810 int rval = MPI_Allreduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, (MPI_Comm)communicator ); 02811 CHECK_MPI( rval, err ); 02812 *maximum = -d_min_recv[0]; 02813 *minimum = d_min_recv[1]; 02814 } 02815 02816 void ParallelHelperImpl::communicate_min_max_to_zero( double* minimum, double* maximum, MsqError& err ) const 02817 { 02818 double d_min[2]; 02819 double d_min_recv[2]; 02820 d_min[0] = -( *maximum ); 02821 d_min[1] = *minimum; 02822 int rval = MPI_Reduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, 0, (MPI_Comm)communicator ); 02823 CHECK_MPI( rval, err ); 02824 if( rank == 0 ) 02825 { 02826 *maximum = -d_min_recv[0]; 02827 *minimum = d_min_recv[1]; 02828 } 02829 } 02830 02831 void ParallelHelperImpl::communicate_sums_to_zero( size_t* freeElementCount, int* invertedElementCount, 02832 size_t* elementCount, int* invertedSampleCount, size_t* sampleCount, 02833 long unsigned int* count, long unsigned int* invalid, double* sum, 02834 double* sqrSum, MsqError& err ) const 02835 { 02836 double d_sum[9]; 02837 double d_sum_recv[9]; 02838 02839 d_sum[0] = (double)( *freeElementCount ); 02840 d_sum[1] = (double)( *invertedElementCount ); 02841 d_sum[2] = (double)( *elementCount ); 02842 d_sum[3] = (double)( *invertedSampleCount ); 02843 d_sum[4] = (double)( *sampleCount ); 02844 d_sum[5] = (double)( *count ); 02845 d_sum[6] = (double)( *invalid ); 02846 d_sum[7] = *sum; 02847 d_sum[8] = *sqrSum; 02848 02849 int rval = MPI_Reduce( d_sum, d_sum_recv, 9, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator ); 02850 CHECK_MPI( rval, err ); 02851 02852 if( rank == 0 ) 02853 { 02854 *freeElementCount = (size_t)d_sum_recv[0]; 02855 *invertedElementCount = (int)d_sum_recv[1]; 02856 *elementCount = (size_t)d_sum_recv[2]; 02857 *invertedSampleCount = (int)d_sum_recv[3]; 02858 *sampleCount = (size_t)d_sum_recv[4]; 02859 *count = (long unsigned int)d_sum_recv[5]; 02860 *invalid = (long unsigned int)d_sum_recv[6]; 02861 *sum = d_sum_recv[7]; 02862 *sqrSum = d_sum_recv[8]; 02863 } 02864 } 02865 02866 void ParallelHelperImpl::communicate_power_sum_to_zero( double* pMean, MsqError& err ) const 02867 { 02868 double result; 02869 int rval = MPI_Reduce( pMean, &result, 1, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator ); 02870 CHECK_MPI( rval, err ); 02871 if( rank == 0 ) *pMean = result; 02872 } 02873 02874 void ParallelHelperImpl::communicate_histogram_to_zero( std::vector< int >& histogram, MsqError& err ) const 02875 { 02876 std::vector< int > histogram_recv( histogram.size() ); 02877 int rval = MPI_Reduce( &( histogram[0] ), &( histogram_recv[0] ), histogram.size(), MPI_INT, MPI_SUM, 0, 02878 (MPI_Comm)communicator ); 02879 CHECK_MPI( rval, err ); 02880 if( rank == 0 ) { histogram.swap( histogram_recv ); } 02881 } 02882 02883 /// if any proc has a true input @param value, return true to all 02884 void ParallelHelperImpl::communicate_any_true( bool& value, MsqError& err ) const 02885 { 02886 int byte_out = value, byte_in; 02887 int rval = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MAX, (MPI_Comm)communicator ); 02888 CHECK_MPI( rval, err ); 02889 value = ( byte_in != 0 ); 02890 } 02891 02892 /// srkenno AT sandia.gov 1/19/12: bug fix: changed from MPI_MAX to MIN - this makes the name of the 02893 /// function correct, if all proc's values are true, it returns true. 
@see communicate_any_true 02894 /// above 02895 02896 /// return true in @param value if all procs have value=true, else if any have it false, return 02897 /// false 02898 void ParallelHelperImpl::communicate_all_true( bool& value, MsqError& err ) const 02899 { 02900 int byte_out = value, byte_in; 02901 int rval = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MIN, (MPI_Comm)communicator ); 02902 CHECK_MPI( rval, err ); 02903 value = ( byte_in != 0 ); 02904 } 02905 02906 } // namespace MBMesquite
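/* Editorial addendum (not part of the original source): communicate_any_true() and
   communicate_all_true() above encode a bool as an int and reduce it with MPI_MAX
   ("any") or MPI_MIN ("all"). A minimal standalone sketch of the same pattern is
   given below, assuming an already-initialized communicator; the helper names are
   illustrative only and do not exist in this file. */
#if 0
static bool any_rank_true( bool local, MPI_Comm comm )
{
    int flag_in = local ? 1 : 0, flag_out = 0;
    /* MAX over the 0/1 flags: result is true everywhere if it is true on at least one rank */
    MPI_Allreduce( &flag_in, &flag_out, 1, MPI_INT, MPI_MAX, comm );
    return flag_out != 0;
}

static bool all_ranks_true( bool local, MPI_Comm comm )
{
    int flag_in = local ? 1 : 0, flag_out = 0;
    /* MIN over the 0/1 flags: result is true everywhere only if it is true on every rank */
    MPI_Allreduce( &flag_in, &flag_out, 1, MPI_INT, MPI_MIN, comm );
    return flag_out != 0;
}
#endif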