MOAB: Mesh Oriented datABase  (version 5.4.1)
ParallelHelper.cpp
00001 #include "ParallelHelper.hpp"
00002 
00003 #include <mpi.h>
00004 
00005 #include <cstdio>
00006 #include <cassert>
00007 #include <iostream>
00008 #include <cstdlib>
00009 #include <algorithm>
00010 #include <ctime>
00011 
00012 #include "MsqVertex.hpp"
00013 #include "MsqError.hpp"
00014 
00015 #define VERTEX_HEADER              1
00016 #define VERTEX_BLOCK               1000
00017 #define GHOST_NODE_INFO            2000
00018 #define GHOST_NODE_VERTICES_WANTED 2001
00019 #define GHOST_NODE_VERTEX_GIDS     2002
00020 #define GHOST_NODE_VERTEX_UPDATES  2003
00021 
00022 #define ARRPTR( x ) arrptr( x, true )
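/* Note: arrptr() is presumably the Mesquite helper that returns a pointer to a std::vector's
 * first element (or NULL when the vector is empty and the second argument allows it);
 * ARRPTR(x) is used below wherever a raw array pointer must be passed to the mesh interface
 * or to MPI. */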
00023 
00024 namespace MBMesquite
00025 {
00026 
00027 void parallel_barrier()
00028 {
00029     int is_init = 0;
00030     int err     = MPI_Initialized( &is_init );
00031     if( MPI_SUCCESS != err ) return;
00032     if( is_init ) MPI_Barrier( MPI_COMM_WORLD );
00033 }
00034 
00035 int get_parallel_rank()
00036 {
00037     int rank    = 0;
00038     int is_init = 0;
00039     int err     = MPI_Initialized( &is_init );
00040     if( MPI_SUCCESS != err ) return 0;
00041     if( is_init ) MPI_Comm_rank( MPI_COMM_WORLD, &rank );
00042     return rank;
00043 }
00044 
00045 int get_parallel_size()
00046 {
00047     int nprocs  = 0;
00048     int is_init = 0;
00049     int err     = MPI_Initialized( &is_init );
00050     if( MPI_SUCCESS != err ) return 0;
00051     if( is_init ) MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
00052     return nprocs;
00053 }
00054 
00055 double reduce_parallel_max( double value )
00056 {
00057     int is_init = 0;
00058     int err     = MPI_Initialized( &is_init );
00059     if( MPI_SUCCESS != err ) return value;
00060     if( !is_init ) return value;
00061 
00062     double d_max[1];
00063     double d_max_recv[1];
00064     d_max[0] = value;
00065     int rval = MPI_Allreduce( d_max, d_max_recv, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
00066     if( MPI_SUCCESS != rval ) return value;
00067 
00068     return d_max_recv[0];
00069 }
00070 
00071 static const char* mpi_err_string( int error_code )
00072 {
00073     static char buffer[128];
00074     int len = sizeof( buffer );
00075     int e   = MPI_Error_string( error_code, buffer, &len );
00076     if( MPI_SUCCESS != e ) len = 0;
00077 
00078     if( len >= (int)sizeof( buffer ) ) len = sizeof( buffer ) - 1;
00079     buffer[len] = '\0';
00080     return buffer;
00081 }
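/* Note: mpi_err_string() formats into a static buffer, so the returned pointer is only valid
 * until the next call and the helper is not thread-safe; it is only used for error reporting
 * by the CHECK_MPI macros below. */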
00082 
00083 #define CHECK_MPI( RVAL, ERR )                                                                          \
00084     do                                                                                                  \
00085     {                                                                                                   \
00086         if( MPI_SUCCESS != ( RVAL ) )                                                                   \
00087         {                                                                                               \
00088             MSQ_SETERR( err )                                                                           \
00089             ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \
00090             return;                                                                                     \
00091         }                                                                                               \
00092     } while( false )
00093 
00094 #define CHECK_MPI_RZERO( RVAL, ERR )                                                                    \
00095     do                                                                                                  \
00096     {                                                                                                   \
00097         if( MPI_SUCCESS != ( RVAL ) )                                                                   \
00098         {                                                                                               \
00099             MSQ_SETERR( err )                                                                           \
00100             ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \
00101             return 0;                                                                                   \
00102         }                                                                                               \
00103     } while( false )
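/* Note: both macros report failures through a variable named 'err' (a MsqError) that must be in
 * scope at the call site; the ERR macro parameter itself is never expanded.  CHECK_MPI returns
 * from void functions, CHECK_MPI_RZERO returns 0 from functions with an integral return type. */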
00104 
00105 static bool vertex_map_insert( VertexIdMap& map, size_t glob_id, int proc_id, int value )
00106 {
00107     VertexIdMapKey vid;
00108     vid.glob_id = glob_id;
00109     vid.proc_id = proc_id;
00110     return map.insert( VertexIdMap::value_type( vid, value ) ).second;
00111 }
00112 
00113 static int vertex_map_find( const VertexIdMap& map, size_t glob_id, int proc_id )
00114 {
00115     VertexIdMapKey vid;
00116     vid.glob_id                             = glob_id;
00117     vid.proc_id                             = proc_id;
00118     VertexIdMap::const_iterator map_element = map.find( vid );
00119     if( map_element == map.end() )
00120     {
00121         return 0;
00122     }
00123     else
00124     {
00125         return map_element->second;
00126     }
00127 }
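/* Note: vertex_map_find() returns 0 both when the (glob_id, proc_id) key is absent and when the
 * stored value happens to be 0; there is no separate "not found" indication. */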
00128 
00129 static void my_quicksort( int* a, size_t* b, MBMesquite::Mesh::VertexHandle* c, int i, int j )
00130 {
00131     int in_i = i;
00132     int in_j = j;
00133     int wa;
00134     size_t wb;
00135     MBMesquite::Mesh::VertexHandle w1;
00136     int key = a[( i + j ) / 2];
00137     do
00138     {
00139         while( a[i] < key )
00140             i++;
00141         while( a[j] > key )
00142             j--;
00143         if( i < j )
00144         {
00145             wa   = a[i];
00146             a[i] = a[j];
00147             a[j] = wa;
00148             wb   = b[i];
00149             b[i] = b[j];
00150             b[j] = wb;
00151             w1   = c[i];
00152             c[i] = c[j];
00153             c[j] = w1;
00154         }
00155     } while( ++i <= --j );
00156     if( i == j + 3 )
00157     {
00158         i--;
00159         j++;
00160     }
00161     if( j > in_i ) my_quicksort( a, b, c, in_i, j );
00162     if( i < in_j ) my_quicksort( a, b, c, i, in_j );
00163 }
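/* Note: my_quicksort() co-sorts three parallel arrays by the integer keys in a[]: b[] (global
 * ids) and c[] (vertex handles) are permuted in lock-step so that entry i of every array still
 * refers to the same vertex afterwards.  It is used below to group the unused ghost vertices by
 * owning processor. */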
00164 
00165 static int hash6432shift( unsigned long long key )
00166 {
00167     key = ( ~key ) + ( key << 18 );  // key = (key << 18) - key - 1;
00168     key = key ^ ( key >> 31 );
00169     key = key * 21;  // key = (key + (key << 2)) + (key << 4);
00170     key = key ^ ( key >> 11 );
00171     key = key + ( key << 6 );
00172     key = key ^ ( key >> 22 );
00173     return (int)key;
00174 }
00175 
00176 static unsigned long long hash64shift( unsigned long long key )
00177 {
00178     key = ( ~key ) + ( key << 21 );  // key = (key << 21) - key - 1;
00179     key = key ^ ( key >> 24 );
00180     key = ( key + ( key << 3 ) ) + ( key << 8 );  // key * 265
00181     key = key ^ ( key >> 14 );
00182     key = ( key + ( key << 2 ) ) + ( key << 4 );  // key * 21
00183     key = key ^ ( key >> 28 );
00184     key = key + ( key << 31 );
00185     return key;
00186 }
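/* Note: hash6432shift() and hash64shift() appear to be Thomas Wang's 64-to-32-bit and 64-bit
 * integer mix functions; here they only serve to turn (global id, processor id) pairs into
 * well-scrambled seeds for the pseudo-random numbers generated below. */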
00187 
00188 static double generate_random_number( int generate_random_numbers, int proc_id, size_t glob_id )
00189 {
00190     int gid;
00191 
00192     if( sizeof( size_t ) == sizeof( unsigned long long ) )
00193     {
00194         gid = hash6432shift( (unsigned long long)glob_id );
00195     }
00196     else
00197     {
00198         gid = (int)glob_id;
00199     }
00200 
00201     if( generate_random_numbers == 1 )
00202     {
00203         // count number of on bits
00204         int on            = 0;
00205         unsigned int mist = (unsigned int)gid;
00206         while( mist )
00207         {
00208             if( mist & 1 ) on++;
00209             mist = mist >> 1;
00210         }
00211         unsigned short xsubi[3];
00212         if( on & 1 )
00213             mist = (unsigned int)gid;
00214         else
00215             mist = (unsigned int)( -gid );
00216         if( on & 2 )
00217         {
00218             xsubi[0] = (unsigned short)( 65535 & mist );
00219             xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) );
00220         }
00221         else
00222         {
00223             xsubi[1] = (unsigned short)( 65535 & mist );
00224             xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) );
00225         }
00226         xsubi[2] = proc_id;
00227 
00228 #if( defined WIN32 || defined WIN64 )
00229         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00230         srand( (int)seed );
00231         return rand() / ( RAND_MAX + 1.0 );
00232 #else
00233         return erand48( xsubi );
00234 #endif
00235     }
00236     else if( generate_random_numbers == 2 )
00237     {
00238         // count number of on bits
00239         int on            = 0;
00240         unsigned int mist = (unsigned int)gid;
00241         while( mist )
00242         {
00243             if( mist & 1 ) on++;
00244             mist = mist >> 1;
00245         }
00246         unsigned short xsubi[3];
00247         if( on & 1 )
00248             mist = (unsigned int)gid;
00249         else
00250             mist = (unsigned int)( -gid );
00251         if( on & 2 )
00252         {
00253             xsubi[0] = (unsigned short)( 65535 & mist );
00254             xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) );
00255         }
00256         else
00257         {
00258             xsubi[1] = (unsigned short)( 65535 & mist );
00259             xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) );
00260         }
00261         xsubi[2] = proc_id ^ xsubi[1];
00262 
00263 #if( defined WIN32 || defined WIN64 )
00264         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00265         srand( (int)seed );
00266         return rand() / ( RAND_MAX + 1.0 );
00267 #else
00268         return erand48( xsubi );
00269 #endif
00270     }
00271     else if( generate_random_numbers == 3 )
00272     {
00273         unsigned long long key = (unsigned long long)gid;
00274         key                    = key << 32;
00275         key                    = key | proc_id;
00276         return (double)hash64shift( key );
00277     }
00278     else if( generate_random_numbers == 4 )
00279     {
00280         unsigned long long key = (unsigned long long)gid;
00281         key                    = key << 32;
00282         key                    = key | proc_id;
00283         key                    = hash64shift( key );
00284         unsigned short xsubi[3];
00285         xsubi[0] = (unsigned short)( key >> 48 );
00286         xsubi[1] = (unsigned short)( key >> 32 );
00287         xsubi[2] = (unsigned short)( key >> 16 );
00288 
00289 #if( defined WIN32 || defined WIN64 )
00290         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00291         srand( (int)seed );
00292         return rand() / ( RAND_MAX + 1.0 );
00293 #else
00294         return erand48( xsubi );
00295 #endif
00296     }
00297     else
00298     {
00299         return -1;
00300     }
00301 }
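/* Note: generate_random_number() supports four modes selected by 'generate_random_numbers':
 * modes 1 and 2 seed erand48() (rand() on Windows) from the hashed global id and the processor
 * id, mode 3 returns the raw 64-bit hash cast to double, and mode 4 seeds erand48() from the
 * 64-bit hash; any other value returns -1.  The values only need to be reproducible and well
 * spread per (gid, proc) pair (they presumably act as tie-breakers in the independent-set
 * computation) and are not meant to be statistically strong. */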
00302 
00303 ParallelHelperImpl::ParallelHelperImpl()
00304 {
00305     mesh = 0;
00306     set_communicator( MPI_COMM_WORLD );
00307     communication_model = TrulyNonBlockingAvoidAllReduce;
00308 
00309     MPI_Comm_rank( (MPI_Comm)communicator, &rank );
00310     MPI_Comm_size( (MPI_Comm)communicator, &nprocs );
00311 
00312     // variable defaults for VertexMover::loop_over_mesh()
00313     generate_random_numbers = 2;
00314 
00315     // memory chunks referenced for VertexMover::loop_over_mesh()
00316     vertices.clear();
00317     vtx_in_partition_boundary.clear();
00318     part_vertices.clear();
00319     unghost_vertices.clear();
00320     part_proc_owner.clear();
00321     part_gid.clear();
00322     part_smoothed_flag.clear();
00323     part_rand_number.clear();
00324     exportVtxGIDs.clear();
00325     exportVtxLIDs.clear();
00326     exportProc.clear();
00327     in_independent_set.clear();
00328     vid_map.clear();
00329     neighbourProcSend.clear();
00330     neighbourProcRecv.clear();
00331     neighbourProcSendRemain.clear();
00332     neighbourProcRecvRemain.clear();
00333     vtx_off_proc_list.clear();
00334     neighbourProc.clear();
00335 }
00336 
00337 ParallelHelperImpl::~ParallelHelperImpl() {}
00338 
00339 void ParallelHelperImpl::set_parallel_mesh( ParallelMesh* pmesh )
00340 {
00341     this->mesh = pmesh;
00342 }
00343 
00344 void ParallelHelperImpl::set_communicator( size_t comm )
00345 {
00346     communicator = comm;
00347     MPI_Comm_rank( (MPI_Comm)communicator, &rank );
00348     MPI_Comm_size( (MPI_Comm)communicator, &nprocs );
00349 }
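/* Note: the communicator is stored as a size_t and cast back to MPI_Comm at each call site.
 * This assumes an MPI_Comm handle fits in a size_t, which holds for the common implementations
 * (integer handles in MPICH, pointers in Open MPI). */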
00350 
00351 void ParallelHelperImpl::set_communication_model( int model, MsqError& )
00352 {
00353     communication_model = model;
00354 }
00355 
00356 void ParallelHelperImpl::set_generate_random_numbers( int grn, MsqError& )
00357 {
00358     this->generate_random_numbers = grn;
00359 }
00360 
00361 void ParallelHelperImpl::smoothing_init( MsqError& err )
00362 {
00363 
00364     int i, j, k, rval;
00365     size_t l;
00366 
00367     if( !mesh )
00368     {
00369         MSQ_SETERR( err )( MsqError::INVALID_STATE );
00370         return;
00371     }
00372     if( nprocs == 1 ) return;
00373 
00374     /* get the vertices */
00375     mesh->get_all_vertices( vertices, err );MSQ_ERRRTN( err );
00376     num_vertex = vertices.size();
00377 
00378     /* allocate the data arrays we'll use for smoothing */
00379     std::vector< size_t > gid( num_vertex );
00380     std::vector< int > proc_owner( num_vertex );
00381     std::vector< unsigned char > app_fixed( num_vertex );
00382 
00383     /* get the data from the mesquite mesh */
00384     mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err );
00385     mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err );
00386     mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err );
00387     if( false )
00388     {
00389         int ncull = 0;
00390         for( i = 0; i < num_vertex; ++i )
00391         {
00392             if( app_fixed[i] & MsqVertex::MSQ_CULLED )
00393             {
00394                 ++ncull;
00395             }
00396         }
00397         std::cout << "P[" << rank << "] smoothing_init ncull= " << ncull << " num_vertex= " << num_vertex << std::endl;
00398     }
00399 
00400     /* only interested in fixed flag from vertex byte? Clear others. */
00401     // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed
00402     for( i = 0; i < num_vertex; ++i )
00403         app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED;
00404 
00405     /* create temporary Tag for the local IDs */
00406     std::vector< int > lid( num_vertex );
00407     for( i = 0; i < num_vertex; i++ )
00408         lid[i] = i;
00409     const char LOCAL_ID_NAME[] = "LOCAL_ID";
00410     TagHandle lid_tag          = mesh->tag_create( LOCAL_ID_NAME, Mesh::INT, 1, NULL, err );MSQ_ERRRTN( err );
00411     mesh->tag_set_vertex_data( lid_tag, num_vertex, ARRPTR( vertices ), ARRPTR( lid ), err );MSQ_ERRRTN( err );
00412 
00413     if( false ) printf( "[%d] set local tags on %d vertices\n", rank, num_vertex );
00414 
00415     /* get the elements */
00416     std::vector< MBMesquite::Mesh::ElementHandle > elements;
00417     mesh->get_all_elements( elements, err );MSQ_ERRRTN( err );
00418     int num_elems = elements.size();
00419 
00420     /****************************************************************
00421     PARTITION BOUNDARY VERTEX DETERMINATION
00422     ***********************************************************************/
00423 
00424     /* initialize the vertex partition boundary array */
00425     vtx_in_partition_boundary.clear();
00426     vtx_in_partition_boundary.resize( num_vertex, 0 );
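    /* Encoding used below for vtx_in_partition_boundary[]:
     *   0  interior or application-fixed vertex (not involved in parallel smoothing)
     *   1  local partition-boundary vertex (smoothed on this processor)
     *   2  remote partition-boundary vertex (ghost copy smoothed on its owning processor)
     *  -1  unused ghost vertex (only needs a final coordinate update in smoothing_close()) */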
00427     int incident_vtx, vtx_off_proc, vtx_on_proc;
00428 
00429     /* get the array that contains the adjacent vertices for each mesh element */
00430     std::vector< MBMesquite::Mesh::VertexHandle > adj_vertices;
00431     std::vector< size_t > vtx_offsets;
00432     mesh->elements_get_attached_vertices( ARRPTR( elements ), num_elems, adj_vertices, vtx_offsets, err );
00433     std::vector< int > adj_vertices_lid( adj_vertices.size() );
00434     mesh->tag_get_vertex_data( lid_tag, adj_vertices.size(), ARRPTR( adj_vertices ), ARRPTR( adj_vertices_lid ), err );
00435 
00436     if( false ) printf( "[%d] retrieved attached vertices for %d elements\n", rank, num_elems );
00437 
00438     /* determine which vertices are smoothed as part of the boundary (and which are unused ghost
00439      * vertices) */
00440     num_vtx_partition_boundary_local  = 0;
00441     num_vtx_partition_boundary_remote = 0;
00442     for( i = 0; i < num_elems; i++ )
00443     {
00444         /* count how many vertices of the current element are on/off a different processor */
00445         vtx_off_proc = 0;
00446         vtx_on_proc  = 0;
00447         for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00448         {
00449             incident_vtx = adj_vertices_lid[j];
00450             /* obviously the vertex only counts if it is not app_fixed */
00451             if( !app_fixed[incident_vtx] )
00452             {
00453                 if( proc_owner[incident_vtx] != rank )
00454                 {
00455                     vtx_off_proc++;
00456                 }
00457                 else
00458                 {
00459                     vtx_on_proc++;
00460                 }
00461             }
00462         }
00463 
00464         /* if vertices are on different processors mark all local vertices as boundary1 and all
00465          * remote vertices as boundary2 */
00466         if( vtx_off_proc > 0 && vtx_on_proc > 0 )
00467         {
00468             /* collect stats */
00469             //  smooth_stats.num_part_bndy_elem++;
00470             /* mark the vertices */
00471             for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00472             {
00473                 incident_vtx = adj_vertices_lid[j];
00474                 /* obviously the vertex does not need to be marked if it was already marked or if it
00475                  * is app_fixed*/
00476                 if( vtx_in_partition_boundary[incident_vtx] <= 0 && app_fixed[incident_vtx] == 0 )
00477                 {
00478                     /* mark and count the vertex */
00479                     if( proc_owner[incident_vtx] != rank )
00480                     {
00481                         vtx_in_partition_boundary[incident_vtx] = 2;
00482                         num_vtx_partition_boundary_remote++;
00483                     }
00484                     else
00485                     {
00486                         vtx_in_partition_boundary[incident_vtx] = 1;
00487                         num_vtx_partition_boundary_local++;
00488                     }
00489                 }
00490             }
00491         }
00492         else if( vtx_off_proc > 0 )
00493         {
00494             /* mark the vertices as boundary-1 (aka unused ghost) if the element has only
00495              * off-processor vertices */
00496             for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00497             {
00498                 incident_vtx = adj_vertices_lid[j];
00499                 /* obviously the vertex is not marked if it was already marked or if it is
00500                  * app_fixed*/
00501                 if( vtx_in_partition_boundary[incident_vtx] == 0 && app_fixed[incident_vtx] == 0 )
00502                 {
00503                     vtx_in_partition_boundary[incident_vtx] = -1;
00504                 }
00505             }
00506         }
00507     }
00508 
00509     if( false )
00510     {
00511         printf( "[%d]i%d local %d remote %d ", rank, iteration, num_vtx_partition_boundary_local,
00512                 num_vtx_partition_boundary_remote );
00513         printf( "[%d]i%d pb1 ", rank, iteration );
00514         for( i = 0; i < num_vertex; i++ )
00515             if( vtx_in_partition_boundary[i] == 1 ) printf( "%d,%lu ", i, gid[i] );
00516         printf( "\n" );
00517         printf( "[%d]i%d pb2 ", rank, iteration );
00518         for( i = 0; i < num_vertex; i++ )
00519             if( vtx_in_partition_boundary[i] == 2 ) printf( "%d,%lu ", i, gid[i] );
00520         printf( "\n" );
00521         fflush( NULL );
00522     }
00523 
00524     num_vtx_partition_boundary = num_vtx_partition_boundary_local + num_vtx_partition_boundary_remote;
00525 
00526     /********************************************************************
00527    COLLECT THE PARTITION BOUNDARY VERTICES AND THE UNUSED GHOST VERTICES
00528     ********************************************************************/
00529 
00530     /* create the vectors to store the partition boundary vertex data */
00531     part_vertices.resize( num_vtx_partition_boundary );
00532     part_proc_owner.resize( num_vtx_partition_boundary );
00533     part_gid.resize( num_vtx_partition_boundary );
00534     part_smoothed_flag.resize( num_vtx_partition_boundary );
00535     part_rand_number.resize( num_vtx_partition_boundary );
00536 
00537     /* create the partition boundary map and its inverse */
00538     std::vector< int > vtx_partition_boundary_map_inverse( num_vertex );
00539 
00540     j = 0;
00541     /* first we map the partition boundary vertices that we will smooth on this processor */
00542     for( i = 0; i < num_vertex; i++ )
00543     {
00544         if( vtx_in_partition_boundary[i] == 1 )
00545         {
00546             part_vertices[j]   = vertices[i];
00547             part_proc_owner[j] = rank;
00548             assert( proc_owner[i] == rank );
00549             part_gid[j]                           = gid[i];
00550             vtx_partition_boundary_map_inverse[i] = j;
00551             j++;
00552         }
00553     }
00554 
00555     vid_map.clear();
00556 
00557     /* then we map the ghost vertices that will be smoothed on other processors */
00558     for( i = 0; i < num_vertex; i++ )
00559     {
00560         if( vtx_in_partition_boundary[i] == 2 )
00561         {
00562             part_vertices[j]   = vertices[i];
00563             part_proc_owner[j] = proc_owner[i];
00564             assert( proc_owner[i] != rank );
00565             part_gid[j]                           = gid[i];
00566             vtx_partition_boundary_map_inverse[i] = j;
00567             /* only insert those vertices in the map that are smoothed on other processors */
00568             vertex_map_insert( vid_map, part_gid[j], part_proc_owner[j], j );
00569             // printf("[%d] inserting vertex with gid %u and pid %d \n", rank, part_gid[j],
00570             // part_proc_owner[j]);
00571             j++;
00572         }
00573     }
00574 
00575     /* create our own 'very pseudo random' numbers */
00576     for( i = 0; i < num_vtx_partition_boundary; i++ )
00577         part_rand_number[i] = generate_random_number( generate_random_numbers, part_proc_owner[i], part_gid[i] );
00578 
00579     /* count the number of unused ghost vertices */
00580     unghost_num_vtx = 0;
00581     for( i = 0; i < num_vertex; i++ )
00582     {
00583         if( vtx_in_partition_boundary[i] == -1 )
00584         {
00585             unghost_num_vtx++;
00586         }
00587     }
00588     // printf("[%d] found %d unused ghost vertices (local %d remote %d)\n",rank, unghost_num_vtx,
00589     // num_vtx_partition_boundary_local,num_vtx_partition_boundary_remote);
00590 
00591     /* create the vectors to store the unused ghost vertices */
00592     unghost_vertices.resize( unghost_num_vtx );
00593     std::vector< int > unghost_proc_owner( unghost_num_vtx );
00594     std::vector< size_t > unghost_gid( unghost_num_vtx );
00595 
00596     /* store the unused ghost vertices that are copies of vertices from other processors and will
00597      * need to be received */
00598     j = 0;
00599     for( i = 0; i < num_vertex; i++ )
00600     {
00601         if( vtx_in_partition_boundary[i] == -1 )
00602         {
00603             unghost_vertices[j]   = vertices[i];
00604             unghost_proc_owner[j] = proc_owner[i];
00605             assert( proc_owner[i] != rank );
00606             unghost_gid[j] = gid[i];
00607             // printf(" %d", unghost_gid[j]);
00608             j++;
00609         }
00610     }
00611 
00612     /* no longer needed */
00613     // delete [] gid; gid = 0;
00614     // delete [] proc_owner; proc_owner = 0;
00615 
00616     unghost_num_procs = 0;
00617     unghost_procs.clear();
00618     unghost_procs_offset.clear();
00619     unghost_procs_num_vtx.clear();
00620     if( unghost_num_vtx )
00621     {
00622         /* sort the unused ghost vertices by processor */
00623         my_quicksort( ARRPTR( unghost_proc_owner ), ARRPTR( unghost_gid ), &( unghost_vertices[0] ), 0,
00624                       unghost_num_vtx - 1 );
00625 
00626         /* count the number of processors from which we have unused ghost data that we want to
00627          * get updated */
00628         unghost_num_procs = 1;
00629         for( i = 1; i < unghost_num_vtx; i++ )
00630         {
00631             if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) unghost_num_procs++;
00632         }
00633 
00634         /* get the ids of those processors and the number of vertices we want from each */
00635         unghost_procs.resize( unghost_num_procs );
00636         unghost_procs_offset.resize( unghost_num_procs + 1 );
00637         unghost_procs_num_vtx.resize( unghost_num_procs );
00638         unghost_procs[0]        = unghost_proc_owner[0];
00639         unghost_procs_offset[0] = 0;
00640         for( i = 1, j = 1; i < unghost_num_vtx; i++ )
00641         {
00642             if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] )
00643             {
00644                 unghost_procs[j]             = unghost_proc_owner[i];
00645                 unghost_procs_offset[j]      = i;
00646                 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1];
00647                 assert( unghost_procs_num_vtx[j - 1] > 0 );
00648                 j++;
00649             }
00650         }
00651         unghost_procs_offset[j]      = i;
00652         unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1];
00653         assert( unghost_procs_num_vtx[j - 1] > 0 );
00654         assert( j == unghost_num_procs );
00655 
00656         // delete [] unghost_proc_owner; unghost_proc_owner = 0;
00657 
00658         // printf("[%d] have ugns from %d processor(s) (%d,%d,%d)\n", rank, unghost_num_procs,
00659         // unghost_procs[0], (unghost_num_procs>1) ? unghost_procs[1] : -1, (unghost_num_procs>2) ?
00660         // unghost_procs[1] : -1);
00661     }
00662 
00663     /* this will eventually store, for each processor, to how many processors it needs to send
00664      * unused ghost data */
00665     std::vector< int > num_sends_of_unghost;
00666 
00667     /* gather the information about how many processors need ghost data */
00668     if( rank == 0 )
00669     {
00670         /* this will eventually store, for each processor, to how many processors it needs to
00671          * send unused ghost data */
00672         num_sends_of_unghost.resize( nprocs );
00673     }
00674     /* temporary buffer used for the initial gather in which each proc tells the root from how
00675      * many procs it wants unused ghost data updates */
00676     rval = MPI_Gather( &unghost_num_procs, 1, MPI_INT, ARRPTR( num_sends_of_unghost ), 1, MPI_INT, 0,
00677                        (MPI_Comm)communicator );
00678     CHECK_MPI( rval, err );
00679 
00680     /* now each processor tells the root node from which processors it wants unused ghost node
00681      * information */
00682     if( rank == 0 )
00683     {
00684         int procs_num = 0;
00685         int procs_max = 0;
00686         /* first count how many processors will send unused ghost node info and find the maximum
00687          * they will send */
00688         for( i = 1; i < nprocs; i++ )
00689         {
00690             if( num_sends_of_unghost[i] )
00691             {
00692                 procs_num++;
00693                 if( num_sends_of_unghost[i] > procs_max ) procs_max = num_sends_of_unghost[i];
00694             }
00695         }
00696         /* reset the temporary array */
00697         for( i = 0; i < nprocs; i++ )
00698             num_sends_of_unghost[i] = 0;
00699         /* process rank 0's unused ghost nodes procs first */
00700         for( j = 0; j < unghost_num_procs; j++ )
00701         {
00702             num_sends_of_unghost[unghost_procs[j]]++;
00703         }
00704         /* now we process messages from all other processors that want unused ghost node information
00705          */
00706         int* unghost_procs_array = new int[procs_max];
00707         for( i = 0; i < procs_num; i++ )
00708         {
00709             MPI_Status status;
00710             rval = MPI_Recv( unghost_procs_array, procs_max, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_INFO,
00711                              (MPI_Comm)communicator, &status );
00712             CHECK_MPI( rval, err );
00713             int count;
00714             MPI_Get_count( &status, MPI_INT, &count );
00715             for( j = 0; j < count; j++ )
00716             {
00717                 num_sends_of_unghost[unghost_procs_array[j]]++;
00718             }
00719         }
00720         delete[] unghost_procs_array;
00721     }
00722     else
00723     {
00724         if( unghost_num_vtx )
00725         {
00726             rval = MPI_Send( ARRPTR( unghost_procs ), unghost_num_procs, MPI_INT, 0, GHOST_NODE_INFO,
00727                              (MPI_Comm)communicator );
00728             CHECK_MPI( rval, err );
00729         }
00730     }
00731 
00732     /* now the root node knows, for each processor, on how many other processors it has ghost
00733      * nodes (which need updating) */
00734     /* the scatter distributes this information to each processor */
00735     rval = MPI_Scatter( ARRPTR( num_sends_of_unghost ), 1, MPI_INT, &update_num_procs, 1, MPI_INT, 0,
00736                         (MPI_Comm)communicator );
00737     CHECK_MPI( rval, err );
00738 
00739     // if (rank == 0) delete [] num_sends_of_unghost;
00740 
00741     // printf("[%d] i have unused ghost nodes from %d procs and i need to send updates to %d
00742     // procs\n", rank, unghost_num_procs, update_num_procs);
00743 
00744     /* now the processors can negotiate amongst themselves: */
00745 
00746     /* first tell each processor the number of unused ghost nodes we want from them */
00747     std::vector< MPI_Request > requests_unghost( unghost_num_procs );
00748     for( j = 0; j < unghost_num_procs; j++ )
00749     {
00750         rval = MPI_Isend( &( unghost_procs_num_vtx[j] ), 1, MPI_INT, unghost_procs[j], GHOST_NODE_VERTICES_WANTED,
00751                           (MPI_Comm)communicator, &( requests_unghost[j] ) );
00752         CHECK_MPI( rval, err );
00753     }
00754 
00755     /* then listen to as many processors as there are that want updates from us */
00756     std::vector< MPI_Request > requests_updates( update_num_procs );
00757     update_procs_num_vtx.resize( update_num_procs );
00758     for( j = 0; j < update_num_procs; j++ )
00759     {
00760         rval = MPI_Irecv( &( update_procs_num_vtx[j] ), 1, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_VERTICES_WANTED,
00761                           (MPI_Comm)communicator, &( requests_updates[j] ) );
00762         CHECK_MPI( rval, err );
00763     }
00764 
00765     /* wait until we have heard from all processors how many ghost node updates they want from
00766      * us */
00767     std::vector< MPI_Status > status_update( update_num_procs );
00768     update_procs.resize( update_num_procs );
00769     rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) );
00770     CHECK_MPI( rval, err );
00771     for( j = 0; j < update_num_procs; j++ )
00772     {
00773         update_procs[j] = status_update[j].MPI_SOURCE;
00774         // printf("[%d] i have to send %d vertices to processor %d\n", rank,
00775         // update_procs_num_vtx[j], update_procs[j]);
00776     }
00777 
00778     /* count the total number of vertices that we need to update elsewhere */
00779     update_procs_offset.resize( update_num_procs + 1 );
00780     update_num_vtx         = 0;
00781     update_procs_offset[0] = 0;
00782     for( j = 0; j < update_num_procs; j++ )
00783     {
00784         update_num_vtx += update_procs_num_vtx[j];
00785         update_procs_offset[j + 1] = update_num_vtx;
00786     }
00787 
00788     /* create enough space to receive all the vertex indices */
00789     update_gid.resize( update_num_vtx );
00790 
00791     /* tell each processor which vertices we want from them */
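    /* Note: the global ids are size_t values; they are shipped as MPI_INT when size_t is 4 bytes
     * wide and as MPI_DOUBLE otherwise.  MPI_DOUBLE is used purely as an 8-byte-wide type here;
     * the receiving side reinterprets the same bytes as global ids. */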
00792     for( j = 0; j < unghost_num_procs; j++ )
00793     {
00794         rval = MPI_Isend( &( unghost_gid[unghost_procs_offset[j]] ), unghost_procs_num_vtx[j],
00795                           sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_GIDS,
00796                           (MPI_Comm)communicator, &( requests_unghost[j] ) );
00797         CHECK_MPI( rval, err );
00798     }
00799 
00800     /* receive from each processor the info which vertices they want from us */
00801     for( j = 0; j < update_num_procs; j++ )
00802     {
00803         rval = MPI_Irecv( &( update_gid[update_procs_offset[j]] ), update_procs_num_vtx[j],
00804                           sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, update_procs[j], GHOST_NODE_VERTEX_GIDS,
00805                           (MPI_Comm)communicator, &( requests_updates[j] ) );
00806         CHECK_MPI( rval, err );
00807     }
00808 
00809     /* wait until we have heard from all processors which vertices they want from us */
00810     rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) );
00811     CHECK_MPI( rval, err );
00812 
00813     /* wait until we have sent to all processors which vertices we want from them */
00814     std::vector< MPI_Status > status_unghost( unghost_num_procs );
00815     rval = MPI_Waitall( unghost_num_procs, ARRPTR( requests_unghost ), ARRPTR( status_unghost ) );
00816     CHECK_MPI( rval, err );
00817 
00818     /*
00819     for (j = 0; j < update_num_procs; j++)
00820     {
00821       printf("[%d] will send to proc %d:", rank, update_procs[j]);
00822       for (i = update_procs_offset[j]; i < update_procs_offset[j+1]; i++) printf(" %d",
00823     update_gid[i]); printf("\n");
00824     }
00825     */
00826 
00827     /***********************************************************************
00828    COMPUTE THE SET OF NEIGHBORS THAT OUR VERTICES HAVE ON OTHER PROCESSORS
00829                  COMPUTE THE SET OF NEIGHBORS PROCESSORS
00830     ***********************************************************************/
00831 
00832     if( num_vtx_partition_boundary_local == 0 )
00833     {
00834         /* this processor does not partake in the boundary smoothing */
00835         neighbourProc.clear();
00836         mesh->tag_delete( lid_tag, err );
00837         vtx_partition_boundary_map_inverse.clear();
00838         return;
00839     }
00840 
00841     /* init the neighbour processor list */
00842     neighbourProc.clear();
00843 
00844     /* init the neighbour lists */
00845 
00846     vtx_off_proc_list.clear();
00847     vtx_off_proc_list.resize( num_vtx_partition_boundary_local );
00848 
00849     /* get the adjacency arrays that we need */
00850     std::vector< MBMesquite::Mesh::ElementHandle > adj_elements;
00851     std::vector< MBMesquite::Mesh::VertexHandle > adj_adj_vertices;
00852     std::vector< size_t > elem_offsets;
00853     std::vector< size_t > adj_vtx_offsets;
00854     mesh->vertices_get_attached_elements( ARRPTR( part_vertices ), num_vtx_partition_boundary_local, adj_elements,
00855                                           elem_offsets, err );
00856     mesh->elements_get_attached_vertices( ARRPTR( adj_elements ), adj_elements.size(), adj_adj_vertices,
00857                                           adj_vtx_offsets, err );
00858     // delete adj_elements; adj_elements = 0;
00859     std::vector< int > adj_adj_vertices_lid( adj_adj_vertices.size() );
00860     mesh->tag_get_vertex_data( lid_tag, adj_adj_vertices.size(), ARRPTR( adj_adj_vertices ),
00861                                ARRPTR( adj_adj_vertices_lid ), err );
00862     // delete adj_adj_vertices; adj_adj_vertices = 0;
00863     mesh->tag_delete( lid_tag, err );
00864 
00865     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00866     {
00867         /* loop over the elements surrounding that vertex */
00868         for( j = elem_offsets[i]; j < (int)( elem_offsets[i + 1] ); j++ )
00869         {
00870             /* loop over the neighbors of the considered vertex (i.e. the vertices of this element)
00871              */
00872             for( k = adj_vtx_offsets[j]; k < (int)( adj_vtx_offsets[j + 1] ); k++ )
00873             {
00874                 /* get the next neighbour */
00875                 incident_vtx = adj_adj_vertices_lid[k];
00876                 /* if this neighbour is a vertex that is smoothed on a different processor */
00877                 if( vtx_in_partition_boundary[incident_vtx] == 2 )
00878                 {
00879                     /* then map it into our domain */
00880                     incident_vtx = vtx_partition_boundary_map_inverse[incident_vtx];
00881                     /* is this vertex already in our neighbour list ? */
00882                     if( std::find( vtx_off_proc_list[i].begin(), vtx_off_proc_list[i].end(), incident_vtx ) ==
00883                         vtx_off_proc_list[i].end() )
00884                     {
00885                         /* if the vertex is not in the list yet ... add it */
00886                         vtx_off_proc_list[i].push_back( incident_vtx );
00887                         /* is the processor of this vertex already in the processor list */
00888                         incident_vtx = part_proc_owner[incident_vtx];
00889                         /* check by scanning the list for this processor */
00890                         if( std::find( neighbourProc.begin(), neighbourProc.end(), incident_vtx ) ==
00891                             neighbourProc.end() )
00892                         {
00893                             /* the processor is not in the list yet ... add it */
00894                             neighbourProc.push_back( incident_vtx );
00895                         }
00896                     }
00897                 }
00898             }
00899         }
00900     }
00901 
00902     /* sort the list of neighbour processors */
00903 
00904     std::sort( neighbourProc.begin(), neighbourProc.end() );
00905 
00906     /***********************************************************************
00907       COMPUTE HOW MANY VERTICES WE NEED TO SEND/RECV FROM EACH PROCESSOR
00908     ***********************************************************************/
00909     neighbourProcSend.clear();
00910     neighbourProcRecv.clear();
00911     neighbourProcSendRemain.clear();
00912     neighbourProcRecvRemain.clear();
00913 
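    /* With the AVOID_ALL_REDUCE variants (communication_model & 1) the number of boundary
     * vertices to smooth locally and to receive from neighbours is precomputed here, so that
     * global termination can later be detected from local send/receive counters instead of an
     * MPI_Allreduce every iteration (see communicate_next_independent_set()). */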
00914     if( communication_model & 1 )  // AVOID_ALL_REDUCE
00915     {
00916         total_num_vertices_to_smooth = num_vtx_partition_boundary_local;
00917         total_num_vertices_to_recv   = num_vtx_partition_boundary_remote;
00918         neighbourProcSend.resize( neighbourProc.size(), 0 );
00919         neighbourProcRecv.resize( neighbourProc.size(), 0 );
00920 
00921         /* for each vertex we smooth, find the processors we need to send it to */
00922         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00923         {
00924             /* loop over its adjacent off-processor vertices */
00925             for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
00926             {
00927                 /* get the processor id of these vertices */
00928                 incident_vtx = part_proc_owner[vtx_off_proc_list[i][j]];
00929                 /* check if we got this processor id before */
00930                 for( k = 0; k < j; k++ )
00931                 {
00932                     if( incident_vtx == part_proc_owner[vtx_off_proc_list[i][k]] )
00933                     {
00934                     /* if we have seen this processor id already we do not need to count it again
00935                      */
00936                         incident_vtx = -1;
00937                         break;
00938                     }
00939                 }
00940                 /* if this was a new processor id */
00941                 if( incident_vtx != -1 )
00942                 {
00943                     /* find the processor in the list and increment its counter */
00944                     for( l = 0; l < neighbourProc.size(); l++ )
00945                     {
00946                         if( neighbourProc[l] == incident_vtx )
00947                         {
00948                             neighbourProcSend[l]++;
00949                             break;
00950                         }
00951                     }
00952                 }
00953             }
00954         }
00955         for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
00956         {
00957             incident_vtx = part_proc_owner[i];
00958             for( l = 0; l < neighbourProc.size(); l++ )
00959             {
00960                 if( neighbourProc[l] == incident_vtx )
00961                 {
00962                     neighbourProcRecv[l]++;
00963                     break;
00964                 }
00965             }
00966         }
00967         neighbourProcSendRemain.resize( neighbourProc.size() );
00968         neighbourProcRecvRemain.resize( neighbourProc.size() );
00969     }
00970 
00971     exportVtxGIDs.resize( num_vtx_partition_boundary );
00972     exportVtxLIDs.resize( num_vtx_partition_boundary );
00973     exportProc.resize( num_vtx_partition_boundary );
00974     in_independent_set.resize( num_vtx_partition_boundary_local );
00975 }
00976 
00977 void ParallelHelperImpl::compute_first_independent_set( std::vector< Mesh::VertexHandle >& fixed_vertices )
00978 {
00979     if( nprocs == 1 ) return;
00980 
00981     int i;
00982 
00983     // to avoid all reduce we need to know how many vertices we send & receive
00984     if( communication_model & 1 )  // AVOID_ALL_REDUCE
00985     {
00986         for( i = 0; i < (long)neighbourProc.size(); i++ )
00987         {
00988             neighbourProcSendRemain[i] = neighbourProcSend[i];
00989             neighbourProcRecvRemain[i] = neighbourProcRecv[i];
00990         }
00991     }
00992 
00993     // this is iteration zero of the boundary smoothing process
00994     iteration = 0;
00995 
00996     // mark all boundary partition vertices as not smoothed
00997     for( i = 0; i < num_vtx_partition_boundary; i++ )
00998     {
00999         part_smoothed_flag[i] = 0;
01000     }
01001 
01002     // populates the in_independent_set and the vertex export arrays
01003     compute_independent_set();
01004 
01005     // counts how many vertices are already smoothed
01006     num_already_smoothed_vertices = 0;
01007 
01008     fixed_vertices.clear();
01009 
01010     /* mark which local boundary partition vertices are already smoothed (i.e. they are in the 1st
01011      * independent set) */
01012     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
01013     {
01014         if( in_independent_set[i] )
01015         {
01016             part_smoothed_flag[i] = 1;
01017             num_already_smoothed_vertices++;
01018         }
01019         else
01020         {
01021             fixed_vertices.push_back( part_vertices[i] );  // fix vertices *not* in the independent set
01022         }
01023     }
01024 
01025     if( false )
01026     {
01027         printf( "[%d]i%d after first we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices,
01028                 num_vtx_partition_boundary_local );
01029         fflush( NULL );
01030     }
01031 
01032     // fix the ghost vertices that are smoothed on another processor
01033     for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
01034     {
01035         fixed_vertices.push_back( part_vertices[i] );
01036     }
01037 
01038     // fix the ghost vertices that are unused
01039     for( i = 0; i < (int)( unghost_vertices.size() ); i++ )
01040     {
01041         fixed_vertices.push_back( unghost_vertices[i] );
01042     }
01043 }
01044 
01045 void ParallelHelperImpl::communicate_first_independent_set( MsqError& err )
01046 {
01047     if( nprocs == 1 ) return;
01048 
01049     switch( communication_model )
01050     {
01051         case TrulyNonBlocking:
01052             num_already_recv_vertices = comm_smoothed_vtx_tnb( err );
01053             break;
01054         case TrulyNonBlockingAvoidAllReduce:
01055             num_already_recv_vertices = comm_smoothed_vtx_tnb_no_all( err );
01056             break;
01057         case NonBlocking:
01058             num_already_recv_vertices = comm_smoothed_vtx_nb( err );
01059             break;
01060         case NonBlockingAvoidAllReduce:
01061             num_already_recv_vertices = comm_smoothed_vtx_nb_no_all( err );
01062             break;
01063         case Blocking:
01064             num_already_recv_vertices = comm_smoothed_vtx_b( err );
01065             break;
01066         case BlockingAvoidAllReduce:
01067             num_already_recv_vertices = comm_smoothed_vtx_b_no_all( err );
01068             break;
01069     }
01070     global_work_remains = ( neighbourProc.size() ? 1 : 0 );MSQ_CHKERR( err );
01071 }
01072 
01073 bool ParallelHelperImpl::compute_next_independent_set()
01074 {
01075     if( nprocs == 1 ) return false;
01076 
01077     if( global_work_remains && ( iteration < 20 ) )
01078     {
01079         iteration++;
01080         if( false ) printf( "[%d] work remains %d after %d iterations\n", rank, global_work_remains, iteration );
01081         compute_independent_set();
01082         next_vtx_partition_boundary = 0;
01083         return true;
01084     }
01085     else
01086     {
01087         if( global_work_remains )
01088         {
01089             printf( "WARNING: global work remains %d after %d iterations\n", global_work_remains, iteration );
01090         }
01091         return false;
01092     }
01093 }
01094 
01095 bool ParallelHelperImpl::get_next_partition_boundary_vertex( MBMesquite::Mesh::VertexHandle& vertex_handle )
01096 {
01097     while( next_vtx_partition_boundary < num_vtx_partition_boundary_local )
01098     {
01099         if( in_independent_set[next_vtx_partition_boundary] )
01100         {
01101             vertex_handle = part_vertices[next_vtx_partition_boundary];
01102             num_already_smoothed_vertices++;
01103             assert( part_smoothed_flag[next_vtx_partition_boundary] == 0 );
01104             part_smoothed_flag[next_vtx_partition_boundary] = 1;
01105             next_vtx_partition_boundary++;
01106             return true;
01107         }
01108         next_vtx_partition_boundary++;
01109     }
01110     if( false )
01111     {
01112         printf( "[%d]i%d after next we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices,
01113                 num_vtx_partition_boundary_local );
01114         fflush( NULL );
01115     }
01116     return false;
01117 }
01118 
01119 void ParallelHelperImpl::communicate_next_independent_set( MsqError& err )
01120 {
01121     switch( communication_model )
01122     {
01123         case TrulyNonBlocking:
01124             num_already_recv_vertices += comm_smoothed_vtx_tnb( err );
01125             break;
01126         case TrulyNonBlockingAvoidAllReduce:
01127             num_already_recv_vertices += comm_smoothed_vtx_tnb_no_all( err );
01128             break;
01129         case NonBlocking:
01130             num_already_recv_vertices += comm_smoothed_vtx_nb( err );
01131             break;
01132         case NonBlockingAvoidAllReduce:
01133             num_already_recv_vertices += comm_smoothed_vtx_nb_no_all( err );
01134             break;
01135         case Blocking:
01136             num_already_recv_vertices += comm_smoothed_vtx_b( err );
01137             break;
01138         case BlockingAvoidAllReduce:
01139             num_already_recv_vertices += comm_smoothed_vtx_b_no_all( err );
01140             break;
01141     }
01142 
01143     if( communication_model & 1 )  // AVOID_ALL_REDUCE
01144     {
01145         global_work_remains = ( total_num_vertices_to_smooth - num_already_smoothed_vertices ) +
01146                               ( total_num_vertices_to_recv - num_already_recv_vertices );
01147         if( false )
01148         {
01149             printf( "[%d]i%d %d - %d + %d  - %d = %d \n", rank, iteration, total_num_vertices_to_smooth,
01150                     num_already_smoothed_vertices, total_num_vertices_to_recv, num_already_recv_vertices,
01151                     global_work_remains );
01152             fflush( NULL );
01153         }
01154     }
01155     else
01156     {
01157         int i, work_remains = 0;
01158         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
01159         {
01160             if( part_smoothed_flag[i] == 0 )
01161             {
01162                 work_remains++;
01163             }
01164         }
01165         int rval = MPI_Allreduce( &work_remains, &global_work_remains, 1, MPI_INT, MPI_SUM, (MPI_Comm)communicator );
01166         CHECK_MPI( rval, err );
01167     }
01168 }
01169 
01170 void ParallelHelperImpl::smoothing_close( MsqError& err )
01171 {
01172     int i, j, rval;
01173 
01174     if( nprocs == 1 ) return;
01175 
01176     //  printf("[%d] used %d iterations\n", rank, iteration);
01177 
01178     // communicate unused ghost nodes
01179 
01180     std::vector< double > update_updates;
01181     std::vector< MPI_Request > update_requests;
01182 
01183     if( update_num_procs )
01184     {
01185         /* get the tags so we can find the requested vertices */
01186         std::vector< size_t > gid( num_vertex );
01187         mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err );
01188         std::vector< unsigned char > app_fixed( num_vertex );
01189         mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err );
01190         std::vector< int > proc_owner( num_vertex );
01191         mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err );
01192 
01193         if( false )
01194         {
01195             int ncull = 0;
01196             for( i = 0; i < num_vertex; ++i )
01197             {
01198                 if( app_fixed[i] & MsqVertex::MSQ_CULLED )
01199                 {
01200                     ++ncull;
01201                 }
01202             }
01203             std::cout << "P[" << rank << "] ncull= " << ncull << " num_vertex= " << num_vertex << std::endl;
01204         }
01205 
01206         /* only interested in fixed flag from vertex byte? Clear others. */
01207         // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed
01208         for( i = 0; i < num_vertex; ++i )
01209             app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED;
01210 
01211         /* insert all our unfixed vertices into a map so we can find the requested vertices
01212          * efficiently */
01213         VertexIdMap temp_vid_map;
01214         for( j = 0; j < num_vertex; j++ )
01215         {
01216             if( proc_owner[j] == rank && app_fixed[j] == false )
01217             {
01218                 vertex_map_insert( temp_vid_map, gid[j], rank, j );
01219             }
01220         }
01221 
01222         /* deallocate the tags */
01223         // delete [] gid; gid = 0;
01224         // delete [] app_fixed; app_fixed = 0;
01225         // delete [] proc_owner; proc_owner = 0;
01226 
01227         /* find the requested updates and collect them into an array */
01228         MBMesquite::MsqVertex coordinates;
01229         update_updates.resize( update_num_vtx * 3 );
01230         for( i = 0; i < update_num_vtx; i++ )
01231         {
01232             j = vertex_map_find( temp_vid_map, update_gid[i], rank );
01233             mesh->vertices_get_coordinates( &( vertices[j] ), &coordinates, 1, err );MSQ_ERRRTN( err );
01234             update_updates[3 * i + 0] = coordinates[0];
01235             update_updates[3 * i + 1] = coordinates[1];
01236             update_updates[3 * i + 2] = coordinates[2];
01237             //      printf("[%d] send gid %d with %g %g %g\n", rank, update_gid[i], coordinates[0],
01238             //      coordinates[1], coordinates[2]);
01239         }
01240 
01241         /* deallocate the map and the gid array */
01242         // delete temp_vid_map; temp_vid_map = 0;
01243         // delete [] update_gid; update_gid = 0;
01244 
01245         update_requests.resize( update_num_procs );
01246         /* send each processor the unused ghost node updates that they requested */
01247         for( j = 0; j < update_num_procs; j++ )
01248         {
01249             rval = MPI_Isend( &( update_updates[update_procs_offset[j] * 3] ), update_procs_num_vtx[j] * 3, MPI_DOUBLE,
01250                               update_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator,
01251                               &( update_requests[j] ) );
01252             //      printf("[%d] sending %d of %d from %d with offset %d \n", rank,
01253             //      update_procs_num_vtx[j], update_num_vtx, update_procs[j],
01254             //      update_procs_offset[j]);
01255             CHECK_MPI( rval, err );
01256         }
01257 
01258         /* deallocate more arrays that we no longer need */
01259         // delete [] update_procs_offset; update_procs_offset = 0;
01260         // delete [] update_procs_num_vtx; update_procs_num_vtx = 0;
01261         // delete [] update_procs; update_procs = 0;
01262     }
01263 
01264     if( unghost_num_procs )
01265     {
01266         std::vector< MPI_Request > unghost_requests( unghost_num_procs );
01267         /* receive from each processor the unused ghost node updates we want from them */
01268         std::vector< double > unghost_updates( unghost_num_vtx * 3 );
01269         for( j = 0; j < unghost_num_procs; j++ )
01270         {
01271             rval = MPI_Irecv( &( unghost_updates[unghost_procs_offset[j] * 3] ), unghost_procs_num_vtx[j] * 3,
01272                               MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator,
01273                               &( unghost_requests[j] ) );
01274             //      printf("[%d] receiving %d of %d from %d with offset %d \n", rank,
01275             //      unghost_procs_num_vtx[j], unghost_num_vtx, unghost_procs[j],
01276             //      unghost_procs_offset[j]);
01277             CHECK_MPI( rval, err );
01278         }
01279 
01280         /* deallocate more arrays that we no longer need */
01281         // delete [] unghost_procs_offset; unghost_procs_offset = 0;
01282         // delete [] unghost_procs_num_vtx; unghost_procs_num_vtx = 0;
01283         // delete [] unghost_procs; unghost_procs = 0;
01284 
01285         std::vector< MPI_Status > status( unghost_num_procs );
01286         rval = MPI_Waitall( unghost_num_procs, ARRPTR( unghost_requests ), ARRPTR( status ) );
01287         CHECK_MPI( rval, err );
01288 
01289         /* apply the received updates for the unused ghost vertices */
01290         for( i = 0; i < unghost_num_vtx; i++ )
01291         {
01292             MBMesquite::Vector3D coordinates;
01293             coordinates[0] = unghost_updates[3 * i + 0];
01294             coordinates[1] = unghost_updates[3 * i + 1];
01295             coordinates[2] = unghost_updates[3 * i + 2];
01296             //      printf("[%d] recv %g %g %g\n", rank, coordinates[0], coordinates[1],
01297             //      coordinates[2]);
01298             mesh->vertex_set_coordinates( unghost_vertices[i], coordinates, err );MSQ_ERRRTN( err );
01299         }
01300 
01301         /* deallocate more arrays that we no longer need */
01302         // delete unghost_vertices; unghost_vertices = 0;
01303         // delete [] unghost_updates; unghost_updates = 0;
01304         // delete [] unghost_requests; unghost_requests = 0;
01305     }
01306 
01307     // if (update_num_procs)
01308     //{
01309     //  delete [] update_updates; update_updates = 0;
01310     //  delete [] update_requests; update_requests = 0;
01311     //}
01312 
01313     // if (vertices) delete vertices; vertices = 0;
01314     // if (vtx_in_partition_boundary) delete [] vtx_in_partition_boundary; vtx_in_partition_boundary
01315     // = 0; if (part_vertices) delete part_vertices; part_vertices = 0; if (unghost_vertices) delete
01316     // unghost_vertices; unghost_vertices = 0; if (part_proc_owner) delete [] part_proc_owner;
01317     // part_proc_owner = 0; if (part_gid) delete [] part_gid; part_gid = 0; if (part_smoothed_flag)
01318     // delete [] part_smoothed_flag; part_smoothed_flag = 0; if (part_rand_number) delete []
01319     // part_rand_number; part_rand_number = 0; if (exportVtxGIDs) delete [] exportVtxGIDs;
01320     // exportVtxGIDs = 0; if (exportVtxLIDs) delete [] exportVtxLIDs; exportVtxLIDs = 0; if
01321     // (exportProc) delete [] exportProc; exportProc = 0; if (in_independent_set) delete []
01322     // in_independent_set; in_independent_set = 0; if (vid_map) delete vid_map; vid_map = 0; if
01323     // (neighbourProcSend) delete [] neighbourProcSend; neighbourProcSend = 0; if (neighbourProcRecv)
01324     // delete [] neighbourProcRecv; neighbourProcRecv = 0; if (neighbourProcSendRemain) delete []
01325     // neighbourProcSendRemain; neighbourProcSendRemain = 0; if (neighbourProcRecvRemain) delete []
01326     // neighbourProcRecvRemain; neighbourProcRecvRemain = 0; if (vtx_off_proc_list_size) delete []
01327     // vtx_off_proc_list_size; vtx_off_proc_list_size = 0; if (vtx_off_proc_list) {
01328     //  for (i = 0; i < num_vtx_partition_boundary_local; i++) free(vtx_off_proc_list[i]);
01329     //  delete [] vtx_off_proc_list; vtx_off_proc_list = 0;
01330     //}
01331     // if (neighbourProc) free(neighbourProc); neighbourProc = 0;
01332 }
01333 
01334 typedef struct VertexPack
01335 {
01336     double x;
01337     double y;
01338     double z;
01339     union
01340     {
01341         double mist;
01342         size_t glob_id;
01343     };
01344 } VertexPack;
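/* A VertexPack carries one vertex as four doubles: the three coordinates plus the global
   id aliased into the fourth slot through the anonymous union.  All of the vertex-block
   messages below therefore move 4 * (vertex count) values of MPI_DOUBLE.  On the usual
   64-bit data models size_t and double are both 8 bytes, so sizeof(VertexPack) is exactly
   4 * sizeof(double). */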
01345 
01346 int ParallelHelperImpl::comm_smoothed_vtx_tnb( MsqError& err )
01347 {
01348     int i, j, k, rval;
01349 
01350     // printf("[%d]i%d truly non blocking\n",rank, iteration);fflush(NULL);
01351 
01352     /* compute how many vertices we send to each processor */
01353 
01354     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
01355     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
01356     for( i = 0; i < num_exportVtx; i++ )
01357     {
01358         for( j = 0; j < (long)neighbourProc.size(); j++ )
01359         {
01360             if( exportProc[i] == neighbourProc[j] )
01361             {
01362                 /* increment count */
01363                 numVtxPerProcSend[j]++;
01364                 /* end loop */
01365                 break;
01366             }
01367         }
01368         /* did loop end without finding the processor */
01369         if( j == (long)neighbourProc.size() )
01370         {
01371             printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration,
01372                     i, exportProc[i], (unsigned long)neighbourProc.size() );
01373         }
01374     }
01375 
01376     /* tell each processor how many vertices they can expect from us */
01377     /* also ask each processor how many vertices we can expect from them */
01378 
01379     int num_neighbourProcSend = 0;
01380     int num_neighbourProcRecv = 0;
01381     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01382     std::vector< MPI_Request > requests_recv( neighbourProc.size() );
01383     for( j = 0; j < (long)neighbourProc.size(); j++ )
01384     {
01385         /* send the vertex count to this processor */
01386         if( 0 )
01387         {
01388             printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01389                     neighbourProc[j] );
01390             fflush( NULL );
01391         }
01392         rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01393                           (MPI_Comm)communicator, &( requests_send[j] ) );
01394         CHECK_MPI_RZERO( rval, err );
01395         num_neighbourProcSend++;
01396 
01397         /* recv the vertex count for this processor */
01398         if( 0 )
01399         {
01400             printf( "[%d]i%d Listen  recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[j],
01401                     neighbourProc[j] );
01402             fflush( NULL );
01403         }
01404         rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01405                           (MPI_Comm)communicator, &( requests_recv[j] ) );
01406         CHECK_MPI_RZERO( rval, err );
01407         num_neighbourProcRecv++;
01408     }
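    /* both the count sends and the count receives above are non-blocking and tagged
       VERTEX_HEADER + iteration, so the packing below overlaps with the exchange of
       counts; the count receives are completed by the MPI_Waitall further down */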
01409 
01410     /* set up memory for the outgoing vertex data blocks */
01411 
01412     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01413     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01414     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01415     for( i = 1; i < (long)neighbourProc.size(); i++ )
01416     {
01417         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01418     }
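    /* packed_vertices_export[j] is just a pointer into the single contiguous
       vertex_pack_export buffer, offset by the counts of the preceding neighbours, so
       each neighbour's vertices form one contiguous block that a single send can cover */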
01419 
01420     /* place vertex data going to the same processor into consecutive memory space */
01421 
01422     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size() );
01423     for( i = 0; i < (long)neighbourProc.size(); i++ )
01424     {
01425         numVtxPerProcSendPACKED[i] = 0;
01426     }
01427     for( i = 0; i < num_exportVtx; i++ )
01428     {
01429         for( j = 0; j < (long)neighbourProc.size(); j++ )
01430         {
01431             if( exportProc[i] == neighbourProc[j] )
01432             {
01433                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01434                 numVtxPerProcSendPACKED[j]++;
01435                 MBMesquite::MsqVertex coordinates;
01436                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01437                 MSQ_ERRZERO( err );
01438                 packing_vertex->x       = coordinates[0];
01439                 packing_vertex->y       = coordinates[1];
01440                 packing_vertex->z       = coordinates[2];
01441                 packing_vertex->glob_id = exportVtxGIDs[i];
01442             }
01443         }
01444     }
01445 
01446     /* wait until we have heard from all processors how many vertices we will receive from them */
01447 
01448     std::vector< MPI_Status > status( neighbourProc.size() );
01449 
01450     if( num_neighbourProcRecv )
01451     {
01452         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) );
01453         CHECK_MPI_RZERO( rval, err );
01454     }
01455 
01456     /* how many vertices will we receive */
01457 
01458     int numVtxImport = 0;
01459     for( i = 0; i < (long)neighbourProc.size(); i++ )
01460     {
01461         numVtxImport += numVtxPerProcRecv[i];
01462     }
01463 
01464     /* set up memory for the incoming vertex data blocks */
01465 
01466     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01467     std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() );
01468     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01469     for( i = 1; i < (long)neighbourProc.size(); i++ )
01470     {
01471         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1];
01472     }
01473 
01474     /* post receives for all processors that have something for us */
01475     /* post sends for all processors that we have something for */
01476 
01477     num_neighbourProcRecv = 0;
01478 
01479     for( i = 0; i < (long)neighbourProc.size(); i++ )
01480     {
01481         if( numVtxPerProcRecv[i] )
01482         {
01483             if( 0 )
01484             {
01485                 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01486                         neighbourProc[i] );
01487                 fflush( NULL );
01488             }
01489             rval = MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE, neighbourProc[i],
01490                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) );
01491             CHECK_MPI_RZERO( rval, err );
01492             num_neighbourProcRecv++;
01493         }
01494         else
01495         {
01496             requests_recv[i] = MPI_REQUEST_NULL;
01497         }
01498         if( numVtxPerProcSend[i] )
01499         {
01500             if( 0 )
01501             {
01502                 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i],
01503                         neighbourProc[i] );
01504                 fflush( NULL );
01505             }
01506             rval = MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE, neighbourProc[i],
01507                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) );
01508             CHECK_MPI_RZERO( rval, err );
01509         }
01510         else
01511         {
01512             requests_send[i] = MPI_REQUEST_NULL;
01513         }
01514     }
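    /* neighbours with nothing to exchange get MPI_REQUEST_NULL in their slot; both
       MPI_Waitany and MPI_Waitall treat null requests as already complete, so the
       request arrays can stay sized to neighbourProc.size() */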
01515 
01516     /* wait for some receive to arrive */
01517 
01518     int local_id;
01519     while( num_neighbourProcRecv )
01520     {
01521         rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) );
01522         CHECK_MPI_RZERO( rval, err );
01523         /* unpack all vertices */
01524         for( i = 0; i < numVtxPerProcRecv[k]; i++ )
01525         {
01526             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01527             if( local_id )
01528             {
01529                 MBMesquite::Vector3D coordinates;
01530                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01531                                  packed_vertices_import[k][i].z );
01532                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01533                 MSQ_ERRZERO( err );
01534                 assert( part_smoothed_flag[local_id] == 0 );
01535                 part_smoothed_flag[local_id] = 1;
01536             }
01537             else
01538             {
01539                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01540                         packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01541             }
01542         }
01543         num_neighbourProcRecv--;
01544     }
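    /* MPI_Waitany returns the index k of whichever receive finished first, so each block
       is unpacked as soon as it arrives while the remaining receives are still in flight;
       vertex_map_find maps the (global id, sending rank) pair to the local index in
       part_vertices, and part_smoothed_flag records that the vertex now holds the
       sender's smoothed coordinates */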
01545 
01546     /* all receives have completed. it is safe to release the memory */
01547     // free(vertex_pack_import);
01548 
01549     /* wait until the sends have completed */
01550 
01551     if( num_neighbourProcSend )
01552     {
01553         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) );
01554         CHECK_MPI_RZERO( rval, err );
01555     }
01556 
01557     /* all sends have completed. it is safe to release the memory */
01558     // free(vertex_pack_export);
01559 
01560     return numVtxImport;
01561 }
01562 
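/* Same truly non-blocking exchange as comm_smoothed_vtx_tnb, but count messages are only
   posted for neighbours whose remaining per-neighbour budgets (neighbourProcSendRemain /
   neighbourProcRecvRemain) are nonzero; that bookkeeping, evidently set up before the
   first pass, is what lets this variant avoid the all-reduce its name refers to. */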
01563 int ParallelHelperImpl::comm_smoothed_vtx_tnb_no_all( MsqError& err )
01564 {
01565     int i, j, k, rval;
01566 
01567     // printf("[%d]i%d truly non blocking avoid reduce all\n", rank, iteration); fflush(NULL);
01568 
01569     /* compute how many vertices we send to each processor */
01570 
01571     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
01572     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
01573     for( i = 0; i < num_exportVtx; i++ )
01574     {
01575         for( j = 0; j < (long)neighbourProc.size(); j++ )
01576         {
01577             if( exportProc[i] == neighbourProc[j] )
01578             {
01579                 /* increment count */
01580                 numVtxPerProcSend[j]++;
01581                 /* end loop */
01582                 break;
01583             }
01584         }
01585         /* did loop end without finding the processor */
01586         if( j == (long)neighbourProc.size() )
01587         {
01588             printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration,
01589                     i, exportProc[i], (unsigned long)neighbourProc.size() );
01590         }
01591     }
01592 
01593     /* tell each processor how many vertices they can expect from us */
01594     /* also ask each processor how many vertices we can expect from them */
01595 
01596     int num_neighbourProcSend = 0;
01597     int num_neighbourProcRecv = 0;
01598     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01599     std::vector< MPI_Request > requests_recv( neighbourProc.size() );
01600     for( j = 0; j < (long)neighbourProc.size(); j++ )
01601     {
01602         if( neighbourProcSendRemain[j] )
01603         {
01604             /* send the vertex count to this processor */
01605             if( 0 )
01606             {
01607                 printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01608                         neighbourProc[j] );
01609                 fflush( NULL );
01610             }
01611             rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01612                               (MPI_Comm)communicator, &( requests_send[j] ) );
01613             CHECK_MPI_RZERO( rval, err );
01614             num_neighbourProcSend++;
01615         }
01616         else
01617         {
01618             requests_send[j] = MPI_REQUEST_NULL;
01619         }
01620         if( neighbourProcRecvRemain[j] )
01621         {
01622             /* recv the vertex count for this processor */
01623             if( 0 )
01624             {
01625                 printf( "[%d]i%d Listen recv xx vertices from proc %d\n", rank, iteration, neighbourProc[j] );
01626                 fflush( NULL );
01627             }
01628             rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01629                               (MPI_Comm)communicator, &( requests_recv[j] ) );
01630             CHECK_MPI_RZERO( rval, err );
01631             num_neighbourProcRecv++;
01632         }
01633         else
01634         {
01635             requests_recv[j] = MPI_REQUEST_NULL;
01636         }
01637     }
01638 
01639     /* set up memory for the outgoing vertex data blocks */
01640 
01641     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01642     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01643     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01644     for( i = 1; i < (long)neighbourProc.size(); i++ )
01645     {
01646         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01647     }
01648 
01649     /* place vertex data going to the same processor into consecutive memory space */
01650 
01651     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
01652     for( i = 0; i < num_exportVtx; i++ )
01653     {
01654         for( j = 0; j < (long)neighbourProc.size(); j++ )
01655         {
01656             if( exportProc[i] == neighbourProc[j] )
01657             {
01658                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01659                 numVtxPerProcSendPACKED[j]++;
01660                 MBMesquite::MsqVertex coordinates;
01661                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01662                 MSQ_ERRZERO( err );
01663                 packing_vertex->x       = coordinates[0];
01664                 packing_vertex->y       = coordinates[1];
01665                 packing_vertex->z       = coordinates[2];
01666                 packing_vertex->glob_id = exportVtxGIDs[i];
01667                 if( 0 )
01668                     printf( "[%d]i%d vertex %lu packed %g %g %g\n", rank, iteration, (unsigned long)exportVtxGIDs[i],
01669                             packing_vertex->x, packing_vertex->y, packing_vertex->z );
01670             }
01671         }
01672     }
01673 
01674     /* wait until we have heard from all processors how many vertices we will receive from them */
01675 
01676     std::vector< MPI_Status > status( neighbourProc.size() );
01677 
01678     if( num_neighbourProcRecv )
01679     {
01680         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) );
01681         CHECK_MPI_RZERO( rval, err );
01682     }
01683 
01684     /* how many vertices will we receive */
01685 
01686     int numVtxImport = 0;
01687     for( i = 0; i < (long)neighbourProc.size(); i++ )
01688     {
01689         numVtxImport += numVtxPerProcRecv[i];
01690         neighbourProcRecvRemain[i] -= numVtxPerProcRecv[i];
01691         neighbourProcSendRemain[i] -= numVtxPerProcSend[i];
01692     }
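    /* besides accumulating the import total, this loop draws down each neighbour's send
       and receive budgets; once a budget reaches zero, the header exchange at the top of
       the next call skips that neighbour */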
01693 
01694     /* set up memory for the incoming vertex data blocks */
01695 
01696     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01697     std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() );
01698     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01699     for( i = 1; i < (long)neighbourProc.size(); i++ )
01700     {
01701         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1];
01702     }
01703 
01704     /* post receives for all processors that have something for us */
01705     /* post sends for all processors that we have something for */
01706 
01707     num_neighbourProcRecv = 0;
01708 
01709     for( i = 0; i < (long)neighbourProc.size(); i++ )
01710     {
01711         if( 0 )
01712         {
01713             printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01714                     neighbourProc[i] );
01715             fflush( NULL );
01716         }
01717         if( numVtxPerProcRecv[i] )
01718         {
01719             rval = MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE, neighbourProc[i],
01720                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) );
01721             CHECK_MPI_RZERO( rval, err );
01722             num_neighbourProcRecv++;
01723         }
01724         else
01725         {
01726             requests_recv[i] = MPI_REQUEST_NULL;
01727         }
01728         if( 0 )
01729         {
01730             printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i],
01731                     neighbourProc[i] );
01732             fflush( NULL );
01733         }
01734         if( numVtxPerProcSend[i] )
01735         {
01736             rval = MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE, neighbourProc[i],
01737                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) );
01738             CHECK_MPI_RZERO( rval, err );
01739         }
01740         else
01741         {
01742             requests_send[i] = MPI_REQUEST_NULL;
01743         }
01744     }
01745 
01746     /* wait for some receive to arrive */
01747 
01748     int local_id;
01749     while( num_neighbourProcRecv )
01750     {
01751         rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) );
01752         CHECK_MPI_RZERO( rval, err );
01753         /* unpack all vertices */
01754         for( i = 0; i < numVtxPerProcRecv[k]; i++ )
01755         {
01756             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01757             if( local_id )
01758             {
01759                 MBMesquite::Vector3D coordinates;
01760                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01761                                  packed_vertices_import[k][i].z );
01762                 if( 0 )
01763                     printf( "[%d]i%d vertex %d becomes %g %g %g\n", rank, iteration, local_id,
01764                             packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01765                             packed_vertices_import[k][i].z );
01766                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01767                 MSQ_ERRZERO( err );
01768                 assert( part_smoothed_flag[local_id] == 0 );
01769                 part_smoothed_flag[local_id] = 1;
01770             }
01771             else
01772             {
01773                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01774                         packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01775             }
01776         }
01777         num_neighbourProcRecv--;
01778     }
01779 
01780     /* all receives have completed. it is safe to release the memory */
01781     // free(vertex_pack_import);
01782 
01783     /* wait until the sends have completed */
01784 
01785     if( num_neighbourProcSend )
01786     {
01787         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) );
01788         CHECK_MPI_RZERO( rval, err );
01789     }
01790 
01791     /* all sends have completed. it is safe to release the memory */
01792     // free(vertex_pack_export);
01793 
01794     return numVtxImport;
01795 }
01796 
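/* Plain non-blocking variant: the per-neighbour counts are announced with blocking
   MPI_Send and collected with blocking MPI_Recv from MPI_ANY_SOURCE, while the vertex
   blocks themselves still travel through MPI_Isend / MPI_Irecv and are unpacked with
   MPI_Waitany as they arrive. */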
01797 int ParallelHelperImpl::comm_smoothed_vtx_nb( MsqError& err )
01798 {
01799     int i, j, k, rval;
01800 
01801     // printf("[%d] %d %d non blocking\n",rank, iteration, pass);fflush(NULL);
01802 
01803     /* how many vertices will we receive */
01804 
01805     std::vector< int > numVtxPerProcSend( neighbourProc.size() );
01806     for( i = 0; i < (long)neighbourProc.size(); i++ )
01807     {
01808         numVtxPerProcSend[i] = 0;
01809     }
01810     for( i = 0; i < num_exportVtx; i++ )
01811     {
01812         for( j = 0; j < (long)neighbourProc.size(); j++ )
01813         {
01814             if( exportProc[i] == neighbourProc[j] )
01815             {
01816                 /* increment count */
01817                 numVtxPerProcSend[j]++;
01818                 /* end loop */
01819                 break;
01820             }
01821         }
01822         /* assert loop did not end without finding processor */
01823         assert( j != (long)neighbourProc.size() );
01824     }
01825 
01826     /* tell each processor how many vertices to expect */
01827 
01828     for( j = 0; j < (long)neighbourProc.size(); j++ )
01829     {
01830         rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01831                          (MPI_Comm)communicator );
01832         CHECK_MPI_RZERO( rval, err );
01833         //    printf("[%d]i%d Announcing %d vertices to proc
01834         //    %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL);
01835     }
01836 
01837     /* place vertex data going to the same processor into consecutive memory space */
01838     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01839     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01840     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01841     for( i = 1; i < (long)neighbourProc.size(); i++ )
01842     {
01843         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01844     }
01845 
01846     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
01847     for( i = 0; i < num_exportVtx; i++ )
01848     {
01849         for( j = 0; j < (long)neighbourProc.size(); j++ )
01850         {
01851             if( exportProc[i] == neighbourProc[j] )
01852             {
01853                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01854                 numVtxPerProcSendPACKED[j]++;
01855                 MBMesquite::MsqVertex coordinates;
01856                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01857                 MSQ_ERRZERO( err );
01858                 packing_vertex->x       = coordinates[0];
01859                 packing_vertex->y       = coordinates[1];
01860                 packing_vertex->z       = coordinates[2];
01861                 packing_vertex->glob_id = exportVtxGIDs[i];
01862             }
01863         }
01864     }
01865     // delete [] numVtxPerProcSendPACKED;
01866 
01867     /* now receive from each processor the number of vertices to expect */
01868 
01869     int num;
01870     int proc;
01871     int numVtxImport          = 0;
01872     int num_neighbourProcRecv = 0;
01873     std::vector< int > numVtxPerProcRecv( neighbourProc.size() );
01874     MPI_Status status;
01875     for( j = 0; j < (long)neighbourProc.size(); j++ )
01876     {
01877         numVtxPerProcRecv[j] = 0;
01878     }
01879 
01880     for( j = 0; j < (long)neighbourProc.size(); j++ )
01881     {
01882         /* get the vertex count for some processor */
01883         rval = MPI_Recv( &num,                      /* message buffer */
01884                          1,                         /* one data item */
01885                          MPI_INT,                   /* of type int */
01886                          MPI_ANY_SOURCE,            /* receive from any sender */
01887                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */
01888                          (MPI_Comm)communicator,    /* default communicator */
01889                          &status );                 /* info about the received message */
01890         CHECK_MPI_RZERO( rval, err );
01891         proc = status.MPI_SOURCE;
01892         /* will we import vertices from this processor */
01893 
01894         //    printf("[%d]i%d Heard we will receive %d vertices from proc
01895         //    %d\n",rank,iteration,num,proc); fflush(NULL);
01896 
01897         if( num )
01898         {
01899             /* increase number of processors we will receive from */
01900             num_neighbourProcRecv++;
01901             /* add number of vertices we will receive to the import total */
01902             numVtxImport += num;
01903         }
01904         /* find this processor in the list */
01905         for( i = 0; i < (long)neighbourProc.size(); i++ )
01906         {
01907             if( neighbourProc[i] == proc )
01908             {
01909                 numVtxPerProcRecv[i] = num;
01910                 break;
01911             }
01912         }
01913         /* assert loop did not end without finding processor */
01914         assert( i != (long)neighbourProc.size() );
01915         if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc );
01916         fflush( NULL );
01917     }
01918 
01919     /* create list of processors we receive from */
01920     std::vector< int > neighbourProcRecv( num_neighbourProcRecv );
01921     std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv );
01922     for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ )
01923     {
01924         if( 0 )
01925             printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01926                     neighbourProc[i] );
01927         fflush( NULL );
01928         if( numVtxPerProcRecv[i] )
01929         {
01930             neighbourProcRecv[k]     = neighbourProc[i];
01931             numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i];
01932             k++;
01933         }
01934     }
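    /* only neighbours with a nonzero incoming count make it into neighbourProcRecv and
       numVtxPerProcRecvRecv, so the import block pointers and the receive requests below
       are allocated for exactly num_neighbourProcRecv processors */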
01935 
01936     /* set up memory for the incoming vertex data blocks */
01937     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01938     std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv );
01939     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01940     for( i = 1; i < num_neighbourProcRecv; i++ )
01941     {
01942         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1];
01943     }
01944 
01945     /* receive from all processors that have something for us */
01946     std::vector< MPI_Request > request( num_neighbourProcRecv );
01947     for( j = 0; j < num_neighbourProcRecv; j++ )
01948     {
01949         rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE, neighbourProcRecv[j],
01950                           VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) );
01951         CHECK_MPI_RZERO( rval, err );
01952         if( false )
01953         {
01954             printf( "[%d]i%d Scheduling receipt of %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j],
01955                     neighbourProcRecv[j] );
01956             fflush( NULL );
01957         }
01958     }
01959 
01960     /* now send the data blocks */
01961 
01962     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01963     for( j = 0; j < (long)neighbourProc.size(); j++ )
01964     {
01965         if( numVtxPerProcSend[j] )
01966         {
01967             rval = MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE, neighbourProc[j],
01968                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) );
01969             CHECK_MPI_RZERO( rval, err );
01970             if( 0 )
01971             {
01972                 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01973                         neighbourProc[j] );
01974                 fflush( NULL );
01975             }
01976         }
01977         else
01978         {
01979             requests_send[j] = MPI_REQUEST_NULL;
01980         }
01981     }
01982 
01983     /* process messages as they arrive */
01984 
01985     int local_id;
01986     for( j = 0; j < num_neighbourProcRecv; j++ )
01987     {
01988         rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status );
01989         CHECK_MPI_RZERO( rval, err );
01990 
01991         /* unpack messages */
01992         proc = status.MPI_SOURCE;
01993         int count;
01994         MPI_Get_count( &status, MPI_INT, &count );
01995         if( 0 )
01996             printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k],
01997                     count, neighbourProcRecv[k], proc );
01998         fflush( NULL );
01999         for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ )
02000         {
02001             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02002             if( local_id )
02003             {
02004                 MBMesquite::Vector3D coordinates;
02005                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
02006                                  packed_vertices_import[k][i].z );
02007                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02008                 MSQ_ERRZERO( err );
02009                 assert( part_smoothed_flag[local_id] == 0 );
02010                 part_smoothed_flag[local_id] = 1;
02011                 if( 0 )
02012                     printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02013                             (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x,
02014                             packed_vertices_import[k][i].y, packed_vertices_import[k][i].z );
02015             }
02016             else
02017             {
02018                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02019                         packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02020             }
02021         }
02022     }
02023 
02024     /* all receives have completed. it is safe to release the memory */
02025     // free(vertex_pack_import);
02026     /* wait until the sends have completed */
02027     std::vector< MPI_Status > stati( neighbourProc.size() );
02028     rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) );
02029     CHECK_MPI_RZERO( rval, err );
02030     /* all sends have completed. it is safe to release the memory */
02031     // free(vertex_pack_export);
02032 
02033     return numVtxImport;
02034 }
02035 
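/* Same exchange as comm_smoothed_vtx_nb, restricted by the remaining per-neighbour
   budgets: counts are only sent to neighbours with a nonzero send budget, and only as
   many count messages are awaited as there are neighbours with a nonzero receive budget,
   so no collective is needed to know when the header phase is done. */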
02036 int ParallelHelperImpl::comm_smoothed_vtx_nb_no_all( MsqError& err )
02037 {
02038     int i, j, k, rval;
02039 
02040     // printf("[%d] %d %d non blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);
02041 
02042     /* how many vertices will we receive */
02043 
02044     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
02045     for( i = 0; i < num_exportVtx; i++ )
02046     {
02047         for( j = 0; j < (long)neighbourProc.size(); j++ )
02048         {
02049             if( exportProc[i] == neighbourProc[j] )
02050             {
02051                 /* increment count */
02052                 numVtxPerProcSend[j]++;
02053                 /* end loop */
02054                 break;
02055             }
02056         }
02057         /* assert loop did not end without finding processor */
02058         assert( j != (long)neighbourProc.size() );
02059     }
02060 
02061     /* tell each processor how many vertices to expect */
02062 
02063     for( j = 0; j < (long)neighbourProc.size(); j++ )
02064     {
02065         if( neighbourProcSendRemain[j] )
02066         {
02067             assert( neighbourProcSendRemain[j] >= numVtxPerProcSend[j] );
02068             neighbourProcSendRemain[j] -= numVtxPerProcSend[j];
02069             rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02070                              (MPI_Comm)communicator );
02071             CHECK_MPI_RZERO( rval, err );
02072             //    printf("[%d]i%d Announcing %d vertices to proc
02073             //    %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL);
02074         }
02075         else
02076         {
02077             assert( numVtxPerProcSend[j] == 0 );
02078         }
02079     }
02080 
02081     /* place vertex data going to the same processor into consecutive memory space */
02082     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
02083     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
02084     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
02085     for( i = 1; i < (long)neighbourProc.size(); i++ )
02086     {
02087         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
02088     }
02089 
02090     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
02091     for( i = 0; i < num_exportVtx; i++ )
02092     {
02093         for( j = 0; j < (long)neighbourProc.size(); j++ )
02094         {
02095             if( exportProc[i] == neighbourProc[j] )
02096             {
02097                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
02098                 numVtxPerProcSendPACKED[j]++;
02099                 MBMesquite::MsqVertex coordinates;
02100                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02101                 MSQ_ERRZERO( err );
02102                 packing_vertex->x       = coordinates[0];
02103                 packing_vertex->y       = coordinates[1];
02104                 packing_vertex->z       = coordinates[2];
02105                 packing_vertex->glob_id = exportVtxGIDs[i];
02106             }
02107         }
02108     }
02109     // delete [] numVtxPerProcSendPACKED;
02110 
02111     /* now receive from each processor the number of vertices to expect */
02112 
02113     int num;
02114     int proc;
02115     int numVtxImport          = 0;
02116     int num_neighbourProcRecv = 0;
02117     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
02118     MPI_Status status;
02119 
02120     int num_neighbourProcRecvRemain = 0;
02121     for( j = 0; j < (long)neighbourProc.size(); j++ )
02122     {
02123         if( neighbourProcRecvRemain[j] )
02124         {
02125             num_neighbourProcRecvRemain++;
02126         }
02127     }
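    /* one count message is expected from every neighbour whose receive budget is still
       open; the loop below receives exactly num_neighbourProcRecvRemain of them, in
       whatever order they arrive */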
02128     for( j = 0; j < num_neighbourProcRecvRemain; j++ )
02129     {
02130         /* get the vertex count for some processor */
02131         rval = MPI_Recv( &num,                      /* message buffer */
02132                          1,                         /* one data item */
02133                          MPI_INT,                   /* of type int */
02134                          MPI_ANY_SOURCE,            /* receive from any sender */
02135                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */
02136                          (MPI_Comm)communicator,    /* default communicator */
02137                          &status );                 /* info about the received message */
02138         CHECK_MPI_RZERO( rval, err );
02139         proc = status.MPI_SOURCE;
02140         /* will we import vertices from this processor */
02141 
02142         //    printf("[%d]i%d Heard we will receive %d vertices from proc
02143         //    %d\n",rank,iteration,num,proc); fflush(NULL);
02144 
02145         if( num )
02146         {
02147             /* increase number of processors we will receive from */
02148             num_neighbourProcRecv++;
02149             /* add number of vertices we will receive to the import total */
02150             numVtxImport += num;
02151         }
02152         /* find this processor in the list */
02153         for( i = 0; i < (long)neighbourProc.size(); i++ )
02154         {
02155             if( neighbourProc[i] == proc )
02156             {
02157                 numVtxPerProcRecv[i] = num;
02158                 assert( neighbourProcRecvRemain[i] >= num );
02159                 neighbourProcRecvRemain[i] -= num;
02160                 break;
02161             }
02162         }
02163         /* assert loop did not end without finding processor */
02164         assert( i != (long)neighbourProc.size() );
02165         if( false ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc );
02166         fflush( NULL );
02167     }
02168 
02169     /* create list of processors we receive from */
02170     std::vector< int > neighbourProcRecv( num_neighbourProcRecv );
02171     std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv );
02172     for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ )
02173     {
02174         if( 0 )
02175             printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
02176                     neighbourProc[i] );
02177         fflush( NULL );
02178         if( numVtxPerProcRecv[i] )
02179         {
02180             neighbourProcRecv[k]     = neighbourProc[i];
02181             numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i];
02182             k++;
02183         }
02184     }
02185 
02186     /* set up memory for the incoming vertex data blocks */
02187     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
02188     std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv );
02189     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
02190     for( i = 1; i < num_neighbourProcRecv; i++ )
02191     {
02192         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1];
02193     }
02194 
02195     /* receive from all processors that have something for us */
02196     std::vector< MPI_Request > request( num_neighbourProcRecv );
02197     for( j = 0; j < num_neighbourProcRecv; j++ )
02198     {
02199         rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE, neighbourProcRecv[j],
02200                           VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) );
02201         CHECK_MPI_RZERO( rval, err );
02202         if( false )
02203         {
02204             printf( "[%d]i%d Scheduling receipt of %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j],
02205                     neighbourProcRecv[j] );
02206             fflush( NULL );
02207         }
02208     }
02209 
02210     /* now send the data blocks */
02211 
02212     std::vector< MPI_Request > requests_send( neighbourProc.size() );
02213     for( j = 0; j < (long)neighbourProc.size(); j++ )
02214     {
02215         if( numVtxPerProcSend[j] )
02216         {
02217             rval = MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE, neighbourProc[j],
02218                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) );
02219             CHECK_MPI_RZERO( rval, err );
02220             if( 0 )
02221             {
02222                 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
02223                         neighbourProc[j] );
02224                 fflush( NULL );
02225             }
02226         }
02227         else
02228         {
02229             requests_send[j] = MPI_REQUEST_NULL;
02230         }
02231     }
02232 
02233     /* process messages as they arrive */
02234 
02235     int local_id;
02236     for( j = 0; j < num_neighbourProcRecv; j++ )
02237     {
02238         rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status );
02239         CHECK_MPI_RZERO( rval, err );
02240 
02241         /* unpack messages */
02242         proc = status.MPI_SOURCE;
02243         int count;
02244         MPI_Get_count( &status, MPI_INT, &count );
02245         if( 0 )
02246             printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k],
02247                     count, neighbourProcRecv[k], proc );
02248         fflush( NULL );
02249         for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ )
02250         {
02251             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02252             if( local_id )
02253             {
02254                 MBMesquite::Vector3D coordinates;
02255                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
02256                                  packed_vertices_import[k][i].z );
02257                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02258                 MSQ_ERRZERO( err );
02259                 assert( part_smoothed_flag[local_id] == 0 );
02260                 part_smoothed_flag[local_id] = 1;
02261                 if( 0 )
02262                     printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02263                             (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x,
02264                             packed_vertices_import[k][i].y, packed_vertices_import[k][i].z );
02265             }
02266             else
02267             {
02268                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02269                         packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02270             }
02271         }
02272     }
02273 
02274     /* all receives have completed. it is safe to release the memory */
02275     // free(vertex_pack_import);
02276     /* wait until the sends have completed */
02277     std::vector< MPI_Status > stati( neighbourProc.size() );
02278     rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) );
02279     CHECK_MPI_RZERO( rval, err );
02280     /* all sends have completed. it is safe to release the memory */
02281     // free(vertex_pack_export);
02282 
02283     return numVtxImport;
02284 }
02285 
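/* Fully blocking variant: each neighbour is sent its count and, if nonzero, its vertex
   block with MPI_Send, after which the counts and blocks from the neighbours are pulled
   in with MPI_Recv in arrival order.  Issuing every send before any receive relies on the
   MPI implementation buffering the outgoing messages; the non-blocking variants above do
   not depend on that. */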
02286 int ParallelHelperImpl::comm_smoothed_vtx_b( MsqError& err )
02287 {
02288     int i, j, rval;
02289 
02290     // printf("[%d] %d %d blocking\n",rank, iteration, pass);fflush(NULL);
02291 
02292     /* how many vertices per processor */
02293 
02294     std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
02295     for( i = 0; i < num_exportVtx; i++ )
02296     {
02297         for( j = 0; j < (long)neighbourProc.size(); j++ )
02298         {
02299             if( exportProc[i] == neighbourProc[j] )
02300             {
02301                 /* increment count */
02302                 numVtxPerProc[j]++;
02303                 /* end loop */
02304                 break;
02305             }
02306         }
02307         /* assert loop did not end without finding the processor */
02308         assert( j != (long)neighbourProc.size() );
02309     }
02310 
02311     /* place vertices going to the same processor into consecutive memory space */
02312 
02313     std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
02314     std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
02315     VertexPack* packing_vertex;
02316     if( neighbourProc.size() ) packed_vertices[0] = ARRPTR( vertex_pack );
02317     for( i = 1; i < (long)neighbourProc.size(); i++ )
02318     {
02319         packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
02320     }
02321 
02322     std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );
02323 
02324     for( i = 0; i < num_exportVtx; i++ )
02325     {
02326         for( j = 0; j < (long)neighbourProc.size(); j++ )
02327         {
02328             if( exportProc[i] == neighbourProc[j] )
02329             {
02330                 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
02331                 numVtxPackedPerProc[j]++;
02332                 MBMesquite::MsqVertex coordinates;
02333                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02334                 MSQ_ERRZERO( err );
02335                 packing_vertex->x       = coordinates[0];
02336                 packing_vertex->y       = coordinates[1];
02337                 packing_vertex->z       = coordinates[2];
02338                 packing_vertex->glob_id = exportVtxGIDs[i];
02339             }
02340         }
02341     }
02342 
02343     // delete [] numVtxPackedPerProc;
02344 
02345     /* send each block to the corresponding processor, preceded by the number of vertices */
02346 
02347     for( j = 0; j < (long)neighbourProc.size(); j++ )
02348     {
02349 
02350         //    printf("[%d]i%dp%d Announcing %d vertices to proc
02351         //    %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02352 
02353         rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02354                          (MPI_Comm)communicator );
02355         CHECK_MPI_RZERO( rval, err );
02356         // printf("[%d]i%dp%d Sending %d vertices to proc
02357         // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02358 
02359         /* is there any vertex data to be sent */
02360 
02361         if( numVtxPerProc[j] )
02362         {
02363             rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE, neighbourProc[j],
02364                              VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
02365             CHECK_MPI_RZERO( rval, err );
02366             // printf("[%d]i%dp%d Sent %d vertices to proc
02367             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02368         }
02369     }
02370 
02371     int num;
02372     int proc;
02373     // int tag;
02374     int count;
02375     int numVtxImport = 0;
02376     MPI_Status status;
02377     int local_id;
02378 
02379     //  printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
02380     //  \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);
02381 
02382     /* receiving blocks from other processors */
02383 
02384     for( j = 0; j < (long)neighbourProc.size(); j++ )
02385     {
02386         rval = MPI_Recv( &num,                      /* message buffer */
02387                          1,                         /* one data item */
02388                          MPI_INT,                   /* of type int */
02389                          MPI_ANY_SOURCE,            /* receive from any sender */
02390                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
02391                          (MPI_Comm)communicator,    /* default communicator */
02392                          &status );                 /* info about the received message */
02393         CHECK_MPI_RZERO( rval, err );
02394         proc = status.MPI_SOURCE;
02395         // tag = status.MPI_TAG;
02396         MPI_Get_count( &status, MPI_INT, &count );
02397 
02398         //    printf("[%d]i%dp%d Receiving %d vertices from proc
02399         //    %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);
02400 
02401         /* is there any vertex data to be received */
02402 
02403         if( num )
02404         {
02405 
02406             numVtxImport += num;
02407 
02408             /* do we have enough space allocated */
02409 
02410             if( num_exportVtx + 10 < num )
02411             {
02412                 // if (vertex_pack) free(vertex_pack);
02413                 num_exportVtx = num;
02414                 vertex_pack.resize( num_exportVtx + 10 );
02415                 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
02416             }
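            /* vertex_pack, which held the outgoing data, is reused as the receive buffer
               and grown if this neighbour sends more vertices than we exported; the
               blocking sends above have already returned and the packed_vertices pointers
               are not used again, so resizing here is harmless */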
02417 
02418             rval = MPI_Recv( ARRPTR( vertex_pack ),    /* message buffer */
02419                              4 * num,                  /* num items with 4 doubles each */
02420                              MPI_DOUBLE,               /* of type double */
02421                              proc,                     /* receive from this processor only */
02422                              VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */
02423                              (MPI_Comm)communicator,   /* default communicator */
02424                              &status );                /* info about the received message */
02425             CHECK_MPI_RZERO( rval, err );
02426 
02427             proc = status.MPI_SOURCE;
02428             // tag = status.MPI_TAG;
02429             MPI_Get_count( &status, MPI_DOUBLE, &count );
02430 
02431             if( count != 4 * num )
02432                 printf( "[%d]i%d WARNING: expected %d vertices = %d doubles from proc %d but only "
02433                         "got %d doubles\n",
02434                         rank, iteration, num, num * 4, proc, count );
02435             fflush( NULL );
02436 
02437             //      printf("[%d]i%d Received %d vertices from proc
02438             //      %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);
02439 
02440             /* update the received vertices in our boundary mesh */
02441             for( i = 0; i < num; i++ )
02442             {
02443                 /*  printf("[%d]i%d updating vertex %d with global_id
02444                  * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
02445                 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
02446                 if( local_id )
02447                 {
02448                     MBMesquite::Vector3D coordinates;
02449                     coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02450                     mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02451                     MSQ_ERRZERO( err );
02452                     assert( part_smoothed_flag[local_id] == 0 );
02453                     part_smoothed_flag[local_id] = 1;
02454                     if( 0 )
02455                         printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02456                                 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02457                 }
02458                 else
02459                 {
02460                     printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02461                             vertex_pack[i].glob_id, proc );
02462                 }
02463             }
02464         }
02465     }
02466     // if (vertex_pack) free(vertex_pack);
02467 
02468     return numVtxImport;
02469 }
02470 
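/* Blocking exchange as in comm_smoothed_vtx_b, but counts and blocks are only sent to,
   and expected from, neighbours whose send/recv budgets (neighbourProcSendRemain /
   neighbourProcRecvRemain) are still nonzero. */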
02471 int ParallelHelperImpl::comm_smoothed_vtx_b_no_all( MsqError& err )
02472 {
02473     int i, j, rval;
02474 
02475     // printf("[%d] %d %d blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);
02476 
02477     /* how many vertices per processor */
02478 
02479     std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
02480     for( i = 0; i < num_exportVtx; i++ )
02481     {
02482         for( j = 0; j < (long)neighbourProc.size(); j++ )
02483         {
02484             if( exportProc[i] == neighbourProc[j] )
02485             {
02486                 /* increment count */
02487                 numVtxPerProc[j]++;
02488                 /* end loop */
02489                 break;
02490             }
02491         }
02492         /* assert loop did not end without finding the processor */
02493         assert( j != (long)neighbourProc.size() );
02494     }
02495 
02496     /* place vertices going to the same processor into consecutive memory space */
02497 
02498     std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
02499     std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
02500     VertexPack* packing_vertex;
02501     if( neighbourProc.size() ) packed_vertices[0] = ARRPTR( vertex_pack );
02502     for( i = 1; i < (long)neighbourProc.size(); i++ )
02503     {
02504         packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
02505     }
02506 
02507     std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );
02508 
02509     for( i = 0; i < num_exportVtx; i++ )
02510     {
02511         for( j = 0; j < (long)neighbourProc.size(); j++ )
02512         {
02513             if( exportProc[i] == neighbourProc[j] )
02514             {
02515                 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
02516                 numVtxPackedPerProc[j]++;
02517                 MBMesquite::MsqVertex coordinates;
02518                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02519                 MSQ_ERRZERO( err );
02520                 packing_vertex->x       = coordinates[0];
02521                 packing_vertex->y       = coordinates[1];
02522                 packing_vertex->z       = coordinates[2];
02523                 packing_vertex->glob_id = exportVtxGIDs[i];
02524             }
02525         }
02526     }
02527 
02528     // delete [] numVtxPackedPerProc;
02529 
02530     /* send each block to the corresponding processor, preceded by the number of vertices */
02531 
02532     for( j = 0; j < (long)neighbourProc.size(); j++ )
02533     {
02534 
02535         if( neighbourProcSendRemain[j] )
02536         {
02537             assert( neighbourProcSendRemain[j] >= numVtxPerProc[j] );
02538             neighbourProcSendRemain[j] -= numVtxPerProc[j];
02539 
02540             // printf("[%d]i%dp%d Announcing %d vertices to proc
02541             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02542 
02543             rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02544                              (MPI_Comm)communicator );
02545             CHECK_MPI_RZERO( rval, err );
02546 
02547             // printf("[%d]i%dp%d Sending %d vertices to proc
02548             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02549 
02550             /* is there any vertex data to be sent */
02551 
02552             if( numVtxPerProc[j] )
02553             {
02554                 rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE, neighbourProc[j],
02555                                  VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
02556                 CHECK_MPI_RZERO( rval, err );
02557 
02558                 // printf("[%d]i%dp%d Sent %d vertices to proc
02559                 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02560             }
02561         }
02562         else
02563         {
02564             // printf("[%d]i%dp%d no longer sending to %d\n",rank,iteration,neighbourProc[j]);
02565             // fflush(NULL);
02566         }
02567     }
02568 
02569     int num;
02570     int proc;
02571     // int tag;
02572     int count;
02573     int numVtxImport = 0;
02574     MPI_Status status;
02575     int local_id;
02576 
02577     //  printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
02578     //  \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);
02579 
02580     /* receiving blocks in any order from other processors */
02581 
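    // Annotation (not in the original source): headers are accepted from MPI_ANY_SOURCE and
    // matched purely by tag, so neighbours can be serviced in whatever order their messages
    // arrive; the sender of each header is then read back from status.MPI_SOURCE and its
    // vertex block is received from that rank specifically.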
02582     int num_neighbourProcRecvRemain = 0;
02583     for( j = 0; j < (long)neighbourProc.size(); j++ )
02584     {
02585         if( neighbourProcRecvRemain[j] )
02586         {
02587             num_neighbourProcRecvRemain++;
02588         }
02589     }
02590     for( j = 0; j < num_neighbourProcRecvRemain; j++ )
02591     {
02592         rval = MPI_Recv( &num,                      /* message buffer */
02593                          1,                         /* one data item */
02594                          MPI_INT,                   /* of type int */
02595                          MPI_ANY_SOURCE,            /* receive from any sender */
02596                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
02597                          (MPI_Comm)communicator,    /* default communicator */
02598                          &status );                 /* info about the received message */
02599         CHECK_MPI_RZERO( rval, err );
02600         proc = status.MPI_SOURCE;
02601         // tag = status.MPI_TAG;
02602         MPI_Get_count( &status, MPI_INT, &count );
02603 
02604         // printf("[%d]i%dp%d Receiving %d vertices from proc
02605         // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);
02606 
02607         /* is there any vertex data to be received */
02608 
02609         if( num )
02610         {
02611 
02612             numVtxImport += num;
02613 
02614             /* do we have enough space allocated */
02615 
02616             if( num_exportVtx + 10 < num )
02617             {
02618                 // if (vertex_pack) free(vertex_pack);
02619                 num_exportVtx = num;
02620                 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
02621                 vertex_pack.resize( num_exportVtx + 10 );
02622             }
02623 
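            // Annotation (not in the original source): vertex_pack doubles as the receive
            // buffer here. This is safe because the blocking MPI_Send calls above have
            // returned (so the send data is no longer needed) and the packed_vertices
            // pointers into the old storage are not used after a resize().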
02624             rval = MPI_Recv( ARRPTR( vertex_pack ),    /* message buffer */
02625                              4 * num,                  /* num items of 4 doubles each */
02626                              MPI_DOUBLE,               /* of type double */
02627                              proc,                     /* receive from this processor only */
02628                              VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */
02629                              (MPI_Comm)communicator,   /* default communicator */
02630                              &status );                /* info about the received message */
02631             CHECK_MPI_RZERO( rval, err );
02632 
02633             proc = status.MPI_SOURCE;
02634             // tag = status.MPI_TAG;
02635             MPI_Get_count( &status, MPI_DOUBLE, &count );
02636 
02637             if( count != 4 * num )
02638                 printf( "[%d]i%d WARNING: expected %d vertices = %d doubles from proc %d but only "
02639                         "got %d doubles\n",
02640                         rank, iteration, num, num * 4, proc, count );
02641             fflush( NULL );
02642 
02643             // printf("[%d]i%d Received %d vertices from proc
02644             // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);
02645 
02646             /* update the received vertices in our boundary mesh */
02647             for( i = 0; i < num; i++ )
02648             {
02649                 /*  printf("[%d]i%d updating vertex %d with global_id
02650                  * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
02651                 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
02652                 if( local_id )
02653                 {
02654                     MBMesquite::Vector3D coordinates;
02655                     coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02656                     mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02657                     MSQ_ERRZERO( err );
02658                     assert( part_smoothed_flag[local_id] == 0 );
02659                     part_smoothed_flag[local_id] = 1;
02660                     if( 0 && rank == 1 )
02661                         printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02662                                 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02663                 }
02664                 else
02665                 {
02666                     printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02667                             vertex_pack[i].glob_id, proc );
02668                 }
02669             }
02670         }
02671         for( i = 0; i < (long)neighbourProc.size(); i++ )
02672         {
02673             if( proc == neighbourProc[i] )
02674             {
02675                 assert( neighbourProcRecvRemain[i] >= num );
02676                 neighbourProcRecvRemain[i] -= num;
02677                 break;
02678             }
02679         }
02680     }
02681     // if (vertex_pack) free(vertex_pack);
02682 
02683     return numVtxImport;
02684 }
02685 
02686 /***********************************************************************
02687 COMPUTING THE INDEPENDENT SET AND EXPORT INFORMATION
02688 ***********************************************************************/
02689 /* the only thing that can prevent a vertex from being in the independent
02690    set is that it has a neighbouring vertex on a different processor that
02691    has not been smoothed yet and that has a larger random number */
02692 
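// Illustrative example (annotation, not part of the original source): suppose local boundary
// vertex a has one off-processor neighbour b that is not yet smoothed. If part_rand_number[a]
// exceeds part_rand_number[b], a enters the independent set; if the two random numbers are
// equal, the tie is broken in favour of the vertex owned by the higher-ranked processor, as
// implemented by the rank < part_proc_owner[b] test below.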
02693 void ParallelHelperImpl::compute_independent_set()
02694 {
02695     int i, j, k, l;
02696     int incident_vtx;
02697     bool done;
02698 
02699     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02700         in_independent_set[i] = false;
02701 
02702     bool found_more = true;
02703     int found_iter  = 0;
02704 
02705     num_exportVtx = 0;
02706 
02707     while( found_more )
02708     {
02709         found_iter++;
02710         found_more = false;
02711         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02712         {
02713             /* if this vertex could become part of the independent set */
02714             if( part_smoothed_flag[i] == 0 && in_independent_set[i] == false )
02715             {
02716                 /* assume it's in the independent set */
02717                 done = false;
02718                 /* then loop over the neighbors it has on other processors */
02719                 for( j = 0; !done && j < (long)vtx_off_proc_list[i].size(); j++ )
02720                 {
02721                     incident_vtx = vtx_off_proc_list[i][j];
02722                     /* if this neighbour has not yet been smoothed and is not covered and ... */
02723                     if( part_smoothed_flag[incident_vtx] == 0 )
02724                     {
02725                         /* ... has a higher rand_number than me */
02726                         if( part_rand_number[i] < part_rand_number[incident_vtx] )
02727                         {
02728                             /* then I am not in the independent set */
02729                             done = true;
02730                             break;
02731                         }
02732                         /* ... or has the same rand_number as me but a higher processor id */
02733                         else if( ( part_rand_number[i] == part_rand_number[incident_vtx] ) &&
02734                                  ( rank < part_proc_owner[incident_vtx] ) )
02735                         {
02736                             /* then I am not in the independent set */
02737                             done = true;
02738                             break;
02739                         }
02740                     }
02741                 }
02742                 /* if the vertex is in the independent set, add it to the export list */
02743                 if( !done )
02744                 {
02745                     found_more = true;
02746                     //  if (found_iter > 1) printf("[%d]i%d found another one %d in iteration
02747                     //%d\n",rank,iteration,i, found_iter);
02748                     in_independent_set[i] = true;
02749                     /* mark vertices with lower random numbers as covered */
02750                     for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
02751                     {
02752                         incident_vtx = vtx_off_proc_list[i][j];
02753                         /* if this neighbour has not yet been smoothed or covered, mark it as
02754                            covered */
02755                         if( part_smoothed_flag[incident_vtx] == 0 )
02756                         {
02757                             part_smoothed_flag[incident_vtx] = 2;
02758                         }
02759                     }
02760                     k = num_exportVtx;
02761                     /* then loop over the neighbors it has on other processors */
02762                     for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
02763                     {
02764                         incident_vtx = vtx_off_proc_list[i][j];
02765                         /* check to see if this processor is already on the list */
02766                         done = false;
02767                         for( l = k; l < num_exportVtx && !done; l++ )
02768                         {
02769                             if( exportProc[l] == part_proc_owner[incident_vtx] )
02770                             {
02771                                 done = true;
02772                             }
02773                         }
02774                         /* if it's not on the list add it */
02775                         if( !done )
02776                         {
02777                             exportVtxLIDs[num_exportVtx] = i;
02778                             exportVtxGIDs[num_exportVtx] = part_gid[i];
02779                             exportProc[num_exportVtx]    = part_proc_owner[incident_vtx];
02780                             num_exportVtx++;
02781                         }
02782                     }
02783                 }
02784             }
02785         }
02786     }
02787 
02788     if( false )
02789     {
02790         int in_set = 0;
02791         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02792             if( in_independent_set[i] ) in_set++;
02793
02794         printf( "[%d]i%d independent set has %d of %d vertices sent out %d times\n", rank, iteration, in_set,
02795                 num_vtx_partition_boundary_local, num_exportVtx );
02796         fflush( NULL );
02797     }
02798 
02799     /* unmark the vertices that have been marked as covered */
02800     for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
02801         if( part_smoothed_flag[i] == 2 ) part_smoothed_flag[i] = 0;
02802 }
02803 
02804 int ParallelHelperImpl::get_rank() const
02805 {
02806     return rank;
02807 }
02808 
02809 int ParallelHelperImpl::get_nprocs() const
02810 {
02811     return nprocs;
02812 }
02813 
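// Annotation (not in the original source): the ownership convention implemented below assigns
// each element to the highest processor id found among its vertices, so an element shared
// across a partition boundary is claimed only by the rank matching that maximum.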
02814 bool ParallelHelperImpl::is_our_element( MBMesquite::Mesh::ElementHandle element_handle, MsqError& err ) const
02815 {
02816     int i;
02817     std::vector< Mesh::VertexHandle > pvertices;
02818     std::vector< size_t > junk;
02819     mesh->elements_get_attached_vertices( &element_handle, 1, pvertices, junk, err );
02820     MSQ_ERRZERO( err );
02821     int num_verts = pvertices.size();
02822     std::vector< int > proc_ids( num_verts );
02823     mesh->vertices_get_processor_id( ARRPTR( pvertices ), ARRPTR( proc_ids ), num_verts, err );
02824     MSQ_ERRZERO( err );
02825     int max_proc_id = proc_ids[0];
02826     for( i = 1; i < num_verts; i++ )
02827         if( max_proc_id < proc_ids[i] ) max_proc_id = proc_ids[i];
02828     return ( max_proc_id == rank );
02829 }
02830 
02831 bool ParallelHelperImpl::is_our_vertex( MBMesquite::Mesh::VertexHandle vertex_handle, MsqError& err ) const
02832 {
02833     int proc_id;
02834     mesh->vertices_get_processor_id( &vertex_handle, &proc_id, 1, err );
02835     return !MSQ_CHKERR( err ) && ( proc_id == rank );
02836 }
02837 
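// Annotation (not in the original source): the two reductions below fold the maximum into the
// same MPI_MIN operation by negating it, using max(a, b) == -min(-a, -b), so the minimum and
// the maximum travel together in a single 2-double reduce.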
02838 void ParallelHelperImpl::communicate_min_max_to_all( double* minimum, double* maximum, MsqError& err ) const
02839 {
02840     double d_min[2];
02841     double d_min_recv[2];
02842     d_min[0] = -( *maximum );
02843     d_min[1] = *minimum;
02844     int rval = MPI_Allreduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, (MPI_Comm)communicator );
02845     CHECK_MPI( rval, err );
02846     *maximum = -d_min_recv[0];
02847     *minimum = d_min_recv[1];
02848 }
02849 
02850 void ParallelHelperImpl::communicate_min_max_to_zero( double* minimum, double* maximum, MsqError& err ) const
02851 {
02852     double d_min[2];
02853     double d_min_recv[2];
02854     d_min[0] = -( *maximum );
02855     d_min[1] = *minimum;
02856     int rval = MPI_Reduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, 0, (MPI_Comm)communicator );
02857     CHECK_MPI( rval, err );
02858     if( rank == 0 )
02859     {
02860         *maximum = -d_min_recv[0];
02861         *minimum = d_min_recv[1];
02862     }
02863 }
02864 
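// Annotation (not in the original source): the integer counters below are widened to double so
// that all nine statistics fit in one MPI_Reduce of MPI_DOUBLEs; this stays exact as long as
// the counts remain below 2^53, the largest integer a double can represent without rounding.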
02865 void ParallelHelperImpl::communicate_sums_to_zero( size_t* freeElementCount,
02866                                                    int* invertedElementCount,
02867                                                    size_t* elementCount,
02868                                                    int* invertedSampleCount,
02869                                                    size_t* sampleCount,
02870                                                    long unsigned int* count,
02871                                                    long unsigned int* invalid,
02872                                                    double* sum,
02873                                                    double* sqrSum,
02874                                                    MsqError& err ) const
02875 {
02876     double d_sum[9];
02877     double d_sum_recv[9];
02878 
02879     d_sum[0] = (double)( *freeElementCount );
02880     d_sum[1] = (double)( *invertedElementCount );
02881     d_sum[2] = (double)( *elementCount );
02882     d_sum[3] = (double)( *invertedSampleCount );
02883     d_sum[4] = (double)( *sampleCount );
02884     d_sum[5] = (double)( *count );
02885     d_sum[6] = (double)( *invalid );
02886     d_sum[7] = *sum;
02887     d_sum[8] = *sqrSum;
02888 
02889     int rval = MPI_Reduce( d_sum, d_sum_recv, 9, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
02890     CHECK_MPI( rval, err );
02891 
02892     if( rank == 0 )
02893     {
02894         *freeElementCount     = (size_t)d_sum_recv[0];
02895         *invertedElementCount = (int)d_sum_recv[1];
02896         *elementCount         = (size_t)d_sum_recv[2];
02897         *invertedSampleCount  = (int)d_sum_recv[3];
02898         *sampleCount          = (size_t)d_sum_recv[4];
02899         *count                = (long unsigned int)d_sum_recv[5];
02900         *invalid              = (long unsigned int)d_sum_recv[6];
02901         *sum                  = d_sum_recv[7];
02902         *sqrSum               = d_sum_recv[8];
02903     }
02904 }
02905 
02906 void ParallelHelperImpl::communicate_power_sum_to_zero( double* pMean, MsqError& err ) const
02907 {
02908     double result;
02909     int rval = MPI_Reduce( pMean, &result, 1, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
02910     CHECK_MPI( rval, err );
02911     if( rank == 0 ) *pMean = result;
02912 }
02913 
02914 void ParallelHelperImpl::communicate_histogram_to_zero( std::vector< int >& histogram, MsqError& err ) const
02915 {
02916     std::vector< int > histogram_recv( histogram.size() );
02917     int rval = MPI_Reduce( &( histogram[0] ), &( histogram_recv[0] ), histogram.size(), MPI_INT, MPI_SUM, 0,
02918                            (MPI_Comm)communicator );
02919     CHECK_MPI( rval, err );
02920     if( rank == 0 )
02921     {
02922         histogram.swap( histogram_recv );
02923     }
02924 }
02925 
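// Annotation (not in the original source): the two collectives below reduce the bool as an
// int 0/1 value -- MPI_MAX implements a logical OR across ranks (any_true), while MPI_MIN in
// communicate_all_true implements a logical AND (all_true).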
02926 /// if any proc has a true input @param value, return true in @param value on all procs
02927 void ParallelHelperImpl::communicate_any_true( bool& value, MsqError& err ) const
02928 {
02929     int byte_out = value, byte_in;
02930     int rval     = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MAX, (MPI_Comm)communicator );
02931     CHECK_MPI( rval, err );
02932     value = ( byte_in != 0 );
02933 }
02934 
02935 /// srkenno AT sandia.gov 1/19/12: bug fix: changed from MPI_MAX to MIN - this makes the name of the
02936 /// function correct: if all procs' values are true, it returns true.  @see communicate_any_true
02937 /// above
02938 
02939 /// return true in @param value if all procs have value=true, else if any have it false, return
02940 /// false
02941 void ParallelHelperImpl::communicate_all_true( bool& value, MsqError& err ) const
02942 {
02943     int byte_out = value, byte_in;
02944     int rval     = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MIN, (MPI_Comm)communicator );
02945     CHECK_MPI( rval, err );
02946     value = ( byte_in != 0 );
02947 }
02948 
02949 }  // namespace MBMesquite