MOAB: Mesh Oriented datABase  (version 5.3.0)
ParallelHelper.cpp
Go to the documentation of this file.
00001 #include "ParallelHelper.hpp"
00002 
00003 #include <mpi.h>
00004 
00005 #include <cstdio>
00006 #include <cassert>
00007 #include <iostream>
00008 #include <cstdlib>
00009 #include <algorithm>
00010 #include <ctime>
00011 
00012 #include "MsqVertex.hpp"
00013 #include "MsqError.hpp"
00014 
00015 #define VERTEX_HEADER              1
00016 #define VERTEX_BLOCK               1000
00017 #define GHOST_NODE_INFO            2000
00018 #define GHOST_NODE_VERTICES_WANTED 2001
00019 #define GHOST_NODE_VERTEX_GIDS     2002
00020 #define GHOST_NODE_VERTEX_UPDATES  2003
00021 
00022 #define ARRPTR( x ) arrptr( x, true )
00023 
00024 namespace MBMesquite
00025 {
00026 
00027 void parallel_barrier()
00028 {
00029     int is_init = 0;
00030     int err     = MPI_Initialized( &is_init );
00031     if( MPI_SUCCESS != err ) return;
00032     if( is_init ) MPI_Barrier( MPI_COMM_WORLD );
00033 }
00034 
00035 int get_parallel_rank()
00036 {
00037     int rank    = 0;
00038     int is_init = 0;
00039     int err     = MPI_Initialized( &is_init );
00040     if( MPI_SUCCESS != err ) return 0;
00041     if( is_init ) MPI_Comm_rank( MPI_COMM_WORLD, &rank );
00042     return rank;
00043 }
00044 
00045 int get_parallel_size()
00046 {
00047     int nprocs  = 0;
00048     int is_init = 0;
00049     int err     = MPI_Initialized( &is_init );
00050     if( MPI_SUCCESS != err ) return 0;
00051     if( is_init ) MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
00052     return nprocs;
00053 }
00054 
00055 double reduce_parallel_max( double value )
00056 {
00057     int is_init = 0;
00058     int err     = MPI_Initialized( &is_init );
00059     if( MPI_SUCCESS != err ) return value;
00060     if( !is_init ) return value;
00061 
00062     double d_max[1];
00063     double d_max_recv[1];
00064     d_max[0] = value;
00065     int rval = MPI_Allreduce( d_max, d_max_recv, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
00066     if( MPI_SUCCESS != rval ) return value;
00067 
00068     return d_max_recv[0];
00069 }
00070 
00071 static const char* mpi_err_string( int error_code )
00072 {
00073     static char buffer[128];
00074     int len = sizeof( buffer );
00075     int e   = MPI_Error_string( error_code, buffer, &len );
00076     if( MPI_SUCCESS != e ) len = 0;
00077 
00078     if( len >= (int)sizeof( buffer ) ) len = sizeof( buffer ) - 1;
00079     buffer[len] = '\0';
00080     return buffer;
00081 }
00082 
00083 #define CHECK_MPI( RVAL, ERR )                                                                          \
00084     do                                                                                                  \
00085     {                                                                                                   \
00086         if( MPI_SUCCESS != ( RVAL ) )                                                                   \
00087         {                                                                                               \
00088             MSQ_SETERR( err )                                                                           \
00089             ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \
00090             return;                                                                                     \
00091         }                                                                                               \
00092     } while( false )
00093 
00094 #define CHECK_MPI_RZERO( RVAL, ERR )                                                                    \
00095     do                                                                                                  \
00096     {                                                                                                   \
00097         if( MPI_SUCCESS != ( RVAL ) )                                                                   \
00098         {                                                                                               \
00099             MSQ_SETERR( err )                                                                           \
00100             ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \
00101             return 0;                                                                                   \
00102         }                                                                                               \
00103     } while( false )
00104 
00105 static bool vertex_map_insert( VertexIdMap& map, size_t glob_id, int proc_id, int value )
00106 {
00107     VertexIdMapKey vid;
00108     vid.glob_id = glob_id;
00109     vid.proc_id = proc_id;
00110     return map.insert( VertexIdMap::value_type( vid, value ) ).second;
00111 }
00112 
00113 static int vertex_map_find( const VertexIdMap& map, size_t glob_id, int proc_id )
00114 {
00115     VertexIdMapKey vid;
00116     vid.glob_id                             = glob_id;
00117     vid.proc_id                             = proc_id;
00118     VertexIdMap::const_iterator map_element = map.find( vid );
00119     if( map_element == map.end() ) { return 0; }
00120     else
00121     {
00122         return map_element->second;
00123     }
00124 }
00125 
00126 static void my_quicksort( int* a, size_t* b, MBMesquite::Mesh::VertexHandle* c, int i, int j )
00127 {
00128     int in_i = i;
00129     int in_j = j;
00130     int wa;
00131     size_t wb;
00132     MBMesquite::Mesh::VertexHandle w1;
00133     int key = a[( i + j ) / 2];
00134     do
00135     {
00136         while( a[i] < key )
00137             i++;
00138         while( a[j] > key )
00139             j--;
00140         if( i < j )
00141         {
00142             wa   = a[i];
00143             a[i] = a[j];
00144             a[j] = wa;
00145             wb   = b[i];
00146             b[i] = b[j];
00147             b[j] = wb;
00148             w1   = c[i];
00149             c[i] = c[j];
00150             c[j] = w1;
00151         }
00152     } while( ++i <= --j );
00153     if( i == j + 3 )
00154     {
00155         i--;
00156         j++;
00157     }
00158     if( j > in_i ) my_quicksort( a, b, c, in_i, j );
00159     if( i < in_j ) my_quicksort( a, b, c, i, in_j );
00160 }
00161 
00162 static int hash6432shift( unsigned long long key )
00163 {
00164     key = ( ~key ) + ( key << 18 );  // key = (key << 18) - key - 1;
00165     key = key ^ ( key >> 31 );
00166     key = key * 21;  // key = (key + (key << 2)) + (key << 4);
00167     key = key ^ ( key >> 11 );
00168     key = key + ( key << 6 );
00169     key = key ^ ( key >> 22 );
00170     return (int)key;
00171 }
00172 
00173 static unsigned long long hash64shift( unsigned long long key )
00174 {
00175     key = ( ~key ) + ( key << 21 );  // key = (key << 21) - key - 1;
00176     key = key ^ ( key >> 24 );
00177     key = ( key + ( key << 3 ) ) + ( key << 8 );  // key * 265
00178     key = key ^ ( key >> 14 );
00179     key = ( key + ( key << 2 ) ) + ( key << 4 );  // key * 21
00180     key = key ^ ( key >> 28 );
00181     key = key + ( key << 31 );
00182     return key;
00183 }
00184 
00185 static double generate_random_number( int generate_random_numbers, int proc_id, size_t glob_id )
00186 {
00187     int gid;
00188 
00189     if( sizeof( size_t ) == sizeof( unsigned long long ) ) { gid = hash6432shift( (unsigned long long)glob_id ); }
00190     else
00191     {
00192         gid = (int)glob_id;
00193     }
00194 
00195     if( generate_random_numbers == 1 )
00196     {
00197         // count number of on bits
00198         int on            = 0;
00199         unsigned int mist = (unsigned int)gid;
00200         while( mist )
00201         {
00202             if( mist & 1 ) on++;
00203             mist = mist >> 1;
00204         }
00205         unsigned short xsubi[3];
00206         if( on & 1 )
00207             mist = (unsigned int)gid;
00208         else
00209             mist = (unsigned int)( -gid );
00210         if( on & 2 )
00211         {
00212             xsubi[0] = (unsigned short)( 65535 & mist );
00213             xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) );
00214         }
00215         else
00216         {
00217             xsubi[1] = (unsigned short)( 65535 & mist );
00218             xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) );
00219         }
00220         xsubi[2] = proc_id;
00221 
00222 #if( defined WIN32 || defined WIN64 )
00223         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00224         srand( (int)seed );
00225         return rand() / ( RAND_MAX + 1.0 );
00226 #else
00227         return erand48( xsubi );
00228 #endif
00229     }
00230     else if( generate_random_numbers == 2 )
00231     {
00232         // count number of on bits
00233         int on            = 0;
00234         unsigned int mist = (unsigned int)gid;
00235         while( mist )
00236         {
00237             if( mist & 1 ) on++;
00238             mist = mist >> 1;
00239         }
00240         unsigned short xsubi[3];
00241         if( on & 1 )
00242             mist = (unsigned int)gid;
00243         else
00244             mist = (unsigned int)( -gid );
00245         if( on & 2 )
00246         {
00247             xsubi[0] = (unsigned short)( 65535 & mist );
00248             xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) );
00249         }
00250         else
00251         {
00252             xsubi[1] = (unsigned short)( 65535 & mist );
00253             xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) );
00254         }
00255         xsubi[2] = proc_id ^ xsubi[1];
00256 
00257 #if( defined WIN32 || defined WIN64 )
00258         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00259         srand( (int)seed );
00260         return rand() / ( RAND_MAX + 1.0 );
00261 #else
00262         return erand48( xsubi );
00263 #endif
00264     }
00265     else if( generate_random_numbers == 3 )
00266     {
00267         unsigned long long key = (unsigned long long)gid;
00268         key                    = key << 32;
00269         key                    = key | proc_id;
00270         return (double)hash64shift( key );
00271     }
00272     else if( generate_random_numbers == 4 )
00273     {
00274         unsigned long long key = (unsigned long long)gid;
00275         key                    = key << 32;
00276         key                    = key | proc_id;
00277         key                    = hash64shift( key );
00278         unsigned short xsubi[3];
00279         xsubi[0] = (unsigned short)( key >> 48 );
00280         xsubi[1] = (unsigned short)( key >> 32 );
00281         xsubi[2] = (unsigned short)( key >> 16 );
00282 
00283 #if( defined WIN32 || defined WIN64 )
00284         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00285         srand( (int)seed );
00286         return rand() / ( RAND_MAX + 1.0 );
00287 #else
00288         return erand48( xsubi );
00289 #endif
00290     }
00291     else
00292     {
00293         return -1;
00294     }
00295 }
00296 
00297 ParallelHelperImpl::ParallelHelperImpl()
00298 {
00299     mesh = 0;
00300     set_communicator( MPI_COMM_WORLD );
00301     communication_model = TrulyNonBlockingAvoidAllReduce;
00302 
00303     MPI_Comm_rank( (MPI_Comm)communicator, &rank );
00304     MPI_Comm_size( (MPI_Comm)communicator, &nprocs );
00305 
00306     // variable defaults for VertexMover::loop_over_mesh()
00307     generate_random_numbers = 2;
00308 
00309     // memory chunks referenced for VertexMover::loop_over_mesh()
00310     vertices.clear();
00311     vtx_in_partition_boundary.clear();
00312     part_vertices.clear();
00313     unghost_vertices.clear();
00314     part_proc_owner.clear();
00315     part_gid.clear();
00316     part_smoothed_flag.clear();
00317     part_rand_number.clear();
00318     exportVtxGIDs.clear();
00319     exportVtxLIDs.clear();
00320     exportProc.clear();
00321     in_independent_set.clear();
00322     vid_map.clear();
00323     neighbourProcSend.clear();
00324     neighbourProcRecv.clear();
00325     neighbourProcSendRemain.clear();
00326     neighbourProcRecvRemain.clear();
00327     vtx_off_proc_list.clear();
00328     neighbourProc.clear();
00329 }
00330 
00331 ParallelHelperImpl::~ParallelHelperImpl() {}
00332 
00333 void ParallelHelperImpl::set_parallel_mesh( ParallelMesh* pmesh )
00334 {
00335     this->mesh = pmesh;
00336 }
00337 
00338 void ParallelHelperImpl::set_communicator( size_t comm )
00339 {
00340     communicator = comm;
00341     MPI_Comm_rank( (MPI_Comm)communicator, &rank );
00342     MPI_Comm_size( (MPI_Comm)communicator, &nprocs );
00343 }
00344 
00345 void ParallelHelperImpl::set_communication_model( int model, MsqError& )
00346 {
00347     communication_model = model;
00348 }
00349 
00350 void ParallelHelperImpl::set_generate_random_numbers( int grn, MsqError& )
00351 {
00352     this->generate_random_numbers = grn;
00353 }
00354 
00355 void ParallelHelperImpl::smoothing_init( MsqError& err )
00356 {
00357 
00358     int i, j, k, rval;
00359     size_t l;
00360 
00361     if( !mesh )
00362     {
00363         MSQ_SETERR( err )( MsqError::INVALID_STATE );
00364         return;
00365     }
00366     if( nprocs == 1 ) return;
00367 
00368     /* get the vertices */
00369     mesh->get_all_vertices( vertices, err );MSQ_ERRRTN( err );
00370     num_vertex = vertices.size();
00371 
00372     /* allocate the data arrays we'll use for smoothing */
00373     std::vector< size_t > gid( num_vertex );
00374     std::vector< int > proc_owner( num_vertex );
00375     std::vector< unsigned char > app_fixed( num_vertex );
00376 
00377     /* get the data from the mesquite mesh */
00378     mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err );
00379     mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err );
00380     mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err );
00381     if( false )
00382     {
00383         int ncull = 0;
00384         for( i = 0; i < num_vertex; ++i )
00385         {
00386             if( app_fixed[i] & MsqVertex::MSQ_CULLED ) { ++ncull; }
00387         }
00388         std::cout << "P[" << rank << "] smoothing_init ncull= " << ncull << " num_vertex= " << num_vertex << std::endl;
00389     }
00390 
00391     /* only interested in fixed flag from vertex byte? Clear others. */
00392     // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed
00393     for( i = 0; i < num_vertex; ++i )
00394         app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED;
00395 
00396     /* create temporary Tag for the local IDs */
00397     std::vector< int > lid( num_vertex );
00398     for( i = 0; i < num_vertex; i++ )
00399         lid[i] = i;
00400     const char LOCAL_ID_NAME[] = "LOCAL_ID";
00401     TagHandle lid_tag          = mesh->tag_create( LOCAL_ID_NAME, Mesh::INT, 1, NULL, err );MSQ_ERRRTN( err );
00402     mesh->tag_set_vertex_data( lid_tag, num_vertex, ARRPTR( vertices ), ARRPTR( lid ), err );MSQ_ERRRTN( err );
00403 
00404     if( false ) printf( "[%d] set local tags on %d vertices\n", rank, num_vertex );
00405 
00406     /* get the elements */
00407     std::vector< MBMesquite::Mesh::ElementHandle > elements;
00408     mesh->get_all_elements( elements, err );MSQ_ERRRTN( err );
00409     int num_elems = elements.size();
00410 
00411     /****************************************************************
00412     PARTITION BOUNDARY VERTEX DETERMINATION
00413     ***********************************************************************/
00414 
00415     /* initialize the vertex partition boundary array */
00416     vtx_in_partition_boundary.clear();
00417     vtx_in_partition_boundary.resize( num_vertex, 0 );
00418     int incident_vtx, vtx_off_proc, vtx_on_proc;
00419 
00420     /* get the array that contains the adjacent vertices for each mesh element */
00421     std::vector< MBMesquite::Mesh::VertexHandle > adj_vertices;
00422     std::vector< size_t > vtx_offsets;
00423     mesh->elements_get_attached_vertices( ARRPTR( elements ), num_elems, adj_vertices, vtx_offsets, err );
00424     std::vector< int > adj_vertices_lid( adj_vertices.size() );
00425     mesh->tag_get_vertex_data( lid_tag, adj_vertices.size(), ARRPTR( adj_vertices ), ARRPTR( adj_vertices_lid ), err );
00426 
00427     if( false ) printf( "[%d] gotten adjacent elements for %d elements\n", rank, num_elems );
00428 
00429     /* determine which vertices are smoothed as part of the boundary (and which are unused ghost
00430      * vertices) */
00431     num_vtx_partition_boundary_local  = 0;
00432     num_vtx_partition_boundary_remote = 0;
00433     for( i = 0; i < num_elems; i++ )
00434     {
00435         /* count how many vertices of the current element are on/off a different processor */
00436         vtx_off_proc = 0;
00437         vtx_on_proc  = 0;
00438         for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00439         {
00440             incident_vtx = adj_vertices_lid[j];
00441             /* obviously the vertex only counts if it is not app_fixed */
00442             if( !app_fixed[incident_vtx] )
00443             {
00444                 if( proc_owner[incident_vtx] != rank ) { vtx_off_proc++; }
00445                 else
00446                 {
00447                     vtx_on_proc++;
00448                 }
00449             }
00450         }
00451 
00452         /* if vertices are on different processors mark all local vertices as boundary1 and all
00453          * remote vertices as boundary2 */
00454         if( vtx_off_proc > 0 && vtx_on_proc > 0 )
00455         {
00456             /* collect stats */
00457             //  smooth_stats.num_part_bndy_elem++;
00458             /* mark the vertices */
00459             for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00460             {
00461                 incident_vtx = adj_vertices_lid[j];
00462                 /* obviously the vertex does not need to be marked if it was already marked or if it
00463                  * is app_fixed*/
00464                 if( vtx_in_partition_boundary[incident_vtx] <= 0 && app_fixed[incident_vtx] == 0 )
00465                 {
00466                     /* mark and count the vertex */
00467                     if( proc_owner[incident_vtx] != rank )
00468                     {
00469                         vtx_in_partition_boundary[incident_vtx] = 2;
00470                         num_vtx_partition_boundary_remote++;
00471                     }
00472                     else
00473                     {
00474                         vtx_in_partition_boundary[incident_vtx] = 1;
00475                         num_vtx_partition_boundary_local++;
00476                     }
00477                 }
00478             }
00479         }
00480         else if( vtx_off_proc > 0 )
00481         {
00482             /* mark the vertices as boundary-1 (aka unused ghost) if the element has only
00483              * off-processor vertices */
00484             for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00485             {
00486                 incident_vtx = adj_vertices_lid[j];
00487                 /* obviously the vertex is not marked if it was already marked or if it is
00488                  * app_fixed*/
00489                 if( vtx_in_partition_boundary[incident_vtx] == 0 && app_fixed[incident_vtx] == 0 )
00490                 {
00491                     vtx_in_partition_boundary[incident_vtx] = -1;
00492                 }
00493             }
00494         }
00495     }
00496 
00497     if( false )
00498     {
00499         printf( "[%d]i%d local %d remote %d ", rank, iteration, num_vtx_partition_boundary_local,
00500                 num_vtx_partition_boundary_remote );
00501         printf( "[%d]i%d pb1 ", rank, iteration );
00502         for( i = 0; i < num_vertex; i++ )
00503             if( vtx_in_partition_boundary[i] == 1 ) printf( "%d,%lu ", i, gid[i] );
00504         printf( "\n" );
00505         printf( "[%d]i%d pb2 ", rank, iteration );
00506         for( i = 0; i < num_vertex; i++ )
00507             if( vtx_in_partition_boundary[i] == 2 ) printf( "%d,%lu ", i, gid[i] );
00508         printf( "\n" );
00509         fflush( NULL );
00510     }
00511 
00512     num_vtx_partition_boundary = num_vtx_partition_boundary_local + num_vtx_partition_boundary_remote;
00513 
00514     /********************************************************************
00515    COLLECT THE PARTITION BOUNDARY VERTICES AND THE UNUSED GHOST VERTICES
00516     ********************************************************************/
00517 
00518     /* create the vectors to store the partition boundary vertex data */
00519     part_vertices.resize( num_vtx_partition_boundary );
00520     part_proc_owner.resize( num_vtx_partition_boundary );
00521     part_gid.resize( num_vtx_partition_boundary );
00522     part_smoothed_flag.resize( num_vtx_partition_boundary );
00523     part_rand_number.resize( num_vtx_partition_boundary );
00524 
00525     /* create the partition boundary map and its inverse */
00526     std::vector< int > vtx_partition_boundary_map_inverse( num_vertex );
00527 
00528     j = 0;
00529     /* first we map the partition boundary vertices that we will smooth on this processor */
00530     for( i = 0; i < num_vertex; i++ )
00531     {
00532         if( vtx_in_partition_boundary[i] == 1 )
00533         {
00534             part_vertices[j]   = vertices[i];
00535             part_proc_owner[j] = rank;
00536             assert( proc_owner[i] == rank );
00537             part_gid[j]                           = gid[i];
00538             vtx_partition_boundary_map_inverse[i] = j;
00539             j++;
00540         }
00541     }
00542 
00543     vid_map.clear();
00544 
00545     /* then we map the ghost vertices that will be smoothed on other processors */
00546     for( i = 0; i < num_vertex; i++ )
00547     {
00548         if( vtx_in_partition_boundary[i] == 2 )
00549         {
00550             part_vertices[j]   = vertices[i];
00551             part_proc_owner[j] = proc_owner[i];
00552             assert( proc_owner[i] != rank );
00553             part_gid[j]                           = gid[i];
00554             vtx_partition_boundary_map_inverse[i] = j;
00555             /* only insert those vertices in the map that are smoothed on other processors */
00556             vertex_map_insert( vid_map, part_gid[j], part_proc_owner[j], j );
00557             // printf("[%d] inserting vertex with gid %u and pid %d \n", rank, part_gid[j],
00558             // part_proc_owner[j]);
00559             j++;
00560         }
00561     }
00562 
00563     /* create our own 'very pseudo random' numbers */
00564     for( i = 0; i < num_vtx_partition_boundary; i++ )
00565         part_rand_number[i] = generate_random_number( generate_random_numbers, part_proc_owner[i], part_gid[i] );
00566 
00567     /* count the number of unused ghost vertices */
00568     unghost_num_vtx = 0;
00569     for( i = 0; i < num_vertex; i++ )
00570     {
00571         if( vtx_in_partition_boundary[i] == -1 ) { unghost_num_vtx++; }
00572     }
00573     // printf("[%d] found %d unused ghost vertices (local %d remote %d)\n",rank, unghost_num_vtx,
00574     // num_vtx_partition_boundary_local,num_vtx_partition_boundary_remote);
00575 
00576     /* create the vectors to store the unused ghost vertices */
00577     unghost_vertices.resize( unghost_num_vtx );
00578     std::vector< int > unghost_proc_owner( unghost_num_vtx );
00579     std::vector< size_t > unghost_gid( unghost_num_vtx );
00580 
00581     /* store the unused ghost vertices that are copies of vertices from other processors and will
00582      * need to be received */
00583     j = 0;
00584     for( i = 0; i < num_vertex; i++ )
00585     {
00586         if( vtx_in_partition_boundary[i] == -1 )
00587         {
00588             unghost_vertices[j]   = vertices[i];
00589             unghost_proc_owner[j] = proc_owner[i];
00590             assert( proc_owner[i] != rank );
00591             unghost_gid[j] = gid[i];
00592             // printf(" %d", unghost_gid[j]);
00593             j++;
00594         }
00595     }
00596 
00597     /* no longer needed */
00598     // delete [] gid; gid = 0;
00599     // delete [] proc_owner; proc_owner = 0;
00600 
00601     unghost_num_procs = 0;
00602     unghost_procs.clear();
00603     unghost_procs_offset.clear();
00604     unghost_procs_num_vtx.clear();
00605     if( unghost_num_vtx )
00606     {
00607         /* sort the unused ghost vertices by processor */
00608         my_quicksort( ARRPTR( unghost_proc_owner ), ARRPTR( unghost_gid ), &( unghost_vertices[0] ), 0,
00609                       unghost_num_vtx - 1 );
00610 
00611         /* count the number of processors we have unused ghost data from that we want to get updated
00612          */
00613         unghost_num_procs = 1;
00614         for( i = 1; i < unghost_num_vtx; i++ )
00615         {
00616             if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) unghost_num_procs++;
00617         }
00618 
00619         /* get the ids of those processors and the number of vertices we want from each */
00620         unghost_procs.resize( unghost_num_procs );
00621         unghost_procs_offset.resize( unghost_num_procs + 1 );
00622         unghost_procs_num_vtx.resize( unghost_num_procs );
00623         unghost_procs[0]        = unghost_proc_owner[0];
00624         unghost_procs_offset[0] = 0;
00625         for( i = 1, j = 1; i < unghost_num_vtx; i++ )
00626         {
00627             if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] )
00628             {
00629                 unghost_procs[j]             = unghost_proc_owner[i];
00630                 unghost_procs_offset[j]      = i;
00631                 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1];
00632                 assert( unghost_procs_num_vtx[j - 1] > 0 );
00633                 j++;
00634             }
00635         }
00636         unghost_procs_offset[j]      = i;
00637         unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1];
00638         assert( unghost_procs_num_vtx[j - 1] > 0 );
00639         assert( j == unghost_num_procs );
00640 
00641         // delete [] unghost_proc_owner; unghost_proc_owner = 0;
00642 
00643         // printf("[%d] have ugns from %d processor(s) (%d,%d,%d)\n", rank, unghost_num_procs,
00644         // unghost_procs[0], (unghost_num_procs>1) ? unghost_procs[1] : -1, (unghost_num_procs>2) ?
00645         // unghost_procs[1] : -1);
00646     }
00647 
00648     /* this will eventually store to how many processors each processor needs to send unused ghost
00649      * data to */
00650     std::vector< int > num_sends_of_unghost;
00651 
00652     /* gather the information about how many processors need ghost data */
00653     if( rank == 0 )
00654     {
00655         /* this will eventually store to how many processors each processor needs to send unused
00656          * ghost data to */
00657         num_sends_of_unghost.resize( nprocs );
00658     }
00659     /* temporary used for the initial gather in which each proc tells the root from how many procs
00660      * it wants unused ghost data updates */
00661     rval = MPI_Gather( &unghost_num_procs, 1, MPI_INT, ARRPTR( num_sends_of_unghost ), 1, MPI_INT, 0,
00662                        (MPI_Comm)communicator );
00663     CHECK_MPI( rval, err );
00664 
00665     /* now each processor tells the root node from which processors they want unused ghost nodes
00666      * information */
00667     if( rank == 0 )
00668     {
00669         int procs_num = 0;
00670         int procs_max = 0;
00671         /* first count how many processors will send unused ghost node info and find the maximum
00672          * they will send */
00673         for( i = 1; i < nprocs; i++ )
00674         {
00675             if( num_sends_of_unghost[i] )
00676             {
00677                 procs_num++;
00678                 if( num_sends_of_unghost[i] > procs_max ) procs_max = num_sends_of_unghost[i];
00679             }
00680         }
00681         /* clean the temporary used array */
00682         for( i = 0; i < nprocs; i++ )
00683             num_sends_of_unghost[i] = 0;
00684         /* process rank 0's unused ghost nodes procs first */
00685         for( j = 0; j < unghost_num_procs; j++ )
00686         {
00687             num_sends_of_unghost[unghost_procs[j]]++;
00688         }
00689         /* now we process messages from all other processors that want unused ghost node information
00690          */
00691         int* unghost_procs_array = new int[procs_max];
00692         for( i = 0; i < procs_num; i++ )
00693         {
00694             MPI_Status status;
00695             rval = MPI_Recv( unghost_procs_array, procs_max, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_INFO,
00696                              (MPI_Comm)communicator, &status );
00697             CHECK_MPI( rval, err );
00698             int count;
00699             MPI_Get_count( &status, MPI_INT, &count );
00700             for( j = 0; j < count; j++ )
00701             {
00702                 num_sends_of_unghost[unghost_procs_array[j]]++;
00703             }
00704         }
00705         delete[] unghost_procs_array;
00706     }
00707     else
00708     {
00709         if( unghost_num_vtx )
00710         {
00711             rval = MPI_Send( ARRPTR( unghost_procs ), unghost_num_procs, MPI_INT, 0, GHOST_NODE_INFO,
00712                              (MPI_Comm)communicator );
00713             CHECK_MPI( rval, err );
00714         }
00715     }
00716 
00717     /* now the root node knows for each processor on how many other processors they have ghost nodes
00718      * (which need updating) */
00719     /* the scatter distributes this information to each processor */
00720     rval = MPI_Scatter( ARRPTR( num_sends_of_unghost ), 1, MPI_INT, &update_num_procs, 1, MPI_INT, 0,
00721                         (MPI_Comm)communicator );
00722     CHECK_MPI( rval, err );
00723 
00724     // if (rank == 0) delete [] num_sends_of_unghost;
00725 
00726     // printf("[%d] i have unused ghost nodes from %d procs and i need to send updates to %d
00727     // procs\n", rank, unghost_num_procs, update_num_procs);
00728 
00729     /* now the processors can negotiate amongst themselves: */
00730 
00731     /* first tell each processor the number of unused ghost nodes we want from them */
00732     std::vector< MPI_Request > requests_unghost( unghost_num_procs );
00733     for( j = 0; j < unghost_num_procs; j++ )
00734     {
00735         rval = MPI_Isend( &( unghost_procs_num_vtx[j] ), 1, MPI_INT, unghost_procs[j], GHOST_NODE_VERTICES_WANTED,
00736                           (MPI_Comm)communicator, &( requests_unghost[j] ) );
00737         CHECK_MPI( rval, err );
00738     }
00739 
00740     /* then listen to as many processors as there are that want updates from us */
00741     std::vector< MPI_Request > requests_updates( update_num_procs );
00742     update_procs_num_vtx.resize( update_num_procs );
00743     for( j = 0; j < update_num_procs; j++ )
00744     {
00745         rval = MPI_Irecv( &( update_procs_num_vtx[j] ), 1, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_VERTICES_WANTED,
00746                           (MPI_Comm)communicator, &( requests_updates[j] ) );
00747         CHECK_MPI( rval, err );
00748     }
00749 
00750     /* wait until we have heard from all processors how many ghost nodes updates they want from us
00751      */
00752     std::vector< MPI_Status > status_update( update_num_procs );
00753     update_procs.resize( update_num_procs );
00754     rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) );
00755     CHECK_MPI( rval, err );
00756     for( j = 0; j < update_num_procs; j++ )
00757     {
00758         update_procs[j] = status_update[j].MPI_SOURCE;
00759         // printf("[%d] i have to send %d vertices to processor %d\n", rank,
00760         // update_procs_num_vtx[j], update_procs[j]);
00761     }
00762 
00763     /* count the total number of vertices that we need to update elsewhere */
00764     update_procs_offset.resize( update_num_procs + 1 );
00765     update_num_vtx         = 0;
00766     update_procs_offset[0] = 0;
00767     for( j = 0; j < update_num_procs; j++ )
00768     {
00769         update_num_vtx += update_procs_num_vtx[j];
00770         update_procs_offset[j + 1] = update_num_vtx;
00771     }
00772 
00773     /* create enough space to receive all the vertex indices */
00774     update_gid.resize( update_num_vtx );
00775 
00776     /* tell each processor which vertices we want from them */
00777     for( j = 0; j < unghost_num_procs; j++ )
00778     {
00779         rval = MPI_Isend( &( unghost_gid[unghost_procs_offset[j]] ), unghost_procs_num_vtx[j],
00780                           sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_GIDS,
00781                           (MPI_Comm)communicator, &( requests_unghost[j] ) );
00782         CHECK_MPI( rval, err );
00783     }
00784 
00785     /* receive from each processor the info which vertices they want from us */
00786     for( j = 0; j < update_num_procs; j++ )
00787     {
00788         rval = MPI_Irecv( &( update_gid[update_procs_offset[j]] ), update_procs_num_vtx[j],
00789                           sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, update_procs[j], GHOST_NODE_VERTEX_GIDS,
00790                           (MPI_Comm)communicator, &( requests_updates[j] ) );
00791         CHECK_MPI( rval, err );
00792     }
00793 
00794     /* wait until we have heard from all processors which vertices they want from us */
00795     rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) );
00796     CHECK_MPI( rval, err );
00797 
00798     /* wait until we have sent to all processors which vertices we want from them */
00799     std::vector< MPI_Status > status_unghost( unghost_num_procs );
00800     rval = MPI_Waitall( unghost_num_procs, ARRPTR( requests_unghost ), ARRPTR( status_unghost ) );
00801     CHECK_MPI( rval, err );
00802 
00803     /*
00804     for (j = 0; j < update_num_procs; j++)
00805     {
00806       printf("[%d] will send to proc %d:", rank, update_procs[j]);
00807       for (i = update_procs_offset[j]; i < update_procs_offset[j+1]; i++) printf(" %d",
00808     update_gid[i]); printf("\n");
00809     }
00810     */
00811 
00812     /***********************************************************************
00813    COMPUTE THE SET OF NEIGHBORS THAT OUR VERTICES HAVE ON OTHER PROCESSORS
00814                  COMPUTE THE SET OF NEIGHBORS PROCESSORS
00815     ***********************************************************************/
00816 
00817     if( num_vtx_partition_boundary_local == 0 )
00818     {
00819         /* this processor does not partake in the boundary smoothing */
00820         neighbourProc.clear();
00821         mesh->tag_delete( lid_tag, err );
00822         vtx_partition_boundary_map_inverse.clear();
00823         return;
00824     }
00825 
00826     /* init the neighbour processor list */
00827     neighbourProc.clear();
00828 
00829     /* init the neighbour lists */
00830 
00831     vtx_off_proc_list.clear();
00832     vtx_off_proc_list.resize( num_vtx_partition_boundary_local );
00833 
00834     /* get the adjacency arrays that we need */
00835     std::vector< MBMesquite::Mesh::ElementHandle > adj_elements;
00836     std::vector< MBMesquite::Mesh::VertexHandle > adj_adj_vertices;
00837     std::vector< size_t > elem_offsets;
00838     std::vector< size_t > adj_vtx_offsets;
00839     mesh->vertices_get_attached_elements( ARRPTR( part_vertices ), num_vtx_partition_boundary_local, adj_elements,
00840                                           elem_offsets, err );
00841     mesh->elements_get_attached_vertices( ARRPTR( adj_elements ), adj_elements.size(), adj_adj_vertices,
00842                                           adj_vtx_offsets, err );
00843     // delete adj_elements; adj_elements = 0;
00844     std::vector< int > adj_adj_vertices_lid( adj_adj_vertices.size() );
00845     mesh->tag_get_vertex_data( lid_tag, adj_adj_vertices.size(), ARRPTR( adj_adj_vertices ),
00846                                ARRPTR( adj_adj_vertices_lid ), err );
00847     // delete adj_adj_vertices; adj_adj_vertices = 0;
00848     mesh->tag_delete( lid_tag, err );
00849 
00850     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00851     {
00852         /* loop over the elements surrounding that vertex */
00853         for( j = elem_offsets[i]; j < (int)( elem_offsets[i + 1] ); j++ )
00854         {
00855             /* loop over the neighbors of the considered vertex (i.e. the vertices of these element)
00856              */
00857             for( k = adj_vtx_offsets[j]; k < (int)( adj_vtx_offsets[j + 1] ); k++ )
00858             {
00859                 /* get the next neighbour */
00860                 incident_vtx = adj_adj_vertices_lid[k];
00861                 /* if this neighbour is a vertex that is smoothed on a different processor */
00862                 if( vtx_in_partition_boundary[incident_vtx] == 2 )
00863                 {
00864                     /* then map it into our domain */
00865                     incident_vtx = vtx_partition_boundary_map_inverse[incident_vtx];
00866                     /* is this vertex already in our neighbour list ? */
00867                     if( std::find( vtx_off_proc_list[i].begin(), vtx_off_proc_list[i].end(), incident_vtx ) ==
00868                         vtx_off_proc_list[i].end() )
00869                     {
00870                         /* if the vertex is not in the list yet ... add it */
00871                         vtx_off_proc_list[i].push_back( incident_vtx );
00872                         /* is the processor of this vertex already in the processor list */
00873                         incident_vtx = part_proc_owner[incident_vtx];
00874                         /* check by scanning the list for this processor */
00875                         if( std::find( neighbourProc.begin(), neighbourProc.end(), incident_vtx ) ==
00876                             neighbourProc.end() )
00877                         {
00878                             /* the processor is not in the list yet ... add it */
00879                             neighbourProc.push_back( incident_vtx );
00880                         }
00881                     }
00882                 }
00883             }
00884         }
00885     }
00886 
00887     /* sort the list of neighbour processors */
00888 
00889     std::sort( neighbourProc.begin(), neighbourProc.end() );
00890 
00891     /***********************************************************************
00892       COMPUTE HOW MANY VERTICES WE NEED TO SEND/RECV FROM EACH PROCESSOR
00893     ***********************************************************************/
00894     neighbourProcSend.clear();
00895     neighbourProcRecv.clear();
00896     neighbourProcSendRemain.clear();
00897     neighbourProcRecvRemain.clear();
00898 
00899     if( communication_model & 1 )  // AVOID_ALL_REDUCE
00900     {
00901         total_num_vertices_to_smooth = num_vtx_partition_boundary_local;
00902         total_num_vertices_to_recv   = num_vtx_partition_boundary_remote;
00903         neighbourProcSend.resize( neighbourProc.size(), 0 );
00904         neighbourProcRecv.resize( neighbourProc.size(), 0 );
00905 
00906         /* for each vertex we smooth find the processors we need to send it too */
00907         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00908         {
00909             /* loop over its adjacent off-processor vertices */
00910             for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
00911             {
00912                 /* get the processor id of these vertices */
00913                 incident_vtx = part_proc_owner[vtx_off_proc_list[i][j]];
00914                 /* check if we got this processor id before */
00915                 for( k = 0; k < j; k++ )
00916                 {
00917                     if( incident_vtx == part_proc_owner[vtx_off_proc_list[i][k]] )
00918                     {
00919                         /* if we have has this procesor id already we do not need to count it again
00920                          */
00921                         incident_vtx = -1;
00922                         break;
00923                     }
00924                 }
00925                 /* if this was a new processor id */
00926                 if( incident_vtx != -1 )
00927                 {
00928                     /* find the processor in the list and increment its counter */
00929                     for( l = 0; l < neighbourProc.size(); l++ )
00930                     {
00931                         if( neighbourProc[l] == incident_vtx )
00932                         {
00933                             neighbourProcSend[l]++;
00934                             break;
00935                         }
00936                     }
00937                 }
00938             }
00939         }
00940         for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
00941         {
00942             incident_vtx = part_proc_owner[i];
00943             for( l = 0; l < neighbourProc.size(); l++ )
00944             {
00945                 if( neighbourProc[l] == incident_vtx )
00946                 {
00947                     neighbourProcRecv[l]++;
00948                     break;
00949                 }
00950             }
00951         }
00952         neighbourProcSendRemain.resize( neighbourProc.size() );
00953         neighbourProcRecvRemain.resize( neighbourProc.size() );
00954     }
00955 
00956     exportVtxGIDs.resize( num_vtx_partition_boundary );
00957     exportVtxLIDs.resize( num_vtx_partition_boundary );
00958     exportProc.resize( num_vtx_partition_boundary );
00959     in_independent_set.resize( num_vtx_partition_boundary_local );
00960 }
00961 
00962 void ParallelHelperImpl::compute_first_independent_set( std::vector< Mesh::VertexHandle >& fixed_vertices )
00963 {
00964     if( nprocs == 1 ) return;
00965 
00966     int i;
00967 
00968     // to avoid all reduce we need to know how many vertices we send & receive
00969     if( communication_model & 1 )  // AVOID_ALL_REDUCE
00970     {
00971         for( i = 0; i < (long)neighbourProc.size(); i++ )
00972         {
00973             neighbourProcSendRemain[i] = neighbourProcSend[i];
00974             neighbourProcRecvRemain[i] = neighbourProcRecv[i];
00975         }
00976     }
00977 
00978     // this is iteration zero of the bounday smooting process
00979     iteration = 0;
00980 
00981     // mark all boundary partition vertices as not smoothed
00982     for( i = 0; i < num_vtx_partition_boundary; i++ )
00983     {
00984         part_smoothed_flag[i] = 0;
00985     }
00986 
00987     // populates the in_independent_set and the vertex export arrays
00988     compute_independent_set();
00989 
00990     // counts how many vertices are already smoothed
00991     num_already_smoothed_vertices = 0;
00992 
00993     fixed_vertices.clear();
00994 
00995     /* mark which local boundary partition vertices are already smoothed (i.e. they are in the 1st
00996      * independent set) */
00997     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00998     {
00999         if( in_independent_set[i] )
01000         {
01001             part_smoothed_flag[i] = 1;
01002             num_already_smoothed_vertices++;
01003         }
01004         else
01005         {
01006             fixed_vertices.push_back( part_vertices[i] );  // fix vertices *not* in the independent set
01007         }
01008     }
01009 
01010     if( false )
01011     {
01012         printf( "[%d]i%d after first we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices,
01013                 num_vtx_partition_boundary_local );
01014         fflush( NULL );
01015     }
01016 
01017     // fix the ghost vertices that are smoothed on another processor
01018     for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
01019     {
01020         fixed_vertices.push_back( part_vertices[i] );
01021     }
01022 
01023     // fix the ghost vertices that are unused
01024     for( i = 0; i < (int)( unghost_vertices.size() ); i++ )
01025     {
01026         fixed_vertices.push_back( unghost_vertices[i] );
01027     }
01028 }
01029 
01030 void ParallelHelperImpl::communicate_first_independent_set( MsqError& err )
01031 {
01032     if( nprocs == 1 ) return;
01033 
01034     switch( communication_model )
01035     {
01036         case TrulyNonBlocking:
01037             num_already_recv_vertices = comm_smoothed_vtx_tnb( err );
01038             break;
01039         case TrulyNonBlockingAvoidAllReduce:
01040             num_already_recv_vertices = comm_smoothed_vtx_tnb_no_all( err );
01041             break;
01042         case NonBlocking:
01043             num_already_recv_vertices = comm_smoothed_vtx_nb( err );
01044             break;
01045         case NonBlockingAvoidAllReduce:
01046             num_already_recv_vertices = comm_smoothed_vtx_nb_no_all( err );
01047             break;
01048         case Blocking:
01049             num_already_recv_vertices = comm_smoothed_vtx_b( err );
01050             break;
01051         case BlockingAvoidAllReduce:
01052             num_already_recv_vertices = comm_smoothed_vtx_b_no_all( err );
01053             break;
01054     }
01055     global_work_remains = ( neighbourProc.size() ? 1 : 0 );MSQ_CHKERR( err );
01056 }
01057 
01058 bool ParallelHelperImpl::compute_next_independent_set()
01059 {
01060     if( nprocs == 1 ) return false;
01061 
01062     if( global_work_remains && ( iteration < 20 ) )
01063     {
01064         iteration++;
01065         if( false ) printf( "[%d] work remains %d after %d iterations\n", rank, global_work_remains, iteration );
01066         compute_independent_set();
01067         next_vtx_partition_boundary = 0;
01068         return true;
01069     }
01070     else
01071     {
01072         if( global_work_remains )
01073         {
01074             printf( "WARNING: global work remains %d after %d iterations\n", global_work_remains, iteration );
01075         }
01076         return false;
01077     }
01078 }
01079 
01080 bool ParallelHelperImpl::get_next_partition_boundary_vertex( MBMesquite::Mesh::VertexHandle& vertex_handle )
01081 {
01082     while( next_vtx_partition_boundary < num_vtx_partition_boundary_local )
01083     {
01084         if( in_independent_set[next_vtx_partition_boundary] )
01085         {
01086             vertex_handle = part_vertices[next_vtx_partition_boundary];
01087             num_already_smoothed_vertices++;
01088             assert( part_smoothed_flag[next_vtx_partition_boundary] == 0 );
01089             part_smoothed_flag[next_vtx_partition_boundary] = 1;
01090             next_vtx_partition_boundary++;
01091             return true;
01092         }
01093         next_vtx_partition_boundary++;
01094     }
01095     if( false )
01096     {
01097         printf( "[%d]i%d after next we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices,
01098                 num_vtx_partition_boundary_local );
01099         fflush( NULL );
01100     }
01101     return false;
01102 }
01103 
01104 void ParallelHelperImpl::communicate_next_independent_set( MsqError& err )
01105 {
01106     switch( communication_model )
01107     {
01108         case TrulyNonBlocking:
01109             num_already_recv_vertices += comm_smoothed_vtx_tnb( err );
01110             break;
01111         case TrulyNonBlockingAvoidAllReduce:
01112             num_already_recv_vertices += comm_smoothed_vtx_tnb_no_all( err );
01113             break;
01114         case NonBlocking:
01115             num_already_recv_vertices += comm_smoothed_vtx_nb( err );
01116             break;
01117         case NonBlockingAvoidAllReduce:
01118             num_already_recv_vertices += comm_smoothed_vtx_nb_no_all( err );
01119             break;
01120         case Blocking:
01121             num_already_recv_vertices += comm_smoothed_vtx_b( err );
01122             break;
01123         case BlockingAvoidAllReduce:
01124             num_already_recv_vertices += comm_smoothed_vtx_b_no_all( err );
01125             break;
01126     }
01127 
01128     if( communication_model & 1 )  // AVOID_ALL_REDUCE
01129     {
01130         global_work_remains = ( total_num_vertices_to_smooth - num_already_smoothed_vertices ) +
01131                               ( total_num_vertices_to_recv - num_already_recv_vertices );
01132         if( false )
01133         {
01134             printf( "[%d]i%d %d - %d + %d  - %d = %d \n", rank, iteration, total_num_vertices_to_smooth,
01135                     num_already_smoothed_vertices, total_num_vertices_to_recv, num_already_recv_vertices,
01136                     global_work_remains );
01137             fflush( NULL );
01138         }
01139     }
01140     else
01141     {
01142         int i, work_remains = 0;
01143         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
01144         {
01145             if( part_smoothed_flag[i] == 0 ) { work_remains++; }
01146         }
01147         int rval = MPI_Allreduce( &work_remains, &global_work_remains, 1, MPI_INT, MPI_SUM, (MPI_Comm)communicator );
01148         CHECK_MPI( rval, err );
01149     }
01150 }
01151 
01152 void ParallelHelperImpl::smoothing_close( MsqError& err )
01153 {
01154     int i, j, rval;
01155 
01156     if( nprocs == 1 ) return;
01157 
01158     //  printf("[%d] used %d iterations\n", rank, iteration);
01159 
01160     // communicate unused ghost nodes
01161 
01162     std::vector< double > update_updates;
01163     std::vector< MPI_Request > update_requests;
01164 
01165     if( update_num_procs )
01166     {
01167         /* get the tags so we can find the requested vertices */
01168         std::vector< size_t > gid( num_vertex );
01169         mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err );
01170         std::vector< unsigned char > app_fixed( num_vertex );
01171         mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err );
01172         std::vector< int > proc_owner( num_vertex );
01173         mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err );
01174 
01175         if( false )
01176         {
01177             int ncull = 0;
01178             for( i = 0; i < num_vertex; ++i )
01179             {
01180                 if( app_fixed[i] & MsqVertex::MSQ_CULLED ) { ++ncull; }
01181             }
01182             std::cout << "P[" << rank << "] ncull= " << ncull << " num_vertex= " << num_vertex << std::endl;
01183         }
01184 
01185         /* only interested in fixed flag from vertex byte? Clear others. */
01186         // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed
01187         for( i = 0; i < num_vertex; ++i )
01188             app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED;
01189 
01190         /* insert all our unfixed vertices into a map so we can find the requested vertices
01191          * efficiently */
01192         VertexIdMap temp_vid_map;
01193         for( j = 0; j < num_vertex; j++ )
01194         {
01195             if( proc_owner[j] == rank && app_fixed[j] == false ) { vertex_map_insert( temp_vid_map, gid[j], rank, j ); }
01196         }
01197 
01198         /* deallocate the tags */
01199         // delete [] gid; gid = 0;
01200         // delete [] app_fixed; app_fixed = 0;
01201         // delete [] proc_owner; proc_owner = 0;
01202 
01203         /* find the requested updates and collect them into an array */
01204         MBMesquite::MsqVertex coordinates;
01205         update_updates.resize( update_num_vtx * 3 );
01206         for( i = 0; i < update_num_vtx; i++ )
01207         {
01208             j = vertex_map_find( temp_vid_map, update_gid[i], rank );
01209             mesh->vertices_get_coordinates( &( vertices[j] ), &coordinates, 1, err );MSQ_ERRRTN( err );
01210             update_updates[3 * i + 0] = coordinates[0];
01211             update_updates[3 * i + 1] = coordinates[1];
01212             update_updates[3 * i + 2] = coordinates[2];
01213             //      printf("[%d] send gid %d with %g %g %g\n", rank, update_gid[i], coordinates[0],
01214             //      coordinates[1], coordinates[2]);
01215         }
01216 
01217         /* deallocate the map and the gid array */
01218         // delete temp_vid_map; temp_vid_map = 0;
01219         // delete [] update_gid; update_gid = 0;
01220 
01221         update_requests.resize( update_num_procs );
01222         /* send each processor the unused ghost node updates that they requested */
01223         for( j = 0; j < update_num_procs; j++ )
01224         {
01225             rval = MPI_Isend( &( update_updates[update_procs_offset[j] * 3] ), update_procs_num_vtx[j] * 3, MPI_DOUBLE,
01226                               update_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator,
01227                               &( update_requests[j] ) );
01228             //      printf("[%d] sending %d of %d from %d with offset %d \n", rank,
01229             //      update_procs_num_vtx[j], update_num_vtx, update_procs[j],
01230             //      update_procs_offset[j]);
01231             CHECK_MPI( rval, err );
01232         }
01233 
01234         /* deallocate more arrays that we no longer need */
01235         // delete [] update_procs_offset; update_procs_offset = 0;
01236         // delete [] update_procs_num_vtx; update_procs_num_vtx = 0;
01237         // delete [] update_procs; update_procs = 0;
01238     }
01239 
01240     if( unghost_num_procs )
01241     {
01242         std::vector< MPI_Request > unghost_requests( unghost_num_procs );
01243         /* receive from each processor the unused ghost nodes updates i want from them */
01244         std::vector< double > unghost_updates( unghost_num_vtx * 3 );
01245         for( j = 0; j < unghost_num_procs; j++ )
01246         {
01247             rval = MPI_Irecv( &( unghost_updates[unghost_procs_offset[j] * 3] ), unghost_procs_num_vtx[j] * 3,
01248                               MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator,
01249                               &( unghost_requests[j] ) );
01250             //      printf("[%d] receiving %d of %d from %d with offset %d \n", rank,
01251             //      unghost_procs_num_vtx[j], unghost_num_vtx, unghost_procs[j],
01252             //      unghost_procs_offset[j]);
01253             CHECK_MPI( rval, err );
01254         }
01255 
01256         /* deallocate more arrays that we no longer need */
01257         // delete [] unghost_procs_offset; unghost_procs_offset = 0;
01258         // delete [] unghost_procs_num_vtx; unghost_procs_num_vtx = 0;
01259         // delete [] unghost_procs; unghost_procs = 0;
01260 
01261         std::vector< MPI_Status > status( unghost_num_procs );
01262         rval = MPI_Waitall( unghost_num_procs, ARRPTR( unghost_requests ), ARRPTR( status ) );
01263         CHECK_MPI( rval, err );
01264 
01265         /* apply the received updates for the unused ghost vertices */
01266         for( i = 0; i < unghost_num_vtx; i++ )
01267         {
01268             MBMesquite::Vector3D coordinates;
01269             coordinates[0] = unghost_updates[3 * i + 0];
01270             coordinates[1] = unghost_updates[3 * i + 1];
01271             coordinates[2] = unghost_updates[3 * i + 2];
01272             //      printf("[%d] recv %g %g %g\n", rank, coordinates[0], coordinates[1],
01273             //      coordinates[2]);
01274             mesh->vertex_set_coordinates( unghost_vertices[i], coordinates, err );MSQ_ERRRTN( err );
01275         }
01276 
01277         /* deallocate more arrays that we no longer need */
01278         // delete unghost_vertices; unghost_vertices = 0;
01279         // delete [] unghost_updates; unghost_updates = 0;
01280         // delete [] unghost_requests; unghost_requests = 0;
01281     }
01282 
01283     // if (update_num_procs)
01284     //{
01285     //  delete [] update_updates; update_updates = 0;
01286     //  delete [] update_requests; update_requests = 0;
01287     //}
01288 
01289     // if (vertices) delete vertices; vertices = 0;
01290     // if (vtx_in_partition_boundary) delete [] vtx_in_partition_boundary; vtx_in_partition_boundary
01291     // = 0; if (part_vertices) delete part_vertices; part_vertices = 0; if (unghost_vertices) delete
01292     // unghost_vertices; unghost_vertices = 0; if (part_proc_owner) delete [] part_proc_owner;
01293     // part_proc_owner = 0; if (part_gid) delete [] part_gid; part_gid = 0; if (part_smoothed_flag)
01294     // delete [] part_smoothed_flag; part_smoothed_flag = 0; if (part_rand_number) delete []
01295     // part_rand_number; part_rand_number = 0; if (exportVtxGIDs) delete [] exportVtxGIDs;
01296     // exportVtxGIDs = 0; if (exportVtxLIDs) delete [] exportVtxLIDs; exportVtxLIDs = 0; if
01297     // (exportProc) delete [] exportProc; exportProc = 0; if (in_independent_set) delete []
01298     // in_independent_set; in_independent_set = 0; if (vid_map) delete vid_map; vid_map = 0; if
01299     // (neighbourProcSend) delete [] neighbourProcSend; neighbourProcSend = 0; if (neighbourProcRecv)
01300     // delete [] neighbourProcRecv; neighbourProcRecv = 0; if (neighbourProcSendRemain) delete []
01301     // neighbourProcSendRemain; neighbourProcSendRemain = 0; if (neighbourProcRecvRemain) delete []
01302     // neighbourProcRecvRemain; neighbourProcRecvRemain = 0; if (vtx_off_proc_list_size) delete []
01303     // vtx_off_proc_list_size; vtx_off_proc_list_size = 0; if (vtx_off_proc_list) {
01304     //  for (i = 0; i < num_vtx_partition_boundary_local; i++) free(vtx_off_proc_list[i]);
01305     //  delete [] vtx_off_proc_list; vtx_off_proc_list = 0;
01306     //}
01307     // if (neighbourProc) free(neighbourProc); neighbourProc = 0;
01308 }
01309 
01310 typedef struct VertexPack
01311 {
01312     double x;
01313     double y;
01314     double z;
01315     union
01316     {
01317         double mist;
01318         size_t glob_id;
01319     };
01320 } VertexPack;
01321 
01322 int ParallelHelperImpl::comm_smoothed_vtx_tnb( MsqError& err )
01323 {
01324     int i, j, k, rval;
01325 
01326     // printf("[%d]i%d truly non blocking\n",rank, iteration);fflush(NULL);
01327 
01328     /* compute how many vertices we send to each processor */
01329 
01330     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
01331     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
01332     for( i = 0; i < num_exportVtx; i++ )
01333     {
01334         for( j = 0; j < (long)neighbourProc.size(); j++ )
01335         {
01336             if( exportProc[i] == neighbourProc[j] )
01337             {
01338                 /* increment count */
01339                 numVtxPerProcSend[j]++;
01340                 /* end loop */
01341                 break;
01342             }
01343         }
01344         /* did loop end without finding the processor */
01345         if( j == (long)neighbourProc.size() )
01346         {
01347             printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration,
01348                     i, exportProc[i], (unsigned long)neighbourProc.size() );
01349         }
01350     }
01351 
01352     /* tell each processor how many vertices they can expect from us */
01353     /* also ask each processor how many vertices we can expect from them */
01354 
01355     int num_neighbourProcSend = 0;
01356     int num_neighbourProcRecv = 0;
01357     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01358     std::vector< MPI_Request > requests_recv( neighbourProc.size() );
01359     for( j = 0; j < (long)neighbourProc.size(); j++ )
01360     {
01361         /* send the vertex count to this processor */
01362         if( 0 )
01363         {
01364             printf( "[%d]i%d Announce send %d vertices from proc %d\n", rank, iteration, numVtxPerProcSend[j],
01365                     neighbourProc[j] );
01366             fflush( NULL );
01367         }
01368         rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01369                           (MPI_Comm)communicator, &( requests_send[j] ) );
01370         CHECK_MPI_RZERO( rval, err );
01371         num_neighbourProcSend++;
01372 
01373         /* recv the vertex count for this processor */
01374         if( 0 )
01375         {
01376             printf( "[%d]i%d Listen  recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[j],
01377                     neighbourProc[j] );
01378             fflush( NULL );
01379         }
01380         rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01381                           (MPI_Comm)communicator, &( requests_recv[j] ) );
01382         CHECK_MPI_RZERO( rval, err );
01383         num_neighbourProcRecv++;
01384     }
01385 
01386     /* set up memory for the outgoing vertex data blocks */
01387 
01388     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01389     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01390     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01391     for( i = 1; i < (long)neighbourProc.size(); i++ )
01392     {
01393         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01394     }
01395 
01396     /* place vertex data going to the same processor into consecutive memory space */
01397 
01398     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size() );
01399     for( i = 0; i < (long)neighbourProc.size(); i++ )
01400     {
01401         numVtxPerProcSendPACKED[i] = 0;
01402     }
01403     for( i = 0; i < num_exportVtx; i++ )
01404     {
01405         for( j = 0; j < (long)neighbourProc.size(); j++ )
01406         {
01407             if( exportProc[i] == neighbourProc[j] )
01408             {
01409                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01410                 numVtxPerProcSendPACKED[j]++;
01411                 MBMesquite::MsqVertex coordinates;
01412                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01413                 MSQ_ERRZERO( err );
01414                 packing_vertex->x       = coordinates[0];
01415                 packing_vertex->y       = coordinates[1];
01416                 packing_vertex->z       = coordinates[2];
01417                 packing_vertex->glob_id = exportVtxGIDs[i];
01418             }
01419         }
01420     }
01421 
01422     /* wait until we have heard from all processors how many vertices we will receive from them */
01423 
01424     std::vector< MPI_Status > status( neighbourProc.size() );
01425 
01426     if( num_neighbourProcRecv )
01427     {
01428         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) );
01429         CHECK_MPI_RZERO( rval, err );
01430     }
01431 
01432     /* how many vertices will we receive */
01433 
01434     int numVtxImport = 0;
01435     for( i = 0; i < (long)neighbourProc.size(); i++ )
01436     {
01437         numVtxImport += numVtxPerProcRecv[i];
01438     }
01439 
01440     /* set up memory for the incoming vertex data blocks */
01441 
01442     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01443     std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() );
01444     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01445     for( i = 1; i < (long)neighbourProc.size(); i++ )
01446     {
01447         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1];
01448     }
01449 
01450     /* post receives for all processors that have something for us */
01451     /* post sends for all processors that we have something for */
01452 
01453     num_neighbourProcRecv = 0;
01454 
01455     for( i = 0; i < (long)neighbourProc.size(); i++ )
01456     {
01457         if( numVtxPerProcRecv[i] )
01458         {
01459             if( 0 )
01460             {
01461                 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01462                         neighbourProc[i] );
01463                 fflush( NULL );
01464             }
01465             rval = MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE, neighbourProc[i],
01466                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) );
01467             CHECK_MPI_RZERO( rval, err );
01468             num_neighbourProcRecv++;
01469         }
01470         else
01471         {
01472             requests_recv[i] = MPI_REQUEST_NULL;
01473         }
01474         if( numVtxPerProcSend[i] )
01475         {
01476             if( 0 )
01477             {
01478                 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i],
01479                         neighbourProc[i] );
01480                 fflush( NULL );
01481             }
01482             rval = MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE, neighbourProc[i],
01483                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) );
01484             CHECK_MPI_RZERO( rval, err );
01485         }
01486         else
01487         {
01488             requests_send[i] = MPI_REQUEST_NULL;
01489         }
01490     }
01491 
01492     /* wait for some receive to arrive */
01493 
01494     int local_id;
01495     while( num_neighbourProcRecv )
01496     {
01497         rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) );
01498         CHECK_MPI_RZERO( rval, err );
01499         /* unpack all vertices */
01500         for( i = 0; i < numVtxPerProcRecv[k]; i++ )
01501         {
01502             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01503             if( local_id )
01504             {
01505                 MBMesquite::Vector3D coordinates;
01506                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01507                                  packed_vertices_import[k][i].z );
01508                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01509                 MSQ_ERRZERO( err );
01510                 assert( part_smoothed_flag[local_id] == 0 );
01511                 part_smoothed_flag[local_id] = 1;
01512             }
01513             else
01514             {
01515                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01516                         packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01517             }
01518         }
01519         num_neighbourProcRecv--;
01520     }
01521 
01522     /* all receives have completed. it is save to release the memory */
01523     // free(vertex_pack_import);
01524 
01525     /* wait until the sends have completed */
01526 
01527     if( num_neighbourProcSend )
01528     {
01529         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) );
01530         CHECK_MPI_RZERO( rval, err );
01531     }
01532 
01533     /* all sends have completed. it is save to release the memory */
01534     // free(vertex_pack_export);
01535 
01536     return numVtxImport;
01537 }
01538 
01539 int ParallelHelperImpl::comm_smoothed_vtx_tnb_no_all( MsqError& err )
01540 {
01541     int i, j, k, rval;
01542 
01543     // printf("[%d]i%d truly non blocking avoid reduce all\n", rank, iteration); fflush(NULL);
01544 
01545     /* compute how many vertices we send to each processor */
01546 
01547     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
01548     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
01549     for( i = 0; i < num_exportVtx; i++ )
01550     {
01551         for( j = 0; j < (long)neighbourProc.size(); j++ )
01552         {
01553             if( exportProc[i] == neighbourProc[j] )
01554             {
01555                 /* increment count */
01556                 numVtxPerProcSend[j]++;
01557                 /* end loop */
01558                 break;
01559             }
01560         }
01561         /* did loop end without finding the processor */
01562         if( j == (long)neighbourProc.size() )
01563         {
01564             printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration,
01565                     i, exportProc[i], (unsigned long)neighbourProc.size() );
01566         }
01567     }
01568 
01569     /* tell each processor how many vertices they can expect from us */
01570     /* also ask each processor how many vertices we can expect from them */
01571 
01572     int num_neighbourProcSend = 0;
01573     int num_neighbourProcRecv = 0;
01574     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01575     std::vector< MPI_Request > requests_recv( neighbourProc.size() );
01576     for( j = 0; j < (long)neighbourProc.size(); j++ )
01577     {
01578         if( neighbourProcSendRemain[j] )
01579         {
01580             /* send the vertex count to this processor */
01581             if( 0 )
01582             {
01583                 printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01584                         neighbourProc[j] );
01585                 fflush( NULL );
01586             }
01587             rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01588                               (MPI_Comm)communicator, &( requests_send[j] ) );
01589             CHECK_MPI_RZERO( rval, err );
01590             num_neighbourProcSend++;
01591         }
01592         else
01593         {
01594             requests_send[j] = MPI_REQUEST_NULL;
01595         }
01596         if( neighbourProcRecvRemain[j] )
01597         {
01598             /* recv the vertex count for this processor */
01599             if( 0 )
01600             {
01601                 printf( "[%d]i%d Listen recv xx vertices from proc %d\n", rank, iteration, neighbourProc[j] );
01602                 fflush( NULL );
01603             }
01604             rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01605                               (MPI_Comm)communicator, &( requests_recv[j] ) );
01606             CHECK_MPI_RZERO( rval, err );
01607             num_neighbourProcRecv++;
01608         }
01609         else
01610         {
01611             requests_recv[j] = MPI_REQUEST_NULL;
01612         }
01613     }
01614 
01615     /* set up memory for the outgoing vertex data blocks */
01616 
01617     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01618     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01619     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01620     for( i = 1; i < (long)neighbourProc.size(); i++ )
01621     {
01622         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01623     }
01624 
01625     /* place vertex data going to the same processor into consecutive memory space */
01626 
01627     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
01628     for( i = 0; i < num_exportVtx; i++ )
01629     {
01630         for( j = 0; j < (long)neighbourProc.size(); j++ )
01631         {
01632             if( exportProc[i] == neighbourProc[j] )
01633             {
01634                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01635                 numVtxPerProcSendPACKED[j]++;
01636                 MBMesquite::MsqVertex coordinates;
01637                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01638                 MSQ_ERRZERO( err );
01639                 packing_vertex->x       = coordinates[0];
01640                 packing_vertex->y       = coordinates[1];
01641                 packing_vertex->z       = coordinates[2];
01642                 packing_vertex->glob_id = exportVtxGIDs[i];
01643                 if( 0 )
01644                     printf( "[%d]i%d vertex %lu packed %g %g %g\n", rank, iteration, (unsigned long)exportVtxGIDs[i],
01645                             packing_vertex->x, packing_vertex->y, packing_vertex->z );
01646             }
01647         }
01648     }
01649 
01650     /* wait until we have heard from all processors how many vertices we will receive from them */
01651 
01652     std::vector< MPI_Status > status( neighbourProc.size() );
01653 
01654     if( num_neighbourProcRecv )
01655     {
01656         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) );
01657         CHECK_MPI_RZERO( rval, err );
01658     }
01659 
01660     /* how many vertices will we receive */
01661 
01662     int numVtxImport = 0;
01663     for( i = 0; i < (long)neighbourProc.size(); i++ )
01664     {
01665         numVtxImport += numVtxPerProcRecv[i];
01666         neighbourProcRecvRemain[i] -= numVtxPerProcRecv[i];
01667         neighbourProcSendRemain[i] -= numVtxPerProcSend[i];
01668     }
01669 
01670     /* set up memory for the incoming vertex data blocks */
01671 
01672     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01673     std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() );
01674     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01675     for( i = 1; i < (long)neighbourProc.size(); i++ )
01676     {
01677         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1];
01678     }
01679 
01680     /* post receives for all processors that have something for us */
01681     /* post sends for all processors that we have something for */
01682 
01683     num_neighbourProcRecv = 0;
01684 
01685     for( i = 0; i < (long)neighbourProc.size(); i++ )
01686     {
01687         if( 0 )
01688         {
01689             printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01690                     neighbourProc[i] );
01691             fflush( NULL );
01692         }
01693         if( numVtxPerProcRecv[i] )
01694         {
01695             rval = MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE, neighbourProc[i],
01696                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) );
01697             CHECK_MPI_RZERO( rval, err );
01698             num_neighbourProcRecv++;
01699         }
01700         else
01701         {
01702             requests_recv[i] = MPI_REQUEST_NULL;
01703         }
01704         if( 0 )
01705         {
01706             printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i],
01707                     neighbourProc[i] );
01708             fflush( NULL );
01709         }
01710         if( numVtxPerProcSend[i] )
01711         {
01712             rval = MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE, neighbourProc[i],
01713                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) );
01714             CHECK_MPI_RZERO( rval, err );
01715         }
01716         else
01717         {
01718             requests_send[i] = MPI_REQUEST_NULL;
01719         }
01720     }
01721 
01722     /* wait for some receive to arrive */
01723 
01724     int local_id;
01725     while( num_neighbourProcRecv )
01726     {
01727         rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) );
01728         CHECK_MPI_RZERO( rval, err );
01729         /* unpack all vertices */
01730         for( i = 0; i < numVtxPerProcRecv[k]; i++ )
01731         {
01732             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01733             if( local_id )
01734             {
01735                 MBMesquite::Vector3D coordinates;
01736                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01737                                  packed_vertices_import[k][i].z );
01738                 if( 0 )
01739                     printf( "[%d]i%d vertex %d becomes %g %g %g\n", rank, iteration, local_id,
01740                             packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01741                             packed_vertices_import[k][i].z );
01742                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01743                 MSQ_ERRZERO( err );
01744                 assert( part_smoothed_flag[local_id] == 0 );
01745                 part_smoothed_flag[local_id] = 1;
01746             }
01747             else
01748             {
01749                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01750                         packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01751             }
01752         }
01753         num_neighbourProcRecv--;
01754     }
01755 
01756     /* all receives have completed. it is save to release the memory */
01757     // free(vertex_pack_import);
01758 
01759     /* wait until the sends have completed */
01760 
01761     if( num_neighbourProcSend )
01762     {
01763         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) );
01764         CHECK_MPI_RZERO( rval, err );
01765     }
01766 
01767     /* all sends have completed. it is save to release the memory */
01768     // free(vertex_pack_export);
01769 
01770     return numVtxImport;
01771 }
01772 
01773 int ParallelHelperImpl::comm_smoothed_vtx_nb( MsqError& err )
01774 {
01775     int i, j, k, rval;
01776 
01777     // printf("[%d] %d %d non blocking\n",rank, iteration, pass);fflush(NULL);
01778 
01779     /* how many vertices will we receive */
01780 
01781     std::vector< int > numVtxPerProcSend( neighbourProc.size() );
01782     for( i = 0; i < (long)neighbourProc.size(); i++ )
01783     {
01784         numVtxPerProcSend[i] = 0;
01785     }
01786     for( i = 0; i < num_exportVtx; i++ )
01787     {
01788         for( j = 0; j < (long)neighbourProc.size(); j++ )
01789         {
01790             if( exportProc[i] == neighbourProc[j] )
01791             {
01792                 /* increment count */
01793                 numVtxPerProcSend[j]++;
01794                 /* end loop */
01795                 break;
01796             }
01797         }
01798         /* assert loop did not end without finding processor */
01799         assert( j != (long)neighbourProc.size() );
01800     }
01801 
01802     /* tell each processor how many vertices to expect */
01803 
01804     for( j = 0; j < (long)neighbourProc.size(); j++ )
01805     {
01806         rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01807                          (MPI_Comm)communicator );
01808         CHECK_MPI_RZERO( rval, err );
01809         //    printf("[%d]i%d Announcing %d vertices to proc
01810         //    %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL);
01811     }
01812 
01813     /* place vertex data going to the same processor into consecutive memory space */
01814     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01815     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01816     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01817     for( i = 1; i < (long)neighbourProc.size(); i++ )
01818     {
01819         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01820     }
01821 
01822     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
01823     for( i = 0; i < num_exportVtx; i++ )
01824     {
01825         for( j = 0; j < (long)neighbourProc.size(); j++ )
01826         {
01827             if( exportProc[i] == neighbourProc[j] )
01828             {
01829                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01830                 numVtxPerProcSendPACKED[j]++;
01831                 MBMesquite::MsqVertex coordinates;
01832                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01833                 MSQ_ERRZERO( err );
01834                 packing_vertex->x       = coordinates[0];
01835                 packing_vertex->y       = coordinates[1];
01836                 packing_vertex->z       = coordinates[2];
01837                 packing_vertex->glob_id = exportVtxGIDs[i];
01838             }
01839         }
01840     }
01841     // delete [] numVtxPerProcSendPACKED;
01842 
01843     /* now ask each processor how many vertices to expect */
01844 
01845     int num;
01846     int proc;
01847     int numVtxImport          = 0;
01848     int num_neighbourProcRecv = 0;
01849     std::vector< int > numVtxPerProcRecv( neighbourProc.size() );
01850     MPI_Status status;
01851     for( j = 0; j < (long)neighbourProc.size(); j++ )
01852     {
01853         numVtxPerProcRecv[j] = 0;
01854     }
01855 
01856     for( j = 0; j < (long)neighbourProc.size(); j++ )
01857     {
01858         /* get the vertex count for some processor */
01859         rval = MPI_Recv( &num,                      /* message buffer */
01860                          1,                         /* one data item */
01861                          MPI_INT,                   /* of type int */
01862                          MPI_ANY_SOURCE,            /* receive from any sender */
01863                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */
01864                          (MPI_Comm)communicator,    /* default communicator */
01865                          &status );                 /* info about the received message */
01866         CHECK_MPI_RZERO( rval, err );
01867         proc = status.MPI_SOURCE;
01868         /* will we import vertices from this processor */
01869 
01870         //    printf("[%d]i%d Heard we will receive %d vertices from proc
01871         //    %d\n",rank,iteration,num,proc); fflush(NULL);
01872 
01873         if( num )
01874         {
01875             /* increase number of processors we will receive from */
01876             num_neighbourProcRecv++;
01877             /* add number of vertices we will receive to the import total */
01878             numVtxImport += num;
01879         }
01880         /* find this processor in the list */
01881         for( i = 0; i < (long)neighbourProc.size(); i++ )
01882         {
01883             if( neighbourProc[i] == proc )
01884             {
01885                 numVtxPerProcRecv[i] = num;
01886                 break;
01887             }
01888         }
01889         /* assert loop did not end without finding processor */
01890         assert( i != (long)neighbourProc.size() );
01891         if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc );
01892         fflush( NULL );
01893     }
01894 
01895     /* create list of processors we receive from */
01896     std::vector< int > neighbourProcRecv( num_neighbourProcRecv );
01897     std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv );
01898     for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ )
01899     {
01900         if( 0 )
01901             printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01902                     neighbourProc[i] );
01903         fflush( NULL );
01904         if( numVtxPerProcRecv[i] )
01905         {
01906             neighbourProcRecv[k]     = neighbourProc[i];
01907             numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i];
01908             k++;
01909         }
01910     }
01911 
01912     /* set up memory for the incoming vertex data blocks */
01913     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01914     std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv );
01915     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01916     for( i = 1; i < num_neighbourProcRecv; i++ )
01917     {
01918         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1];
01919     }
01920 
01921     /* receive from all processors that have something for us */
01922     std::vector< MPI_Request > request( num_neighbourProcRecv );
01923     for( j = 0; j < num_neighbourProcRecv; j++ )
01924     {
01925         rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE, neighbourProcRecv[j],
01926                           VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) );
01927         CHECK_MPI_RZERO( rval, err );
01928         if( false )
01929         {
01930             printf( "[%d]i%d Scheduling receipt of %d vertices to proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j],
01931                     neighbourProcRecv[j] );
01932             fflush( NULL );
01933         }
01934     }
01935 
01936     /* now send the data blocks */
01937 
01938     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01939     for( j = 0; j < (long)neighbourProc.size(); j++ )
01940     {
01941         if( numVtxPerProcSend[j] )
01942         {
01943             rval = MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE, neighbourProc[j],
01944                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) );
01945             CHECK_MPI_RZERO( rval, err );
01946             if( 0 )
01947             {
01948                 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01949                         neighbourProc[j] );
01950                 fflush( NULL );
01951             }
01952         }
01953         else
01954         {
01955             requests_send[j] = MPI_REQUEST_NULL;
01956         }
01957     }
01958 
01959     /* process messages as they arrive */
01960 
01961     int local_id;
01962     for( j = 0; j < num_neighbourProcRecv; j++ )
01963     {
01964         rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status );
01965         CHECK_MPI_RZERO( rval, err );
01966 
01967         /* unpack messages */
01968         proc = status.MPI_SOURCE;
01969         int count;
01970         MPI_Get_count( &status, MPI_INT, &count );
01971         if( 0 )
01972             printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k],
01973                     count, neighbourProcRecv[k], proc );
01974         fflush( NULL );
01975         for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ )
01976         {
01977             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
01978             if( local_id )
01979             {
01980                 MBMesquite::Vector3D coordinates;
01981                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01982                                  packed_vertices_import[k][i].z );
01983                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01984                 MSQ_ERRZERO( err );
01985                 assert( part_smoothed_flag[local_id] == 0 );
01986                 part_smoothed_flag[local_id] = 1;
01987                 if( 0 )
01988                     printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
01989                             (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x,
01990                             packed_vertices_import[k][i].y, packed_vertices_import[k][i].z );
01991             }
01992             else
01993             {
01994                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01995                         packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
01996             }
01997         }
01998     }
01999 
02000     /* all receives have completed. it is save to release the memory */
02001     // free(vertex_pack_import);
02002     /* wait until the sends have completed */
02003     std::vector< MPI_Status > stati( neighbourProc.size() );
02004     rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) );
02005     CHECK_MPI_RZERO( rval, err );
02006     /* all sends have completed. it is save to release the memory */
02007     // free(vertex_pack_export);
02008 
02009     return numVtxImport;
02010 }
02011 
02012 int ParallelHelperImpl::comm_smoothed_vtx_nb_no_all( MsqError& err )
02013 {
02014     int i, j, k, rval;
02015 
02016     // printf("[%d] %d %d non blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);
02017 
02018     /* how many vertices will we receive */
02019 
02020     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
02021     for( i = 0; i < num_exportVtx; i++ )
02022     {
02023         for( j = 0; j < (long)neighbourProc.size(); j++ )
02024         {
02025             if( exportProc[i] == neighbourProc[j] )
02026             {
02027                 /* increment count */
02028                 numVtxPerProcSend[j]++;
02029                 /* end loop */
02030                 break;
02031             }
02032         }
02033         /* assert loop did not end without finding processor */
02034         assert( j != (long)neighbourProc.size() );
02035     }
02036 
02037     /* tell each processor how many vertices to expect */
02038 
02039     for( j = 0; j < (long)neighbourProc.size(); j++ )
02040     {
02041         if( neighbourProcSendRemain[j] )
02042         {
02043             assert( neighbourProcSendRemain[j] >= numVtxPerProcSend[j] );
02044             neighbourProcSendRemain[j] -= numVtxPerProcSend[j];
02045             rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02046                              (MPI_Comm)communicator );
02047             CHECK_MPI_RZERO( rval, err );
02048             //    printf("[%d]i%d Announcing %d vertices to proc
02049             //    %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL);
02050         }
02051         else
02052         {
02053             assert( numVtxPerProcSend[j] == 0 );
02054         }
02055     }
02056 
02057     /* place vertex data going to the same processor into consecutive memory space */
02058     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
02059     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
02060     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
02061     for( i = 1; i < (long)neighbourProc.size(); i++ )
02062     {
02063         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
02064     }
02065 
02066     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
02067     for( i = 0; i < num_exportVtx; i++ )
02068     {
02069         for( j = 0; j < (long)neighbourProc.size(); j++ )
02070         {
02071             if( exportProc[i] == neighbourProc[j] )
02072             {
02073                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
02074                 numVtxPerProcSendPACKED[j]++;
02075                 MBMesquite::MsqVertex coordinates;
02076                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02077                 MSQ_ERRZERO( err );
02078                 packing_vertex->x       = coordinates[0];
02079                 packing_vertex->y       = coordinates[1];
02080                 packing_vertex->z       = coordinates[2];
02081                 packing_vertex->glob_id = exportVtxGIDs[i];
02082             }
02083         }
02084     }
02085     // delete [] numVtxPerProcSendPACKED;
02086 
02087     /* now ask each processor how many vertices to expect */
02088 
02089     int num;
02090     int proc;
02091     int numVtxImport          = 0;
02092     int num_neighbourProcRecv = 0;
02093     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
02094     MPI_Status status;
02095 
02096     int num_neighbourProcRecvRemain = 0;
02097     for( j = 0; j < (long)neighbourProc.size(); j++ )
02098     {
02099         if( neighbourProcRecvRemain[j] ) { num_neighbourProcRecvRemain++; }
02100     }
02101     for( j = 0; j < num_neighbourProcRecvRemain; j++ )
02102     {
02103         /* get the vertex count for some processor */
02104         rval = MPI_Recv( &num,                      /* message buffer */
02105                          1,                         /* one data item */
02106                          MPI_INT,                   /* of type int */
02107                          MPI_ANY_SOURCE,            /* receive from any sender */
02108                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */
02109                          (MPI_Comm)communicator,    /* default communicator */
02110                          &status );                 /* info about the received message */
02111         CHECK_MPI_RZERO( rval, err );
02112         proc = status.MPI_SOURCE;
02113         /* will we import vertices from this processor */
02114 
02115         //    printf("[%d]i%d Heard we will receive %d vertices from proc
02116         //    %d\n",rank,iteration,num,proc); fflush(NULL);
02117 
02118         if( num )
02119         {
02120             /* increase number of processors we will receive from */
02121             num_neighbourProcRecv++;
02122             /* add number of vertices we will receive to the import total */
02123             numVtxImport += num;
02124         }
02125         /* find this processor in the list */
02126         for( i = 0; i < (long)neighbourProc.size(); i++ )
02127         {
02128             if( neighbourProc[i] == proc )
02129             {
02130                 numVtxPerProcRecv[i] = num;
02131                 assert( neighbourProcRecvRemain[i] >= num );
02132                 neighbourProcRecvRemain[i] -= num;
02133                 break;
02134             }
02135         }
02136         /* assert loop did not end without finding processor */
02137         assert( i != (long)neighbourProc.size() );
02138         if( false ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc );
02139         fflush( NULL );
02140     }
02141 
02142     /* create list of processors we receive from */
02143     std::vector< int > neighbourProcRecv( num_neighbourProcRecv );
02144     std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv );
02145     for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ )
02146     {
02147         if( 0 )
02148             printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
02149                     neighbourProc[i] );
02150         fflush( NULL );
02151         if( numVtxPerProcRecv[i] )
02152         {
02153             neighbourProcRecv[k]     = neighbourProc[i];
02154             numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i];
02155             k++;
02156         }
02157     }
02158 
02159     /* set up memory for the incoming vertex data blocks */
02160     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
02161     std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv );
02162     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
02163     for( i = 1; i < num_neighbourProcRecv; i++ )
02164     {
02165         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1];
02166     }
02167 
02168     /* receive from all processors that have something for us */
02169     std::vector< MPI_Request > request( num_neighbourProcRecv );
02170     for( j = 0; j < num_neighbourProcRecv; j++ )
02171     {
02172         rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE, neighbourProcRecv[j],
02173                           VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) );
02174         CHECK_MPI_RZERO( rval, err );
02175         if( false )
02176         {
02177             printf( "[%d]i%d Scheduling receipt of %d vertices to proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j],
02178                     neighbourProcRecv[j] );
02179             fflush( NULL );
02180         }
02181     }
02182 
02183     /* now send the data blocks */
02184 
02185     std::vector< MPI_Request > requests_send( neighbourProc.size() );
02186     for( j = 0; j < (long)neighbourProc.size(); j++ )
02187     {
02188         if( numVtxPerProcSend[j] )
02189         {
02190             rval = MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE, neighbourProc[j],
02191                               VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) );
02192             CHECK_MPI_RZERO( rval, err );
02193             if( 0 )
02194             {
02195                 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
02196                         neighbourProc[j] );
02197                 fflush( NULL );
02198             }
02199         }
02200         else
02201         {
02202             requests_send[j] = MPI_REQUEST_NULL;
02203         }
02204     }
02205 
02206     /* process messages as they arrive */
02207 
02208     int local_id;
02209     for( j = 0; j < num_neighbourProcRecv; j++ )
02210     {
02211         rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status );
02212         CHECK_MPI_RZERO( rval, err );
02213 
02214         /* unpack messages */
02215         proc = status.MPI_SOURCE;
02216         int count;
02217         MPI_Get_count( &status, MPI_INT, &count );
02218         if( 0 )
02219             printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k],
02220                     count, neighbourProcRecv[k], proc );
02221         fflush( NULL );
02222         for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ )
02223         {
02224             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02225             if( local_id )
02226             {
02227                 MBMesquite::Vector3D coordinates;
02228                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
02229                                  packed_vertices_import[k][i].z );
02230                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02231                 MSQ_ERRZERO( err );
02232                 assert( part_smoothed_flag[local_id] == 0 );
02233                 part_smoothed_flag[local_id] = 1;
02234                 if( 0 )
02235                     printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02236                             (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x,
02237                             packed_vertices_import[k][i].y, packed_vertices_import[k][i].z );
02238             }
02239             else
02240             {
02241                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02242                         packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02243             }
02244         }
02245     }
02246 
02247     /* all receives have completed. it is save to release the memory */
02248     // free(vertex_pack_import);
02249     /* wait until the sends have completed */
02250     std::vector< MPI_Status > stati( neighbourProc.size() );
02251     rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) );
02252     CHECK_MPI_RZERO( rval, err );
02253     /* all sends have completed. it is save to release the memory */
02254     // free(vertex_pack_export);
02255 
02256     return numVtxImport;
02257 }
02258 
02259 int ParallelHelperImpl::comm_smoothed_vtx_b( MsqError& err )
02260 {
02261     int i, j, rval;
02262 
02263     // printf("[%d] %d %d blocking\n",rank, iteration, pass);fflush(NULL);
02264 
02265     /* how many vertices per processor */
02266 
02267     std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
02268     for( i = 0; i < num_exportVtx; i++ )
02269     {
02270         for( j = 0; j < (long)neighbourProc.size(); j++ )
02271         {
02272             if( exportProc[i] == neighbourProc[j] )
02273             {
02274                 /* increment count */
02275                 numVtxPerProc[j]++;
02276                 /* end loop */
02277                 break;
02278             }
02279             /* assert loop did not end without finding the processor */
02280             assert( j != (long)neighbourProc.size() );
02281         }
02282     }
02283 
02284     /* place vertices going to the same processor into consecutive memory space */
02285 
02286     std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
02287     std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
02288     VertexPack* packing_vertex;
02289     packed_vertices[0] = ARRPTR( vertex_pack );
02290     for( i = 1; i < (long)neighbourProc.size(); i++ )
02291     {
02292         packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
02293     }
02294 
02295     std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );
02296 
02297     for( i = 0; i < num_exportVtx; i++ )
02298     {
02299         for( j = 0; j < (long)neighbourProc.size(); j++ )
02300         {
02301             if( exportProc[i] == neighbourProc[j] )
02302             {
02303                 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
02304                 numVtxPackedPerProc[j]++;
02305                 MBMesquite::MsqVertex coordinates;
02306                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02307                 MSQ_ERRZERO( err );
02308                 packing_vertex->x       = coordinates[0];
02309                 packing_vertex->y       = coordinates[1];
02310                 packing_vertex->z       = coordinates[2];
02311                 packing_vertex->glob_id = exportVtxGIDs[i];
02312             }
02313         }
02314     }
02315 
02316     // delete [] numVtxPackedPerProc;
02317 
02318     /* send each block so the corresponding processor preceeded by the number of vertices */
02319 
02320     for( j = 0; j < (long)neighbourProc.size(); j++ )
02321     {
02322 
02323         //    printf("[%d]i%dp%d Announcing %d vertices to proc
02324         //    %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02325 
02326         rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02327                          (MPI_Comm)communicator );
02328         CHECK_MPI_RZERO( rval, err );
02329         // printf("[%d]i%dp%d Sending %d vertices to proc
02330         // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02331 
02332         /* is there any vertex data to be sent */
02333 
02334         if( numVtxPerProc[j] )
02335         {
02336             rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE, neighbourProc[j],
02337                              VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
02338             CHECK_MPI_RZERO( rval, err );
02339             // printf("[%d]i%dp%d Sent %d vertices to proc
02340             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02341         }
02342     }
02343 
02344     int num;
02345     int proc;
02346     // int tag;
02347     int count;
02348     int numVtxImport = 0;
02349     MPI_Status status;
02350     int local_id;
02351 
02352     //  printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
02353     //  \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);
02354 
02355     /* receiving blocks from other processors */
02356 
02357     for( j = 0; j < (long)neighbourProc.size(); j++ )
02358     {
02359         rval = MPI_Recv( &num,                      /* message buffer */
02360                          1,                         /* one data item */
02361                          MPI_INT,                   /* of type int */
02362                          MPI_ANY_SOURCE,            /* receive from any sender */
02363                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
02364                          (MPI_Comm)communicator,    /* default communicator */
02365                          &status );                 /* info about the received message */
02366         CHECK_MPI_RZERO( rval, err );
02367         proc = status.MPI_SOURCE;
02368         // tag = status.MPI_TAG;
02369         MPI_Get_count( &status, MPI_INT, &count );
02370 
02371         //    printf("[%d]i%dp%d Receiving %d vertices from proc
02372         //    %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);
02373 
02374         /* is there any vertex data to be received */
02375 
02376         if( num )
02377         {
02378 
02379             numVtxImport += num;
02380 
02381             /* do we have enough space allocated */
02382 
02383             if( num_exportVtx + 10 < num )
02384             {
02385                 // if (vertex_pack) free(vertex_pack);
02386                 num_exportVtx = num;
02387                 vertex_pack.resize( num_exportVtx + 10 );
02388                 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
02389             }
02390 
02391             rval = MPI_Recv( ARRPTR( vertex_pack ),    /* message buffer */
02392                              4 * num,                  /* num data's item with 4 doubles each */
02393                              MPI_DOUBLE,               /* of type double */
02394                              proc,                     /* receive from this procesor only */
02395                              VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */
02396                              (MPI_Comm)communicator,   /* default communicator */
02397                              &status );                /* info about the received message */
02398             CHECK_MPI_RZERO( rval, err );
02399 
02400             proc = status.MPI_SOURCE;
02401             // tag = status.MPI_TAG;
02402             MPI_Get_count( &status, MPI_DOUBLE, &count );
02403 
02404             if( count != 4 * num )
02405                 printf( "[%d]i%d WARNING: expected %d vertices = %d bytes from proc %d but only "
02406                         "got %d bytes\n",
02407                         rank, iteration, num, num * 4, proc, count );
02408             fflush( NULL );
02409 
02410             //      printf("[%d]i%d Received %d vertices from proc
02411             //      %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);
02412 
02413             /* update the received vertices in our boundary mesh */
02414             for( i = 0; i < num; i++ )
02415             {
02416                 /*  printf("[%d]i%d updating vertex %d with global_id
02417                  * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
02418                 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
02419                 if( local_id )
02420                 {
02421                     MBMesquite::Vector3D coordinates;
02422                     coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02423                     mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02424                     MSQ_ERRZERO( err );
02425                     assert( part_smoothed_flag[local_id] == 0 );
02426                     part_smoothed_flag[local_id] = 1;
02427                     if( 0 )
02428                         printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02429                                 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02430                 }
02431                 else
02432                 {
02433                     printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02434                             vertex_pack[i].glob_id, proc );
02435                 }
02436             }
02437         }
02438     }
02439     // if (vertex_pack) free(vertex_pack);
02440 
02441     return numVtxImport;
02442 }
02443 
02444 int ParallelHelperImpl::comm_smoothed_vtx_b_no_all( MsqError& err )
02445 {
02446     int i, j, rval;
02447 
02448     // printf("[%d] %d %d blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);
02449 
02450     /* how many vertices per processor */
02451 
02452     std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
02453     for( i = 0; i < num_exportVtx; i++ )
02454     {
02455         for( j = 0; j < (long)neighbourProc.size(); j++ )
02456         {
02457             if( exportProc[i] == neighbourProc[j] )
02458             {
02459                 /* increment count */
02460                 numVtxPerProc[j]++;
02461                 /* end loop */
02462                 break;
02463             }
02464             /* assert loop did not end without finding the processor */
02465             assert( j != (long)neighbourProc.size() );
02466         }
02467     }
02468 
02469     /* place vertices going to the same processor into consecutive memory space */
02470 
02471     std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
02472     std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
02473     VertexPack* packing_vertex;
02474     packed_vertices[0] = ARRPTR( vertex_pack );
02475     for( i = 1; i < (long)neighbourProc.size(); i++ )
02476     {
02477         packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
02478     }
02479 
02480     std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );
02481 
02482     for( i = 0; i < num_exportVtx; i++ )
02483     {
02484         for( j = 0; j < (long)neighbourProc.size(); j++ )
02485         {
02486             if( exportProc[i] == neighbourProc[j] )
02487             {
02488                 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
02489                 numVtxPackedPerProc[j]++;
02490                 MBMesquite::MsqVertex coordinates;
02491                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02492                 MSQ_ERRZERO( err );
02493                 packing_vertex->x       = coordinates[0];
02494                 packing_vertex->y       = coordinates[1];
02495                 packing_vertex->z       = coordinates[2];
02496                 packing_vertex->glob_id = exportVtxGIDs[i];
02497             }
02498         }
02499     }
02500 
02501     // delete [] numVtxPackedPerProc;
02502 
02503     /* send each block so the corresponding processor preceeded by the number of vertices */
02504 
02505     for( j = 0; j < (long)neighbourProc.size(); j++ )
02506     {
02507 
02508         if( neighbourProcSendRemain[j] )
02509         {
02510             assert( neighbourProcSendRemain[j] >= numVtxPerProc[j] );
02511             neighbourProcSendRemain[j] -= numVtxPerProc[j];
02512 
02513             // printf("[%d]i%dp%d Announcing %d vertices to proc
02514             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02515 
02516             rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02517                              (MPI_Comm)communicator );
02518             CHECK_MPI_RZERO( rval, err );
02519 
02520             // printf("[%d]i%dp%d Sending %d vertices to proc
02521             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02522 
02523             /* is there any vertex data to be sent */
02524 
02525             if( numVtxPerProc[j] )
02526             {
02527                 rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE, neighbourProc[j],
02528                                  VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
02529                 CHECK_MPI_RZERO( rval, err );
02530 
02531                 // printf("[%d]i%dp%d Sent %d vertices to proc
02532                 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02533             }
02534         }
02535         else
02536         {
02537             // printf("[%d]i%dp%d no longer sending to %d\n",rank,iteration,neighbourProc[j]);
02538             // fflush(NULL);
02539         }
02540     }
02541 
02542     int num;
02543     int proc;
02544     // int tag;
02545     int count;
02546     int numVtxImport = 0;
02547     MPI_Status status;
02548     int local_id;
02549 
02550     //  printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
02551     //  \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);
02552 
02553     /* receiving blocks in any order from other processors */
02554 
02555     int num_neighbourProcRecvRemain = 0;
02556     for( j = 0; j < (long)neighbourProc.size(); j++ )
02557     {
02558         if( neighbourProcRecvRemain[j] ) { num_neighbourProcRecvRemain++; }
02559     }
02560     for( j = 0; j < num_neighbourProcRecvRemain; j++ )
02561     {
02562         rval = MPI_Recv( &num,                      /* message buffer */
02563                          1,                         /* one data item */
02564                          MPI_INT,                   /* of type int */
02565                          MPI_ANY_SOURCE,            /* receive from any sender */
02566                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
02567                          (MPI_Comm)communicator,    /* default communicator */
02568                          &status );                 /* info about the received message */
02569         CHECK_MPI_RZERO( rval, err );
02570         proc = status.MPI_SOURCE;
02571         // tag = status.MPI_TAG;
02572         MPI_Get_count( &status, MPI_INT, &count );
02573 
02574         // printf("[%d]i%dp%d Receiving %d vertices from proc
02575         // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);
02576 
02577         /* is there any vertex data to be received */
02578 
02579         if( num )
02580         {
02581 
02582             numVtxImport += num;
02583 
02584             /* do we have enough space allocated */
02585 
02586             if( num_exportVtx + 10 < num )
02587             {
02588                 // if (vertex_pack) free(vertex_pack);
02589                 num_exportVtx = num;
02590                 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
02591                 vertex_pack.resize( num_exportVtx + 10 );
02592             }
02593 
02594             rval = MPI_Recv( ARRPTR( vertex_pack ),    /* message buffer */
02595                              4 * num,                  /* num data's item with 4 doubles each */
02596                              MPI_DOUBLE,               /* of type double */
02597                              proc,                     /* receive from this procesor only */
02598                              VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */
02599                              (MPI_Comm)communicator,   /* default communicator */
02600                              &status );                /* info about the received message */
02601             CHECK_MPI_RZERO( rval, err );
02602 
02603             proc = status.MPI_SOURCE;
02604             // tag = status.MPI_TAG;
02605             MPI_Get_count( &status, MPI_DOUBLE, &count );
02606 
02607             if( count != 4 * num )
02608                 printf( "[%d]i%d WARNING: expected %d vertices = %d bytes from proc %d but only "
02609                         "got %d bytes\n",
02610                         rank, iteration, num, num * 4, proc, count );
02611             fflush( NULL );
02612 
02613             // printf("[%d]i%d Received %d vertices from proc
02614             // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);
02615 
02616             /* update the received vertices in our boundary mesh */
02617             for( i = 0; i < num; i++ )
02618             {
02619                 /*  printf("[%d]i%d updating vertex %d with global_id
02620                  * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
02621                 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
02622                 if( local_id )
02623                 {
02624                     MBMesquite::Vector3D coordinates;
02625                     coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02626                     mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02627                     MSQ_ERRZERO( err );
02628                     assert( part_smoothed_flag[local_id] == 0 );
02629                     part_smoothed_flag[local_id] = 1;
02630                     if( 0 && rank == 1 )
02631                         printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02632                                 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02633                 }
02634                 else
02635                 {
02636                     printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02637                             vertex_pack[i].glob_id, proc );
02638                 }
02639             }
02640         }
02641         for( i = 0; i < (long)neighbourProc.size(); i++ )
02642         {
02643             if( proc == neighbourProc[i] )
02644             {
02645                 assert( neighbourProcRecvRemain[i] >= num );
02646                 neighbourProcRecvRemain[i] -= num;
02647                 break;
02648             }
02649         }
02650     }
02651     // if (vertex_pack) free(vertex_pack);
02652 
02653     return numVtxImport;
02654 }
02655 
02656 /***********************************************************************
02657 COMPUTING THE INDEPENDENT SET AND EXPORT INFORMATION
02658 ***********************************************************************/
02659 /* the only thing that can prevent a vertex from being in the independent
02660    set is that is has an neighbouring vertex on a different processor that
02661    has not been smoothed yet and that has a larger random number */
02662 
02663 void ParallelHelperImpl::compute_independent_set()
02664 {
02665     int i, j, k, l;
02666     int incident_vtx;
02667     bool done;
02668 
02669     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02670         in_independent_set[i] = false;
02671 
02672     bool found_more = true;
02673     int found_iter  = 0;
02674 
02675     num_exportVtx = 0;
02676 
02677     while( found_more )
02678     {
02679         found_iter++;
02680         found_more = false;
02681         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02682         {
02683             /* if this vertex could become part of the independent set */
02684             if( part_smoothed_flag[i] == 0 && in_independent_set[i] == false )
02685             {
02686                 /* assume it's in the independent set */
02687                 done = false;
02688                 /* then loop over the neighbors it has on other processors */
02689                 for( j = 0; !done && j < (long)vtx_off_proc_list[i].size(); j++ )
02690                 {
02691                     incident_vtx = vtx_off_proc_list[i][j];
02692                     /* if this neighbour has not yet been smoothed and is not covered and ... */
02693                     if( part_smoothed_flag[incident_vtx] == 0 )
02694                     {
02695                         /* ... has a higher rand_number than me */
02696                         if( part_rand_number[i] < part_rand_number[incident_vtx] )
02697                         {
02698                             /* then I am not in the independent set */
02699                             done = true;
02700                             break;
02701                         }
02702                         /* ... or has the same rand_number than me but a higher processor id */
02703                         else if( ( part_rand_number[i] == part_rand_number[incident_vtx] ) &&
02704                                  ( rank < part_proc_owner[incident_vtx] ) )
02705                         {
02706                             /* then I am not in the independent set */
02707                             done = true;
02708                             break;
02709                         }
02710                     }
02711                 }
02712                 /* if the vertex is in the independent set, add it to the export list */
02713                 if( !done )
02714                 {
02715                     found_more = true;
02716                     //  if (found_iter > 1) printf("[%d]i%d found another one %d in iteration
02717                     //%d\n",rank,iteration,i, found_iter);
02718                     in_independent_set[i] = true;
02719                     /* mark vertices with lower random numbers as covered */
02720                     for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
02721                     {
02722                         incident_vtx = vtx_off_proc_list[i][j];
02723                         /* if this neighbour has not yet been smoothed or covered mark it as covered
02724                          */
02725                         if( part_smoothed_flag[incident_vtx] == 0 ) { part_smoothed_flag[incident_vtx] = 2; }
02726                     }
02727                     k = num_exportVtx;
02728                     /* then loop over the neighbors it has on other processors */
02729                     for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
02730                     {
02731                         incident_vtx = vtx_off_proc_list[i][j];
02732                         /* check to see if this processor already on the list */
02733                         done = false;
02734                         for( l = k; l < num_exportVtx && !done; l++ )
02735                         {
02736                             if( exportProc[l] == part_proc_owner[incident_vtx] ) { done = true; }
02737                         }
02738                         /* if it's not on the list add it */
02739                         if( !done )
02740                         {
02741                             exportVtxLIDs[num_exportVtx] = i;
02742                             exportVtxGIDs[num_exportVtx] = part_gid[i];
02743                             exportProc[num_exportVtx]    = part_proc_owner[incident_vtx];
02744                             num_exportVtx++;
02745                         }
02746                     }
02747                 }
02748             }
02749         }
02750     }
02751 
02752     if( false )
02753     {
02754         int in_set = 0;
02755         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02756             if( in_independent_set[i] ) in_set++;
02757         ;
02758         printf( "[%d]i%d independent set has %d of %d vertices sent out %d times\n", rank, iteration, in_set,
02759                 num_vtx_partition_boundary_local, num_exportVtx );
02760         fflush( NULL );
02761     }
02762 
02763     /* unmark the vertices that have been marked as covered */
02764     for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
02765         if( part_smoothed_flag[i] == 2 ) part_smoothed_flag[i] = 0;
02766 }
02767 
02768 int ParallelHelperImpl::get_rank() const
02769 {
02770     return rank;
02771 }
02772 
02773 int ParallelHelperImpl::get_nprocs() const
02774 {
02775     return nprocs;
02776 }
02777 
02778 bool ParallelHelperImpl::is_our_element( MBMesquite::Mesh::ElementHandle element_handle, MsqError& err ) const
02779 {
02780     int i;
02781     std::vector< Mesh::VertexHandle > pvertices;
02782     std::vector< size_t > junk;
02783     mesh->elements_get_attached_vertices( &element_handle, 1, pvertices, junk, err );
02784     MSQ_ERRZERO( err );
02785     int num_verts = pvertices.size();
02786     std::vector< int > proc_ids( num_verts );
02787     mesh->vertices_get_processor_id( ARRPTR( pvertices ), ARRPTR( proc_ids ), num_verts, err );
02788     MSQ_ERRZERO( err );
02789     int max_proc_id = proc_ids[0];
02790     for( i = 1; i < num_verts; i++ )
02791         if( max_proc_id < proc_ids[i] ) max_proc_id = proc_ids[i];
02792     return ( max_proc_id == rank );
02793 }
02794 
02795 bool ParallelHelperImpl::is_our_vertex( MBMesquite::Mesh::VertexHandle vertex_handle, MsqError& err ) const
02796 {
02797     int proc_id;
02798     mesh->vertices_get_processor_id( &vertex_handle, &proc_id, 1, err );
02799     return !MSQ_CHKERR( err ) && ( proc_id == rank );
02800 }
02801 
02802 void ParallelHelperImpl::communicate_min_max_to_all( double* minimum, double* maximum, MsqError& err ) const
02803 {
02804     double d_min[2];
02805     double d_min_recv[2];
02806     d_min[0] = -( *maximum );
02807     d_min[1] = *minimum;
02808     int rval = MPI_Allreduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, (MPI_Comm)communicator );
02809     CHECK_MPI( rval, err );
02810     *maximum = -d_min_recv[0];
02811     *minimum = d_min_recv[1];
02812 }
02813 
02814 void ParallelHelperImpl::communicate_min_max_to_zero( double* minimum, double* maximum, MsqError& err ) const
02815 {
02816     double d_min[2];
02817     double d_min_recv[2];
02818     d_min[0] = -( *maximum );
02819     d_min[1] = *minimum;
02820     int rval = MPI_Reduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, 0, (MPI_Comm)communicator );
02821     CHECK_MPI( rval, err );
02822     if( rank == 0 )
02823     {
02824         *maximum = -d_min_recv[0];
02825         *minimum = d_min_recv[1];
02826     }
02827 }
02828 
02829 void ParallelHelperImpl::communicate_sums_to_zero( size_t* freeElementCount, int* invertedElementCount,
02830                                                    size_t* elementCount, int* invertedSampleCount, size_t* sampleCount,
02831                                                    long unsigned int* count, long unsigned int* invalid, double* sum,
02832                                                    double* sqrSum, MsqError& err ) const
02833 {
02834     double d_sum[9];
02835     double d_sum_recv[9];
02836 
02837     d_sum[0] = (double)( *freeElementCount );
02838     d_sum[1] = (double)( *invertedElementCount );
02839     d_sum[2] = (double)( *elementCount );
02840     d_sum[3] = (double)( *invertedSampleCount );
02841     d_sum[4] = (double)( *sampleCount );
02842     d_sum[5] = (double)( *count );
02843     d_sum[6] = (double)( *invalid );
02844     d_sum[7] = *sum;
02845     d_sum[8] = *sqrSum;
02846 
02847     int rval = MPI_Reduce( d_sum, d_sum_recv, 9, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
02848     CHECK_MPI( rval, err );
02849 
02850     if( rank == 0 )
02851     {
02852         *freeElementCount     = (size_t)d_sum_recv[0];
02853         *invertedElementCount = (int)d_sum_recv[1];
02854         *elementCount         = (size_t)d_sum_recv[2];
02855         *invertedSampleCount  = (int)d_sum_recv[3];
02856         *sampleCount          = (size_t)d_sum_recv[4];
02857         *count                = (long unsigned int)d_sum_recv[5];
02858         *invalid              = (long unsigned int)d_sum_recv[6];
02859         *sum                  = d_sum_recv[7];
02860         *sqrSum               = d_sum_recv[8];
02861     }
02862 }
02863 
02864 void ParallelHelperImpl::communicate_power_sum_to_zero( double* pMean, MsqError& err ) const
02865 {
02866     double result;
02867     int rval = MPI_Reduce( pMean, &result, 1, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
02868     CHECK_MPI( rval, err );
02869     if( rank == 0 ) *pMean = result;
02870 }
02871 
02872 void ParallelHelperImpl::communicate_histogram_to_zero( std::vector< int >& histogram, MsqError& err ) const
02873 {
02874     std::vector< int > histogram_recv( histogram.size() );
02875     int rval = MPI_Reduce( &( histogram[0] ), &( histogram_recv[0] ), histogram.size(), MPI_INT, MPI_SUM, 0,
02876                            (MPI_Comm)communicator );
02877     CHECK_MPI( rval, err );
02878     if( rank == 0 ) { histogram.swap( histogram_recv ); }
02879 }
02880 
02881 /// if any proc has a true input @param value, return true to all
02882 void ParallelHelperImpl::communicate_any_true( bool& value, MsqError& err ) const
02883 {
02884     int byte_out = value, byte_in;
02885     int rval     = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MAX, (MPI_Comm)communicator );
02886     CHECK_MPI( rval, err );
02887     value = ( byte_in != 0 );
02888 }
02889 
02890 /// srkenno AT sandia.gov 1/19/12: bug fix: changed from MPI_MAX to MIN - this makes the name of the
02891 /// function correct, if all proc's values are true, it returns true.  @see communicate_any_true
02892 /// above
02893 
02894 /// return true in @param value if all procs have value=true, else if any have it false, return
02895 /// false
02896 void ParallelHelperImpl::communicate_all_true( bool& value, MsqError& err ) const
02897 {
02898     int byte_out = value, byte_in;
02899     int rval     = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MIN, (MPI_Comm)communicator );
02900     CHECK_MPI( rval, err );
02901     value = ( byte_in != 0 );
02902 }
02903 
02904 }  // namespace MBMesquite
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines