MOAB: Mesh Oriented datABase (version 5.2.1)
ParallelHelper.cpp
00001 #include "ParallelHelper.hpp"
00002 
00003 #include <mpi.h>
00004 
00005 #include <stdio.h>
00006 #include <assert.h>
00007 #include <iostream>
00008 #include <stdlib.h>
00009 #include <algorithm>
00010 #include <time.h>
00011 
00012 #include "MsqVertex.hpp"
00013 #include "MsqError.hpp"
00014 
00015 #define VERTEX_HEADER              1
00016 #define VERTEX_BLOCK               1000
00017 #define GHOST_NODE_INFO            2000
00018 #define GHOST_NODE_VERTICES_WANTED 2001
00019 #define GHOST_NODE_VERTEX_GIDS     2002
00020 #define GHOST_NODE_VERTEX_UPDATES  2003
00021 
00022 #define ARRPTR( x ) arrptr( x, true )
00023 
00024 namespace MBMesquite
00025 {
00026 
00027 void parallel_barrier()
00028 {
00029     int is_init = 0;
00030     int err     = MPI_Initialized( &is_init );
00031     if( MPI_SUCCESS != err ) return;
00032     if( is_init ) MPI_Barrier( MPI_COMM_WORLD );
00033 }
00034 
00035 int get_parallel_rank()
00036 {
00037     int rank    = 0;
00038     int is_init = 0;
00039     int err     = MPI_Initialized( &is_init );
00040     if( MPI_SUCCESS != err ) return 0;
00041     if( is_init ) MPI_Comm_rank( MPI_COMM_WORLD, &rank );
00042     return rank;
00043 }
00044 
00045 int get_parallel_size()
00046 {
00047     int nprocs  = 0;
00048     int is_init = 0;
00049     int err     = MPI_Initialized( &is_init );
00050     if( MPI_SUCCESS != err ) return 0;
00051     if( is_init ) MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
00052     return nprocs;
00053 }
00054 
00055 double reduce_parallel_max( double value )
00056 {
00057     int is_init = 0;
00058     int err     = MPI_Initialized( &is_init );
00059     if( MPI_SUCCESS != err ) return value;
00060     if( !is_init ) return value;
00061 
00062     double d_max[1];
00063     double d_max_recv[1];
00064     d_max[0] = value;
00065     int rval = MPI_Allreduce( d_max, d_max_recv, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
00066     if( MPI_SUCCESS != rval ) return value;
00067 
00068     return d_max_recv[0];
00069 }
00070 
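All of the free functions above use the same guard: query MPI_Initialized() first and fall back to a serial-safe default when MPI is unavailable, so they may be called before MPI_Init() or in purely serial runs. A minimal sketch of the same pattern applied to a sum reduction (reduce_parallel_sum is a hypothetical helper, not part of this file):

#include <mpi.h>

// Hypothetical helper following the same MPI_Initialized() guard as
// reduce_parallel_max(): return the input unchanged whenever MPI cannot be used.
static double reduce_parallel_sum( double value )
{
    int is_init = 0;
    if( MPI_SUCCESS != MPI_Initialized( &is_init ) ) return value;
    if( !is_init ) return value;

    double sum = value;
    if( MPI_SUCCESS != MPI_Allreduce( &value, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ) ) return value;
    return sum;
}
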
00071 static const char* mpi_err_string( int error_code )
00072 {
00073     static char buffer[MPI_MAX_ERROR_STRING]; /* MPI_Error_string may write up to this many characters */
00074     int len = sizeof( buffer );
00075     int e   = MPI_Error_string( error_code, buffer, &len );
00076     if( MPI_SUCCESS != e ) len = 0;
00077 
00078     if( len >= (int)sizeof( buffer ) ) len = sizeof( buffer ) - 1;
00079     buffer[len] = '\0';
00080     return buffer;
00081 }
00082 
00083 #define CHECK_MPI( RVAL, ERR )                                                                          \
00084     do                                                                                                  \
00085     {                                                                                                   \
00086         if( MPI_SUCCESS != ( RVAL ) )                                                                   \
00087         {                                                                                               \
00088             MSQ_SETERR( ERR )                                                                           \
00089             ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \
00090             return;                                                                                     \
00091         }                                                                                               \
00092     } while( false )
00093 
00094 #define CHECK_MPI_RZERO( RVAL, ERR )                                                                    \
00095     do                                                                                                  \
00096     {                                                                                                   \
00097         if( MPI_SUCCESS != ( RVAL ) )                                                                   \
00098         {                                                                                               \
00099             MSQ_SETERR( ERR )                                                                           \
00100             ( MsqError::UNKNOWN_ERROR, "MPI Error %d: %s", (int)( RVAL ), mpi_err_string( ( RVAL ) ) ); \
00101             return 0;                                                                                   \
00102         }                                                                                               \
00103     } while( false )
00104 
00105 static bool vertex_map_insert( VertexIdMap& map, size_t glob_id, int proc_id, int value )
00106 {
00107     VertexIdMapKey vid;
00108     vid.glob_id = glob_id;
00109     vid.proc_id = proc_id;
00110     return map.insert( VertexIdMap::value_type( vid, value ) ).second;
00111 }
00112 
00113 static int vertex_map_find( const VertexIdMap& map, size_t glob_id, int proc_id )
00114 {
00115     VertexIdMapKey vid;
00116     vid.glob_id                             = glob_id;
00117     vid.proc_id                             = proc_id;
00118     VertexIdMap::const_iterator map_element = map.find( vid );
00119     if( map_element == map.end() ) { return 0; }
00120     else
00121     {
00122         return map_element->second;
00123     }
00124 }
00125 
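vertex_map_insert() and vertex_map_find() treat the pair (global id, owning rank) as the key and the local partition-boundary index as the value; a failed lookup returns 0. The types VertexIdMap and VertexIdMapKey come from ParallelHelper.hpp; the standalone analogue below uses a std::map keyed by a std::pair purely to illustrate the same semantics.

#include <cstdio>
#include <map>
#include <utility>

int main()
{
    // (global id, owner rank) -> local index, mirroring the helpers above.
    std::map< std::pair< size_t, int >, int > vid_map;

    bool fresh = vid_map.insert( { { 42u, 3 }, 7 } ).second;  // true: new entry
    bool dup   = vid_map.insert( { { 42u, 3 }, 9 } ).second;  // false: duplicate key, value 9 is ignored

    std::map< std::pair< size_t, int >, int >::const_iterator it = vid_map.find( { 42u, 3 } );
    int value = ( it == vid_map.end() ) ? 0 : it->second;  // 0 doubles as "not found", as in vertex_map_find()

    std::printf( "fresh=%d dup=%d value=%d\n", (int)fresh, (int)dup, value );
    return 0;
}
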
00126 static void my_quicksort( int* a, size_t* b, MBMesquite::Mesh::VertexHandle* c, int i, int j )
00127 {
00128     int in_i = i;
00129     int in_j = j;
00130     int wa;
00131     size_t wb;
00132     MBMesquite::Mesh::VertexHandle w1;
00133     int key = a[( i + j ) / 2];
00134     do
00135     {
00136         while( a[i] < key )
00137             i++;
00138         while( a[j] > key )
00139             j--;
00140         if( i < j )
00141         {
00142             wa   = a[i];
00143             a[i] = a[j];
00144             a[j] = wa;
00145             wb   = b[i];
00146             b[i] = b[j];
00147             b[j] = wb;
00148             w1   = c[i];
00149             c[i] = c[j];
00150             c[j] = w1;
00151         }
00152     } while( ++i <= --j );
00153     if( i == j + 3 )
00154     {
00155         i--;
00156         j++;
00157     }
00158     if( j > in_i ) my_quicksort( a, b, c, in_i, j );
00159     if( i < in_j ) my_quicksort( a, b, c, i, in_j );
00160 }
00161 
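my_quicksort() keys on the first array and swaps the entries of all three parallel arrays in lockstep, which is exactly what the ghost-vertex bookkeeping below needs (owner rank, global id, and vertex handle must stay aligned). An equivalent formulation with the standard library sorts an index permutation and then reorders each array; in the sketch below void* stands in for Mesh::VertexHandle.

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Reorder a vector according to a permutation (helper local to this sketch).
template < typename T >
static std::vector< T > apply_permutation( const std::vector< T >& in, const std::vector< size_t >& perm )
{
    std::vector< T > out( in.size() );
    for( size_t i = 0; i < perm.size(); ++i )
        out[i] = in[perm[i]];
    return out;
}

// Sort three parallel arrays by the values in 'a', keeping them aligned.
static void sort_parallel_arrays( std::vector< int >& a, std::vector< size_t >& b, std::vector< void* >& c )
{
    std::vector< size_t > perm( a.size() );
    std::iota( perm.begin(), perm.end(), 0 );
    std::sort( perm.begin(), perm.end(), [&]( size_t x, size_t y ) { return a[x] < a[y]; } );
    a = apply_permutation( a, perm );
    b = apply_permutation( b, perm );
    c = apply_permutation( c, perm );
}
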
00162 static int hash6432shift( unsigned long long key )
00163 {
00164     key = ( ~key ) + ( key << 18 );  // key = (key << 18) - key - 1;
00165     key = key ^ ( key >> 31 );
00166     key = key * 21;  // key = (key + (key << 2)) + (key << 4);
00167     key = key ^ ( key >> 11 );
00168     key = key + ( key << 6 );
00169     key = key ^ ( key >> 22 );
00170     return (int)key;
00171 }
00172 
00173 static unsigned long long hash64shift( unsigned long long key )
00174 {
00175     key = ( ~key ) + ( key << 21 );  // key = (key << 21) - key - 1;
00176     key = key ^ ( key >> 24 );
00177     key = ( key + ( key << 3 ) ) + ( key << 8 );  // key * 265
00178     key = key ^ ( key >> 14 );
00179     key = ( key + ( key << 2 ) ) + ( key << 4 );  // key * 21
00180     key = key ^ ( key >> 28 );
00181     key = key + ( key << 31 );
00182     return key;
00183 }
00184 
00185 static double generate_random_number( int generate_random_numbers, int proc_id, size_t glob_id )
00186 {
00187     int gid;
00188 
00189     if( sizeof( size_t ) == sizeof( unsigned long long ) ) { gid = hash6432shift( (unsigned long long)glob_id ); }
00190     else
00191     {
00192         gid = (int)glob_id;
00193     }
00194 
00195     if( generate_random_numbers == 1 )
00196     {
00197         // count number of on bits
00198         int on            = 0;
00199         unsigned int mist = (unsigned int)gid;
00200         while( mist )
00201         {
00202             if( mist & 1 ) on++;
00203             mist = mist >> 1;
00204         }
00205         unsigned short xsubi[3];
00206         if( on & 1 )
00207             mist = (unsigned int)gid;
00208         else
00209             mist = (unsigned int)( -gid );
00210         if( on & 2 )
00211         {
00212             xsubi[0] = (unsigned short)( 65535 & mist );
00213             xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) );
00214         }
00215         else
00216         {
00217             xsubi[1] = (unsigned short)( 65535 & mist );
00218             xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) );
00219         }
00220         xsubi[2] = proc_id;
00221 
00222 #if( defined WIN32 || defined WIN64 )
00223         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00224         srand( (int)seed );
00225         return rand() / ( RAND_MAX + 1.0 );
00226 #else
00227         return erand48( xsubi );
00228 #endif
00229     }
00230     else if( generate_random_numbers == 2 )
00231     {
00232         // count number of on bits
00233         int on            = 0;
00234         unsigned int mist = (unsigned int)gid;
00235         while( mist )
00236         {
00237             if( mist & 1 ) on++;
00238             mist = mist >> 1;
00239         }
00240         unsigned short xsubi[3];
00241         if( on & 1 )
00242             mist = (unsigned int)gid;
00243         else
00244             mist = (unsigned int)( -gid );
00245         if( on & 2 )
00246         {
00247             xsubi[0] = (unsigned short)( 65535 & mist );
00248             xsubi[1] = (unsigned short)( 65535 & ( mist >> 16 ) );
00249         }
00250         else
00251         {
00252             xsubi[1] = (unsigned short)( 65535 & mist );
00253             xsubi[0] = (unsigned short)( 65535 & ( mist >> 16 ) );
00254         }
00255         xsubi[2] = proc_id ^ xsubi[1];
00256 
00257 #if( defined WIN32 || defined WIN64 )
00258         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00259         srand( (int)seed );
00260         return rand() / ( RAND_MAX + 1.0 );
00261 #else
00262         return erand48( xsubi );
00263 #endif
00264     }
00265     else if( generate_random_numbers == 3 )
00266     {
00267         unsigned long long key = (unsigned long long)gid;
00268         key                    = key << 32;
00269         key                    = key | proc_id;
00270         return (double)hash64shift( key );
00271     }
00272     else if( generate_random_numbers == 4 )
00273     {
00274         unsigned long long key = (unsigned long long)gid;
00275         key                    = key << 32;
00276         key                    = key | proc_id;
00277         key                    = hash64shift( key );
00278         unsigned short xsubi[3];
00279         xsubi[0] = (unsigned short)( key >> 48 );
00280         xsubi[1] = (unsigned short)( key >> 32 );
00281         xsubi[2] = (unsigned short)( key >> 16 );
00282 
00283 #if( defined WIN32 || defined WIN64 )
00284         time_t seed = ( gid + xsubi[1] ) / ( ( time( NULL ) % 10 ) + 1 );
00285         srand( (int)seed );
00286         return rand() / ( RAND_MAX + 1.0 );
00287 #else
00288         return erand48( xsubi );
00289 #endif
00290     }
00291     else
00292     {
00293         return -1;
00294     }
00295 }
00296 
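The point of generate_random_number() is that the value for a vertex depends only on its owning rank and global id, so every rank that holds a copy of the vertex computes the same number without any communication; these numbers later break ties when building independent sets. A simplified, self-contained sketch of the idea behind modes 3 and 4:

#include <cstdio>

// Same mixing steps as hash64shift() above.
static unsigned long long mix64( unsigned long long key )
{
    key = ( ~key ) + ( key << 21 );
    key = key ^ ( key >> 24 );
    key = ( key + ( key << 3 ) ) + ( key << 8 );
    key = key ^ ( key >> 14 );
    key = ( key + ( key << 2 ) ) + ( key << 4 );
    key = key ^ ( key >> 28 );
    key = key + ( key << 31 );
    return key;
}

int main()
{
    const int owner            = 5;        // rank that owns the vertex
    const unsigned long long g = 123456u;  // its global id
    unsigned long long key     = ( g << 32 ) | (unsigned long long)owner;

    // Any rank evaluating this expression for the same vertex gets the same value.
    std::printf( "tie-break key for (gid=%llu, owner=%d): %llu\n", g, owner, mix64( key ) );
    return 0;
}
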
00297 ParallelHelperImpl::ParallelHelperImpl()
00298 {
00299     mesh = 0;
00300     set_communicator( MPI_COMM_WORLD );
00301     communication_model = TrulyNonBlockingAvoidAllReduce;
00302 
00303     MPI_Comm_rank( (MPI_Comm)communicator, &rank );
00304     MPI_Comm_size( (MPI_Comm)communicator, &nprocs );
00305 
00306     // variable defaults for VertexMover::loop_over_mesh()
00307     generate_random_numbers = 2;
00308 
00309     // memory chunks referenced for VertexMover::loop_over_mesh()
00310     vertices.clear();
00311     vtx_in_partition_boundary.clear();
00312     part_vertices.clear();
00313     unghost_vertices.clear();
00314     part_proc_owner.clear();
00315     part_gid.clear();
00316     part_smoothed_flag.clear();
00317     part_rand_number.clear();
00318     exportVtxGIDs.clear();
00319     exportVtxLIDs.clear();
00320     exportProc.clear();
00321     in_independent_set.clear();
00322     vid_map.clear();
00323     neighbourProcSend.clear();
00324     neighbourProcRecv.clear();
00325     neighbourProcSendRemain.clear();
00326     neighbourProcRecvRemain.clear();
00327     vtx_off_proc_list.clear();
00328     neighbourProc.clear();
00329 }
00330 
00331 ParallelHelperImpl::~ParallelHelperImpl() {}
00332 
00333 void ParallelHelperImpl::set_parallel_mesh( ParallelMesh* pmesh )
00334 {
00335     this->mesh = pmesh;
00336 }
00337 
00338 void ParallelHelperImpl::set_communicator( size_t comm )
00339 {
00340     communicator = comm;
00341     MPI_Comm_rank( (MPI_Comm)communicator, &rank );
00342     MPI_Comm_size( (MPI_Comm)communicator, &nprocs );
00343 }
00344 
00345 void ParallelHelperImpl::set_communication_model( int model, MsqError& )
00346 {
00347     communication_model = model;
00348 }
00349 
00350 void ParallelHelperImpl::set_generate_random_numbers( int grn, MsqError& )
00351 {
00352     this->generate_random_numbers = grn;
00353 }
00354 
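Taken together, the setters above are all the configuration a caller needs before smoothing_init(); the constructor already defaults to TrulyNonBlockingAvoidAllReduce and to random-number scheme 2. A minimal configuration sketch, assuming the caller supplies a ParallelMesh pointer and an MsqError (the cast mirrors how the class stores the communicator as a size_t):

#include <mpi.h>

#include "MsqError.hpp"
#include "ParallelHelper.hpp"

static void configure_helper( MBMesquite::ParallelHelperImpl& helper, MBMesquite::ParallelMesh* pmesh,
                              MBMesquite::MsqError& err )
{
    helper.set_communicator( (size_t)MPI_COMM_WORLD );
    helper.set_parallel_mesh( pmesh );
    helper.set_generate_random_numbers( 2, err );  // 2 is also the constructor default
}
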
00355 void ParallelHelperImpl::smoothing_init( MsqError& err )
00356 {
00357 
00358     int i, j, k, rval;
00359     size_t l;
00360 
00361     if( !mesh )
00362     {
00363         MSQ_SETERR( err )( MsqError::INVALID_STATE );
00364         return;
00365     }
00366     if( nprocs == 1 ) return;
00367 
00368     /* get the vertices */
00369     mesh->get_all_vertices( vertices, err );MSQ_ERRRTN( err );
00370     num_vertex = vertices.size();
00371 
00372     /* allocate the data arrays we'll use for smoothing */
00373     std::vector< size_t > gid( num_vertex );
00374     std::vector< int > proc_owner( num_vertex );
00375     std::vector< unsigned char > app_fixed( num_vertex );
00376 
00377     /* get the data from the mesquite mesh */
00378     mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err );
00379     mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err );
00380     mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err );
00381     if( 0 )
00382     {
00383         int ncull = 0;
00384         for( i = 0; i < num_vertex; ++i )
00385         {
00386             if( app_fixed[i] & MsqVertex::MSQ_CULLED ) { ++ncull; }
00387         }
00388         std::cout << "P[" << rank << "] smoothing_init ncull= " << ncull << " num_vertex= " << num_vertex << std::endl;
00389     }
00390 
00391     /* only interested in fixed flag from vertex byte? Clear others. */
00392     // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed
00393     for( i = 0; i < num_vertex; ++i )
00394         app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED;
00395 
00396     /* create temporary Tag for the local IDs */
00397     std::vector< int > lid( num_vertex );
00398     for( i = 0; i < num_vertex; i++ )
00399         lid[i] = i;
00400     const char LOCAL_ID_NAME[] = "LOCAL_ID";
00401     TagHandle lid_tag          = mesh->tag_create( LOCAL_ID_NAME, Mesh::INT, 1, NULL, err );MSQ_ERRRTN( err );
00402     mesh->tag_set_vertex_data( lid_tag, num_vertex, ARRPTR( vertices ), ARRPTR( lid ), err );MSQ_ERRRTN( err );
00403 
00404     if( 0 ) printf( "[%d] set local tags on %d vertices\n", rank, num_vertex );
00405 
00406     /* get the elements */
00407     std::vector< MBMesquite::Mesh::ElementHandle > elements;
00408     mesh->get_all_elements( elements, err );MSQ_ERRRTN( err );
00409     int num_elems = elements.size();
00410 
00411     /**********************************************************************
00412     PARTITION BOUNDARY VERTEX DETERMINATION
00413     **********************************************************************/
00414 
00415     /* initialize the vertex partition boundary array */
00416     vtx_in_partition_boundary.clear();
00417     vtx_in_partition_boundary.resize( num_vertex, 0 );
00418     int incident_vtx, vtx_off_proc, vtx_on_proc;
00419 
00420     /* get the array that contains the adjacent vertices for each mesh element */
00421     std::vector< MBMesquite::Mesh::VertexHandle > adj_vertices;
00422     std::vector< size_t > vtx_offsets;
00423     mesh->elements_get_attached_vertices( ARRPTR( elements ), num_elems, adj_vertices, vtx_offsets, err );
00424     std::vector< int > adj_vertices_lid( adj_vertices.size() );
00425     mesh->tag_get_vertex_data( lid_tag, adj_vertices.size(), ARRPTR( adj_vertices ), ARRPTR( adj_vertices_lid ), err );
00426 
00427     if( 0 ) printf( "[%d] gotten adjacent elements for %d elements\n", rank, num_elems );
00428 
00429     /* determine which vertices are smoothed as part of the boundary (and which are unused ghost
00430      * vertices) */
00431     num_vtx_partition_boundary_local  = 0;
00432     num_vtx_partition_boundary_remote = 0;
00433     for( i = 0; i < num_elems; i++ )
00434     {
00435         /* count how many vertices of the current element are on/off a different processor */
00436         vtx_off_proc = 0;
00437         vtx_on_proc  = 0;
00438         for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00439         {
00440             incident_vtx = adj_vertices_lid[j];
00441             /* obviously the vertex only counts if it is not app_fixed */
00442             if( !app_fixed[incident_vtx] )
00443             {
00444                 if( proc_owner[incident_vtx] != rank ) { vtx_off_proc++; }
00445                 else
00446                 {
00447                     vtx_on_proc++;
00448                 }
00449             }
00450         }
00451 
00452         /* if vertices are on different processors mark all local vertices as boundary1 and all
00453          * remote vertices as boundary2 */
00454         if( vtx_off_proc > 0 && vtx_on_proc > 0 )
00455         {
00456             /* collect stats */
00457             //  smooth_stats.num_part_bndy_elem++;
00458             /* mark the vertices */
00459             for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00460             {
00461                 incident_vtx = adj_vertices_lid[j];
00462                 /* obviously the vertex does not need to be marked if it was already marked or if it
00463                  * is app_fixed*/
00464                 if( vtx_in_partition_boundary[incident_vtx] <= 0 && app_fixed[incident_vtx] == 0 )
00465                 {
00466                     /* mark and count the vertex */
00467                     if( proc_owner[incident_vtx] != rank )
00468                     {
00469                         vtx_in_partition_boundary[incident_vtx] = 2;
00470                         num_vtx_partition_boundary_remote++;
00471                     }
00472                     else
00473                     {
00474                         vtx_in_partition_boundary[incident_vtx] = 1;
00475                         num_vtx_partition_boundary_local++;
00476                     }
00477                 }
00478             }
00479         }
00480         else if( vtx_off_proc > 0 )
00481         {
00482             /* mark the vertices as boundary-1 (aka unused ghost) if the element has only
00483              * off-processor vertices */
00484             for( j = vtx_offsets[i]; j < (int)( vtx_offsets[i + 1] ); j++ )
00485             {
00486                 incident_vtx = adj_vertices_lid[j];
00487                 /* obviously the vertex is not marked if it was already marked or if it is
00488                  * app_fixed*/
00489                 if( vtx_in_partition_boundary[incident_vtx] == 0 && app_fixed[incident_vtx] == 0 )
00490                 { vtx_in_partition_boundary[incident_vtx] = -1; }
00491             }
00492         }
00493     }
00494 
00495     if( 0 )
00496     {
00497         printf( "[%d]i%d local %d remote %d ", rank, iteration, num_vtx_partition_boundary_local,
00498                 num_vtx_partition_boundary_remote );
00499         printf( "[%d]i%d pb1 ", rank, iteration );
00500         for( i = 0; i < num_vertex; i++ )
00501             if( vtx_in_partition_boundary[i] == 1 ) printf( "%d,%lu ", i, gid[i] );
00502         printf( "\n" );
00503         printf( "[%d]i%d pb2 ", rank, iteration );
00504         for( i = 0; i < num_vertex; i++ )
00505             if( vtx_in_partition_boundary[i] == 2 ) printf( "%d,%lu ", i, gid[i] );
00506         printf( "\n" );
00507         fflush( NULL );
00508     }
00509 
00510     num_vtx_partition_boundary = num_vtx_partition_boundary_local + num_vtx_partition_boundary_remote;
00511 
00512     /********************************************************************
00513    COLLECT THE PARTITION BOUNDARY VERTICES AND THE UNUSED GHOST VERTICES
00514     ********************************************************************/
00515 
00516     /* create the vectors to store the partition boundary vertex data */
00517     part_vertices.resize( num_vtx_partition_boundary );
00518     part_proc_owner.resize( num_vtx_partition_boundary );
00519     part_gid.resize( num_vtx_partition_boundary );
00520     part_smoothed_flag.resize( num_vtx_partition_boundary );
00521     part_rand_number.resize( num_vtx_partition_boundary );
00522 
00523     /* create the partition boundary map and its inverse */
00524     std::vector< int > vtx_partition_boundary_map_inverse( num_vertex );
00525 
00526     j = 0;
00527     /* first we map the partition boundary vertices that we will smooth on this processor */
00528     for( i = 0; i < num_vertex; i++ )
00529     {
00530         if( vtx_in_partition_boundary[i] == 1 )
00531         {
00532             part_vertices[j]   = vertices[i];
00533             part_proc_owner[j] = rank;
00534             assert( proc_owner[i] == rank );
00535             part_gid[j]                           = gid[i];
00536             vtx_partition_boundary_map_inverse[i] = j;
00537             j++;
00538         }
00539     }
00540 
00541     vid_map.clear();
00542 
00543     /* then we map the ghost vertices that will be smoothed on other processors */
00544     for( i = 0; i < num_vertex; i++ )
00545     {
00546         if( vtx_in_partition_boundary[i] == 2 )
00547         {
00548             part_vertices[j]   = vertices[i];
00549             part_proc_owner[j] = proc_owner[i];
00550             assert( proc_owner[i] != rank );
00551             part_gid[j]                           = gid[i];
00552             vtx_partition_boundary_map_inverse[i] = j;
00553             /* only insert those vertices in the map that are smoothed on other processors */
00554             vertex_map_insert( vid_map, part_gid[j], part_proc_owner[j], j );
00555             // printf("[%d] inserting vertex with gid %u and pid %d \n", rank, part_gid[j],
00556             // part_proc_owner[j]);
00557             j++;
00558         }
00559     }
00560 
00561     /* create our own 'very pseudo random' numbers */
00562     for( i = 0; i < num_vtx_partition_boundary; i++ )
00563         part_rand_number[i] = generate_random_number( generate_random_numbers, part_proc_owner[i], part_gid[i] );
00564 
00565     /* count the number of unused ghost vertices */
00566     unghost_num_vtx = 0;
00567     for( i = 0; i < num_vertex; i++ )
00568     {
00569         if( vtx_in_partition_boundary[i] == -1 ) { unghost_num_vtx++; }
00570     }
00571     // printf("[%d] found %d unused ghost vertices (local %d remote %d)\n",rank, unghost_num_vtx,
00572     // num_vtx_partition_boundary_local,num_vtx_partition_boundary_remote);
00573 
00574     /* create the vectors to store the unused ghost vertices */
00575     unghost_vertices.resize( unghost_num_vtx );
00576     std::vector< int > unghost_proc_owner( unghost_num_vtx );
00577     std::vector< size_t > unghost_gid( unghost_num_vtx );
00578 
00579     /* store the unused ghost vertices that are copies of vertices from other processors and will
00580      * need to be received */
00581     j = 0;
00582     for( i = 0; i < num_vertex; i++ )
00583     {
00584         if( vtx_in_partition_boundary[i] == -1 )
00585         {
00586             unghost_vertices[j]   = vertices[i];
00587             unghost_proc_owner[j] = proc_owner[i];
00588             assert( proc_owner[i] != rank );
00589             unghost_gid[j] = gid[i];
00590             // printf(" %d", unghost_gid[j]);
00591             j++;
00592         }
00593     }
00594 
00595     /* no longer needed */
00596     // delete [] gid; gid = 0;
00597     // delete [] proc_owner; proc_owner = 0;
00598 
00599     unghost_num_procs = 0;
00600     unghost_procs.clear();
00601     unghost_procs_offset.clear();
00602     unghost_procs_num_vtx.clear();
00603     if( unghost_num_vtx )
00604     {
00605         /* sort the unused ghost vertices by processor */
00606         my_quicksort( ARRPTR( unghost_proc_owner ), ARRPTR( unghost_gid ), &( unghost_vertices[0] ), 0,
00607                       unghost_num_vtx - 1 );
00608 
00609         /* count the number of processors from which we have unused ghost data that we want updated
00610          */
00611         unghost_num_procs = 1;
00612         for( i = 1; i < unghost_num_vtx; i++ )
00613         {
00614             if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] ) unghost_num_procs++;
00615         }
00616 
00617         /* get the ids of those processors and the number of vertices we want from each */
00618         unghost_procs.resize( unghost_num_procs );
00619         unghost_procs_offset.resize( unghost_num_procs + 1 );
00620         unghost_procs_num_vtx.resize( unghost_num_procs );
00621         unghost_procs[0]        = unghost_proc_owner[0];
00622         unghost_procs_offset[0] = 0;
00623         for( i = 1, j = 1; i < unghost_num_vtx; i++ )
00624         {
00625             if( unghost_proc_owner[i - 1] != unghost_proc_owner[i] )
00626             {
00627                 unghost_procs[j]             = unghost_proc_owner[i];
00628                 unghost_procs_offset[j]      = i;
00629                 unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1];
00630                 assert( unghost_procs_num_vtx[j - 1] > 0 );
00631                 j++;
00632             }
00633         }
00634         unghost_procs_offset[j]      = i;
00635         unghost_procs_num_vtx[j - 1] = unghost_procs_offset[j] - unghost_procs_offset[j - 1];
00636         assert( unghost_procs_num_vtx[j - 1] > 0 );
00637         assert( j == unghost_num_procs );
00638 
00639         // delete [] unghost_proc_owner; unghost_proc_owner = 0;
00640 
00641         // printf("[%d] have ugns from %d processor(s) (%d,%d,%d)\n", rank, unghost_num_procs,
00642         // unghost_procs[0], (unghost_num_procs>1) ? unghost_procs[1] : -1, (unghost_num_procs>2) ?
00643         // unghost_procs[1] : -1);
00644     }
00645 
00646     /* this will eventually store how many processors each processor needs to send unused ghost
00647      * data to */
00648     std::vector< int > num_sends_of_unghost;
00649 
00650     /* gather the information about how many processors need ghost data */
00651     if( rank == 0 )
00652     {
00653         /* this will eventually store how many processors each processor needs to send unused
00654          * ghost data to */
00655         num_sends_of_unghost.resize( nprocs );
00656     }
00657     /* temporary used for the initial gather in which each proc tells the root from how many procs
00658      * it wants unused ghost data updates */
00659     rval = MPI_Gather( &unghost_num_procs, 1, MPI_INT, ARRPTR( num_sends_of_unghost ), 1, MPI_INT, 0,
00660                        (MPI_Comm)communicator );
00661     CHECK_MPI( rval, err );
00662 
00663     /* now each processor tells the root node from which processors they want unused ghost nodes
00664      * information */
00665     if( rank == 0 )
00666     {
00667         int procs_num = 0;
00668         int procs_max = 0;
00669         /* first count how many processors will send unused ghost node info and find the maximum
00670          * they will send */
00671         for( i = 1; i < nprocs; i++ )
00672         {
00673             if( num_sends_of_unghost[i] )
00674             {
00675                 procs_num++;
00676                 if( num_sends_of_unghost[i] > procs_max ) procs_max = num_sends_of_unghost[i];
00677             }
00678         }
00679         /* reset the temporarily used array */
00680         for( i = 0; i < nprocs; i++ )
00681             num_sends_of_unghost[i] = 0;
00682         /* process rank 0's unused ghost nodes procs first */
00683         for( j = 0; j < unghost_num_procs; j++ )
00684         {
00685             num_sends_of_unghost[unghost_procs[j]]++;
00686         }
00687         /* now we process messages from all other processors that want unused ghost node information
00688          */
00689         int* unghost_procs_array = new int[procs_max];
00690         for( i = 0; i < procs_num; i++ )
00691         {
00692             MPI_Status status;
00693             rval = MPI_Recv( unghost_procs_array, procs_max, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_INFO,
00694                              (MPI_Comm)communicator, &status );
00695             CHECK_MPI( rval, err );
00696             int count;
00697             MPI_Get_count( &status, MPI_INT, &count );
00698             for( j = 0; j < count; j++ )
00699             {
00700                 num_sends_of_unghost[unghost_procs_array[j]]++;
00701             }
00702         }
00703         delete[] unghost_procs_array;
00704     }
00705     else
00706     {
00707         if( unghost_num_vtx )
00708         {
00709             rval = MPI_Send( ARRPTR( unghost_procs ), unghost_num_procs, MPI_INT, 0, GHOST_NODE_INFO,
00710                              (MPI_Comm)communicator );
00711             CHECK_MPI( rval, err );
00712         }
00713     }
00714 
00715     /* now the root node knows, for each processor, on how many other processors it has ghost nodes
00716      * (which need updating) */
00717     /* the scatter distributes this information to each processor */
00718     rval = MPI_Scatter( ARRPTR( num_sends_of_unghost ), 1, MPI_INT, &update_num_procs, 1, MPI_INT, 0,
00719                         (MPI_Comm)communicator );
00720     CHECK_MPI( rval, err );
00721 
00722     // if (rank == 0) delete [] num_sends_of_unghost;
00723 
00724     // printf("[%d] i have unused ghost nodes from %d procs and i need to send updates to %d
00725     // procs\n", rank, unghost_num_procs, update_num_procs);
00726 
00727     /* now the processors can negotiate amongst themselves: */
00728 
00729     /* first tell each processor the number of unused ghost nodes we want from them */
00730     std::vector< MPI_Request > requests_unghost( unghost_num_procs );
00731     for( j = 0; j < unghost_num_procs; j++ )
00732     {
00733         rval = MPI_Isend( &( unghost_procs_num_vtx[j] ), 1, MPI_INT, unghost_procs[j], GHOST_NODE_VERTICES_WANTED,
00734                           (MPI_Comm)communicator, &( requests_unghost[j] ) );
00735         CHECK_MPI( rval, err );
00736     }
00737 
00738     /* then listen to as many processors as there are that want updates from us */
00739     std::vector< MPI_Request > requests_updates( update_num_procs );
00740     update_procs_num_vtx.resize( update_num_procs );
00741     for( j = 0; j < update_num_procs; j++ )
00742     {
00743         rval = MPI_Irecv( &( update_procs_num_vtx[j] ), 1, MPI_INT, MPI_ANY_SOURCE, GHOST_NODE_VERTICES_WANTED,
00744                           (MPI_Comm)communicator, &( requests_updates[j] ) );
00745         CHECK_MPI( rval, err );
00746     }
00747 
00748     /* wait until we have heard from all processors how many ghost node updates they want from us
00749      */
00750     std::vector< MPI_Status > status_update( update_num_procs );
00751     update_procs.resize( update_num_procs );
00752     rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) );
00753     CHECK_MPI( rval, err );
00754     for( j = 0; j < update_num_procs; j++ )
00755     {
00756         update_procs[j] = status_update[j].MPI_SOURCE;
00757         // printf("[%d] i have to send %d vertices to processor %d\n", rank,
00758         // update_procs_num_vtx[j], update_procs[j]);
00759     }
00760 
00761     /* count the total number of vertices that we need to update elsewhere */
00762     update_procs_offset.resize( update_num_procs + 1 );
00763     update_num_vtx         = 0;
00764     update_procs_offset[0] = 0;
00765     for( j = 0; j < update_num_procs; j++ )
00766     {
00767         update_num_vtx += update_procs_num_vtx[j];
00768         update_procs_offset[j + 1] = update_num_vtx;
00769     }
00770 
00771     /* create enough space to receive all the vertex indices */
00772     update_gid.resize( update_num_vtx );
00773 
00774     /* tell each processor which vertices we want from them */
00775     for( j = 0; j < unghost_num_procs; j++ )
00776     {
00777         rval = MPI_Isend( &( unghost_gid[unghost_procs_offset[j]] ), unghost_procs_num_vtx[j],
00778                           sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_GIDS,
00779                           (MPI_Comm)communicator, &( requests_unghost[j] ) );
00780         CHECK_MPI( rval, err );
00781     }
00782 
00783     /* receive from each processor the info about which vertices they want from us */
00784     for( j = 0; j < update_num_procs; j++ )
00785     {
00786         rval = MPI_Irecv( &( update_gid[update_procs_offset[j]] ), update_procs_num_vtx[j],
00787                           sizeof( size_t ) == 4 ? MPI_INT : MPI_DOUBLE, update_procs[j], GHOST_NODE_VERTEX_GIDS,
00788                           (MPI_Comm)communicator, &( requests_updates[j] ) );
00789         CHECK_MPI( rval, err );
00790     }
00791 
00792     /* wait until we have heard from all processors which vertices they want from us */
00793     rval = MPI_Waitall( update_num_procs, ARRPTR( requests_updates ), ARRPTR( status_update ) );
00794     CHECK_MPI( rval, err );
00795 
00796     /* wait until we have sent to all processors which vertices we want from them */
00797     std::vector< MPI_Status > status_unghost( unghost_num_procs );
00798     rval = MPI_Waitall( unghost_num_procs, ARRPTR( requests_unghost ), ARRPTR( status_unghost ) );
00799     CHECK_MPI( rval, err );
00800 
00801     /*
00802     for (j = 0; j < update_num_procs; j++)
00803     {
00804       printf("[%d] will send to proc %d:", rank, update_procs[j]);
00805       for (i = update_procs_offset[j]; i < update_procs_offset[j+1]; i++) printf(" %d",
00806     update_gid[i]); printf("\n");
00807     }
00808     */
00809 
00810     /***********************************************************************
00811    COMPUTE THE SET OF NEIGHBORS THAT OUR VERTICES HAVE ON OTHER PROCESSORS
00812                  COMPUTE THE SET OF NEIGHBORS PROCESSORS
00813     ***********************************************************************/
00814 
00815     if( num_vtx_partition_boundary_local == 0 )
00816     {
00817         /* this processor does not partake in the boundary smoothing */
00818         neighbourProc.clear();
00819         mesh->tag_delete( lid_tag, err );
00820         vtx_partition_boundary_map_inverse.clear();
00821         return;
00822     }
00823 
00824     /* init the neighbour processor list */
00825     neighbourProc.clear();
00826 
00827     /* init the neighbour lists */
00828 
00829     vtx_off_proc_list.clear();
00830     vtx_off_proc_list.resize( num_vtx_partition_boundary_local );
00831 
00832     /* get the adjacency arrays that we need */
00833     std::vector< MBMesquite::Mesh::ElementHandle > adj_elements;
00834     std::vector< MBMesquite::Mesh::VertexHandle > adj_adj_vertices;
00835     std::vector< size_t > elem_offsets;
00836     std::vector< size_t > adj_vtx_offsets;
00837     mesh->vertices_get_attached_elements( ARRPTR( part_vertices ), num_vtx_partition_boundary_local, adj_elements,
00838                                           elem_offsets, err );
00839     mesh->elements_get_attached_vertices( ARRPTR( adj_elements ), adj_elements.size(), adj_adj_vertices,
00840                                           adj_vtx_offsets, err );
00841     // delete adj_elements; adj_elements = 0;
00842     std::vector< int > adj_adj_vertices_lid( adj_adj_vertices.size() );
00843     mesh->tag_get_vertex_data( lid_tag, adj_adj_vertices.size(), ARRPTR( adj_adj_vertices ),
00844                                ARRPTR( adj_adj_vertices_lid ), err );
00845     // delete adj_adj_vertices; adj_adj_vertices = 0;
00846     mesh->tag_delete( lid_tag, err );
00847 
00848     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00849     {
00850         /* loop over the elements surrounding that vertex */
00851         for( j = elem_offsets[i]; j < (int)( elem_offsets[i + 1] ); j++ )
00852         {
00853             /* loop over the neighbors of the considered vertex (i.e. the vertices of this element)
00854              */
00855             for( k = adj_vtx_offsets[j]; k < (int)( adj_vtx_offsets[j + 1] ); k++ )
00856             {
00857                 /* get the next neighbour */
00858                 incident_vtx = adj_adj_vertices_lid[k];
00859                 /* if this neighbour is a vertex that is smoothed on a different processor */
00860                 if( vtx_in_partition_boundary[incident_vtx] == 2 )
00861                 {
00862                     /* then map it into our domain */
00863                     incident_vtx = vtx_partition_boundary_map_inverse[incident_vtx];
00864                     /* is this vertex already in our neighbour list ? */
00865                     if( std::find( vtx_off_proc_list[i].begin(), vtx_off_proc_list[i].end(), incident_vtx ) ==
00866                         vtx_off_proc_list[i].end() )
00867                     {
00868                         /* if the vertex is not in the list yet ... add it */
00869                         vtx_off_proc_list[i].push_back( incident_vtx );
00870                         /* is the processor of this vertex already in the processor list */
00871                         incident_vtx = part_proc_owner[incident_vtx];
00872                         /* check by scanning the list for this processor */
00873                         if( std::find( neighbourProc.begin(), neighbourProc.end(), incident_vtx ) ==
00874                             neighbourProc.end() )
00875                         {
00876                             /* the processor is not in the list yet ... add it */
00877                             neighbourProc.push_back( incident_vtx );
00878                         }
00879                     }
00880                 }
00881             }
00882         }
00883     }
00884 
00885     /* sort the list of neighbour processors */
00886 
00887     std::sort( neighbourProc.begin(), neighbourProc.end() );
00888 
00889     /***********************************************************************
00890       COMPUTE HOW MANY VERTICES WE NEED TO SEND/RECV FROM EACH PROCESSOR
00891     ***********************************************************************/
00892     neighbourProcSend.clear();
00893     neighbourProcRecv.clear();
00894     neighbourProcSendRemain.clear();
00895     neighbourProcRecvRemain.clear();
00896 
00897     if( communication_model & 1 )  // AVOID_ALL_REDUCE
00898     {
00899         total_num_vertices_to_smooth = num_vtx_partition_boundary_local;
00900         total_num_vertices_to_recv   = num_vtx_partition_boundary_remote;
00901         neighbourProcSend.resize( neighbourProc.size(), 0 );
00902         neighbourProcRecv.resize( neighbourProc.size(), 0 );
00903 
00904         /* for each vertex we smooth, find the processors we need to send it to */
00905         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00906         {
00907             /* loop over its adjacent off-processor vertices */
00908             for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
00909             {
00910                 /* get the processor id of these vertices */
00911                 incident_vtx = part_proc_owner[vtx_off_proc_list[i][j]];
00912                 /* check if we got this processor id before */
00913                 for( k = 0; k < j; k++ )
00914                 {
00915                     if( incident_vtx == part_proc_owner[vtx_off_proc_list[i][k]] )
00916                     {
00917                         /* if we have seen this processor id already we do not need to count it again
00918                          */
00919                         incident_vtx = -1;
00920                         break;
00921                     }
00922                 }
00923                 /* if this was a new processor id */
00924                 if( incident_vtx != -1 )
00925                 {
00926                     /* find the processor in the list and increment its counter */
00927                     for( l = 0; l < neighbourProc.size(); l++ )
00928                     {
00929                         if( neighbourProc[l] == incident_vtx )
00930                         {
00931                             neighbourProcSend[l]++;
00932                             break;
00933                         }
00934                     }
00935                 }
00936             }
00937         }
00938         for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
00939         {
00940             incident_vtx = part_proc_owner[i];
00941             for( l = 0; l < neighbourProc.size(); l++ )
00942             {
00943                 if( neighbourProc[l] == incident_vtx )
00944                 {
00945                     neighbourProcRecv[l]++;
00946                     break;
00947                 }
00948             }
00949         }
00950         neighbourProcSendRemain.resize( neighbourProc.size() );
00951         neighbourProcRecvRemain.resize( neighbourProc.size() );
00952     }
00953 
00954     exportVtxGIDs.resize( num_vtx_partition_boundary );
00955     exportVtxLIDs.resize( num_vtx_partition_boundary );
00956     exportProc.resize( num_vtx_partition_boundary );
00957     in_independent_set.resize( num_vtx_partition_boundary_local );
00958 }
00959 
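The gather / tally / scatter exchange in the middle of smoothing_init() exists to tell every rank how many other ranks will request unused-ghost updates from it (update_num_procs), without any rank having to know the global ghosting pattern. For that count alone, the same result can be obtained with a single MPI_Reduce_scatter_block over a 0/1 indicator array, as in this self-contained sketch (the neighbour choice is made up purely for the demo):

#include <mpi.h>

#include <cstdio>
#include <vector>

int main( int argc, char** argv )
{
    MPI_Init( &argc, &argv );
    int rank, nprocs;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );

    // wants[p] == 1 if this rank holds unused ghosts owned by rank p.
    std::vector< int > wants( nprocs, 0 );
    if( nprocs > 1 ) wants[( rank + 1 ) % nprocs] = 1;  // demo: ghost from the right neighbour

    int update_num_procs = 0;  // how many ranks will ask *us* for updates
    MPI_Reduce_scatter_block( wants.data(), &update_num_procs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );

    std::printf( "[%d] must send unused ghost updates to %d rank(s)\n", rank, update_num_procs );
    MPI_Finalize();
    return 0;
}
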
00960 void ParallelHelperImpl::compute_first_independent_set( std::vector< Mesh::VertexHandle >& fixed_vertices )
00961 {
00962     if( nprocs == 1 ) return;
00963 
00964     int i;
00965 
00966     // to avoid all reduce we need to know how many vertices we send & receive
00967     if( communication_model & 1 )  // AVOID_ALL_REDUCE
00968     {
00969         for( i = 0; i < (long)neighbourProc.size(); i++ )
00970         {
00971             neighbourProcSendRemain[i] = neighbourProcSend[i];
00972             neighbourProcRecvRemain[i] = neighbourProcRecv[i];
00973         }
00974     }
00975 
00976     // this is iteration zero of the boundary smoothing process
00977     iteration = 0;
00978 
00979     // mark all boundary partition vertices as not smoothed
00980     for( i = 0; i < num_vtx_partition_boundary; i++ )
00981     {
00982         part_smoothed_flag[i] = 0;
00983     }
00984 
00985     // populates the in_independent_set and the vertex export arrays
00986     compute_independent_set();
00987 
00988     // counts how many vertices are already smoothed
00989     num_already_smoothed_vertices = 0;
00990 
00991     fixed_vertices.clear();
00992 
00993     /* mark which local boundary partition vertices are already smoothed (i.e. they are in the 1st
00994      * independent set) */
00995     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
00996     {
00997         if( in_independent_set[i] )
00998         {
00999             part_smoothed_flag[i] = 1;
01000             num_already_smoothed_vertices++;
01001         }
01002         else
01003         {
01004             fixed_vertices.push_back( part_vertices[i] );  // fix vertices *not* in the independent set
01005         }
01006     }
01007 
01008     if( 0 )
01009     {
01010         printf( "[%d]i%d after first we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices,
01011                 num_vtx_partition_boundary_local );
01012         fflush( NULL );
01013     }
01014 
01015     // fix the ghost vertices that are smoothed on another processor
01016     for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
01017     {
01018         fixed_vertices.push_back( part_vertices[i] );
01019     }
01020 
01021     // fix the ghost vertices that are unused
01022     for( i = 0; i < (int)( unghost_vertices.size() ); i++ )
01023     {
01024         fixed_vertices.push_back( unghost_vertices[i] );
01025     }
01026 }
01027 
01028 void ParallelHelperImpl::communicate_first_independent_set( MsqError& err )
01029 {
01030     if( nprocs == 1 ) return;
01031 
01032     switch( communication_model )
01033     {
01034         case TrulyNonBlocking:
01035             num_already_recv_vertices = comm_smoothed_vtx_tnb( err );
01036             break;
01037         case TrulyNonBlockingAvoidAllReduce:
01038             num_already_recv_vertices = comm_smoothed_vtx_tnb_no_all( err );
01039             break;
01040         case NonBlocking:
01041             num_already_recv_vertices = comm_smoothed_vtx_nb( err );
01042             break;
01043         case NonBlockingAvoidAllReduce:
01044             num_already_recv_vertices = comm_smoothed_vtx_nb_no_all( err );
01045             break;
01046         case Blocking:
01047             num_already_recv_vertices = comm_smoothed_vtx_b( err );
01048             break;
01049         case BlockingAvoidAllReduce:
01050             num_already_recv_vertices = comm_smoothed_vtx_b_no_all( err );
01051             break;
01052     }
01053     global_work_remains = ( neighbourProc.size() ? 1 : 0 );MSQ_CHKERR( err );
01054 }
01055 
01056 bool ParallelHelperImpl::compute_next_independent_set()
01057 {
01058     if( nprocs == 1 ) return false;
01059 
01060     if( global_work_remains && ( iteration < 20 ) )
01061     {
01062         iteration++;
01063         if( 0 ) printf( "[%d] work remains %d after %d iterations\n", rank, global_work_remains, iteration );
01064         compute_independent_set();
01065         next_vtx_partition_boundary = 0;
01066         return true;
01067     }
01068     else
01069     {
01070         if( global_work_remains )
01071         { printf( "WARNING: global work remains %d after %d iterations\n", global_work_remains, iteration ); }
01072         return false;
01073     }
01074 }
01075 
01076 bool ParallelHelperImpl::get_next_partition_boundary_vertex( MBMesquite::Mesh::VertexHandle& vertex_handle )
01077 {
01078     while( next_vtx_partition_boundary < num_vtx_partition_boundary_local )
01079     {
01080         if( in_independent_set[next_vtx_partition_boundary] )
01081         {
01082             vertex_handle = part_vertices[next_vtx_partition_boundary];
01083             num_already_smoothed_vertices++;
01084             assert( part_smoothed_flag[next_vtx_partition_boundary] == 0 );
01085             part_smoothed_flag[next_vtx_partition_boundary] = 1;
01086             next_vtx_partition_boundary++;
01087             return true;
01088         }
01089         next_vtx_partition_boundary++;
01090     }
01091     if( 0 )
01092     {
01093         printf( "[%d]i%d after next we smoothed %d of %d\n", rank, iteration, num_already_smoothed_vertices,
01094                 num_vtx_partition_boundary_local );
01095         fflush( NULL );
01096     }
01097     return false;
01098 }
01099 
01100 void ParallelHelperImpl::communicate_next_independent_set( MsqError& err )
01101 {
01102     switch( communication_model )
01103     {
01104         case TrulyNonBlocking:
01105             num_already_recv_vertices += comm_smoothed_vtx_tnb( err );
01106             break;
01107         case TrulyNonBlockingAvoidAllReduce:
01108             num_already_recv_vertices += comm_smoothed_vtx_tnb_no_all( err );
01109             break;
01110         case NonBlocking:
01111             num_already_recv_vertices += comm_smoothed_vtx_nb( err );
01112             break;
01113         case NonBlockingAvoidAllReduce:
01114             num_already_recv_vertices += comm_smoothed_vtx_nb_no_all( err );
01115             break;
01116         case Blocking:
01117             num_already_recv_vertices += comm_smoothed_vtx_b( err );
01118             break;
01119         case BlockingAvoidAllReduce:
01120             num_already_recv_vertices += comm_smoothed_vtx_b_no_all( err );
01121             break;
01122     }
01123 
01124     if( communication_model & 1 )  // AVOID_ALL_REDUCE
01125     {
01126         global_work_remains = ( total_num_vertices_to_smooth - num_already_smoothed_vertices ) +
01127                               ( total_num_vertices_to_recv - num_already_recv_vertices );
01128         if( 0 )
01129         {
01130             printf( "[%d]i%d %d - %d + %d  - %d = %d \n", rank, iteration, total_num_vertices_to_smooth,
01131                     num_already_smoothed_vertices, total_num_vertices_to_recv, num_already_recv_vertices,
01132                     global_work_remains );
01133             fflush( NULL );
01134         }
01135     }
01136     else
01137     {
01138         int i, work_remains = 0;
01139         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
01140         {
01141             if( part_smoothed_flag[i] == 0 ) { work_remains++; }
01142         }
01143         int rval = MPI_Allreduce( &work_remains, &global_work_remains, 1, MPI_INT, MPI_SUM, (MPI_Comm)communicator );
01144         CHECK_MPI( rval, err );
01145     }
01146 }
01147 
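The two branches above reflect the meaning of bit 0 of communication_model: with AVOID_ALL_REDUCE set, global_work_remains is derived from local send/receive bookkeeping and no collective is needed per iteration; otherwise every rank contributes its count of still-unsmoothed boundary vertices to a global sum. A sketch of that fallback termination test, assuming work_remains is the local count computed as in the loop above:

#include <mpi.h>

// Returns the number of partition-boundary vertices still unsmoothed across all ranks;
// the outer loop keeps iterating while this is non-zero.
static int sum_remaining_work( int work_remains, MPI_Comm comm )
{
    int global_work_remains = 0;
    MPI_Allreduce( &work_remains, &global_work_remains, 1, MPI_INT, MPI_SUM, comm );
    return global_work_remains;
}
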
01148 void ParallelHelperImpl::smoothing_close( MsqError& err )
01149 {
01150     int i, j, rval;
01151 
01152     if( nprocs == 1 ) return;
01153 
01154     //  printf("[%d] used %d iterations\n", rank, iteration);
01155 
01156     // communicate unused ghost nodes
01157 
01158     std::vector< double > update_updates;
01159     std::vector< MPI_Request > update_requests;
01160 
01161     if( update_num_procs )
01162     {
01163         /* get the tags so we can find the requested vertices */
01164         std::vector< size_t > gid( num_vertex );
01165         mesh->vertices_get_global_id( ARRPTR( vertices ), ARRPTR( gid ), num_vertex, err );MSQ_ERRRTN( err );
01166         std::vector< unsigned char > app_fixed( num_vertex );
01167         mesh->vertices_get_byte( ARRPTR( vertices ), ARRPTR( app_fixed ), num_vertex, err );MSQ_ERRRTN( err );
01168         std::vector< int > proc_owner( num_vertex );
01169         mesh->vertices_get_processor_id( ARRPTR( vertices ), ARRPTR( proc_owner ), num_vertex, err );MSQ_ERRRTN( err );
01170 
01171         if( 0 )
01172         {
01173             int ncull = 0;
01174             for( i = 0; i < num_vertex; ++i )
01175             {
01176                 if( app_fixed[i] & MsqVertex::MSQ_CULLED ) { ++ncull; }
01177             }
01178             std::cout << "P[" << rank << "] ncull= " << ncull << " num_vertex= " << num_vertex << std::endl;
01179         }
01180 
01181         /* only interested in fixed flag from vertex byte? Clear others. */
01182         // srkenno AT sandia.gov 1/19/12: bug fix: changed from |= which makes all vertices fixed
01183         for( i = 0; i < num_vertex; ++i )
01184             app_fixed[i] &= MsqVertex::MSQ_HARD_FIXED;
01185 
01186         /* insert all our unfixed vertices into a map so we can find the requested vertices
01187          * efficiently */
01188         VertexIdMap temp_vid_map;
01189         for( j = 0; j < num_vertex; j++ )
01190         {
01191             if( proc_owner[j] == rank && app_fixed[j] == false ) { vertex_map_insert( temp_vid_map, gid[j], rank, j ); }
01192         }
01193 
01194         /* deallocate the tags */
01195         // delete [] gid; gid = 0;
01196         // delete [] app_fixed; app_fixed = 0;
01197         // delete [] proc_owner; proc_owner = 0;
01198 
01199         /* find the requested updates and collect them into an array */
01200         MBMesquite::MsqVertex coordinates;
01201         update_updates.resize( update_num_vtx * 3 );
01202         for( i = 0; i < update_num_vtx; i++ )
01203         {
01204             j = vertex_map_find( temp_vid_map, update_gid[i], rank );
01205             mesh->vertices_get_coordinates( &( vertices[j] ), &coordinates, 1, err );MSQ_ERRRTN( err );
01206             update_updates[3 * i + 0] = coordinates[0];
01207             update_updates[3 * i + 1] = coordinates[1];
01208             update_updates[3 * i + 2] = coordinates[2];
01209             //      printf("[%d] send gid %d with %g %g %g\n", rank, update_gid[i], coordinates[0],
01210             //      coordinates[1], coordinates[2]);
01211         }
01212 
01213         /* deallocate the map and the gid array */
01214         // delete temp_vid_map; temp_vid_map = 0;
01215         // delete [] update_gid; update_gid = 0;
01216 
01217         update_requests.resize( update_num_procs );
01218         /* send each processor the unused ghost node updates that they requested */
01219         for( j = 0; j < update_num_procs; j++ )
01220         {
01221             rval = MPI_Isend( &( update_updates[update_procs_offset[j] * 3] ), update_procs_num_vtx[j] * 3, MPI_DOUBLE,
01222                               update_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator,
01223                               &( update_requests[j] ) );
01224             //      printf("[%d] sending %d of %d from %d with offset %d \n", rank,
01225             //      update_procs_num_vtx[j], update_num_vtx, update_procs[j],
01226             //      update_procs_offset[j]);
01227             CHECK_MPI( rval, err );
01228         }
01229 
01230         /* deallocate more arrays that we no longer need */
01231         // delete [] update_procs_offset; update_procs_offset = 0;
01232         // delete [] update_procs_num_vtx; update_procs_num_vtx = 0;
01233         // delete [] update_procs; update_procs = 0;
01234     }
01235 
01236     if( unghost_num_procs )
01237     {
01238         std::vector< MPI_Request > unghost_requests( unghost_num_procs );
01239         /* receive from each processor the unused ghost node updates we want from them */
01240         std::vector< double > unghost_updates( unghost_num_vtx * 3 );
01241         for( j = 0; j < unghost_num_procs; j++ )
01242         {
01243             rval = MPI_Irecv( &( unghost_updates[unghost_procs_offset[j] * 3] ), unghost_procs_num_vtx[j] * 3,
01244                               MPI_DOUBLE, unghost_procs[j], GHOST_NODE_VERTEX_UPDATES, (MPI_Comm)communicator,
01245                               &( unghost_requests[j] ) );
01246             //      printf("[%d] receiving %d of %d from %d with offset %d \n", rank,
01247             //      unghost_procs_num_vtx[j], unghost_num_vtx, unghost_procs[j],
01248             //      unghost_procs_offset[j]);
01249             CHECK_MPI( rval, err );
01250         }
01251 
01252         /* deallocate more arrays that we no longer need */
01253         // delete [] unghost_procs_offset; unghost_procs_offset = 0;
01254         // delete [] unghost_procs_num_vtx; unghost_procs_num_vtx = 0;
01255         // delete [] unghost_procs; unghost_procs = 0;
01256 
01257         std::vector< MPI_Status > status( unghost_num_procs );
01258         rval = MPI_Waitall( unghost_num_procs, ARRPTR( unghost_requests ), ARRPTR( status ) );
01259         CHECK_MPI( rval, err );
01260 
01261         /* apply the received updates for the unused ghost vertices */
01262         for( i = 0; i < unghost_num_vtx; i++ )
01263         {
01264             MBMesquite::Vector3D coordinates;
01265             coordinates[0] = unghost_updates[3 * i + 0];
01266             coordinates[1] = unghost_updates[3 * i + 1];
01267             coordinates[2] = unghost_updates[3 * i + 2];
01268             //      printf("[%d] recv %g %g %g\n", rank, coordinates[0], coordinates[1],
01269             //      coordinates[2]);
01270             mesh->vertex_set_coordinates( unghost_vertices[i], coordinates, err );MSQ_ERRRTN( err );
01271         }
01272 
01273         /* deallocate more arrays that we no longer need */
01274         // delete unghost_vertices; unghost_vertices = 0;
01275         // delete [] unghost_updates; unghost_updates = 0;
01276         // delete [] unghost_requests; unghost_requests = 0;
01277     }
01278 
01279     // if (update_num_procs)
01280     //{
01281     //  delete [] update_updates; update_updates = 0;
01282     //  delete [] update_requests; update_requests = 0;
01283     //}
01284 
01285     // if (vertices) delete vertices; vertices = 0;
01286     // if (vtx_in_partition_boundary) delete [] vtx_in_partition_boundary; vtx_in_partition_boundary
01287     // = 0; if (part_vertices) delete part_vertices; part_vertices = 0; if (unghost_vertices) delete
01288     // unghost_vertices; unghost_vertices = 0; if (part_proc_owner) delete [] part_proc_owner;
01289     // part_proc_owner = 0; if (part_gid) delete [] part_gid; part_gid = 0; if (part_smoothed_flag)
01290     // delete [] part_smoothed_flag; part_smoothed_flag = 0; if (part_rand_number) delete []
01291     // part_rand_number; part_rand_number = 0; if (exportVtxGIDs) delete [] exportVtxGIDs;
01292     // exportVtxGIDs = 0; if (exportVtxLIDs) delete [] exportVtxLIDs; exportVtxLIDs = 0; if
01293     // (exportProc) delete [] exportProc; exportProc = 0; if (in_independent_set) delete []
01294     // in_independent_set; in_independent_set = 0; if (vid_map) delete vid_map; vid_map = 0; if
01295     // (neighbourProcSend) delete [] neighbourProcSend; neighbourProcSend = 0; if (neighbourProcRecv)
01296     // delete [] neighbourProcRecv; neighbourProcRecv = 0; if (neighbourProcSendRemain) delete []
01297     // neighbourProcSendRemain; neighbourProcSendRemain = 0; if (neighbourProcRecvRemain) delete []
01298     // neighbourProcRecvRemain; neighbourProcRecvRemain = 0; if (vtx_off_proc_list_size) delete []
01299     // vtx_off_proc_list_size; vtx_off_proc_list_size = 0; if (vtx_off_proc_list) {
01300     //  for (i = 0; i < num_vtx_partition_boundary_local; i++) free(vtx_off_proc_list[i]);
01301     //  delete [] vtx_off_proc_list; vtx_off_proc_list = 0;
01302     //}
01303     // if (neighbourProc) free(neighbourProc); neighbourProc = 0;
01304 }
01305 
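/*
 * Editorial sketch (not part of the original source): the tail of the routine above pushes
 * smoothed coordinates out to the processes that hold ghost copies and pulls updates for our
 * own unused ghost vertices.  The helper below reduces that exchange to one nonblocking
 * receive per ghost vertex for brevity; the real code batches a contiguous range of x/y/z
 * triples per neighbour process.  The names comm, ghost_owner and ghost_xyz are hypothetical.
 */
static void example_receive_ghost_updates( MPI_Comm comm, const std::vector< int >& ghost_owner,
                                           std::vector< double >& ghost_xyz )
{
    std::vector< MPI_Request > req( ghost_owner.size() );
    for( size_t j = 0; j < ghost_owner.size(); j++ ) /* one x/y/z triple per unused ghost vertex */
        MPI_Irecv( &ghost_xyz[3 * j], 3, MPI_DOUBLE, ghost_owner[j], GHOST_NODE_VERTEX_UPDATES, comm, &req[j] );
    if( !req.empty() ) MPI_Waitall( (int)req.size(), &req[0], MPI_STATUSES_IGNORE );
    /* a real implementation would now write each triple back with vertex_set_coordinates() */
}
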
01306 typedef struct VertexPack
01307 {
01308     double x;
01309     double y;
01310     double z;
01311     union
01312     {
01313         double mist;
01314         size_t glob_id;
01315     };
01316 } VertexPack;
01317 
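/*
 * Editorial note (not part of the original source): every send/receive below transfers
 * VertexPack records as a raw block of 4 * N double-sized slots, with the global id carried
 * in the union member.  A minimal compile-time sanity check of that layout assumption,
 * assuming a C++11 compiler, could look like this:
 */
static_assert( sizeof( VertexPack ) == 4 * sizeof( double ),
               "VertexPack is transferred as four double-sized slots per vertex" );
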
01318 int ParallelHelperImpl::comm_smoothed_vtx_tnb( MsqError& err )
01319 {
01320     int i, j, k, rval;
01321 
01322     // printf("[%d]i%d truly non blocking\n",rank, iteration);fflush(NULL);
01323 
01324     /* compute how many vertices we send to each processor */
01325 
01326     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
01327     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
01328     for( i = 0; i < num_exportVtx; i++ )
01329     {
01330         for( j = 0; j < (long)neighbourProc.size(); j++ )
01331         {
01332             if( exportProc[i] == neighbourProc[j] )
01333             {
01334                 /* increment count */
01335                 numVtxPerProcSend[j]++;
01336                 /* end loop */
01337                 break;
01338             }
01339         }
01340         /* did the loop end without finding the processor? */
01341         if( j == (long)neighbourProc.size() )
01342         {
01343             printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration,
01344                     i, exportProc[i], (unsigned long)neighbourProc.size() );
01345         }
01346     }
01347 
01348     /* tell each processor how many vertices they can expect from us */
01349     /* also ask each processor how many vertices we can expect from them */
01350 
01351     int num_neighbourProcSend = 0;
01352     int num_neighbourProcRecv = 0;
01353     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01354     std::vector< MPI_Request > requests_recv( neighbourProc.size() );
01355     for( j = 0; j < (long)neighbourProc.size(); j++ )
01356     {
01357         /* send the vertex count to this processor */
01358         if( 0 )
01359         {
01360             printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01361                     neighbourProc[j] );
01362             fflush( NULL );
01363         }
01364         rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01365                           (MPI_Comm)communicator, &( requests_send[j] ) );
01366         CHECK_MPI_RZERO( rval, err );
01367         num_neighbourProcSend++;
01368 
01369         /* recv the vertex count for this processor */
01370         if( 0 )
01371         {
01372             printf( "[%d]i%d Listen  recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[j],
01373                     neighbourProc[j] );
01374             fflush( NULL );
01375         }
01376         rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01377                           (MPI_Comm)communicator, &( requests_recv[j] ) );
01378         CHECK_MPI_RZERO( rval, err );
01379         num_neighbourProcRecv++;
01380     }
01381 
01382     /* set up memory for the outgoing vertex data blocks */
01383 
01384     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01385     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01386     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01387     for( i = 1; i < (long)neighbourProc.size(); i++ )
01388     {
01389         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01390     }
01391 
01392     /* place vertex data going to the same processor into consecutive memory space */
01393 
01394     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size() );
01395     for( i = 0; i < (long)neighbourProc.size(); i++ )
01396     {
01397         numVtxPerProcSendPACKED[i] = 0;
01398     }
01399     for( i = 0; i < num_exportVtx; i++ )
01400     {
01401         for( j = 0; j < (long)neighbourProc.size(); j++ )
01402         {
01403             if( exportProc[i] == neighbourProc[j] )
01404             {
01405                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01406                 numVtxPerProcSendPACKED[j]++;
01407                 MBMesquite::MsqVertex coordinates;
01408                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01409                 MSQ_ERRZERO( err );
01410                 packing_vertex->x       = coordinates[0];
01411                 packing_vertex->y       = coordinates[1];
01412                 packing_vertex->z       = coordinates[2];
01413                 packing_vertex->glob_id = exportVtxGIDs[i];
01414             }
01415         }
01416     }
01417 
01418     /* wait until we have heard from all processors how many vertices we will receive from them */
01419 
01420     std::vector< MPI_Status > status( neighbourProc.size() );
01421 
01422     if( num_neighbourProcRecv )
01423     {
01424         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) );
01425         CHECK_MPI_RZERO( rval, err );
01426     }
01427 
01428     /* how many vertices will we receive */
01429 
01430     int numVtxImport = 0;
01431     for( i = 0; i < (long)neighbourProc.size(); i++ )
01432     {
01433         numVtxImport += numVtxPerProcRecv[i];
01434     }
01435 
01436     /* set up memory for the incoming vertex data blocks */
01437 
01438     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01439     std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() );
01440     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01441     for( i = 1; i < (long)neighbourProc.size(); i++ )
01442     {
01443         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1];
01444     }
01445 
01446     /* post receives for all processors that have something for us */
01447     /* post sends for all processors that we have something for */
01448 
01449     num_neighbourProcRecv = 0;
01450 
01451     for( i = 0; i < (long)neighbourProc.size(); i++ )
01452     {
01453         if( numVtxPerProcRecv[i] )
01454         {
01455             if( 0 )
01456             {
01457                 printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01458                         neighbourProc[i] );
01459                 fflush( NULL );
01460             }
01461             rval =
01462                 MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE_PRECISION, neighbourProc[i],
01463                            VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) );
01464             CHECK_MPI_RZERO( rval, err );
01465             num_neighbourProcRecv++;
01466         }
01467         else
01468         {
01469             requests_recv[i] = MPI_REQUEST_NULL;
01470         }
01471         if( numVtxPerProcSend[i] )
01472         {
01473             if( 0 )
01474             {
01475                 printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i],
01476                         neighbourProc[i] );
01477                 fflush( NULL );
01478             }
01479             rval =
01480                 MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE_PRECISION, neighbourProc[i],
01481                            VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) );
01482             CHECK_MPI_RZERO( rval, err );
01483         }
01484         else
01485         {
01486             requests_send[i] = MPI_REQUEST_NULL;
01487         }
01488     }
01489 
01490     /* wait for some receive to arrive */
01491 
01492     int local_id;
01493     while( num_neighbourProcRecv )
01494     {
01495         rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) );
01496         CHECK_MPI_RZERO( rval, err );
01497         /* unpack all vertices */
01498         for( i = 0; i < numVtxPerProcRecv[k]; i++ )
01499         {
01500             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01501             if( local_id )
01502             {
01503                 MBMesquite::Vector3D coordinates;
01504                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01505                                  packed_vertices_import[k][i].z );
01506                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01507                 MSQ_ERRZERO( err );
01508                 assert( part_smoothed_flag[local_id] == 0 );
01509                 part_smoothed_flag[local_id] = 1;
01510             }
01511             else
01512             {
01513                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01514                         packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01515             }
01516         }
01517         num_neighbourProcRecv--;
01518     }
01519 
01520     /* all receives have completed. it is safe to release the memory */
01521     // free(vertex_pack_import);
01522 
01523     /* wait until the sends have completed */
01524 
01525     if( num_neighbourProcSend )
01526     {
01527         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) );
01528         CHECK_MPI_RZERO( rval, err );
01529     }
01530 
01531     /* all sends have completed. it is safe to release the memory */
01532     // free(vertex_pack_export);
01533 
01534     return numVtxImport;
01535 }
01536 
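/*
 * Editorial sketch (not part of the original source): the "truly non-blocking" pattern used
 * by comm_smoothed_vtx_tnb above, reduced to its essentials.  All names here
 * (example_exchange_counts_and_data, comm, nbrs, send_counts, send_data) are hypothetical;
 * the point is the three-step handshake: nonblocking count exchange, MPI_Waitall on the
 * count receives, then per-neighbour data transfers drained with MPI_Waitany.
 */
static int example_exchange_counts_and_data( MPI_Comm comm, const std::vector< int >& nbrs,
                                             std::vector< int >& send_counts,
                                             std::vector< std::vector< double > >& send_data )
{
    const int n = (int)nbrs.size();
    std::vector< int > recv_counts( n, 0 );
    std::vector< MPI_Request > creq_send( n ), creq_recv( n );

    /* step 1: tell every neighbour how much we will send and ask how much we will receive */
    for( int j = 0; j < n; j++ )
    {
        MPI_Isend( &send_counts[j], 1, MPI_INT, nbrs[j], 0, comm, &creq_send[j] );
        MPI_Irecv( &recv_counts[j], 1, MPI_INT, nbrs[j], 0, comm, &creq_recv[j] );
    }
    if( n ) MPI_Waitall( n, &creq_recv[0], MPI_STATUSES_IGNORE );

    /* step 2: post data receives and sends only where the counts are nonzero */
    std::vector< std::vector< double > > recv_data( n );
    std::vector< MPI_Request > dreq_send( n, MPI_REQUEST_NULL ), dreq_recv( n, MPI_REQUEST_NULL );
    int pending = 0;
    for( int j = 0; j < n; j++ )
    {
        if( recv_counts[j] )
        {
            recv_data[j].resize( recv_counts[j] );
            MPI_Irecv( &recv_data[j][0], recv_counts[j], MPI_DOUBLE, nbrs[j], 1, comm, &dreq_recv[j] );
            pending++;
        }
        if( send_counts[j] )
            MPI_Isend( &send_data[j][0], send_counts[j], MPI_DOUBLE, nbrs[j], 1, comm, &dreq_send[j] );
    }

    /* step 3: unpack each incoming block as soon as it arrives, then drain the sends */
    int total = 0;
    while( pending )
    {
        int k;
        MPI_Waitany( n, &dreq_recv[0], &k, MPI_STATUS_IGNORE );
        total += recv_counts[k]; /* a real implementation would unpack recv_data[k] here */
        pending--;
    }
    if( n ) MPI_Waitall( n, &dreq_send[0], MPI_STATUSES_IGNORE );
    if( n ) MPI_Waitall( n, &creq_send[0], MPI_STATUSES_IGNORE );
    return total;
}
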
01537 int ParallelHelperImpl::comm_smoothed_vtx_tnb_no_all( MsqError& err )
01538 {
01539     int i, j, k, rval;
01540 
01541     // printf("[%d]i%d truly non blocking avoid reduce all\n", rank, iteration); fflush(NULL);
01542 
01543     /* compute how many vertices we send to each processor */
01544 
01545     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
01546     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
01547     for( i = 0; i < num_exportVtx; i++ )
01548     {
01549         for( j = 0; j < (long)neighbourProc.size(); j++ )
01550         {
01551             if( exportProc[i] == neighbourProc[j] )
01552             {
01553                 /* increment count */
01554                 numVtxPerProcSend[j]++;
01555                 /* end loop */
01556                 break;
01557             }
01558         }
01559         /* did the loop end without finding the processor? */
01560         if( j == (long)neighbourProc.size() )
01561         {
01562             printf( "[%d]i%d WARNING: did not find exportProc[%d] = %d in list of %lu processors.\n", rank, iteration,
01563                     i, exportProc[i], (unsigned long)neighbourProc.size() );
01564         }
01565     }
01566 
01567     /* tell each processor how many vertices they can expect from us */
01568     /* also ask each processor how many vertices we can expect from them */
01569 
01570     int num_neighbourProcSend = 0;
01571     int num_neighbourProcRecv = 0;
01572     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01573     std::vector< MPI_Request > requests_recv( neighbourProc.size() );
01574     for( j = 0; j < (long)neighbourProc.size(); j++ )
01575     {
01576         if( neighbourProcSendRemain[j] )
01577         {
01578             /* send the vertex count to this processor */
01579             if( 0 )
01580             {
01581                 printf( "[%d]i%d Announce send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01582                         neighbourProc[j] );
01583                 fflush( NULL );
01584             }
01585             rval = MPI_Isend( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01586                               (MPI_Comm)communicator, &( requests_send[j] ) );
01587             CHECK_MPI_RZERO( rval, err );
01588             num_neighbourProcSend++;
01589         }
01590         else
01591         {
01592             requests_send[j] = MPI_REQUEST_NULL;
01593         }
01594         if( neighbourProcRecvRemain[j] )
01595         {
01596             /* recv the vertex count for this processor */
01597             if( 0 )
01598             {
01599                 printf( "[%d]i%d Listen recv xx vertices from proc %d\n", rank, iteration, neighbourProc[j] );
01600                 fflush( NULL );
01601             }
01602             rval = MPI_Irecv( &( numVtxPerProcRecv[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01603                               (MPI_Comm)communicator, &( requests_recv[j] ) );
01604             CHECK_MPI_RZERO( rval, err );
01605             num_neighbourProcRecv++;
01606         }
01607         else
01608         {
01609             requests_recv[j] = MPI_REQUEST_NULL;
01610         }
01611     }
01612 
01613     /* set up memory for the outgoing vertex data blocks */
01614 
01615     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01616     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01617     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01618     for( i = 1; i < (long)neighbourProc.size(); i++ )
01619     {
01620         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01621     }
01622 
01623     /* place vertex data going to the same processor into consecutive memory space */
01624 
01625     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
01626     for( i = 0; i < num_exportVtx; i++ )
01627     {
01628         for( j = 0; j < (long)neighbourProc.size(); j++ )
01629         {
01630             if( exportProc[i] == neighbourProc[j] )
01631             {
01632                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01633                 numVtxPerProcSendPACKED[j]++;
01634                 MBMesquite::MsqVertex coordinates;
01635                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01636                 MSQ_ERRZERO( err );
01637                 packing_vertex->x       = coordinates[0];
01638                 packing_vertex->y       = coordinates[1];
01639                 packing_vertex->z       = coordinates[2];
01640                 packing_vertex->glob_id = exportVtxGIDs[i];
01641                 if( 0 )
01642                     printf( "[%d]i%d vertex %lu packed %g %g %g\n", rank, iteration, (unsigned long)exportVtxGIDs[i],
01643                             packing_vertex->x, packing_vertex->y, packing_vertex->z );
01644             }
01645         }
01646     }
01647 
01648     /* wait until we have heard from all processors how many vertices we will receive from them */
01649 
01650     std::vector< MPI_Status > status( neighbourProc.size() );
01651 
01652     if( num_neighbourProcRecv )
01653     {
01654         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_recv ), ARRPTR( status ) );
01655         CHECK_MPI_RZERO( rval, err );
01656     }
01657 
01658     /* how many vertices will we receive */
01659 
01660     int numVtxImport = 0;
01661     for( i = 0; i < (long)neighbourProc.size(); i++ )
01662     {
01663         numVtxImport += numVtxPerProcRecv[i];
01664         neighbourProcRecvRemain[i] -= numVtxPerProcRecv[i];
01665         neighbourProcSendRemain[i] -= numVtxPerProcSend[i];
01666     }
01667 
01668     /* set up memory for the incoming vertex data blocks */
01669 
01670     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01671     std::vector< VertexPack* > packed_vertices_import( neighbourProc.size() );
01672     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01673     for( i = 1; i < (long)neighbourProc.size(); i++ )
01674     {
01675         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecv[i - 1];
01676     }
01677 
01678     /* post receives for all processors that have something for us */
01679     /* post sends for all processors that we have something for */
01680 
01681     num_neighbourProcRecv = 0;
01682 
01683     for( i = 0; i < (long)neighbourProc.size(); i++ )
01684     {
01685         if( 0 )
01686         {
01687             printf( "[%d]i%d Will recv %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01688                     neighbourProc[i] );
01689             fflush( NULL );
01690         }
01691         if( numVtxPerProcRecv[i] )
01692         {
01693             rval =
01694                 MPI_Irecv( packed_vertices_import[i], 4 * numVtxPerProcRecv[i], MPI_DOUBLE_PRECISION, neighbourProc[i],
01695                            VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_recv[i] ) );
01696             CHECK_MPI_RZERO( rval, err );
01697             num_neighbourProcRecv++;
01698         }
01699         else
01700         {
01701             requests_recv[i] = MPI_REQUEST_NULL;
01702         }
01703         if( 0 )
01704         {
01705             printf( "[%d]i%d Will send %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[i],
01706                     neighbourProc[i] );
01707             fflush( NULL );
01708         }
01709         if( numVtxPerProcSend[i] )
01710         {
01711             rval =
01712                 MPI_Isend( packed_vertices_export[i], 4 * numVtxPerProcSend[i], MPI_DOUBLE_PRECISION, neighbourProc[i],
01713                            VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[i] ) );
01714             CHECK_MPI_RZERO( rval, err );
01715         }
01716         else
01717         {
01718             requests_send[i] = MPI_REQUEST_NULL;
01719         }
01720     }
01721 
01722     /* wait for some receive to arrive */
01723 
01724     int local_id;
01725     while( num_neighbourProcRecv )
01726     {
01727         rval = MPI_Waitany( neighbourProc.size(), ARRPTR( requests_recv ), &k, ARRPTR( status ) );
01728         CHECK_MPI_RZERO( rval, err );
01729         /* unpack all vertices */
01730         for( i = 0; i < numVtxPerProcRecv[k]; i++ )
01731         {
01732             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01733             if( local_id )
01734             {
01735                 MBMesquite::Vector3D coordinates;
01736                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01737                                  packed_vertices_import[k][i].z );
01738                 if( 0 )
01739                     printf( "[%d]i%d vertex %d becomes %g %g %g\n", rank, iteration, local_id,
01740                             packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01741                             packed_vertices_import[k][i].z );
01742                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01743                 MSQ_ERRZERO( err );
01744                 assert( part_smoothed_flag[local_id] == 0 );
01745                 part_smoothed_flag[local_id] = 1;
01746             }
01747             else
01748             {
01749                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01750                         packed_vertices_import[k][i].glob_id, neighbourProc[k] );
01751             }
01752         }
01753         num_neighbourProcRecv--;
01754     }
01755 
01756     /* all receives have completed. it is safe to release the memory */
01757     // free(vertex_pack_import);
01758 
01759     /* wait until the sends have completed */
01760 
01761     if( num_neighbourProcSend )
01762     {
01763         rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( status ) );
01764         CHECK_MPI_RZERO( rval, err );
01765     }
01766 
01767     /* all sends have completed. it is safe to release the memory */
01768     // free(vertex_pack_export);
01769 
01770     return numVtxImport;
01771 }
01772 
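/*
 * Editorial note (not part of the original source): the *_no_all variants above avoid a
 * global reduction by keeping per-neighbour budgets (neighbourProcSendRemain /
 * neighbourProcRecvRemain): a count header is only exchanged with a neighbour while its
 * budget is nonzero, and each pass's count is subtracted from the budget.  A minimal sketch
 * of that bookkeeping, with hypothetical names:
 */
static bool example_should_post_header( const std::vector< int >& remain, int j )
{
    return remain[j] > 0; /* neighbour j still owes (or is owed) vertices in later passes */
}

static void example_spend_budget( std::vector< int >& remain, int j, int count_this_pass )
{
    assert( remain[j] >= count_this_pass ); /* never exchange more than was announced up front */
    remain[j] -= count_this_pass;
}
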
01773 int ParallelHelperImpl::comm_smoothed_vtx_nb( MsqError& err )
01774 {
01775     int i, j, k, rval;
01776 
01777     // printf("[%d] %d %d non blocking\n",rank, iteration, pass);fflush(NULL);
01778 
01779     /* how many vertices will we receive */
01780 
01781     std::vector< int > numVtxPerProcSend( neighbourProc.size() );
01782     for( i = 0; i < (long)neighbourProc.size(); i++ )
01783     {
01784         numVtxPerProcSend[i] = 0;
01785     }
01786     for( i = 0; i < num_exportVtx; i++ )
01787     {
01788         for( j = 0; j < (long)neighbourProc.size(); j++ )
01789         {
01790             if( exportProc[i] == neighbourProc[j] )
01791             {
01792                 /* increment count */
01793                 numVtxPerProcSend[j]++;
01794                 /* end loop */
01795                 break;
01796             }
01797         }
01798         /* assert loop did not end without finding processor */
01799         assert( j != (long)neighbourProc.size() );
01800     }
01801 
01802     /* tell each processor how many vertices to expect */
01803 
01804     for( j = 0; j < (long)neighbourProc.size(); j++ )
01805     {
01806         rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
01807                          (MPI_Comm)communicator );
01808         CHECK_MPI_RZERO( rval, err );
01809         //    printf("[%d]i%d Announcing %d vertices to proc
01810         //    %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL);
01811     }
01812 
01813     /* place vertex data going to the same processor into consecutive memory space */
01814     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
01815     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
01816     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
01817     for( i = 1; i < (long)neighbourProc.size(); i++ )
01818     {
01819         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
01820     }
01821 
01822     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
01823     for( i = 0; i < num_exportVtx; i++ )
01824     {
01825         for( j = 0; j < (long)neighbourProc.size(); j++ )
01826         {
01827             if( exportProc[i] == neighbourProc[j] )
01828             {
01829                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
01830                 numVtxPerProcSendPACKED[j]++;
01831                 MBMesquite::MsqVertex coordinates;
01832                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
01833                 MSQ_ERRZERO( err );
01834                 packing_vertex->x       = coordinates[0];
01835                 packing_vertex->y       = coordinates[1];
01836                 packing_vertex->z       = coordinates[2];
01837                 packing_vertex->glob_id = exportVtxGIDs[i];
01838             }
01839         }
01840     }
01841     // delete [] numVtxPerProcSendPACKED;
01842 
01843     /* now receive from each processor how many vertices to expect */
01844 
01845     int num;
01846     int proc;
01847     int numVtxImport          = 0;
01848     int num_neighbourProcRecv = 0;
01849     std::vector< int > numVtxPerProcRecv( neighbourProc.size() );
01850     MPI_Status status;
01851     for( j = 0; j < (long)neighbourProc.size(); j++ )
01852     {
01853         numVtxPerProcRecv[j] = 0;
01854     }
01855 
01856     for( j = 0; j < (long)neighbourProc.size(); j++ )
01857     {
01858         /* get the vertex count for some processor */
01859         rval = MPI_Recv( &num,                      /* message buffer */
01860                          1,                         /* one data item */
01861                          MPI_INT,                   /* of type int */
01862                          MPI_ANY_SOURCE,            /* receive from any sender */
01863                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */
01864                          (MPI_Comm)communicator,    /* default communicator */
01865                          &status );                 /* info about the received message */
01866         CHECK_MPI_RZERO( rval, err );
01867         proc = status.MPI_SOURCE;
01868         /* will we import vertices from this processor */
01869 
01870         //    printf("[%d]i%d Heard we will receive %d vertices from proc
01871         //    %d\n",rank,iteration,num,proc); fflush(NULL);
01872 
01873         if( num )
01874         {
01875             /* increase number of processors we will receive from */
01876             num_neighbourProcRecv++;
01877             /* add number of vertices we will receive to the import total */
01878             numVtxImport += num;
01879         }
01880         /* find this processor in the list */
01881         for( i = 0; i < (long)neighbourProc.size(); i++ )
01882         {
01883             if( neighbourProc[i] == proc )
01884             {
01885                 numVtxPerProcRecv[i] = num;
01886                 break;
01887             }
01888         }
01889         /* assert loop did not end without finding processor */
01890         assert( i != (long)neighbourProc.size() );
01891         if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc );
01892         fflush( NULL );
01893     }
01894 
01895     /* create list of processors we receive from */
01896     std::vector< int > neighbourProcRecv( num_neighbourProcRecv );
01897     std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv );
01898     for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ )
01899     {
01900         if( 0 )
01901             printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
01902                     neighbourProc[i] );
01903         fflush( NULL );
01904         if( numVtxPerProcRecv[i] )
01905         {
01906             neighbourProcRecv[k]     = neighbourProc[i];
01907             numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i];
01908             k++;
01909         }
01910     }
01911 
01912     /* set up memory for the incoming vertex data blocks */
01913     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
01914     std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv );
01915     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
01916     for( i = 1; i < num_neighbourProcRecv; i++ )
01917     {
01918         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1];
01919     }
01920 
01921     /* receive from all processors that have something for us */
01922     std::vector< MPI_Request > request( num_neighbourProcRecv );
01923     for( j = 0; j < num_neighbourProcRecv; j++ )
01924     {
01925         rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE_PRECISION,
01926                           neighbourProcRecv[j], VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) );
01927         CHECK_MPI_RZERO( rval, err );
01928         if( 0 )
01929         {
01930             printf( "[%d]i%d Scheduling receipt of %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j],
01931                     neighbourProcRecv[j] );
01932             fflush( NULL );
01933         }
01934     }
01935 
01936     /* now send the data blocks */
01937 
01938     std::vector< MPI_Request > requests_send( neighbourProc.size() );
01939     for( j = 0; j < (long)neighbourProc.size(); j++ )
01940     {
01941         if( numVtxPerProcSend[j] )
01942         {
01943             rval =
01944                 MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE_PRECISION, neighbourProc[j],
01945                            VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) );
01946             CHECK_MPI_RZERO( rval, err );
01947             if( 0 )
01948             {
01949                 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
01950                         neighbourProc[j] );
01951                 fflush( NULL );
01952             }
01953         }
01954         else
01955         {
01956             requests_send[j] = MPI_REQUEST_NULL;
01957         }
01958     }
01959 
01960     /* process messages as they arrive */
01961 
01962     int local_id;
01963     for( j = 0; j < num_neighbourProcRecv; j++ )
01964     {
01965         rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status );
01966         CHECK_MPI_RZERO( rval, err );
01967 
01968         /* unpack messages */
01969         proc = status.MPI_SOURCE;
01970         int count;
01971         MPI_Get_count( &status, MPI_INT, &count );
01972         if( 0 )
01973             printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k],
01974                     count, neighbourProcRecv[k], proc );
01975         fflush( NULL );
01976         for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ )
01977         {
01978             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
01979             if( local_id )
01980             {
01981                 MBMesquite::Vector3D coordinates;
01982                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
01983                                  packed_vertices_import[k][i].z );
01984                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
01985                 MSQ_ERRZERO( err );
01986                 assert( part_smoothed_flag[local_id] == 0 );
01987                 part_smoothed_flag[local_id] = 1;
01988                 if( 0 )
01989                     printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
01990                             (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x,
01991                             packed_vertices_import[k][i].y, packed_vertices_import[k][i].z );
01992             }
01993             else
01994             {
01995                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
01996                         packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
01997             }
01998         }
01999     }
02000 
02001     /* all receives have completed. it is safe to release the memory */
02002     // free(vertex_pack_import);
02003     /* wait until the sends have completed */
02004     std::vector< MPI_Status > stati( neighbourProc.size() );
02005     rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) );
02006     CHECK_MPI_RZERO( rval, err );
02007     /* all sends have completed. it is safe to release the memory */
02008     // free(vertex_pack_export);
02009 
02010     return numVtxImport;
02011 }
02012 
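/*
 * Editorial sketch (not part of the original source): the header phase of the "nb" variant
 * above.  Each count is received with MPI_ANY_SOURCE and status.MPI_SOURCE is mapped back
 * onto the neighbour list, so headers may arrive in any order.  comm, nbrs and recv_counts
 * are hypothetical names.
 */
static void example_collect_counts_any_source( MPI_Comm comm, const std::vector< int >& nbrs,
                                               std::vector< int >& recv_counts )
{
    recv_counts.assign( nbrs.size(), 0 );
    for( size_t j = 0; j < nbrs.size(); j++ )
    {
        int num;
        MPI_Status status;
        /* exactly one header per neighbour, but from whichever neighbour is ready first */
        MPI_Recv( &num, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, &status );
        for( size_t i = 0; i < nbrs.size(); i++ )
        {
            if( nbrs[i] == status.MPI_SOURCE )
            {
                recv_counts[i] = num; /* remember how many vertices this neighbour will send */
                break;
            }
        }
    }
}
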
02013 int ParallelHelperImpl::comm_smoothed_vtx_nb_no_all( MsqError& err )
02014 {
02015     int i, j, k, rval;
02016 
02017     // printf("[%d] %d %d non blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);
02018 
02019     /* how many vertices will we receive */
02020 
02021     std::vector< int > numVtxPerProcSend( neighbourProc.size(), 0 );
02022     for( i = 0; i < num_exportVtx; i++ )
02023     {
02024         for( j = 0; j < (long)neighbourProc.size(); j++ )
02025         {
02026             if( exportProc[i] == neighbourProc[j] )
02027             {
02028                 /* increment count */
02029                 numVtxPerProcSend[j]++;
02030                 /* end loop */
02031                 break;
02032             }
02033         }
02034         /* assert loop did not end without finding processor */
02035         assert( j != (long)neighbourProc.size() );
02036     }
02037 
02038     /* tell each processor how many vertices to expect */
02039 
02040     for( j = 0; j < (long)neighbourProc.size(); j++ )
02041     {
02042         if( neighbourProcSendRemain[j] )
02043         {
02044             assert( neighbourProcSendRemain[j] >= numVtxPerProcSend[j] );
02045             neighbourProcSendRemain[j] -= numVtxPerProcSend[j];
02046             rval = MPI_Send( &( numVtxPerProcSend[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02047                              (MPI_Comm)communicator );
02048             CHECK_MPI_RZERO( rval, err );
02049             //    printf("[%d]i%d Announcing %d vertices to proc
02050             //    %d\n",rank,iteration,numVtxPerProcSend[j],neighbourProc[j]); fflush(NULL);
02051         }
02052         else
02053         {
02054             assert( numVtxPerProcSend[j] == 0 );
02055         }
02056     }
02057 
02058     /* place vertex data going to the same processor into consecutive memory space */
02059     std::vector< VertexPack > vertex_pack_export( num_exportVtx + 10 ); /* add 10 to have enough memory */
02060     std::vector< VertexPack* > packed_vertices_export( neighbourProc.size() );
02061     if( neighbourProc.size() ) packed_vertices_export[0] = ARRPTR( vertex_pack_export );
02062     for( i = 1; i < (long)neighbourProc.size(); i++ )
02063     {
02064         packed_vertices_export[i] = packed_vertices_export[i - 1] + numVtxPerProcSend[i - 1];
02065     }
02066 
02067     std::vector< int > numVtxPerProcSendPACKED( neighbourProc.size(), 0 );
02068     for( i = 0; i < num_exportVtx; i++ )
02069     {
02070         for( j = 0; j < (long)neighbourProc.size(); j++ )
02071         {
02072             if( exportProc[i] == neighbourProc[j] )
02073             {
02074                 VertexPack* packing_vertex = packed_vertices_export[j] + numVtxPerProcSendPACKED[j];
02075                 numVtxPerProcSendPACKED[j]++;
02076                 MBMesquite::MsqVertex coordinates;
02077                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02078                 MSQ_ERRZERO( err );
02079                 packing_vertex->x       = coordinates[0];
02080                 packing_vertex->y       = coordinates[1];
02081                 packing_vertex->z       = coordinates[2];
02082                 packing_vertex->glob_id = exportVtxGIDs[i];
02083             }
02084         }
02085     }
02086     // delete [] numVtxPerProcSendPACKED;
02087 
02088     /* now receive from each processor how many vertices to expect */
02089 
02090     int num;
02091     int proc;
02092     int numVtxImport          = 0;
02093     int num_neighbourProcRecv = 0;
02094     std::vector< int > numVtxPerProcRecv( neighbourProc.size(), 0 );
02095     MPI_Status status;
02096 
02097     int num_neighbourProcRecvRemain = 0;
02098     for( j = 0; j < (long)neighbourProc.size(); j++ )
02099     {
02100         if( neighbourProcRecvRemain[j] ) { num_neighbourProcRecvRemain++; }
02101     }
02102     for( j = 0; j < num_neighbourProcRecvRemain; j++ )
02103     {
02104         /* get the vertex count for some processor */
02105         rval = MPI_Recv( &num,                      /* message buffer */
02106                          1,                         /* one data item */
02107                          MPI_INT,                   /* of type int */
02108                          MPI_ANY_SOURCE,            /* receive from any sender */
02109                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs from this iteration */
02110                          (MPI_Comm)communicator,    /* default communicator */
02111                          &status );                 /* info about the received message */
02112         CHECK_MPI_RZERO( rval, err );
02113         proc = status.MPI_SOURCE;
02114         /* will we import vertices from this processor */
02115 
02116         //    printf("[%d]i%d Heard we will receive %d vertices from proc
02117         //    %d\n",rank,iteration,num,proc); fflush(NULL);
02118 
02119         if( num )
02120         {
02121             /* increase number of processors we will receive from */
02122             num_neighbourProcRecv++;
02123             /* add number of vertices we will receive to the import total */
02124             numVtxImport += num;
02125         }
02126         /* find this processor in the list */
02127         for( i = 0; i < (long)neighbourProc.size(); i++ )
02128         {
02129             if( neighbourProc[i] == proc )
02130             {
02131                 numVtxPerProcRecv[i] = num;
02132                 assert( neighbourProcRecvRemain[i] >= num );
02133                 neighbourProcRecvRemain[i] -= num;
02134                 break;
02135             }
02136         }
02137         /* assert loop did not end without finding processor */
02138         assert( i != (long)neighbourProc.size() );
02139         if( 0 ) printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, num, proc );
02140         fflush( NULL );
02141     }
02142 
02143     /* create list of processors we receive from */
02144     std::vector< int > neighbourProcRecv( num_neighbourProcRecv );
02145     std::vector< int > numVtxPerProcRecvRecv( num_neighbourProcRecv );
02146     for( i = 0, k = 0; i < (long)neighbourProc.size(); i++ )
02147     {
02148         if( 0 )
02149             printf( "[%d]i%d Will receive %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecv[i],
02150                     neighbourProc[i] );
02151         fflush( NULL );
02152         if( numVtxPerProcRecv[i] )
02153         {
02154             neighbourProcRecv[k]     = neighbourProc[i];
02155             numVtxPerProcRecvRecv[k] = numVtxPerProcRecv[i];
02156             k++;
02157         }
02158     }
02159 
02160     /* set up memory for the incoming vertex data blocks */
02161     std::vector< VertexPack > vertex_pack_import( numVtxImport + 10 ); /* add 10 to have enough memory */
02162     std::vector< VertexPack* > packed_vertices_import( num_neighbourProcRecv );
02163     if( neighbourProc.size() ) packed_vertices_import[0] = ARRPTR( vertex_pack_import );
02164     for( i = 1; i < num_neighbourProcRecv; i++ )
02165     {
02166         packed_vertices_import[i] = packed_vertices_import[i - 1] + numVtxPerProcRecvRecv[i - 1];
02167     }
02168 
02169     /* receive from all processors that have something for us */
02170     std::vector< MPI_Request > request( num_neighbourProcRecv );
02171     for( j = 0; j < num_neighbourProcRecv; j++ )
02172     {
02173         rval = MPI_Irecv( packed_vertices_import[j], 4 * numVtxPerProcRecvRecv[j], MPI_DOUBLE_PRECISION,
02174                           neighbourProcRecv[j], VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( request[j] ) );
02175         CHECK_MPI_RZERO( rval, err );
02176         if( 0 )
02177         {
02178             printf( "[%d]i%d Scheduling receipt of %d vertices from proc %d\n", rank, iteration, numVtxPerProcRecvRecv[j],
02179                     neighbourProcRecv[j] );
02180             fflush( NULL );
02181         }
02182     }
02183 
02184     /* now send the data blocks */
02185 
02186     std::vector< MPI_Request > requests_send( neighbourProc.size() );
02187     for( j = 0; j < (long)neighbourProc.size(); j++ )
02188     {
02189         if( numVtxPerProcSend[j] )
02190         {
02191             rval =
02192                 MPI_Isend( packed_vertices_export[j], 4 * numVtxPerProcSend[j], MPI_DOUBLE_PRECISION, neighbourProc[j],
02193                            VERTEX_BLOCK + iteration, (MPI_Comm)communicator, &( requests_send[j] ) );
02194             CHECK_MPI_RZERO( rval, err );
02195             if( 0 )
02196             {
02197                 printf( "[%d]i%d Scheduling send of %d vertices to proc %d\n", rank, iteration, numVtxPerProcSend[j],
02198                         neighbourProc[j] );
02199                 fflush( NULL );
02200             }
02201         }
02202         else
02203         {
02204             requests_send[j] = MPI_REQUEST_NULL;
02205         }
02206     }
02207 
02208     /* process messages as they arrive */
02209 
02210     int local_id;
02211     for( j = 0; j < num_neighbourProcRecv; j++ )
02212     {
02213         rval = MPI_Waitany( num_neighbourProcRecv, ARRPTR( request ), &k, &status );
02214         CHECK_MPI_RZERO( rval, err );
02215 
02216         /* unpack messages */
02217         proc = status.MPI_SOURCE;
02218         int count;
02219         MPI_Get_count( &status, MPI_INT, &count );
02220         if( 0 )
02221             printf( "[%d]i%d Received %d (%d) vertices from proc %d (%d)\n", rank, iteration, numVtxPerProcRecvRecv[k],
02222                     count, neighbourProcRecv[k], proc );
02223         fflush( NULL );
02224         for( i = 0; i < numVtxPerProcRecvRecv[k]; i++ )
02225         {
02226             local_id = vertex_map_find( vid_map, packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02227             if( local_id )
02228             {
02229                 MBMesquite::Vector3D coordinates;
02230                 coordinates.set( packed_vertices_import[k][i].x, packed_vertices_import[k][i].y,
02231                                  packed_vertices_import[k][i].z );
02232                 mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02233                 MSQ_ERRZERO( err );
02234                 assert( part_smoothed_flag[local_id] == 0 );
02235                 part_smoothed_flag[local_id] = 1;
02236                 if( 0 )
02237                     printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02238                             (int)( packed_vertices_import[k][i].glob_id ), packed_vertices_import[k][i].x,
02239                             packed_vertices_import[k][i].y, packed_vertices_import[k][i].z );
02240             }
02241             else
02242             {
02243                 printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02244                         packed_vertices_import[k][i].glob_id, neighbourProcRecv[k] );
02245             }
02246         }
02247     }
02248 
02249     /* all receives have completed. it is safe to release the memory */
02250     // free(vertex_pack_import);
02251     /* wait until the sends have completed */
02252     std::vector< MPI_Status > stati( neighbourProc.size() );
02253     rval = MPI_Waitall( neighbourProc.size(), ARRPTR( requests_send ), ARRPTR( stati ) );
02254     CHECK_MPI_RZERO( rval, err );
02255     /* all sends have completed. it is safe to release the memory */
02256     // free(vertex_pack_export);
02257 
02258     return numVtxImport;
02259 }
02260 
02261 int ParallelHelperImpl::comm_smoothed_vtx_b( MsqError& err )
02262 {
02263     int i, j, rval;
02264 
02265     // printf("[%d] %d %d blocking\n",rank, iteration, pass);fflush(NULL);
02266 
02267     /* how many vertices per processor */
02268 
02269     std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
02270     for( i = 0; i < num_exportVtx; i++ )
02271     {
02272         for( j = 0; j < (long)neighbourProc.size(); j++ )
02273         {
02274             if( exportProc[i] == neighbourProc[j] )
02275             {
02276                 /* increment count */
02277                 numVtxPerProc[j]++;
02278                 /* end loop */
02279                 break;
02280             }
02281         }
02282         /* assert the loop did not end without finding the processor */
02283         assert( j != (long)neighbourProc.size() );
02284     }
02285 
02286     /* place vertices going to the same processor into consecutive memory space */
02287 
02288     std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
02289     std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
02290     VertexPack* packing_vertex;
02291     packed_vertices[0] = ARRPTR( vertex_pack );
02292     for( i = 1; i < (long)neighbourProc.size(); i++ )
02293     {
02294         packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
02295     }
02296 
02297     std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );
02298 
02299     for( i = 0; i < num_exportVtx; i++ )
02300     {
02301         for( j = 0; j < (long)neighbourProc.size(); j++ )
02302         {
02303             if( exportProc[i] == neighbourProc[j] )
02304             {
02305                 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
02306                 numVtxPackedPerProc[j]++;
02307                 MBMesquite::MsqVertex coordinates;
02308                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02309                 MSQ_ERRZERO( err );
02310                 packing_vertex->x       = coordinates[0];
02311                 packing_vertex->y       = coordinates[1];
02312                 packing_vertex->z       = coordinates[2];
02313                 packing_vertex->glob_id = exportVtxGIDs[i];
02314             }
02315         }
02316     }
02317 
02318     // delete [] numVtxPackedPerProc;
02319 
02320     /* send each block to the corresponding processor, preceded by the number of vertices */
02321 
02322     for( j = 0; j < (long)neighbourProc.size(); j++ )
02323     {
02324 
02325         //    printf("[%d]i%dp%d Announcing %d vertices to proc
02326         //    %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02327 
02328         rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02329                          (MPI_Comm)communicator );
02330         CHECK_MPI_RZERO( rval, err );
02331         // printf("[%d]i%dp%d Sending %d vertices to proc
02332         // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02333 
02334         /* is there any vertex data to be sent */
02335 
02336         if( numVtxPerProc[j] )
02337         {
02338             rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE_PRECISION, neighbourProc[j],
02339                              VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
02340             CHECK_MPI_RZERO( rval, err );
02341             // printf("[%d]i%dp%d Sent %d vertices to proc
02342             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02343         }
02344     }
02345 
02346     int num;
02347     int proc;
02348     // int tag;
02349     int count;
02350     int numVtxImport = 0;
02351     MPI_Status status;
02352     int local_id;
02353 
02354     //  printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
02355     //  \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);
02356 
02357     /* receiving blocks from other processors */
02358 
02359     for( j = 0; j < (long)neighbourProc.size(); j++ )
02360     {
02361         rval = MPI_Recv( &num,                      /* message buffer */
02362                          1,                         /* one data item */
02363                          MPI_INT,                   /* of type int */
02364                          MPI_ANY_SOURCE,            /* receive from any sender */
02365                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
02366                          (MPI_Comm)communicator,    /* default communicator */
02367                          &status );                 /* info about the received message */
02368         CHECK_MPI_RZERO( rval, err );
02369         proc = status.MPI_SOURCE;
02370         // tag = status.MPI_TAG;
02371         MPI_Get_count( &status, MPI_INT, &count );
02372 
02373         //    printf("[%d]i%dp%d Receiving %d vertices from proc
02374         //    %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);
02375 
02376         /* is there any vertex data to be received */
02377 
02378         if( num )
02379         {
02380 
02381             numVtxImport += num;
02382 
02383             /* do we have enough space allocated */
02384 
02385             if( num_exportVtx + 10 < num )
02386             {
02387                 // if (vertex_pack) free(vertex_pack);
02388                 num_exportVtx = num;
02389                 vertex_pack.resize( num_exportVtx + 10 );
02390                 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
02391             }
02392 
02393             rval = MPI_Recv( ARRPTR( vertex_pack ),    /* message buffer */
02394                              4 * num,                  /* num items of 4 doubles each */
02395                              MPI_DOUBLE_PRECISION,     /* of type double */
02396                              proc,                     /* receive from this processor only */
02397                              VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */
02398                              (MPI_Comm)communicator,   /* default communicator */
02399                              &status );                /* info about the received message */
02400             CHECK_MPI_RZERO( rval, err );
02401 
02402             proc = status.MPI_SOURCE;
02403             // tag = status.MPI_TAG;
02404             MPI_Get_count( &status, MPI_DOUBLE_PRECISION, &count );
02405 
02406             if( count != 4 * num )
02407                 printf( "[%d]i%d WARNING: expected %d vertices = %d doubles from proc %d but only "
02408                         "got %d doubles\n",
02409                         rank, iteration, num, num * 4, proc, count );
02410             fflush( NULL );
02411 
02412             //      printf("[%d]i%d Received %d vertices from proc
02413             //      %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);
02414 
02415             /* update the received vertices in our boundary mesh */
02416             for( i = 0; i < num; i++ )
02417             {
02418                 /*  printf("[%d]i%d updating vertex %d with global_id
02419                  * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
02420                 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
02421                 if( local_id )
02422                 {
02423                     MBMesquite::Vector3D coordinates;
02424                     coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02425                     mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02426                     MSQ_ERRZERO( err );
02427                     assert( part_smoothed_flag[local_id] == 0 );
02428                     part_smoothed_flag[local_id] = 1;
02429                     if( 0 )
02430                         printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02431                                 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02432                 }
02433                 else
02434                 {
02435                     printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02436                             vertex_pack[i].glob_id, proc );
02437                 }
02438             }
02439         }
02440     }
02441     // if (vertex_pack) free(vertex_pack);
02442 
02443     return numVtxImport;
02444 }
02445 
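/*
 * Editorial sketch (not part of the original source): the blocking variant above sends a
 * count header followed by the packed block, and the receiver cross-checks the payload size
 * with MPI_Get_count.  The helper names and the tags 0/1 are hypothetical.
 */
static void example_blocking_send_block( MPI_Comm comm, int dest, std::vector< double >& block )
{
    int n = (int)block.size();
    MPI_Send( &n, 1, MPI_INT, dest, 0, comm );                   /* header: element count */
    if( n ) MPI_Send( &block[0], n, MPI_DOUBLE, dest, 1, comm ); /* payload, only if nonempty */
}

static int example_blocking_recv_block( MPI_Comm comm, std::vector< double >& block )
{
    int n = 0;
    MPI_Status status;
    MPI_Recv( &n, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, &status ); /* header from any neighbour */
    block.resize( n );
    if( n )
    {
        int got = 0;
        MPI_Recv( &block[0], n, MPI_DOUBLE, status.MPI_SOURCE, 1, comm, &status );
        MPI_Get_count( &status, MPI_DOUBLE, &got ); /* verify we got what was announced */
        if( got != n ) printf( "WARNING: expected %d doubles but got %d\n", n, got );
    }
    return n;
}
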
02446 int ParallelHelperImpl::comm_smoothed_vtx_b_no_all( MsqError& err )
02447 {
02448     int i, j, rval;
02449 
02450     // printf("[%d] %d %d blocking avoid reduce all\n",rank, iteration, pass);fflush(NULL);
02451 
02452     /* how many vertices per processor */
02453 
02454     std::vector< int > numVtxPerProc( neighbourProc.size(), 0 );
02455     for( i = 0; i < num_exportVtx; i++ )
02456     {
02457         for( j = 0; j < (long)neighbourProc.size(); j++ )
02458         {
02459             if( exportProc[i] == neighbourProc[j] )
02460             {
02461                 /* increment count */
02462                 numVtxPerProc[j]++;
02463                 /* end loop */
02464                 break;
02465             }
02466         }
02467         /* assert the loop did not end without finding the processor */
02468         assert( j != (long)neighbourProc.size() );
02469     }
02470 
02471     /* place vertices going to the same processor into consecutive memory space */
02472 
02473     std::vector< VertexPack > vertex_pack( num_exportVtx + 10 ); /* add 10 to have enough memory */
02474     std::vector< VertexPack* > packed_vertices( neighbourProc.size() );
02475     VertexPack* packing_vertex;
02476     packed_vertices[0] = ARRPTR( vertex_pack );
02477     for( i = 1; i < (long)neighbourProc.size(); i++ )
02478     {
02479         packed_vertices[i] = packed_vertices[i - 1] + numVtxPerProc[i - 1];
02480     }
02481 
02482     std::vector< int > numVtxPackedPerProc( neighbourProc.size(), 0 );
02483 
02484     for( i = 0; i < num_exportVtx; i++ )
02485     {
02486         for( j = 0; j < (long)neighbourProc.size(); j++ )
02487         {
02488             if( exportProc[i] == neighbourProc[j] )
02489             {
02490                 packing_vertex = packed_vertices[j] + numVtxPackedPerProc[j];
02491                 numVtxPackedPerProc[j]++;
02492                 MBMesquite::MsqVertex coordinates;
02493                 mesh->vertices_get_coordinates( &part_vertices[exportVtxLIDs[i]], &coordinates, 1, err );
02494                 MSQ_ERRZERO( err );
02495                 packing_vertex->x       = coordinates[0];
02496                 packing_vertex->y       = coordinates[1];
02497                 packing_vertex->z       = coordinates[2];
02498                 packing_vertex->glob_id = exportVtxGIDs[i];
02499             }
02500         }
02501     }
02502 
02503     // delete [] numVtxPackedPerProc;
02504 
02505     /* send each block to the corresponding processor, preceded by the number of vertices */
02506 
02507     for( j = 0; j < (long)neighbourProc.size(); j++ )
02508     {
02509 
02510         if( neighbourProcSendRemain[j] )
02511         {
02512             assert( neighbourProcSendRemain[j] >= numVtxPerProc[j] );
02513             neighbourProcSendRemain[j] -= numVtxPerProc[j];
02514 
02515             // printf("[%d]i%dp%d Announcing %d vertices to proc
02516             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02517 
02518             rval = MPI_Send( &( numVtxPerProc[j] ), 1, MPI_INT, neighbourProc[j], VERTEX_HEADER + iteration,
02519                              (MPI_Comm)communicator );
02520             CHECK_MPI_RZERO( rval, err );
02521 
02522             // printf("[%d]i%dp%d Sending %d vertices to proc
02523             // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02524 
02525             /* is there any vertex data to be sent */
02526 
02527             if( numVtxPerProc[j] )
02528             {
02529                 rval = MPI_Send( packed_vertices[j], 4 * numVtxPerProc[j], MPI_DOUBLE_PRECISION, neighbourProc[j],
02530                                  VERTEX_BLOCK + iteration, (MPI_Comm)communicator );
02531                 CHECK_MPI_RZERO( rval, err );
02532 
02533                 // printf("[%d]i%dp%d Sent %d vertices to proc
02534                 // %d\n",rank,iteration,pass,numVtxPerProc[j],neighbourProc[j]); fflush(NULL);
02535             }
02536         }
02537         else
02538         {
02539             // printf("[%d]i%dp%d no longer sending to %d\n",rank,iteration,neighbourProc[j]);
02540             // fflush(NULL);
02541         }
02542     }
02543 
02544     int num;
02545     int proc;
02546     // int tag;
02547     int count;
02548     int numVtxImport = 0;
02549     MPI_Status status;
02550     int local_id;
02551 
02552     //  printf("[%d]i%dp%d Waiting to receive vertices from %d processors ...
02553     //  \n",rank,iteration,pass,neighbourProc.size()); fflush(NULL);
02554 
02555     /* receiving blocks in any order from other processors */
02556 
02557     int num_neighbourProcRecvRemain = 0;
02558     for( j = 0; j < (long)neighbourProc.size(); j++ )
02559     {
02560         if( neighbourProcRecvRemain[j] ) { num_neighbourProcRecvRemain++; }
02561     }
02562     for( j = 0; j < num_neighbourProcRecvRemain; j++ )
02563     {
02564         rval = MPI_Recv( &num,                      /* message buffer */
02565                          1,                         /* one data item */
02566                          MPI_INT,                   /* of type int */
02567                          MPI_ANY_SOURCE,            /* receive from any sender */
02568                          VERTEX_HEADER + iteration, /* receive only VERTEX HEADERs */
02569                          (MPI_Comm)communicator,    /* default communicator */
02570                          &status );                 /* info about the received message */
02571         CHECK_MPI_RZERO( rval, err );
02572         proc = status.MPI_SOURCE;
02573         // tag = status.MPI_TAG;
02574         MPI_Get_count( &status, MPI_INT, &count );
02575 
02576         // printf("[%d]i%dp%d Receiving %d vertices from proc
02577         // %d/%d/%d\n",rank,iteration,pass,num,proc,tag,count); fflush(NULL);
02578 
02579         /* is there any vertex data to be received */
02580 
02581         if( num )
02582         {
02583 
02584             numVtxImport += num;
02585 
02586             /* do we have enough space allocated */
02587 
02588             if( num_exportVtx + 10 < num )
02589             {
02590                 // if (vertex_pack) free(vertex_pack);
02591                 num_exportVtx = num;
02592                 // vertex_pack = (VertexPack*)malloc(sizeof(VertexPack)*(num_exportVtx+10));
02593                 vertex_pack.resize( num_exportVtx + 10 );
02594             }
02595 
02596             rval = MPI_Recv( ARRPTR( vertex_pack ),    /* message buffer */
02597                              4 * num,                  /* num items with 4 doubles each */
02598                              MPI_DOUBLE_PRECISION,     /* of type double */
02599                              proc,                     /* receive from this processor only */
02600                              VERTEX_BLOCK + iteration, /* receive only VERTEX BLOCKs */
02601                              (MPI_Comm)communicator,   /* default communicator */
02602                              &status );                /* info about the received message */
02603             CHECK_MPI_RZERO( rval, err );
02604 
02605             proc = status.MPI_SOURCE;
02606             // tag = status.MPI_TAG;
02607             MPI_Get_count( &status, MPI_DOUBLE_PRECISION, &count );
02608 
02609             if( count != 4 * num )
02610                 printf( "[%d]i%d WARNING: expected %d vertices = %d doubles from proc %d but only "
02611                         "got %d doubles\n",
02612                         rank, iteration, num, num * 4, proc, count );
02613             fflush( NULL );
02614 
02615             // printf("[%d]i%d Received %d vertices from proc
02616             // %d/%d/%d\n",rank,iteration,num,proc,tag,count); fflush(NULL);
02617 
02618             /* update the received vertices in our boundary mesh */
02619             for( i = 0; i < num; i++ )
02620             {
02621                 /*  printf("[%d]i%d updating vertex %d with global_id
02622                  * %d\n",rank,iteration,i,(int)(vertex_pack[i].glob_id)); fflush(NULL); */
02623                 local_id = vertex_map_find( vid_map, (int)( vertex_pack[i].glob_id ), proc );
02624                 if( local_id )
02625                 {
02626                     MBMesquite::Vector3D coordinates;
02627                     coordinates.set( vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02628                     mesh->vertex_set_coordinates( part_vertices[local_id], coordinates, err );
02629                     MSQ_ERRZERO( err );
02630                     assert( part_smoothed_flag[local_id] == 0 );
02631                     part_smoothed_flag[local_id] = 1;
02632                     if( 0 && rank == 1 )
02633                         printf( "[%d]i%d updating vertex with global_id %d to %g %g %g \n", rank, iteration,
02634                                 (int)( vertex_pack[i].glob_id ), vertex_pack[i].x, vertex_pack[i].y, vertex_pack[i].z );
02635                 }
02636                 else
02637                 {
02638                     printf( "[%d]i%d vertex with gid %lu and pid %d not in map\n", rank, iteration,
02639                             vertex_pack[i].glob_id, proc );
02640                 }
02641             }
02642         }
02643         for( i = 0; i < (long)neighbourProc.size(); i++ )
02644         {
02645             if( proc == neighbourProc[i] )
02646             {
02647                 assert( neighbourProcRecvRemain[i] >= num );
02648                 neighbourProcRecvRemain[i] -= num;
02649                 break;
02650             }
02651         }
02652     }
02653     // if (vertex_pack) free(vertex_pack);
02654 
02655     return numVtxImport;
02656 }
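// Editor's note (not part of the original source): the exchange above is a
// two-message pattern per neighbour -- a small header carrying the vertex count
// (tag VERTEX_HEADER + iteration), then, only when the count is non-zero, the
// packed coordinates (tag VERTEX_BLOCK + iteration).  A minimal sketch of one
// such exchange, assuming hypothetical names 'peer', 'outgoing' and 'incoming'
// (std::vector< VertexPack >) and MPI_COMM_WORLD in place of the member
// communicator; the original sends the payload as MPI_DOUBLE_PRECISION (the
// Fortran handle), while MPI_DOUBLE is the usual C choice:
//
//   /* sender */
//   int n = (int)outgoing.size();
//   MPI_Send( &n, 1, MPI_INT, peer, VERTEX_HEADER + iteration, MPI_COMM_WORLD );
//   if( n ) MPI_Send( &outgoing[0], 4 * n, MPI_DOUBLE, peer, VERTEX_BLOCK + iteration, MPI_COMM_WORLD );
//
//   /* receiver: header first (any source), then the matching payload from that source */
//   MPI_Status st;
//   MPI_Recv( &n, 1, MPI_INT, MPI_ANY_SOURCE, VERTEX_HEADER + iteration, MPI_COMM_WORLD, &st );
//   incoming.resize( n );
//   if( n )
//       MPI_Recv( &incoming[0], 4 * n, MPI_DOUBLE, st.MPI_SOURCE, VERTEX_BLOCK + iteration, MPI_COMM_WORLD, &st );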
02657 
02658 /***********************************************************************
02659 COMPUTING THE INDEPENDENT SET AND EXPORT INFORMATION
02660 ***********************************************************************/
02661 /* the only thing that can prevent a vertex from being in the independent
02662    set is that it has a neighbouring vertex on a different processor that
02663    has not been smoothed yet and that has a larger random number */
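// Editor's note (not part of the original source): the rule stated above can be
// written as a single predicate.  A hedged sketch, with hypothetical names
// my_rand, nbr_rand, my_rank and nbr_owner standing in for part_rand_number[i],
// part_rand_number[incident_vtx], rank and part_proc_owner[incident_vtx]:
//
//   bool loses_to_neighbour = ( nbr_rand > my_rand ) ||
//                             ( nbr_rand == my_rand && nbr_owner > my_rank );
//
// A vertex joins the independent set only if no unsmoothed off-processor
// neighbour makes loses_to_neighbour true; the processor id breaks ties between
// equal random numbers.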
02664 
02665 void ParallelHelperImpl::compute_independent_set()
02666 {
02667     int i, j, k, l;
02668     int incident_vtx;
02669     bool done;
02670 
02671     for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02672         in_independent_set[i] = false;
02673 
02674     bool found_more = true;
02675     int found_iter  = 0;
02676 
02677     num_exportVtx = 0;
02678 
02679     while( found_more )
02680     {
02681         found_iter++;
02682         found_more = false;
02683         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02684         {
02685             /* if this vertex could become part of the independent set */
02686             if( part_smoothed_flag[i] == 0 && in_independent_set[i] == false )
02687             {
02688                 /* assume it's in the independent set */
02689                 done = false;
02690                 /* then loop over the neighbors it has on other processors */
02691                 for( j = 0; !done && j < (long)vtx_off_proc_list[i].size(); j++ )
02692                 {
02693                     incident_vtx = vtx_off_proc_list[i][j];
02694                     /* if this neighbour has not yet been smoothed and is not covered and ... */
02695                     if( part_smoothed_flag[incident_vtx] == 0 )
02696                     {
02697                         /* ... has a higher rand_number than me */
02698                         if( part_rand_number[i] < part_rand_number[incident_vtx] )
02699                         {
02700                             /* then I am not in the independent set */
02701                             done = true;
02702                             break;
02703                         }
02704                         /* ... or has the same rand_number as me but a higher processor id */
02705                         else if( ( part_rand_number[i] == part_rand_number[incident_vtx] ) &&
02706                                  ( rank < part_proc_owner[incident_vtx] ) )
02707                         {
02708                             /* then I am not in the independent set */
02709                             done = true;
02710                             break;
02711                         }
02712                     }
02713                 }
02714                 /* if the vertex is in the independent set, add it to the export list */
02715                 if( !done )
02716                 {
02717                     found_more = true;
02718                     //  if (found_iter > 1) printf("[%d]i%d found another one %d in iteration
02719                     //%d\n",rank,iteration,i, found_iter);
02720                     in_independent_set[i] = true;
02721                     /* mark vertices with lower random numbers as covered */
02722                     for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
02723                     {
02724                         incident_vtx = vtx_off_proc_list[i][j];
02725                         /* if this neighbour has not yet been smoothed or covered mark it as covered
02726                          */
02727                         if( part_smoothed_flag[incident_vtx] == 0 ) { part_smoothed_flag[incident_vtx] = 2; }
02728                     }
02729                     k = num_exportVtx;
02730                     /* then loop over the neighbors it has on other processors */
02731                     for( j = 0; j < (long)vtx_off_proc_list[i].size(); j++ )
02732                     {
02733                         incident_vtx = vtx_off_proc_list[i][j];
02734                         /* check to see if this processor is already on the list */
02735                         done = false;
02736                         for( l = k; l < num_exportVtx && !done; l++ )
02737                         {
02738                             if( exportProc[l] == part_proc_owner[incident_vtx] ) { done = true; }
02739                         }
02740                         /* if it's not on the list add it */
02741                         if( !done )
02742                         {
02743                             exportVtxLIDs[num_exportVtx] = i;
02744                             exportVtxGIDs[num_exportVtx] = part_gid[i];
02745                             exportProc[num_exportVtx]    = part_proc_owner[incident_vtx];
02746                             num_exportVtx++;
02747                         }
02748                     }
02749                 }
02750             }
02751         }
02752     }
02753 
02754     if( 0 )
02755     {
02756         int in_set = 0;
02757         for( i = 0; i < num_vtx_partition_boundary_local; i++ )
02758             if( in_independent_set[i] ) in_set++;
02759
02760         printf( "[%d]i%d independent set has %d of %d vertices sent out %d times\n", rank, iteration, in_set,
02761                 num_vtx_partition_boundary_local, num_exportVtx );
02762         fflush( NULL );
02763     }
02764 
02765     /* unmark the vertices that have been marked as covered */
02766     for( i = num_vtx_partition_boundary_local; i < num_vtx_partition_boundary; i++ )
02767         if( part_smoothed_flag[i] == 2 ) part_smoothed_flag[i] = 0;
02768 }
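// Editor's note (not part of the original source): as far as this function shows,
// part_smoothed_flag acts as a small state machine: 0 = not yet smoothed,
// 1 = smoothed, 2 = temporarily "covered" by a neighbour that won this round.
// The final loop above returns covered off-processor vertices to state 0 so they
// become eligible again in the next round.  A standalone sketch of that reset over
// a hypothetical flags vector, where num_local plays the role of
// num_vtx_partition_boundary_local:
//
//   for( size_t v = num_local; v < flags.size(); ++v )
//       if( flags[v] == 2 ) flags[v] = 0;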
02769 
02770 int ParallelHelperImpl::get_rank() const
02771 {
02772     return rank;
02773 }
02774 
02775 int ParallelHelperImpl::get_nprocs() const
02776 {
02777     return nprocs;
02778 }
02779 
02780 bool ParallelHelperImpl::is_our_element( MBMesquite::Mesh::ElementHandle element_handle, MsqError& err ) const
02781 {
02782     int i;
02783     std::vector< Mesh::VertexHandle > pvertices;
02784     std::vector< size_t > junk;
02785     mesh->elements_get_attached_vertices( &element_handle, 1, pvertices, junk, err );
02786     MSQ_ERRZERO( err );
02787     int num_verts = pvertices.size();
02788     std::vector< int > proc_ids( num_verts );
02789     mesh->vertices_get_processor_id( ARRPTR( pvertices ), ARRPTR( proc_ids ), num_verts, err );
02790     MSQ_ERRZERO( err );
02791     int max_proc_id = proc_ids[0];
02792     for( i = 1; i < num_verts; i++ )
02793         if( max_proc_id < proc_ids[i] ) max_proc_id = proc_ids[i];
02794     return ( max_proc_id == rank );
02795 }
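// Editor's note (not part of the original source): the ownership rule above assigns
// each element to the highest-ranked process owning one of its vertices, so every
// element shared across a partition boundary is claimed by exactly one process.
// A minimal sketch of the same tie-break, assuming a hypothetical vector
// vertex_owner_ranks already filled for one element:
//
//   int owner    = *std::max_element( vertex_owner_ranks.begin(), vertex_owner_ranks.end() );
//   bool is_mine = ( owner == my_rank );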
02796 
02797 bool ParallelHelperImpl::is_our_vertex( MBMesquite::Mesh::VertexHandle vertex_handle, MsqError& err ) const
02798 {
02799     int proc_id;
02800     mesh->vertices_get_processor_id( &vertex_handle, &proc_id, 1, err );
02801     return !MSQ_CHKERR( err ) && ( proc_id == rank );
02802 }
02803 
02804 void ParallelHelperImpl::communicate_min_max_to_all( double* minimum, double* maximum, MsqError& err ) const
02805 {
02806     double d_min[2];
02807     double d_min_recv[2];
02808     d_min[0] = -( *maximum );
02809     d_min[1] = *minimum;
02810     int rval = MPI_Allreduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, (MPI_Comm)communicator );
02811     CHECK_MPI( rval, err );
02812     *maximum = -d_min_recv[0];
02813     *minimum = d_min_recv[1];
02814 }
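// Editor's note (not part of the original source): the negation above folds the
// maximum into a single MPI_MIN reduction, using min(-a, -b) == -max(a, b).
// A standalone sketch of the same trick with hypothetical names and
// MPI_COMM_WORLD in place of the member communicator:
//
//   double local[2] = { -local_max, local_min };
//   double global[2];
//   MPI_Allreduce( local, global, 2, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD );
//   double global_max = -global[0];
//   double global_min =  global[1];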
02815 
02816 void ParallelHelperImpl::communicate_min_max_to_zero( double* minimum, double* maximum, MsqError& err ) const
02817 {
02818     double d_min[2];
02819     double d_min_recv[2];
02820     d_min[0] = -( *maximum );
02821     d_min[1] = *minimum;
02822     int rval = MPI_Reduce( d_min, d_min_recv, 2, MPI_DOUBLE, MPI_MIN, 0, (MPI_Comm)communicator );
02823     CHECK_MPI( rval, err );
02824     if( rank == 0 )
02825     {
02826         *maximum = -d_min_recv[0];
02827         *minimum = d_min_recv[1];
02828     }
02829 }
02830 
02831 void ParallelHelperImpl::communicate_sums_to_zero( size_t* freeElementCount, int* invertedElementCount,
02832                                                    size_t* elementCount, int* invertedSampleCount, size_t* sampleCount,
02833                                                    long unsigned int* count, long unsigned int* invalid, double* sum,
02834                                                    double* sqrSum, MsqError& err ) const
02835 {
02836     double d_sum[9];
02837     double d_sum_recv[9];
02838 
02839     d_sum[0] = (double)( *freeElementCount );
02840     d_sum[1] = (double)( *invertedElementCount );
02841     d_sum[2] = (double)( *elementCount );
02842     d_sum[3] = (double)( *invertedSampleCount );
02843     d_sum[4] = (double)( *sampleCount );
02844     d_sum[5] = (double)( *count );
02845     d_sum[6] = (double)( *invalid );
02846     d_sum[7] = *sum;
02847     d_sum[8] = *sqrSum;
02848 
02849     int rval = MPI_Reduce( d_sum, d_sum_recv, 9, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
02850     CHECK_MPI( rval, err );
02851 
02852     if( rank == 0 )
02853     {
02854         *freeElementCount     = (size_t)d_sum_recv[0];
02855         *invertedElementCount = (int)d_sum_recv[1];
02856         *elementCount         = (size_t)d_sum_recv[2];
02857         *invertedSampleCount  = (int)d_sum_recv[3];
02858         *sampleCount          = (size_t)d_sum_recv[4];
02859         *count                = (long unsigned int)d_sum_recv[5];
02860         *invalid              = (long unsigned int)d_sum_recv[6];
02861         *sum                  = d_sum_recv[7];
02862         *sqrSum               = d_sum_recv[8];
02863     }
02864 }
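// Editor's note (not part of the original source): packing the integer counters
// into doubles allows one MPI_Reduce instead of several typed reductions, and is
// lossless because IEEE doubles represent integers exactly up to 2^53.  A hedged
// sketch of the alternative with two typed reductions ('comm' is a placeholder
// for the member communicator):
//
//   unsigned long counts[7], counts_recv[7];  /* the seven integer-valued fields */
//   double        sums[2],   sums_recv[2];    /* sum and sqrSum */
//   MPI_Reduce( counts, counts_recv, 7, MPI_UNSIGNED_LONG, MPI_SUM, 0, comm );
//   MPI_Reduce( sums,   sums_recv,   2, MPI_DOUBLE,        MPI_SUM, 0, comm );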
02865 
02866 void ParallelHelperImpl::communicate_power_sum_to_zero( double* pMean, MsqError& err ) const
02867 {
02868     double result;
02869     int rval = MPI_Reduce( pMean, &result, 1, MPI_DOUBLE, MPI_SUM, 0, (MPI_Comm)communicator );
02870     CHECK_MPI( rval, err );
02871     if( rank == 0 ) *pMean = result;
02872 }
02873 
02874 void ParallelHelperImpl::communicate_histogram_to_zero( std::vector< int >& histogram, MsqError& err ) const
02875 {
02876     std::vector< int > histogram_recv( histogram.size() );
02877     int rval = MPI_Reduce( &( histogram[0] ), &( histogram_recv[0] ), histogram.size(), MPI_INT, MPI_SUM, 0,
02878                            (MPI_Comm)communicator );
02879     CHECK_MPI( rval, err );
02880     if( rank == 0 ) { histogram.swap( histogram_recv ); }
02881 }
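// Editor's note (not part of the original source): the reduction above sums the
// histograms bin by bin onto rank 0.  A sketch of the same reduction without the
// temporary receive vector, using MPI_IN_PLACE on the root ('comm' and 'root' are
// placeholder names):
//
//   if( my_rank == root )
//       MPI_Reduce( MPI_IN_PLACE, &histogram[0], (int)histogram.size(), MPI_INT, MPI_SUM, root, comm );
//   else
//       MPI_Reduce( &histogram[0], NULL, (int)histogram.size(), MPI_INT, MPI_SUM, root, comm );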
02882 
02883 /// if any proc has a true input @param value, return true to all
02884 void ParallelHelperImpl::communicate_any_true( bool& value, MsqError& err ) const
02885 {
02886     int byte_out = value, byte_in;
02887     int rval     = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MAX, (MPI_Comm)communicator );
02888     CHECK_MPI( rval, err );
02889     value = ( byte_in != 0 );
02890 }
02891 
02892 /// srkenno AT sandia.gov 1/19/12: bug fix: changed from MPI_MAX to MIN - this makes the name of the
02893 /// function correct: if all procs' values are true, it returns true.  @see communicate_any_true
02894 /// above
02895 
02896 /// return true in @param value if all procs have value=true, else if any have it false, return
02897 /// false
02898 void ParallelHelperImpl::communicate_all_true( bool& value, MsqError& err ) const
02899 {
02900     int byte_out = value, byte_in;
02901     int rval     = MPI_Allreduce( &byte_out, &byte_in, 1, MPI_INT, MPI_MIN, (MPI_Comm)communicator );
02902     CHECK_MPI( rval, err );
02903     value = ( byte_in != 0 );
02904 }
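// Editor's note (not part of the original source): both boolean reductions encode
// bools as 0/1 ints, so MPI_MAX acts as a logical OR (communicate_any_true) and
// MPI_MIN as a logical AND (communicate_all_true).  The same results can be had
// with MPI's logical operators on integers; a hedged sketch ('comm' is a
// placeholder for the member communicator):
//
//   int flag_in = value ? 1 : 0, flag_out = 0;
//   MPI_Allreduce( &flag_in, &flag_out, 1, MPI_INT, MPI_LOR,  comm );  /* any true */
//   MPI_Allreduce( &flag_in, &flag_out, 1, MPI_INT, MPI_LAND, comm );  /* all true */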
02905 
02906 }  // namespace MBMesquite