MOAB: Mesh Oriented datABase
(version 5.2.1)
/**
 * @file Read and distribute mapping weights to multiple processes
 * @author Vijay Mahadevan (mahadevan@anl.gov)
 * @brief
 * @version 0.1
 * @date 2020-07-13
 *
 */

#include <iostream>
#include <vector>
#include "moab/Core.hpp"

#ifndef MOAB_HAVE_TEMPESTREMAP
#error Need TempestRemap dependency for this test
#endif

#include "OfflineMap.h"
#include "moab/Remapping/TempestRemapper.hpp"
#include "moab/ParallelComm.hpp"

#ifndef IS_BUILDING_MB
#define IS_BUILDING_MB
#endif

#include "Internals.hpp"
#include "TestRunner.hpp"

#include <netcdf.h>
#include <netcdf_par.h>

using namespace moab;

std::string inMapFilename = "outCS5ICOD5_map.nc";

void test_tempest_map_bcast();
void read_buffered_map();

int main( int argc, char** argv )
{
    if( argc > 1 )
        inMapFilename = std::string( argv[1] );
    else
        inMapFilename = TestDir + "/" + inMapFilename;

    REGISTER_TEST( test_tempest_map_bcast );
    REGISTER_TEST( read_buffered_map );

#ifdef MOAB_HAVE_MPI
    MPI_Init( &argc, &argv );
#endif
    int result = RUN_TESTS( argc, argv );
#ifdef MOAB_HAVE_MPI
    MPI_Finalize();
#endif
    return result;
}

#define NCERROREXIT( err, MSG )                                                         \
    if( err )                                                                           \
    {                                                                                   \
        std::cout << "Error (" << err << "): " << MSG;                                  \
        std::cout << "\nAborting with message: " << nc_strerror( err ) << "... \n";     \
        return;                                                                         \
    }

void read_buffered_map()
{
    NcError error( NcError::silent_nonfatal );

    // Define our total buffer size to use
    NcFile* ncMap = NULL;
    NcVar *varRow = NULL, *varCol = NULL, *varS = NULL;

#ifdef MOAB_HAVE_MPI
    int mpierr         = 0;
    const int rootProc = 0;
#endif
    // Main inputs;
    //   1. Map Filename
    //   2. Ownership info from the processes to be gathered on root
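    // Buffer sizing: each nonzero entry carries a row index, a column index and a weight,
    // i.e. 2 * sizeof( int ) + sizeof( double ) bytes per entry (16 bytes on platforms with
    // 4-byte int and 8-byte double), so the 64 KB buffer below holds 64 * 1024 / 16 = 4096
    // entries per buffered read.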
    const int nBufferSize = 64 * 1024;  // 64 KB
    const int nNNZBytes   = 2 * sizeof( int ) + sizeof( double );
    const int nMaxEntries = nBufferSize / nNNZBytes;
    int nA = 0, nB = 0, nVA = 0, nVB = 0, nS = 0;
    double dTolerance = 1e-08;

    int nprocs = 1, rank = 0;
#ifdef MOAB_HAVE_MPI
    MPI_Comm commW = MPI_COMM_WORLD;
    MPI_Comm_size( commW, &nprocs );
    MPI_Comm_rank( commW, &rank );
#endif

#ifdef MOAB_HAVE_MPI
    if( rank == rootProc )
#endif
    {
        ncMap = new NcFile( inMapFilename.c_str(), NcFile::ReadOnly );
        if( !ncMap->is_valid() ) { _EXCEPTION1( "Unable to open input map file \"%s\"", inMapFilename.c_str() ); }

        // Source and Target mesh resolutions
        NcDim* dimNA = ncMap->get_dim( "n_a" );
        if( dimNA == NULL ) { _EXCEPTIONT( "Input map missing dimension \"n_a\"" ); }
        else
            nA = dimNA->size();

        NcDim* dimNB = ncMap->get_dim( "n_b" );
        if( dimNB == NULL ) { _EXCEPTIONT( "Input map missing dimension \"n_b\"" ); }
        else
            nB = dimNB->size();

        NcDim* dimNVA = ncMap->get_dim( "nv_a" );
        if( dimNVA == NULL ) { _EXCEPTIONT( "Input map missing dimension \"nv_a\"" ); }
        else
            nVA = dimNVA->size();

        NcDim* dimNVB = ncMap->get_dim( "nv_b" );
        if( dimNVB == NULL ) { _EXCEPTIONT( "Input map missing dimension \"nv_b\"" ); }
        else
            nVB = dimNVB->size();

        // Read SparseMatrix entries
        NcDim* dimNS = ncMap->get_dim( "n_s" );
        if( dimNS == NULL )
        {
            _EXCEPTION1( "Map file \"%s\" does not contain dimension \"n_s\"", inMapFilename.c_str() );
        }
        else
            nS = dimNS->size();  // store total number of nonzeros

        varRow = ncMap->get_var( "row" );
        if( varRow == NULL )
        { _EXCEPTION1( "Map file \"%s\" does not contain variable \"row\"", inMapFilename.c_str() ); }

        varCol = ncMap->get_var( "col" );
        if( varCol == NULL )
        {
            _EXCEPTION1( "Map file \"%s\" does not contain variable \"col\"", inMapFilename.c_str() );
        }

        varS = ncMap->get_var( "S" );
        if( varS == NULL ) { _EXCEPTION1( "Map file \"%s\" does not contain variable \"S\"", inMapFilename.c_str() ); }
    }

    // const int nTotalBytes = nS * nNNZBytes;
    int nEntriesRemaining = nS;
    int nBufferedReads    = static_cast< int >( ceil( ( 1.0 * nS ) / ( 1.0 * nMaxEntries ) ) );
    int runData[6]        = { nA, nB, nVA, nVB, nS, nBufferedReads };

    mpierr = MPI_Bcast( runData, 6, MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

#ifdef MOAB_HAVE_MPI
    if( rank != rootProc )
    {
        nA             = runData[0];
        nB             = runData[1];
        nVA            = runData[2];
        nVB            = runData[3];
        nS             = runData[4];
        nBufferedReads = runData[5];
    }
    else
        printf( "Global parameters: nA = %d, nB = %d, nS = %d\n", nA, nB, nS );
#else
    printf( "Global parameters: nA = %d, nB = %d, nS = %d\n", nA, nB, nS );
#endif

    std::vector< int > rowOwnership( nprocs );
    int nGRowPerPart   = nB / nprocs;
    int nGRowRemainder = nB % nprocs;  // Keep the remainder in root
    rowOwnership[0]    = nGRowPerPart + nGRowRemainder;
    for( int ip = 1, roffset = rowOwnership[0]; ip < nprocs; ++ip )
    {
        roffset += nGRowPerPart;
        rowOwnership[ip] = roffset;
    }
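    // rowOwnership[ip] stores the exclusive upper bound of the global row indices owned by
    // rank ip, with the remainder rows kept on root. Illustrative example: with nB = 10 and
    // nprocs = 4, nGRowPerPart = 2, nGRowRemainder = 2 and rowOwnership = { 4, 6, 8, 10 },
    // i.e. root owns rows [0,4), rank 1 owns [4,6), rank 2 owns [6,8) and rank 3 owns [8,10).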
    // Let us declare the map object for every process
    OfflineMap mapRemap;
    SparseMatrix< double >& sparseMatrix = mapRemap.GetSparseMatrix();

#ifdef MOAB_HAVE_MPI
    if( rank == rootProc )
#endif
        printf( "Parameters: nA=%d, nB=%d, nVA=%d, nVB=%d, nS=%d, nNNZBytes = %d, nBufferedReads = %d\n", nA, nB, nVA,
                nVB, nS, nNNZBytes, nBufferedReads );

    std::map< int, int > rowMap, colMap;
    int rindexMax = 0, cindexMax = 0;
    long offset = 0;

    /* Split the rows and send to processes in chunks
       Let us start the buffered read */
    for( int iRead = 0; iRead < nBufferedReads; ++iRead )
    {
        // pointers to the data
        DataArray1D< int > vecRow;
        DataArray1D< int > vecCol;
        DataArray1D< double > vecS;
        std::vector< std::vector< int > > dataPerProcess( nprocs );
        std::vector< int > nDataPerProcess( nprocs, 0 );
        for( int ip = 0; ip < nprocs; ++ip )
            dataPerProcess[ip].reserve( std::max( 100, nMaxEntries / nprocs ) );

#ifdef MOAB_HAVE_MPI
        if( rank == rootProc )
#endif
        {
            int nLocSize = std::min( nEntriesRemaining, static_cast< int >( ceil( nBufferSize * 1.0 / nNNZBytes ) ) );

            printf( "Reading file: elements %ld to %ld\n", offset, offset + nLocSize );

            // Allocate and resize based on local buffer size
            vecRow.Allocate( nLocSize );
            vecCol.Allocate( nLocSize );
            vecS.Allocate( nLocSize );

            varRow->set_cur( (long)( offset ) );
            varRow->get( &( vecRow[0] ), nLocSize );

            varCol->set_cur( (long)( offset ) );
            varCol->get( &( vecCol[0] ), nLocSize );

            varS->set_cur( (long)( offset ) );
            varS->get( &( vecS[0] ), nLocSize );

            // Decrement vecRow and vecCol
            for( size_t i = 0; i < vecRow.GetRows(); i++ )
            {
                vecRow[i]--;
                vecCol[i]--;

                // TODO: Fix this linear search to compute process ownership
                int pOwner = -1;
                if( rowOwnership[0] > vecRow[i] )
                    pOwner = 0;
                else
                {
                    for( int ip = 1; ip < nprocs; ++ip )
                    {
                        if( rowOwnership[ip - 1] <= vecRow[i] && rowOwnership[ip] > vecRow[i] )
                        {
                            pOwner = ip;
                            break;
                        }
                    }
                }

                assert( pOwner >= 0 && pOwner < nprocs );
                dataPerProcess[pOwner].push_back( i );
            }
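            // One way to address the TODO above (sketch, not part of the original test): since
            // rowOwnership holds monotonically increasing upper bounds, the owner of a global
            // row index r could be located with a binary search instead of the linear scan, e.g.
            //   int pOwner = std::upper_bound( rowOwnership.begin(), rowOwnership.end(), r ) -
            //                rowOwnership.begin();
            // (assuming <algorithm> is available), reducing each lookup from O(nprocs) to O(log nprocs).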
            offset += nLocSize;
            nEntriesRemaining -= nLocSize;

            for( int ip = 0; ip < nprocs; ++ip )
                nDataPerProcess[ip] = dataPerProcess[ip].size();
        }

        // Communicate the number of DoF data to expect from root
        int nEntriesComm = 0;
        mpierr = MPI_Scatter( nDataPerProcess.data(), 1, MPI_INT, &nEntriesComm, 1, MPI_INT, rootProc, commW );
        CHECK_EQUAL( mpierr, 0 );

#ifdef MOAB_HAVE_MPI
        if( rank == rootProc )
#endif
        {
            std::vector< MPI_Request > cRequests;
            std::vector< MPI_Status > cStats( 2 * nprocs - 2 );
            cRequests.reserve( 2 * nprocs - 2 );

            // First send out all the relevant data buffers to the other processes
            std::vector< std::vector< int > > dataRowColsAll( nprocs );
            std::vector< std::vector< double > > dataEntriesAll( nprocs );
            for( int ip = 1; ip < nprocs; ++ip )
            {
                const int nDPP = nDataPerProcess[ip];
                if( nDPP > 0 )
                {
                    std::vector< int >& dataRowCols = dataRowColsAll[ip];
                    dataRowCols.resize( 2 * nDPP );
                    std::vector< double >& dataEntries = dataEntriesAll[ip];
                    dataEntries.resize( nDPP );

                    for( int ij = 0; ij < nDPP; ++ij )
                    {
                        dataRowCols[ij * 2]     = vecRow[dataPerProcess[ip][ij]];
                        dataRowCols[ij * 2 + 1] = vecCol[dataPerProcess[ip][ij]];
                        dataEntries[ij]         = vecS[dataPerProcess[ip][ij]];
                    }

                    MPI_Request rcsend, dsend;
                    mpierr = MPI_Isend( dataRowCols.data(), 2 * nDPP, MPI_INT, ip, iRead * 1000, commW, &rcsend );
                    CHECK_EQUAL( mpierr, 0 );
                    mpierr = MPI_Isend( dataEntries.data(), nDPP, MPI_DOUBLE, ip, iRead * 1000 + 1, commW, &dsend );
                    CHECK_EQUAL( mpierr, 0 );

                    cRequests.push_back( rcsend );
                    cRequests.push_back( dsend );
                }
            }

            // Next perform all necessary local work while we wait for the buffers to be sent out
            // Compute an offset for the rows and columns by creating a local to global mapping for rootProc
            int rindex = 0, cindex = 0;
            assert( dataPerProcess[0].size() - nEntriesComm == 0 );  // sanity check
            for( int i = 0; i < nEntriesComm; ++i )
            {
                const int& vecRowValue = vecRow[dataPerProcess[0][i]];
                const int& vecColValue = vecCol[dataPerProcess[0][i]];

                std::map< int, int >::iterator riter = rowMap.find( vecRowValue );
                if( riter == rowMap.end() )
                {
                    rowMap[vecRowValue] = rindexMax;
                    rindex              = rindexMax;
                    rindexMax++;
                }
                else
                    rindex = riter->second;

                std::map< int, int >::iterator citer = colMap.find( vecColValue );
                if( citer == colMap.end() )
                {
                    colMap[vecColValue] = cindexMax;
                    cindex              = cindexMax;
                    cindexMax++;
                }
                else
                    cindex = citer->second;

                sparseMatrix( rindex, cindex ) = vecS[i];
            }

            // Wait until all communication is pushed out
            mpierr = MPI_Waitall( cRequests.size(), cRequests.data(), cStats.data() );
            CHECK_EQUAL( mpierr, 0 );
        }  // if( rank == rootProc )
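        // The non-root ranks mirror the sends above: they post two non-blocking receives with
        // the matching tags ( iRead * 1000 for the packed [row, col] indices and
        // iRead * 1000 + 1 for the weights ) and then apply the same global-to-local index
        // compaction before inserting the received triplets into their local sparse matrix.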
#ifdef MOAB_HAVE_MPI
        else
        {
            if( nEntriesComm > 0 )
            {
                MPI_Request cRequests[2];
                MPI_Status cStats[2];
                // create buffers to receive the data from root process
                std::vector< int > dataRowCols( 2 * nEntriesComm );
                vecRow.Allocate( nEntriesComm );
                vecCol.Allocate( nEntriesComm );
                vecS.Allocate( nEntriesComm );

                /* TODO: combine row and column scatters together. Save one communication by better packing */
                // Scatter the rows, cols and entries to the processes according to the partition
                mpierr = MPI_Irecv( dataRowCols.data(), 2 * nEntriesComm, MPI_INT, rootProc, iRead * 1000, commW,
                                    &cRequests[0] );
                CHECK_EQUAL( mpierr, 0 );

                mpierr = MPI_Irecv( vecS, nEntriesComm, MPI_DOUBLE, rootProc, iRead * 1000 + 1, commW, &cRequests[1] );
                CHECK_EQUAL( mpierr, 0 );

                // Wait until all communication is pushed out
                mpierr = MPI_Waitall( 2, cRequests, cStats );
                CHECK_EQUAL( mpierr, 0 );

                // Compute an offset for the rows and columns by creating a local to global mapping
                int rindex = 0, cindex = 0;
                for( int i = 0; i < nEntriesComm; ++i )
                {
                    const int& vecRowValue = dataRowCols[i * 2];
                    const int& vecColValue = dataRowCols[i * 2 + 1];

                    std::map< int, int >::iterator riter = rowMap.find( vecRowValue );
                    if( riter == rowMap.end() )
                    {
                        rowMap[vecRowValue] = rindexMax;
                        rindex              = rindexMax;
                        rindexMax++;
                    }
                    else
                        rindex = riter->second;
                    vecRow[i] = rindex;

                    std::map< int, int >::iterator citer = colMap.find( vecColValue );
                    if( citer == colMap.end() )
                    {
                        colMap[vecColValue] = cindexMax;
                        cindex              = cindexMax;
                        cindexMax++;
                    }
                    else
                        cindex = citer->second;
                    vecCol[i] = cindex;

                    sparseMatrix( rindex, cindex ) = vecS[i];
                }

                // clear the local buffer
                dataRowCols.clear();
            }  // if( nEntriesComm > 0 )
        }      // if( rank != rootProc )

        MPI_Barrier( commW );
#endif
    }

#ifdef MOAB_HAVE_MPI
    if( rank == rootProc )
#endif
    {
        assert( nEntriesRemaining == 0 );
        ncMap->close();
    }

    /**
     * Perform a series of checks to ensure that the local maps in each process still preserve map properties
     * Let us run our usual tests to verify that the map data has been distributed correctly
     *   1) Consistency check -- row sum = 1.0
     *   2) Disable conservation checks that require global column sums (and hence more communication/reduction)
     *   3) Perform monotonicity check and ensure failures globally are same as serial case
     */
    printf( "Rank %d: sparsematrix size = %d x %d\n", rank, sparseMatrix.GetRows(), sparseMatrix.GetColumns() );
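    // Because ownership is assigned by row, every row of the map lives entirely on one rank,
    // so the row-sum (consistency) and per-entry monotonicity checks can be evaluated locally.
    // Column sums are split across ranks, which is why the conservation check stays disabled below.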
    // consistency: row sums
    {
        int isConsistentP = mapRemap.IsConsistent( dTolerance );
        if( 0 != isConsistentP ) { printf( "Rank %d failed consistency checks...\n", rank ); }
        CHECK_EQUAL( 0, isConsistentP );
    }

    // conservation: we will disable this conservation check for now.
    // int isConservativeP = mapRemap.IsConservative( dTolerance );
    // CHECK_EQUAL( 0, isConservativeP );

    // monotonicity: local failures
    {
        int isMonotoneP = mapRemap.IsMonotone( dTolerance );
        // Accumulate the sum of failures to ensure that it is exactly the same as in the serial case
        int isMonotoneG;
        mpierr = MPI_Allreduce( &isMonotoneP, &isMonotoneG, 1, MPI_INT, MPI_SUM, commW );
        CHECK_EQUAL( mpierr, 0 );

        // 4600 monotonicity failures in serial and parallel for the test map file
        CHECK_EQUAL( 4600, isMonotoneG );
    }

    delete ncMap;

    return;
}
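// In contrast to read_buffered_map() above, test_tempest_map_bcast() loads the entire map on the
// root process via OfflineMap::Read(), verifies the serial map properties there, and then
// redistributes the nonzero triplets to all ranks with collective MPI_Scatterv calls instead of
// buffered point-to-point sends.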
void test_tempest_map_bcast()
{
    /*
     *
     * Load mapping file generated with CS5 and ICOD5 meshes with FV-FV
     * projection for field data. It has the following attributes:
     *
     *      Analyzing map
     *      ..Per-dof consistency  (tol 1.00000e-08).. PASS
     *      ..Per-dof conservation (tol 1.00000e-08).. PASS
     *      ..Weights within range [-10,+10].. Done
     *      ..
     *      ..  Total nonzero entries: 8976
     *      ..   Column index min/max: 1 / 150 (150 source dofs)
     *      ..      Row index min/max: 1 / 252 (252 target dofs)
     *      ..      Source area / 4pi: 9.999999999991666e-01
     *      ..    Source area min/max: 7.758193626656797e-02 / 9.789674024202324e-02
     *      ..      Target area / 4pi: 9.999999999999928e-01
     *      ..    Target area min/max: 3.418848585325412e-02 / 5.362041648788460e-02
     *      ..    Source frac min/max: 9.999999999997851e-01 / 1.000000000003865e+00
     *      ..    Target frac min/max: 9.999999999993099e-01 / 1.000000000000784e+00
     *      ..    Map weights min/max: -2.062659472710135e-01 / 1.143815352351317e+00
     *      ..       Row sums min/max: 9.999999999993099e-01 / 1.000000000000784e+00
     *      ..   Consist. err min/max: -6.901146321069973e-13 / 7.838174553853605e-13
     *      ..    Col wt.sums min/max: 9.999999999997854e-01 / 1.000000000003865e+00
     *      ..   Conserv. err min/max: -2.146061106600428e-13 / 3.865352482534945e-12
     *      ..
     *      ..Histogram of nonzero entries in sparse matrix
     *      ....Column 1: Number of nonzero entries (bin minimum)
     *      ....Column 2: Number of columns with that many nonzero values
     *      ....Column 3: Number of rows with that many nonzero values
     *      ..[[25, 0, 2],[29, 0, 8],[30, 0, 10],[31, 150, 232]]
     *      ..
     *      ..Histogram of weights
     *      ....Column 1: Lower bound on weights
     *      ....Column 2: Upper bound on weights
     *      ....Column 3: # of weights in that bin
     *      ..[[-1, 0, 4577],[0, 1, 4365],[1, 2, 34]]
     *
     */
    NcError error( NcError::silent_nonfatal );
    MPI_Comm commW                = MPI_COMM_WORLD;
    const int rootProc            = 0;
    const std::string outFilename = inMapFilename;
    double dTolerance             = 1e-08;
    int mpierr                    = 0;

    int nprocs, rank;
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    const double dFillValueOverride = 0.0;
    OfflineMap mapRemap;
    if( rank == 0 )
    {
        std::cout << "Reading file: " << outFilename << std::endl;
        mapRemap.Read( outFilename );
        mapRemap.SetFillValueOverrideDbl( dFillValueOverride );
        mapRemap.SetFillValueOverride( static_cast< float >( dFillValueOverride ) );

        int isConsistent = mapRemap.IsConsistent( dTolerance );
        CHECK_EQUAL( 0, isConsistent );
        int isConservative = mapRemap.IsConservative( dTolerance );
        CHECK_EQUAL( 0, isConservative );
        int isMonotone = mapRemap.IsMonotone( dTolerance );
        CHECK_EQUAL( 4600, isMonotone );

        // verify that the source and target mesh areas are equal
        DataArray1D< double >& srcAreas = mapRemap.GetSourceAreas();
        DataArray1D< double >& tgtAreas = mapRemap.GetTargetAreas();

        double totSrcArea = 0.0;
        for( size_t i = 0; i < srcAreas.GetRows(); ++i )
            totSrcArea += srcAreas[i];

        double totTgtArea = 0.0;
        for( size_t i = 0; i < tgtAreas.GetRows(); ++i )
            totTgtArea += tgtAreas[i];

        CHECK_REAL_EQUAL( totSrcArea, totTgtArea, 1e-10 );
    }

    MPI_Barrier( commW );

    SparseMatrix< double >& sparseMatrix = mapRemap.GetSparseMatrix();

    // Get the row, col and entry data tuple
    // Retrieve all data from the map operator on root process
    DataArray1D< int > dataRowsGlobal, dataColsGlobal;
    DataArray1D< double > dataEntriesGlobal;
    if( rank == 0 )
    {
        sparseMatrix.GetEntries( dataRowsGlobal, dataColsGlobal, dataEntriesGlobal );
        std::cout << "Total NNZ in remap matrix = " << dataRowsGlobal.GetRows() << std::endl;
    }

    int *rg, *cg;
    double* de;
    int nMapGlobalRowCols[3] = { sparseMatrix.GetRows(), sparseMatrix.GetColumns(),
                                 static_cast< int >( dataEntriesGlobal.GetRows() ) };

    /* split the rows and send to processes in chunks */
    std::vector< int > rowAllocation;
    std::vector< int > sendCounts;
    std::vector< int > displs;
    const int NDATA = 3;
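    // Layout of the per-rank allocation metadata below (NDATA = 3 integers per rank):
    //   rowAllocation[i * NDATA]     -> number of global rows assigned to rank i
    //   rowAllocation[i * NDATA + 1] -> offset of rank i's first nonzero entry in the global arrays
    //   rowAllocation[i * NDATA + 2] -> number of nonzero entries sent to rank i
    // displs and sendCounts copy the last two fields for use in the MPI_Scatterv calls.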
    if( rank == 0 )
    {
        rg = dataRowsGlobal;
        cg = dataColsGlobal;
        de = dataEntriesGlobal;

        // Let us do a trivial partitioning of the row data
        rowAllocation.resize( nprocs * NDATA );
        displs.resize( nprocs );
        sendCounts.resize( nprocs );

        int nRowPerPart = nMapGlobalRowCols[0] / nprocs;
        int remainder   = nMapGlobalRowCols[0] % nprocs;  // Keep the remainder in root
        // printf( "nRowPerPart = %d, remainder = %d\n", nRowPerPart, remainder );
        int sum = 0;
        for( int i = 0; i < nprocs; ++i )
        {
            rowAllocation[i * NDATA]     = nRowPerPart + ( i == 0 ? remainder : 0 );
            rowAllocation[i * NDATA + 1] = std::find( rg, rg + dataRowsGlobal.GetRows(), sum ) - rg;
            sum += rowAllocation[i * NDATA];
            displs[i] = rowAllocation[i * NDATA + 1];
        }
        for( int i = 0; i < nprocs - 1; ++i )
        {
            rowAllocation[i * NDATA + 2] = rowAllocation[( i + 1 ) * NDATA + 1] - rowAllocation[i * NDATA + 1];
            sendCounts[i]                = rowAllocation[i * NDATA + 2];
        }
        rowAllocation[( nprocs - 1 ) * NDATA + 2] = nMapGlobalRowCols[2] - displs[nprocs - 1];
        sendCounts[nprocs - 1]                    = rowAllocation[( nprocs - 1 ) * NDATA + 2];

        // for( int i = 0; i < nprocs; i++ )
        //     printf( "sendcounts[%d] = %d, %d, %d\tdispls[%d] = %d, %d\n", i, rowAllocation[i * NDATA],
        //             rowAllocation[i * NDATA + 1], rowAllocation[i * NDATA + 2], i, displs[i], sendCounts[i] );
    }

    mpierr = MPI_Bcast( nMapGlobalRowCols, 3, MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    int allocationSize[NDATA] = { 0, 0, 0 };
    mpierr = MPI_Scatter( rowAllocation.data(), NDATA, MPI_INT, &allocationSize, NDATA, MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    // create buffers to receive the data from root process
    DataArray1D< int > dataRows, dataCols;
    DataArray1D< double > dataEntries;
    dataRows.Allocate( allocationSize[2] );
    dataCols.Allocate( allocationSize[2] );
    dataEntries.Allocate( allocationSize[2] );

    /* TODO: combine row and column scatters together. Save one communication by better packing */
    // Scatter the rows, cols and entries to the processes according to the partition
    mpierr = MPI_Scatterv( rg, sendCounts.data(), displs.data(), MPI_INT, (int*)( dataRows ), dataRows.GetByteSize(),
                           MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    mpierr = MPI_Scatterv( cg, sendCounts.data(), displs.data(), MPI_INT, (int*)( dataCols ), dataCols.GetByteSize(),
                           MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    mpierr = MPI_Scatterv( de, sendCounts.data(), displs.data(), MPI_DOUBLE, (double*)( dataEntries ),
                           dataEntries.GetByteSize(), MPI_DOUBLE, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    // Compute an offset for the rows and columns by creating a local to global mapping
    std::vector< int > rowMap, colMap;
    for( int i = 0; i < allocationSize[2]; ++i )
    {
        int rindex, cindex;
        std::vector< int >::iterator riter = std::find( rowMap.begin(), rowMap.end(), dataRows[i] );
        if( riter == rowMap.end() )
        {
            rowMap.push_back( dataRows[i] );
            rindex = rowMap.size() - 1;
        }
        else
            rindex = riter - rowMap.begin();
        dataRows[i] = rindex;

        std::vector< int >::iterator citer = std::find( colMap.begin(), colMap.end(), dataCols[i] );
        if( citer == colMap.end() )
        {
            colMap.push_back( dataCols[i] );
            cindex = colMap.size() - 1;
        }
        else
            cindex = citer - colMap.begin();
        dataCols[i] = cindex;
    }
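    // As in read_buffered_map(), the global row/column ids are compacted into contiguous local
    // indices; here the lookup uses std::find over a std::vector (linear per entry) rather than
    // a std::map, and rowMap/colMap retain the local-to-global id correspondence.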
    // Now set the data received from root onto the sparse matrix
    sparseMatrix.SetEntries( dataRows, dataCols, dataEntries );

    /**
     * Perform a series of checks to ensure that the local maps in each process still preserve map properties
     * Let us run our usual tests to verify that the map data has been distributed correctly
     *   1) Consistency check -- row sum = 1.0
     *   2) Disable conservation checks that require global column sums (and hence more communication/reduction)
     *   3) Perform monotonicity check and ensure failures globally are same as serial case
     */

    // consistency: row sums
    int isConsistentP = mapRemap.IsConsistent( dTolerance );
    CHECK_EQUAL( 0, isConsistentP );

    // conservation: we will disable this conservation check for now.
    // int isConservativeP = mapRemap.IsConservative( dTolerance );
    // CHECK_EQUAL( 0, isConservativeP );

    // monotonicity: local failures
    int isMonotoneP = mapRemap.IsMonotone( dTolerance );
    // Accumulate the sum of failures to ensure that it is exactly the same as in the serial case
    int isMonotoneG;
    mpierr = MPI_Allreduce( &isMonotoneP, &isMonotoneG, 1, MPI_INT, MPI_SUM, commW );
    CHECK_EQUAL( mpierr, 0 );
    // 4600 monotonicity failures in serial; the parallel accumulation is expected to match
    CHECK_EQUAL( 4600, isMonotoneG );

    return;
}
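// Usage sketch (the executable name is illustrative and depends on the build configuration):
//   mpiexec -np 4 ./test_map_read outCS5ICOD5_map.nc
// Both registered tests run on every rank; each rank reports its local sparse-matrix size and
// the accumulated monotonicity failure count is compared against the serial value of 4600.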