MOAB: Mesh Oriented datABase
(version 5.2.1)
/**
 * @file Read and distribute mapping weights to multiple processes
 * @author Vijay Mahadevan (mahadevan@anl.gov)
 * @brief
 * @version 0.1
 * @date 2020-07-13
 *
 */

#include <iostream>
#include <vector>
#include "moab/Core.hpp"

#ifndef MOAB_HAVE_TEMPESTREMAP
#error Need TempestRemap dependency for this test
#endif

#include "OfflineMap.h"
#include "moab/Remapping/TempestRemapper.hpp"
#include "moab/ParallelComm.hpp"

#ifndef IS_BUILDING_MB
#define IS_BUILDING_MB
#endif

#include "Internals.hpp"
#include "TestRunner.hpp"

#include <netcdf.h>
#include <netcdf_par.h>

using namespace moab;

std::string inMapFilename = "outCS5ICOD5_map.nc";

void test_tempest_map_bcast();
void read_buffered_map();

int main( int argc, char** argv )
{
    if( argc > 1 )
        inMapFilename = std::string( argv[1] );
    else
        inMapFilename = TestDir + "/" + inMapFilename;

    REGISTER_TEST( test_tempest_map_bcast );
    REGISTER_TEST( read_buffered_map );

#ifdef MOAB_HAVE_MPI
    MPI_Init( &argc, &argv );
#endif
    int result = RUN_TESTS( argc, argv );
#ifdef MOAB_HAVE_MPI
    MPI_Finalize();
#endif
    return result;
}

#define NCERROREXIT( err, MSG )                                                         \
    if( err )                                                                           \
    {                                                                                   \
        std::cout << "Error (" << err << "): " << MSG;                                  \
        std::cout << "\nAborting with message: " << nc_strerror( err ) << "... \n";     \
        return;                                                                         \
    }

void read_buffered_map()
{
    NcError error( NcError::silent_nonfatal );

    // Define our total buffer size to use
    NcFile* ncMap = NULL;
    NcVar *varRow = NULL, *varCol = NULL, *varS = NULL;

#ifdef MOAB_HAVE_MPI
    int mpierr         = 0;
    const int rootProc = 0;
#endif
    // Main inputs;
    //   1. Map Filename
    //   2. Ownership info from the processes to be gathered on root
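    // Buffer sizing: each nonzero entry carries a row index, a column index and a weight,
    // i.e. 2 * sizeof( int ) + sizeof( double ) bytes per entry (16 bytes on platforms with
    // 4-byte int and 8-byte double), so the 64 KB buffer below holds 64 * 1024 / 16 = 4096
    // entries per buffered read.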
    const int nBufferSize = 64 * 1024;  // 64 KB
    const int nNNZBytes   = 2 * sizeof( int ) + sizeof( double );
    const int nMaxEntries = nBufferSize / nNNZBytes;
    int nA = 0, nB = 0, nVA = 0, nVB = 0, nS = 0;
    double dTolerance = 1e-08;

    int nprocs = 1, rank = 0;
#ifdef MOAB_HAVE_MPI
    MPI_Comm commW = MPI_COMM_WORLD;
    MPI_Comm_size( commW, &nprocs );
    MPI_Comm_rank( commW, &rank );
#endif

#ifdef MOAB_HAVE_MPI
    if( rank == rootProc )
#endif
    {
        ncMap = new NcFile( inMapFilename.c_str(), NcFile::ReadOnly );
        if( !ncMap->is_valid() ) { _EXCEPTION1( "Unable to open input map file \"%s\"", inMapFilename.c_str() ); }

        // Source and Target mesh resolutions
        NcDim* dimNA = ncMap->get_dim( "n_a" );
        if( dimNA == NULL ) { _EXCEPTIONT( "Input map missing dimension \"n_a\"" ); }
        else
            nA = dimNA->size();

        NcDim* dimNB = ncMap->get_dim( "n_b" );
        if( dimNB == NULL ) { _EXCEPTIONT( "Input map missing dimension \"n_b\"" ); }
        else
            nB = dimNB->size();

        NcDim* dimNVA = ncMap->get_dim( "nv_a" );
        if( dimNVA == NULL ) { _EXCEPTIONT( "Input map missing dimension \"nv_a\"" ); }
        else
            nVA = dimNVA->size();

        NcDim* dimNVB = ncMap->get_dim( "nv_b" );
        if( dimNVB == NULL ) { _EXCEPTIONT( "Input map missing dimension \"nv_b\"" ); }
        else
            nVB = dimNVB->size();

        // Read SparseMatrix entries
        NcDim* dimNS = ncMap->get_dim( "n_s" );
        if( dimNS == NULL )
        {
            _EXCEPTION1( "Map file \"%s\" does not contain dimension \"n_s\"", inMapFilename.c_str() );
        }
        else
            nS = dimNS->size();  // store total number of nonzeros

        varRow = ncMap->get_var( "row" );
        if( varRow == NULL )
        { _EXCEPTION1( "Map file \"%s\" does not contain variable \"row\"", inMapFilename.c_str() ); }

        varCol = ncMap->get_var( "col" );
        if( varCol == NULL )
        {
            _EXCEPTION1( "Map file \"%s\" does not contain variable \"col\"", inMapFilename.c_str() );
        }

        varS = ncMap->get_var( "S" );
        if( varS == NULL ) { _EXCEPTION1( "Map file \"%s\" does not contain variable \"S\"", inMapFilename.c_str() ); }
    }

    // const int nTotalBytes = nS * nNNZBytes;
    int nEntriesRemaining = nS;
    int nBufferedReads    = static_cast< int >( ceil( ( 1.0 * nS ) / ( 1.0 * nMaxEntries ) ) );
    int runData[6]        = { nA, nB, nVA, nVB, nS, nBufferedReads };

    mpierr = MPI_Bcast( runData, 6, MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

#ifdef MOAB_HAVE_MPI
    if( rank != rootProc )
    {
        nA             = runData[0];
        nB             = runData[1];
        nVA            = runData[2];
        nVB            = runData[3];
        nS             = runData[4];
        nBufferedReads = runData[5];
    }
    else
        printf( "Global parameters: nA = %d, nB = %d, nS = %d\n", nA, nB, nS );
#else
    printf( "Global parameters: nA = %d, nB = %d, nS = %d\n", nA, nB, nS );
#endif

    std::vector< int > rowOwnership( nprocs );
    int nGRowPerPart   = nB / nprocs;
    int nGRowRemainder = nB % nprocs;  // Keep the remainder in root
    rowOwnership[0]    = nGRowPerPart + nGRowRemainder;
    for( int ip = 1, roffset = rowOwnership[0]; ip < nprocs; ++ip )
    {
        roffset += nGRowPerPart;
        rowOwnership[ip] = roffset;
    }
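    // rowOwnership[ip] stores the exclusive upper bound of the global row indices owned by
    // rank ip, with the remainder rows kept on root. Illustrative example: with nB = 10 and
    // nprocs = 4, nGRowPerPart = 2, nGRowRemainder = 2 and rowOwnership = { 4, 6, 8, 10 },
    // i.e. root owns rows [0,4), rank 1 owns [4,6), rank 2 owns [6,8) and rank 3 owns [8,10).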
    // Let us declare the map object for every process
    OfflineMap mapRemap;
    SparseMatrix< double >& sparseMatrix = mapRemap.GetSparseMatrix();

#ifdef MOAB_HAVE_MPI
    if( rank == rootProc )
#endif
        printf( "Parameters: nA=%d, nB=%d, nVA=%d, nVB=%d, nS=%d, nNNZBytes = %d, nBufferedReads = %d\n", nA, nB, nVA,
                nVB, nS, nNNZBytes, nBufferedReads );

    std::map< int, int > rowMap, colMap;
    int rindexMax = 0, cindexMax = 0;
    long offset = 0;

    /* Split the rows and send to processes in chunks
       Let us start the buffered read */
    for( int iRead = 0; iRead < nBufferedReads; ++iRead )
    {
        // pointers to the data
        DataArray1D< int > vecRow;
        DataArray1D< int > vecCol;
        DataArray1D< double > vecS;
        std::vector< std::vector< int > > dataPerProcess( nprocs );
        std::vector< int > nDataPerProcess( nprocs, 0 );
        for( int ip = 0; ip < nprocs; ++ip )
            dataPerProcess[ip].reserve( std::max( 100, nMaxEntries / nprocs ) );

#ifdef MOAB_HAVE_MPI
        if( rank == rootProc )
#endif
        {
            int nLocSize = std::min( nEntriesRemaining, static_cast< int >( ceil( nBufferSize * 1.0 / nNNZBytes ) ) );

            printf( "Reading file: elements %ld to %ld\n", offset, offset + nLocSize );

            // Allocate and resize based on local buffer size
            vecRow.Allocate( nLocSize );
            vecCol.Allocate( nLocSize );
            vecS.Allocate( nLocSize );

            varRow->set_cur( (long)( offset ) );
            varRow->get( &( vecRow[0] ), nLocSize );

            varCol->set_cur( (long)( offset ) );
            varCol->get( &( vecCol[0] ), nLocSize );

            varS->set_cur( (long)( offset ) );
            varS->get( &( vecS[0] ), nLocSize );

            // Decrement vecRow and vecCol
            for( size_t i = 0; i < vecRow.GetRows(); i++ )
            {
                vecRow[i]--;
                vecCol[i]--;

                // TODO: Fix this linear search to compute process ownership
                int pOwner = -1;
                if( rowOwnership[0] > vecRow[i] )
                    pOwner = 0;
                else
                {
                    for( int ip = 1; ip < nprocs; ++ip )
                    {
                        if( rowOwnership[ip - 1] <= vecRow[i] && rowOwnership[ip] > vecRow[i] )
                        {
                            pOwner = ip;
                            break;
                        }
                    }
                }

                assert( pOwner >= 0 && pOwner < nprocs );
                dataPerProcess[pOwner].push_back( i );
            }
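            // One way to address the TODO above (sketch, not part of the original test): since
            // rowOwnership holds monotonically increasing upper bounds, the owner of a global
            // row index r could be located with a binary search instead of the linear scan, e.g.
            //   int pOwner = std::upper_bound( rowOwnership.begin(), rowOwnership.end(), r ) -
            //                rowOwnership.begin();
            // (assuming <algorithm> is available), reducing each lookup from O(nprocs) to O(log nprocs).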
            offset += nLocSize;
            nEntriesRemaining -= nLocSize;

            for( int ip = 0; ip < nprocs; ++ip )
                nDataPerProcess[ip] = dataPerProcess[ip].size();
        }

        // Communicate the number of DoF data to expect from root
        int nEntriesComm = 0;
        mpierr = MPI_Scatter( nDataPerProcess.data(), 1, MPI_INT, &nEntriesComm, 1, MPI_INT, rootProc, commW );
        CHECK_EQUAL( mpierr, 0 );

#ifdef MOAB_HAVE_MPI
        if( rank == rootProc )
#endif
        {
            std::vector< MPI_Request > cRequests;
            std::vector< MPI_Status > cStats( 2 * nprocs - 2 );
            cRequests.reserve( 2 * nprocs - 2 );

            // First send out all the relevant data buffers to the other processes
            std::vector< std::vector< int > > dataRowColsAll( nprocs );
            std::vector< std::vector< double > > dataEntriesAll( nprocs );
            for( int ip = 1; ip < nprocs; ++ip )
            {
                const int nDPP = nDataPerProcess[ip];
                if( nDPP > 0 )
                {
                    std::vector< int >& dataRowCols = dataRowColsAll[ip];
                    dataRowCols.resize( 2 * nDPP );
                    std::vector< double >& dataEntries = dataEntriesAll[ip];
                    dataEntries.resize( nDPP );

                    for( int ij = 0; ij < nDPP; ++ij )
                    {
                        dataRowCols[ij * 2]     = vecRow[dataPerProcess[ip][ij]];
                        dataRowCols[ij * 2 + 1] = vecCol[dataPerProcess[ip][ij]];
                        dataEntries[ij]         = vecS[dataPerProcess[ip][ij]];
                    }

                    MPI_Request rcsend, dsend;
                    mpierr = MPI_Isend( dataRowCols.data(), 2 * nDPP, MPI_INT, ip, iRead * 1000, commW, &rcsend );
                    CHECK_EQUAL( mpierr, 0 );
                    mpierr = MPI_Isend( dataEntries.data(), nDPP, MPI_DOUBLE, ip, iRead * 1000 + 1, commW, &dsend );
                    CHECK_EQUAL( mpierr, 0 );

                    cRequests.push_back( rcsend );
                    cRequests.push_back( dsend );
                }
            }

            // Next perform all necessary local work while we wait for the buffers to be sent out
            // Compute an offset for the rows and columns by creating a local to global mapping for rootProc
            int rindex = 0, cindex = 0;
            assert( dataPerProcess[0].size() - nEntriesComm == 0 );  // sanity check
            for( int i = 0; i < nEntriesComm; ++i )
            {
                const int& vecRowValue = vecRow[dataPerProcess[0][i]];
                const int& vecColValue = vecCol[dataPerProcess[0][i]];

                std::map< int, int >::iterator riter = rowMap.find( vecRowValue );
                if( riter == rowMap.end() )
                {
                    rowMap[vecRowValue] = rindexMax;
                    rindex              = rindexMax;
                    rindexMax++;
                }
                else
                    rindex = riter->second;

                std::map< int, int >::iterator citer = colMap.find( vecColValue );
                if( citer == colMap.end() )
                {
                    colMap[vecColValue] = cindexMax;
                    cindex              = cindexMax;
                    cindexMax++;
                }
                else
                    cindex = citer->second;

                sparseMatrix( rindex, cindex ) = vecS[i];
            }

            // Wait until all communication is pushed out
            mpierr = MPI_Waitall( cRequests.size(), cRequests.data(), cStats.data() );
            CHECK_EQUAL( mpierr, 0 );
        }  // if( rank == rootProc )
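        // The non-root ranks mirror the sends above: they post two non-blocking receives with
        // the matching tags ( iRead * 1000 for the packed [row, col] indices and
        // iRead * 1000 + 1 for the weights ) and then apply the same global-to-local index
        // compaction before inserting the received triplets into their local sparse matrix.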
#ifdef MOAB_HAVE_MPI
        else
        {
            if( nEntriesComm > 0 )
            {
                MPI_Request cRequests[2];
                MPI_Status cStats[2];
                // create buffers to receive the data from root process
                std::vector< int > dataRowCols( 2 * nEntriesComm );
                vecRow.Allocate( nEntriesComm );
                vecCol.Allocate( nEntriesComm );
                vecS.Allocate( nEntriesComm );

                /* TODO: combine row and column scatters together. Save one communication by better packing */
                // Scatter the rows, cols and entries to the processes according to the partition
                mpierr = MPI_Irecv( dataRowCols.data(), 2 * nEntriesComm, MPI_INT, rootProc, iRead * 1000, commW,
                                    &cRequests[0] );
                CHECK_EQUAL( mpierr, 0 );

                mpierr = MPI_Irecv( vecS, nEntriesComm, MPI_DOUBLE, rootProc, iRead * 1000 + 1, commW, &cRequests[1] );
                CHECK_EQUAL( mpierr, 0 );

                // Wait until all communication is pushed out
                mpierr = MPI_Waitall( 2, cRequests, cStats );
                CHECK_EQUAL( mpierr, 0 );

                // Compute an offset for the rows and columns by creating a local to global mapping
                int rindex = 0, cindex = 0;
                for( int i = 0; i < nEntriesComm; ++i )
                {
                    const int& vecRowValue = dataRowCols[i * 2];
                    const int& vecColValue = dataRowCols[i * 2 + 1];

                    std::map< int, int >::iterator riter = rowMap.find( vecRowValue );
                    if( riter == rowMap.end() )
                    {
                        rowMap[vecRowValue] = rindexMax;
                        rindex              = rindexMax;
                        rindexMax++;
                    }
                    else
                        rindex = riter->second;
                    vecRow[i] = rindex;

                    std::map< int, int >::iterator citer = colMap.find( vecColValue );
                    if( citer == colMap.end() )
                    {
                        colMap[vecColValue] = cindexMax;
                        cindex              = cindexMax;
                        cindexMax++;
                    }
                    else
                        cindex = citer->second;
                    vecCol[i] = cindex;

                    sparseMatrix( rindex, cindex ) = vecS[i];
                }

                // clear the local buffer
                dataRowCols.clear();
            }  // if( nEntriesComm > 0 )
        }      // if( rank != rootProc )

        MPI_Barrier( commW );
#endif
    }

#ifdef MOAB_HAVE_MPI
    if( rank == rootProc )
#endif
    {
        assert( nEntriesRemaining == 0 );
        ncMap->close();
    }

    /**
     * Perform a series of checks to ensure that the local maps in each process still preserve map properties
     * Let us run our usual tests to verify that the map data has been distributed correctly
     *   1) Consistency check -- row sum = 1.0
     *   2) Disable conservation checks that require global column sums (and hence more communication/reduction)
     *   3) Perform monotonicity check and ensure failures globally are same as serial case
     */
    printf( "Rank %d: sparsematrix size = %d x %d\n", rank, sparseMatrix.GetRows(), sparseMatrix.GetColumns() );
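    // Because ownership is assigned by row, every row of the map lives entirely on one rank,
    // so the row-sum (consistency) and per-entry monotonicity checks can be evaluated locally.
    // Column sums are split across ranks, which is why the conservation check stays disabled below.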
    // consistency: row sums
    {
        int isConsistentP = mapRemap.IsConsistent( dTolerance );
        if( 0 != isConsistentP ) { printf( "Rank %d failed consistency checks...\n", rank ); }
        CHECK_EQUAL( 0, isConsistentP );
    }

    // conservation: we will disable this conservation check for now.
    // int isConservativeP = mapRemap.IsConservative( dTolerance );
    // CHECK_EQUAL( 0, isConservativeP );

    // monotonicity: local failures
    {
        int isMonotoneP = mapRemap.IsMonotone( dTolerance );
        // Accumulate the sum of failures to ensure that it is exactly the same as in the serial case
        int isMonotoneG;
        mpierr = MPI_Allreduce( &isMonotoneP, &isMonotoneG, 1, MPI_INT, MPI_SUM, commW );
        CHECK_EQUAL( mpierr, 0 );

        // 4600 monotonicity failures in serial and parallel for the test map file
        CHECK_EQUAL( 4600, isMonotoneG );
    }

    delete ncMap;

    return;
}
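// In contrast to read_buffered_map() above, test_tempest_map_bcast() loads the entire map on the
// root process via OfflineMap::Read(), verifies the serial map properties there, and then
// redistributes the nonzero triplets to all ranks with collective MPI_Scatterv calls instead of
// buffered point-to-point sends.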
void test_tempest_map_bcast()
{
    /*
     *
     * Load mapping file generated with CS5 and ICOD5 meshes with FV-FV
     * projection for field data. It has the following attributes:
     *
     *      Analyzing map
     *      ..Per-dof consistency  (tol 1.00000e-08).. PASS
     *      ..Per-dof conservation (tol 1.00000e-08).. PASS
     *      ..Weights within range [-10,+10].. Done
     *      ..
     *      ..  Total nonzero entries: 8976
     *      ..   Column index min/max: 1 / 150 (150 source dofs)
     *      ..      Row index min/max: 1 / 252 (252 target dofs)
     *      ..      Source area / 4pi: 9.999999999991666e-01
     *      ..    Source area min/max: 7.758193626656797e-02 / 9.789674024202324e-02
     *      ..      Target area / 4pi: 9.999999999999928e-01
     *      ..    Target area min/max: 3.418848585325412e-02 / 5.362041648788460e-02
     *      ..    Source frac min/max: 9.999999999997851e-01 / 1.000000000003865e+00
     *      ..    Target frac min/max: 9.999999999993099e-01 / 1.000000000000784e+00
     *      ..    Map weights min/max: -2.062659472710135e-01 / 1.143815352351317e+00
     *      ..       Row sums min/max: 9.999999999993099e-01 / 1.000000000000784e+00
     *      ..   Consist. err min/max: -6.901146321069973e-13 / 7.838174553853605e-13
     *      ..    Col wt.sums min/max: 9.999999999997854e-01 / 1.000000000003865e+00
     *      ..   Conserv. err min/max: -2.146061106600428e-13 / 3.865352482534945e-12
     *      ..
     *      ..Histogram of nonzero entries in sparse matrix
     *      ....Column 1: Number of nonzero entries (bin minimum)
     *      ....Column 2: Number of columns with that many nonzero values
     *      ....Column 3: Number of rows with that many nonzero values
     *      ..[[25, 0, 2],[29, 0, 8],[30, 0, 10],[31, 150, 232]]
     *      ..
     *      ..Histogram of weights
     *      ....Column 1: Lower bound on weights
     *      ....Column 2: Upper bound on weights
     *      ....Column 3: # of weights in that bin
     *      ..[[-1, 0, 4577],[0, 1, 4365],[1, 2, 34]]
     *
     */
    NcError error( NcError::silent_nonfatal );
    MPI_Comm commW                = MPI_COMM_WORLD;
    const int rootProc            = 0;
    const std::string outFilename = inMapFilename;
    double dTolerance             = 1e-08;
    int mpierr                    = 0;

    int nprocs, rank;
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    const double dFillValueOverride = 0.0;
    OfflineMap mapRemap;
    if( rank == 0 )
    {
        std::cout << "Reading file: " << outFilename << std::endl;
        mapRemap.Read( outFilename );
        mapRemap.SetFillValueOverrideDbl( dFillValueOverride );
        mapRemap.SetFillValueOverride( static_cast< float >( dFillValueOverride ) );

        int isConsistent = mapRemap.IsConsistent( dTolerance );
        CHECK_EQUAL( 0, isConsistent );
        int isConservative = mapRemap.IsConservative( dTolerance );
        CHECK_EQUAL( 0, isConservative );
        int isMonotone = mapRemap.IsMonotone( dTolerance );
        CHECK_EQUAL( 4600, isMonotone );

        // verify that the source and target mesh areas are equal
        DataArray1D< double >& srcAreas = mapRemap.GetSourceAreas();
        DataArray1D< double >& tgtAreas = mapRemap.GetTargetAreas();

        double totSrcArea = 0.0;
        for( size_t i = 0; i < srcAreas.GetRows(); ++i )
            totSrcArea += srcAreas[i];

        double totTgtArea = 0.0;
        for( size_t i = 0; i < tgtAreas.GetRows(); ++i )
            totTgtArea += tgtAreas[i];

        CHECK_REAL_EQUAL( totSrcArea, totTgtArea, 1e-10 );
    }

    MPI_Barrier( commW );

    SparseMatrix< double >& sparseMatrix = mapRemap.GetSparseMatrix();

    // Get the row, col and entry data tuple
    // Retrieve all data from the map operator on root process
    DataArray1D< int > dataRowsGlobal, dataColsGlobal;
    DataArray1D< double > dataEntriesGlobal;
    if( rank == 0 )
    {
        sparseMatrix.GetEntries( dataRowsGlobal, dataColsGlobal, dataEntriesGlobal );
        std::cout << "Total NNZ in remap matrix = " << dataRowsGlobal.GetRows() << std::endl;
    }

    int *rg, *cg;
    double* de;
    int nMapGlobalRowCols[3] = { sparseMatrix.GetRows(), sparseMatrix.GetColumns(),
                                 static_cast< int >( dataEntriesGlobal.GetRows() ) };

    /* split the rows and send to processes in chunks */
    std::vector< int > rowAllocation;
    std::vector< int > sendCounts;
    std::vector< int > displs;
    const int NDATA = 3;
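    // Layout of the per-rank allocation metadata below (NDATA = 3 integers per rank):
    //   rowAllocation[i * NDATA]     -> number of global rows assigned to rank i
    //   rowAllocation[i * NDATA + 1] -> offset of rank i's first nonzero entry in the global arrays
    //   rowAllocation[i * NDATA + 2] -> number of nonzero entries sent to rank i
    // displs and sendCounts copy the last two fields for use in the MPI_Scatterv calls.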
    if( rank == 0 )
    {
        rg = dataRowsGlobal;
        cg = dataColsGlobal;
        de = dataEntriesGlobal;

        // Let us do a trivial partitioning of the row data
        rowAllocation.resize( nprocs * NDATA );
        displs.resize( nprocs );
        sendCounts.resize( nprocs );

        int nRowPerPart = nMapGlobalRowCols[0] / nprocs;
        int remainder   = nMapGlobalRowCols[0] % nprocs;  // Keep the remainder in root
        // printf( "nRowPerPart = %d, remainder = %d\n", nRowPerPart, remainder );
        int sum = 0;
        for( int i = 0; i < nprocs; ++i )
        {
            rowAllocation[i * NDATA]     = nRowPerPart + ( i == 0 ? remainder : 0 );
            rowAllocation[i * NDATA + 1] = std::find( rg, rg + dataRowsGlobal.GetRows(), sum ) - rg;
            sum += rowAllocation[i * NDATA];
            displs[i] = rowAllocation[i * NDATA + 1];
        }
        for( int i = 0; i < nprocs - 1; ++i )
        {
            rowAllocation[i * NDATA + 2] = rowAllocation[( i + 1 ) * NDATA + 1] - rowAllocation[i * NDATA + 1];
            sendCounts[i]                = rowAllocation[i * NDATA + 2];
        }
        rowAllocation[( nprocs - 1 ) * NDATA + 2] = nMapGlobalRowCols[2] - displs[nprocs - 1];
        sendCounts[nprocs - 1]                    = rowAllocation[( nprocs - 1 ) * NDATA + 2];

        // for( int i = 0; i < nprocs; i++ )
        //     printf( "sendcounts[%d] = %d, %d, %d\tdispls[%d] = %d, %d\n", i, rowAllocation[i * NDATA],
        //             rowAllocation[i * NDATA + 1], rowAllocation[i * NDATA + 2], i, displs[i], sendCounts[i] );
    }

    mpierr = MPI_Bcast( nMapGlobalRowCols, 3, MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    int allocationSize[NDATA] = { 0, 0, 0 };
    mpierr = MPI_Scatter( rowAllocation.data(), NDATA, MPI_INT, &allocationSize, NDATA, MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    // create buffers to receive the data from root process
    DataArray1D< int > dataRows, dataCols;
    DataArray1D< double > dataEntries;
    dataRows.Allocate( allocationSize[2] );
    dataCols.Allocate( allocationSize[2] );
    dataEntries.Allocate( allocationSize[2] );

    /* TODO: combine row and column scatters together. Save one communication by better packing */
    // Scatter the rows, cols and entries to the processes according to the partition
    mpierr = MPI_Scatterv( rg, sendCounts.data(), displs.data(), MPI_INT, (int*)( dataRows ), dataRows.GetByteSize(),
                           MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    mpierr = MPI_Scatterv( cg, sendCounts.data(), displs.data(), MPI_INT, (int*)( dataCols ), dataCols.GetByteSize(),
                           MPI_INT, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    mpierr = MPI_Scatterv( de, sendCounts.data(), displs.data(), MPI_DOUBLE, (double*)( dataEntries ),
                           dataEntries.GetByteSize(), MPI_DOUBLE, rootProc, commW );
    CHECK_EQUAL( mpierr, 0 );

    // Compute an offset for the rows and columns by creating a local to global mapping
    std::vector< int > rowMap, colMap;
    for( int i = 0; i < allocationSize[2]; ++i )
    {
        int rindex, cindex;
        std::vector< int >::iterator riter = std::find( rowMap.begin(), rowMap.end(), dataRows[i] );
        if( riter == rowMap.end() )
        {
            rowMap.push_back( dataRows[i] );
            rindex = rowMap.size() - 1;
        }
        else
            rindex = riter - rowMap.begin();
        dataRows[i] = rindex;

        std::vector< int >::iterator citer = std::find( colMap.begin(), colMap.end(), dataCols[i] );
        if( citer == colMap.end() )
        {
            colMap.push_back( dataCols[i] );
            cindex = colMap.size() - 1;
        }
        else
            cindex = citer - colMap.begin();
        dataCols[i] = cindex;
    }
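    // As in read_buffered_map(), the global row/column ids are compacted into contiguous local
    // indices; here the lookup uses std::find over a std::vector (linear per entry) rather than
    // a std::map, and rowMap/colMap retain the local-to-global id correspondence.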
    // Now set the data received from root onto the sparse matrix
    sparseMatrix.SetEntries( dataRows, dataCols, dataEntries );

    /**
     * Perform a series of checks to ensure that the local maps in each process still preserve map properties
     * Let us run our usual tests to verify that the map data has been distributed correctly
     *   1) Consistency check -- row sum = 1.0
     *   2) Disable conservation checks that require global column sums (and hence more communication/reduction)
     *   3) Perform monotonicity check and ensure failures globally are same as serial case
     */

    // consistency: row sums
    int isConsistentP = mapRemap.IsConsistent( dTolerance );
    CHECK_EQUAL( 0, isConsistentP );

    // conservation: we will disable this conservation check for now.
    // int isConservativeP = mapRemap.IsConservative( dTolerance );
    // CHECK_EQUAL( 0, isConservativeP );

    // monotonicity: local failures
    int isMonotoneP = mapRemap.IsMonotone( dTolerance );
    // Accumulate the sum of failures to ensure that it is exactly the same as in the serial case
    int isMonotoneG;
    mpierr = MPI_Allreduce( &isMonotoneP, &isMonotoneG, 1, MPI_INT, MPI_SUM, commW );
    CHECK_EQUAL( mpierr, 0 );
    // 4600 monotonicity failures in serial; the parallel accumulation is expected to match
    CHECK_EQUAL( 4600, isMonotoneG );

    return;
}
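// Usage sketch (the executable name is illustrative and depends on the build configuration):
//   mpiexec -np 4 ./test_map_read outCS5ICOD5_map.nc
// Both registered tests run on every rank; each rank reports its local sparse-matrix size and
// the accumulated monotonicity failure count is compared against the serial value of 4600.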