Mesh Oriented datABase
(version 5.4.1)
Array-based unstructured mesh datastructure
CrystalRouterExample.cpp
/*
 * This example shows one of the building blocks of the parallel infrastructure in MOAB.
 * More exactly: if we have some homogeneous data to communicate from each processor to a list of
 * other processors, how do we do it?
 *
 * It introduces the TupleList and the crystal router to MOAB users.
 *
 * This technology is used in resolving shared vertices / sets between partitions.
 * It is used in the mbcoupler for sending data (target points) to the proper processor and for
 * communicating back the results. It is also used to communicate the departure mesh for
 * intersection in parallel.
 *
 * It is a generalized gather/scatter, a way of doing MPI_Alltoallv() when the communication
 * matrix is sparse: it is assumed that every proc needs to communicate only with a few of the
 * other processors. If every processor needed to communicate with all the others (a full
 * communication matrix), paired isend/irecv calls would have to be used instead.
 *
 * The example needs to be launched in parallel.
 * Every proc builds a list of tuples that will be sent to a few other procs;
 * in general, we send to about num_comms tasks, and about num_tuples tuples to each of them.
 * We vary num_comms and num_tuples per processor.
 *
 * We send long ints of the form
 *   100000 * sendTo + 1000 * rank + j, where j is the index of the tuple
 * (for example, task 2 sending tuple j = 3 to task 5 packs the value 100000*5 + 1000*2 + 3 = 502003).
 *
 * After routing, we verify that every value received has the form
 *   100000 * rank + 1000 * from + j
 *
 * For some reportrank we also print the tuples.
 *
 * Should run on at least 2 processors.
 *
 * Note: We do not need a moab instance for this example.
 */

/** @example CrystalRouterExample.cpp \n
 * \brief generalized gather scatter using tuples \n
 * <b>To run</b>: mpiexec -np <n> CrystalRouterExample -r [reportrank] -t [num_tuples] -n [num_comms] \n
 */
#include "moab/MOABConfig.h"
#ifdef MOAB_HAVE_MPI
#include "moab/ProcConfig.hpp"
#endif
#include "moab/TupleList.hpp"
#include "moab/ProgOptions.hpp"
#include "moab/ErrorHandler.hpp"
#include <ctime>
#include <iostream>
#include <sstream>

const char BRIEF_DESC[] = "Example of gather scatter with tuple lists \n";
std::ostringstream LONG_DESC;

using namespace moab;
using namespace std;

int main( int argc, char** argv )
{
#ifdef MOAB_HAVE_MPI
    MPI_Init( &argc, &argv );

    // Initialize error handler, required for this example (not using a moab instance)
    MBErrorHandler_Init();

    ProcConfig pc( MPI_COMM_WORLD );
    int size = pc.proc_size();
    int rank = pc.proc_rank();

    LONG_DESC << "This program does a gather scatter with a list of tuples. \n"
                 "It tries to see how much the communication costs in terms of time and memory. \n"
              << "It starts by creating a list of tuples to be sent from each processor \n"
                 "to a list of other processors.\n"
              << "The number of tuples and how many tasks to communicate with are controlled by "
                 "input parameters.\n"
              << "After communication, we verify locally if we received what we expected. \n";
    ProgOptions opts( LONG_DESC.str(), BRIEF_DESC );

    // How many procs communicate to the current proc, on average (we will vary that too)
    int num_comms = 2;
    opts.addOpt< int >( "num_comms,n", "each task will send some tuples to about num_comms other tasks (default 2)",
                        &num_comms );

    int num_tuples = 4;
    opts.addOpt< int >( "num_tuples,t", "each task will send about num_tuples tuples to each of those tasks (default 4)",
                        &num_tuples );

    int reportrank = size + 1;
    opts.addOpt< int >( "reporting_rank,r",
                        "this rank will report the tuples sent and the tuples received; if it is "
                        "higher than num_procs, there is no reporting",
                        &reportrank );

    opts.parseCommandLine( argc, argv );

    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        cout << " There are " << size << " tasks in example.\n";
        cout << " We will send groups of " << num_tuples << " from each task towards " << num_comms
             << " other tasks.\n";
    }

    // Send some data from each proc to a few others: rank + i * size / 2 + 1 (modulo size),
    // for i = 0 .. num_comms - 1

    gs_data::crystal_data* cd = pc.crystal_router();

    long total_n_tuples = num_comms * num_tuples;

    // Vary the number of tasks to send to, and the number of tuples to send
    if( rank < size / 2 )
        num_comms--;
    else
        num_comms++;

    if( rank < size / 3 )
        num_tuples *= 2;
    else if( rank > size - size / 3 )
        num_tuples /= 2;

    TupleList tl;
    // At most num_tuples * num_comms tuples to send.
    // We preallocate with this; some tuples on some processors might need more memory, to be
    // able to grow locally. Some tasks might receive more tuples though, and in the process, some
    // might grow more than others. By doing these logP sends/receives, we do not grow local memory
    // too much.
    tl.initialize( 1, 1, 0, 1, num_tuples * num_comms );
    tl.enableWriteAccess();
    // Form num_tuples * num_comms tuples, to be sent to various ranks
    unsigned int n = tl.get_n();
    for( int i = 0; i < num_comms; i++ )
    {
        int sendTo = rank + i * size / 2 + 1;  // Spread out the destinations, for a stress-like test
        sendTo     = sendTo % size;
        long intToSend = 1000 * rank + 100000 * sendTo;
        for( int j = 0; j < num_tuples; j++ )
        {
            n           = tl.get_n();
            tl.vi_wr[n] = sendTo;
            tl.vl_wr[n] = intToSend + j;
            tl.vr_wr[n] = 10000. * rank + j;
            tl.inc_n();
        }
    }

    if( rank == reportrank )
    {
        cout << "rank " << rank << "\n";
        tl.print( " before sending" );
    }

    clock_t tt = clock();
    // All communication happens here; no MPI calls are needed from the user
    ErrorCode rval = cd->gs_transfer( 1, tl, 0 );
    MB_CHK_SET_ERR( rval, "Error in tuple transfer" );

    double secs = 0;
    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        secs = ( clock() - tt ) / (double)CLOCKS_PER_SEC;
    }
    if( rank == reportrank )
    {
        cout << "rank " << rank << "\n";
        tl.print( " after transfer" );
    }

    // Check that all tuples received have the form 100000 * rank + 1000 * from + j
    unsigned int received = tl.get_n();
    for( int i = 0; i < (int)received; i++ )
    {
        int from      = tl.vi_rd[i];
        long valrec   = tl.vl_rd[i];
        int remainder = valrec - 100000 * rank - 1000 * from;
        // The sender's num_tuples may have been scaled differently from ours, so allow some slack
        // in the upper bound
        if( remainder < 0 || remainder >= num_tuples * 4 )
            cout << " error: tuple " << i << " received at proc rank " << rank << " from proc " << from
                 << " has value " << valrec << " remainder " << remainder << "\n";
    }

    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        cout << "communication of about " << total_n_tuples << " tuples per proc took " << secs << " seconds"
             << "\n";
        tt = clock();
    }

    // Finalize error handler, required for this example (not using a moab instance)
    MBErrorHandler_Finalize();

    MPI_Finalize();
#else
    std::cout << " Build with MPI for this example to work\n";
#endif

    return 0;
}
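As the header comment notes, the example must be built with MPI and launched on at least two processes. A concrete invocation, following the run line from the @example tag above (the executable path depends on your build directory, and the process count and option values here are only illustrative), could look like:

    mpiexec -np 4 ./CrystalRouterExample -r 0 -t 10 -n 3

With -r 0, rank 0 prints its tuple list before and after gs_transfer() and then the time the transfer took; if the reporting rank is left at its default (num_procs + 1), only rank 0's summary line with the approximate tuple count and timing is printed.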