fathom/moab-docs/ParCommGraph_8hpp_source.html

00001 /*
00002  * ParCommGraph.hpp
00003  *
00004  *  will be used to setup communication between 2 distributed meshes, in which one mesh was migrated
00005  * from the other. (one example is atmosphere mesh migrated to coupler pes)
00006  *
00007  *  there are 3 communicators in play, one for each mesh, and one joint
00008  *  communicator, that spans both sets of processes; to send mesh or tag data we need to use the
00009  * joint communicator, with nonblocking sends (MPI_Isend) and blocking receives (MPI_Recv)
00010  *
00011  *  various methods should be available to migrate meshes; trivial, using graph partitioner (Zoltan
00012  * PHG) and using a geometric partitioner  (Zoltan RCB)
00013  *
00014  *  communicators are represented by their MPI groups, not by their communicators, because
00015  *  the groups are always defined, irrespective of what tasks they are on. Communicators can be
00016  * MPI_COMM_NULL, while MPI_Groups are always defined
00017  *
00018  *  Some of the methods in here are executed over the sender communicator, some are over the
00019  * receiver communicator. They can switch places: what was sender becomes the receiver and vice versa
00020  *
00021  *  The name "graph" is in the sense of a bipartite graph, in which we can separate senders and
00022  * receivers tasks
00023  *
00024  *  The info stored in the ParCommGraph helps in migrating fields (MOAB tags) from component to the
00025  * coupler and back
00026  *
00027  *  So initially the ParCommGraph is assisting in mesh migration (from component to coupler) and
00028  * then is used to migrate tag data from component to coupler and back from coupler to component.
00029  *
00030  *  The same class is used after intersection (which is done on the coupler pes between 2 different
00031  * component migrated meshes) and it alters communication pattern between the original component pes
00032  * and coupler pes;
00033  *
00034  *   We added a new way to send tags between 2 models; the first application of the new method is to
00035  * send tag from atm dynamics model (spectral elements, with np x np tags defined on each element,
00036  * according to the GLOBAL_DOFS tag associated to each element) towards the atm physics model, which
00037  * is just a point cloud of vertices distributed differently to the physics model pes; matching is
00038  * done using GLOBAL_ID tag on vertices; Right now, we assume that the models are on different pes,
00039  * but the joint communicator covers both and that the ids of the tasks are with respect to the
00040  * joint communicator
00041  *
00042  *
00043  */
00044 #include "moab_mpi.h"
00045 #include "moab/Interface.hpp"
00046 #include "moab/ParallelComm.hpp"
00047 #include <map>
00048
00049 #ifndef SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_
00050 #define SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_
00051
00052 namespace moab
00053 {
00054
00055 class ParCommGraph
00056 {
00057   public:
00058     enum TypeGraph
00059     {
00060         INITIAL_MIGRATE,
00061         COVERAGE,
00062         DOF_BASED
00063     };
00064     virtual ~ParCommGraph();
00065
00066     /**
00067      * \brief collective constructor, will be called on all sender tasks and receiver tasks
00068      * \param[in]  joincomm  joint MPI communicator that covers both sender and receiver MPI groups
00069      * \param[in]  group1   MPI group formed with sender tasks; (sender usually loads the mesh in a
00070      * migrate scenario) \param[in]  group2   MPI group formed with receiver tasks; (receiver
00071      * usually receives the mesh in a migrate scenario) \param[in]  coid1    sender component unique
00072      * identifier in a coupled application (in climate simulations could be the Atmosphere Comp id =
00073      * 5 or Ocean Comp ID , 17) \param[in]  coid2    receiver component unique identifier in a
00074      * coupled application (it is usually the coupler, 2, in E3SM)
00075      *
00076      * this graph will be formed on sender and receiver tasks, and, in principle, will hold info
00077      * about how the local entities are distributed on the other side
00078      *
00079      * Its major role is to help in migration of data, from component to the coupler and vice-versa;
00080      * Each local entity has a corresponding task (or tasks) on the other side, to where the data
00081      * needs to be sent
00082      *
00083      * important data stored in ParCommGraph, immediately it is created
00084      *   - all sender and receiver tasks ids, with respect to the joint communicator
00085      *   - local rank in sender and receiver group (-1 if not part of the respective group)
00086      *   - rank in the joint communicator (from 0)
00087      */
00088     ParCommGraph( MPI_Comm joincomm, MPI_Group group1, MPI_Group group2, int coid1, int coid2 );
00089
00090     /**
00091      * \brief copy constructor will copy only the senders, receivers, compid1, etc
00092      */
00093     ParCommGraph( const ParCommGraph& );
00094
00095     /**
00096       \brief  Based on the number of elements on each task in group 1, partition for group 2,
00097    trivially
00098
00099    <B>Operations:</B> it is called on every receiver task; decides how are all elements distributed
00100
00101     Note:  establish how many elements are sent from each task in group 1 to tasks in group 2
00102           This call is usually made on a root / master process, and will construct local maps that
00103    are member data, which contain the communication graph, in both directions Also, number of
00104    elements migrated/exchanged between each sender/receiver
00105
00106      \param[in]  numElemsPerTaskInGroup1 (std::vector<int> &)  number of elements on each sender
00107    task
00108      */
00109
00110     ErrorCode compute_trivial_partition( std::vector< int >& numElemsPerTaskInGroup1 );
00111
00112     /**
00113        \brief  pack information about receivers view of the graph, for future sending to receiver
00114       root
00115
00116       <B>Operations:</B> Local, called on root process of the senders group
00117
00118        \param[out] packed_recv_array
00119          packed data will be sent to the root of receivers, and distributed from there, and
00120            will have this information, for each receiver, concatenated
00121          receiver 1 task, number of senders for receiver 1, then sender tasks for receiver 1,
00122       receiver 2 task, number of senders for receiver 2, sender tasks for receiver 2, etc Note: only
00123       the root of senders will compute this, and send it over to the receiver root, which will
00124         distribute it over each receiver; We do not pack the sizes of data to be sent, only the
00125       senders for each of the receivers (could be of size O(n^2) , where n is the number of tasks ;
00126       but in general, it should be more like O(n) ). Each sender sends to a "finite" number of
00127       receivers, and each receiver receives from a finite number of senders). We need this info to
00128       decide how to set up the send/receive waiting game for non-blocking communication )
00129      */
00130     ErrorCode pack_receivers_graph( std::vector< int >& packed_recv_array );
00131
00132     // get methods for private data
00133     bool is_root_sender()
00134     {
00135         return rootSender;
00136     }
00137
00138     bool is_root_receiver()
00139     {
00140         return rootReceiver;
00141     }
00142
00143     int sender( int index )
00144     {
00145         return senderTasks[index];
00146     }
00147
00148     int receiver( int index )
00149     {
00150         return receiverTasks[index];
00151     }
00152
00153     int get_component_id1()
00154     {
00155         return compid1;
00156     }
00157     int get_component_id2()
00158     {
00159         return compid2;
00160     }
00161
00162     int get_context_id()
00163     {
00164         return context_id;
00165     }
00166     void set_context_id( int other_id )
00167     {
00168         context_id = other_id;
00169     }
00170
00171     EntityHandle get_cover_set()
00172     {
00173         return cover_set;
00174     }
00175     void set_cover_set( EntityHandle cover )
00176     {
00177         cover_set = cover;
00178     }
00179
00180     // return local graph for a specific task
00181     ErrorCode split_owned_range( int sender_rank, Range& owned );
00182
00183     ErrorCode split_owned_range( Range& owned );
00184
00185     ErrorCode send_graph( MPI_Comm jcomm );
00186
00187     ErrorCode send_graph_partition( ParallelComm* pco, MPI_Comm jcomm );
00188
00189     ErrorCode send_mesh_parts( MPI_Comm jcomm, ParallelComm* pco, Range& owned );
00190
00191     // this is called on receiver side
00192     ErrorCode receive_comm_graph( MPI_Comm jcomm, ParallelComm* pco, std::vector< int >& pack_array );
00193
00194     ErrorCode receive_mesh( MPI_Comm jcomm,
00195                             ParallelComm* pco,
00196                             EntityHandle local_set,
00197                             std::vector< int >& senders_local );
00198
00199     ErrorCode release_send_buffers();
00200
00201     ErrorCode send_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );
00202
00203     ErrorCode receive_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );
00204
00205     // getter method
00206     const std::vector< int >& senders()
00207     {
00208         return senderTasks;
00209     }  // reference copy; refers to sender tasks in joint comm
00210     const std::vector< int >& receivers()
00211     {
00212         return receiverTasks;
00213     }
00214
00215     ErrorCode settle_send_graph( TupleList& TLcovIDs );
00216
00217     // this will set after_cov_rec_sizes
00218     void SetReceivingAfterCoverage(
00219         std::map< int, std::set< int > >& idsFromProcs );  // will make sense only on receivers, right now after cov
00220
00221     // strideComp is np x np, or 1, in our cases
00222     // will fill up ordered lists for corresponding IDs on the other component
00223     // will form back and forth information, from ordered list of IDs, to valuesComp
00224     void settle_comm_by_ids( int comp, TupleList& TLBackToComp, std::vector< int >& valuesComp );
00225
00226     // after map read, we need to know what entities we need to send to receiver
00227     ErrorCode set_split_ranges( int comp,
00228                                 TupleList& TLBackToComp1,
00229                                 std::vector< int >& valuesComp1,
00230                                 int lenTag,
00231                                 Range& ents_of_interest,
00232                                 int type );
00233
00234     // new methods to migrate mesh after reading map
00235     ErrorCode form_tuples_to_migrate_mesh( Interface* mb, TupleList& TLv, TupleList& TLc, int type, int lenTagType1 );
00236     ErrorCode form_mesh_from_tuples( Interface* mb,
00237                                      TupleList& TLv,
00238                                      TupleList& TLc,
00239                                      int type,
00240                                      int lenTagType1,
00241                                      EntityHandle fset,
00242                                      Range& primary_ents,
00243                                      std::vector< int >& values_entities );
00244
00245     // new partition calculation
00246     ErrorCode compute_partition( ParallelComm* pco, Range& owned, int met );
00247
00248     // dump local information about graph
00249     ErrorCode dump_comm_information( std::string prefix, int is_send );
00250
00251   private:
00252     /**
00253     \brief find ranks of a group with respect to an encompassing communicator
00254
00255     <B>Operations:</B> Local, usually called on root process of the group
00256
00257     \param[in]  joincomm (MPI_Comm)
00258     \param[in]  group (MPI_Group)
00259     \param[out] ranks ( std::vector<int>)  ranks with respect to the joint communicator
00260   */
00261     void find_group_ranks( MPI_Group group, MPI_Comm join, std::vector< int >& ranks );
00262
00263     MPI_Comm comm;
00264     std::vector< int > senderTasks;    // these are the sender tasks in joint comm
00265     std::vector< int > receiverTasks;  // these are all the receiver tasks in joint comm
00266     bool rootSender;
00267     bool rootReceiver;
00268     int rankInGroup1, rankInGroup2;  // group 1 is sender, 2 is receiver
00269     int rankInJoin, joinSize;
00270     int compid1, compid2;
00271     int context_id;          // used to identify the other comp for intersection
00272     EntityHandle cover_set;  // will be initialized only if it is the receiver parcomm graph, in
00273                              // CoverageGraph
00274
00275     // communication graph from group1 to group2;
00276     //  graph[task1] = vec1; // vec1 is a stl vector of tasks in group2
00277     std::map< int, std::vector< int > > recv_graph;  // to what tasks from group2 to send  (actual communication graph)
00278     std::map< int, std::vector< int > >
00279         recv_sizes;  // how many elements to actually send from a sender task to receiver tasks
00280     std::map< int, std::vector< int > >
00281         sender_graph;  // to what tasks from group2 to send  (actual communication graph)
00282     std::map< int, std::vector< int > >
00283         sender_sizes;  // how many elements to actually send from a sender task to receiver tasks
00284
00285     std::vector< ParallelComm::Buffer* > localSendBuffs;  // this will store the pointers to the Buffers
00286     //                                    will be  released only when all mpi requests are waited
00287     //                                    for
00288     int* comm_graph;  // this will store communication graph, on sender master, sent by nonblocking
00289                       // send to the master receiver first integer will be the size of the graph,
00290                       // the rest will be the packed graph, for trivial partition
00291
00292     // these will be now used to store ranges to be sent from current sender to each receiver in
00293     // joint comm
00294     std::map< int, Range > split_ranges;
00295
00296     std::vector< MPI_Request > sendReqs;  // there will be multiple requests, 2 for comm graph, 2 for each Buffer
00297     // there are as many buffers as sender_graph[rankInJoin].size()
00298
00299     // active on both receiver and sender sides
00300     std::vector< int > corr_tasks;  // subset of the senderTasks, in the joint comm for sender;
00301                                     // subset of receiverTasks for receiver side
00302     std::vector< int > corr_sizes;  // how many primary entities corresponding to the other side
00303     // so what we know is that the local range corresponds to remote corr_sizes[i] size ranges on
00304     // tasks corr_tasks[i]
00305
00306     // these will be used now after coverage, quick fix; they will also be populated by
00307     // iMOAB_CoverageGraph
00308     TypeGraph graph_type;  // this should be false , set to true in settle send graph, to use send_IDs_map
00309     std::map< int, std::vector< int > > involved_IDs_map;  // replace send and recv IDs_mapp with involved_IDs_map
00310     // used only for third method: DOF_BASED
00311     std::map< int, std::vector< int > >
00312         map_index;  // from index in involved[] to index in values[] of tag, for each corr task
00313     std::map< int, std::vector< int > > map_ptr;  //  lmap[ie], lmap[ie+1], pointer into map_index[corrTask]
00314 };
00315
00316 }  // namespace moab
00317 #endif /* SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_ */