Mesh Oriented datABase
(version 5.4.1)
Array-based unstructured mesh datastructure
/*
 * ParCommGraph.hpp
 *
 *  will be used to set up communication between 2 distributed meshes, in which one mesh was
 * migrated from the other (one example is an atmosphere mesh migrated to the coupler pes)
 *
 *  there are 3 communicators in play, one for each mesh, and one for the joint
 * communicator, which spans both sets of processes; to send mesh or tag data we need to use the
 * joint communicator, with nonblocking MPI_Isend and blocking MPI_Recv receives
 *
 *  various methods should be available to migrate meshes: trivial, using a graph partitioner
 * (Zoltan PHG), or using a geometric partitioner (Zoltan RCB)
 *
 *  communicators are represented by their MPI groups, not by the communicators themselves,
 * because the groups are always defined, irrespective of which tasks they are on; communicators
 * can be MPI_COMM_NULL, while MPI groups are always defined
 *
 *  Some of the methods in here are executed over the sender communicator, some over the
 * receiver communicator. They can switch places: what was the sender becomes the receiver and
 * vice versa
 *
 *  The name "graph" is in the sense of a bipartite graph, in which we can separate sender and
 * receiver tasks
 *
 *  The info stored in the ParCommGraph helps in migrating fields (MOAB tags) from component to
 * the coupler and back
 *
 *  So initially the ParCommGraph assists in mesh migration (from component to coupler), and
 * then it is used to migrate tag data from component to coupler and back from coupler to
 * component.
 *
 *  The same class is used after intersection (which is done on the coupler pes, between 2
 * different component meshes migrated there), and it alters the communication pattern between
 * the original component pes and the coupler pes
 *
 *  We added a new way to send tags between 2 models; the first application of the new method is
 * to send a tag from the atm dynamics model (spectral elements, with np x np tags defined on
 * each element, according to the GLOBAL_DOFS tag associated to each element) towards the atm
 * physics model, which is just a point cloud of vertices distributed differently over the
 * physics model pes; matching is done using the GLOBAL_ID tag on vertices. Right now we assume
 * that the models are on different pes, that the joint communicator covers both, and that the
 * task ids are with respect to the joint communicator
 *
 */
#include "moab_mpi.h"
#include "moab/Interface.hpp"
#include "moab/ParallelComm.hpp"
#include <map>

#ifndef SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_
#define SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_

namespace moab
{

class ParCommGraph
{
  public:
    enum TypeGraph
    {
        INITIAL_MIGRATE,
        COVERAGE,
        DOF_BASED
    };
    virtual ~ParCommGraph();

    /**
     * \brief collective constructor, will be called on all sender tasks and receiver tasks
     * \param[in] joincomm joint MPI communicator that covers both sender and receiver MPI groups
     * \param[in] group1   MPI group formed with the sender tasks (the sender usually loads the
     *                     mesh in a migrate scenario)
     * \param[in] group2   MPI group formed with the receiver tasks (the receiver usually receives
     *                     the mesh in a migrate scenario)
     * \param[in] coid1    sender component unique identifier in a coupled application (in climate
     *                     simulations it could be the Atmosphere component id, 5, or the Ocean
     *                     component id, 17)
     * \param[in] coid2    receiver component unique identifier in a coupled application (it is
     *                     usually the coupler, 2, in E3SM)
     *
     * this graph will be formed on sender and receiver tasks, and, in principle, will hold info
     * about how the local entities are distributed on the other side
     *
     * Its major role is to help in the migration of data, from component to the coupler and
     * vice versa; each local entity has a corresponding task (or tasks) on the other side, to
     * which the data needs to be sent
     *
     * important data stored in ParCommGraph, as soon as it is created:
     *   - all sender and receiver task ids, with respect to the joint communicator
     *   - local rank in the sender and receiver group (-1 if not part of the respective group)
     *   - rank in the joint communicator (from 0)
     */
    ParCommGraph( MPI_Comm joincomm, MPI_Group group1, MPI_Group group2, int coid1, int coid2 );
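
    /*
     * Example: a minimal sketch of how the collective constructor above might be called. The
     * rank layout (the first half of the joint tasks are senders, the rest receivers) and the
     * use of MPI_COMM_WORLD as the joint communicator are assumptions made only for this sketch;
     * the component ids 5 (atmosphere) and 2 (coupler in E3SM) simply reuse the examples from
     * the parameter documentation above.
     *
     *   MPI_Comm joinComm = MPI_COMM_WORLD;  // hypothetical joint communicator
     *   int joinSize;
     *   MPI_Comm_size( joinComm, &joinSize );
     *   MPI_Group joinGroup, senderGroup, receiverGroup;
     *   MPI_Comm_group( joinComm, &joinGroup );
     *
     *   int nSenders = joinSize / 2;  // hypothetical split of the joint tasks
     *   std::vector< int > sendRanks, recvRanks;
     *   for( int i = 0; i < nSenders; i++ )
     *       sendRanks.push_back( i );
     *   for( int i = nSenders; i < joinSize; i++ )
     *       recvRanks.push_back( i );
     *   MPI_Group_incl( joinGroup, (int)sendRanks.size(), sendRanks.data(), &senderGroup );
     *   MPI_Group_incl( joinGroup, (int)recvRanks.size(), recvRanks.data(), &receiverGroup );
     *
     *   // collective over all tasks of the joint communicator
     *   ParCommGraph graph( joinComm, senderGroup, receiverGroup, 5, 2 );
     */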

    /**
     * \brief copy constructor will copy only the senders, receivers, compid1, compid2, etc.
     */
    ParCommGraph( const ParCommGraph& );

    /**
      \brief Based on the number of elements on each task in group 1, partition for group 2,
      trivially

      <B>Operations:</B> it is called on every receiver task; decides how all the elements are
      distributed

      Note: establishes how many elements are sent from each task in group 1 to tasks in group 2.
      This call is usually made on a root / master process, and it will construct local maps that
      are member data, which contain the communication graph, in both directions. It also stores
      the number of elements migrated/exchanged between each sender/receiver pair.

      \param[in] numElemsPerTaskInGroup1 (std::vector<int> &) number of elements on each sender
      task
    */

    ErrorCode compute_trivial_partition( std::vector< int >& numElemsPerTaskInGroup1 );

    /**
      \brief pack information about the receivers' view of the graph, for future sending to the
      receiver root

      <B>Operations:</B> Local, called on the root process of the senders group

      \param[out] packed_recv_array
        packed data that will be sent to the root of the receivers and distributed from there; it
        contains, for each receiver, concatenated:
        receiver 1 task, number of senders for receiver 1, then the sender tasks for receiver 1,
        receiver 2 task, number of senders for receiver 2, the sender tasks for receiver 2, etc.
        Note: only the root of the senders will compute this and send it over to the receiver
        root, which will distribute it over each receiver. We do not pack the sizes of the data to
        be sent, only the senders for each of the receivers (this could be of size O(n^2), where n
        is the number of tasks, but in general it should be more like O(n): each sender sends to a
        finite number of receivers, and each receiver receives from a finite number of senders).
        We need this info to decide how to set up the send/receive waiting game for the
        non-blocking communication.
     */
    ErrorCode pack_receivers_graph( std::vector< int >& packed_recv_array );
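
    /*
     * Example: the layout of packed_recv_array described above, and a minimal sketch of a loop
     * that walks it. The concrete task numbers are hypothetical; only the
     * [receiver task, number of senders, sender tasks...] layout comes from the documentation of
     * pack_receivers_graph.
     *
     *   // two receivers: task 4 fed by senders {0, 2}, task 5 fed by senders {1, 2, 3}
     *   // packed_recv_array = { 4, 2, 0, 2,   5, 3, 1, 2, 3 }
     *
     *   std::map< int, std::vector< int > > sendersPerReceiver;
     *   size_t pos = 0;
     *   while( pos < packed_recv_array.size() )
     *   {
     *       int receiverTask = packed_recv_array[pos++];
     *       int numSenders   = packed_recv_array[pos++];
     *       for( int k = 0; k < numSenders; k++ )
     *           sendersPerReceiver[receiverTask].push_back( packed_recv_array[pos++] );
     *   }
     */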

    // get methods for private data
    bool is_root_sender()
    {
        return rootSender;
    }

    bool is_root_receiver()
    {
        return rootReceiver;
    }

    int sender( int index )
    {
        return senderTasks[index];
    }

    int receiver( int index )
    {
        return receiverTasks[index];
    }

    int get_component_id1()
    {
        return compid1;
    }
    int get_component_id2()
    {
        return compid2;
    }

    int get_context_id()
    {
        return context_id;
    }
    void set_context_id( int other_id )
    {
        context_id = other_id;
    }

    EntityHandle get_cover_set()
    {
        return cover_set;
    }
    void set_cover_set( EntityHandle cover )
    {
        cover_set = cover;
    }

    // return local graph for a specific task
    ErrorCode split_owned_range( int sender_rank, Range& owned );

    ErrorCode split_owned_range( Range& owned );

    ErrorCode send_graph( MPI_Comm jcomm, std::vector< char >& zBuff );

    ErrorCode send_graph_partition( ParallelComm* pco, MPI_Comm jcomm, std::vector< char >& zoltanBuffer );

    ErrorCode send_mesh_parts( MPI_Comm jcomm, ParallelComm* pco, Range& owned );

    // this is called on the receiver side
    ErrorCode receive_comm_graph( MPI_Comm jcomm,
                                  ParallelComm* pco,
                                  std::vector< int >& pack_array,
                                  std::vector< char >& zoltanBuffer );

    ErrorCode receive_mesh( MPI_Comm jcomm,
                            ParallelComm* pco,
                            EntityHandle local_set,
                            std::vector< int >& senders_local );

    ErrorCode release_send_buffers();

    ErrorCode send_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );

    ErrorCode receive_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );

    // getter methods
    const std::vector< int >& senders()
    {
        return senderTasks;
    }  // returned by reference; refers to the sender tasks in the joint comm
    const std::vector< int >& receivers()
    {
        return receiverTasks;
    }
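
    /*
     * Example: a rough sketch of how the send/receive entry points above pair up across the
     * joint communicator during a migration. Which calls belong to the sender side and which to
     * the receiver side follows the names and comments in this header; the exact ordering and
     * the variables used (jcomm, pco, owned, localSet, packArray, sendersLocal, zoltanBuffer,
     * tagHandles) are assumptions for illustration only, and error codes are ignored.
     *
     *   // on sender tasks (component pes):
     *   graph.split_owned_range( owned );                   // decide what goes to each receiver
     *   if( graph.is_root_sender() )
     *       graph.send_graph( jcomm, zoltanBuffer );        // graph info, sender root to receiver root
     *   graph.send_mesh_parts( jcomm, pco, owned );         // nonblocking sends of the mesh parts
     *   graph.send_tag_values( jcomm, pco, owned, tagHandles );  // later: migrate tag (field) data
     *   graph.release_send_buffers();                       // only after all requests completed
     *
     *   // on receiver tasks (e.g., coupler pes):
     *   graph.receive_comm_graph( jcomm, pco, packArray, zoltanBuffer );
     *   graph.receive_mesh( jcomm, pco, localSet, sendersLocal );
     *   graph.receive_tag_values( jcomm, pco, owned, tagHandles );
     */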

    ErrorCode settle_send_graph( TupleList& TLcovIDs );

    // this will set after_cov_rec_sizes
    void SetReceivingAfterCoverage(
        std::map< int, std::set< int > >& idsFromProcs );  // makes sense only on receivers, currently used after coverage

    // strideComp is np x np, or 1, in our cases
    // will fill up ordered lists of the corresponding IDs on the other component
    // will form back-and-forth information, from the ordered list of IDs, to valuesComp
    void settle_comm_by_ids( int comp, TupleList& TLBackToComp, std::vector< int >& valuesComp );

    // after the map is read, we need to know what entities we need to send to the receiver
    ErrorCode set_split_ranges( int comp,
                                TupleList& TLBackToComp1,
                                std::vector< int >& valuesComp1,
                                int lenTag,
                                Range& ents_of_interest,
                                int type );

    // new methods to migrate mesh after reading the map
    ErrorCode form_tuples_to_migrate_mesh( Interface* mb, TupleList& TLv, TupleList& TLc, int type, int lenTagType1 );
    ErrorCode form_mesh_from_tuples( Interface* mb,
                                     TupleList& TLv,
                                     TupleList& TLc,
                                     int type,
                                     int lenTagType1,
                                     EntityHandle fset,
                                     Range& primary_ents,
                                     std::vector< int >& values_entities );

    // new partition calculation
    ErrorCode compute_partition( ParallelComm* pco, Range& owned, int met, std::vector< char >& zoltanBuffer );

    // dump local information about the graph
    ErrorCode dump_comm_information( const std::string& prefix, int is_send, int verbose );

  private:
    /**
      \brief find the ranks of a group with respect to an encompassing communicator

      <B>Operations:</B> Local, usually called on the root process of the group

      \param[in]  group (MPI_Group)
      \param[in]  join  (MPI_Comm) the joint communicator
      \param[out] ranks (std::vector<int>) ranks with respect to the joint communicator
    */
    void find_group_ranks( MPI_Group group, MPI_Comm join, std::vector< int >& ranks );

    MPI_Comm comm;
    std::vector< int > senderTasks;    // the sender tasks, in the joint comm
    std::vector< int > receiverTasks;  // all the receiver tasks, in the joint comm
    bool rootSender;
    bool rootReceiver;
    int rankInGroup1, rankInGroup2;  // group 1 is the sender, group 2 the receiver
    int rankInJoin, joinSize;
    int compid1, compid2;
    int context_id;          // used to identify the other component, for intersection
    EntityHandle cover_set;  // initialized only if this is the receiver ParCommGraph, in CoverageGraph

    // communication graph from group1 to group2;
    //  graph[task1] = vec1; // vec1 is an stl vector of tasks in group2
    std::map< int, std::vector< int > > recv_graph;  // to what tasks from group2 to send (actual communication graph)
    std::map< int, std::vector< int > >
        recv_sizes;  // how many elements to actually send from a sender task to the receiver tasks
    std::map< int, std::vector< int > >
        sender_graph;  // to what tasks from group2 to send (actual communication graph)
    std::map< int, std::vector< int > >
        sender_sizes;  // how many elements to actually send from a sender task to the receiver tasks

    std::vector< ParallelComm::Buffer* > localSendBuffs;  // stores the pointers to the Buffers;
                                                          // released only after all MPI requests
                                                          // have been waited for
    std::vector< int > comm_graph;  // stores the communication graph on the sender root, sent by a nonblocking
                                    // send to the master receiver; the first integer is the size of the graph,
                                    // the rest is the packed graph, for a trivial or a Zoltan partition;
                                    // the last integer is a flag that signals whether another array will be
                                    // sent, the Zoltan buffer that currently stores the RCB tree
    std::vector< char > zBuff;  // keeps the Zoltan buffer alive so it does not go out of scope before we are
                                // done sending

    // used to store the ranges to be sent from the current sender to each receiver, in the joint comm
    std::map< int, Range > split_ranges;

    std::vector< MPI_Request > sendReqs;  // there will be multiple requests, 2 for the comm graph, 2 for each Buffer;
                                          // there are as many buffers as sender_graph[rankInJoin].size()

    // active on both receiver and sender sides
    std::vector< int > corr_tasks;  // subset of the senderTasks, in the joint comm, on the sender side;
                                    // subset of receiverTasks on the receiver side
    std::vector< int > corr_sizes;  // how many primary entities correspond to the other side;
                                    // so what we know is that the local range corresponds to remote ranges of
                                    // size corr_sizes[i] on tasks corr_tasks[i] (see the sketch below)
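
    /*
     * Illustration of the correspondence encoded by corr_tasks / corr_sizes: one message (or
     * range) of corr_sizes[i] entities is expected from / sent to task corr_tasks[i] of the
     * joint communicator. The buffer sizing and the MPI_Irecv below are only a sketch (one
     * double per entity is a hypothetical payload), not how the class itself posts its messages.
     *
     *   std::vector< std::vector< double > > recvBufs( corr_tasks.size() );
     *   std::vector< MPI_Request > reqs( corr_tasks.size() );
     *   for( size_t i = 0; i < corr_tasks.size(); i++ )
     *   {
     *       recvBufs[i].resize( corr_sizes[i] );  // one value per corresponding remote entity
     *       MPI_Irecv( recvBufs[i].data(), corr_sizes[i], MPI_DOUBLE, corr_tasks[i], 0, comm, &reqs[i] );
     *   }
     *   MPI_Waitall( (int)reqs.size(), reqs.data(), MPI_STATUSES_IGNORE );
     */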

    // these are used now after coverage, as a quick fix; they are also populated by
    // iMOAB_CoverageGraph
    TypeGraph graph_type;  // type of the graph (INITIAL_MIGRATE, COVERAGE, or DOF_BASED); updated
                           // in settle_send_graph, to switch to the involved_IDs_map-based pattern
    std::map< int, std::vector< int > > involved_IDs_map;  // replaces the former send and recv IDs maps
    // used only for the third method: DOF_BASED
    std::map< int, std::vector< int > >
        map_index;  // from index in involved[] to index in values[] of the tag, for each corr task
    std::map< int, std::vector< int > > map_ptr;  // lmap[ie], lmap[ie+1]: pointers into map_index[corrTask]
                                                  // (see the illustrative sketch after the end of this header)
};

}  // namespace moab
#endif /* SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_ */
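
/*
 * Illustration of the DOF_BASED indirection stored in map_ptr / map_index above: for a
 * corresponding task t and the ie-th entry of involved_IDs_map[t], the indices into the tag
 * value array are map_index[t][ map_ptr[t][ie] ... map_ptr[t][ie+1] ), a CSR-style layout.
 * The loop below is only a sketch of how a packing routine could use that layout; t, buf and
 * valuesComp are hypothetical, and this is not code from the class itself.
 *
 *   std::vector< double > buf;
 *   std::vector< int >& ids = involved_IDs_map[t];
 *   std::vector< int >& ptr = map_ptr[t];
 *   std::vector< int >& idx = map_index[t];
 *   for( size_t ie = 0; ie < ids.size(); ie++ )
 *       for( int k = ptr[ie]; k < ptr[ie + 1]; k++ )
 *           buf.push_back( valuesComp[idx[k]] );
 */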