MOAB: Mesh Oriented datABase  (version 5.4.1)
ReadHDF5Dataset.hpp
Go to the documentation of this file.
00001 /** \file   ReadHDF5Dataset.hpp
00002  *  \author Jason Kraftcheck
00003  *  \date   2010-07-09
00004  */
00005 
00006 #ifndef moab_READ_HDF5DATASET_HPP
00007 #define moab_READ_HDF5DATASET_HPP
00008 
#include "moab/MOABConfig.h"
#ifdef MOAB_HAVE_MPI
#include <moab_mpi.h>
#endif

#include <cstdlib>  // for size_t
#include <H5Ipublic.h>
#include <H5Spublic.h>

#include "moab/Range.hpp"
#include <string>   // for std::string (mpeDesc)
#include <utility>  // for std::pair (MPE event ids)
#include <vector>
00020 
00021 namespace moab
00022 {
00023 
00024 /**\brief Utility used for reading portions of an HDF5 dataset
00025  *
00026  * Implement iterative read of table where:
00027  * - subset of rows to be read can be specified usign an Range of offsets
00028  * - each read fills as much as possible of a passed buffer
00029  * - each read call reads a subsequent set of rows of the data set in an
00030  *   iterator-like fashion.
00031  *
00032  * NOTE: This class also implements an RAII pattern for the data set handle:
00033  *       It will close the data set in its destructor unless it is specified
00034  *       to the constructor that only a single column should be read.
00035  *
00036  * NOTE: This class will always do collective IO for parallel reads.
00037  */
00038 class ReadHDF5Dataset
00039 {
00040   public:
00041 #ifdef MOAB_HAVE_MPI
00042     typedef MPI_Comm Comm;
00043 #else
00044     typedef int Comm;
00045 #endif
00046 
00047     class Exception
00048     {
00049       public:
00050         int line_no;
00051         Exception( int l ) : line_no( l ) {}
00052     };
00053 
00054     /**\brief Setup to read entire table
00055      *\param data_set_handle The HDF5 DataSet to read.
00056      *\param parallel    Doing true partial-read parallel read (as opposed
00057      *                   to read and delete where collective IO is done for
00058      *                   everything because all procs read the same stuff.)
00059      *\param communictor If \c parallel is \c true and \c io_prop is
00060      *                   \c H5FD_MPIO_COLLECTIVE, then this
00061      *                   must be a pointer to the MPI_Communicator value.
00062      *\param close_data_set_on_destruct Call \c H5Dclose on passed
00063      *                 \c data_set_handle in desturctor.
00064      *
00065      *\NOTE If \c parallel is \c true and \c io_prop is \c H5FD_MPIO_COLLECTIVE,
00066      *      then not only must \c communicator be non-null, but this call must
00067      *      be made collectively!
00068 
00069      *\NOTE Class instance will not be usable until one of either
00070      *      \c set_file_ids or \c set_all_file_ids is called.
00071      */
00072     ReadHDF5Dataset( const char* debug_desc,
00073                      hid_t data_set_handle,
00074                      bool parallel,
00075                      const Comm* communicator        = 0,
00076                      bool close_data_set_on_destruct = true );
00077 
00078     ReadHDF5Dataset( const char* debug_desc, bool parallel, const Comm* communicator = 0 );
00079     void init( hid_t data_set_handle, bool close_data_set_on_destruct = true );
00080 
00081     bool will_close_data_set() const
00082     {
00083         return closeDataSet;
00084     }
00085     void close_data_set_on_destruct( bool val )
00086     {
00087         closeDataSet = val;
00088     }
00089 
00090     ~ReadHDF5Dataset();
00091 
00092     /**\brief Change file ids to read from.
00093      *
00094      *\param file_ids    List of rows to read from dataset
00095      *\param start_id    Rows of dataset are enumerating beginning with
00096      *                   this value.  Thus the offset row to be read from
00097      *                   dataset will be \c file_ids.begin() - \c start_id .
00098      *\param row_count   Read buffer size in number of table rows.
00099      *\param data_type       The data type of the buffer into which table values
00100      *                       are to be read.
00101      */
00102     void set_file_ids( const Range& file_ids, EntityHandle start_id, hsize_t row_cout, hid_t data_type );
00103 
00104     /**\brief Read all values in dataset (undo set_file_ids)
00105      *
00106      *\param row_count   Read buffer size in number of table rows.
00107      *\param data_type       The data type of the buffer into which table values
00108      *                       are to be read.
00109      */
00110     void set_all_file_ids( hsize_t row_count, hid_t data_type );
00111 
00112     /**\brief Return false if more data to read, true otherwise
00113      *
00114      * Test if the iterative read has reached the end.
00115      */
00116     bool done() const
00117     {
00118         return ( currOffset == rangeEnd ) && ( readCount == 0 );
00119     }
00120 
00121     /**\brief Read rows of table
00122      *
00123      * Read up to max_num_rows from data set.
00124      *\param buffer    Memory in which to store values read from data set
00125      *\param rows_read The actual number of rows read from the table.  Will
00126      *                 never exceed \c max_rows .
00127      */
00128     void read( void* buffer, size_t& rows_read );
00129 
00130     /**\brief Return position in \c Range of file IDs at which next read will start
00131      */
00132     Range::const_iterator next_file_id() const
00133     {
00134         return currOffset;
00135     }
00136 
00137     /**\brief Do null read operation
00138      *
00139      * Do a read call requesting no data.  This functionality is provided
00140      * so as to allow collective IO when not all processes need to make the
00141      * same number of read calls.  To prevent deadlock in this case, processes
00142      * that have finished their necessary read calls can call this function
00143      * so that all processes are calling the read method collectively.
00144      */
00145     void null_read();
00146 
00147     unsigned columns() const;
00148     void set_column( unsigned c );
00149 
00150     unsigned long get_read_count() const
00151     {
00152         return readCount;
00153     }
00154     const char* get_debug_desc() const
00155     {
00156         return mpeDesc.c_str();
00157     }
00158 
00159     static void set_hyperslab_selection_limit( size_t val )
00160     {
00161         hyperslabSelectionLimit = val;
00162     }
00163     static void default_hyperslab_selection_limit();
00164 
00165     /** Use non-standard 'APPEND' operation for hyperslab selection */
00166     static void append_hyperslabs()
00167     {
00168         hyperslabSelectOp = H5S_SELECT_APPEND;
00169     }
00170     /** Revert to default select behavior for standard HDF5 library */
00171     static void or_hyperslabs()
00172     {
00173         hyperslabSelectOp = H5S_SELECT_OR;
00174     }
00175 
00176   private:
00177     Range::const_iterator next_end( Range::const_iterator iter );
00178 
00179     Range internalRange;  //!< used when reading entire dataset
00180 
00181     bool closeDataSet;  //!< close dataset in destructor
00182     hsize_t dataSetOffset[64], dataSetCount[64];
00183     hid_t dataSet;        //!< Handle for HDF5 data set
00184     hid_t dataSpace;      //!< Data space for data set
00185     hid_t dataType;       //!< Data type client code wants for data
00186     hid_t fileType;       //!< Data type as stored in data set
00187     hid_t ioProp;         //!< Used to specify collective IO
00188     int dataSpaceRank;    //!< Rank of data set
00189     hsize_t rowsInTable;  //!< Total number of rows in dataset
00190     bool doConversion;    //!< True if dataType != fileType
00191     bool nativeParallel;  //!< If true then reading different data on different procs
00192 
00193     hsize_t readCount;   //!< Number of actual reads to do
00194     hsize_t bufferSize;  //!< size of buffer passed to \c read, in number of rows
00195     const Comm* mpiComm;
00196 
00197     Range::const_iterator currOffset, rangeEnd;
00198     EntityHandle startID;
00199 
00200     static bool haveMPEEvents;
00201     static std::pair< int, int > mpeReadEvent;
00202     static std::pair< int, int > mpeReduceEvent;
00203     std::string mpeDesc;
00204 
00205     static size_t hyperslabSelectionLimit;
00206     static H5S_seloper_t hyperslabSelectOp;
00207 };
00208 
00209 }  // namespace moab
00210 
00211 #endif  // moab_READ_HDF5DATASET_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines