MOAB: Mesh Oriented datABase  (version 5.2.1)
ReadHDF5Dataset.hpp
Go to the documentation of this file.
00001 /** \file   ReadHDF5Dataset.hpp
00002  *  \author Jason Kraftcheck
00003  *  \date   2010-07-09
00004  */
00005 
00006 #ifndef moab_READ_HDF5DATASET_HPP
00007 #define moab_READ_HDF5DATASET_HPP
00008 
00009 #include "moab/MOABConfig.h"
00010 #ifdef MOAB_HAVE_MPI
00011 #include <moab_mpi.h>
00012 #endif
00013 
00014 #include <cstdlib>  // for size_t
00015 #include <H5Ipublic.h>
00016 #include <H5Spublic.h>
00017 
00018 #include "moab/Range.hpp"
00019 #include <vector>
00020 
00021 namespace moab
00022 {
00023 
00024 /**\brief Utility used for reading portions of an HDF5 dataset
00025  *
00026  * Implement iterative read of table where:
00027  * - subset of rows to be read can be specified usign an Range of offsets
00028  * - each read fills as much as possible of a passed buffer
00029  * - each read call reads a subsequent set of rows of the data set in an
00030  *   iterator-like fashion.
00031  *
00032  * NOTE: This class also implements an RAII pattern for the data set handle:
00033  *       It will close the data set in its destructor unless it is specified
00034  *       to the constructor that only a single column should be read.
00035  *
00036  * NOTE: This class will always do collective IO for parallel reads.
00037  */
00038 class ReadHDF5Dataset
00039 {
00040   public:
00041 #ifdef MOAB_HAVE_MPI
00042     typedef MPI_Comm Comm;
00043 #else
00044     typedef int Comm;
00045 #endif
00046 
00047     class Exception
00048     {
00049       public:
00050         int line_no;
00051         Exception( int l ) : line_no( l ) {}
00052     };
00053 
00054     /**\brief Setup to read entire table
00055      *\param data_set_handle The HDF5 DataSet to read.
00056      *\param parallel    Doing true partial-read parallel read (as opposed
00057      *                   to read and delete where collective IO is done for
00058      *                   everything because all procs read the same stuff.)
00059      *\param communictor If \c parallel is \c true and \c io_prop is
00060      *                   \c H5FD_MPIO_COLLECTIVE, then this
00061      *                   must be a pointer to the MPI_Communicator value.
00062      *\param close_data_set_on_destruct Call \c H5Dclose on passed
00063      *                 \c data_set_handle in desturctor.
00064      *
00065      *\NOTE If \c parallel is \c true and \c io_prop is \c H5FD_MPIO_COLLECTIVE,
00066      *      then not only must \c communicator be non-null, but this call must
00067      *      be made collectively!
00068 
00069      *\NOTE Class instance will not be usable until one of either
00070      *      \c set_file_ids or \c set_all_file_ids is called.
00071      */
00072     ReadHDF5Dataset( const char* debug_desc, hid_t data_set_handle, bool parallel, const Comm* communicator = 0,
00073                      bool close_data_set_on_destruct = true );
00074 
00075     ReadHDF5Dataset( const char* debug_desc, bool parallel, const Comm* communicator = 0 );
00076     void init( hid_t data_set_handle, bool close_data_set_on_destruct = true );
00077 
00078     bool will_close_data_set() const
00079     {
00080         return closeDataSet;
00081     }
00082     void close_data_set_on_destruct( bool val )
00083     {
00084         closeDataSet = val;
00085     }
00086 
00087     ~ReadHDF5Dataset();
00088 
00089     /**\brief Change file ids to read from.
00090      *
00091      *\param file_ids    List of rows to read from dataset
00092      *\param start_id    Rows of dataset are enumerating beginning with
00093      *                   this value.  Thus the offset row to be read from
00094      *                   dataset will be \c file_ids.begin() - \c start_id .
00095      *\param row_count   Read buffer size in number of table rows.
00096      *\param data_type       The data type of the buffer into which table values
00097      *                       are to be read.
00098      */
00099     void set_file_ids( const Range& file_ids, EntityHandle start_id, hsize_t row_cout, hid_t data_type );
00100 
00101     /**\brief Read all values in dataset (undo set_file_ids)
00102      *
00103      *\param row_count   Read buffer size in number of table rows.
00104      *\param data_type       The data type of the buffer into which table values
00105      *                       are to be read.
00106      */
00107     void set_all_file_ids( hsize_t row_count, hid_t data_type );
00108 
00109     /**\brief Return false if more data to read, true otherwise
00110      *
00111      * Test if the iterative read has reached the end.
00112      */
00113     bool done() const
00114     {
00115         return ( currOffset == rangeEnd ) && ( readCount == 0 );
00116     }
00117 
00118     /**\brief Read rows of table
00119      *
00120      * Read up to max_num_rows from data set.
00121      *\param buffer    Memory in which to store values read from data set
00122      *\param rows_read The actual number of rows read from the table.  Will
00123      *                 never exceed \c max_rows .
00124      */
00125     void read( void* buffer, size_t& rows_read );
00126 
00127     /**\brief Return position in \c Range of file IDs at which next read will start
00128      */
00129     Range::const_iterator next_file_id() const
00130     {
00131         return currOffset;
00132     }
00133 
00134     /**\brief Do null read operation
00135      *
00136      * Do a read call requesting no data.  This functionality is provided
00137      * so as to allow collective IO when not all processes need to make the
00138      * same number of read calls.  To prevent deadlock in this case, processes
00139      * that have finished their necessary read calls can call this function
00140      * so that all processes are calling the read method collectively.
00141      */
00142     void null_read();
00143 
00144     unsigned columns() const;
00145     void set_column( unsigned c );
00146 
00147     unsigned long get_read_count() const
00148     {
00149         return readCount;
00150     }
00151     const char* get_debug_desc() const
00152     {
00153         return mpeDesc.c_str();
00154     }
00155 
00156     static void set_hyperslab_selection_limit( size_t val )
00157     {
00158         hyperslabSelectionLimit = val;
00159     }
00160     static void default_hyperslab_selection_limit();
00161 
00162     /** Use non-standard 'APPEND' operation for hyperslab selection */
00163     static void append_hyperslabs()
00164     {
00165         hyperslabSelectOp = H5S_SELECT_APPEND;
00166     }
00167     /** Revert to default select behavior for standard HDF5 library */
00168     static void or_hyperslabs()
00169     {
00170         hyperslabSelectOp = H5S_SELECT_OR;
00171     }
00172 
00173   private:
00174     Range::const_iterator next_end( Range::const_iterator iter );
00175 
00176     Range internalRange;  //!< used when reading entire dataset
00177 
00178     bool closeDataSet;  //!< close dataset in destructor
00179     hsize_t dataSetOffset[64], dataSetCount[64];
00180     hid_t dataSet;        //!< Handle for HDF5 data set
00181     hid_t dataSpace;      //!< Data space for data set
00182     hid_t dataType;       //!< Data type client code wants for data
00183     hid_t fileType;       //!< Data type as stored in data set
00184     hid_t ioProp;         //!< Used to specify collective IO
00185     int dataSpaceRank;    //!< Rank of data set
00186     hsize_t rowsInTable;  //!< Total number of rows in dataset
00187     bool doConversion;    //!< True if dataType != fileType
00188     bool nativeParallel;  //!< If true then reading different data on different procs
00189 
00190     hsize_t readCount;   //!< Number of actual reads to do
00191     hsize_t bufferSize;  //!< size of buffer passed to \c read, in number of rows
00192     const Comm* mpiComm;
00193 
00194     Range::const_iterator currOffset, rangeEnd;
00195     EntityHandle startID;
00196 
00197     static bool haveMPEEvents;
00198     static std::pair< int, int > mpeReadEvent;
00199     static std::pair< int, int > mpeReduceEvent;
00200     std::string mpeDesc;
00201 
00202     static size_t hyperslabSelectionLimit;
00203     static H5S_seloper_t hyperslabSelectOp;
00204 };
00205 
00206 }  // namespace moab
00207 
00208 #endif  // moab_READ_HDF5DATASET_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines