Mesh Oriented datABase  (version 5.4.1)
Array-based unstructured mesh datastructure
ReadHDF5Dataset.cpp
Go to the documentation of this file.
00001 /** \file   ReadHDF5Dataset.cpp
00002  *  \author Jason Kraftcheck
00003  *  \date   2010-07-09
00004  */
00005 
00006 #include <cassert>
00007 #include <cstring>
00008 #include <cstdlib>
00009 #include <iostream>
00010 #include "moab/MOABConfig.h"
00011 #include "ReadHDF5Dataset.hpp"
00012 
00013 #include "moab_mpe.h"
00014 
00015 #include <H5Dpublic.h>
00016 #include <H5Tpublic.h>
00017 #include <H5Ppublic.h>
00018 #include <H5Spublic.h>
00019 #ifdef MOAB_HAVE_HDF5_PARALLEL
00020 #include <H5FDmpi.h>
00021 #include <H5FDmpio.h>
00022 #endif
00023 
00024 #define HDF5_16API ( H5_VERS_MAJOR < 2 && H5_VERS_MINOR < 8 )
00025 
00026 namespace moab
00027 {
00028 
00029 // Selection of hyperslabs appears to be superlinear.  Don't try to select
00030 // more than a few thousand at a time or things start to get real slow.
00031 const size_t DEFAULT_HYPERSLAB_SELECTION_LIMIT  = 200;
00032 size_t ReadHDF5Dataset::hyperslabSelectionLimit = DEFAULT_HYPERSLAB_SELECTION_LIMIT;
00033 void ReadHDF5Dataset::default_hyperslab_selection_limit()
00034 {
00035     hyperslabSelectionLimit = DEFAULT_HYPERSLAB_SELECTION_LIMIT;
00036 }
00037 
00038 H5S_seloper_t ReadHDF5Dataset::hyperslabSelectOp = H5S_SELECT_OR;
00039 
00040 #ifdef MOAB_HAVE_LIBMPE
00041 static std::pair< int, int > allocate_mpe_state( const char* name, const char* color )
00042 {
00043     std::pair< int, int > result;
00044     result.first  = MPE_Log_get_event_number();
00045     result.second = MPE_Log_get_event_number();
00046     MPE_Describe_state( result.first, result.second, name, color );
00047     return result;
00048 }
00049 #else
00050 static std::pair< int, int > allocate_mpe_state( const char*, const char* )
00051 {
00052     return std::pair< int, int >();
00053 }
00054 #endif
00055 
00056 bool ReadHDF5Dataset::haveMPEEvents = false;
00057 std::pair< int, int > ReadHDF5Dataset::mpeReadEvent;
00058 std::pair< int, int > ReadHDF5Dataset::mpeReduceEvent;
00059 
00060 ReadHDF5Dataset::ReadHDF5Dataset( const char* debug_desc, bool parallel, const Comm* communicator )
00061     : closeDataSet( false ), dataSet( -1 ), dataSpace( -1 ), dataType( -1 ), fileType( -1 ), ioProp( H5P_DEFAULT ),
00062       dataSpaceRank( 0 ), rowsInTable( 0 ), doConversion( false ), nativeParallel( parallel ), readCount( 0 ),
00063       bufferSize( 0 ), mpiComm( communicator ), mpeDesc( debug_desc )
00064 {
00065     if( !haveMPEEvents )
00066     {
00067         haveMPEEvents  = true;
00068         mpeReadEvent   = allocate_mpe_state( "ReadHDF5Dataset::read", "yellow" );
00069         mpeReduceEvent = allocate_mpe_state( "ReadHDF5Dataset::all_reduce", "yellow" );
00070     }
00071 
00072 #ifndef MOAB_HAVE_HDF5_PARALLEL
00073     if( nativeParallel ) throw Exception( __LINE__ );
00074 #else
00075     if( nativeParallel && !mpiComm ) throw Exception( __LINE__ );
00076 
00077     if( mpiComm )
00078     {
00079         ioProp = H5Pcreate( H5P_DATASET_XFER );
00080         H5Pset_dxpl_mpio( ioProp, H5FD_MPIO_COLLECTIVE );
00081     }
00082 #endif
00083 }
00084 
00085 ReadHDF5Dataset::ReadHDF5Dataset( const char* debug_desc,
00086                                   hid_t data_set_handle,
00087                                   bool parallel,
00088                                   const Comm* communicator,
00089                                   bool close_data_set )
00090     : closeDataSet( close_data_set ), dataSet( data_set_handle ), dataSpace( -1 ), dataType( -1 ), fileType( -1 ),
00091       ioProp( H5P_DEFAULT ), dataSpaceRank( 0 ), rowsInTable( 0 ), doConversion( false ), nativeParallel( parallel ),
00092       readCount( 0 ), bufferSize( 0 ), mpiComm( communicator ), mpeDesc( debug_desc )
00093 {
00094     if( !haveMPEEvents )
00095     {
00096         haveMPEEvents  = true;
00097         mpeReadEvent   = allocate_mpe_state( "ReadHDF5Dataset::read", "yellow" );
00098         mpeReduceEvent = allocate_mpe_state( "ReadHDF5Dataset::all_reduce", "yellow" );
00099     }
00100 
00101     init( data_set_handle, close_data_set );
00102 
00103 #ifndef MOAB_HAVE_HDF5_PARALLEL
00104     if( nativeParallel ) throw Exception( __LINE__ );
00105 #else
00106     if( nativeParallel && !mpiComm ) throw Exception( __LINE__ );
00107 
00108     if( mpiComm )
00109     {
00110         ioProp = H5Pcreate( H5P_DATASET_XFER );
00111         H5Pset_dxpl_mpio( ioProp, H5FD_MPIO_COLLECTIVE );
00112     }
00113 #endif
00114 }
00115 
00116 void ReadHDF5Dataset::init( hid_t data_set_handle, bool close_data_set )
00117 {
00118     closeDataSet = close_data_set;
00119     dataSet      = data_set_handle;
00120 
00121     fileType = H5Dget_type( data_set_handle );
00122     if( fileType < 0 ) throw Exception( __LINE__ );
00123 
00124     dataSpace = H5Dget_space( dataSet );
00125     if( dataSpace < 0 ) throw Exception( __LINE__ );
00126 
00127     dataSpaceRank = H5Sget_simple_extent_dims( dataSpace, dataSetCount, dataSetOffset );
00128     if( dataSpaceRank < 0 ) throw Exception( __LINE__ );
00129     rowsInTable = dataSetCount[0];
00130 
00131     for( int i = 0; i < dataSpaceRank; ++i )
00132         dataSetOffset[i] = 0;
00133 
00134     currOffset = rangeEnd = internalRange.end();
00135 }
00136 
00137 unsigned ReadHDF5Dataset::columns() const
00138 {
00139     if( dataSpaceRank == 1 )
00140         return 1;
00141     else if( dataSpaceRank == 2 )
00142         return dataSetCount[1];
00143 
00144     throw Exception( __LINE__ );
00145 }
00146 
00147 void ReadHDF5Dataset::set_column( unsigned column )
00148 {
00149     if( dataSpaceRank != 2 || column >= dataSetCount[1] ) throw Exception( __LINE__ );
00150     dataSetCount[1]  = 1;
00151     dataSetOffset[1] = column;
00152 }
00153 
00154 Range::const_iterator ReadHDF5Dataset::next_end( Range::const_iterator iter )
00155 {
00156     size_t slabs_remaining = hyperslabSelectionLimit;
00157     size_t avail           = bufferSize;
00158     while( iter != rangeEnd && slabs_remaining )
00159     {
00160         size_t count = *( iter.end_of_block() ) - *iter + 1;
00161         if( count >= avail )
00162         {
00163             iter += avail;
00164             break;
00165         }
00166 
00167         avail -= count;
00168         iter += count;
00169         --slabs_remaining;
00170     }
00171     return iter;
00172 }
00173 
00174 void ReadHDF5Dataset::set_file_ids( const Range& file_ids, EntityHandle start_id, hsize_t row_count, hid_t data_type )
00175 {
00176     startID    = start_id;
00177     currOffset = file_ids.begin();
00178     rangeEnd   = file_ids.end();
00179     readCount  = 0;
00180     bufferSize = row_count;
00181 
00182     // if a) user specified buffer size and b) we're doing a true
00183     // parallel partial read and c) we're doing collective I/O, then
00184     // we need to know the maximum number of reads that will be done.
00185 #ifdef MOAB_HAVE_HDF5_PARALLEL
00186     if( nativeParallel )
00187     {
00188         Range::const_iterator iter = currOffset;
00189         while( iter != rangeEnd )
00190         {
00191             ++readCount;
00192             iter = next_end( iter );
00193         }
00194 
00195         MPE_Log_event( mpeReduceEvent.first, (int)readCount, mpeDesc.c_str() );
00196         unsigned long recv = readCount, send = readCount;
00197         MPI_Allreduce( &send, &recv, 1, MPI_UNSIGNED_LONG, MPI_MAX, *mpiComm );
00198         readCount = recv;
00199         MPE_Log_event( mpeReduceEvent.second, (int)readCount, mpeDesc.c_str() );
00200     }
00201 #endif
00202 
00203     dataType     = data_type;
00204     htri_t equal = H5Tequal( fileType, dataType );
00205     if( equal < 0 ) throw Exception( __LINE__ );
00206     doConversion = !equal;
00207 
00208     // We always read in the format of the file to avoid stupind HDF5
00209     // library behavior when reading in parallel.  We call H5Tconvert
00210     // ourselves to do the data conversion.  If the type we're reading
00211     // from the file is larger than the type we want in memory, then
00212     // we need to reduce num_rows so that we can read the larger type
00213     // from the file into the passed buffer mean to accomodate num_rows
00214     // of values of the smaller in-memory type.
00215     if( doConversion )
00216     {
00217         size_t mem_size, file_size;
00218         mem_size  = H5Tget_size( dataType );
00219         file_size = H5Tget_size( fileType );
00220         if( file_size > mem_size ) bufferSize = bufferSize * mem_size / file_size;
00221     }
00222 }
00223 
00224 void ReadHDF5Dataset::set_all_file_ids( hsize_t row_count, hid_t data_type )
00225 {
00226     internalRange.clear();
00227     internalRange.insert( (EntityHandle)1, (EntityHandle)( rowsInTable ) );
00228     set_file_ids( internalRange, 1, row_count, data_type );
00229 }
00230 
00231 ReadHDF5Dataset::~ReadHDF5Dataset()
00232 {
00233     if( fileType >= 0 ) H5Tclose( fileType );
00234     if( dataSpace >= 0 ) H5Sclose( dataSpace );
00235     if( closeDataSet && dataSet >= 0 ) H5Dclose( dataSet );
00236     dataSpace = dataSet = -1;
00237     if( ioProp != H5P_DEFAULT ) H5Pclose( ioProp );
00238 }
00239 
00240 void ReadHDF5Dataset::read( void* buffer, size_t& rows_read )
00241 {
00242     herr_t err;
00243     rows_read = 0;
00244 
00245     MPE_Log_event( mpeReadEvent.first, (int)readCount, mpeDesc.c_str() );
00246     if( currOffset != rangeEnd )
00247     {
00248 
00249         // Build H5S hyperslab selection describing the portions of the
00250         // data set to read
00251         H5S_seloper_t sop       = H5S_SELECT_SET;
00252         Range::iterator new_end = next_end( currOffset );
00253         while( currOffset != new_end )
00254         {
00255             size_t count = *( currOffset.end_of_block() ) - *currOffset + 1;
00256             if( new_end != rangeEnd && *currOffset + count > *new_end )
00257             {
00258                 count = *new_end - *currOffset;
00259             }
00260             rows_read += count;
00261 
00262             dataSetOffset[0] = *currOffset - startID;
00263             dataSetCount[0]  = count;
00264             err              = H5Sselect_hyperslab( dataSpace, sop, dataSetOffset, NULL, dataSetCount, 0 );
00265             if( err < 0 ) throw Exception( __LINE__ );
00266             sop = hyperslabSelectOp;  // subsequent calls to select_hyperslab append
00267 
00268             currOffset += count;
00269         }
00270 
00271         // Create a data space describing the memory in which to read the data
00272         dataSetCount[0] = rows_read;
00273         hid_t mem_id    = H5Screate_simple( dataSpaceRank, dataSetCount, NULL );
00274         if( mem_id < 0 ) throw Exception( __LINE__ );
00275 
00276         // Do the actual read
00277         err = H5Dread( dataSet, fileType, mem_id, dataSpace, ioProp, buffer );
00278         H5Sclose( mem_id );
00279         if( err < 0 ) throw Exception( __LINE__ );
00280 
00281         if( readCount ) --readCount;
00282 
00283         if( doConversion )
00284         {
00285             err = H5Tconvert( fileType, dataType, rows_read * columns(), buffer, 0, H5P_DEFAULT );
00286             if( err < 0 ) throw Exception( __LINE__ );
00287         }
00288     }
00289     else if( readCount )
00290     {
00291         null_read();
00292         --readCount;
00293     }
00294     MPE_Log_event( mpeReadEvent.second, (int)readCount, mpeDesc.c_str() );
00295 }
00296 
00297 void ReadHDF5Dataset::null_read()
00298 {
00299     herr_t err;
00300     err = H5Sselect_none( dataSpace );
00301     if( err < 0 ) throw Exception( __LINE__ );
00302 
00303     //#if HDF5_16API
00304     hsize_t one  = 1;
00305     hid_t mem_id = H5Screate_simple( 1, &one, NULL );
00306     if( mem_id < 0 ) throw Exception( __LINE__ );
00307     err = H5Sselect_none( mem_id );
00308     if( err < 0 )
00309     {
00310         H5Sclose( mem_id );
00311         throw Exception( __LINE__ );
00312     }
00313     //#else
00314     //  hid_t mem_id = H5Screate(H5S_NULL);
00315     //  if (mem_id < 0)
00316     //    throw Exception(__LINE__);
00317     //#endif
00318 
00319     err = H5Dread( dataSet, fileType, mem_id, dataSpace, ioProp, 0 );
00320     H5Sclose( mem_id );
00321     if( err < 0 ) throw Exception( __LINE__ );
00322 }
00323 
00324 }  // namespace moab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines