MOAB: Mesh Oriented datABase  (version 5.2.1)
ReadHDF5VarLen.cpp
/** \file   ReadHDF5VarLen.cpp
 *  \author Jason Kraftcheck
 *  \date   2010-09-04
 */

#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Tpublic.h"
#include <assert.h>

namespace moab
{

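// Return true if 'file_id' is present in the list of ranged-format file IDs
// delimited by 'ranged_iter' and 'range_end', advancing 'ranged_iter' past the
// ID when it matches.  Relies on file IDs being queried in increasing order.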
bool ReadHDF5VarLen::is_ranged( EntityHandle file_id, Range::const_iterator& ranged_iter,
                                Range::const_iterator range_end )
{
    if( ranged_iter == range_end ) return false;

    assert( file_id <= *ranged_iter );
    if( *ranged_iter != file_id ) return false;

    ++ranged_iter;
    return true;
}

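// Read the variable-length values for the entities in 'file_ids' in chunks
// sized to fit the internal buffer.  Each chunk is split back into per-entity
// lists using 'vals_per_ent', and every complete list is handed to store_data().
// A list that straddles a chunk boundary is accumulated in 'partial' until the
// remainder arrives in the next chunk.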
ErrorCode ReadHDF5VarLen::read_data( ReadHDF5Dataset& data_set, const Range& offsets, EntityHandle start_offset,
                                     hid_t data_type, const Range& file_ids,
                                     const std::vector< unsigned >& vals_per_ent, const Range& ranged_file_ids )
{
    ErrorCode rval;
    const size_t value_size          = H5Tget_size( data_type );
    const size_t buffer_size         = bufferSize / value_size;
    unsigned char* const data_buffer = reinterpret_cast< unsigned char* >( dataBuffer );
    std::vector< unsigned char > partial;  // for when we read only part of the contents of a set/entity
    Range::const_iterator fileid_iter                  = file_ids.begin();
    Range::const_iterator ranged_iter                  = ranged_file_ids.begin();
    std::vector< unsigned >::const_iterator count_iter = vals_per_ent.begin();
    size_t count, offset;
    bool ranged;
    int nn = 0;

    assert( file_ids.size() == vals_per_ent.size() );

    try
    {
        data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
    }
    catch( ReadHDF5Dataset::Exception& )
    {
        return MB_FAILURE;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( data_buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }

        assert( 0 == count || fileid_iter != file_ids.end() );

        // Handle 'special' case where we read some, but not all
        // of the data for an entity during the last iteration.
        offset = 0;
        if( !partial.empty() )
        {  // didn't read all of previous entity
            assert( fileid_iter != file_ids.end() );
            assert( 0 == ( partial.size() % value_size ) );
            size_t num_prev = partial.size() / value_size;
            offset          = *count_iter - num_prev;
            if( offset > count )
            {  // still don't have all
                partial.insert( partial.end(), data_buffer, data_buffer + count * value_size );
                continue;
            }

            partial.insert( partial.end(), data_buffer, data_buffer + offset * value_size );

            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            assert( partial.size() == *count_iter * value_size );
            rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            ++count_iter;
            ++fileid_iter;
            partial.clear();
        }

        // Process contents for all entities for which we
        // have read the complete list
        while( count_iter != vals_per_ent.end() && offset + *count_iter <= count )
        {
            assert( fileid_iter != file_ids.end() );
            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            rval   = store_data( *fileid_iter, data_buffer + offset * value_size, *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            offset += *count_iter;
            ++count_iter;
            ++fileid_iter;
        }

        // If we did not read all of the final entity,
        // store what we did read to be processed in the
        // next iteration
        if( offset < count )
        {
            assert( partial.empty() );
            partial.insert( partial.end(), data_buffer + offset * value_size, data_buffer + count * value_size );
        }
    }
    // NOTE: If the last set is empty, we will not process it here
    // assert(fileid_iter == file_ids.end());
#ifndef NDEBUG
    for( ; fileid_iter != file_ids.end(); ++fileid_iter )
    {
        assert( 0 == *count_iter );
        ++count_iter;
    }
#endif
    return MB_SUCCESS;
}
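// The multi-column variant of read_offsets() below is retained only as
// commented-out reference code; the single-column implementation that follows
// it is the one that is compiled.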
/*
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        unsigned num_columns,
                                        const unsigned indices[],
                                        EntityHandle nudge,
                                        Range offsets_out[],
                                        std::vector<unsigned> counts_out[],
                                        Range* ranged_file_ids = 0 )
{
  const unsigned local_index = 1;

    // sanity check
  const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns();
  for (unsigned i = 0; i < num_columns; ++i) {
    assert(indices[i] < max_cols);
    if (indices[i] >= max_cols)
      return MB_FAILURE;
  }

    // Use hints to make sure insertion into ranges is O(1)
  std::vector<Range::iterator> hints;
  if (ranged_file_ids) {
    hints.resize( num_columns + 1 );
    hints.back() = ranged_file_ids->begin();
  }
  else {
    hints.resize( num_columns );
  }
  for (unsigned i = 0; i < num_columns; ++i) {
    offsets_out[i].clear();
    counts_out[i].clear();
    counts_out[i].reserve( file_ids.size() );
    hints[i] = offsets_out[i].begin();
  }

    // If we only need one column from a multi-column data set,
    // then read only that column.
  if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
    data_set.set_column( indices[0] );
    indices = &local_index;
  }
  else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
    data_set.set_column( data_set.columns() - 1 );
  }
    // NOTE: do not move this above the previous block.
    //       The previous block changes the results of data_set.columns()!
  const size_t table_columns = data_set.columns();

    // Calculate which rows we need to read from the offsets table
  Range rows;
  Range::iterator hint = rows.begin();
  Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
  if (pair != file_ids.const_pair_end() && pair->first == start_file_id) {
    hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    ++pair;
  }
  while (pair != file_ids.const_pair_end()) {
    hint = rows.insert( hint,
                        pair->first + nudge - 1 - start_file_id,
                        pair->second + nudge - start_file_id );
    ++pair;
  }

    // set up read of offsets dataset
  hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
  hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
  std::vector<hssize_t> prev_end;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
  if (!file_ids.empty() && file_ids.front() == start_file_id)
    prev_end.resize(num_columns,-1);

    // read offset table
  size_t count, offset;
  Range::const_iterator fiter = file_ids.begin();
  while (!data_set.done()) {
    try {
      data_set.read( buffer, count );
    }
    catch (ReadHDF5Dataset::Exception&) {
      return MB_FAILURE;
    }
    if (!count) // might have been NULL read for collective IO
      continue;

      // If the previous end values were read in the previous iteration,
      // then they're stored in prev_end.
    offset = 0;
    if (!prev_end.empty()) {
      for (unsigned i = 0; i < num_columns; ++i) {
        counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
        hints[i] = offsets_out[i].insert( hints[i],
                                          prev_end[i] + 1 + nudge,
                                          buffer[indices[i]] + nudge );
      }
      if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
        hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
      ++fiter;
      offset = 1;
      prev_end.clear();
    }

    while (offset < count) {
      assert(fiter != file_ids.end());
        // whenever we get to a gap between blocks we need to
        // advance one step because we read an extra end id
        // preceding each block
      if (fiter == fiter.start_of_block()) {
        if (offset == count-1)
          break;
        ++offset;
      }

      for (unsigned i = 0; i < num_columns; ++i) {
        size_t s = buffer[(offset-1)*table_columns+indices[i]] + 1;
        size_t e = buffer[ offset   *table_columns+indices[i]];
        counts_out[i].push_back( e - s + 1 );
        hints[i] = offsets_out[i].insert( hints[i], s + nudge, e + nudge );
      }
      if (ranged_file_ids && (buffer[offset*table_columns+table_columns-1] & mhdf_SET_RANGE_BIT))
        hints.back() = ranged_file_ids->insert( hints.back(), *fiter );

      ++fiter;
      ++offset;
    }

      // If we did not end on the boundary between two blocks,
      // then we need to save the end indices for the final entry
      // for use in the next iteration.  Similarly, if we ended
      // with extra values that were read with the express intention
      // of getting the previous end values for a block, we need to
      // save them.  This case only arises if we hit the break in
      // the above loop.
    if (fiter != fiter.start_of_block() || offset < count) {
      assert(prev_end.empty());
      if (offset == count) {
        --offset;
        assert(fiter != fiter.start_of_block());
      }
      else {
        assert(offset+1 == count);
        assert(fiter == fiter.start_of_block());
      }
      for (unsigned i = 0; i < num_columns; ++i)
        prev_end.push_back(buffer[offset*table_columns+indices[i]]);
    }
  }
  assert(prev_end.empty());
  assert(fiter == file_ids.end());

  return MB_SUCCESS;
}
*/
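// Read the single-column offsets table for the entities in 'file_ids'.  Each
// row of the table holds the index of the last value belonging to the
// corresponding entity, so an entity's count is the difference between its end
// index and the end index of the preceding entity.  To get that previous end we
// read one extra row in front of each contiguous block of file IDs, except for
// the very first row of the table, whose previous end is implicitly -1.  Values
// inserted into the output Range are shifted by 'nudge' so that small offsets
// (including the implicit -1) still map to valid Range entries.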
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set, const Range& file_ids, EntityHandle start_file_id,
                                        EntityHandle nudge, Range& offsets_out, std::vector< unsigned >& counts_out )
{

    // Use hints to make sure insertion into ranges is O(1)
    offsets_out.clear();
    counts_out.clear();
    counts_out.reserve( file_ids.size() );
    Range::iterator hint;

    // Calculate which rows we need to read from the offsets table
    Range rows;
    hint                            = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
    if( pair != file_ids.const_pair_end() && pair->first == start_file_id )
    {
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
        ++pair;
    }
    while( pair != file_ids.const_pair_end() )
    {
        hint = rows.insert( hint, pair->first - start_file_id + nudge - 1, pair->second - start_file_id + nudge );
        ++pair;
    }

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / sizeof( hssize_t );
    hssize_t* buffer    = reinterpret_cast< hssize_t* >( dataBuffer );
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    hssize_t prev_end;
    bool have_prev_end = false;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if( !file_ids.empty() && file_ids.front() == start_file_id )
    {
        prev_end      = -1;
        have_prev_end = true;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    hint                        = offsets_out.begin();
    int nn                      = 0;
    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }
        if( !count )  // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        offset = 0;
        if( have_prev_end )
        {
            counts_out.push_back( buffer[0] - prev_end );
            hint = offsets_out.insert( hint, prev_end + 1 + nudge, buffer[0] + nudge );
            ++fiter;
            offset        = 1;
            have_prev_end = false;
        }

        while( offset < count )
        {
            assert( fiter != file_ids.end() );
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if( fiter == fiter.start_of_block() )
            {
                if( offset == count - 1 ) break;
                ++offset;
            }

            size_t s = buffer[offset - 1] + 1;
            size_t e = buffer[offset];
            counts_out.push_back( e - s + 1 );
            hint = offsets_out.insert( hint, s + nudge, e + nudge );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration.  Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them.  This case only arises if we hit the break in
        // the above loop.
        if( fiter != fiter.start_of_block() || offset < count )
        {
            assert( !have_prev_end );
            if( offset == count )
            {
                --offset;
                assert( fiter != fiter.start_of_block() );
            }
            else
            {
                assert( offset + 1 == count );
                assert( fiter == fiter.start_of_block() );
            }
            have_prev_end = true;
            prev_end      = buffer[offset];
        }
    }
    assert( !have_prev_end );
    assert( fiter == file_ids.end() );

    return MB_SUCCESS;
}
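
// Illustrative call sequence (a sketch only, not code compiled here): a concrete
// subclass implementing store_data() would normally decode the offsets table
// first and then stream the variable-length values.  All variable names below
// ('reader', 'offset_ds', 'value_ds', 'file_ids', 'start_id', 'nudge',
// 'value_type', 'ranged_ids') are placeholders.
//
//   Range offsets;
//   std::vector< unsigned > counts;
//   ErrorCode rval = reader.read_offsets( offset_ds, file_ids, start_id, nudge, offsets, counts );
//   if( MB_SUCCESS == rval )
//       rval = reader.read_data( value_ds, offsets, nudge, value_type, file_ids, counts, ranged_ids );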

}  // namespace moab