Mesh Oriented datABase (MOAB), version 5.4.1
Array-based unstructured mesh datastructure
/** \file ReadHDF5VarLen.cpp
 * \author Jason Kraftcheck
 * \date 2010-09-04
 */

#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Tpublic.h"
#include <assert.h>

namespace moab
{

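// Returns true if 'file_id' is the next entry in the list of ranged-format sets
// (the current position of 'ranged_iter'), advancing the iterator past it if so.
// Callers must query file IDs in increasing order, matching the order of the
// ranged ID list.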
bool ReadHDF5VarLen::is_ranged( EntityHandle file_id,
                                Range::const_iterator& ranged_iter,
                                Range::const_iterator range_end )
{
    if( ranged_iter == range_end ) return false;

    assert( file_id <= *ranged_iter );
    if( *ranged_iter != file_id ) return false;

    ++ranged_iter;
    return true;
}

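// Read variable-length per-entity values (set contents, variable-length tag
// data, etc.) in buffer-sized chunks.  'offsets' lists the rows of the value
// table to read and 'vals_per_ent' gives the number of values owned by each
// entity in 'file_ids'.  Each complete value list is handed to store_data();
// an entity whose values straddle a chunk boundary is accumulated in 'partial'
// and flushed on the next pass.  Entities listed in 'ranged_file_ids' are
// flagged as ranged-format when passed to store_data().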
ErrorCode ReadHDF5VarLen::read_data( ReadHDF5Dataset& data_set,
                                     const Range& offsets,
                                     EntityHandle start_offset,
                                     hid_t data_type,
                                     const Range& file_ids,
                                     const std::vector< unsigned >& vals_per_ent,
                                     const Range& ranged_file_ids )
{
    ErrorCode rval;
    const size_t value_size = H5Tget_size( data_type );
    const size_t buffer_size = bufferSize / value_size;
    unsigned char* const data_buffer = reinterpret_cast< unsigned char* >( dataBuffer );
    std::vector< unsigned char > partial;  // for when we read only part of the contents of a set/entity
    Range::const_iterator fileid_iter = file_ids.begin();
    Range::const_iterator ranged_iter = ranged_file_ids.begin();
    std::vector< unsigned >::const_iterator count_iter = vals_per_ent.begin();
    size_t count, offset;
    bool ranged;
    int nn = 0;

    assert( file_ids.size() == vals_per_ent.size() );

    try
    {
        data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
    }
    catch( ReadHDF5Dataset::Exception& )
    {
        return MB_FAILURE;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

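    // Each pass through this loop reads up to buffer_size values into
    // data_buffer; 'offset' counts how many values of the current chunk
    // have already been consumed.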
    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( data_buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }

        assert( 0 == count || fileid_iter != file_ids.end() );

        // Handle 'special' case where we read some, but not all
        // of the data for an entity during the last iteration.
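        // For example, if the entity at fileid_iter owns 10 values and only its
        // first 4 values fit at the end of the previous chunk, 'partial' already
        // holds those 4 values and the first 6 values of this chunk complete it.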
        offset = 0;
        if( !partial.empty() )
        {  // didn't read all of previous entity
            assert( fileid_iter != file_ids.end() );
            assert( 0 == ( partial.size() % value_size ) );
            size_t num_prev = partial.size() / value_size;
            offset = *count_iter - num_prev;
            if( offset > count )
            {  // still don't have all
                partial.insert( partial.end(), data_buffer, data_buffer + count * value_size );
                continue;
            }

            partial.insert( partial.end(), data_buffer, data_buffer + offset * value_size );

            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            assert( partial.size() == *count_iter * value_size );
            rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            ++count_iter;
            ++fileid_iter;
            partial.clear();
        }

        // Process contents for all entities for which we
        // have read the complete list
        while( count_iter != vals_per_ent.end() && offset + *count_iter <= count )
        {
            assert( fileid_iter != file_ids.end() );
            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            rval = store_data( *fileid_iter, data_buffer + offset * value_size, *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            offset += *count_iter;
            ++count_iter;
            ++fileid_iter;
        }

        // If we did not read all of the final entity,
        // store what we did read to be processed in the
        // next iteration
        if( offset < count )
        {
            assert( partial.empty() );
            partial.insert( partial.end(), data_buffer + offset * value_size, data_buffer + count * value_size );
        }
    }
    // NOTE: If the last set is empty, we will not process it here
    // assert(fileid_iter == file_ids.end());
#ifndef NDEBUG
    for( ; fileid_iter != file_ids.end(); ++fileid_iter )
    {
        assert( 0 == *count_iter );
        ++count_iter;
    }
#endif
    return MB_SUCCESS;
}
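/* Illustrative usage sketch (not taken from this file; the dataset names and the
 * exact calling sequence are assumptions): a concrete reader derived from
 * ReadHDF5VarLen would first call read_offsets() to obtain the per-entity value
 * counts and the rows to read from the value table, then stream the values with
 * read_data(), which forwards each complete value list to the subclass's
 * store_data() implementation:
 *
 *     Range rows;
 *     std::vector< unsigned > counts;
 *     ErrorCode rval = read_offsets( index_dataset, file_ids, start_id, nudge, rows, counts );
 *     if( MB_SUCCESS != rval ) return rval;
 *     rval = read_data( value_dataset, rows, nudge, value_type, file_ids, counts, ranged_ids );
 */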
/*
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        unsigned num_columns,
                                        const unsigned indices[],
                                        EntityHandle nudge,
                                        Range offsets_out[],
                                        std::vector< unsigned > counts_out[],
                                        Range* ranged_file_ids = 0 )
{
    const int local_index = 1;

    // sanity check
    const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns();
    for (unsigned i = 0; i < num_columns; ++i) {
        assert(indices[i] < max_cols);
        if (indices[i] >= max_cols)
            return MB_FAILURE;
    }

    // Use hints to make sure insertion into ranges is O(1)
    std::vector< Range::iterator > hints;
    if (ranged_file_ids) {
        hints.resize( num_columns + 1 );
        hints.back() = ranged_file_ids->begin();
    }
    else {
        hints.resize( num_columns );
    }
    for (unsigned i = 0; i < num_columns; ++i) {
        offsets_out[i].clear();
        counts_out[i].clear();
        counts_out[i].reserve( file_ids.size() );
        hints[i] = offsets_out[i].begin();
    }

    // If we only need one column from a multi-column data set,
    // then read only that column.
    if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
        data_set.set_column( indices[0] );
        indices = &local_index;
    }
    else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
        data_set.set_column( data_set.columns() - 1 );
    }
    // NOTE: do not move this above the previous block.
    // The previous block changes the results of data_set.columns()!
    const size_t table_columns = data_set.columns();

    // Calculate which rows we need to read from the offsets table
    Range rows;
    Range::iterator hint = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
    if (pair != file_ids.const_pair_end() && pair->first == start_file_id)
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    while (pair != file_ids.const_pair_end()) {
        hint = rows.insert( hint,
                            pair->first + nudge - 1 - start_file_id,
                            pair->second + nudge - start_file_id );
        ++pair;
    }

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
    hssize_t* buffer = reinterpret_cast< hssize_t* >( dataBuffer );
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    std::vector< hssize_t > prev_end;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if (!file_ids.empty() && file_ids.front() == start_file_id)
        prev_end.resize(num_columns,-1);

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    while (!data_set.done()) {
        try {
            data_set.read( buffer, count );
        }
        catch (ReadHDF5Dataset::Exception&) {
            return MB_FAILURE;
        }
        if (!count) // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        size_t offset = 0;
        if (!prev_end.empty()) {
            for (unsigned i = 0; i < num_columns; ++i) {
                counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
                hints[i] = offsets_out[i].insert( hints[i],
                                                  prev_end[i] + 1 + nudge,
                                                  buffer[indices[i]] + nudge );
            }
            if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
                hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
            ++fiter;
            offset = 1;
            prev_end.clear();
        }

        while (offset < count) {
            assert(fiter != file_ids.end());
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if (fiter == fiter.start_of_block()) {
                if (offset == count-1)
                    break;
                ++offset;
            }

            for (unsigned i = 0; i < num_columns; ++i) {
                size_t s = buffer[(offset-1)*table_columns+indices[i]] + 1;
                size_t e = buffer[ offset *table_columns+indices[i]];
                counts_out[i].push_back( e - s + 1 );
                hints[i] = offsets_out[i].insert( hints[i], s, e );
            }
            if (ranged_file_ids && (buffer[offset*table_columns+table_columns-1] & mhdf_SET_RANGE_BIT))
                hints.back() = ranged_file_ids->insert( hints.back(), *fiter );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration. Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them. This case only arises if we hit the break in
        // the above loop.
        if (fiter != fiter.start_of_block() || offset < count) {
            assert(prev_end.empty());
            if (offset == count) {
                --offset;
                assert(fiter != fiter.start_of_block());
            }
            else {
                assert(offset+1 == count);
                assert(fiter == fiter.start_of_block());
            }
            for (unsigned i = 0; i < num_columns; ++i)
                prev_end.push_back(buffer[offset*table_columns+indices[i]]);
        }
    }
    assert(prev_end.empty());
    assert(fiter == file_ids.end());

    return MB_SUCCESS;
}
*/
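// Read the end-index ("offsets") table for the entities in file_ids and convert
// it into per-entity value counts (counts_out) and the set of rows to read from
// the corresponding value table (offsets_out).  Row i of the offsets table holds
// the index of the last value owned by entity i, so entity i owns value rows
// prev_end + 1 through end, where prev_end is the value stored in row i - 1
// (implicitly -1 for the very first row).  'nudge' is added to every row index
// stored in a Range, presumably so that row 0 is never stored as handle 0.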
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        EntityHandle nudge,
                                        Range& offsets_out,
                                        std::vector< unsigned >& counts_out )
{

    // Use hints to make sure insertion into ranges is O(1)
    offsets_out.clear();
    counts_out.clear();
    counts_out.reserve( file_ids.size() );
    Range::iterator hint;

    // Calculate which rows we need to read from the offsets table
    Range rows;
    hint = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
    if( pair != file_ids.const_pair_end() && pair->first == start_file_id )
    {
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
        ++pair;
    }
    while( pair != file_ids.const_pair_end() )
    {
        hint = rows.insert( hint, pair->first - start_file_id + nudge - 1, pair->second - start_file_id + nudge );
        ++pair;
    }
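    // Note that for every contiguous block of file IDs except one beginning at
    // start_file_id, the row immediately before the block is also read: it holds
    // the end index of the preceding entity, which is needed to compute the
    // first count in the block.  For example, if the offsets table holds end
    // indices { 2, 5, 9 } for three consecutive entities, their value counts are
    // 3, 3 and 4, and their value rows are 0-2, 3-5 and 6-9 (before 'nudge' is
    // added).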

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / sizeof( hssize_t );
    hssize_t* buffer = reinterpret_cast< hssize_t* >( dataBuffer );
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    hssize_t prev_end;
    bool have_prev_end = false;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if( !file_ids.empty() && file_ids.front() == start_file_id )
    {
        prev_end = -1;
        have_prev_end = true;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    hint = offsets_out.begin();
    int nn = 0;
    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }
        if( !count )  // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        offset = 0;
        if( have_prev_end )
        {
            counts_out.push_back( buffer[0] - prev_end );
            hint = offsets_out.insert( hint, prev_end + 1 + nudge, buffer[0] + nudge );
            ++fiter;
            offset = 1;
            have_prev_end = false;
        }

        while( offset < count )
        {
            assert( fiter != file_ids.end() );
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if( fiter == fiter.start_of_block() )
            {
                if( offset == count - 1 ) break;
                ++offset;
            }

            size_t s = buffer[offset - 1] + 1;
            size_t e = buffer[offset];
            counts_out.push_back( e - s + 1 );
            hint = offsets_out.insert( hint, s + nudge, e + nudge );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration. Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them. This case only arises if we hit the break in
        // the above loop.
        if( fiter != fiter.start_of_block() || offset < count )
        {
            assert( !have_prev_end );
            if( offset == count )
            {
                --offset;
                assert( fiter != fiter.start_of_block() );
            }
            else
            {
                assert( offset + 1 == count );
                assert( fiter == fiter.start_of_block() );
            }
            have_prev_end = true;
            prev_end = buffer[offset];
        }
    }
    assert( !have_prev_end );
    assert( fiter == file_ids.end() );

    return MB_SUCCESS;
}

} // namespace moab