MOAB: Mesh Oriented datABase
(version 5.4.1)
/** \file ReadHDF5VarLen.cpp
 * \author Jason Kraftcheck
 * \date 2010-09-04
 */

#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Tpublic.h"
#include <cassert>

namespace moab
{

bool ReadHDF5VarLen::is_ranged( EntityHandle file_id,
                                Range::const_iterator& ranged_iter,
                                Range::const_iterator range_end )
{
    if( ranged_iter == range_end ) return false;

    assert( file_id <= *ranged_iter );
    if( *ranged_iter != file_id ) return false;

    ++ranged_iter;
    return true;
}

ErrorCode ReadHDF5VarLen::read_data( ReadHDF5Dataset& data_set,
                                     const Range& offsets,
                                     EntityHandle start_offset,
                                     hid_t data_type,
                                     const Range& file_ids,
                                     const std::vector< unsigned >& vals_per_ent,
                                     const Range& ranged_file_ids )
{
    ErrorCode rval;
    const size_t value_size = H5Tget_size( data_type );
    const size_t buffer_size = bufferSize / value_size;
    unsigned char* const data_buffer = reinterpret_cast< unsigned char* >( dataBuffer );
    std::vector< unsigned char > partial;  // for when we read only part of the contents of a set/entity
    Range::const_iterator fileid_iter = file_ids.begin();
    Range::const_iterator ranged_iter = ranged_file_ids.begin();
    std::vector< unsigned >::const_iterator count_iter = vals_per_ent.begin();
    size_t count, offset;
    bool ranged;
    int nn = 0;

    assert( file_ids.size() == vals_per_ent.size() );

    try
    {
        data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
    }
    catch( ReadHDF5Dataset::Exception& )
    {
        return MB_FAILURE;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( data_buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }

        assert( 0 == count || fileid_iter != file_ids.end() );

        // Handle 'special' case where we read some, but not all
        // of the data for an entity during the last iteration.
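        //
        // Example of the partial-read bookkeeping: suppose the current entity
        // expects *count_iter == 10 values but the previous chunk supplied only
        // 4 of them, so 'partial' holds 4 * value_size bytes.  Then num_prev == 4
        // and offset == 6: the first 6 values of this chunk complete the entity,
        // which is flushed via store_data(), and normal processing resumes at
        // position 'offset' in data_buffer.  If this chunk itself holds fewer
        // than 6 values (offset > count), the whole chunk is appended to
        // 'partial' and we continue with the next chunk.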
        offset = 0;
        if( !partial.empty() )
        {  // didn't read all of previous entity
            assert( fileid_iter != file_ids.end() );
            assert( 0 == ( partial.size() % value_size ) );
            size_t num_prev = partial.size() / value_size;
            offset = *count_iter - num_prev;
            if( offset > count )
            {  // still don't have all
                partial.insert( partial.end(), data_buffer, data_buffer + count * value_size );
                continue;
            }

            partial.insert( partial.end(), data_buffer, data_buffer + offset * value_size );

            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            assert( partial.size() == *count_iter * value_size );
            rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            ++count_iter;
            ++fileid_iter;
            partial.clear();
        }

        // Process contents for all entities for which we
        // have read the complete list
        while( count_iter != vals_per_ent.end() && offset + *count_iter <= count )
        {
            assert( fileid_iter != file_ids.end() );
            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            rval = store_data( *fileid_iter, data_buffer + offset * value_size, *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            offset += *count_iter;
            ++count_iter;
            ++fileid_iter;
        }

        // If we did not read all of the final entity,
        // store what we did read to be processed in the
        // next iteration
        if( offset < count )
        {
            assert( partial.empty() );
            partial.insert( partial.end(), data_buffer + offset * value_size, data_buffer + count * value_size );
        }
    }
    // NOTE: If the last set is empty, we will not process it here
    // assert(fileid_iter == file_ids.end());
#ifndef NDEBUG
    for( ; fileid_iter != file_ids.end(); ++fileid_iter )
    {
        assert( 0 == *count_iter );
        ++count_iter;
    }
#endif
    return MB_SUCCESS;
}
/*
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        unsigned num_columns,
                                        const unsigned indices[],
                                        EntityHandle nudge,
                                        Range offsets_out[],
                                        std::vector<unsigned> counts_out[],
                                        Range* ranged_file_ids = 0 )
{
    const int local_index = 1;

    // sanity check
    const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns()
    for (unsigned i = 0; i < num_columns; ++i) {
        assert(indices[i] >= max_cols);
        if (indices[i] >= max_cols)
            return MB_FAILURE;
    }

    // Use hints to make sure insertion into ranges is O(1)
    std::vector<Range::iterator> hints;
    if (ranged_file_ids) {
        hints.resize( num_colums + 1 );
        hints.back() = ranged_file_ids->begin();
    }
    else {
        hints.resize( num_columns );
    }
    for (unsigned i = 0; i < num_columns; ++i)
        offsets_out[i].clear();
        counts_out[i].clear();
        counts_out[i].reserve( file_ids.size() );
        hints[i] = offsets_out[i].begin();
    }

    // If we only need one column from a multi-column data set,
    // then read only that column.
    if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
        data_set.set_column( indices[0] );
        indices = &local_index;
    }
    else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
        data_set.set_column( data_set.columns() - 1 );
    }
    // NOTE: do not move this above the previous block.
    //       The previous block changes the results of data_set.columns()!
    const size_t table_columns = data_set.columns();

    // Calculate which rows we need to read from the offsets table
    Range rows;
    Range::iterator hint = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
    if (pair != file_ids.const_pair_end() && pair->first == start_file_id)
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    while (pair != file_ids.const_pair_end()) {
        hint = rows.insert( hint,
                            pair->first + nudge - 1 - start_file_id,
                            pair->second + nudge - start_file_id );
        ++pair;
    }

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
    hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    std::vector<hssize_t> prev_end;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if (!file_ids.empty() && file_ids.front() == start_file_id)
        prev_end.resize( num_columns, -1 );

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    while (!data_set.done()) {
        try {
            data_set.read( buffer, count );
        }
        catch (ReadHDF5Dataset::Exception e) {
            return MB_FAILURE;
        }
        if (!count) // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        size_t offset = 0;
        if (!prev_end.empty()) {
            for (unsigned i = 0; i < num_columns; ++i) {
                counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
                hints[i] = offsets_out[i].insert( hints[i],
                                                  prev_end[i] + 1 + nudge,
                                                  buffer[indices[i]] + nudge );
            }
            if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
                hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
            ++fiter;
            offset = 1;
            prev_end.clear();
        }

        while (offset < count) {
            assert(fiter != file_ids.end());
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if (fiter == fiter.start_of_block()) {
                if (offset == count-1)
                    break;
                ++offset;
            }

            for (unsigned i = 0; i < num_columns; ++i) {
                size_t s = buffer[(offset-1)*table_columns+indices[i]] + 1;
                size_t e = buffer[ offset  *table_columns+indices[i]];
                counts_out.push_back( e - s + 1 );
                hints[i] = offsets_out.insert( hints[i], s, e );
            }
            if (ranged_file_ids && (buffer[offset*table_columns+table_columns-1] & mhdf_SET_RANGE_BIT))
                hints.back() = ranged_file_ids->insert( hints.back(), *fiter );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration.  Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them.  This case only arises if we hit the break in
        // the above loop.
        if (fiter != fiter.start_of_block() || offset < count) {
            assert(prev_end.empty());
            if (offset == count) {
                --offset;
                assert(fiter != fiter.start_of_block());
            }
            else {
                assert(offset+1 == count);
                assert(fiter == fiter.start_of_block());
            }
            for (unsigned i = 0; i < num_columns; ++i)
                prev_end.push_back(buffer[offset*table_columns+indices[i]]);
        }
    }
    assert(prev_end.empty());
    assert(fiter == file_ids.end());

    return MB_SUCCESS;
}
*/
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        EntityHandle nudge,
                                        Range& offsets_out,
                                        std::vector< unsigned >& counts_out )
{

    // Use hints to make sure insertion into ranges is O(1)
    offsets_out.clear();
    counts_out.clear();
    counts_out.reserve( file_ids.size() );
    Range::iterator hint;

    // Calculate which rows we need to read from the offsets table
    Range rows;
    hint = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
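    // (The offsets dataset stores, for each entity, the index of its last
    // value; an entity's count is the difference between its end offset and
    // the end offset of the preceding entity.  For each contiguous block
    // [first, last] of file ids we therefore read rows
    // (first - start_file_id + nudge - 1) through (last - start_file_id + nudge):
    // the one extra leading row supplies the preceding entity's end offset.
    // The first entity in the file has no preceding row, so its previous end
    // is taken to be -1 via have_prev_end/prev_end below.)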
    if( pair != file_ids.const_pair_end() && pair->first == start_file_id )
    {
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
        ++pair;
    }
    while( pair != file_ids.const_pair_end() )
    {
        hint = rows.insert( hint, pair->first - start_file_id + nudge - 1, pair->second - start_file_id + nudge );
        ++pair;
    }

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / sizeof( hssize_t );
    hssize_t* buffer = reinterpret_cast< hssize_t* >( dataBuffer );
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    hssize_t prev_end;
    bool have_prev_end = false;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if( !file_ids.empty() && file_ids.front() == start_file_id )
    {
        prev_end = -1;
        have_prev_end = true;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    hint = offsets_out.begin();
    int nn = 0;
    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }
        if( !count )  // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        offset = 0;
        if( have_prev_end )
        {
            counts_out.push_back( buffer[0] - prev_end );
            hint = offsets_out.insert( hint, prev_end + 1 + nudge, buffer[0] + nudge );
            ++fiter;
            offset = 1;
            have_prev_end = false;
        }

        while( offset < count )
        {
            assert( fiter != file_ids.end() );
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if( fiter == fiter.start_of_block() )
            {
                if( offset == count - 1 ) break;
                ++offset;
            }

            size_t s = buffer[offset - 1] + 1;
            size_t e = buffer[offset];
            counts_out.push_back( e - s + 1 );
            hint = offsets_out.insert( hint, s + nudge, e + nudge );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration.  Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them.  This case only arises if we hit the break in
        // the above loop.
        if( fiter != fiter.start_of_block() || offset < count )
        {
            assert( !have_prev_end );
            if( offset == count )
            {
                --offset;
                assert( fiter != fiter.start_of_block() );
            }
            else
            {
                assert( offset + 1 == count );
                assert( fiter == fiter.start_of_block() );
            }
            have_prev_end = true;
            prev_end = buffer[offset];
        }
    }
    assert( !have_prev_end );
    assert( fiter == file_ids.end() );

    return MB_SUCCESS;
}

}  // namespace moab
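For reference, the arithmetic that read_offsets() applies to the end-offset table can be shown in isolation. The sketch below is not part of MOAB; it decodes a small, made-up end-offset table into per-entity value ranges and counts using the same convention as above (each table entry is the index of an entity's last value, and the entry preceding the first entity is implicitly -1):

#include <cstdio>
#include <vector>

int main()
{
    // Hypothetical end-offset table: end_offset[i] is the index of the
    // last value belonging to entity i in the variable-length data table.
    const std::vector< long > end_offset = { 4, 9, 15 };

    long prev_end = -1;  // implicit end offset preceding the first entity
    for( std::size_t i = 0; i < end_offset.size(); ++i )
    {
        const long first = prev_end + 1;
        const long last  = end_offset[i];
        const long count = last - first + 1;  // same arithmetic as counts_out.push_back( e - s + 1 )
        std::printf( "entity %zu: values [%ld, %ld], count %ld\n", i, first, last, count );
        prev_end = last;
    }
    // Prints:
    //   entity 0: values [0, 4], count 5
    //   entity 1: values [5, 9], count 5
    //   entity 2: values [10, 15], count 6
    return 0;
}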