MOAB: Mesh Oriented datABase
(version 5.3.1)
/** \file ReadHDF5VarLen.cpp
 *  \author Jason Kraftcheck
 *  \date 2010-09-04
 */

#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Tpublic.h"
#include <cassert>

namespace moab
{

bool ReadHDF5VarLen::is_ranged( EntityHandle file_id, Range::const_iterator& ranged_iter,
                                Range::const_iterator range_end )
{
    if( ranged_iter == range_end ) return false;

    assert( file_id <= *ranged_iter );
    if( *ranged_iter != file_id ) return false;

    ++ranged_iter;
    return true;
}

ErrorCode ReadHDF5VarLen::read_data( ReadHDF5Dataset& data_set, const Range& offsets, EntityHandle start_offset,
                                     hid_t data_type, const Range& file_ids,
                                     const std::vector< unsigned >& vals_per_ent, const Range& ranged_file_ids )
{
    ErrorCode rval;
    const size_t value_size = H5Tget_size( data_type );
    const size_t buffer_size = bufferSize / value_size;
    unsigned char* const data_buffer = reinterpret_cast< unsigned char* >( dataBuffer );
    std::vector< unsigned char > partial;  // for when we read only part of the contents of a set/entity
    Range::const_iterator fileid_iter = file_ids.begin();
    Range::const_iterator ranged_iter = ranged_file_ids.begin();
    std::vector< unsigned >::const_iterator count_iter = vals_per_ent.begin();
    size_t count, offset;
    bool ranged;
    int nn = 0;

    assert( file_ids.size() == vals_per_ent.size() );

    try
    {
        data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
    }
    catch( ReadHDF5Dataset::Exception& )
    {
        return MB_FAILURE;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( data_buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }

        assert( 0 == count || fileid_iter != file_ids.end() );

        // Handle 'special' case where we read some, but not all
        // of the data for an entity during the last iteration.
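        // (If 'partial' is non-empty, the previous chunk ended part-way through
        // an entity's value list: first append enough values from the start of
        // this chunk to complete and store that entity, then process the
        // entities contained entirely within this chunk.)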
        offset = 0;
        if( !partial.empty() )
        {  // didn't read all of previous entity
            assert( fileid_iter != file_ids.end() );
            assert( 0 == ( partial.size() % value_size ) );
            size_t num_prev = partial.size() / value_size;
            offset = *count_iter - num_prev;
            if( offset > count )
            {  // still don't have all
                partial.insert( partial.end(), data_buffer, data_buffer + count * value_size );
                continue;
            }

            partial.insert( partial.end(), data_buffer, data_buffer + offset * value_size );

            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            assert( partial.size() == *count_iter * value_size );
            rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            ++count_iter;
            ++fileid_iter;
            partial.clear();
        }

        // Process contents for all entities for which we
        // have read the complete list
        while( count_iter != vals_per_ent.end() && offset + *count_iter <= count )
        {
            assert( fileid_iter != file_ids.end() );
            ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
            rval = store_data( *fileid_iter, data_buffer + offset * value_size, *count_iter, ranged );
            if( MB_SUCCESS != rval ) return rval;

            offset += *count_iter;
            ++count_iter;
            ++fileid_iter;
        }

        // If we did not read all of the final entity,
        // store what we did read to be processed in the
        // next iteration
        if( offset < count )
        {
            assert( partial.empty() );
            partial.insert( partial.end(), data_buffer + offset * value_size, data_buffer + count * value_size );
        }
    }
    // NOTE: If the last set is empty, we will not process it here
    // assert(fileid_iter == file_ids.end());
#ifndef NDEBUG
    for( ; fileid_iter != file_ids.end(); ++fileid_iter )
    {
        assert( 0 == *count_iter );
        ++count_iter;
    }
#endif
    return MB_SUCCESS;
}
/*
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        unsigned num_columns,
                                        const unsigned indices[],
                                        EntityHandle nudge,
                                        Range offsets_out[],
                                        std::vector<unsigned> counts_out[],
                                        Range* ranged_file_ids = 0 )
{
    const int local_index = 1;

    // sanity check
    const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns();
    for (unsigned i = 0; i < num_columns; ++i) {
        assert(indices[i] < max_cols);
        if (indices[i] >= max_cols)
            return MB_FAILURE;
    }

    // Use hints to make sure insertion into ranges is O(1)
    std::vector<Range::iterator> hints;
    if (ranged_file_ids) {
        hints.resize( num_columns + 1 );
        hints.back() = ranged_file_ids->begin();
    }
    else {
        hints.resize( num_columns );
    }
    for (unsigned i = 0; i < num_columns; ++i) {
        offsets_out[i].clear();
        counts_out[i].clear();
        counts_out[i].reserve( file_ids.size() );
        hints[i] = offsets_out[i].begin();
    }

    // If we only need one column from a multi-column data set,
    // then read only that column.
    if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
        data_set.set_column( indices[0] );
        indices = &local_index;
    }
    else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
        data_set.set_column( data_set.columns() - 1 );
    }
    // NOTE: do not move this above the previous block.
    // The previous block changes the results of data_set.columns()!
    const size_t table_columns = data_set.columns();

    // Calculate which rows we need to read from the offsets table
    Range rows;
    Range::iterator hint = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
    if (pair != file_ids.const_pair_end() && pair->first == start_file_id)
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    while (pair != file_ids.const_pair_end()) {
        hint = rows.insert( hint,
                            pair->first + nudge - 1 - start_file_id,
                            pair->second + nudge - start_file_id );
        ++pair;
    }

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
    hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    std::vector<hssize_t> prev_end;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if (!file_ids.empty() && file_ids.front() == start_file_id)
        prev_end.resize(num_columns,-1);

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    while (!data_set.done()) {
        try {
            data_set.read( buffer, count );
        }
        catch (ReadHDF5Dataset::Exception e) {
            return MB_FAILURE;
        }
        if (!count) // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        offset = 0;
        if (!prev_end.empty()) {
            for (unsigned i = 0; i < num_columns; ++i) {
                counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
                hints[i] = offsets_out[i].insert( hints[i],
                                                  prev_end[i] + 1 + nudge,
                                                  buffer[indices[i]] + nudge );
            }
            if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
                hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
            ++fiter;
            offset = 1;
            prev_end.clear();
        }

        while (offset < count) {
            assert(fiter != file_ids.end());
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if (fiter == fiter.start_of_block()) {
                if (offset == count-1)
                    break;
                ++offset;
            }

            for (unsigned i = 0; i < num_columns; ++i) {
                size_t s = buffer[(offset-1)*table_columns+indices[i]] + 1;
                size_t e = buffer[ offset   *table_columns+indices[i]];
                counts_out[i].push_back( e - s + 1 );
                hints[i] = offsets_out[i].insert( hints[i], s, e );
            }
            if (ranged_file_ids && (buffer[offset*table_columns+table_columns-1] & mhdf_SET_RANGE_BIT))
                hints.back() = ranged_file_ids->insert( hints.back(), *fiter );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration. Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them. This case only arises if we hit the break in
        // the above loop.
        if (fiter != fiter.start_of_block() || offset < count) {
            assert(prev_end.empty());
            if (offset == count) {
                --offset;
                assert(fiter != fiter.start_of_block());
            }
            else {
                assert(offset+1 == count);
                assert(fiter == fiter.start_of_block());
            }
            for (unsigned i = 0; i < num_columns; ++i)
                prev_end.push_back(buffer[offset*table_columns+indices[i]]);
        }
    }
    assert(prev_end.empty());
    assert(fiter == file_ids.end());

    return MB_SUCCESS;
}
*/
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set, const Range& file_ids, EntityHandle start_file_id,
                                        EntityHandle nudge, Range& offsets_out, std::vector< unsigned >& counts_out )
{

    // Use hints to make sure insertion into ranges is O(1)
    offsets_out.clear();
    counts_out.clear();
    counts_out.reserve( file_ids.size() );
    Range::iterator hint;

    // Calculate which rows we need to read from the offsets table
    Range rows;
    hint = rows.begin();
    Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // special case if reading first entity in dataset, because
    // there is no previous end value.
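    // (The offsets table stores the end index of each entity's value list, so
    // each block of file ids needs one extra preceding row to supply the
    // previous end value. A block that begins at start_file_id has no such
    // row; its previous end is treated as -1 via have_prev_end below.)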
    if( pair != file_ids.const_pair_end() && pair->first == start_file_id )
    {
        hint = rows.insert( nudge, pair->second - start_file_id + nudge );
        ++pair;
    }
    while( pair != file_ids.const_pair_end() )
    {
        hint = rows.insert( hint, pair->first - start_file_id + nudge - 1, pair->second - start_file_id + nudge );
        ++pair;
    }

    // set up read of offsets dataset
    hsize_t buffer_size = bufferSize / sizeof( hssize_t );
    hssize_t* buffer = reinterpret_cast< hssize_t* >( dataBuffer );
    data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
    hssize_t prev_end;
    bool have_prev_end = false;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
    if( !file_ids.empty() && file_ids.front() == start_file_id )
    {
        prev_end = -1;
        have_prev_end = true;
    }

    dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    // read offset table
    size_t count, offset;
    Range::const_iterator fiter = file_ids.begin();
    hint = offsets_out.begin();
    int nn = 0;
    while( !data_set.done() )
    {
        dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
        try
        {
            data_set.read( buffer, count );
        }
        catch( ReadHDF5Dataset::Exception& )
        {
            return MB_FAILURE;
        }
        if( !count )  // might have been NULL read for collective IO
            continue;

        // If the previous end values were read in the previous iteration,
        // then they're stored in prev_end.
        offset = 0;
        if( have_prev_end )
        {
            counts_out.push_back( buffer[0] - prev_end );
            hint = offsets_out.insert( hint, prev_end + 1 + nudge, buffer[0] + nudge );
            ++fiter;
            offset = 1;
            have_prev_end = false;
        }

        while( offset < count )
        {
            assert( fiter != file_ids.end() );
            // whenever we get to a gap between blocks we need to
            // advance one step because we read an extra end id
            // preceding each block
            if( fiter == fiter.start_of_block() )
            {
                if( offset == count - 1 ) break;
                ++offset;
            }

            size_t s = buffer[offset - 1] + 1;
            size_t e = buffer[offset];
            counts_out.push_back( e - s + 1 );
            hint = offsets_out.insert( hint, s + nudge, e + nudge );

            ++fiter;
            ++offset;
        }

        // If we did not end on the boundary between two blocks,
        // then we need to save the end indices for the final entry
        // for use in the next iteration. Similarly, if we ended
        // with extra values that were read with the express intention
        // of getting the previous end values for a block, we need to
        // save them. This case only arises if we hit the break in
        // the above loop.
        if( fiter != fiter.start_of_block() || offset < count )
        {
            assert( !have_prev_end );
            if( offset == count )
            {
                --offset;
                assert( fiter != fiter.start_of_block() );
            }
            else
            {
                assert( offset + 1 == count );
                assert( fiter == fiter.start_of_block() );
            }
            have_prev_end = true;
            prev_end = buffer[offset];
        }
    }
    assert( !have_prev_end );
    assert( fiter == file_ids.end() );

    return MB_SUCCESS;
}

}  // namespace moab
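For orientation, the following is a minimal, hypothetical sketch of how a caller might drive the two members defined above: read_offsets() first turns a set of file ids into value-table rows and per-entity counts, and read_data() then reads those rows and hands each entity's values to the subclass's store_data() implementation. The driver function name, the assumption that these members are callable from outside the class, and the choice of nudge value are illustrative only and are not part of the MOAB API.

// Hypothetical usage sketch (not part of MOAB). 'reader' must be a concrete
// subclass of ReadHDF5VarLen that implements store_data().
#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Ipublic.h"
#include <vector>

namespace moab
{

ErrorCode read_var_len_example( ReadHDF5VarLen& reader,
                                ReadHDF5Dataset& offset_table,  // one-column table of end offsets
                                ReadHDF5Dataset& value_table,   // variable-length value table
                                const Range& file_ids,          // entities to read
                                EntityHandle start_file_id,     // first file id stored in the tables
                                hid_t value_type )              // HDF5 type of the stored values
{
    const EntityHandle nudge = 1;    // shift row numbers so the row "before the first" is representable
    Range rows;                      // rows of value_table to read
    std::vector< unsigned > counts;  // number of values per entity, parallel to file_ids
    const Range no_ranged_ids;       // no "ranged" set contents in this example

    // Convert end offsets into contiguous row ranges and per-entity counts.
    ErrorCode rval = reader.read_offsets( offset_table, file_ids, start_file_id, nudge, rows, counts );
    if( MB_SUCCESS != rval ) return rval;

    // Read the values; store_data() is invoked once per entity in file_ids.
    return reader.read_data( value_table, rows, nudge, value_type, file_ids, counts, no_ranged_ids );
}

}  // namespace moab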