fathom/moab-docs/FileTokenizer_8cpp_source.html

00001 /**
00002  * MOAB, a Mesh-Oriented datABase, is a software component for creating,
00003  * storing and accessing finite element mesh data.
00004  *
00005  * Copyright 2004 Sandia Corporation.  Under the terms of Contract
00006  * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
00007  * retains certain rights in this software.
00008  *
00009  * This library is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  */
00015
#include "FileTokenizer.hpp"
#include "moab/ReadUtilIface.hpp"
#include "moab/ErrorHandler.hpp"

#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <string>
00024
00025 namespace moab
00026 {
00027
00028 using namespace std;
00029
00030 FileTokenizer::FileTokenizer( FILE* file_ptr, ReadUtilIface* )
00031     : filePtr( file_ptr ), nextToken( buffer ), bufferEnd( buffer ), lineNumber( 1 ), lastChar( '\0' )
00032 {
00033 }
00034
00035 FileTokenizer::~FileTokenizer()
00036 {
00037     fclose( filePtr );
00038 }
00039
00040 bool FileTokenizer::eof() const
00041 {
00042     return nextToken == bufferEnd && feof( filePtr );
00043 }
00044
00045 const char* FileTokenizer::get_string()
00046 {
00047     // If the whitespace character marking the end of the
00048     // last token was a newline, increment the line count.
00049     if( lastChar == '\n' ) ++lineNumber;
00050
00051     // Loop until either found the start of a token to return or have
00052     // reached the end of the file.
00053     for( ;; )
00054     {
00055         // If the buffer is empty, read more.
00056         if( nextToken == bufferEnd )
00057         {
00058             size_t count = fread( buffer, 1, sizeof( buffer ) - 1, filePtr );
00059             if( 0 == count )
00060             {
00061                 if( feof( filePtr ) )
00062                     return NULL;
00063                 else
00064                     MB_SET_ERR_RET_VAL( "I/O Error", NULL );
00065             }
00066
00067             nextToken = buffer;
00068             bufferEnd = buffer + count;
00069         }
00070
00071         // If the current character is not a space, we've found a token.
00072         if( !isspace( *nextToken ) ) break;
00073
00074         // If the current space character is a newline,
00075         // increment the line number count.
00076         if( *nextToken == '\n' ) ++lineNumber;
00077         ++nextToken;
00078     }
00079
00080     // Store the start of the token in "result" and
00081     // advance "nextToken" to one past the end of the
00082     // token.
00083     char* result = nextToken;
00084     while( nextToken != bufferEnd && !isspace( static_cast< unsigned char >( *nextToken ) ) )
00085         ++nextToken;
00086
00087     // If we have reached the end of the buffer without finding
00088     // a whitespace character terminating the token, we need to
00089     // read more from the file.  Only try once.  If the token is
00090     // too large to fit in the buffer, give up.
00091     if( nextToken == bufferEnd )
00092     {
00093         // Shift the (possibly) partial token to the start of the buffer.
00094         size_t remaining = bufferEnd - result;
00095         memmove( buffer, result, remaining );
00096         result    = buffer;
00097         nextToken = result + remaining;
00098
00099         // Fill the remainder of the buffer after the token.
00100         size_t count = fread( nextToken, 1, sizeof( buffer ) - remaining - 1, filePtr );
00101         if( 0 == count && !feof( filePtr ) ) MB_SET_ERR_RET_VAL( "I/O Error", NULL );
00102         bufferEnd = nextToken + count;
00103
00104         // Continue to advance nextToken until we find the space
00105         // terminating the token.
00106         while( nextToken != bufferEnd && !isspace( *nextToken ) )
00107             ++nextToken;
00108
00109         if( nextToken == bufferEnd )
00110         {  // EOF
00111             *bufferEnd = '\0';
00112             ++bufferEnd;
00113         }
00114     }
00115
00116     // Save terminating whitespace character (or NULL char if EOF).
00117     lastChar = *nextToken;
00118     // Put null in buffer to mark end of current token.
00119     *nextToken = '\0';
00120     // Advance nextToken to the next character to search next time.
00121     ++nextToken;
00122
00123     return result;
00124 }
00125
00126 bool FileTokenizer::get_double_internal( double& result )
00127 {
00128     // Get a token
00129     const char *token_end, *token = get_string();
00130     if( !token ) return false;
00131
00132     // Check for hex value -- on some platforms (e.g. Linux), strtod
00133     // will accept hex values, on others (e.g. Sun) it will not.  Force
00134     // failure on hex numbers for consistency.
00135     if( token[0] && token[1] && token[0] == '0' && toupper( token[1] ) == 'X' )
00136         MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
00137                             false );
00138
00139     // Parse token as double
00140     result = strtod( token, (char**)&token_end );
00141
00142     // If the one past the last char read by strtod is
00143     // not the NULL character terminating the string,
00144     // then parse failed.
00145     if( *token_end )
00146         MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
00147                             false );
00148
00149     return true;
00150 }
00151
00152 bool FileTokenizer::get_float_internal( float& result )
00153 {
00154     double d;
00155     if( !get_double_internal( d ) ) return false;
00156
00157     result = (float)d;
00158
00159     return true;
00160 }
00161
00162 bool FileTokenizer::get_long_int_internal( long& result )
00163 {
00164     // Get a token
00165     const char *token_end, *token = get_string();
00166     if( !token ) return false;
00167
00168     // Parse token as long
00169     result = strtol( token, (char**)&token_end, 0 );
00170
00171     // If the one past the last char read by strtol is
00172     // not the NULL character terminating the string,
00173     // then parse failed.
00174     if( *token_end )
00175         MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
00176                             false );
00177
00178     return true;
00179 }
00180
00181 bool FileTokenizer::get_byte_internal( unsigned char& result )
00182 {
00183     long i;
00184     if( !get_long_int_internal( i ) ) return false;
00185
00186     result = (unsigned char)i;
00187     if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
00188
00189     return true;
00190 }
00191
00192 bool FileTokenizer::get_short_int_internal( short& result )
00193 {
00194     long i;
00195     if( !get_long_int_internal( i ) ) return false;
00196
00197     result = (short)i;
00198     if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
00199
00200     return true;
00201 }
00202
00203 bool FileTokenizer::get_integer_internal( int& result )
00204 {
00205     long i;
00206     if( !get_long_int_internal( i ) ) return false;
00207
00208     result = (int)i;
00209     if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
00210
00211     return true;
00212 }
00213
00214 bool FileTokenizer::get_boolean_internal( bool& result )
00215 {
00216     // Get a token
00217     const char* token = get_string();
00218     if( !token ) return false;
00219
00220     if( token[1] || ( token[0] != '0' && token[0] != '1' ) )
00221         MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected 0 or 1, got \"" << token << "\"",
00222                             false );
00223
00224     result = token[0] == '1';
00225
00226     return true;
00227 }
00228
00229 bool FileTokenizer::get_floats( size_t count, float* array )
00230 {
00231     for( size_t i = 0; i < count; ++i )
00232     {
00233         if( !get_float_internal( *array ) ) return false;
00234         ++array;
00235     }
00236
00237     return true;
00238 }
00239
00240 bool FileTokenizer::get_doubles( size_t count, double* array )
00241 {
00242     for( size_t i = 0; i < count; ++i )
00243     {
00244         if( !get_double_internal( *array ) ) return false;
00245         ++array;
00246     }
00247
00248     return true;
00249 }
00250
00251 bool FileTokenizer::get_bytes( size_t count, unsigned char* array )
00252 {
00253     for( size_t i = 0; i < count; ++i )
00254     {
00255         if( !get_byte_internal( *array ) ) return false;
00256         ++array;
00257     }
00258
00259     return true;
00260 }
00261
00262 bool FileTokenizer::get_short_ints( size_t count, short* array )
00263 {
00264     for( size_t i = 0; i < count; ++i )
00265     {
00266         if( !get_short_int_internal( *array ) ) return false;
00267         ++array;
00268     }
00269
00270     return true;
00271 }
00272
00273 bool FileTokenizer::get_integers( size_t count, int* array )
00274 {
00275     for( size_t i = 0; i < count; ++i )
00276     {
00277         if( !get_integer_internal( *array ) ) return false;
00278         ++array;
00279     }
00280
00281     return true;
00282 }
00283
00284 bool FileTokenizer::get_long_ints( size_t count, long* array )
00285 {
00286     for( size_t i = 0; i < count; ++i )
00287     {
00288         if( !get_long_int_internal( *array ) ) return false;
00289         ++array;
00290     }
00291
00292     return true;
00293 }
00294
00295 bool FileTokenizer::get_booleans( size_t count, bool* array )
00296 {
00297     for( size_t i = 0; i < count; ++i )
00298     {
00299         if( !get_boolean_internal( *array ) ) return false;
00300         ++array;
00301     }
00302
00303     return true;
00304 }
00305
00306 void FileTokenizer::unget_token()
00307 {
00308     if( nextToken - buffer < 2 ) return;
00309
00310     --nextToken;
00311     *nextToken = lastChar;
00312     --nextToken;
00313     while( nextToken > buffer && *nextToken )
00314         --nextToken;
00315
00316     if( !*nextToken ) ++nextToken;
00317
00318     lastChar = '\0';
00319 }
00320
00321 bool FileTokenizer::match_token( const char* str, bool print_error )
00322 {
00323     // Get a token
00324     const char* token = get_string();
00325     if( !token ) return false;
00326
00327     // Check if it matches
00328     if( 0 == strcmp( token, str ) ) return true;
00329
00330     // Construct error message
00331     if( print_error )
00332         MB_SET_ERR_CONT( "Syntax error at line " << line_number() << ": expected \"" << str << "\", got \"" << token
00333                                                  << "\"" );
00334
00335     return false;
00336 }
00337
00338 int FileTokenizer::match_token( const char* const* list, bool print_error )
00339 {
00340     // Get a token
00341     const char* token = get_string();
00342     if( !token ) return 0;
00343
00344     // Check if it matches any input string
00345     const char* const* ptr;
00346     for( ptr = list; *ptr; ++ptr )
00347     {
00348         if( 0 == strcmp( token, *ptr ) ) return ptr - list + 1;
00349     }
00350
00351     if( !print_error ) return 0;
00352
00353     // No match, constuct error message
00354     std::string message( "Parsing error at line " );
00355     char lineno[16];
00356     sprintf( lineno, "%d", line_number() );
00357     message += lineno;
00358     message += ": expected one of {";
00359     for( ptr = list; *ptr; ++ptr )
00360     {
00361         message += " ";
00362         message += *ptr;
00363     }
00364     message += " } got \"";
00365     message += token;
00366     message += "\"";
00367     MB_SET_ERR_CONT( message.c_str() );
00368
00369     return 0;
00370 }
00371
00372 bool FileTokenizer::get_newline( bool report_error )
00373 {
00374     if( lastChar == '\n' )
00375     {
00376         lastChar = ' ';
00377         ++lineNumber;
00378         return true;
00379     }
00380
00381     // Loop until either we a) find a newline, b) find a non-whitespace
00382     // character or c) reach the end of the file.
00383     for( ;; )
00384     {
00385         // If the buffer is empty, read more.
00386         if( nextToken == bufferEnd )
00387         {
00388             size_t count = fread( buffer, 1, sizeof( buffer ), filePtr );
00389             if( 0 == count )
00390             {
00391                 if( eof() )
00392                     MB_SET_ERR_RET_VAL( "File truncated at line " << line_number(), false );
00393                 else
00394                     MB_SET_ERR_RET_VAL( "I/O Error", false );
00395             }
00396
00397             nextToken = buffer;
00398             bufferEnd = buffer + count;
00399         }
00400
00401         // If the current character is not a space, the we've failed.
00402         if( !isspace( *nextToken ) )
00403             if( report_error ) MB_SET_ERR_RET_VAL( "Expected newline at line " << line_number(), false );
00404
00405         // If the current space character is a newline,
00406         // increment the line number count.
00407         if( *nextToken == '\n' )
00408         {
00409             ++lineNumber;
00410             ++nextToken;
00411             lastChar = ' ';
00412             return true;
00413         }
00414         ++nextToken;
00415     }
00416
00417     return false;
00418 }
00419
00420 bool FileTokenizer::get_binary( size_t size, void* mem )
00421 {
00422     // If data in buffer
00423     if( nextToken != bufferEnd )
00424     {
00425         // If requested size is less than buffer contents,
00426         // just pass back part of the buffer
00427         if( bufferEnd - nextToken <= (int)size )
00428         {
00429             memcpy( mem, nextToken, size );
00430             nextToken += size;
00431             return true;
00432         }
00433
00434         // Copy buffer contents into memory and clear buffer
00435         memcpy( mem, nextToken, bufferEnd - nextToken );
00436         size -= bufferEnd - nextToken;
00437         mem       = reinterpret_cast< char* >( mem ) + ( bufferEnd - nextToken );
00438         nextToken = bufferEnd;
00439     }
00440
00441     // Read any additional data from file
00442     return size == fread( mem, 1, size, filePtr );
00443 }
00444
00445 }  // namespace moab