fathom/moab-docs/mesquite_2Misc_2FileTokenizer_8cpp_source.html

00001 /* *****************************************************************
00002     MESQUITE -- The Mesh Quality Improvement Toolkit
00003
00004     Copyright 2004 Lawrence Livermore National Laboratory.  Under
00005     the terms of Contract B545069 with the University of Wisconsin --
00006     Madison, Lawrence Livermore National Laboratory retains certain
00007     rights in this software.
00008
00009     This library is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU Lesser General Public
00011     License as published by the Free Software Foundation; either
00012     version 2.1 of the License, or (at your option) any later version.
00013
00014     This library is distributed in the hope that it will be useful,
00015     but WITHOUT ANY WARRANTY; without even the implied warranty of
00016     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017     Lesser General Public License for more details.
00018
00019     You should have received a copy of the GNU Lesser General Public License
00020     (lgpl.txt) along with this library; if not, write to the Free Software
00021     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022
00023     kraftche@cae.wisc.edu
00024
00025   ***************************************************************** */
00026
#include "FileTokenizer.hpp"
#include "MsqError.hpp"
#include <cctype>
#include <cerrno>
#include <cfloat>
#include <cstdlib>
#include <cstring>
00032
00033 namespace MBMesquite
00034 {
00035
00036 FileTokenizer::FileTokenizer( std::FILE* file_ptr )
00037     : filePtr( file_ptr ), nextToken( buffer ), bufferEnd( buffer ), lineNumber( 1 ), lastChar( '\0' )
00038 {
00039 }
00040
00041 FileTokenizer::~FileTokenizer()
00042 {
00043     fclose( filePtr );
00044 }
00045
00046 bool FileTokenizer::eof() const
00047 {
00048     return nextToken == bufferEnd && feof( filePtr );
00049 }
00050
00051 const char* FileTokenizer::get_string( MsqError& err )
00052 {
00053     // If the whitepsace character marking the end of the
00054     // last token was a newline, increment the line count.
00055     if( lastChar == '\n' ) ++lineNumber;
00056
00057     // Loop until either found the start of a token to return or have
00058     // reached the end of the file.
00059     for( ;; )
00060     {
00061         // If the buffer is empty, read more.
00062         if( nextToken == bufferEnd )
00063         {
00064             size_t count = fread( buffer, 1, sizeof( buffer ) - 1, filePtr );
00065             if( !count )
00066             {
00067                 if( feof( filePtr ) )
00068                     MSQ_SETERR( err )( "File truncated.\n", MsqError::PARSE_ERROR );
00069                 else
00070                     MSQ_SETERR( err )( MsqError::IO_ERROR );
00071                 return NULL;
00072             }
00073
00074             nextToken = buffer;
00075             bufferEnd = buffer + count;
00076         }
00077
00078         // If the current character is not a space, we've found a token.
00079         if( !isspace( *nextToken ) ) break;
00080
00081         // If the current space character is a newline,
00082         // increment the line number count.
00083         if( *nextToken == '\n' ) ++lineNumber;
00084         ++nextToken;
00085     }
00086
00087     // Store the start of the token in "result" and
00088     // advance "nextToken" to one past the end of the
00089     // token.
00090     char* result = nextToken;
00091     while( nextToken != bufferEnd && !isspace( *nextToken ) )
00092         ++nextToken;
00093
00094     // If we have reached the end of the buffer without finding
00095     // a whitespace character terminating the token, we need to
00096     // read more from the file.  Only try once.  If the token is
00097     // too large to fit in the buffer, give up.
00098     if( nextToken == bufferEnd )
00099     {
00100         // Shift the (possibly) partial token to the start of the buffer.
00101         size_t remaining = bufferEnd - result;
00102         memmove( buffer, result, remaining );
00103         result    = buffer;
00104         nextToken = result + remaining;
00105
00106         // Fill the remainder of the buffer after the token.
00107         size_t count = fread( nextToken, 1, sizeof( buffer ) - remaining - 1, filePtr );
00108         if( !count && !feof( filePtr ) )
00109         {
00110             MSQ_SETERR( err )( "I/O error.\n", MsqError::IO_ERROR );
00111             return NULL;
00112         }
00113         bufferEnd = nextToken + count;
00114
00115         // Continue to advance nextToken until we find the space
00116         // terminating the token.
00117         while( nextToken != bufferEnd && !isspace( *nextToken ) )
00118             ++nextToken;
00119
00120         if( nextToken == bufferEnd )  // EOF
00121         {
00122             *bufferEnd = '\0';
00123             ++bufferEnd;
00124         }
00125     }
00126
00127     // Save terminating whitespace character (or NULL char if EOF).
00128     lastChar = *nextToken;
00129     // Put null in buffer to mark end of current token.
00130     *nextToken = '\0';
00131     // Advance nextToken to the next character to search next time.
00132     ++nextToken;
00133     return result;
00134 }
00135
00136 bool FileTokenizer::get_double_internal( double& result, MsqError& err )
00137 {
00138     // Get a token
00139     const char *token_end, *token = get_string( err );
00140     if( MSQ_CHKERR( err ) ) return false;
00141
00142     // Check for hex value -- on some platforms (e.g. Linux), strtod
00143     // will accept hex values, on others (e.g. Sun) it wil not.  Force
00144     // failure on hex numbers for consistancy.
00145     if( token[0] && token[1] && token[0] == '0' && toupper( token[1] ) == 'X' )
00146     {
00147         MSQ_SETERR( err )
00148         ( MsqError::PARSE_ERROR, "Syntax error at line %d: expected number, got \"%s\"", line_number(), token );
00149         return false;
00150     }
00151
00152     // Parse token as double
00153     result = strtod( token, (char**)&token_end );
00154
00155     // If the one past the last char read by strtod is
00156     // not the NULL character terminating the string,
00157     // then parse failed.
00158     if( *token_end )
00159     {
00160         MSQ_SETERR( err )
00161         ( MsqError::PARSE_ERROR, "Syntax error at line %d: expected number, got \"%s\"", line_number(), token );
00162         return false;
00163     }
00164
00165     return true;
00166 }
00167
00168 bool FileTokenizer::get_float_internal( float& result, MsqError& err )
00169 {
00170     double d = 0.0;
00171     get_double_internal( d, err );
00172     if( MSQ_CHKERR( err ) ) return false;
00173
00174     result = (float)d;
00175     if( d != (double)result )
00176     {
00177         MSQ_SETERR( err )( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
00178         return false;
00179     }
00180
00181     return true;
00182 }
00183
00184 bool FileTokenizer::get_long_int_internal( long& result, MsqError& err )
00185 {
00186     // Get a token
00187     const char *token_end, *token = get_string( err );
00188     if( MSQ_CHKERR( err ) ) return false;
00189
00190     // Parse token as long
00191     result = strtol( token, (char**)&token_end, 0 );
00192
00193     // If the one past the last char read by strtol is
00194     // not the NULL character terminating the string,
00195     // then parse failed.
00196     if( *token_end )
00197     {
00198         MSQ_SETERR( err )
00199         ( MsqError::PARSE_ERROR, "Syntax error at line %d: expected integer, got \"%s\"", line_number(), token );
00200         return false;
00201     }
00202
00203     return true;
00204 }
00205
00206 bool FileTokenizer::get_byte_internal( unsigned char& result, MsqError& err )
00207 {
00208     long i = 0;
00209     get_long_int_internal( i, err );
00210     if( MSQ_CHKERR( err ) ) return false;
00211
00212     result = (unsigned char)i;
00213     if( i != (long)result )
00214     {
00215         MSQ_SETERR( err )( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
00216         return false;
00217     }
00218
00219     return true;
00220 }
00221
00222 bool FileTokenizer::get_short_int_internal( short& result, MsqError& err )
00223 {
00224     long i = 0;
00225     get_long_int_internal( i, err );
00226     if( MSQ_CHKERR( err ) ) return false;
00227
00228     result = (short)i;
00229     if( i != (long)result )
00230     {
00231         MSQ_SETERR( err )( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
00232         return false;
00233     }
00234
00235     return true;
00236 }
00237
00238 bool FileTokenizer::get_integer_internal( int& result, MsqError& err )
00239 {
00240     long i = 0;
00241     get_long_int_internal( i, err );
00242     if( MSQ_CHKERR( err ) ) return false;
00243
00244     result = (int)i;
00245     if( i != (long)result )
00246     {
00247         MSQ_SETERR( err )( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
00248         return false;
00249     }
00250
00251     return true;
00252 }
00253
00254 bool FileTokenizer::get_boolean_internal( bool& result, MsqError& err )
00255 {
00256     // Get a token
00257     const char* token = get_string( err );
00258     if( MSQ_CHKERR( err ) ) return false;
00259
00260     if( token[1] || ( token[0] != '0' && token[0] != '1' ) )
00261     {
00262         MSQ_SETERR( err )
00263         ( MsqError::PARSE_ERROR, "Syntax error at line %d: expected 0 or 1, got \"%s\"", line_number(), token );
00264         return false;
00265     }
00266
00267     result = token[0] == '1';
00268     return true;
00269 }
00270
00271 bool FileTokenizer::get_floats( size_t count, float* array, MsqError& err )
00272 {
00273     for( size_t i = 0; i < count; ++i )
00274     {
00275         if( !get_float_internal( *array, err ) ) return false;
00276         ++array;
00277     }
00278     return true;
00279 }
00280
00281 bool FileTokenizer::get_doubles( size_t count, double* array, MsqError& err )
00282 {
00283     for( size_t i = 0; i < count; ++i )
00284     {
00285         get_double_internal( *array, err );
00286         if( MSQ_CHKERR( err ) ) return false;
00287         ++array;
00288     }
00289     return true;
00290 }
00291
00292 bool FileTokenizer::get_bytes( size_t count, unsigned char* array, MsqError& err )
00293 {
00294     for( size_t i = 0; i < count; ++i )
00295     {
00296         get_byte_internal( *array, err );
00297         if( MSQ_CHKERR( err ) ) return false;
00298         ++array;
00299     }
00300     return true;
00301 }
00302
00303 bool FileTokenizer::get_short_ints( size_t count, short* array, MsqError& err )
00304 {
00305     for( size_t i = 0; i < count; ++i )
00306     {
00307         get_short_int_internal( *array, err );
00308         if( MSQ_CHKERR( err ) ) return false;
00309         ++array;
00310     }
00311     return true;
00312 }
00313
00314 bool FileTokenizer::get_integers( size_t count, int* array, MsqError& err )
00315 {
00316     for( size_t i = 0; i < count; ++i )
00317     {
00318         get_integer_internal( *array, err );
00319         if( MSQ_CHKERR( err ) ) return false;
00320         ++array;
00321     }
00322     return true;
00323 }
00324
00325 bool FileTokenizer::get_long_ints( size_t count, long* array, MsqError& err )
00326 {
00327     for( size_t i = 0; i < count; ++i )
00328     {
00329         get_long_int_internal( *array, err );
00330         if( MSQ_CHKERR( err ) ) return false;
00331         ++array;
00332     }
00333     return true;
00334 }
00335
00336 bool FileTokenizer::get_booleans( size_t count, bool* array, MsqError& err )
00337 {
00338     for( size_t i = 0; i < count; ++i )
00339     {
00340         get_boolean_internal( *array, err );
00341         if( MSQ_CHKERR( err ) ) return false;
00342         ++array;
00343     }
00344     return true;
00345 }
00346
00347 void FileTokenizer::unget_token()
00348 {
00349     if( nextToken - buffer < 2 ) return;
00350
00351     --nextToken;
00352     *nextToken = lastChar;
00353     --nextToken;
00354     while( nextToken > buffer && *nextToken )
00355         --nextToken;
00356
00357     if( !*nextToken ) ++nextToken;
00358
00359     lastChar = '\0';
00360 }
00361
00362 bool FileTokenizer::match_token( const char* str, MsqError& err )
00363 {
00364     // Get a token
00365     const char* token = get_string( err );
00366     if( MSQ_CHKERR( err ) ) return false;
00367
00368     // Check if it matches
00369     if( 0 == strcmp( token, str ) ) return true;
00370
00371     // Construct error message
00372     MSQ_SETERR( err )
00373     ( MsqError::PARSE_ERROR, "Syntax error at line %d: expected \"%s\", got \"%s\"", line_number(), str, token );
00374     return false;
00375 }  // namespace MBMesquite
00376
00377 int FileTokenizer::match_token( const char* const* list, MsqError& err )
00378 {
00379     // Get a token
00380     const char* token = get_string( err );
00381     if( MSQ_CHKERR( err ) ) return false;
00382
00383     // Check if it matches any input string
00384     const char* const* ptr;
00385     for( ptr = list; *ptr; ++ptr )
00386         if( 0 == strcmp( token, *ptr ) ) return ptr - list + 1;
00387
00388     // No match, constuct error message
00389     std::string message( "Parsing error at line " );
00390     char lineno[16];
00391     sprintf( lineno, "%d", line_number() );
00392     message += lineno;
00393     message += ": expected one of {";
00394     for( ptr = list; *ptr; ++ptr )
00395     {
00396         message += " ";
00397         message += *ptr;
00398     }
00399     message += " } got \"";
00400     message += token;
00401     message += "\"";
00402     MSQ_SETERR( err )( message, MsqError::PARSE_ERROR );
00403     return false;
00404 }
00405
00406 bool FileTokenizer::get_newline( MsqError& err )
00407 {
00408     if( lastChar == '\n' )
00409     {
00410         lastChar = ' ';
00411         ++lineNumber;
00412         return true;
00413     }
00414
00415     // Loop until either we a) find a newline, b) find a non-whitespace
00416     // character or c) reach the end of the file.
00417     for( ;; )
00418     {
00419         // If the buffer is empty, read more.
00420         if( nextToken == bufferEnd )
00421         {
00422             size_t count = fread( buffer, 1, sizeof( buffer ), filePtr );
00423             if( !count )
00424             {
00425                 if( eof() )
00426                     MSQ_SETERR( err )( "File truncated.", MsqError::PARSE_ERROR );
00427                 else
00428                     MSQ_SETERR( err )( MsqError::IO_ERROR );
00429                 return false;
00430             }
00431
00432             nextToken = buffer;
00433             bufferEnd = buffer + count;
00434         }
00435
00436         // If the current character is not a space, the we've failed.
00437         if( !isspace( *nextToken ) )
00438         {
00439             MSQ_SETERR( err )
00440             ( MsqError::PARSE_ERROR, "Expected newline at line %d.", line_number() );
00441             return false;
00442         }
00443
00444         // If the current space character is a newline,
00445         // increment the line number count.
00446         if( *nextToken == '\n' )
00447         {
00448             ++lineNumber;
00449             ++nextToken;
00450             lastChar = ' ';
00451             return true;
00452         }
00453         ++nextToken;
00454     }
00455
00456     // should never reach this
00457     return false;
00458 }
00459
00460 }  // namespace MBMesquite