%{
/*
 * Copyright 2008, 2009 University Corporation for Atmospheric Research
 *
 * This file is part of the UDUNITS-2 package.  See the file LICENSE
 * in the top-level source-directory of the package for copying and
 * redistribution conditions.
 */
/*
 * yacc(1)-based parser for decoding formatted unit specifications.
 *
 * This module is thread-compatible but not thread-safe.  Multi-threaded
 * access must be externally synchronized.
 */

/*LINTLIBRARY*/

#ifndef _XOPEN_SOURCE
#   define _XOPEN_SOURCE 500
#endif

/*
 * NOTE(review): the header names of the seven system includes were missing
 * from the reviewed text.  The list below was reconstructed from the
 * identifiers this file uses (assert, errno, size_t, FILE, free/realloc,
 * strlen/strdup/strerror, strcasecmp) -- confirm against the repository.
 */
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "udunits2.h"

static ut_unit*         _finalUnit;     /* fully-parsed specification */
static ut_system*       _unitSystem;    /* The unit-system to use */
static char*            _errorMessage;  /* last error-message */
static ut_encoding      _encoding;      /* encoding of string to be parsed */
static int              _restartScanner;/* restart scanner? */


/*
 * Removes leading and trailing whitespace from a string.  The trimmed text
 * is moved to the beginning of the buffer and NUL-terminated there.  The
 * status is set to UT_SUCCESS.
 *
 * Arguments:
 *      string          NUL-terminated string.  Will be modified if it
 *                      contains whitespace.
 *      encoding        The character-encoding of "string".  For UT_LATIN1
 *                      the no-break space (0xA0) also counts as whitespace.
 * Returns:
 *      "string"
 */
char*
ut_trim(
    char* const         string,
    const ut_encoding   encoding)
{
    static const char*  asciiSpace = " \t\n\r\f\v";
    static const char*  latin1Space = " \t\n\r\f\v\xa0";    /* add NBSP */
    const char*         whiteSpace;
    char*               start;
    char*               stop;
    size_t              len;

    whiteSpace =
        encoding == UT_LATIN1
            ? latin1Space
            : asciiSpace;

    /* First non-whitespace character. */
    start = string + strspn(string, whiteSpace);

    /* One past the last non-whitespace character. */
    for (stop = start + strlen(start); stop > start; --stop)
        if (strchr(whiteSpace, stop[-1]) == NULL)
            break;

    len = stop - start;

    /* The regions may overlap, hence memmove() rather than memcpy(). */
    (void)memmove(string, start, len);
    string[len] = 0;

    ut_set_status(UT_SUCCESS);

    /*
     * The trimmed text now lives at "string".  Returning "start" (as was
     * done previously) handed back a pointer into the middle of the shifted
     * text whenever leading whitespace had been removed.
     */
    return string;
}


/*
 * YACC error routine:  saves a copy of the message in "_errorMessage",
 * releasing any previously saved copy.  If the copy can't be allocated,
 * a static out-of-memory message is saved instead.
 *
 * Arguments:
 *      s               The error-message.
 */
void
uterror(
    char        *s)
{
    static char*        nomem = "uterror(): out of memory";

    /* The static fallback message must never be passed to free(). */
    if (_errorMessage != NULL && _errorMessage != nomem)
        free(_errorMessage);

    _errorMessage = strdup(s);

    if (_errorMessage == NULL)
        _errorMessage = nomem;
}

%}

%union {
    char*       id;     /* identifier */
    ut_unit*    unit;   /* "unit" structure */
    double      rval;   /* floating-point numerical value */
    long        ival;   /* integer numerical value */
}

/*
 * NOTE(review): the <member> value-type tags on the %token and %type
 * declarations were missing from the reviewed text.  They were reconstructed
 * from the %union members and from how each symbol's value is used in the
 * actions below -- confirm against the repository.
 */
%token          ERR
%token          SHIFT
%token          MULTIPLY
%token          DIVIDE
%token  <ival>  INT
%token  <ival>  EXPONENT
%token  <rval>  REAL
%token  <id>    ID
%token  <rval>  DATE
%token  <rval>  CLOCK
%token  <rval>  TIMESTAMP
%token  <rval>  LOGREF

%type   <unit>  unit_spec
%type   <unit>  shift_exp
%type   <unit>  product_exp
%type   <unit>  power_exp
%type   <unit>  basic_exp
%type   <rval>  timestamp
%type   <rval>  number

%%

/*
 * Start symbol.  An empty specification yields the dimensionless unit one
 * of the unit-system.  The result is left in "_finalUnit".
 */
unit_spec:      /* nothing */ {
                    _finalUnit = ut_get_dimensionless_unit_one(_unitSystem);
                    YYACCEPT;
                } |
                shift_exp {
                    _finalUnit = $1;
                    YYACCEPT;
                } |
                error {
                    ut_set_status(UT_SYNTAX);
                    YYABORT;
                }
                ;

/*
 * A product expression, optionally offset by a number or by a timestamp.
 * The operand unit is released once the derived unit has been requested.
 */
shift_exp:      product_exp {
                    $$ = $1;
                } |
                product_exp SHIFT REAL {
                    $$ = ut_offset($1, $3);
                    ut_free($1);
                    if ($$ == NULL)
                        YYABORT;
                } |
                product_exp SHIFT INT {
                    $$ = ut_offset($1, $3);
                    ut_free($1);
                    if ($$ == NULL)
                        YYABORT;
                } |
                product_exp SHIFT timestamp {
                    $$ = ut_offset_by_time($1, $3);
                    ut_free($1);
                    if ($$ == NULL)
                        YYABORT;
                }
                ;

/*
 * Products and quotients of power expressions.  Plain juxtaposition of two
 * expressions is treated as multiplication.
 */
product_exp:    power_exp {
                    $$ = $1;
                } |
                product_exp power_exp {
                    $$ = ut_multiply($1, $2);
                    ut_free($1);
                    ut_free($2);
                    if ($$ == NULL)
                        YYABORT;
                } |
                product_exp MULTIPLY power_exp {
                    $$ = ut_multiply($1, $3);
                    ut_free($1);
                    ut_free($3);
                    if ($$ == NULL)
                        YYABORT;
                } |
                product_exp DIVIDE power_exp {
                    $$ = ut_divide($1, $3);
                    ut_free($1);
                    ut_free($3);
                    if ($$ == NULL)
                        YYABORT;
                }
                ;

/*
 * A basic expression, optionally raised to an integral power given either
 * as an INT token or as an EXPONENT token.
 */
power_exp:      basic_exp {
                    $$ = $1;
                } |
                basic_exp INT {
                    $$ = ut_raise($1, $2);
                    ut_free($1);
                    if ($$ == NULL)
                        YYABORT;
                } |
                basic_exp EXPONENT {
                    $$ = ut_raise($1, $2);
                    ut_free($1);
                    if ($$ == NULL)
                        YYABORT;
                }
                ;

basic_exp:      ID {
                    double      prefix = 1;
                    ut_unit*    unit = NULL;
                    char*       cp = $1;
                    int         symbolPrefixSeen = 0;

                    /*
                     * Peel prefixes off the front of the identifier until
                     * the remainder is a known unit name or symbol.  Name
                     * prefixes may repeat; at most one symbol prefix is
                     * accepted.
                     */
                    while (*cp) {
                        size_t  nchar;
                        double  value;

                        unit = ut_get_unit_by_name(_unitSystem, cp);
                        if (unit != NULL)
                            break;

                        unit = ut_get_unit_by_symbol(_unitSystem, cp);
                        if (unit != NULL)
                            break;

                        if (utGetPrefixByName(_unitSystem, cp, &value, &nchar)
                                != UT_SUCCESS) {
                            if (symbolPrefixSeen ||
                                    utGetPrefixBySymbol(_unitSystem, cp,
                                        &value, &nchar) != UT_SUCCESS)
                                break;
                            symbolPrefixSeen = 1;
                        }

                        prefix *= value;
                        cp += nchar;
                    }

                    /* The identifier string is owned by this action. */
                    free($1);

                    if (unit == NULL) {
                        ut_set_status(UT_UNKNOWN);
                        YYABORT;
                    }

                    $$ = ut_scale(prefix, unit);
                    ut_free(unit);
                    if ($$ == NULL)
                        YYABORT;
                } |
                '(' shift_exp ')' {
                    $$ = $2;
                } |
                LOGREF product_exp ')' {
                    $$ = ut_log($1, $2);
                    ut_free($2);
                    if ($$ == NULL)
                        YYABORT;
                } |
                number {
                    $$ = ut_scale($1,
                        ut_get_dimensionless_unit_one(_unitSystem));
                    /*
                     * Abort on failure like every other action instead of
                     * propagating a NULL unit up the parse.
                     */
                    if ($$ == NULL)
                        YYABORT;
                }
                ;

number:         INT {
                    $$ = $1;
                } |
                REAL {
                    $$ = $1;
                }
                ;

/*
 * A time origin.  A trailing CLOCK or INT is subtracted from the encoded
 * time (NOTE(review): presumably a time-zone offset -- confirm with the
 * scanner).  An INT of magnitude <= 24 is passed to ut_encode_clock() as
 * its first argument alone; one of magnitude 100..2400 is split into
 * value/100 and value%100 for the first two arguments.  A trailing ID must
 * be "UTC", "GMT", or "Z" (case-insensitive).
 */
timestamp:      DATE {
                    $$ = $1;
                } |
                DATE CLOCK {
                    $$ = $1 + $2;
                } |
                DATE CLOCK CLOCK {
                    $$ = $1 + ($2 - $3);
                } |
                DATE CLOCK INT {
                    int mag = $3 >= 0 ? $3 : -$3;

                    if (mag <= 24) {
                        $$ = $1 + ($2 - ut_encode_clock($3, 0, 0));
                    }
                    else if (mag >= 100 && mag <= 2400) {
                        $$ = $1 + ($2 - ut_encode_clock($3/100, $3%100, 0));
                    }
                    else {
                        ut_set_status(UT_SYNTAX);
                        YYABORT;
                    }
                } |
                DATE CLOCK ID {
                    int error = 0;

                    if (strcasecmp($3, "UTC") != 0 &&
                            strcasecmp($3, "GMT") != 0 &&
                            strcasecmp($3, "Z") != 0) {
                        ut_set_status(UT_UNKNOWN);
                        error = 1;
                    }

                    /* Release the identifier before a possible abort. */
                    free($3);

                    if (!error) {
                        $$ = $1 + $2;
                    }
                    else {
                        YYABORT;
                    }
                } |
                TIMESTAMP {
                    $$ = $1;
                } |
                TIMESTAMP CLOCK {
                    $$ = $1 - $2;
                } |
                TIMESTAMP INT {
                    int mag = $2 >= 0 ? $2 : -$2;

                    if (mag <= 24) {
                        $$ = $1 - ut_encode_clock($2, 0, 0);
                    }
                    else if (mag >= 100 && mag <= 2400) {
                        $$ = $1 - ut_encode_clock($2/100, $2%100, 0);
                    }
                    else {
                        ut_set_status(UT_SYNTAX);
                        YYABORT;
                    }
                } |
                TIMESTAMP ID {
                    int error = 0;

                    if (strcasecmp($2, "UTC") != 0 &&
                            strcasecmp($2, "GMT") != 0 &&
                            strcasecmp($2, "Z") != 0) {
                        ut_set_status(UT_UNKNOWN);
                        error = 1;
                    }

                    /* Release the identifier before a possible abort. */
                    free($2);

                    if (!error) {
                        $$ = $1;
                    }
                    else {
                        YYABORT;
                    }
                }
                ;

%%

/*
 * Map the generated parser's "yy" identifiers onto "ut" names before the
 * scanner source is included.
 */
#define yymaxdepth utmaxdepth
#define yylval utlval
#define yychar utchar
#define yypact utpact
#define yyr1 utr1
#define yyr2 utr2
#define yydef utdef
#define yychk utchk
#define yypgo utpgo
#define yyact utact
#define yyexca utexca
#define yyerrflag uterrflag
#define yynerrs utnerrs
#define yyps utps
#define yypv utpv
#define yys uts
#define yy_yys utyys
#define yystate utstate
#define yytmp uttmp
#define yyv utv
#define yy_yyv utyyv
#define yyval utval
#define yylloc utlloc
#define yyreds utreds
#define yytoks uttoks
#define yylhs utyylhs
#define yylen utyylen
#define yydefred utyydefred
#define yydgoto utyydgoto
#define yysindex utyysindex
#define yyrindex utyyrindex
#define yygindex utyygindex
#define yytable utyytable
#define yycheck utyycheck
#define yyname utyyname
#define yyrule utyyrule

#include "scanner.c"


/*
 * Converts a string in the Latin-1 character set (ISO 8859-1) to the UTF-8
 * character set.
 *
 * Arguments:
 *      latin1String    Pointer to the string to be converted.  May be freed
 *                      upon return.
 * Returns:
 *      NULL            Failure.  ut_handle_error_message() was called.
 *      else            Pointer to UTF-8 representation of "string".  Must not
 *                      be freed.  Subsequent calls may overwrite.
*/
static const char*
latin1ToUtf8(
    const char* const   latin1String)
{
    static char*                utf8String = NULL;  /* reused across calls */
    static size_t               bufSize = 0;        /* bytes in "utf8String" */
    size_t                      size;
    const unsigned char*        in;
    unsigned char*              out;

    assert(latin1String != NULL);

    /* Worst case: every input byte becomes two bytes, plus the NUL. */
    size = 2 * strlen(latin1String) + 1;

    if (size > bufSize) {
        /* Keep the old buffer reachable in case realloc() fails. */
        char*   buf = realloc(utf8String, size);

        if (buf != NULL) {
            utf8String = buf;
            bufSize = size;
        }
        else {
            /* "%lu" matches the unsigned long argument ("%ld" did not). */
            ut_handle_error_message("Couldn't allocate %lu-byte buffer: %s",
                (unsigned long)size, strerror(errno));
            return NULL;
        }
    }

    for (in = (const unsigned char*)latin1String,
            out = (unsigned char*)utf8String; *in; ++in) {
#       define IS_ASCII(c) (((c) & 0x80) == 0)

        if (IS_ASCII(*in)) {
            *out++ = *in;
        }
        else {
            /*
             * Two-byte sequence: the lead byte carries the top two bits of
             * the input byte, the continuation byte the low six bits.
             */
            *out++ = 0xC0 | ((0xC0 & *in) >> 6);
            *out++ = 0x80 | (0x3F & *in);
        }
    }

    *out = 0;

    return utf8String;
}


/*
 * Returns the binary representation of a unit corresponding to a string
 * representation.
 *
 * Arguments:
 *      system          Pointer to the unit-system in which the parsing will
 *                      occur.
 *      string          The string to be parsed (e.g., "millimeters").  There
 *                      should be no leading or trailing whitespace in the
 *                      string.  See ut_trim().
 *      encoding        The encoding of "string".
 * Returns:
 *      NULL            Failure.  "ut_get_status()" will be one of
 *                          UT_BAD_ARG          "system" or "string" is NULL.
 *                          UT_SYNTAX           "string" contained a syntax
 *                                              error.
 *                          UT_UNKNOWN          "string" contained an unknown
 *                                              identifier.
 *                          UT_OS               Operating-system failure.  See
 *                                              "errno".
 *      else            Pointer to the unit corresponding to "string".
*/
ut_unit*
ut_parse(
    const ut_system* const      system,
    const char* const           string,
    ut_encoding                 encoding)
{
    ut_unit*    unit = NULL;            /* failure */

    if (system == NULL || string == NULL) {
        ut_set_status(UT_BAD_ARG);
    }
    else {
        const char*     utf8String;

        utrestart((FILE*)NULL);

        /* Latin-1 input is converted to UTF-8 before it is scanned. */
        if (encoding != UT_LATIN1) {
            utf8String = string;
        }
        else {
            utf8String = latin1ToUtf8(string);
            encoding = UT_UTF8;

            if (utf8String == NULL)
                ut_set_status(UT_OS);
        }

        if (utf8String != NULL) {
            YY_BUFFER_STATE     buf = ut_scan_string(utf8String);

            /* Hand the parsing context to the grammar actions. */
            _unitSystem = (ut_system*)system;
            _encoding = encoding;
            _restartScanner = 1;

#if YYDEBUG
            utdebug = 0;
            ut_flex_debug = 0;
#endif

            _finalUnit = NULL;

            if (utparse() == 0) {
                /* Number of input bytes the scanner has consumed. */
                const size_t    consumed =
                    (size_t)(yy_c_buf_p - buf->yy_ch_buf);

                if (consumed >= strlen(utf8String)) {
                    unit = _finalUnit;  /* success */
                }
                else {
                    /*
                     * Parsing terminated before the end of the string.
                     * Report it as a syntax error so that a NULL return
                     * is never paired with a stale success status.
                     */
                    ut_free(_finalUnit);
                    _finalUnit = NULL;
                    ut_set_status(UT_SYNTAX);
                }
            }

            ut_delete_buffer(buf);
        }                               /* utf8String != NULL */
    }                                   /* valid arguments */

    return unit;
}