libdap  Updated for version 3.17.0
parser-util.cc
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 // (c) COPYRIGHT URI/MIT 1995-1999
00027 // Please read the full copyright statement in the file COPYRIGHT_URI.
00028 //
00029 // Authors:
00030 //      jhrg,jimg       James Gallagher <jgallagher@gso.uri.edu>
00031 
00032 // These functions are utility functions used by the various DAP parsers (the
00033 // DAS, DDS and constraint expression parsers).
00034 // jhrg 9/7/95
00035 
00036 #include "config.h"
00037 
#include <cassert>
#include <cctype>
#include <cerrno>
#include <cmath>
#include <cstdlib>
#include <cstring>
00043 
00044 #include <iostream>
00045 #include <sstream>
00046 
00047 //  We wrap VC++ 6.x strtod() to account for a shortcoming
00048 //  in that function with regard to "NaN".
00049 #ifdef WIN32
00050 #include <limits>
00051 double w32strtod(const char *, char **);
00052 #endif
00053 
00054 #include "Error.h"
00055 #include "debug.h"
00056 #include "parser.h"             // defines constants such as ID_MAX
00057 #include "dods-limits.h"
00058 #include "util.h"               // Jose Garcia: for append_long_to_string.
00059 
00060 using std::cerr;
00061 using std::endl;
00062 
00063 #ifdef WIN32
//  VC++ 6.x strtod() doesn't recognize "NaN".  Account for it by wrapping
//  strtod() in a check for that spelling.  Use of the product is obsolete
//  as of 1/2007, but it is unknown if the issue is still there in later
//  releases of that product.
//  ROM - 01/2007

// Return true when text spells "nan" in any mix of upper and lower case and
// contains nothing else. No memory is allocated.
static bool w32_is_nan_text(const char *text)
{
    static const char lowered[] = "nan";
    int i = 0;
    for (; lowered[i] != '\0'; ++i) {
        // A short input fails here on its terminating '\0', so the scan
        // never reads past the end of text. The cast keeps tolower() defined
        // for characters with the high bit set.
        if (std::tolower(static_cast<unsigned char>(text[i])) != lowered[i])
            return false;
    }
    return text[i] == '\0';
}

// Drop-in replacement for strtod() that also accepts "NaN".
//
// val  NUL-terminated text to convert.
// ptr  receives the address of the first unparsed character, exactly as
//      with strtod(); may be null when the caller does not need it.
//
// Returns a quiet NaN when val is a case-insensitive spelling of "NaN";
// otherwise returns whatever strtod(val, ptr) returns.
double w32strtod(const char *val, char **ptr)
{
    //  If val doesn't spell "NaN|Nan|nan|etc", use strtod as provided.
    if (!w32_is_nan_text(val))
        return strtod(val, ptr);

    //  But if it does, return the bit pattern for NaN and point the parsing
    //  ptr arg (when the caller supplied one) at the trailing '\0'.
    if (ptr)
        *ptr = const_cast<char *>(val) + strlen(val);
    return std::numeric_limits<double>::quiet_NaN();
}
00085 #endif
00086 
00087 namespace libdap {
00088 
00089 // Deprecated, but still used by the HDF4 EOS server code.
00090 void
00091 parse_error(parser_arg * arg, const char *msg, const int line_num,
00092             const char *context)
00093 {
00094     // Jose Garcia
00095     // This assert(s) is (are) only for developing purposes
00096     // For production servers remove it by compiling with NDEBUG
00097     assert(arg);
00098     assert(msg);
00099 
00100     arg->set_status(FALSE);
00101 
00102     string oss = "";
00103 
00104     if (line_num != 0) {
00105         oss += "Error parsing the text on line ";
00106         append_long_to_string(line_num, 10, oss);
00107     }
00108     else {
00109         oss += "Parse error.";
00110     }
00111 
00112     if (context)
00113         oss += (string) " at or near: " + context + (string) "\n" + msg
00114                + (string) "\n";
00115     else
00116         oss += (string) "\n" + msg + (string) "\n";
00117 
00118     arg->set_error(new Error(unknown_error, oss));
00119 }
00120 
00121 void
00122 parse_error(const char *msg, const int line_num, const char *context)
00123 {
00124     // Jose Garcia
00125     // This assert(s) is (are) only for developing purposes
00126     // For production servers remove it by compiling with NDEBUG
00127     assert(msg);
00128 
00129     string oss = "";
00130 
00131     if (line_num != 0) {
00132         oss += "Error parsing the text on line ";
00133         append_long_to_string(line_num, 10, oss);
00134     }
00135     else {
00136         oss += "Parse error.";
00137     }
00138 
00139     if (context)
00140         oss += (string) " at or near: " + context + (string) "\n" + msg
00141                + (string) "\n";
00142     else
00143         oss += (string) "\n" + msg + (string) "\n";
00144 
00145     throw Error(malformed_expr, oss);
00146 }
00147 
00148 // context comes from the parser and will always be a char * unless the
00149 // parsers change dramatically.
00150 void
00151 parse_error(const string & msg, const int line_num, const char *context)
00152 {
00153     parse_error(msg.c_str(), line_num, context);
00154 }
00155 
00156 void save_str(char *dst, const char *src, const int line_num)
00157 {
00158     if (strlen(src) >= ID_MAX)
00159         parse_error(string("The word `") + string(src)
00160                     + string("' is too long (it should be no longer than ")
00161                     + long_to_string(ID_MAX) + string(")."), line_num);
00162 
00163     strncpy(dst, src, ID_MAX);
00164     dst[ID_MAX - 1] = '\0';     /* in case ... */
00165 }
00166 
// Store the word src in the string dst, replacing its previous contents.
// The line number argument is accepted for symmetry with the char * version
// and is ignored, since a string has no length limit to report.
void save_str(string & dst, const char *src, const int)
{
    dst.assign(src);
}
00171 
00172 bool is_keyword(string id, const string & keyword)
00173 {
00174     downcase(id);
00175     id = prune_spaces(id);
00176     DBG(cerr << "is_keyword: " << keyword << " = " << id << endl);
00177     return id == keyword;
00178 }
00179 
00180 int check_byte(const char *val)
00181 {
00182     char *ptr;
00183     long v = strtol(val, &ptr, 0);
00184 
00185     if ((v == 0 && val == ptr) || *ptr != '\0') {
00186         return FALSE;
00187     }
00188 
00189     DBG(cerr << "v: " << v << endl);
00190 
00191     // We're very liberal here with values. Anything that can fit into 8 bits
00192     // is allowed through. Clients will have to deal with the fact that the
00193     // ASCII representation for the value might need to be tweaked. This is
00194     // especially the case for Java clients where Byte datatypes are
00195     // signed. 3/20/2000 jhrg
00196     if ((v < 0 && v < DODS_SCHAR_MIN)
00197         || (v > 0 && static_cast < unsigned long >(v) > DODS_UCHAR_MAX))
00198         return FALSE;
00199 
00200     return TRUE;
00201 }
00202 
00203 // This version of check_int will pass base 8, 10 and 16 numbers when they
00204 // use the ANSI standard for string representation of those number bases.
00205 
00206 int check_int16(const char *val)
00207 {
00208     char *ptr;
00209     long v = strtol(val, &ptr, 0);      // `0' --> use val to determine base
00210 
00211     if ((v == 0 && val == ptr) || *ptr != '\0') {
00212         return FALSE;
00213     }
00214     // Don't use the constant from limits.h, use the ones in dods-limits.h
00215     if (v > DODS_SHRT_MAX || v < DODS_SHRT_MIN) {
00216         return FALSE;
00217     }
00218 
00219     return TRUE;
00220 }
00221 
00222 int check_uint16(const char *val)
00223 {
00224     char *ptr;
00225     unsigned long v = strtol(val, &ptr, 0);
00226 
00227     if ((v == 0 && val == ptr) || *ptr != '\0') {
00228         return FALSE;
00229     }
00230 
00231     if (v > DODS_USHRT_MAX) {
00232         return FALSE;
00233     }
00234 
00235     return TRUE;
00236 }
00237 
00238 int check_int32(const char *val)
00239 {
00240     char *ptr;
00241     errno = 0;
00242     long v = strtol(val, &ptr, 0);      // `0' --> use val to determine base
00243 
00244 
00245     if ((v == 0 && val == ptr) || *ptr != '\0') {
00246         return FALSE;
00247     }
00248 
00249     // We need to check errno since strtol return clamps on overflow so the
00250     // check against the DODS values below will always pass, even for out of
00251     // bounds values in the string. mjohnson 7/20/09
00252     if (errno == ERANGE) {
00253         return FALSE;
00254     }
00255     // This could be combined with the above, or course, but I'm making it
00256     // separate to highlight the test. On 64-bit linux boxes 'long' may be
00257     // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
00258     else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
00259         return FALSE;
00260     }
00261     else {
00262         return TRUE;
00263     }
00264 }
00265 
00266 int check_uint32(const char *val)
00267 {
00268   // Eat whitespace and check for an initial '-' sign...
00269   // strtoul allows an initial minus. mjohnson
00270     const char* c = val;
00271     while (c && isspace(*c)) {
00272          c++;
00273     }
00274     if (c && (*c == '-')) {
00275          return FALSE;
00276     }
00277 
00278     char *ptr;
00279     errno = 0;
00280     unsigned long v = strtoul(val, &ptr, 0);
00281 
00282     if ((v == 0 && val == ptr) || *ptr != '\0') {
00283         return FALSE;
00284     }
00285 
00286         // check overflow first, or the below check is invalid due to
00287         // clamping to the maximum value by strtoul
00288         // maybe consider using long long for these checks? mjohnson
00289         if (errno == ERANGE) {
00290                 return FALSE;
00291         }
00292         // See above.
00293         else if (v > DODS_UINT_MAX) {
00294                 return FALSE;
00295         }
00296         else {
00297                 return TRUE;
00298         }
00299 }
00300 
00301 unsigned long long get_ull(const char *val)
00302 {
00303   // Eat whitespace and check for an initial '-' sign...
00304   // strtoul allows an initial minus. mjohnson
00305     const char* c = val;
00306     while (c && isspace(*c)) {
00307          c++;
00308     }
00309     if (c && (*c == '-')) {
00310         throw Error("The value '" + string(val) + "' is not a valid array index.");
00311         // return FALSE;
00312     }
00313 
00314     char *ptr;
00315     errno = 0;
00316     unsigned long long v = strtoull(val, &ptr, 0);
00317 
00318     if ((v == 0 && val == ptr) || *ptr != '\0') {
00319         throw Error("The value '" + string(val) + "' contains extra characters.");
00320         //return FALSE;
00321     }
00322 
00323     if (errno == ERANGE) {
00324         throw Error("The value '" + string(val) + "' is out of range.");
00325         // return FALSE;
00326     }
00327     else if (v > DODS_MAX_ARRAY_INDEX) { // 2^61
00328         throw Error("The value '" + string(val) + "' is out of range.");
00329         // return FALSE;
00330     }
00331         else {
00332                 return v;
00333     }
00334 }
00335 
00336 
00337 // Check first for system errors (like numbers so small they convert
00338 // (erroneously) to zero. Then make sure that the value is within
00339 // limits.
00340 
00341 int check_float32(const char *val)
00342 {
00343     char *ptr;
00344     errno = 0;                  // Clear previous value. Fix for the 64bit
00345                                 // IRIX from Rob Morris. 5/21/2001 jhrg
00346 
00347 #ifdef WIN32
00348     double v = w32strtod(val, &ptr);
00349 #else
00350     double v = strtod(val, &ptr);
00351 #endif
00352 
00353     DBG(cerr << "v: " << v << ", ptr: " << ptr
00354         << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
00355 
00356     if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
00357         return FALSE;
00358 #if 0
00359     if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
00360         || *ptr != '\0') {
00361         return FALSE;
00362     }
00363 #endif
00364 
00365     DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
00366     double abs_val = fabs(v);
00367     if (abs_val > DODS_FLT_MAX
00368         || (abs_val != 0.0 && abs_val < DODS_FLT_MIN))
00369         return FALSE;
00370 
00371     return TRUE;
00372 }
00373 
00374 int check_float64(const char *val)
00375 {
00376     DBG(cerr << "val: " << val << endl);
00377     char *ptr;
00378     errno = 0;                  // Clear previous value. 5/21/2001 jhrg
00379 
00380 #ifdef WIN32
00381     double v = w32strtod(val, &ptr);
00382 #else
00383     double v = strtod(val, &ptr);
00384 #endif
00385 
00386     DBG(cerr << "v: " << v << ", ptr: " << ptr
00387         << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
00388 
00389 
00390     if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
00391         return FALSE;
00392 #if 0
00393     if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
00394         || *ptr != '\0') {
00395         return FALSE;
00396     }
00397 #endif
00398     DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
00399     double abs_val = fabs(v);
00400     if (abs_val > DODS_DBL_MAX
00401         || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
00402         return FALSE;
00403 
00404     return TRUE;
00405 }
00406 
00407 /*
00408   Maybe someday we will really check the Urls to see if they are valid...
00409 */
00410 
00411 int check_url(const char *)
00412 {
00413     return TRUE;
00414 }
00415 
00416 } // namespace libdap