libdap  Updated for version 3.17.0
GNURegex.cc
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2005 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 // 
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 // 
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 
00027 //#define DODS_DEBUG
00028 
00029 #include <config.h>
00030 
00031 #ifndef WIN32
00032 #include <alloca.h>
00033 #endif
00034 #include <stdlib.h>
00035  
00036 #include <sys/types.h>
00037 #include <regex.h>
00038 
00039 #include <new>
00040 #include <string>
00041 #include <vector>
00042 #include <stdexcept>
00043 
00044 #include "GNURegex.h"
00045 #include "Error.h"
00046 #include "util.h"
00047 #include "debug.h"
00048 
00049 
00050 using namespace std;
00051 
00052 namespace libdap {
00053 
00054 void
00055 Regex::init(const char *t)
00056 {
00057     DBG( cerr << "Regex::init() - BEGIN" << endl);
00058 
00059     DBG( cerr << "Regex::init() - creating new regex..." << endl);
00060     d_preg = static_cast<void*>(new regex_t);
00061 
00062     DBG( cerr << "Regex::init() - Calling regcomp()..." << endl);
00063     int result = regcomp(static_cast<regex_t*>(d_preg), t, REG_EXTENDED);
00064 
00065     if  (result != 0) {
00066         DBG( cerr << "Regex::init() - Call to regcomp FAILED" << endl);
00067         DBG( cerr << "Regex::init() - Calling regerror()..." << endl);
00068         size_t msg_len = regerror(result, static_cast<regex_t*>(d_preg),
00069                                   static_cast<char*>(NULL),
00070                                   static_cast<size_t>(0));
00071 
00072         DBG( cerr << "Regex::init() - Creating message" << endl);
00073         vector<char> msg(msg_len+1);
00074         //char *msg = new char[msg_len+1];
00075         DBG( cerr << "Regex::init() - Calling regerror() again..." << endl);
00076         regerror(result, static_cast<regex_t*>(d_preg), &msg[0], msg_len);
00077         DBG( cerr << "Regex::init() - Throwing libdap::Error" << endl);
00078         throw Error(string("Regex error: ") + string(&msg[0]));
00079         //delete[] msg;
00080         //throw e;
00081     }
00082     DBG( cerr << "Regex::init() - Call to regcomp() SUCCEEDED" << endl);
00083     DBG( cerr << "Regex::init() - END" << endl);
00084 }
00085 
00086 Regex::~Regex()
00087 {
00088     regfree(static_cast<regex_t*>(d_preg));
00089     delete static_cast<regex_t*>(d_preg); d_preg = 0;
00090 
00091 }
00092 
00096 Regex::Regex(const char* t)
00097 {
00098     init(t);
00099 }
00100 
00103 Regex::Regex(const char* t, int)
00104 {
00105     init(t);
00106 }
00107 
00114 int 
00115 Regex::match(const char* s, int len, int pos)
00116 {
00117    if (len > 32766)     // Integer overflow protection
00118         return -1;
00119         
00120     regmatch_t *pmatch = new regmatch_t[len+1];
00121     string ss = s;
00122 
00123     int result = regexec(static_cast<regex_t*>(d_preg), 
00124                          ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
00125         int matchnum;
00126     if (result == REG_NOMATCH)
00127         matchnum = -1;
00128         else
00129                 matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
00130                 
00131         delete[] pmatch; pmatch = 0;
00132 
00133     return matchnum;
00134 }
00135 
00146 int 
00147 Regex::search(const char* s, int len, int& matchlen, int pos)
00148 {
00149         // sanitize allocation
00150     if (!size_ok(sizeof(regmatch_t), len+1))
00151         return -1;
00152         
00153     // alloc space for len matches, which is theoretical max.
00154     // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
00155     // then len+1 is a an integer overflow and this might be exploited by
00156     // an attacker. It's not likely there will be more than a handful of
00157     // matches, so I am going to limit this value to 32766. jhrg 3/4/09
00158     if (len > 32766)
00159         return -1;
00160 
00161     regmatch_t *pmatch = new regmatch_t[len+1];
00162     string ss = s;
00163      
00164     int result = regexec(static_cast<regex_t*>(d_preg),
00165                          ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
00166     if (result == REG_NOMATCH) {
00167         delete[] pmatch; pmatch = 0;
00168         return -1;
00169     }
00170 
00171     // Match found, find the first one (pmatch lists the longest first)
00172     int m = 0;
00173     for (int i = 1; i < len; ++i)
00174         if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
00175             m = i;
00176             
00177     matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
00178     int matchpos = pmatch[m].rm_so;
00179     
00180     delete[] pmatch; pmatch = 0;
00181     return matchpos;
00182 }
00183 
00184 } // namespace libdap
00185