libdap
Updated for version 3.17.0
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2012 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 #ifndef d4_parser_sax2_h 00027 #define d4_parser_sax2_h 00028 00029 #define ATTR 1 00030 00031 #include <string.h> 00032 00033 #include <string> 00034 #include <iostream> 00035 #include <map> 00036 #include <stack> 00037 00038 #include <libxml/parserInternals.h> 00039 00040 #define CRLF "\r\n" 00041 00042 namespace libdap 00043 { 00044 00045 class DMR; 00046 class BaseType; 00047 class D4BaseTypeFactory; 00048 class D4Group; 00049 class D4Attributes; 00050 class D4EnumDef; 00051 class D4Dimension; 00052 00076 class D4ParserSax2 00077 { 00078 private: 00081 enum ParseState { 00082 parser_start, 00083 00084 inside_dataset, 00085 00086 // inside_group is the state just after parsing the start of a Group 00087 // element. 00088 inside_group, 00089 00090 inside_attribute_container, 00091 inside_attribute, 00092 inside_attribute_value, 00093 inside_other_xml_attribute, 00094 00095 inside_enum_def, 00096 inside_enum_const, 00097 00098 inside_dim_def, 00099 00100 // This covers Byte, ..., Url, Opaque 00101 inside_simple_type, 00102 00103 // inside_array, 00104 inside_dim, 00105 inside_map, 00106 00107 inside_constructor, 00108 00109 // inside_sequence, Removed from merged code jhrg 5/2/14 00110 00111 parser_unknown, 00112 parser_error, 00113 parser_fatal_error, 00114 00115 parser_end 00116 }; 00117 00118 xmlSAXHandler ddx_sax_parser; 00119 00120 // The results of the parse operation are stored in these fields. 00121 // This is passed into the parser using the intern() methods. 00122 DMR *d_dmr; // dump DMR here 00123 DMR *dmr() const { return d_dmr; } 00124 00125 // These stacks hold the state of the parse as it progresses. 00126 stack<ParseState> s; // Current parse state 00127 void push_state(D4ParserSax2::ParseState state) { s.push(state); } 00128 D4ParserSax2::ParseState get_state() const { return s.top(); } 00129 void pop_state() { s.pop(); } 00130 bool empty_state() const { return s.empty(); } 00131 00132 stack<BaseType*> btp_stack; // current variable(s) 00133 void push_basetype(BaseType *btp) { btp_stack.push(btp); } 00134 BaseType *top_basetype() const { return btp_stack.top(); } 00135 void pop_basetype() { btp_stack.pop(); } 00136 bool empty_basetype() const { return btp_stack.empty(); } 00137 00138 stack<D4Group*> grp_stack; // current groups(s) 00139 void push_group(D4Group *grp) { grp_stack.push(grp); } 00140 D4Group *top_group() const { return grp_stack.top(); } 00141 void pop_group() { grp_stack.pop(); } 00142 bool empty_group() const { return grp_stack.empty(); } 00143 00144 stack<D4Attributes*> d_attrs_stack; // DAP4 Attributes 00145 void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); } 00146 D4Attributes *top_attributes() const { return d_attrs_stack.top(); } 00147 void pop_attributes() { d_attrs_stack.pop(); } 00148 bool empty_attributes() const { return d_attrs_stack.empty(); } 00149 00150 D4EnumDef *d_enum_def; 00151 D4EnumDef *enum_def(); 00152 #if 0 00153 { 00154 if (!d_enum_def) d_enum_def = new D4EnumDef; 00155 return d_enum_def; 00156 } 00157 #endif 00158 void clear_enum_def() { d_enum_def = 0; } 00159 00160 D4Dimension *d_dim_def; 00161 D4Dimension *dim_def(); 00162 #if 0 00163 { 00164 if (!d_dim_def) d_dim_def = new D4Dimension; 00165 return d_dim_def; 00166 } 00167 #endif 00168 void clear_dim_def() { d_dim_def = 0; } 00169 00170 // Accumulate stuff inside an 'OtherXML' DAP attribute here 00171 string other_xml; 00172 00173 // When we're parsing unknown XML, how deeply is it nested? This is used 00174 // for the OtherXML DAP attributes. 00175 unsigned int other_xml_depth; 00176 unsigned int unknown_depth; 00177 00178 // These are used for processing errors. 00179 string error_msg; // Error message(s), if any. 00180 xmlParserCtxtPtr context; // used for error message line numbers 00181 00182 // These hold temporary values read during the parse. 00183 string dods_attr_name; // DAP4 attributes, not XML attributes 00184 string dods_attr_type; // ... not XML ... 00185 string char_data; // char data in value elements; null after use 00186 string root_ns; // What is the namespace of the root node (Group) 00187 00188 bool d_debug; 00189 bool debug() const { return d_debug; } 00190 00191 class XMLAttribute { 00192 public: 00193 string prefix; 00194 string nsURI; 00195 string value; 00196 00197 void clone(const XMLAttribute &src) { 00198 prefix = src.prefix; 00199 nsURI = src.nsURI; 00200 value = src.value; 00201 } 00202 00203 XMLAttribute() : prefix(""), nsURI(""), value("") {} 00204 XMLAttribute(const string &p, const string &ns, const string &v) 00205 : prefix(p), nsURI(ns), value(v) {} 00206 // 'attributes' as passed from libxml2 is a five element array but this 00207 // ctor gets the back four elements. 00208 XMLAttribute(const xmlChar **attributes/*[4]*/) { 00209 prefix = attributes[0] != 0 ? (const char *)attributes[0]: ""; 00210 nsURI = attributes[1] != 0 ? (const char *)attributes[1]: ""; 00211 value = string((const char *)attributes[2], (const char *)attributes[3]); 00212 } 00213 XMLAttribute(const XMLAttribute &rhs) { 00214 clone(rhs); 00215 } 00216 XMLAttribute &operator=(const XMLAttribute &rhs) { 00217 if (this == &rhs) 00218 return *this; 00219 clone(rhs); 00220 return *this; 00221 } 00222 }; 00223 00224 typedef map<string, XMLAttribute> XMLAttrMap; 00225 XMLAttrMap xml_attrs; // dump XML attributes here 00226 00227 XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); } 00228 00229 XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); } 00230 00231 map<string, string> namespace_table; 00232 00233 void cleanup_parse(); 00234 00241 void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes); 00242 void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces); 00243 bool check_required_attribute(const string &attr); 00244 bool check_attribute(const string & attr); 00245 void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes); 00246 00247 void process_enum_const_helper(const xmlChar **attrs, int nb_attributes); 00248 void process_enum_def_helper(const xmlChar **attrs, int nb_attributes); 00249 00250 bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs); 00251 bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs); 00252 bool process_map(const char *name, const xmlChar **attrs, int nb_attributes); 00253 bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes); 00254 bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes); 00255 bool process_group(const char *name, const xmlChar **attrs, int nb_attributes); 00256 bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes); 00257 bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes); 00258 00259 void finish_variable(const char *tag, Type t, const char *expected); 00261 00262 friend class D4ParserSax2Test; 00263 00264 public: 00265 D4ParserSax2() : 00266 d_dmr(0), d_enum_def(0), d_dim_def(0), 00267 other_xml(""), other_xml_depth(0), unknown_depth(0), 00268 error_msg(""), context(0), 00269 dods_attr_name(""), dods_attr_type(""), 00270 char_data(""), root_ns(""), d_debug(false) 00271 { 00272 //xmlSAXHandler ddx_sax_parser; 00273 memset(&ddx_sax_parser, 0, sizeof(xmlSAXHandler)); 00274 00275 ddx_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity; 00276 ddx_sax_parser.startDocument = &D4ParserSax2::dmr_start_document; 00277 ddx_sax_parser.endDocument = &D4ParserSax2::dmr_end_document; 00278 ddx_sax_parser.characters = &D4ParserSax2::dmr_get_characters; 00279 ddx_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace; 00280 ddx_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata; 00281 ddx_sax_parser.warning = &D4ParserSax2::dmr_error; 00282 ddx_sax_parser.error = &D4ParserSax2::dmr_error; 00283 ddx_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error; 00284 ddx_sax_parser.initialized = XML_SAX2_MAGIC; 00285 ddx_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element; 00286 ddx_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element; 00287 } 00288 00289 void intern(istream &f, DMR *dest_dmr, bool debug = false); 00290 void intern(const string &document, DMR *dest_dmr, bool debug = false); 00291 void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false); 00292 00293 static void dmr_start_document(void *parser); 00294 static void dmr_end_document(void *parser); 00295 00296 static void dmr_start_element(void *parser, 00297 const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, 00298 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, 00299 int nb_defaulted, const xmlChar **attributes); 00300 static void dmr_end_element(void *parser, const xmlChar *localname, 00301 const xmlChar *prefix, const xmlChar *URI); 00302 00303 static void dmr_get_characters(void *parser, const xmlChar *ch, int len); 00304 static void dmr_ignoreable_whitespace(void *parser, 00305 const xmlChar * ch, int len); 00306 static void dmr_get_cdata(void *parser, const xmlChar *value, int len); 00307 00308 static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name); 00309 static void dmr_fatal_error(void *parser, const char *msg, ...); 00310 static void dmr_error(void *parser, const char *msg, ...); 00311 }; 00312 00313 } // namespace libdap 00314 00315 #endif // d4_parser_sax2_h