libdap  Updated for version 3.17.0
D4ParserSax2.h
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2012 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #ifndef d4_parser_sax2_h
00027 #define d4_parser_sax2_h
00028 
00029 #define ATTR 1
00030 
00031 #include <string.h>
00032 
00033 #include <string>
00034 #include <iostream>
00035 #include <map>
00036 #include <stack>
00037 
00038 #include <libxml/parserInternals.h>
00039 
00040 #define CRLF "\r\n"
00041 
00042 namespace libdap
00043 {
00044 
00045 class DMR;
00046 class BaseType;
00047 class D4BaseTypeFactory;
00048 class D4Group;
00049 class D4Attributes;
00050 class D4EnumDef;
00051 class D4Dimension;
00052 
00076 class D4ParserSax2
00077 {
00078 private:
00081     enum ParseState {
00082         parser_start,
00083 
00084         inside_dataset,
00085 
00086         // inside_group is the state just after parsing the start of a Group
00087         // element.
00088         inside_group,
00089 
00090         inside_attribute_container,
00091         inside_attribute,
00092         inside_attribute_value,
00093         inside_other_xml_attribute,
00094 
00095         inside_enum_def,
00096         inside_enum_const,
00097 
00098         inside_dim_def,
00099 
00100         // This covers Byte, ..., Url, Opaque
00101         inside_simple_type,
00102 
00103         // inside_array,
00104         inside_dim,
00105         inside_map,
00106 
00107         inside_constructor,
00108 
00109         // inside_sequence, Removed from merged code jhrg 5/2/14
00110 
00111         parser_unknown,
00112         parser_error,
00113         parser_fatal_error,
00114 
00115         parser_end
00116     };
00117 
00118     xmlSAXHandler ddx_sax_parser;
00119 
00120     // The results of the parse operation are stored in these fields.
00121     // This is passed into the parser using the intern() methods.
00122     DMR *d_dmr;   // dump DMR here
00123     DMR *dmr() const { return d_dmr; }
00124 
00125     // These stacks hold the state of the parse as it progresses.
00126     stack<ParseState> s; // Current parse state
00127     void push_state(D4ParserSax2::ParseState state) { s.push(state); }
00128     D4ParserSax2::ParseState get_state() const { return s.top(); }
00129     void pop_state() { s.pop(); }
00130     bool empty_state() const { return s.empty(); }
00131 
00132     stack<BaseType*> btp_stack; // current variable(s)
00133     void push_basetype(BaseType *btp) { btp_stack.push(btp); }
00134     BaseType *top_basetype() const { return btp_stack.top(); }
00135     void pop_basetype() { btp_stack.pop(); }
00136     bool empty_basetype() const { return btp_stack.empty(); }
00137 
00138     stack<D4Group*> grp_stack; // current groups(s)
00139     void push_group(D4Group *grp) { grp_stack.push(grp); }
00140     D4Group *top_group() const { return grp_stack.top(); }
00141     void pop_group() { grp_stack.pop(); }
00142     bool empty_group() const { return grp_stack.empty(); }
00143 
00144     stack<D4Attributes*> d_attrs_stack; // DAP4 Attributes
00145     void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); }
00146     D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
00147     void pop_attributes() { d_attrs_stack.pop(); }
00148     bool empty_attributes() const { return d_attrs_stack.empty(); }
00149 
00150     D4EnumDef *d_enum_def;
00151     D4EnumDef *enum_def();
00152 #if 0
00153     {
00154         if (!d_enum_def) d_enum_def = new D4EnumDef;
00155         return d_enum_def;
00156     }
00157 #endif
00158     void clear_enum_def() { d_enum_def = 0; }
00159 
00160     D4Dimension *d_dim_def;
00161     D4Dimension *dim_def();
00162 #if 0
00163     {
00164         if (!d_dim_def) d_dim_def = new D4Dimension;
00165         return d_dim_def;
00166     }
00167 #endif
00168     void clear_dim_def() { d_dim_def = 0; }
00169 
00170     // Accumulate stuff inside an 'OtherXML' DAP attribute here
00171     string other_xml;
00172 
00173     // When we're parsing unknown XML, how deeply is it nested? This is used
00174     // for the OtherXML DAP attributes.
00175     unsigned int other_xml_depth;
00176     unsigned int unknown_depth;
00177 
00178     // These are used for processing errors.
00179     string error_msg;  // Error message(s), if any.
00180     xmlParserCtxtPtr context; // used for error message line numbers
00181 
00182     // These hold temporary values read during the parse.
00183     string dods_attr_name; // DAP4 attributes, not XML attributes
00184     string dods_attr_type; // ... not XML ...
00185     string char_data;  // char data in value elements; null after use
00186     string root_ns;     // What is the namespace of the root node (Group)
00187 
00188     bool d_debug;
00189     bool debug() const { return d_debug; }
00190 
00191     class XMLAttribute {
00192         public:
00193         string prefix;
00194         string nsURI;
00195         string value;
00196 
00197         void clone(const XMLAttribute &src) {
00198             prefix = src.prefix;
00199             nsURI = src.nsURI;
00200             value = src.value;
00201         }
00202 
00203         XMLAttribute() : prefix(""), nsURI(""), value("") {}
00204         XMLAttribute(const string &p, const string &ns, const string &v)
00205             : prefix(p), nsURI(ns), value(v) {}
00206         // 'attributes' as passed from libxml2 is a five element array but this
00207         // ctor gets the back four elements.
00208         XMLAttribute(const xmlChar **attributes/*[4]*/) {
00209             prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
00210             nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
00211             value = string((const char *)attributes[2], (const char *)attributes[3]);
00212         }
00213         XMLAttribute(const XMLAttribute &rhs) {
00214             clone(rhs);
00215         }
00216         XMLAttribute &operator=(const XMLAttribute &rhs) {
00217             if (this == &rhs)
00218                 return *this;
00219             clone(rhs);
00220             return *this;
00221         }
00222     };
00223 
00224     typedef map<string, XMLAttribute> XMLAttrMap;
00225     XMLAttrMap xml_attrs; // dump XML attributes here
00226 
00227     XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
00228 
00229     XMLAttrMap::iterator xml_attr_end() {  return xml_attrs.end(); }
00230 
00231     map<string, string> namespace_table;
00232 
00233     void cleanup_parse();
00234 
00241     void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
00242     void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
00243     bool check_required_attribute(const string &attr);
00244     bool check_attribute(const string & attr);
00245     void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
00246 
00247     void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
00248     void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
00249 
00250     bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
00251     bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
00252     bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
00253     bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
00254     bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
00255     bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
00256     bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
00257     bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
00258 
00259     void finish_variable(const char *tag, Type t, const char *expected);
00261 
00262     friend class D4ParserSax2Test;
00263 
00264 public:
00265     D4ParserSax2() :
00266         d_dmr(0), d_enum_def(0), d_dim_def(0),
00267         other_xml(""), other_xml_depth(0), unknown_depth(0),
00268         error_msg(""), context(0),
00269         dods_attr_name(""), dods_attr_type(""),
00270         char_data(""), root_ns(""), d_debug(false)
00271     {
00272         //xmlSAXHandler ddx_sax_parser;
00273         memset(&ddx_sax_parser, 0, sizeof(xmlSAXHandler));
00274 
00275         ddx_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity;
00276         ddx_sax_parser.startDocument = &D4ParserSax2::dmr_start_document;
00277         ddx_sax_parser.endDocument = &D4ParserSax2::dmr_end_document;
00278         ddx_sax_parser.characters = &D4ParserSax2::dmr_get_characters;
00279         ddx_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace;
00280         ddx_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata;
00281         ddx_sax_parser.warning = &D4ParserSax2::dmr_error;
00282         ddx_sax_parser.error = &D4ParserSax2::dmr_error;
00283         ddx_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error;
00284         ddx_sax_parser.initialized = XML_SAX2_MAGIC;
00285         ddx_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element;
00286         ddx_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element;
00287     }
00288 
00289     void intern(istream &f, DMR *dest_dmr, bool debug = false);
00290     void intern(const string &document, DMR *dest_dmr, bool debug = false);
00291     void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false);
00292 
00293     static void dmr_start_document(void *parser);
00294     static void dmr_end_document(void *parser);
00295 
00296     static void dmr_start_element(void *parser,
00297             const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
00298             int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
00299             int nb_defaulted, const xmlChar **attributes);
00300     static void dmr_end_element(void *parser, const xmlChar *localname,
00301             const xmlChar *prefix, const xmlChar *URI);
00302 
00303     static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
00304     static void dmr_ignoreable_whitespace(void *parser,
00305             const xmlChar * ch, int len);
00306     static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
00307 
00308     static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
00309     static void dmr_fatal_error(void *parser, const char *msg, ...);
00310     static void dmr_error(void *parser, const char *msg, ...);
00311 };
00312 
00313 } // namespace libdap
00314 
00315 #endif // d4_parser_sax2_h