libdap  Updated for version 3.17.0
D4ParserSax2.cc
00001 // -*- mode: c++; c-basic-offset:4 -*-
00002 
00003 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00004 // Access Protocol.
00005 
00006 // Copyright (c) 2012 OPeNDAP, Inc.
00007 // Author: James Gallagher <jgallagher@opendap.org>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 //
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00022 //
00023 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00024 
00025 #include "config.h"
00026 
00027 //#define DODS_DEBUG 1
00028 
00029 #include <iostream>
00030 #include <sstream>
00031 
00032 #include <cstring>
00033 #include <cstdarg>
00034 #include <cassert>
00035 
00036 #include <libxml/parserInternals.h>
00037 
00038 #include "DMR.h"
00039 
00040 #include "BaseType.h"
00041 #include "Array.h"
00042 #include "D4Group.h"
00043 #include "D4Attributes.h"
00044 #include "D4Maps.h"
00045 #include "D4Enum.h"
00046 
00047 #include "D4BaseTypeFactory.h"
00048 
00049 #include "D4ParserSax2.h"
00050 
00051 #include "util.h"
00052 #include "debug.h"
00053 
00054 namespace libdap {
00055 
// Printable names for the parser states. The debug output in this file
// indexes this table with the value returned by get_state(), so the entries
// presumably have to stay in the same order as the ParseState enum (declared
// elsewhere -- confirm before reordering or inserting entries).
static const char *states[] = {
    "parser_start",

    "inside_dataset",

    // The state just after the start of a Group element has been parsed.
    "inside_group",

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    // Covers all of the simple types: Byte, ..., Url, Opaque
    "inside_simple_type",

    // "inside_array",   (no longer a separate state)
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
00090 
/**
 * Test whether an element name differs from the expected tag.
 *
 * @param name The element name found in the document.
 * @param tag The tag name that was expected.
 * @return True when the two C strings are NOT equal, false when they match.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (std::strcmp(name, tag) == 0);
    return !same;
}
00095 
00104 D4EnumDef *
00105 D4ParserSax2::enum_def()
00106 {
00107     if (!d_enum_def) d_enum_def = new D4EnumDef;
00108 
00109     return d_enum_def;
00110 }
00111 
00118 D4Dimension *
00119 D4ParserSax2::dim_def()    {
00120     if (!d_dim_def) d_dim_def = new D4Dimension;
00121 
00122     return d_dim_def;
00123 }
00124 
00130 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
00131 {
00132     if (!xml_attrs.empty())
00133         xml_attrs.clear(); // erase old attributes
00134 
00135     // Make a value using the attribute name and the prefix, namespace URI
00136     // and the value. The prefix might be null.
00137     unsigned int index = 0;
00138     for (int i = 0; i < nb_attributes; ++i, index += 5) {
00139         xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
00140                                                                XMLAttribute(attributes + index + 1)));
00141 
00142         DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
00143                 << xml_attrs[(const char *)attributes[index]].value << endl);
00144     }
00145 }
00146 
00153 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
00154 {
00155     // make a value with the prefix and namespace URI. The prefix might be null.
00156     for (int i = 0; i < nb_namespaces; ++i) {
00157         namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
00158                                                                (const char *)namespaces[i * 2 + 1]));
00159     }
00160 }
00161 
00168 bool D4ParserSax2::check_required_attribute(const string & attr)
00169 {
00170     if (xml_attrs.find(attr) == xml_attrs.end()) {
00171         dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
00172         return false;
00173     }
00174     else
00175         return true;
00176 }
00177 
00184 bool D4ParserSax2::check_attribute(const string & attr)
00185 {
00186     return (xml_attrs.find(attr) != xml_attrs.end());
00187 }
00188 
00189 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
00190 {
00191     if (is_not(name, "Dimension"))
00192         return false;
00193 
00194     transfer_xml_attrs(attrs, nb_attributes);
00195 
00196     if (!(check_required_attribute("name") && check_required_attribute("size"))) {
00197         dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
00198         return false;
00199     }
00200 
00201     // This getter (dim_def) allocates a new object if needed.
00202     dim_def()->set_name(xml_attrs["name"].value);
00203     try {
00204         dim_def()->set_size(xml_attrs["size"].value);
00205     }
00206     catch (Error &e) {
00207         dmr_error(this, e.get_error_message().c_str());
00208         return false;
00209     }
00210 
00211     return true;
00212 }
00213 
00231 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
00232 {
00233     if (is_not(name, "Dim"))
00234         return false;
00235 
00236     transfer_xml_attrs(attrs, nb_attributes);
00237 
00238         if (check_attribute("size") && check_attribute("name")) {
00239                 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
00240                 return false;
00241         }
00242         if (!(check_attribute("size") || check_attribute("name"))) {
00243                 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
00244                 return false;
00245         }
00246 
00247         if (!top_basetype()->is_vector_type()) {
00248                 // Make the top BaseType* an array
00249                 BaseType *b = top_basetype();
00250                 pop_basetype();
00251 
00252                 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
00253                 a->set_is_dap4(true);
00254                 a->add_var_nocopy(b);
00255                 a->set_attributes_nocopy(b->attributes());
00256                 // trick: instead of popping b's attributes, copying them and then pushing
00257                 // a's copy, just move the pointer (but make sure there's only one object that
00258                 // references that pointer).
00259                 b->set_attributes_nocopy(0);
00260 
00261                 push_basetype(a);
00262         }
00263 
00264         assert(top_basetype()->is_vector_type());
00265 
00266         Array *a = static_cast<Array*>(top_basetype());
00267     if (check_attribute("size")) {
00268         a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
00269         return true;
00270     }
00271     else if (check_attribute("name")) {
00272         string name = xml_attrs["name"].value;
00273 
00274         D4Dimension *dim = 0;
00275         if (name[0] == '/')             // lookup the Dimension in the root group
00276                 dim = dmr()->root()->find_dim(name);
00277         else                                    // get enclosing Group and lookup Dimension there
00278                 dim = top_group()->find_dim(name);
00279 
00280         if (!dim)
00281                 throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
00282         a->append_dim(dim);
00283         return true;
00284     }
00285 
00286     return false;
00287 }
00288 
00289 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
00290 {
00291     if (is_not(name, "Map"))
00292         return false;
00293 
00294     transfer_xml_attrs(attrs, nb_attributes);
00295 
00296         if (!check_attribute("name")) {
00297                 dmr_error(this, "The 'name' attribute must be used in a Map element.");
00298                 return false;
00299         }
00300 
00301         if (!top_basetype()->is_vector_type()) {
00302                 // Make the top BaseType* an array
00303                 BaseType *b = top_basetype();
00304                 pop_basetype();
00305 
00306                 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
00307                 a->set_is_dap4(true);
00308                 a->add_var_nocopy(b);
00309                 a->set_attributes_nocopy(b->attributes());
00310                 // trick: instead of popping b's attributes, copying them and then pushing
00311                 // a's copy, just move the pointer (but make sure there's only one object that
00312                 // references that pointer).
00313                 b->set_attributes_nocopy(0);
00314 
00315                 push_basetype(a);
00316         }
00317 
00318         assert(top_basetype()->is_vector_type());
00319 
00320         Array *a = static_cast<Array*>(top_basetype());
00321 
00322         string map_name = xml_attrs["name"].value;
00323         if (xml_attrs["name"].value[0] != '/')
00324                 map_name = top_group()->FQN() + map_name;
00325 
00326     Array *map_source = 0;      // The array variable that holds the data for the Map
00327 
00328         if (map_name[0] == '/')         // lookup the Map in the root group
00329                 map_source = dmr()->root()->find_map_source(map_name);
00330         else                                    // get enclosing Group and lookup Map there
00331                 map_source = top_group()->find_map_source(map_name);
00332 
00333         if (!map_source)
00334                 throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
00335 
00336         a->maps()->add_map(new D4Map(map_name, map_source));
00337 
00338         return true;
00339 }
00340 
00341 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
00342 {
00343     if (is_not(name, "Group"))
00344         return false;
00345 
00346     transfer_xml_attrs(attrs, nb_attributes);
00347 
00348     if (!check_required_attribute("name")) {
00349         dmr_error(this, "The required attribute 'name' was missing from a Group element.");
00350         return false;
00351     }
00352 
00353     BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
00354     if (!btp) {
00355         dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
00356         return false;
00357     }
00358 
00359     D4Group *grp = static_cast<D4Group*>(btp);
00360 
00361     // Need to set this to get the D4Attribute behavior in the type classes
00362     // shared between DAP2 and DAP4. jhrg 4/18/13
00363     grp->set_is_dap4(true);
00364 
00365     // link it up and change the current group
00366     D4Group *parent = top_group();
00367         if (!parent) {
00368                 dmr_fatal_error(this, "No Group on the Group stack.");
00369                 return false;
00370         }
00371 
00372         grp->set_parent(parent);
00373         parent->add_group_nocopy(grp);
00374 
00375     push_group(grp);
00376     push_attributes(grp->attributes());
00377     return true;
00378 }
00379 
00386 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
00387 {
00388     if (is_not(name, "Attribute"))
00389         return false;
00390 
00391     // These methods set the state to parser_error if a problem is found.
00392     transfer_xml_attrs(attrs, nb_attributes);
00393 
00394     // add error
00395     if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
00396         dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
00397         return false;
00398     }
00399 
00400     if (xml_attrs["type"].value == "Container") {
00401         push_state(inside_attribute_container);
00402 
00403         DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
00404         D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
00405 
00406         D4Attributes *tos = top_attributes();
00407         // add return
00408         if (!tos) {
00409             delete child;
00410             dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
00411             return false;
00412         }
00413 
00414         tos->add_attribute_nocopy(child);
00415         push_attributes(child->attributes());
00416     }
00417     else if (xml_attrs["type"].value == "OtherXML") {
00418         push_state(inside_other_xml_attribute);
00419 
00420         dods_attr_name = xml_attrs["name"].value;
00421         dods_attr_type = xml_attrs["type"].value;
00422     }
00423     else {
00424         push_state(inside_attribute);
00425 
00426         dods_attr_name = xml_attrs["name"].value;
00427         dods_attr_type = xml_attrs["type"].value;
00428     }
00429 
00430     return true;
00431 }
00432 
00438 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
00439 {
00440     if (is_not(name, "Enumeration"))
00441         return false;
00442 
00443     transfer_xml_attrs(attrs, nb_attributes);
00444 
00445     if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
00446         dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
00447         return false;
00448     }
00449 
00450     Type t = get_type(xml_attrs["basetype"].value.c_str());
00451     if (!is_integer_type(t)) {
00452         dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
00453                 xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
00454         return false;
00455     }
00456 
00457     // This getter allocates a new object if needed.
00458     string enum_def_path = xml_attrs["name"].value;
00459 #if 0
00460         // Use FQNs when things are referenced, not when they are defined
00461     if (xml_attrs["name"].value[0] != '/')
00462         enum_def_path = top_group()->FQN() + enum_def_path;
00463 #endif
00464     enum_def()->set_name(enum_def_path);
00465     enum_def()->set_type(t);
00466 
00467     return true;
00468 }
00469 
00470 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
00471 {
00472     if (is_not(name, "EnumConst"))
00473         return false;
00474 
00475     // These methods set the state to parser_error if a problem is found.
00476     transfer_xml_attrs(attrs, nb_attributes);
00477 
00478     if (!(check_required_attribute("name") && check_required_attribute("value"))) {
00479         dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
00480         return false;
00481     }
00482 
00483     istringstream iss(xml_attrs["value"].value);
00484     long long value = 0;
00485     iss >> skipws >> value;
00486     if (iss.fail() || iss.bad()) {
00487         dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
00488                 xml_attrs["value"].value.c_str());
00489     }
00490     else if (!enum_def()->is_valid_enum_value(value)) {
00491         dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
00492                 xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
00493     }
00494     else {
00495         // unfortunate choice of names... args are 'label' and 'value'
00496         enum_def()->add_value(xml_attrs["name"].value, value);
00497     }
00498 
00499     return true;
00500 }
00501 
00507 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
00508 {
00509     Type t = get_type(name);
00510     if (is_simple_type(t)) {
00511         process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
00512         return true;
00513     }
00514     else {
00515         switch(t) {
00516         case dods_structure_c:
00517             process_variable_helper(t, inside_constructor, attrs, nb_attributes);
00518             return true;
00519 
00520         case dods_sequence_c:
00521             process_variable_helper(t, inside_constructor, attrs, nb_attributes);
00522             return true;
00523 
00524         default:
00525                 return false;
00526         }
00527     }
00528 }
00529 
00537 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
00538 {
00539     transfer_xml_attrs(attrs, nb_attributes);
00540 
00541     if (check_required_attribute("name")) {
00542         BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
00543         if (!btp) {
00544             dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
00545             return;
00546         }
00547 
00548         if ((t == dods_enum_c) && check_required_attribute("enum")) {
00549             D4EnumDef *enum_def = 0;
00550             string enum_path = xml_attrs["enum"].value;
00551                         if (enum_path[0] == '/')
00552                 enum_def = dmr()->root()->find_enum_def(enum_path);
00553             else
00554                 enum_def = top_group()->find_enum_def(enum_path);
00555 
00556             if (!enum_def)
00557                 dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
00558 
00559             static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
00560         }
00561 
00562         btp->set_is_dap4(true); // see comment above
00563         push_basetype(btp);
00564 
00565         push_attributes(btp->attributes());
00566 
00567         push_state(s);
00568     }
00569 }
00570 
00577 
00582 void D4ParserSax2::dmr_start_document(void * p)
00583 {
00584     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
00585     parser->error_msg = "";
00586     parser->char_data = "";
00587 
00588     // Set this in intern_helper so that the loop test for the parser_end
00589     // state works for the first iteration. It seems like XMLParseChunk calls this
00590     // function on it's first run. jhrg 9/16/13
00591     // parser->push_state(parser_start);
00592 
00593     parser->push_attributes(parser->dmr()->root()->attributes());
00594 
00595     if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
00596 }
00597 
00600 void D4ParserSax2::dmr_end_document(void * p)
00601 {
00602     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
00603 
00604     if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
00605 
00606     if (parser->get_state() != parser_end)
00607         D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
00608 
00609     // If we've found any sort of error, don't make the DMR; intern() will
00610     // take care of the error.
00611     if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
00612         return;
00613 
00614     if (!parser->empty_basetype() || parser->empty_group())
00615         D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
00616 
00617     parser->pop_group();     // leave the stack 'clean'
00618     parser->pop_attributes();
00619 }
00620 
00621 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
00622         int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
00623         const xmlChar **attributes)
00624 {
00625     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
00626     const char *localname = (const char *) l;
00627 
00628     if (parser->debug()) cerr << "Start element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
00629 
00630     switch (parser->get_state()) {
00631         case parser_start:
00632             if (is_not(localname, "Dataset"))
00633                 D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
00634 
00635             parser->root_ns = URI ? (const char *) URI : "";
00636             parser->transfer_xml_attrs(attributes, nb_attributes);
00637 
00638             if (parser->check_required_attribute(string("name")))
00639                 parser->dmr()->set_name(parser->xml_attrs["name"].value);
00640 
00641             if (parser->check_attribute("dapVersion"))
00642                 parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
00643 
00644             if (parser->check_attribute("dmrVersion"))
00645                 parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
00646 
00647             if (parser->check_attribute("base"))
00648                 parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
00649 
00650             if (!parser->root_ns.empty())
00651                 parser->dmr()->set_namespace(parser->root_ns);
00652 
00653             // Push the root Group on the stack
00654             parser->push_group(parser->dmr()->root());
00655 
00656             parser->push_state(inside_dataset);
00657 
00658             break;
00659 
00660             // Both inside dataset and inside group can have the same stuff.
00661             // The difference is that the Dataset holds the root group, which
00662             // must be present; other groups are optional
00663         case inside_dataset:
00664         case inside_group:
00665             if (parser->process_enum_def(localname, attributes, nb_attributes))
00666                 parser->push_state(inside_enum_def);
00667             else if (parser->process_dimension_def(localname, attributes, nb_attributes))
00668                 parser->push_state(inside_dim_def);
00669             else if (parser->process_group(localname, attributes, nb_attributes))
00670                 parser->push_state(inside_group);
00671             else if (parser->process_variable(localname, attributes, nb_attributes))
00672                 // This will push either inside_simple_type or inside_structure
00673                 // onto the parser state stack.
00674                break;
00675             else if (parser->process_attribute(localname, attributes, nb_attributes))
00676                 // This will push either inside_attribute, inside_attribute_container
00677                 // or inside_otherxml_attribute onto the parser state stack
00678                 break;
00679             else
00680                 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
00681             break;
00682 
00683         case inside_attribute_container:
00684             if (parser->process_attribute(localname, attributes, nb_attributes))
00685                 break;
00686             else
00687                 D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
00688             break;
00689 
00690         case inside_attribute:
00691             if (parser->process_attribute(localname, attributes, nb_attributes))
00692                 break;
00693             else if (strcmp(localname, "Value") == 0)
00694                 parser->push_state(inside_attribute_value);
00695             else
00696                 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
00697             break;
00698 
00699         case inside_attribute_value:
00700             // Attribute values are processed by the end element code.
00701             break;
00702 
00703         case inside_other_xml_attribute:
00704             parser->other_xml_depth++;
00705 
00706             // Accumulate the elements here
00707             parser->other_xml.append("<");
00708             if (prefix) {
00709                 parser->other_xml.append((const char *) prefix);
00710                 parser->other_xml.append(":");
00711             }
00712             parser->other_xml.append(localname);
00713 
00714             if (nb_namespaces != 0) {
00715                 parser->transfer_xml_ns(namespaces, nb_namespaces);
00716 
00717                 for (map<string, string>::iterator i = parser->namespace_table.begin();
00718                         i != parser->namespace_table.end(); ++i) {
00719                     parser->other_xml.append(" xmlns");
00720                     if (!i->first.empty()) {
00721                         parser->other_xml.append(":");
00722                         parser->other_xml.append(i->first);
00723                     }
00724                     parser->other_xml.append("=\"");
00725                     parser->other_xml.append(i->second);
00726                     parser->other_xml.append("\"");
00727                 }
00728             }
00729 
00730             if (nb_attributes != 0) {
00731                 parser->transfer_xml_attrs(attributes, nb_attributes);
00732                 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
00733                     parser->other_xml.append(" ");
00734                     if (!i->second.prefix.empty()) {
00735                         parser->other_xml.append(i->second.prefix);
00736                         parser->other_xml.append(":");
00737                     }
00738                     parser->other_xml.append(i->first);
00739                     parser->other_xml.append("=\"");
00740                     parser->other_xml.append(i->second.value);
00741                     parser->other_xml.append("\"");
00742                 }
00743             }
00744 
00745             parser->other_xml.append(">");
00746             break;
00747 
00748         case inside_enum_def:
00749             // process an EnumConst element
00750             if (parser->process_enum_const(localname, attributes, nb_attributes))
00751                 parser->push_state(inside_enum_const);
00752             else
00753                 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
00754             break;
00755 
00756         case inside_enum_const:
00757             // No content; nothing to do
00758             break;
00759 
00760         case inside_dim_def:
00761             // No content; nothing to do
00762             break;
00763 #if 0
00764         case inside_dimension:
00765             // No content.
00766             break;
00767 #endif
00768         case inside_dim:
00769             // No content.
00770             break;
00771 
00772         case inside_map:
00773             // No content.
00774             break;
00775 
00776         case inside_simple_type:
00777             if (parser->process_attribute(localname, attributes, nb_attributes))
00778                 break;
00779             else if (parser->process_dimension(localname, attributes, nb_attributes))
00780                 parser->push_state(inside_dim);
00781             else if (parser->process_map(localname, attributes, nb_attributes))
00782                 parser->push_state(inside_map);
00783             else
00784                 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
00785             break;
00786 
00787         case inside_constructor:
00788             if (parser->process_variable(localname, attributes, nb_attributes))
00789                 // This will push either inside_simple_type or inside_structure
00790                 // onto the parser state stack.
00791                 break;
00792             else if (parser->process_attribute(localname, attributes, nb_attributes))
00793                 break;
00794             else if (parser->process_dimension(localname, attributes, nb_attributes))
00795                 parser->push_state(inside_dim);
00796             else if (parser->process_map(localname, attributes, nb_attributes))
00797                 parser->push_state(inside_map);
00798             else
00799                 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
00800             break;
00801 
00802         case parser_unknown:
00803             // FIXME?
00804             // *** Never used? If so remove/error
00805             parser->push_state(parser_unknown);
00806             break;
00807 
00808         case parser_error:
00809         case parser_fatal_error:
00810             break;
00811 
00812         case parser_end:
00813             // FIXME Error?
00814             break;
00815     }
00816 
00817     if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
00818 }
00819 
00820 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
00821 {
00822     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
00823     const char *localname = (const char *) l;
00824 
00825     if (parser->debug())
00826         cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
00827 
00828     switch (parser->get_state()) {
00829     case parser_start:
00830         dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
00831         break;
00832 
00833     case inside_dataset:
00834         if (is_not(localname, "Dataset"))
00835             D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
00836 
00837         parser->pop_state();
00838         if (parser->get_state() != parser_start)
00839             dmr_fatal_error(parser, "Unexpected state, expected start state.");
00840         else {
00841             parser->pop_state();
00842             parser->push_state(parser_end);
00843         }
00844         break;
00845 
00846     case inside_group: {
00847         if (is_not(localname, "Group"))
00848             D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
00849 
00850         if (!parser->empty_basetype() || parser->empty_group())
00851             D4ParserSax2::dmr_error(parser,
00852                     "The document did not contain a valid root Group or contained unbalanced tags.");
00853 
00854         parser->pop_group();
00855         parser->pop_state();
00856         break;
00857     }
00858 
00859     case inside_attribute_container:
00860         if (is_not(localname, "Attribute"))
00861             D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
00862 
00863         parser->pop_state();
00864         parser->pop_attributes();
00865         break;
00866 
00867     case inside_attribute:
00868         if (is_not(localname, "Attribute"))
00869             D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
00870 
00871         parser->pop_state();
00872         break;
00873 
00874     case inside_attribute_value: {
00875         if (is_not(localname, "Value"))
00876             D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
00877 
00878         parser->pop_state();
00879 
00880         // The old code added more values using the name and type as
00881         // indexes to find the correct attribute. Use get() for that
00882         // now. Or fix this code to keep a pointer to the to attribute...
00883         D4Attributes *attrs = parser->top_attributes();
00884         D4Attribute *attr = attrs->get(parser->dods_attr_name);
00885         if (!attr) {
00886             attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
00887             attrs->add_attribute_nocopy(attr);
00888         }
00889         attr->add_value(parser->char_data);
00890 
00891         parser->char_data = ""; // Null this after use.
00892         break;
00893     }
00894 
00895     case inside_other_xml_attribute: {
00896         if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
00897             parser->pop_state();
00898 
00899             // The old code added more values using the name and type as
00900             // indexes to find the correct attribute. Use get() for that
00901             // now. Or fix this code to keep a pointer to the to attribute...
00902             D4Attributes *attrs = parser->top_attributes();
00903             D4Attribute *attr = attrs->get(parser->dods_attr_name);
00904             if (!attr) {
00905                 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
00906                 attrs->add_attribute_nocopy(attr);
00907             }
00908             attr->add_value(parser->other_xml);
00909 
00910             parser->other_xml = ""; // Null this after use.
00911         }
00912         else {
00913             if (parser->other_xml_depth == 0) {
00914                 D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
00915                         localname);
00916                 break;
00917             }
00918             parser->other_xml_depth--;
00919 
00920             parser->other_xml.append("</");
00921             if (prefix) {
00922                 parser->other_xml.append((const char *) prefix);
00923                 parser->other_xml.append(":");
00924             }
00925             parser->other_xml.append(localname);
00926             parser->other_xml.append(">");
00927         }
00928         break;
00929     }
00930 
00931     case inside_enum_def:
00932         if (is_not(localname, "Enumeration"))
00933             D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
00934         if (!parser->top_group())
00935             D4ParserSax2::dmr_fatal_error(parser,
00936                     "Expected a Group to be the current item, while finishing up an Enumeration.");
00937         else {
00938             // copy the pointer; not a deep copy
00939             parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
00940             // Set the enum_def to null; next call to enum_def() will
00941             // allocate a new object
00942             parser->clear_enum_def();
00943             parser->pop_state();
00944         }
00945         break;
00946 
00947     case inside_enum_const:
00948         if (is_not(localname, "EnumConst"))
00949             D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
00950 
00951         parser->pop_state();
00952         break;
00953 
00954     case inside_dim_def: {
00955         if (is_not(localname, "Dimension"))
00956             D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
00957 
00958         if (!parser->top_group())
00959             D4ParserSax2::dmr_error(parser,
00960                     "Expected a Group to be the current item, while finishing up an Dimension.");
00961 
00962         // FIXME Use the Group on the top of the group stack
00963         // copy the pointer; not a deep copy
00964         parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
00965         //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
00966         // Set the dim_def to null; next call to dim_def() will
00967         // allocate a new object. Calling 'clear' is important because
00968         // the cleanup method will free dim_def if it's not null and
00969         // we just copied the pointer in the add_dim_nocopy() call
00970         // above.
00971         parser->clear_dim_def();
00972         parser->pop_state();
00973         break;
00974     }
00975 
00976     case inside_simple_type:
00977         if (is_simple_type(get_type(localname))) {
00978             BaseType *btp = parser->top_basetype();
00979             parser->pop_basetype();
00980             parser->pop_attributes();
00981 
00982             BaseType *parent = 0;
00983             if (!parser->empty_basetype())
00984                 parent = parser->top_basetype();
00985             else if (!parser->empty_group())
00986                 parent = parser->top_group();
00987             else {
00988                 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
00989                         localname);
00990                 delete btp;
00991                 parser->pop_state();
00992                 break;
00993             }
00994 
00995             if (parent->type() == dods_array_c)
00996                 static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
00997             else
00998                 parent->add_var_nocopy(btp);
00999         }
01000         else
01001             D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
01002 
01003         parser->pop_state();
01004         break;
01005 
01006     case inside_dim:
01007         if (is_not(localname, "Dim"))
01008             D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
01009 
01010         parser->pop_state();
01011         break;
01012 
01013     case inside_map:
01014         if (is_not(localname, "Map"))
01015             D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
01016 
01017         parser->pop_state();
01018         break;
01019 
01020     case inside_constructor: {
01021         if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
01022             D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
01023             return;
01024         }
01025 
01026         BaseType *btp = parser->top_basetype();
01027         parser->pop_basetype();
01028         parser->pop_attributes();
01029 
01030         BaseType *parent = 0;
01031         if (!parser->empty_basetype())
01032             parent = parser->top_basetype();
01033         else if (!parser->empty_group())
01034             parent = parser->top_group();
01035         else {
01036             dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
01037                     localname);
01038             delete btp;
01039             parser->pop_state();
01040             break;
01041         }
01042 
01043         // TODO Why doesn't this code mirror the simple_var case and test
01044         // for the parent being an array? jhrg 10/13/13
01045         parent->add_var_nocopy(btp);
01046         parser->pop_state();
01047         break;
01048     }
01049 
01050     case parser_unknown:
01051         parser->pop_state();
01052         break;
01053 
01054     case parser_error:
01055     case parser_fatal_error:
01056         break;
01057 
01058     case parser_end:
01059         // FIXME Error?
01060         break;
01061     }
01062 
01063     if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
01064 }
01065 
01069 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
01070 {
01071     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
01072 
01073     switch (parser->get_state()) {
01074         case inside_attribute_value:
01075             parser->char_data.append((const char *) (ch), len);
01076             DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
01077             break;
01078 
01079         case inside_other_xml_attribute:
01080             parser->other_xml.append((const char *) (ch), len);
01081             DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
01082             break;
01083 
01084         default:
01085             break;
01086     }
01087 }
01088 
01093 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
01094 {
01095     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
01096 
01097     switch (parser->get_state()) {
01098         case inside_other_xml_attribute:
01099             parser->other_xml.append((const char *) (ch), len);
01100             break;
01101 
01102         default:
01103             break;
01104     }
01105 }
01106 
01112 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
01113 {
01114     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
01115 
01116     switch (parser->get_state()) {
01117         case inside_other_xml_attribute:
01118             parser->other_xml.append((const char *) (value), len);
01119             break;
01120 
01121         case parser_unknown:
01122             break;
01123 
01124         default:
01125             D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
01126 
01127             break;
01128     }
01129 }
01130 
01135 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
01136 {
01137     return xmlGetPredefinedEntity(name);
01138 }
01139 
01150 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
01151 {
01152     va_list args;
01153     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
01154 
01155     parser->push_state(parser_fatal_error);
01156 
01157     va_start(args, msg);
01158     char str[1024];
01159     vsnprintf(str, 1024, msg, args);
01160     va_end(args);
01161 
01162     int line = xmlSAX2GetLineNumber(parser->context);
01163 
01164     if (!parser->error_msg.empty()) parser->error_msg += "\n";
01165     parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
01166 }
01167 
01168 void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
01169 {
01170     va_list args;
01171     D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
01172 
01173     parser->push_state(parser_error);
01174 
01175     va_start(args, msg);
01176     char str[1024];
01177     vsnprintf(str, 1024, msg, args);
01178     va_end(args);
01179 
01180     int line = xmlSAX2GetLineNumber(parser->context);
01181 
01182     if (!parser->error_msg.empty()) parser->error_msg += "\n";
01183     parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
01184 }
01186 
01190 void D4ParserSax2::cleanup_parse()
01191 {
01192     bool wellFormed = context->wellFormed;
01193     bool valid = context->valid;
01194 
01195     context->sax = NULL;
01196     xmlFreeParserCtxt(context);
01197 
01198     delete d_enum_def;
01199     d_enum_def = 0;
01200 
01201     delete d_dim_def;
01202     d_dim_def = 0;
01203 
01204     // If there's an error, there may still be items on the stack at the
01205     // end of the parse.
01206     while (!btp_stack.empty()) {
01207         delete top_basetype();
01208         pop_basetype();
01209     }
01210 
01211     if (!wellFormed)
01212         throw Error("The DMR was not well formed. " + error_msg);
01213     else if (!valid)
01214         throw Error("The DMR was not valid." + error_msg);
01215     else if (get_state() == parser_error)
01216         throw Error(error_msg);
01217     else if (get_state() == parser_fatal_error)
01218         throw InternalErr(error_msg);
01219 }
01220 
01235 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
01236 {
01237     d_debug = debug;
01238 
01239     // Code example from libxml2 docs re: read from a stream.
01240 
01241     if (!f.good())
01242         throw Error("Input stream not open or read error");
01243     if (!dest_dmr)
01244         throw InternalErr(__FILE__, __LINE__, "DMR object is null");
01245 
01246     d_dmr = dest_dmr; // dump values here
01247 
01248     const int size = 1024;
01249     char chars[size];
01250     int line = 1;
01251 
01252     f.getline(chars, size);
01253     int res = f.gcount();
01254     if (res == 0) throw Error("No input found while parsing the DMR.");
01255 
01256     if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
01257 
01258     context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream");
01259     context->validate = true;
01260     push_state(parser_start);
01261 
01262     f.getline(chars, size);
01263     while ((f.gcount() > 0) && (get_state() != parser_end)) {
01264         if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
01265         xmlParseChunk(context, chars, f.gcount() - 1, 0);
01266         f.getline(chars, size);
01267     }
01268 
01269     // This call ends the parse.
01270     xmlParseChunk(context, chars, 0, 1/*terminate*/);
01271 
01272     // This checks that the state on the parser stack is parser_end and throws
01273     // an exception if it's not (i.e., the loop exited with gcount() == 0).
01274     cleanup_parse();
01275 }
01276 
01287 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
01288 {
01289     intern(document.c_str(), document.length(), dest_dmr, debug);
01290 }
01291 
01302 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
01303 {
01304     if (!(size > 0)) return;
01305 
01306     d_debug = debug;
01307 
01308     // Code example from libxml2 docs re: read from a stream.
01309 
01310     if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
01311     d_dmr = dest_dmr; // dump values in dest_dmr
01312 
01313     push_state(parser_start);
01314     context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream");
01315     context->validate = true;
01316     //push_state(parser_start);
01317     //xmlParseChunk(context, buffer, size, 0);
01318 
01319     // This call ends the parse.
01320     xmlParseChunk(context, buffer, 0, 1/*terminate*/);
01321 
01322     // This checks that the state on the parser stack is parser_end and throws
01323     // an exception if it's not (i.e., the loop exited with gcount() == 0).
01324     cleanup_parse();
01325 }
01326 
01327 } // namespace libdap