libdap  Updated for version 3.17.0
DDXParserSAX2.cc
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 //#define DODS_DEBUG 1
00029 //#define DODS_DEBUG2 1
00030 
00031 #include <cstring>
00032 #include <cstdarg>
00033 
00034 #include "BaseType.h"
00035 #include "Byte.h"
00036 #include "Int16.h"
00037 #include "UInt16.h"
00038 #include "Int32.h"
00039 #include "UInt32.h"
00040 #include "Float32.h"
00041 #include "Float64.h"
00042 #include "Str.h"
00043 #include "Url.h"
00044 #include "Array.h"
00045 #include "Structure.h"
00046 #include "Sequence.h"
00047 #include "Grid.h"
00048 
00049 #include "DDXParserSAX2.h"
00050 
00051 #include "util.h"
00052 #include "mime_util.h"
00053 #include "debug.h"
00054 
00055 namespace libdap {
00056 
// Printable names for the parser states, used only by the DBG2()/DBGN()
// trace statements in this file, which index it as
// states[parser->get_state()]. The entries therefore have to stay in the
// same order as the ParseState enumerators (declared outside this file).
//
// The table must exist whenever either debug macro is defined; the guard
// previously tested the misspelled DODS_DEUG2, so a build with only
// DODS_DEBUG2 defined referenced an undeclared 'states'.
#if defined(DODS_DEBUG) || defined(DODS_DEBUG2)
static const char *states[] =
    {
        "start",

        "dataset",

        "attribute_container",
        "attribute",
        "attribute_value",
        "other_xml_attribute",

        "alias",

        "simple_type",

        "array",
        "dimension",

        "grid",
        "map",

        "structure",
        "sequence",

        "blob href",

        "unknown",
        "error"
    };
#endif
00088 // Glue the BaseTypeFactory to the enum-based factory defined statically
00089 // here.
00090 
00091 BaseType *DDXParser::factory(Type t, const string & name)
00092 {
00093     switch (t) {
00094     case dods_byte_c:
00095         return d_factory->NewByte(name);
00096         break;
00097 
00098     case dods_int16_c:
00099         return d_factory->NewInt16(name);
00100         break;
00101 
00102     case dods_uint16_c:
00103         return d_factory->NewUInt16(name);
00104         break;
00105 
00106     case dods_int32_c:
00107         return d_factory->NewInt32(name);
00108         break;
00109 
00110     case dods_uint32_c:
00111         return d_factory->NewUInt32(name);
00112         break;
00113 
00114     case dods_float32_c:
00115         return d_factory->NewFloat32(name);
00116         break;
00117 
00118     case dods_float64_c:
00119         return d_factory->NewFloat64(name);
00120         break;
00121 
00122     case dods_str_c:
00123         return d_factory->NewStr(name);
00124         break;
00125 
00126     case dods_url_c:
00127         return d_factory->NewUrl(name);
00128         break;
00129 
00130     case dods_array_c:
00131         return d_factory->NewArray(name);
00132         break;
00133 
00134     case dods_structure_c:
00135         return d_factory->NewStructure(name);
00136         break;
00137 
00138     case dods_sequence_c:
00139         return d_factory->NewSequence(name);
00140         break;
00141 
00142     case dods_grid_c:
00143         return d_factory->NewGrid(name);
00144         break;
00145 
00146     default:
00147         return 0;
00148     }
00149 }
00150 
#if 0

// Disabled copy of the element-name -> Type lookup. The get_type() calls
// that remain in this file resolve to a declaration outside this listing
// (presumably util.h -- TODO confirm).
//
// Returns the Type whose DDX element name equals 'name' (case-sensitive),
// or dods_null_c when the name is not recognized.
static Type get_type(const char *name)
{
    static const struct {
        const char *tag;
        Type code;
    } known[] = {
        { "Byte", dods_byte_c },
        { "Int16", dods_int16_c },
        { "UInt16", dods_uint16_c },
        { "Int32", dods_int32_c },
        { "UInt32", dods_uint32_c },
        { "Float32", dods_float32_c },
        { "Float64", dods_float64_c },
        { "String", dods_str_c },
        { "Url", dods_url_c },
        { "Array", dods_array_c },
        { "Structure", dods_structure_c },
        { "Sequence", dods_sequence_c },
        { "Grid", dods_grid_c }
    };

    for (unsigned int k = 0; k < sizeof(known) / sizeof(known[0]); ++k) {
        if (strcmp(name, known[k].tag) == 0)
            return known[k].code;
    }

    return dods_null_c;
}
#endif
00197 
#if 0
// Not used. jhrg 1/17/13
//
// Map an element name to its Type when that type is one of the scalar
// (byte, integer, float, string, url) types; otherwise yield dods_null_c.
static Type is_simple_type(const char *name)
{
    const Type t = get_type(name);

    const bool scalar = t == dods_byte_c
                        || t == dods_int16_c
                        || t == dods_uint16_c
                        || t == dods_int32_c
                        || t == dods_uint32_c
                        || t == dods_float32_c
                        || t == dods_float64_c
                        || t == dods_str_c
                        || t == dods_url_c;

    return scalar ? t : dods_null_c;
}
#endif
00219 
/** Return true when the C strings @p name and @p tag are different. */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
00224 
00225 void DDXParser::set_state(DDXParser::ParseState state)
00226 {
00227     s.push(state);
00228 }
00229 
00230 DDXParser::ParseState DDXParser::get_state() const
00231 {
00232     return s.top();
00233 }
00234 
00235 void DDXParser::pop_state()
00236 {
00237     s.pop();
00238 }
00239 
00243 void DDXParser::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
00244 {
00245     if (!attribute_table.empty())
00246         attribute_table.clear(); // erase old attributes
00247 
00248     unsigned int index = 0;
00249     for (int i = 0; i < nb_attributes; ++i, index += 5) {
00250         // Make a value using the attribute name and the prefix, namespace URI
00251         // and the value. The prefix might be null.
00252         attribute_table.insert(map<string, XMLAttribute>::value_type(
00253                 string((const char *)attributes[index]),
00254                 XMLAttribute(attributes + index + 1)));
00255 
00256         DBG(cerr << "Attribute '" << (const char *)attributes[index] << "': "
00257                 << attribute_table[(const char *)attributes[index]].value << endl);
00258     }
00259 }
00260 
00261 void DDXParser::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
00262 {
00263     for (int i = 0; i < nb_namespaces; ++i ) {
00264         // make a value with the prefix and namespace URI. The prefix might be
00265         // null.
00266         namespace_table.insert(map<string,string>::value_type(
00267                 namespaces[i*2] != 0 ? (const char *)namespaces[i*2] : "",
00268                 (const char *)namespaces[i*2+1]));
00269     }
00270 }
00271 
00276 bool DDXParser::check_required_attribute(const string & attr)
00277 {
00278     map < string, XMLAttribute >::iterator i = attribute_table.find(attr);
00279     if (i == attribute_table.end())
00280         ddx_fatal_error(this, "Required attribute '%s' not found.",
00281                         attr.c_str());
00282     return true;
00283 }
00284 
00290 bool DDXParser::check_attribute(const string & attr)
00291 {
00292     return (attribute_table.find(attr) != attribute_table.end());
00293 }
00294 
00303 void DDXParser::process_attribute_element(const xmlChar **attrs, int nb_attributes)
00304 {
00305     // These methods set the state to parser_error if a problem is found.
00306     transfer_xml_attrs(attrs, nb_attributes);
00307 
00308     bool error = !(check_required_attribute(string("name"))
00309                    && check_required_attribute(string("type")));
00310     if (error)
00311         return;
00312 
00313     if (attribute_table["type"].value == "Container") {
00314         set_state(inside_attribute_container);
00315 
00316         AttrTable *child;
00317         AttrTable *parent = at_stack.top();
00318 
00319         child = parent->append_container(attribute_table["name"].value);
00320         at_stack.push(child);   // save.
00321         DBG2(cerr << "Pushing at" << endl);
00322     }
00323     else if (attribute_table["type"].value == "OtherXML") {
00324         set_state(inside_other_xml_attribute);
00325 
00326         dods_attr_name = attribute_table["name"].value;
00327         dods_attr_type = attribute_table["type"].value;
00328     }
00329     else {
00330         set_state(inside_attribute);
00331         // *** Modify parser. Add a special state for inside OtherXML since it
00332         // does not use the <value> element.
00333 
00334         dods_attr_name = attribute_table["name"].value;
00335         dods_attr_type = attribute_table["type"].value;
00336     }
00337 }
00338 
00342 void DDXParser::process_attribute_alias(const xmlChar **attrs, int nb_attributes)
00343 {
00344     transfer_xml_attrs(attrs, nb_attributes);
00345     if (check_required_attribute(string("name"))
00346         && check_required_attribute(string("attribute"))) {
00347         set_state(inside_alias);
00348         at_stack.top()->attr_alias(attribute_table["name"].value,
00349                                    attribute_table["attribute"].value);
00350     }
00351 }
00352 
00360 void DDXParser::process_variable(Type t, ParseState s, const xmlChar **attrs,
00361         int nb_attributes)
00362 {
00363     transfer_xml_attrs(attrs, nb_attributes);
00364 
00365     set_state(s);
00366 
00367     if (bt_stack.top()->type() == dods_array_c
00368             || check_required_attribute("name")) { // throws on error/false
00369         BaseType *btp = factory(t, attribute_table["name"].value);
00370         if (!btp) {
00371             ddx_fatal_error(this, "Internal parser error; could not instantiate the variable '%s'.",
00372                 attribute_table["name"].value.c_str());
00373         }
00374         else {
00375             // Only run this code if btp is not null! jhrg 9/14/15
00376             // Once we make the new variable, we not only load it on to the
00377             // BaseType stack, we also load its AttrTable on the AttrTable stack.
00378             // The attribute processing software always operates on the AttrTable
00379             // at the top of the AttrTable stack (at_stack).
00380             bt_stack.push(btp);
00381             at_stack.push(&btp->get_attr_table());
00382         }
00383     }
00384 }
00385 
00389 void DDXParser::process_dimension(const xmlChar **attrs, int nb_attributes)
00390 {
00391     transfer_xml_attrs(attrs, nb_attributes);
00392     if (check_required_attribute(string("size"))) {
00393         set_state(inside_dimension);
00394         Array *ap = dynamic_cast < Array * >(bt_stack.top());
00395                 if (!ap) {
00396                         ddx_fatal_error(this, "Parse error: Expected an array variable.");
00397                         return;
00398                 }
00399                 
00400         ap->append_dim(atoi(attribute_table["size"].value.c_str()),
00401                        attribute_table["name"].value);
00402     }
00403 }
00404 
00407 void DDXParser::process_blob(const xmlChar **attrs, int nb_attributes)
00408 {
00409     transfer_xml_attrs(attrs, nb_attributes);
00410     if (check_required_attribute(string("href"))) {
00411         set_state(inside_blob_href);
00412         *blob_href = attribute_table["href"].value;
00413     }
00414 }
00415 
00422 inline bool
00423 DDXParser::is_attribute_or_alias(const char *name, const xmlChar **attrs,
00424         int nb_attributes)
00425 {
00426     if (strcmp(name, "Attribute") == 0) {
00427         process_attribute_element(attrs, nb_attributes);
00428         // next state: inside_attribtue or inside_attribute_container
00429         return true;
00430     }
00431     else if (strcmp(name, "Alias") == 0) {
00432         process_attribute_alias(attrs, nb_attributes);
00433         // next state: inside_alias
00434         return true;
00435     }
00436 
00437     return false;
00438 }
00439 
00445 inline bool DDXParser::is_variable(const char *name, const xmlChar **attrs,
00446         int nb_attributes)
00447 {
00448     Type t = get_type(name);
00449     //if ((t = is_simple_type(name)) != dods_null_c) {
00450     if (is_simple_type(t)) {
00451         process_variable(t, inside_simple_type, attrs, nb_attributes);
00452         return true;
00453     }
00454     else if (strcmp(name, "Array") == 0) {
00455         process_variable(dods_array_c, inside_array, attrs, nb_attributes);
00456         return true;
00457     }
00458     else if (strcmp(name, "Structure") == 0) {
00459         process_variable(dods_structure_c, inside_structure, attrs, nb_attributes);
00460         return true;
00461     }
00462     else if (strcmp(name, "Sequence") == 0) {
00463         process_variable(dods_sequence_c, inside_sequence, attrs, nb_attributes);
00464         return true;
00465     }
00466     else if (strcmp(name, "Grid") == 0) {
00467         process_variable(dods_grid_c, inside_grid, attrs, nb_attributes);
00468         return true;
00469     }
00470 
00471     return false;
00472 }
00473 
00474 void DDXParser::finish_variable(const char *tag, Type t, const char *expected)
00475 {
00476     if (strcmp(tag, expected) != 0) {
00477         DDXParser::ddx_fatal_error(this,
00478                                    "Expected an end tag for a %s; found '%s' instead.",
00479                                    expected, tag);
00480         return;
00481     }
00482 
00483     pop_state();
00484 
00485     BaseType *btp = bt_stack.top();
00486 
00487     bt_stack.pop();
00488     at_stack.pop();
00489 
00490     if (btp->type() != t) {
00491         DDXParser::ddx_fatal_error(this,
00492                                    "Internal error: Expected a %s variable.",
00493                                    expected);
00494         delete btp;
00495         return;
00496     }
00497     // Once libxml2 validates, this can go away. 05/30/03 jhrg
00498     if (t == dods_array_c
00499         && static_cast<Array*>(btp)->dimensions() == 0) {
00500         DDXParser::ddx_fatal_error(this,
00501                                    "No dimension element included in the Array '%s'.",
00502                                    btp->name().c_str());
00503         delete btp;
00504         return;
00505     }
00506 
00507     BaseType *parent = bt_stack.top();
00508 
00509     if (!(parent->is_vector_type() || parent->is_constructor_type())) {
00510         DDXParser::ddx_fatal_error(this,
00511                                    "Tried to add the array variable '%s' to a non-constructor type (%s %s).",
00512                                    tag,
00513                                    bt_stack.top()->type_name().c_str(),
00514                                    bt_stack.top()->name().c_str());
00515         delete btp;
00516         return;
00517     }
00518 
00519     parent->add_var_nocopy(btp);
00520 }
00521 
00528 
00533 void DDXParser::ddx_start_document(void * p)
00534 {
00535     DDXParser *parser = static_cast<DDXParser*>(p);
00536     parser->error_msg = "";
00537     parser->char_data = "";
00538 
00539     // init attr table stack.
00540     parser->at_stack.push(&parser->dds->get_attr_table());
00541 
00542     // Trick; DDS *should* be a child of Structure. To simplify parsing,
00543     // stuff a Structure on the bt_stack and dump the top level variables
00544     // there. Once we're done, transfer the variables to the DDS.
00545     parser->bt_stack.push(new Structure("dummy_dds"));
00546 
00547     parser->set_state(parser_start);
00548 
00549     DBG2(cerr << "Parser state: " << states[parser->get_state()] << endl);
00550 }
00551 
00554 void DDXParser::ddx_end_document(void * p)
00555 {
00556     DDXParser *parser = static_cast<DDXParser*>(p);
00557     DBG2(cerr << "Ending state == " << states[parser->get_state()] <<
00558          endl);
00559 
00560     if (parser->get_state() != parser_start)
00561         DDXParser::ddx_fatal_error(parser, "The document contained unbalanced tags.");
00562 
00563     // If we've found any sort of error, don't make the DDX; intern() will
00564     // take care of the error.
00565     if (parser->get_state() == parser_error) {
00566         return;
00567     }
00568 
00569     // Pop the temporary Structure off the stack and transfer its variables
00570     // to the DDS.
00571     Constructor *cp = dynamic_cast < Constructor * >(parser->bt_stack.top());
00572     if (!cp) {
00573         delete parser->bt_stack.top();
00574         parser->bt_stack.pop();
00575         ddx_fatal_error(parser, "Parse error: Expected a Structure, Sequence or Grid variable.");
00576                 return;
00577     }
00578 
00579     for (Constructor::Vars_iter i = cp->var_begin(); i != cp->var_end(); ++i) {
00580         (*i)->set_parent(0);        // top-level vars have no parents
00581         parser->dds->add_var(*i);
00582     }
00583 
00584     delete parser->bt_stack.top();
00585     parser->bt_stack.pop();
00586 }
00587 
00588 void DDXParser::ddx_sax2_start_element(void *p,
00589         const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
00590         int nb_namespaces, const xmlChar **namespaces,
00591         int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
00592 {
00593     DDXParser *parser = static_cast<DDXParser*>(p);
00594     const char *localname = (const char *)l;
00595 
00596     DBG2(cerr << "start element: " << localname << ", states: "
00597          << states[parser->get_state()]);
00598 
00599     switch (parser->get_state()) {
00600     case parser_start:
00601         if (strcmp(localname, "Dataset") == 0) {
00602             parser->set_state(inside_dataset);
00603             parser->root_ns = URI != 0 ? (const char *)URI: "";
00604             parser->transfer_xml_attrs(attributes, nb_attributes);
00605 
00606             if (parser->check_required_attribute(string("name")))
00607                 parser->dds->set_dataset_name(parser->attribute_table["name"].value);
00608 
00609             if (parser->check_attribute("dapVersion"))
00610                 parser->dds->set_dap_version(parser->attribute_table["dapVersion"].value);
00611         }
00612         else
00613             DDXParser::ddx_fatal_error(parser,
00614                                        "Expected response to start with a Dataset element; found '%s' instead.",
00615                                        localname);
00616         break;
00617 
00618     case inside_dataset:
00619         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00620             break;
00621         else if (parser->is_variable(localname, attributes, nb_attributes))
00622             break;
00623         else if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0) {
00624             parser->process_blob(attributes, nb_attributes);
00625             // next state: inside_data_blob
00626         }
00627         else
00628             DDXParser::ddx_fatal_error(parser,
00629                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
00630                                        localname);
00631         break;
00632 
00633     case inside_attribute_container:
00634         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00635             break;
00636         else
00637             DDXParser::ddx_fatal_error(parser,
00638                                        "Expected an Attribute or Alias element; found '%s' instead.",
00639                                        localname);
00640         break;
00641 
00642     case inside_attribute:
00643         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00644             break;
00645         else if (strcmp(localname, "value") == 0)
00646             parser->set_state(inside_attribute_value);
00647         else
00648             ddx_fatal_error(parser,
00649                             "Expected an 'Attribute', 'Alias' or 'value' element; found '%s' instead.",
00650                             localname);
00651         break;
00652 
00653     case inside_attribute_value:
00654         ddx_fatal_error(parser,
00655                         "Internal parser error; unexpected state, inside value while processing element '%s'.",
00656                         localname);
00657         break;
00658 
00659     case inside_other_xml_attribute:
00660         DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname << endl);
00661 
00662         parser->other_xml_depth++;
00663 
00664         // Accumulate the elements here
00665 
00666         parser->other_xml.append("<");
00667         if (prefix) {
00668             parser->other_xml.append((const char *)prefix);
00669             parser->other_xml.append(":");
00670         }
00671         parser->other_xml.append(localname);
00672 
00673         if (nb_namespaces != 0) {
00674             parser->transfer_xml_ns(namespaces, nb_namespaces);
00675 
00676             for (map<string,string>::iterator i = parser->namespace_table.begin();
00677                 i != parser->namespace_table.end();
00678                 ++i) {
00679                 parser->other_xml.append(" xmlns");
00680                 if (!i->first.empty()) {
00681                     parser->other_xml.append(":");
00682                     parser->other_xml.append(i->first);
00683                 }
00684                 parser->other_xml.append("=\"");
00685                 parser->other_xml.append(i->second);
00686                 parser->other_xml.append("\"");
00687             }
00688         }
00689 
00690         if (nb_attributes != 0) {
00691             parser->transfer_xml_attrs(attributes, nb_attributes);
00692             for (XMLAttrMap::iterator i = parser->attr_table_begin();
00693                 i != parser->attr_table_end();
00694                 ++i) {
00695                 parser->other_xml.append(" ");
00696                 if (!i->second.prefix.empty()) {
00697                     parser->other_xml.append(i->second.prefix);
00698                     parser->other_xml.append(":");
00699                 }
00700                 parser->other_xml.append(i->first);
00701                 parser->other_xml.append("=\"");
00702                 parser->other_xml.append(i->second.value);
00703                 parser->other_xml.append("\"");
00704             }
00705         }
00706 
00707         parser->other_xml.append(">");
00708         break;
00709 
00710     case inside_alias:
00711         ddx_fatal_error(parser,
00712                         "Internal parser error; unexpected state, inside alias while processing element '%s'.",
00713                         localname);
00714         break;
00715 
00716     case inside_simple_type:
00717         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00718             break;
00719         else
00720             ddx_fatal_error(parser,
00721                             "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
00722                             localname);
00723         break;
00724 
00725     case inside_array:
00726         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00727             break;
00728         else if (is_not(localname, "Array")
00729                 && parser->is_variable(localname, attributes, nb_attributes))
00730             break;
00731         else if (strcmp(localname, "dimension") == 0) {
00732             parser->process_dimension(attributes, nb_attributes);
00733             // next state: inside_dimension
00734         }
00735         else
00736             ddx_fatal_error(parser,
00737                             "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
00738                             localname);
00739         break;
00740 
00741     case inside_dimension:
00742         ddx_fatal_error(parser,
00743                         "Internal parser error; unexpected state, inside dimension while processing element '%s'.",
00744                         localname);
00745         break;
00746 
00747     case inside_structure:
00748         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00749             break;
00750         else if (parser->is_variable(localname, attributes, nb_attributes))
00751             break;
00752         else
00753             DDXParser::ddx_fatal_error(parser,
00754                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
00755                                        localname);
00756         break;
00757 
00758     case inside_sequence:
00759         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00760             break;
00761         else if (parser->is_variable(localname, attributes, nb_attributes))
00762             break;
00763         else
00764             DDXParser::ddx_fatal_error(parser,
00765                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
00766                                        localname);
00767         break;
00768 
00769     case inside_grid:
00770         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00771             break;
00772         else if (strcmp(localname, "Array") == 0)
00773             parser->process_variable(dods_array_c, inside_array, attributes, nb_attributes);
00774         else if (strcmp(localname, "Map") == 0)
00775             parser->process_variable(dods_array_c, inside_map, attributes, nb_attributes);
00776         else
00777             DDXParser::ddx_fatal_error(parser,
00778                                        "Expected an Attribute, Alias or variable element; found '%s' instead.",
00779                                        localname);
00780         break;
00781 
00782     case inside_map:
00783         if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
00784             break;
00785         else if (is_not(localname, "Array") && is_not(localname, "Sequence")
00786                  && is_not(localname, "Grid")
00787                  && parser->is_variable(localname, attributes, nb_attributes))
00788             break;
00789         else if (strcmp(localname, "dimension") == 0) {
00790             parser->process_dimension(attributes, nb_attributes);
00791             // next state: inside_dimension
00792         }
00793         else
00794             ddx_fatal_error(parser,
00795                             "Expected an 'Attribute', 'Alias', variable or 'dimension' element; found '%s' instead.",
00796                             localname);
00797         break;
00798 
00799     case inside_blob_href:
00800         ddx_fatal_error(parser,
00801                         "Internal parser error; unexpected state, inside blob href while processing element '%s'.",
00802                         localname);
00803         break;
00804 
00805     case parser_unknown:
00806         // *** Never used? If so remove/error
00807         parser->set_state(parser_unknown);
00808         break;
00809 
00810     case parser_error:
00811         break;
00812     }
00813 
00814     DBGN(cerr << " ... " << states[parser->get_state()] << endl);
00815 }
00816 
00817 void DDXParser::ddx_sax2_end_element(void *p, const xmlChar *l,
00818         const xmlChar *prefix, const xmlChar *URI)
00819 {
00820     DDXParser *parser = static_cast<DDXParser*>(p);
00821     const char *localname = (const char *)l;
00822 
00823     DBG2(cerr << "End element " << localname << " (state "
00824          << states[parser->get_state()] << ")" << endl);
00825 
00826     switch (parser->get_state()) {
00827     case parser_start:
00828         ddx_fatal_error(parser,
00829                         "Internal parser error; unexpected state, inside start state while processing element '%s'.",
00830                         localname);
00831         break;
00832 
00833     case inside_dataset:
00834         if (strcmp(localname, "Dataset") == 0)
00835             parser->pop_state();
00836         else
00837             DDXParser::ddx_fatal_error(parser,
00838                                        "Expected an end Dataset tag; found '%s' instead.",
00839                                        localname);
00840         break;
00841 
00842     case inside_attribute_container:
00843         if (strcmp(localname, "Attribute") == 0) {
00844             parser->pop_state();
00845             parser->at_stack.pop();     // pop when leaving a container.
00846         }
00847         else
00848             DDXParser::ddx_fatal_error(parser,
00849                                        "Expected an end Attribute tag; found '%s' instead.",
00850                                        localname);
00851         break;
00852 
00853     case inside_attribute:
00854         if (strcmp(localname, "Attribute") == 0)
00855             parser->pop_state();
00856         else
00857             DDXParser::ddx_fatal_error(parser,
00858                                        "Expected an end Attribute tag; found '%s' instead.",
00859                                        localname);
00860         break;
00861 
00862     case inside_attribute_value:
00863         if (strcmp(localname, "value") == 0) {
00864             parser->pop_state();
00865             AttrTable *atp = parser->at_stack.top();
00866             atp->append_attr(parser->dods_attr_name,
00867                              parser->dods_attr_type, parser->char_data);
00868             parser->char_data = "";     // Null this after use.
00869         }
00870         else
00871             DDXParser::ddx_fatal_error(parser,
00872                                        "Expected an end value tag; found '%s' instead.",
00873                                        localname);
00874 
00875         break;
00876 
00877     case inside_other_xml_attribute: {
00878             if (strcmp(localname, "Attribute") == 0
00879                     && parser->root_ns == (const char *)URI) {
00880 
00881                 DBGN(cerr << endl << "\t Popping the 'inside_other_xml_attribute' state"
00882                         << endl);
00883 
00884                 parser->pop_state();
00885 
00886                 AttrTable *atp = parser->at_stack.top();
00887                 atp->append_attr(parser->dods_attr_name,
00888                         parser->dods_attr_type, parser->other_xml);
00889 
00890                 parser->other_xml = ""; // Null this after use.
00891             }
00892             else {
00893                 DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname
00894                         << ", depth: " << parser->other_xml_depth << endl);
00895                 if (parser->other_xml_depth == 0)
00896                     DDXParser::ddx_fatal_error(parser,
00897                                                "Expected an OtherXML attribute to end! Instead I found '%s'",
00898                                                localname);
00899                 parser->other_xml_depth--;
00900 
00901                 parser->other_xml.append("</");
00902                 if (prefix) {
00903                     parser->other_xml.append((const char *)prefix);
00904                     parser->other_xml.append(":");
00905                 }
00906                 parser->other_xml.append(localname);
00907                 parser->other_xml.append(">");
00908             }
00909             break;
00910         }
00911         // Alias is busted in libdap++ 05/29/03 jhrg
00912     case inside_alias:
00913         parser->pop_state();
00914         break;
00915 
00916     case inside_simple_type: {
00917         Type t = get_type(localname);
00918         if (is_simple_type(t)) {
00919             parser->pop_state();
00920             BaseType *btp = parser->bt_stack.top();
00921             parser->bt_stack.pop();
00922             parser->at_stack.pop();
00923 
00924             BaseType *parent = parser->bt_stack.top();
00925 
00926             if (parent->is_vector_type() || parent->is_constructor_type()) {
00927                 parent->add_var(btp);
00928                 delete btp;
00929             }
00930             else {
00931                 DDXParser::ddx_fatal_error(parser,
00932                                            "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).",
00933                                            localname,
00934                                            parser->bt_stack.top()->
00935                                            type_name().c_str(),
00936                                            parser->bt_stack.top()->name().
00937                                            c_str());
00938                 delete btp;
00939             }
00940         }
00941         else {
00942             DDXParser::ddx_fatal_error(parser,
00943                                        "Expected an end tag for a simple type; found '%s' instead.",
00944                                        localname);
00945         }
00946         break;
00947     }
00948 
00949     case inside_array:
00950         parser->finish_variable(localname, dods_array_c, "Array");
00951         break;
00952 
00953     case inside_dimension:
00954         if (strcmp(localname, "dimension") == 0)
00955             parser->pop_state();
00956         else
00957             DDXParser::ddx_fatal_error(parser,
00958                                        "Expected an end dimension tag; found '%s' instead.",
00959                                        localname);
00960         break;
00961 
00962     case inside_structure:
00963         parser->finish_variable(localname, dods_structure_c, "Structure");
00964         break;
00965 
00966     case inside_sequence:
00967         parser->finish_variable(localname, dods_sequence_c, "Sequence");
00968         break;
00969 
00970     case inside_grid:
00971         parser->finish_variable(localname, dods_grid_c, "Grid");
00972         break;
00973 
00974     case inside_map:
00975         parser->finish_variable(localname, dods_array_c, "Map");
00976         break;
00977 
00978     case inside_blob_href:
00979         if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0)
00980             parser->pop_state();
00981         else
00982             DDXParser::ddx_fatal_error(parser,
00983                                        "Expected an end dataBLOB/blob tag; found '%s' instead.",
00984                                        localname);
00985         break;
00986 
00987     case parser_unknown:
00988         parser->pop_state();
00989         break;
00990 
00991     case parser_error:
00992         break;
00993     }
00994 
00995 
00996     DBGN(cerr << " ... " << states[parser->get_state()] << endl);
00997 }
00998 
01002 void DDXParser::ddx_get_characters(void * p, const xmlChar * ch, int len)
01003 {
01004     DDXParser *parser = static_cast<DDXParser*>(p);
01005 
01006     switch (parser->get_state()) {
01007         case inside_attribute_value:
01008             parser->char_data.append((const char *)(ch), len);
01009             DBG2(cerr << "Characters: '" << parser->char_data << "'" << endl);
01010             break;
01011 
01012         case inside_other_xml_attribute:
01013             parser->other_xml.append((const char *)(ch), len);
01014             DBG2(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
01015             break;
01016 
01017         default:
01018             break;
01019     }
01020 }
01021 
01026 void DDXParser::ddx_ignoreable_whitespace(void *p, const xmlChar *ch,
01027         int len)
01028 {
01029     DDXParser *parser = static_cast<DDXParser*>(p);
01030 
01031     switch (parser->get_state()) {
01032          case inside_other_xml_attribute:
01033              parser->other_xml.append((const char *)(ch), len);
01034              break;
01035 
01036          default:
01037              break;
01038     }
01039 }
01040 
01046 void DDXParser::ddx_get_cdata(void *p, const xmlChar *value, int len)
01047 {
01048     DDXParser *parser = static_cast<DDXParser*>(p);
01049 
01050     switch (parser->get_state()) {
01051          case inside_other_xml_attribute:
01052              parser->other_xml.append((const char *)(value), len);
01053              break;
01054 
01055          case parser_unknown:
01056              break;
01057 
01058          default:
01059              DDXParser::ddx_fatal_error(parser,
01060                                         "Found a CData block but none are allowed by DAP.");
01061 
01062              break;
01063     }
01064 }
01065 
01070 xmlEntityPtr DDXParser::ddx_get_entity(void *, const xmlChar * name)
01071 {
01072     return xmlGetPredefinedEntity(name);
01073 }
01074 
01082 void DDXParser::ddx_fatal_error(void * p, const char *msg, ...)
01083 {
01084     va_list args;
01085     DDXParser *parser = static_cast<DDXParser*>(p);
01086 
01087     parser->set_state(parser_error);
01088 
01089     va_start(args, msg);
01090     char str[1024];
01091     vsnprintf(str, 1024, msg, args);
01092     va_end(args);
01093 
01094     int line = xmlSAX2GetLineNumber(parser->ctxt);
01095 
01096     parser->error_msg += "At line " + long_to_string(line) + ": ";
01097     parser->error_msg += string(str) + string("\n");
01098 }
01099 
01101 
01102 void DDXParser::cleanup_parse(xmlParserCtxtPtr & context)
01103 {
01104     bool wellFormed = context->wellFormed;
01105     bool valid = context->valid;
01106 
01107     context->sax = NULL;
01108     xmlFreeParserCtxt(context);
01109 
01110     // If there's an error, there may still be items on the stack at the
01111     // end of the parse.
01112     while (!bt_stack.empty()) {
01113         delete bt_stack.top();
01114         bt_stack.pop();
01115     }
01116 
01117     if (!wellFormed) {
01118         throw DDXParseFailed(string("\nThe DDX is not a well formed XML document.\n") + error_msg);
01119     }
01120 
01121     if (!valid) {
01122         throw DDXParseFailed(string("\nThe DDX is not a valid document.\n") + error_msg);
01123     }
01124 
01125     if (get_state() == parser_error) {
01126         throw DDXParseFailed(string("\nError parsing DDX response.\n") + error_msg);
01127     }
01128 }
01129 
01137 void DDXParser::intern_stream(istream &in, DDS *dest_dds, string &cid, const string &boundary)
01138 {
01139     // Code example from libxml2 docs re: read from a stream.
01140     if (!in || in.eof())
01141         throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
01142 
01143     const int size = 1024;
01144     char chars[size + 1];
01145 
01146     // int res = fread(chars, 1, 4, in);
01147     in.readsome(chars, 4);
01148     int res = in.gcount();
01149     if (res > 0) {
01150         chars[4]='\0';
01151         xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
01152 
01153         ctxt = context;         // need ctxt for error messages
01154         dds = dest_dds;         // dump values here
01155         blob_href = &cid;       // cid goes here
01156 
01157         xmlSAXHandler ddx_sax_parser;
01158         memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
01159 
01160         ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
01161         ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
01162         ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
01163         ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
01164         ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
01165         ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
01166         ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
01167         ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
01168         ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
01169         ddx_sax_parser.initialized = XML_SAX2_MAGIC;
01170         ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
01171         ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
01172 
01173         context->sax = &ddx_sax_parser;
01174         context->userData = this;
01175         context->validate = true;
01176 
01177         in.getline(chars, size);        // chars has size+1 elements
01178         res = in.gcount();
01179         chars[res-1] = '\n';            // libxml needs the newline; w/o it the parse will fail
01180         chars[res] = '\0';
01181         while (res > 0 && !is_boundary(chars, boundary)) {
01182                 DBG(cerr << "line (" << res << "): " << chars << endl);
01183                 xmlParseChunk(ctxt, chars, res, 0);
01184 
01185                 in.getline(chars, size);        // chars has size+1 elements
01186                 res = in.gcount();
01187                 if (res > 0) {
01188                         chars[res-1] = '\n';
01189                         chars[res] = '\0';
01190                 }
01191         }
01192 
01193         // This call ends the parse: The fourth argument of xmlParseChunk is
01194         // the bool 'terminate.'
01195         xmlParseChunk(ctxt, chars, 0, 1);
01196 
01197         cleanup_parse(context);
01198     }
01199 }
01200 
01203 void DDXParser::intern_stream(FILE *in, DDS *dest_dds, string &cid, const string &boundary)
01204 {
01205     // Code example from libxml2 docs re: read from a stream.
01206     if (!in || feof(in) || ferror(in))
01207         throw InternalErr(__FILE__, __LINE__,
01208                           "Input stream not open or read error");
01209 
01210     const int size = 1024;
01211     char chars[size];
01212 
01213     int res = fread(chars, 1, 4, in);
01214     if (res > 0) {
01215         chars[4]='\0';
01216         xmlParserCtxtPtr context =
01217             xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
01218 
01219         ctxt = context;         // need ctxt for error messages
01220         dds = dest_dds;         // dump values here
01221         blob_href = &cid;       // cid goes here
01222 
01223         xmlSAXHandler ddx_sax_parser;
01224         memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
01225 
01226         ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
01227         ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
01228         ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
01229         ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
01230         ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
01231         ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
01232         ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
01233         ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
01234         ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
01235         ddx_sax_parser.initialized = XML_SAX2_MAGIC;
01236         ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
01237         ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
01238 
01239         context->sax = &ddx_sax_parser;
01240         context->userData = this;
01241         context->validate = true;
01242 
01243 
01244         while ((fgets(chars, size, in) > 0) && !is_boundary(chars, boundary)) {
01245             //chars[size-1] = '\0';
01246             DBG(cerr << "line (" << strlen(chars) << "): " << chars << endl);
01247 
01248             xmlParseChunk(ctxt, chars, strlen(chars), 0);
01249         }
01250         // This call ends the parse: The fourth argument of xmlParseChunk is
01251         // the bool 'terminate.'
01252         xmlParseChunk(ctxt, chars, 0, 1);
01253 
01254         cleanup_parse(context);
01255     }
01256 }
01257 
01258 
01270 void DDXParser::intern(const string & document, DDS * dest_dds, string &cid)
01271 {
01272     // Create the context pointer explicitly so that we can store a pointer
01273     // to it in the DDXParser instance. This provides a way to generate our
01274     // own error messages *with* line numbers. The messages are pretty
01275     // meaningless otherwise. This means that we use an interface from the
01276     // 'parser internals' header, and not the 'parser' header. However, this
01277     // interface is also used in one of the documented examples, so it's
01278     // probably pretty stable. 06/02/03 jhrg
01279     xmlParserCtxtPtr context = xmlCreateFileParserCtxt(document.c_str());
01280     if (!context)
01281         throw
01282         DDXParseFailed(string
01283                        ("Could not initialize the parser with the file: '")
01284                        + document + string("'."));
01285 
01286     dds = dest_dds;             // dump values here
01287     blob_href = &cid;
01288     ctxt = context;             // need ctxt for error messages
01289 
01290     xmlSAXHandler ddx_sax_parser;
01291     memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
01292 
01293     ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
01294     ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
01295     ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
01296     ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
01297     ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
01298     ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
01299     ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
01300     ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
01301     ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
01302     ddx_sax_parser.initialized = XML_SAX2_MAGIC;
01303     ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
01304     ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
01305 
01306     context->sax = &ddx_sax_parser;
01307     context->userData = this;
01308     context->validate = false;
01309 
01310     xmlParseDocument(context);
01311 
01312     cleanup_parse(context);
01313 }
01314 
01315 } // namespace libdap