libdap
Updated for version 3.17.0
|
00001 // -*- mode: c++; c-basic-offset:4 -*- 00002 00003 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00004 // Access Protocol. 00005 00006 // Copyright (c) 2012 OPeNDAP, Inc. 00007 // Author: James Gallagher <jgallagher@opendap.org> 00008 // 00009 // This library is free software; you can redistribute it and/or 00010 // modify it under the terms of the GNU Lesser General Public 00011 // License as published by the Free Software Foundation; either 00012 // version 2.1 of the License, or (at your option) any later version. 00013 // 00014 // This library is distributed in the hope that it will be useful, 00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 // Lesser General Public License for more details. 00018 // 00019 // You should have received a copy of the GNU Lesser General Public 00020 // License along with this library; if not, write to the Free Software 00021 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 // 00023 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00024 00025 #include "config.h" 00026 00027 //#define DODS_DEBUG 1 00028 00029 #include <iostream> 00030 #include <sstream> 00031 00032 #include <cstring> 00033 #include <cstdarg> 00034 #include <cassert> 00035 00036 #include <libxml/parserInternals.h> 00037 00038 #include "DMR.h" 00039 00040 #include "BaseType.h" 00041 #include "Array.h" 00042 #include "D4Group.h" 00043 #include "D4Attributes.h" 00044 #include "D4Maps.h" 00045 #include "D4Enum.h" 00046 00047 #include "D4BaseTypeFactory.h" 00048 00049 #include "D4ParserSax2.h" 00050 00051 #include "util.h" 00052 #include "debug.h" 00053 00054 namespace libdap { 00055 00056 static const char *states[] = { 00057 "parser_start", 00058 00059 "inside_dataset", 00060 00061 // inside_group is the state just after parsing the start of a Group 00062 // element. 00063 "inside_group", 00064 00065 "inside_attribute_container", 00066 "inside_attribute", 00067 "inside_attribute_value", 00068 "inside_other_xml_attribute", 00069 00070 "inside_enum_def", 00071 "inside_enum_const", 00072 00073 "inside_dim_def", 00074 00075 // This covers Byte, ..., Url, Opaque 00076 "inside_simple_type", 00077 00078 // "inside_array", 00079 "inside_dim", 00080 "inside_map", 00081 00082 "inside_constructor", 00083 00084 "parser_unknown", 00085 "parser_error", 00086 "parser_fatal_error", 00087 00088 "parser_end" 00089 }; 00090 00091 static bool is_not(const char *name, const char *tag) 00092 { 00093 return strcmp(name, tag) != 0; 00094 } 00095 00104 D4EnumDef * 00105 D4ParserSax2::enum_def() 00106 { 00107 if (!d_enum_def) d_enum_def = new D4EnumDef; 00108 00109 return d_enum_def; 00110 } 00111 00118 D4Dimension * 00119 D4ParserSax2::dim_def() { 00120 if (!d_dim_def) d_dim_def = new D4Dimension; 00121 00122 return d_dim_def; 00123 } 00124 00130 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes) 00131 { 00132 if (!xml_attrs.empty()) 00133 xml_attrs.clear(); // erase old attributes 00134 00135 // Make a value using the attribute name and the prefix, namespace URI 00136 // and the value. The prefix might be null. 00137 unsigned int index = 0; 00138 for (int i = 0; i < nb_attributes; ++i, index += 5) { 00139 xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]), 00140 XMLAttribute(attributes + index + 1))); 00141 00142 DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': " 00143 << xml_attrs[(const char *)attributes[index]].value << endl); 00144 } 00145 } 00146 00153 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces) 00154 { 00155 // make a value with the prefix and namespace URI. The prefix might be null. 00156 for (int i = 0; i < nb_namespaces; ++i) { 00157 namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "", 00158 (const char *)namespaces[i * 2 + 1])); 00159 } 00160 } 00161 00168 bool D4ParserSax2::check_required_attribute(const string & attr) 00169 { 00170 if (xml_attrs.find(attr) == xml_attrs.end()) { 00171 dmr_error(this, "Required attribute '%s' not found.", attr.c_str()); 00172 return false; 00173 } 00174 else 00175 return true; 00176 } 00177 00184 bool D4ParserSax2::check_attribute(const string & attr) 00185 { 00186 return (xml_attrs.find(attr) != xml_attrs.end()); 00187 } 00188 00189 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes) 00190 { 00191 if (is_not(name, "Dimension")) 00192 return false; 00193 00194 transfer_xml_attrs(attrs, nb_attributes); 00195 00196 if (!(check_required_attribute("name") && check_required_attribute("size"))) { 00197 dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element."); 00198 return false; 00199 } 00200 00201 // This getter (dim_def) allocates a new object if needed. 00202 dim_def()->set_name(xml_attrs["name"].value); 00203 try { 00204 dim_def()->set_size(xml_attrs["size"].value); 00205 } 00206 catch (Error &e) { 00207 dmr_error(this, e.get_error_message().c_str()); 00208 return false; 00209 } 00210 00211 return true; 00212 } 00213 00231 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes) 00232 { 00233 if (is_not(name, "Dim")) 00234 return false; 00235 00236 transfer_xml_attrs(attrs, nb_attributes); 00237 00238 if (check_attribute("size") && check_attribute("name")) { 00239 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used."); 00240 return false; 00241 } 00242 if (!(check_attribute("size") || check_attribute("name"))) { 00243 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element."); 00244 return false; 00245 } 00246 00247 if (!top_basetype()->is_vector_type()) { 00248 // Make the top BaseType* an array 00249 BaseType *b = top_basetype(); 00250 pop_basetype(); 00251 00252 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name())); 00253 a->set_is_dap4(true); 00254 a->add_var_nocopy(b); 00255 a->set_attributes_nocopy(b->attributes()); 00256 // trick: instead of popping b's attributes, copying them and then pushing 00257 // a's copy, just move the pointer (but make sure there's only one object that 00258 // references that pointer). 00259 b->set_attributes_nocopy(0); 00260 00261 push_basetype(a); 00262 } 00263 00264 assert(top_basetype()->is_vector_type()); 00265 00266 Array *a = static_cast<Array*>(top_basetype()); 00267 if (check_attribute("size")) { 00268 a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13 00269 return true; 00270 } 00271 else if (check_attribute("name")) { 00272 string name = xml_attrs["name"].value; 00273 00274 D4Dimension *dim = 0; 00275 if (name[0] == '/') // lookup the Dimension in the root group 00276 dim = dmr()->root()->find_dim(name); 00277 else // get enclosing Group and lookup Dimension there 00278 dim = top_group()->find_dim(name); 00279 00280 if (!dim) 00281 throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'."); 00282 a->append_dim(dim); 00283 return true; 00284 } 00285 00286 return false; 00287 } 00288 00289 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes) 00290 { 00291 if (is_not(name, "Map")) 00292 return false; 00293 00294 transfer_xml_attrs(attrs, nb_attributes); 00295 00296 if (!check_attribute("name")) { 00297 dmr_error(this, "The 'name' attribute must be used in a Map element."); 00298 return false; 00299 } 00300 00301 if (!top_basetype()->is_vector_type()) { 00302 // Make the top BaseType* an array 00303 BaseType *b = top_basetype(); 00304 pop_basetype(); 00305 00306 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name())); 00307 a->set_is_dap4(true); 00308 a->add_var_nocopy(b); 00309 a->set_attributes_nocopy(b->attributes()); 00310 // trick: instead of popping b's attributes, copying them and then pushing 00311 // a's copy, just move the pointer (but make sure there's only one object that 00312 // references that pointer). 00313 b->set_attributes_nocopy(0); 00314 00315 push_basetype(a); 00316 } 00317 00318 assert(top_basetype()->is_vector_type()); 00319 00320 Array *a = static_cast<Array*>(top_basetype()); 00321 00322 string map_name = xml_attrs["name"].value; 00323 if (xml_attrs["name"].value[0] != '/') 00324 map_name = top_group()->FQN() + map_name; 00325 00326 Array *map_source = 0; // The array variable that holds the data for the Map 00327 00328 if (map_name[0] == '/') // lookup the Map in the root group 00329 map_source = dmr()->root()->find_map_source(map_name); 00330 else // get enclosing Group and lookup Map there 00331 map_source = top_group()->find_map_source(map_name); 00332 00333 if (!map_source) 00334 throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'."); 00335 00336 a->maps()->add_map(new D4Map(map_name, map_source)); 00337 00338 return true; 00339 } 00340 00341 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes) 00342 { 00343 if (is_not(name, "Group")) 00344 return false; 00345 00346 transfer_xml_attrs(attrs, nb_attributes); 00347 00348 if (!check_required_attribute("name")) { 00349 dmr_error(this, "The required attribute 'name' was missing from a Group element."); 00350 return false; 00351 } 00352 00353 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value); 00354 if (!btp) { 00355 dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str()); 00356 return false; 00357 } 00358 00359 D4Group *grp = static_cast<D4Group*>(btp); 00360 00361 // Need to set this to get the D4Attribute behavior in the type classes 00362 // shared between DAP2 and DAP4. jhrg 4/18/13 00363 grp->set_is_dap4(true); 00364 00365 // link it up and change the current group 00366 D4Group *parent = top_group(); 00367 if (!parent) { 00368 dmr_fatal_error(this, "No Group on the Group stack."); 00369 return false; 00370 } 00371 00372 grp->set_parent(parent); 00373 parent->add_group_nocopy(grp); 00374 00375 push_group(grp); 00376 push_attributes(grp->attributes()); 00377 return true; 00378 } 00379 00386 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes) 00387 { 00388 if (is_not(name, "Attribute")) 00389 return false; 00390 00391 // These methods set the state to parser_error if a problem is found. 00392 transfer_xml_attrs(attrs, nb_attributes); 00393 00394 // add error 00395 if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) { 00396 dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element."); 00397 return false; 00398 } 00399 00400 if (xml_attrs["type"].value == "Container") { 00401 push_state(inside_attribute_container); 00402 00403 DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl); 00404 D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c); 00405 00406 D4Attributes *tos = top_attributes(); 00407 // add return 00408 if (!tos) { 00409 delete child; 00410 dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack."); 00411 return false; 00412 } 00413 00414 tos->add_attribute_nocopy(child); 00415 push_attributes(child->attributes()); 00416 } 00417 else if (xml_attrs["type"].value == "OtherXML") { 00418 push_state(inside_other_xml_attribute); 00419 00420 dods_attr_name = xml_attrs["name"].value; 00421 dods_attr_type = xml_attrs["type"].value; 00422 } 00423 else { 00424 push_state(inside_attribute); 00425 00426 dods_attr_name = xml_attrs["name"].value; 00427 dods_attr_type = xml_attrs["type"].value; 00428 } 00429 00430 return true; 00431 } 00432 00438 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes) 00439 { 00440 if (is_not(name, "Enumeration")) 00441 return false; 00442 00443 transfer_xml_attrs(attrs, nb_attributes); 00444 00445 if (!(check_required_attribute("name") && check_required_attribute("basetype"))) { 00446 dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element."); 00447 return false; 00448 } 00449 00450 Type t = get_type(xml_attrs["basetype"].value.c_str()); 00451 if (!is_integer_type(t)) { 00452 dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.", 00453 xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str()); 00454 return false; 00455 } 00456 00457 // This getter allocates a new object if needed. 00458 string enum_def_path = xml_attrs["name"].value; 00459 #if 0 00460 // Use FQNs when things are referenced, not when they are defined 00461 if (xml_attrs["name"].value[0] != '/') 00462 enum_def_path = top_group()->FQN() + enum_def_path; 00463 #endif 00464 enum_def()->set_name(enum_def_path); 00465 enum_def()->set_type(t); 00466 00467 return true; 00468 } 00469 00470 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes) 00471 { 00472 if (is_not(name, "EnumConst")) 00473 return false; 00474 00475 // These methods set the state to parser_error if a problem is found. 00476 transfer_xml_attrs(attrs, nb_attributes); 00477 00478 if (!(check_required_attribute("name") && check_required_attribute("value"))) { 00479 dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element."); 00480 return false; 00481 } 00482 00483 istringstream iss(xml_attrs["value"].value); 00484 long long value = 0; 00485 iss >> skipws >> value; 00486 if (iss.fail() || iss.bad()) { 00487 dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.", 00488 xml_attrs["value"].value.c_str()); 00489 } 00490 else if (!enum_def()->is_valid_enum_value(value)) { 00491 dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.", 00492 xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str()); 00493 } 00494 else { 00495 // unfortunate choice of names... args are 'label' and 'value' 00496 enum_def()->add_value(xml_attrs["name"].value, value); 00497 } 00498 00499 return true; 00500 } 00501 00507 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes) 00508 { 00509 Type t = get_type(name); 00510 if (is_simple_type(t)) { 00511 process_variable_helper(t, inside_simple_type, attrs, nb_attributes); 00512 return true; 00513 } 00514 else { 00515 switch(t) { 00516 case dods_structure_c: 00517 process_variable_helper(t, inside_constructor, attrs, nb_attributes); 00518 return true; 00519 00520 case dods_sequence_c: 00521 process_variable_helper(t, inside_constructor, attrs, nb_attributes); 00522 return true; 00523 00524 default: 00525 return false; 00526 } 00527 } 00528 } 00529 00537 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes) 00538 { 00539 transfer_xml_attrs(attrs, nb_attributes); 00540 00541 if (check_required_attribute("name")) { 00542 BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value); 00543 if (!btp) { 00544 dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str()); 00545 return; 00546 } 00547 00548 if ((t == dods_enum_c) && check_required_attribute("enum")) { 00549 D4EnumDef *enum_def = 0; 00550 string enum_path = xml_attrs["enum"].value; 00551 if (enum_path[0] == '/') 00552 enum_def = dmr()->root()->find_enum_def(enum_path); 00553 else 00554 enum_def = top_group()->find_enum_def(enum_path); 00555 00556 if (!enum_def) 00557 dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str()); 00558 00559 static_cast<D4Enum*>(btp)->set_enumeration(enum_def); 00560 } 00561 00562 btp->set_is_dap4(true); // see comment above 00563 push_basetype(btp); 00564 00565 push_attributes(btp->attributes()); 00566 00567 push_state(s); 00568 } 00569 } 00570 00577 00582 void D4ParserSax2::dmr_start_document(void * p) 00583 { 00584 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 00585 parser->error_msg = ""; 00586 parser->char_data = ""; 00587 00588 // Set this in intern_helper so that the loop test for the parser_end 00589 // state works for the first iteration. It seems like XMLParseChunk calls this 00590 // function on it's first run. jhrg 9/16/13 00591 // parser->push_state(parser_start); 00592 00593 parser->push_attributes(parser->dmr()->root()->attributes()); 00594 00595 if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl; 00596 } 00597 00600 void D4ParserSax2::dmr_end_document(void * p) 00601 { 00602 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 00603 00604 if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl; 00605 00606 if (parser->get_state() != parser_end) 00607 D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags."); 00608 00609 // If we've found any sort of error, don't make the DMR; intern() will 00610 // take care of the error. 00611 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error) 00612 return; 00613 00614 if (!parser->empty_basetype() || parser->empty_group()) 00615 D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags."); 00616 00617 parser->pop_group(); // leave the stack 'clean' 00618 parser->pop_attributes(); 00619 } 00620 00621 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI, 00622 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/, 00623 const xmlChar **attributes) 00624 { 00625 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 00626 const char *localname = (const char *) l; 00627 00628 if (parser->debug()) cerr << "Start element " << localname << " (state " << states[parser->get_state()] << ")" << endl; 00629 00630 switch (parser->get_state()) { 00631 case parser_start: 00632 if (is_not(localname, "Dataset")) 00633 D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname); 00634 00635 parser->root_ns = URI ? (const char *) URI : ""; 00636 parser->transfer_xml_attrs(attributes, nb_attributes); 00637 00638 if (parser->check_required_attribute(string("name"))) 00639 parser->dmr()->set_name(parser->xml_attrs["name"].value); 00640 00641 if (parser->check_attribute("dapVersion")) 00642 parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value); 00643 00644 if (parser->check_attribute("dmrVersion")) 00645 parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value); 00646 00647 if (parser->check_attribute("base")) 00648 parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value); 00649 00650 if (!parser->root_ns.empty()) 00651 parser->dmr()->set_namespace(parser->root_ns); 00652 00653 // Push the root Group on the stack 00654 parser->push_group(parser->dmr()->root()); 00655 00656 parser->push_state(inside_dataset); 00657 00658 break; 00659 00660 // Both inside dataset and inside group can have the same stuff. 00661 // The difference is that the Dataset holds the root group, which 00662 // must be present; other groups are optional 00663 case inside_dataset: 00664 case inside_group: 00665 if (parser->process_enum_def(localname, attributes, nb_attributes)) 00666 parser->push_state(inside_enum_def); 00667 else if (parser->process_dimension_def(localname, attributes, nb_attributes)) 00668 parser->push_state(inside_dim_def); 00669 else if (parser->process_group(localname, attributes, nb_attributes)) 00670 parser->push_state(inside_group); 00671 else if (parser->process_variable(localname, attributes, nb_attributes)) 00672 // This will push either inside_simple_type or inside_structure 00673 // onto the parser state stack. 00674 break; 00675 else if (parser->process_attribute(localname, attributes, nb_attributes)) 00676 // This will push either inside_attribute, inside_attribute_container 00677 // or inside_otherxml_attribute onto the parser state stack 00678 break; 00679 else 00680 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname); 00681 break; 00682 00683 case inside_attribute_container: 00684 if (parser->process_attribute(localname, attributes, nb_attributes)) 00685 break; 00686 else 00687 D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname); 00688 break; 00689 00690 case inside_attribute: 00691 if (parser->process_attribute(localname, attributes, nb_attributes)) 00692 break; 00693 else if (strcmp(localname, "Value") == 0) 00694 parser->push_state(inside_attribute_value); 00695 else 00696 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname); 00697 break; 00698 00699 case inside_attribute_value: 00700 // Attribute values are processed by the end element code. 00701 break; 00702 00703 case inside_other_xml_attribute: 00704 parser->other_xml_depth++; 00705 00706 // Accumulate the elements here 00707 parser->other_xml.append("<"); 00708 if (prefix) { 00709 parser->other_xml.append((const char *) prefix); 00710 parser->other_xml.append(":"); 00711 } 00712 parser->other_xml.append(localname); 00713 00714 if (nb_namespaces != 0) { 00715 parser->transfer_xml_ns(namespaces, nb_namespaces); 00716 00717 for (map<string, string>::iterator i = parser->namespace_table.begin(); 00718 i != parser->namespace_table.end(); ++i) { 00719 parser->other_xml.append(" xmlns"); 00720 if (!i->first.empty()) { 00721 parser->other_xml.append(":"); 00722 parser->other_xml.append(i->first); 00723 } 00724 parser->other_xml.append("=\""); 00725 parser->other_xml.append(i->second); 00726 parser->other_xml.append("\""); 00727 } 00728 } 00729 00730 if (nb_attributes != 0) { 00731 parser->transfer_xml_attrs(attributes, nb_attributes); 00732 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) { 00733 parser->other_xml.append(" "); 00734 if (!i->second.prefix.empty()) { 00735 parser->other_xml.append(i->second.prefix); 00736 parser->other_xml.append(":"); 00737 } 00738 parser->other_xml.append(i->first); 00739 parser->other_xml.append("=\""); 00740 parser->other_xml.append(i->second.value); 00741 parser->other_xml.append("\""); 00742 } 00743 } 00744 00745 parser->other_xml.append(">"); 00746 break; 00747 00748 case inside_enum_def: 00749 // process an EnumConst element 00750 if (parser->process_enum_const(localname, attributes, nb_attributes)) 00751 parser->push_state(inside_enum_const); 00752 else 00753 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname); 00754 break; 00755 00756 case inside_enum_const: 00757 // No content; nothing to do 00758 break; 00759 00760 case inside_dim_def: 00761 // No content; nothing to do 00762 break; 00763 #if 0 00764 case inside_dimension: 00765 // No content. 00766 break; 00767 #endif 00768 case inside_dim: 00769 // No content. 00770 break; 00771 00772 case inside_map: 00773 // No content. 00774 break; 00775 00776 case inside_simple_type: 00777 if (parser->process_attribute(localname, attributes, nb_attributes)) 00778 break; 00779 else if (parser->process_dimension(localname, attributes, nb_attributes)) 00780 parser->push_state(inside_dim); 00781 else if (parser->process_map(localname, attributes, nb_attributes)) 00782 parser->push_state(inside_map); 00783 else 00784 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname); 00785 break; 00786 00787 case inside_constructor: 00788 if (parser->process_variable(localname, attributes, nb_attributes)) 00789 // This will push either inside_simple_type or inside_structure 00790 // onto the parser state stack. 00791 break; 00792 else if (parser->process_attribute(localname, attributes, nb_attributes)) 00793 break; 00794 else if (parser->process_dimension(localname, attributes, nb_attributes)) 00795 parser->push_state(inside_dim); 00796 else if (parser->process_map(localname, attributes, nb_attributes)) 00797 parser->push_state(inside_map); 00798 else 00799 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname); 00800 break; 00801 00802 case parser_unknown: 00803 // FIXME? 00804 // *** Never used? If so remove/error 00805 parser->push_state(parser_unknown); 00806 break; 00807 00808 case parser_error: 00809 case parser_fatal_error: 00810 break; 00811 00812 case parser_end: 00813 // FIXME Error? 00814 break; 00815 } 00816 00817 if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl; 00818 } 00819 00820 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI) 00821 { 00822 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 00823 const char *localname = (const char *) l; 00824 00825 if (parser->debug()) 00826 cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl; 00827 00828 switch (parser->get_state()) { 00829 case parser_start: 00830 dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname); 00831 break; 00832 00833 case inside_dataset: 00834 if (is_not(localname, "Dataset")) 00835 D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname); 00836 00837 parser->pop_state(); 00838 if (parser->get_state() != parser_start) 00839 dmr_fatal_error(parser, "Unexpected state, expected start state."); 00840 else { 00841 parser->pop_state(); 00842 parser->push_state(parser_end); 00843 } 00844 break; 00845 00846 case inside_group: { 00847 if (is_not(localname, "Group")) 00848 D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname); 00849 00850 if (!parser->empty_basetype() || parser->empty_group()) 00851 D4ParserSax2::dmr_error(parser, 00852 "The document did not contain a valid root Group or contained unbalanced tags."); 00853 00854 parser->pop_group(); 00855 parser->pop_state(); 00856 break; 00857 } 00858 00859 case inside_attribute_container: 00860 if (is_not(localname, "Attribute")) 00861 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname); 00862 00863 parser->pop_state(); 00864 parser->pop_attributes(); 00865 break; 00866 00867 case inside_attribute: 00868 if (is_not(localname, "Attribute")) 00869 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname); 00870 00871 parser->pop_state(); 00872 break; 00873 00874 case inside_attribute_value: { 00875 if (is_not(localname, "Value")) 00876 D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname); 00877 00878 parser->pop_state(); 00879 00880 // The old code added more values using the name and type as 00881 // indexes to find the correct attribute. Use get() for that 00882 // now. Or fix this code to keep a pointer to the to attribute... 00883 D4Attributes *attrs = parser->top_attributes(); 00884 D4Attribute *attr = attrs->get(parser->dods_attr_name); 00885 if (!attr) { 00886 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type)); 00887 attrs->add_attribute_nocopy(attr); 00888 } 00889 attr->add_value(parser->char_data); 00890 00891 parser->char_data = ""; // Null this after use. 00892 break; 00893 } 00894 00895 case inside_other_xml_attribute: { 00896 if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) { 00897 parser->pop_state(); 00898 00899 // The old code added more values using the name and type as 00900 // indexes to find the correct attribute. Use get() for that 00901 // now. Or fix this code to keep a pointer to the to attribute... 00902 D4Attributes *attrs = parser->top_attributes(); 00903 D4Attribute *attr = attrs->get(parser->dods_attr_name); 00904 if (!attr) { 00905 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type)); 00906 attrs->add_attribute_nocopy(attr); 00907 } 00908 attr->add_value(parser->other_xml); 00909 00910 parser->other_xml = ""; // Null this after use. 00911 } 00912 else { 00913 if (parser->other_xml_depth == 0) { 00914 D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'", 00915 localname); 00916 break; 00917 } 00918 parser->other_xml_depth--; 00919 00920 parser->other_xml.append("</"); 00921 if (prefix) { 00922 parser->other_xml.append((const char *) prefix); 00923 parser->other_xml.append(":"); 00924 } 00925 parser->other_xml.append(localname); 00926 parser->other_xml.append(">"); 00927 } 00928 break; 00929 } 00930 00931 case inside_enum_def: 00932 if (is_not(localname, "Enumeration")) 00933 D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname); 00934 if (!parser->top_group()) 00935 D4ParserSax2::dmr_fatal_error(parser, 00936 "Expected a Group to be the current item, while finishing up an Enumeration."); 00937 else { 00938 // copy the pointer; not a deep copy 00939 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def()); 00940 // Set the enum_def to null; next call to enum_def() will 00941 // allocate a new object 00942 parser->clear_enum_def(); 00943 parser->pop_state(); 00944 } 00945 break; 00946 00947 case inside_enum_const: 00948 if (is_not(localname, "EnumConst")) 00949 D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname); 00950 00951 parser->pop_state(); 00952 break; 00953 00954 case inside_dim_def: { 00955 if (is_not(localname, "Dimension")) 00956 D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname); 00957 00958 if (!parser->top_group()) 00959 D4ParserSax2::dmr_error(parser, 00960 "Expected a Group to be the current item, while finishing up an Dimension."); 00961 00962 // FIXME Use the Group on the top of the group stack 00963 // copy the pointer; not a deep copy 00964 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def()); 00965 //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def()); 00966 // Set the dim_def to null; next call to dim_def() will 00967 // allocate a new object. Calling 'clear' is important because 00968 // the cleanup method will free dim_def if it's not null and 00969 // we just copied the pointer in the add_dim_nocopy() call 00970 // above. 00971 parser->clear_dim_def(); 00972 parser->pop_state(); 00973 break; 00974 } 00975 00976 case inside_simple_type: 00977 if (is_simple_type(get_type(localname))) { 00978 BaseType *btp = parser->top_basetype(); 00979 parser->pop_basetype(); 00980 parser->pop_attributes(); 00981 00982 BaseType *parent = 0; 00983 if (!parser->empty_basetype()) 00984 parent = parser->top_basetype(); 00985 else if (!parser->empty_group()) 00986 parent = parser->top_group(); 00987 else { 00988 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.", 00989 localname); 00990 delete btp; 00991 parser->pop_state(); 00992 break; 00993 } 00994 00995 if (parent->type() == dods_array_c) 00996 static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp); 00997 else 00998 parent->add_var_nocopy(btp); 00999 } 01000 else 01001 D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname); 01002 01003 parser->pop_state(); 01004 break; 01005 01006 case inside_dim: 01007 if (is_not(localname, "Dim")) 01008 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname); 01009 01010 parser->pop_state(); 01011 break; 01012 01013 case inside_map: 01014 if (is_not(localname, "Map")) 01015 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname); 01016 01017 parser->pop_state(); 01018 break; 01019 01020 case inside_constructor: { 01021 if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) { 01022 D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname); 01023 return; 01024 } 01025 01026 BaseType *btp = parser->top_basetype(); 01027 parser->pop_basetype(); 01028 parser->pop_attributes(); 01029 01030 BaseType *parent = 0; 01031 if (!parser->empty_basetype()) 01032 parent = parser->top_basetype(); 01033 else if (!parser->empty_group()) 01034 parent = parser->top_group(); 01035 else { 01036 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.", 01037 localname); 01038 delete btp; 01039 parser->pop_state(); 01040 break; 01041 } 01042 01043 // TODO Why doesn't this code mirror the simple_var case and test 01044 // for the parent being an array? jhrg 10/13/13 01045 parent->add_var_nocopy(btp); 01046 parser->pop_state(); 01047 break; 01048 } 01049 01050 case parser_unknown: 01051 parser->pop_state(); 01052 break; 01053 01054 case parser_error: 01055 case parser_fatal_error: 01056 break; 01057 01058 case parser_end: 01059 // FIXME Error? 01060 break; 01061 } 01062 01063 if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl; 01064 } 01065 01069 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len) 01070 { 01071 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 01072 01073 switch (parser->get_state()) { 01074 case inside_attribute_value: 01075 parser->char_data.append((const char *) (ch), len); 01076 DBG(cerr << "Characters: '" << parser->char_data << "'" << endl); 01077 break; 01078 01079 case inside_other_xml_attribute: 01080 parser->other_xml.append((const char *) (ch), len); 01081 DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl); 01082 break; 01083 01084 default: 01085 break; 01086 } 01087 } 01088 01093 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len) 01094 { 01095 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 01096 01097 switch (parser->get_state()) { 01098 case inside_other_xml_attribute: 01099 parser->other_xml.append((const char *) (ch), len); 01100 break; 01101 01102 default: 01103 break; 01104 } 01105 } 01106 01112 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len) 01113 { 01114 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 01115 01116 switch (parser->get_state()) { 01117 case inside_other_xml_attribute: 01118 parser->other_xml.append((const char *) (value), len); 01119 break; 01120 01121 case parser_unknown: 01122 break; 01123 01124 default: 01125 D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4."); 01126 01127 break; 01128 } 01129 } 01130 01135 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name) 01136 { 01137 return xmlGetPredefinedEntity(name); 01138 } 01139 01150 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...) 01151 { 01152 va_list args; 01153 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 01154 01155 parser->push_state(parser_fatal_error); 01156 01157 va_start(args, msg); 01158 char str[1024]; 01159 vsnprintf(str, 1024, msg, args); 01160 va_end(args); 01161 01162 int line = xmlSAX2GetLineNumber(parser->context); 01163 01164 if (!parser->error_msg.empty()) parser->error_msg += "\n"; 01165 parser->error_msg += "At line " + long_to_string(line) + ": " + string(str); 01166 } 01167 01168 void D4ParserSax2::dmr_error(void *p, const char *msg, ...) 01169 { 01170 va_list args; 01171 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p); 01172 01173 parser->push_state(parser_error); 01174 01175 va_start(args, msg); 01176 char str[1024]; 01177 vsnprintf(str, 1024, msg, args); 01178 va_end(args); 01179 01180 int line = xmlSAX2GetLineNumber(parser->context); 01181 01182 if (!parser->error_msg.empty()) parser->error_msg += "\n"; 01183 parser->error_msg += "At line " + long_to_string(line) + ": " + string(str); 01184 } 01186 01190 void D4ParserSax2::cleanup_parse() 01191 { 01192 bool wellFormed = context->wellFormed; 01193 bool valid = context->valid; 01194 01195 context->sax = NULL; 01196 xmlFreeParserCtxt(context); 01197 01198 delete d_enum_def; 01199 d_enum_def = 0; 01200 01201 delete d_dim_def; 01202 d_dim_def = 0; 01203 01204 // If there's an error, there may still be items on the stack at the 01205 // end of the parse. 01206 while (!btp_stack.empty()) { 01207 delete top_basetype(); 01208 pop_basetype(); 01209 } 01210 01211 if (!wellFormed) 01212 throw Error("The DMR was not well formed. " + error_msg); 01213 else if (!valid) 01214 throw Error("The DMR was not valid." + error_msg); 01215 else if (get_state() == parser_error) 01216 throw Error(error_msg); 01217 else if (get_state() == parser_fatal_error) 01218 throw InternalErr(error_msg); 01219 } 01220 01235 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug) 01236 { 01237 d_debug = debug; 01238 01239 // Code example from libxml2 docs re: read from a stream. 01240 01241 if (!f.good()) 01242 throw Error("Input stream not open or read error"); 01243 if (!dest_dmr) 01244 throw InternalErr(__FILE__, __LINE__, "DMR object is null"); 01245 01246 d_dmr = dest_dmr; // dump values here 01247 01248 const int size = 1024; 01249 char chars[size]; 01250 int line = 1; 01251 01252 f.getline(chars, size); 01253 int res = f.gcount(); 01254 if (res == 0) throw Error("No input found while parsing the DMR."); 01255 01256 if (debug) cerr << "line: (" << line++ << "): " << chars << endl; 01257 01258 context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream"); 01259 context->validate = true; 01260 push_state(parser_start); 01261 01262 f.getline(chars, size); 01263 while ((f.gcount() > 0) && (get_state() != parser_end)) { 01264 if (debug) cerr << "line: (" << line++ << "): " << chars << endl; 01265 xmlParseChunk(context, chars, f.gcount() - 1, 0); 01266 f.getline(chars, size); 01267 } 01268 01269 // This call ends the parse. 01270 xmlParseChunk(context, chars, 0, 1/*terminate*/); 01271 01272 // This checks that the state on the parser stack is parser_end and throws 01273 // an exception if it's not (i.e., the loop exited with gcount() == 0). 01274 cleanup_parse(); 01275 } 01276 01287 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug) 01288 { 01289 intern(document.c_str(), document.length(), dest_dmr, debug); 01290 } 01291 01302 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug) 01303 { 01304 if (!(size > 0)) return; 01305 01306 d_debug = debug; 01307 01308 // Code example from libxml2 docs re: read from a stream. 01309 01310 if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null"); 01311 d_dmr = dest_dmr; // dump values in dest_dmr 01312 01313 push_state(parser_start); 01314 context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream"); 01315 context->validate = true; 01316 //push_state(parser_start); 01317 //xmlParseChunk(context, buffer, size, 0); 01318 01319 // This call ends the parse. 01320 xmlParseChunk(context, buffer, 0, 1/*terminate*/); 01321 01322 // This checks that the state on the parser stack is parser_end and throws 01323 // an exception if it's not (i.e., the loop exited with gcount() == 0). 01324 cleanup_parse(); 01325 } 01326 01327 } // namespace libdap