EBOOKHTMLParser.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*
00003  * This file is part of the libe-book project.
00004  *
00005  * This Source Code Form is subject to the terms of the Mozilla Public
00006  * License, v. 2.0. If a copy of the MPL was not distributed with this
00007  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
00008  */
00009 
00010 #ifndef EBOOKHTMLPARSER_H_INCLUDED
00011 #define EBOOKHTMLPARSER_H_INCLUDED
00012 
00013 #include <boost/shared_ptr.hpp>
00014 
00015 #include <librevenge/librevenge.h>
00016 #include <librevenge-stream/librevenge-stream.h>
00017 
00018 #include "XMLTreeNode.h"
00019 
00020 namespace libebook
00021 {
00022 // Forward declarations. Within this header only XMLCollector (m_collector) and XMLTreeWalker (handler parameters) are referenced; NOTE(review): the others are presumably needed by the implementation or by includers - confirm before removing any.
00023 class XMLCollector;
00024 struct EBOOKHTMLMetadata;
00025 class EBOOKHTMLParagraphAttributes;
00026 class EBOOKHTMLSpanAttributes;
00027 class EBOOKHTMLTableCellAttributes;
00028 class EBOOKOutputElements;
00029 class XMLTreeWalker;
00030 // Abstract parser class: getImage(), getObject() and getStylesheet() are pure virtual, so only subclasses that implement all three can be instantiated.
00031 class EBOOKHTMLParser
00032 {
00033   struct State; // nested type, declared only; no definition is visible in this header (held through m_state)
00034   // NOTE(review): readTreeImpl() uses std::string but <string> is not among the includes listed above - presumably it arrives transitively; confirm.
00035 public:
00036   enum Dialect // selector passed to the constructor
00037   {
00038     DIALECT_UNKNOWN,
00039     DIALECT_HTML,
00040     DIALECT_XHTML
00041   };
00042   // Construction/destruction. NOTE(review): input and document are raw pointers; ownership and lifetime requirements are not expressed here - confirm in the implementation.
00043 public:
00044   EBOOKHTMLParser(librevenge::RVNGInputStream *input, Dialect dialect, librevenge::RVNGTextInterface *document);
00045   virtual ~EBOOKHTMLParser(); // virtual: the class is designed to be derived from (see the pure virtuals below)
00046   // Public entry points; both are non-virtual.
00047   void parse(); // presumably runs the parse and sends the result to the document passed at construction - confirm
00048   const XMLTreeNodePtr_t readTree(); // returns the tree by value; XMLTreeNodePtr_t is presumably declared in XMLTreeNode.h
00049   // Hooks that subclasses must implement: each takes a path and returns an input stream pointer. NOTE(review): ownership of the returned pointer and the meaning of a null return are not specified here - confirm.
00050   virtual librevenge::RVNGInputStream *getImage(const char *path) = 0;
00051   virtual librevenge::RVNGInputStream *getObject(const char *path) = 0;
00052   virtual librevenge::RVNGInputStream *getStylesheet(const char *path) = 0;
00053   // Private, non-virtual helpers used by the implementation.
00054 private:
00055   const XMLTreeNodePtr_t readTreeImpl(std::string &encoding, bool &encodingChange); // both arguments are non-const references, so the callee can modify them
00056   void processNode(const XMLTreeWalker &node);
00057   // Handlers that each receive a node as a const XMLTreeWalker reference.
00058   void startElement(const XMLTreeWalker &node);
00059   void endElement(const XMLTreeWalker &node);
00060   void text(const XMLTreeWalker &node);
00061   // Start/end pair for "external" elements. NOTE(review): what counts as external is decided in the implementation - confirm.
00062   void startExternalElement(const XMLTreeWalker &node);
00063   void endExternalElement(const XMLTreeWalker &node);
00064   // Output helpers; judging by the names they emit paragraphs, spans, table cells, preformatted text and headings - confirm in the implementation.
00065   void sendParagraph(const XMLTreeWalker &node);
00066   void sendSpan(const XMLTreeWalker &node);
00067   void sendTableCell(const XMLTreeWalker &node);
00068   void sendPreformattedText(const char *ch, int len); // ch/len: character buffer and its length (presumably in bytes - confirm)
00069   void sendHeading(const XMLTreeWalker &node, int level);
00070   // Data members, all held through boost::shared_ptr.
00071 private:
00072   boost::shared_ptr<XMLCollector> m_collector;
00073   boost::shared_ptr<State> m_state;
00074   boost::shared_ptr<librevenge::RVNGInputStream> m_input;
00075   // const Dialect m_dialect;  -- disabled: this class currently declares no member for the constructor's dialect argument
00076   boost::shared_ptr<librevenge::RVNGInputStream> m_workingInput;
00077 };
00078 
00079 }
00080 
00081 #endif // EBOOKHTMLPARSER_H_INCLUDED
00082 
00083 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */