Rewritten ExpatXmlParser

Move the elements common to both Xml2XmlParser and ExpatXmlParser into
XmlParser.
pull/25/merge
Tatsuhiro Tsujikawa 2012-07-23 23:41:33 +09:00
parent 4b57106a17
commit f6b2c3c080
7 changed files with 191 additions and 154 deletions

View File

@ -36,13 +36,8 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <deque>
#include <expat.h>
#include "a2io.h" #include "a2io.h"
#include "BinaryStream.h"
#include "BufferedFile.h"
#include "ParserStateMachine.h" #include "ParserStateMachine.h"
#include "A2STR.h" #include "A2STR.h"
#include "a2functional.h" #include "a2functional.h"
@ -50,15 +45,7 @@
namespace aria2 { namespace aria2 {
namespace { namespace xml {
struct SessionData {
std::deque<std::string> charactersStack_;
ParserStateMachine* psm_;
SessionData(ParserStateMachine* psm)
: psm_(psm)
{}
};
} // namespace
namespace { namespace {
void splitNsName(const char** localname, const char** nsUri, const char* src) void splitNsName(const char** localname, const char** nsUri, const char* src)
@ -101,14 +88,14 @@ void mlStartElement(void* userData, const char* nsName, const char** attrs)
const char* prefix = 0; const char* prefix = 0;
const char* nsUri = 0; const char* nsUri = 0;
splitNsName(&localname, &nsUri, nsName); splitNsName(&localname, &nsUri, nsName);
sd->psm_->beginElement(localname, prefix, nsUri, xmlAttrs); sd->psm->beginElement(localname, prefix, nsUri, xmlAttrs);
delete [] nsUri; delete [] nsUri;
for(std::vector<XmlAttr>::iterator i = xmlAttrs.begin(), for(std::vector<XmlAttr>::iterator i = xmlAttrs.begin(),
eoi = xmlAttrs.end(); i != eoi; ++i) { eoi = xmlAttrs.end(); i != eoi; ++i) {
delete [] (*i).nsUri; delete [] (*i).nsUri;
} }
if(sd->psm_->needsCharactersBuffering()) { if(sd->psm->needsCharactersBuffering()) {
sd->charactersStack_.push_front(A2STR::NIL); sd->charactersStack.push_front(A2STR::NIL);
} }
} }
} // namespace } // namespace
@ -122,11 +109,11 @@ void mlEndElement(void* userData, const char* nsName)
splitNsName(&localname, &nsUri, nsName); splitNsName(&localname, &nsUri, nsName);
SessionData* sd = reinterpret_cast<SessionData*>(userData); SessionData* sd = reinterpret_cast<SessionData*>(userData);
std::string characters; std::string characters;
if(sd->psm_->needsCharactersBuffering()) { if(sd->psm->needsCharactersBuffering()) {
characters = sd->charactersStack_.front(); characters = sd->charactersStack.front();
sd->charactersStack_.pop_front(); sd->charactersStack.pop_front();
} }
sd->psm_->endElement(localname, prefix, nsUri, characters); sd->psm->endElement(localname, prefix, nsUri, characters);
delete [] nsUri; delete [] nsUri;
} }
} // namespace } // namespace
@ -135,92 +122,74 @@ namespace {
void mlCharacters(void* userData, const char* ch, int len) void mlCharacters(void* userData, const char* ch, int len)
{ {
SessionData* sd = reinterpret_cast<SessionData*>(userData); SessionData* sd = reinterpret_cast<SessionData*>(userData);
if(sd->psm_->needsCharactersBuffering()) { if(sd->psm->needsCharactersBuffering()) {
sd->charactersStack_.front().append(&ch[0], &ch[len]); sd->charactersStack.front().append(&ch[0], &ch[len]);
} }
} }
} // namespace } // namespace
namespace {
void setupParser(XML_Parser parser, SessionData *sd)
{
XML_SetUserData(parser, sd);
XML_SetElementHandler(parser, &mlStartElement, &mlEndElement);
XML_SetCharacterDataHandler(parser, &mlCharacters);
}
} // namespace
XmlParser::XmlParser(ParserStateMachine* psm) XmlParser::XmlParser(ParserStateMachine* psm)
: psm_(psm) : psm_(psm),
{} sessionData_(psm_),
ctx_(XML_ParserCreateNS(0, static_cast<const XML_Char>('\t'))),
XmlParser::~XmlParser() {} lastError_(0)
namespace {
XML_Parser createParser(SessionData* sd)
{ {
XML_Parser parser = XML_ParserCreateNS(0, static_cast<const XML_Char>('\t')); setupParser(ctx_, &sessionData_);
XML_SetUserData(parser, sd);
XML_SetElementHandler(parser, &mlStartElement, &mlEndElement);
XML_SetCharacterDataHandler(parser, &mlCharacters);
return parser;
} }
} // namespace
bool XmlParser::parseFile(const char* filename) XmlParser::~XmlParser()
{ {
if(strcmp(filename, DEV_STDIN) == 0) { XML_ParserFree(ctx_);
BufferedFile fp(stdin); }
return parseFile(fp);
ssize_t XmlParser::parseUpdate(const char* data, size_t size)
{
if(lastError_ != 0) {
return lastError_;
}
XML_Status rv = XML_Parse(ctx_, data, size, 0);
if(rv == XML_STATUS_ERROR) {
return lastError_ = ERR_XML_PARSE;
} else { } else {
BufferedFile fp(filename, BufferedFile::READ); return size;
return parseFile(fp);
} }
} }
bool XmlParser::parseFile(BufferedFile& fp) ssize_t XmlParser::parseFinal(const char* data, size_t size)
{ {
if(!fp) { if(lastError_ != 0) {
return false; return lastError_;
} }
char buf[4096]; XML_Status rv = XML_Parse(ctx_, data, size, 1);
SessionData sessionData(psm_); if(rv == XML_STATUS_ERROR) {
XML_Parser parser = createParser(&sessionData); return lastError_ = ERR_XML_PARSE;
auto_delete<XML_Parser> deleter(parser, XML_ParserFree); } else {
while(1) { return size;
size_t res = fp.read(buf, sizeof(buf));
if(XML_Parse(parser, buf, res, 0) == XML_STATUS_ERROR) {
return false;
}
if(res < sizeof(buf)) {
break;
}
} }
return XML_Parse(parser, 0, 0, 1) != XML_STATUS_ERROR && psm_->finished();
}
bool XmlParser::parseBinaryStream(BinaryStream* bs)
{
const ssize_t bufSize = 4096;
unsigned char buf[bufSize];
SessionData sessionData(psm_);
XML_Parser parser = createParser(&sessionData);
auto_delete<XML_Parser> deleter(parser, XML_ParserFree);
int64_t readOffset = 0;
while(1) {
ssize_t res = bs->readData(buf, bufSize, readOffset);
if(res == 0) {
break;
}
if(XML_Parse(parser, reinterpret_cast<const char*>(buf), res, 0) ==
XML_STATUS_ERROR) {
return false;
}
readOffset += res;
}
return XML_Parse(parser, 0, 0, 1) != XML_STATUS_ERROR && psm_->finished();
} }
bool XmlParser::parseMemory(const char* xml, size_t size) int XmlParser::reset()
{ {
SessionData sessionData(psm_); psm_->reset();
XML_Parser parser = createParser(&sessionData); sessionData_.reset();
auto_delete<XML_Parser> deleter(parser, XML_ParserFree); XML_Bool rv = XML_ParserReset(ctx_, 0);
if(XML_Parse(parser, xml, size, 0) == XML_STATUS_ERROR) { if(rv == XML_FALSE) {
return false; return lastError_ = ERR_RESET;
} else {
setupParser(ctx_, &sessionData_);
return 0;
} }
return XML_Parse(parser, 0, 0, 1) != XML_STATUS_ERROR && psm_->finished();
} }
} // namespace xml
} // namespace aria2 } // namespace aria2

View File

@ -2,7 +2,7 @@
/* /*
* aria2 - The high speed download utility * aria2 - The high speed download utility
* *
* Copyright (C) 2011 Tatsuhiro Tsujikawa * Copyright (C) 2012 Tatsuhiro Tsujikawa
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -37,28 +37,34 @@
#include "common.h" #include "common.h"
#include <sys/types.h>
#include <cstdlib> #include <cstdlib>
#include <expat.h>
#include "XmlParser.h"
namespace aria2 { namespace aria2 {
class BinaryStream; namespace xml {
class ParserStateMachine;
class BufferedFile;
class XmlParser { class XmlParser {
public: public:
// This object does not delete psm. // This object does not delete psm.
XmlParser(ParserStateMachine* psm); XmlParser(ParserStateMachine* psm);
~XmlParser(); ~XmlParser();
bool parseFile(const char* filename); ssize_t parseUpdate(const char* data, size_t size);
bool parseBinaryStream(BinaryStream* binaryStream); ssize_t parseFinal(const char* data, size_t size);
bool parseMemory(const char* xml, size_t size); int reset();
private: private:
bool parseFile(BufferedFile& fp);
ParserStateMachine* psm_; ParserStateMachine* psm_;
SessionData sessionData_;
XML_Parser ctx_;
int lastError_;
}; };
} // namespace xml
} // namespace aria2 } // namespace aria2
#endif // D_EXPAT_XML_PARSER_H #endif // D_EXPAT_XML_PARSER_H

View File

@ -253,7 +253,7 @@ endif # !ENABLE_WEBSOCKET
if HAVE_SOME_XMLLIB if HAVE_SOME_XMLLIB
SRCS += XmlAttr.cc XmlAttr.h\ SRCS += XmlAttr.cc XmlAttr.h\
XmlParser.h\ XmlParser.cc XmlParser.h\
ParserStateMachine.h ParserStateMachine.h
endif # HAVE_SOME_XMLLIB endif # HAVE_SOME_XMLLIB

View File

@ -32,7 +32,7 @@
* files in the program, then also delete it here. * files in the program, then also delete it here.
*/ */
/* copyright --> */ /* copyright --> */
#include "XmlParser.h" #include "Xml2XmlParser.h"
#include <cassert> #include <cassert>
#include <cstring> #include <cstring>
@ -42,7 +42,6 @@
#include "A2STR.h" #include "A2STR.h"
#include "a2functional.h" #include "a2functional.h"
#include "XmlAttr.h" #include "XmlAttr.h"
#include "util.h"
namespace aria2 { namespace aria2 {
@ -207,36 +206,6 @@ int XmlParser::reset()
} }
} }
// Parses the XML document stored in |filename| (or read from standard
// input when |filename| equals DEV_STDIN) and feeds it to |psm| through
// an XmlParser. Returns false if the file cannot be opened or if the
// parser reports an error; otherwise returns true.
bool parseFile(const std::string& filename, ParserStateMachine* psm)
{
int fd;
if(filename == DEV_STDIN) {
fd = STDIN_FILENO;
} else {
// NOTE(review): the retry condition compares fd (which is -1 at that
// point) with EINTR, so it is always true once a2open() fails and the
// loop never terminates. It looks like errno == EINTR was intended --
// confirm.
while((fd = a2open(utf8ToWChar(filename).c_str(),
O_BINARY | O_RDONLY, OPEN_MODE)) == -1 && fd != EINTR);
if(fd == -1) {
return false;
}
}
XmlParser ps(psm);
// The input is handed to the parser in chunks of at most 4096 bytes.
char buf[4096];
ssize_t nread;
bool retval = true;
while((nread = read(fd, buf, sizeof(buf))) > 0) {
if(ps.parseUpdate(buf, nread) < 0) {
retval = false;
break;
}
}
// NOTE(review): if read() returns a negative value, retval is still
// true here and parseFinal() is skipped, so the function returns true.
// fd is also never closed in this function.
// Tell the parser that the input is complete only after a clean EOF.
if(nread == 0 && retval) {
if(ps.parseFinal(0, 0) < 0) {
retval = false;
}
}
return retval;
}
} // namespace xml } // namespace xml
} // namespace aria2 } // namespace aria2

View File

@ -39,35 +39,14 @@
#include <sys/types.h> #include <sys/types.h>
#include <cstdlib>
#include <string>
#include <deque>
#include <libxml/parser.h> #include <libxml/parser.h>
#include "XmlParser.h"
namespace aria2 { namespace aria2 {
class ParserStateMachine;
namespace xml { namespace xml {
// Error codes used by XmlParser. Both values are negative; parseFile()
// treats a negative result from parseUpdate()/parseFinal() as failure.
enum XmlError {
// The underlying XML library rejected the input.
ERR_XML_PARSE = -1,
// The underlying XML parser could not be reset.
ERR_RESET = -2
};
// State shared between an XmlParser and the callbacks it registers with
// the underlying XML library.
struct SessionData {
  // Buffers for character data; cleared by reset().
  std::deque<std::string> charactersStack;
  // State machine that receives the parse events. Stored as a plain
  // pointer; nothing in this struct deletes it.
  ParserStateMachine* psm;

  SessionData(ParserStateMachine* stateMachine) : psm(stateMachine) {}

  // Drops all buffered character data. psm is left untouched.
  void reset() { charactersStack.clear(); }
};
class XmlParser { class XmlParser {
public: public:
// This object does not delete psm. // This object does not delete psm.
@ -83,8 +62,6 @@ private:
int lastError_; int lastError_;
}; };
// Parses the XML document stored in |filename| (or read from standard
// input when |filename| equals DEV_STDIN) and feeds it to |psm|.
// Returns false if the file cannot be opened or the parser reports an
// error.
bool parseFile(const std::string& filename, ParserStateMachine* psm);
} // namespace xml } // namespace xml
} // namespace aria2 } // namespace aria2

75
src/XmlParser.cc Normal file
View File

@ -0,0 +1,75 @@
/* <!-- copyright */
/*
* aria2 - The high speed download utility
*
* Copyright (C) 2012 Tatsuhiro Tsujikawa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
/* copyright --> */
#include "XmlParser.h"
#include <cerrno>
#include "a2io.h"
#include "util.h"
namespace aria2 {
namespace xml {
// Parses the XML document stored in |filename| and feeds it to |psm|
// through an XmlParser. If |filename| equals DEV_STDIN, the document is
// read from standard input instead.
//
// Returns true if the whole input was read and accepted by the parser.
// Returns false if the file cannot be opened, if reading fails, or if
// the parser reports an error. This function does not delete |psm|.
bool parseFile(const std::string& filename, ParserStateMachine* psm)
{
  const bool useStdin = (filename == DEV_STDIN);
  int fd;
  if(useStdin) {
    fd = STDIN_FILENO;
  } else {
    // Retry only when the call was interrupted by a signal. Any other
    // failure (missing file, permission denied, ...) must end the loop.
    while((fd = a2open(utf8ToWChar(filename).c_str(),
                       O_BINARY | O_RDONLY, OPEN_MODE)) == -1 &&
          errno == EINTR);
    if(fd == -1) {
      return false;
    }
  }
  XmlParser ps(psm);
  char buf[4096];
  bool retval = true;
  for(;;) {
    const ssize_t nread = read(fd, buf, sizeof(buf));
    if(nread > 0) {
      if(ps.parseUpdate(buf, nread) < 0) {
        retval = false;
        break;
      }
    } else if(nread == 0) {
      // Clean end of input: let the parser finish the document.
      if(ps.parseFinal(0, 0) < 0) {
        retval = false;
      }
      break;
    } else if(errno != EINTR) {
      // A real read error must not be reported as a successful parse.
      retval = false;
      break;
    }
    // nread < 0 with errno == EINTR: interrupted, read again.
  }
  // Release the descriptor we opened; standard input belongs to the
  // caller and stays open.
  if(!useStdin) {
    close(fd);
  }
  return retval;
}
} // namespace xml
} // namespace aria2

View File

@ -37,10 +37,51 @@
#include "common.h" #include "common.h"
#include <cstdlib>
#include <string>
#include <deque>
namespace aria2 {
class ParserStateMachine;
namespace xml {
// Error codes used by XmlParser. Both values are negative; parseFile()
// treats a negative result from parseUpdate()/parseFinal() as failure.
enum XmlError {
// The underlying XML library rejected the input.
ERR_XML_PARSE = -1,
// The underlying XML parser could not be reset.
ERR_RESET = -2
};
// State shared between an XmlParser and the callbacks it registers with
// the underlying XML library.
struct SessionData {
  // Buffers for character data; cleared by reset().
  std::deque<std::string> charactersStack;
  // State machine that receives the parse events. Stored as a plain
  // pointer; nothing in this struct deletes it.
  ParserStateMachine* psm;

  SessionData(ParserStateMachine* stateMachine) : psm(stateMachine) {}

  // Drops all buffered character data. psm is left untouched.
  void reset() { charactersStack.clear(); }
};
} // namespace xml
} // namespace aria2
#ifdef HAVE_LIBXML2 #ifdef HAVE_LIBXML2
# include "Xml2XmlParser.h" # include "Xml2XmlParser.h"
#elif HAVE_LIBEXPAT #elif HAVE_LIBEXPAT
# include "ExpatXmlParser.h" # include "ExpatXmlParser.h"
#endif #endif
namespace aria2 {
namespace xml {
// Parses the XML document stored in |filename| (or read from standard
// input when |filename| equals DEV_STDIN) and feeds it to |psm|.
// Returns false if the file cannot be opened or the parser reports an
// error.
bool parseFile(const std::string& filename, ParserStateMachine* psm);
} // namespace xml
} // namespace aria2
#endif // D_XML_PARSER_H #endif // D_XML_PARSER_H