mirror of https://github.com/aria2/aria2
Rewritten Xml2XmlParser
Now it is push parser + utility function for file parsing.pull/25/merge
parent
cd67e27ca4
commit
70685bd233
|
@ -2,7 +2,7 @@
|
|||
/*
|
||||
* aria2 - The high speed download utility
|
||||
*
|
||||
* Copyright (C) 2011 Tatsuhiro Tsujikawa
|
||||
* Copyright (C) 2012 Tatsuhiro Tsujikawa
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,28 +36,17 @@
|
|||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
|
||||
#include <libxml/parser.h>
|
||||
|
||||
#include "a2io.h"
|
||||
#include "BinaryStream.h"
|
||||
#include "ParserStateMachine.h"
|
||||
#include "A2STR.h"
|
||||
#include "a2functional.h"
|
||||
#include "XmlAttr.h"
|
||||
#include "util.h"
|
||||
|
||||
namespace aria2 {
|
||||
|
||||
namespace {
|
||||
struct SessionData {
|
||||
std::deque<std::string> charactersStack_;
|
||||
ParserStateMachine* psm_;
|
||||
SessionData(ParserStateMachine* psm)
|
||||
: psm_(psm)
|
||||
{}
|
||||
};
|
||||
} // namespace
|
||||
namespace xml {
|
||||
|
||||
namespace {
|
||||
void mlStartElement
|
||||
|
@ -88,13 +77,13 @@ void mlStartElement
|
|||
xmlAttr.valueLength = pattrs[i+4]-xmlAttr.value;
|
||||
xmlAttrs.push_back(xmlAttr);
|
||||
}
|
||||
sd->psm_->beginElement
|
||||
sd->psm->beginElement
|
||||
(reinterpret_cast<const char*>(localname),
|
||||
reinterpret_cast<const char*>(prefix),
|
||||
reinterpret_cast<const char*>(nsUri),
|
||||
xmlAttrs);
|
||||
if(sd->psm_->needsCharactersBuffering()) {
|
||||
sd->charactersStack_.push_front(A2STR::NIL);
|
||||
if(sd->psm->needsCharactersBuffering()) {
|
||||
sd->charactersStack.push_front(A2STR::NIL);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
@ -108,11 +97,11 @@ void mlEndElement
|
|||
{
|
||||
SessionData* sd = reinterpret_cast<SessionData*>(userData);
|
||||
std::string characters;
|
||||
if(sd->psm_->needsCharactersBuffering()) {
|
||||
characters = sd->charactersStack_.front();
|
||||
sd->charactersStack_.pop_front();
|
||||
if(sd->psm->needsCharactersBuffering()) {
|
||||
characters = sd->charactersStack.front();
|
||||
sd->charactersStack.pop_front();
|
||||
}
|
||||
sd->psm_->endElement
|
||||
sd->psm->endElement
|
||||
(reinterpret_cast<const char*>(localname),
|
||||
reinterpret_cast<const char*>(prefix),
|
||||
reinterpret_cast<const char*>(nsUri),
|
||||
|
@ -124,8 +113,8 @@ namespace {
|
|||
void mlCharacters(void* userData, const xmlChar* ch, int len)
|
||||
{
|
||||
SessionData* sd = reinterpret_cast<SessionData*>(userData);
|
||||
if(sd->psm_->needsCharactersBuffering()) {
|
||||
sd->charactersStack_.front().append(&ch[0], &ch[len]);
|
||||
if(sd->psm->needsCharactersBuffering()) {
|
||||
sd->charactersStack.front().append(&ch[0], &ch[len]);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
@ -169,61 +158,85 @@ xmlSAXHandler mySAXHandler =
|
|||
} // namespace
|
||||
|
||||
XmlParser::XmlParser(ParserStateMachine* psm)
|
||||
: psm_(psm)
|
||||
: psm_(psm),
|
||||
sessionData_(psm),
|
||||
ctx_(xmlCreatePushParserCtxt(&mySAXHandler, &sessionData_, 0, 0, 0)),
|
||||
lastError_(0)
|
||||
{}
|
||||
|
||||
XmlParser::~XmlParser() {}
|
||||
|
||||
bool XmlParser::parseFile(const char* filename)
|
||||
XmlParser::~XmlParser()
|
||||
{
|
||||
SessionData sessionData(psm_);
|
||||
// Old libxml2(at least 2.7.6, Ubuntu 10.04LTS) does not read stdin
|
||||
// when "/dev/stdin" is passed as filename while 2.7.7 does. So we
|
||||
// convert DEV_STDIN to "-" for compatibility.
|
||||
const char* nfilename;
|
||||
if(strcmp(filename, DEV_STDIN) == 0) {
|
||||
nfilename = "-";
|
||||
} else {
|
||||
nfilename = filename;
|
||||
}
|
||||
int r = xmlSAXUserParseFile(&mySAXHandler, &sessionData, nfilename);
|
||||
return r == 0 && psm_->finished();
|
||||
xmlFreeParserCtxt(ctx_);
|
||||
}
|
||||
|
||||
bool XmlParser::parseBinaryStream(BinaryStream* bs)
|
||||
ssize_t XmlParser::parseUpdate(const char* data, size_t size)
|
||||
{
|
||||
const size_t bufSize = 4096;
|
||||
unsigned char buf[bufSize];
|
||||
ssize_t res = bs->readData(buf, 4, 0);
|
||||
if(res != 4) {
|
||||
return false;
|
||||
if(lastError_ != 0) {
|
||||
return lastError_;
|
||||
}
|
||||
SessionData sessionData(psm_);
|
||||
xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt
|
||||
(&mySAXHandler, &sessionData,
|
||||
reinterpret_cast<const char*>(buf), res, 0);
|
||||
auto_delete<xmlParserCtxtPtr> deleter(ctx, xmlFreeParserCtxt);
|
||||
off_t readOffset = res;
|
||||
while(1) {
|
||||
ssize_t res = bs->readData(buf, bufSize, readOffset);
|
||||
if(res == 0) {
|
||||
break;
|
||||
}
|
||||
if(xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), res, 0) != 0) {
|
||||
// TODO we need this? Just break is not suffice?
|
||||
int rv = xmlParseChunk(ctx_, data, size, 0);
|
||||
if(rv != 0) {
|
||||
return lastError_ = ERR_XML_PARSE;
|
||||
} else {
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t XmlParser::parseFinal(const char* data, size_t size)
|
||||
{
|
||||
if(lastError_ != 0) {
|
||||
return lastError_;
|
||||
}
|
||||
int rv = xmlParseChunk(ctx_, data, size, 1);
|
||||
if(rv != 0) {
|
||||
return lastError_ = ERR_XML_PARSE;
|
||||
} else {
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
int XmlParser::reset()
|
||||
{
|
||||
// TODO psm must be reset
|
||||
sessionData_.reset();
|
||||
int rv = xmlCtxtResetPush(ctx_, 0, 0, 0, 0);
|
||||
if(rv != 0) {
|
||||
return lastError_ = ERR_RESET;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool parseFile(const std::string& filename, ParserStateMachine* psm)
|
||||
{
|
||||
int fd;
|
||||
if(filename == DEV_STDIN) {
|
||||
fd = STDIN_FILENO;
|
||||
} else {
|
||||
while((fd = a2open(utf8ToWChar(filename).c_str(),
|
||||
O_BINARY | O_RDONLY, OPEN_MODE)) == -1 && fd != EINTR);
|
||||
if(fd == -1) {
|
||||
return false;
|
||||
}
|
||||
readOffset += res;
|
||||
}
|
||||
xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), 0, 1);
|
||||
return psm_->finished();
|
||||
XmlParser ps(psm);
|
||||
char buf[4096];
|
||||
ssize_t nread;
|
||||
bool retval = true;
|
||||
while((nread = read(fd, buf, sizeof(buf))) > 0) {
|
||||
if(ps.parseUpdate(buf, nread) < 0) {
|
||||
retval = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(nread == 0 && retval) {
|
||||
if(ps.parseFinal(0, 0) < 0) {
|
||||
retval = false;
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
bool XmlParser::parseMemory(const char* xml, size_t len)
|
||||
{
|
||||
SessionData sessionData(psm_);
|
||||
int r = xmlSAXUserParseMemory(&mySAXHandler, &sessionData, xml, len);
|
||||
return r == 0 && psm_->finished();
|
||||
}
|
||||
} // namespace xml
|
||||
|
||||
} // namespace aria2
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
/*
|
||||
* aria2 - The high speed download utility
|
||||
*
|
||||
* Copyright (C) 2011 Tatsuhiro Tsujikawa
|
||||
* Copyright (C) 2012 Tatsuhiro Tsujikawa
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -37,25 +37,56 @@
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <deque>
|
||||
|
||||
#include <libxml/parser.h>
|
||||
|
||||
namespace aria2 {
|
||||
|
||||
class BinaryStream;
|
||||
class ParserStateMachine;
|
||||
|
||||
namespace xml {
|
||||
|
||||
enum XmlError {
|
||||
ERR_XML_PARSE = -1,
|
||||
ERR_RESET = -2
|
||||
};
|
||||
|
||||
struct SessionData {
|
||||
std::deque<std::string> charactersStack;
|
||||
ParserStateMachine* psm;
|
||||
SessionData(ParserStateMachine* psm)
|
||||
: psm(psm)
|
||||
{}
|
||||
void reset()
|
||||
{
|
||||
charactersStack.clear();
|
||||
}
|
||||
};
|
||||
|
||||
class XmlParser {
|
||||
public:
|
||||
// This object does not delete psm.
|
||||
XmlParser(ParserStateMachine* psm);
|
||||
~XmlParser();
|
||||
bool parseFile(const char* filename);
|
||||
bool parseBinaryStream(BinaryStream* binaryStream);
|
||||
bool parseMemory(const char* xml, size_t size);
|
||||
ssize_t parseUpdate(const char* data, size_t size);
|
||||
ssize_t parseFinal(const char* data, size_t size);
|
||||
int reset();
|
||||
private:
|
||||
ParserStateMachine* psm_;
|
||||
SessionData sessionData_;
|
||||
xmlParserCtxtPtr ctx_;
|
||||
int lastError_;
|
||||
};
|
||||
|
||||
bool parseFile(const std::string& filename, ParserStateMachine* psm);
|
||||
|
||||
} // namespace xml
|
||||
|
||||
} // namespace aria2
|
||||
|
||||
#endif // D_XML2_XML_PARSER_H
|
||||
|
|
|
@ -125,7 +125,7 @@ SharedHandle<Metalinker> parseFile
|
|||
{
|
||||
MetalinkParserStateMachine psm;
|
||||
psm.setBaseUri(baseUri);
|
||||
if(!XmlParser(&psm).parseFile(filename.c_str())) {
|
||||
if(!xml::parseFile(filename, &psm)) {
|
||||
throw DL_ABORT_EX2("Could not parse Metalink XML document.",
|
||||
error_code::METALINK_PARSE_ERROR);
|
||||
}
|
||||
|
@ -142,7 +142,24 @@ SharedHandle<Metalinker> parseBinaryStream
|
|||
{
|
||||
MetalinkParserStateMachine psm;
|
||||
psm.setBaseUri(baseUri);
|
||||
if(!XmlParser(&psm).parseBinaryStream(bs)) {
|
||||
xml::XmlParser ps(&psm);
|
||||
unsigned char buf[4096];
|
||||
ssize_t nread;
|
||||
off_t offread = 0;
|
||||
bool retval = true;
|
||||
while((nread = bs->readData(buf, sizeof(buf), offread)) > 0) {
|
||||
if(ps.parseUpdate(reinterpret_cast<const char*>(buf), nread) < 0) {
|
||||
retval = false;
|
||||
break;
|
||||
}
|
||||
offread += nread;
|
||||
}
|
||||
if(nread == 0 && retval) {
|
||||
if(ps.parseFinal(0, 0) < 0) {
|
||||
retval = false;
|
||||
}
|
||||
}
|
||||
if(!retval) {
|
||||
throw DL_ABORT_EX2("Could not parse Metalink XML document.",
|
||||
error_code::METALINK_PARSE_ERROR);
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace rpc {
|
|||
RpcRequest xmlParseMemory(const char* xml, size_t size)
|
||||
{
|
||||
XmlRpcRequestParserStateMachine psm;
|
||||
if(!XmlParser(&psm).parseMemory(xml, size)) {
|
||||
if(xml::XmlParser(&psm).parseFinal(xml, size) < 0) {
|
||||
throw DL_ABORT_EX(MSG_CANNOT_PARSE_XML_RPC_REQUEST);
|
||||
}
|
||||
SharedHandle<List> params;
|
||||
|
|
Loading…
Reference in New Issue