mirror of https://github.com/aria2/aria2
Rewritten Xml2XmlParser
Now it is push parser + utility function for file parsing.
pull/25/merge
parent
cd67e27ca4
commit
70685bd233
|
@ -2,7 +2,7 @@
|
||||||
/*
|
/*
|
||||||
* aria2 - The high speed download utility
|
* aria2 - The high speed download utility
|
||||||
*
|
*
|
||||||
* Copyright (C) 2011 Tatsuhiro Tsujikawa
|
* Copyright (C) 2012 Tatsuhiro Tsujikawa
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
@ -36,28 +36,17 @@
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <deque>
|
|
||||||
|
|
||||||
#include <libxml/parser.h>
|
|
||||||
|
|
||||||
#include "a2io.h"
|
#include "a2io.h"
|
||||||
#include "BinaryStream.h"
|
|
||||||
#include "ParserStateMachine.h"
|
#include "ParserStateMachine.h"
|
||||||
#include "A2STR.h"
|
#include "A2STR.h"
|
||||||
#include "a2functional.h"
|
#include "a2functional.h"
|
||||||
#include "XmlAttr.h"
|
#include "XmlAttr.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
namespace aria2 {
|
namespace aria2 {
|
||||||
|
|
||||||
namespace {
|
namespace xml {
|
||||||
struct SessionData {
|
|
||||||
std::deque<std::string> charactersStack_;
|
|
||||||
ParserStateMachine* psm_;
|
|
||||||
SessionData(ParserStateMachine* psm)
|
|
||||||
: psm_(psm)
|
|
||||||
{}
|
|
||||||
};
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
void mlStartElement
|
void mlStartElement
|
||||||
|
@ -88,13 +77,13 @@ void mlStartElement
|
||||||
xmlAttr.valueLength = pattrs[i+4]-xmlAttr.value;
|
xmlAttr.valueLength = pattrs[i+4]-xmlAttr.value;
|
||||||
xmlAttrs.push_back(xmlAttr);
|
xmlAttrs.push_back(xmlAttr);
|
||||||
}
|
}
|
||||||
sd->psm_->beginElement
|
sd->psm->beginElement
|
||||||
(reinterpret_cast<const char*>(localname),
|
(reinterpret_cast<const char*>(localname),
|
||||||
reinterpret_cast<const char*>(prefix),
|
reinterpret_cast<const char*>(prefix),
|
||||||
reinterpret_cast<const char*>(nsUri),
|
reinterpret_cast<const char*>(nsUri),
|
||||||
xmlAttrs);
|
xmlAttrs);
|
||||||
if(sd->psm_->needsCharactersBuffering()) {
|
if(sd->psm->needsCharactersBuffering()) {
|
||||||
sd->charactersStack_.push_front(A2STR::NIL);
|
sd->charactersStack.push_front(A2STR::NIL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -108,11 +97,11 @@ void mlEndElement
|
||||||
{
|
{
|
||||||
SessionData* sd = reinterpret_cast<SessionData*>(userData);
|
SessionData* sd = reinterpret_cast<SessionData*>(userData);
|
||||||
std::string characters;
|
std::string characters;
|
||||||
if(sd->psm_->needsCharactersBuffering()) {
|
if(sd->psm->needsCharactersBuffering()) {
|
||||||
characters = sd->charactersStack_.front();
|
characters = sd->charactersStack.front();
|
||||||
sd->charactersStack_.pop_front();
|
sd->charactersStack.pop_front();
|
||||||
}
|
}
|
||||||
sd->psm_->endElement
|
sd->psm->endElement
|
||||||
(reinterpret_cast<const char*>(localname),
|
(reinterpret_cast<const char*>(localname),
|
||||||
reinterpret_cast<const char*>(prefix),
|
reinterpret_cast<const char*>(prefix),
|
||||||
reinterpret_cast<const char*>(nsUri),
|
reinterpret_cast<const char*>(nsUri),
|
||||||
|
@ -124,8 +113,8 @@ namespace {
|
||||||
void mlCharacters(void* userData, const xmlChar* ch, int len)
|
void mlCharacters(void* userData, const xmlChar* ch, int len)
|
||||||
{
|
{
|
||||||
SessionData* sd = reinterpret_cast<SessionData*>(userData);
|
SessionData* sd = reinterpret_cast<SessionData*>(userData);
|
||||||
if(sd->psm_->needsCharactersBuffering()) {
|
if(sd->psm->needsCharactersBuffering()) {
|
||||||
sd->charactersStack_.front().append(&ch[0], &ch[len]);
|
sd->charactersStack.front().append(&ch[0], &ch[len]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -169,61 +158,85 @@ xmlSAXHandler mySAXHandler =
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
XmlParser::XmlParser(ParserStateMachine* psm)
|
XmlParser::XmlParser(ParserStateMachine* psm)
|
||||||
: psm_(psm)
|
: psm_(psm),
|
||||||
|
sessionData_(psm),
|
||||||
|
ctx_(xmlCreatePushParserCtxt(&mySAXHandler, &sessionData_, 0, 0, 0)),
|
||||||
|
lastError_(0)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
XmlParser::~XmlParser() {}
|
XmlParser::~XmlParser()
|
||||||
|
|
||||||
bool XmlParser::parseFile(const char* filename)
|
|
||||||
{
|
{
|
||||||
SessionData sessionData(psm_);
|
xmlFreeParserCtxt(ctx_);
|
||||||
// Old libxml2(at least 2.7.6, Ubuntu 10.04LTS) does not read stdin
|
|
||||||
// when "/dev/stdin" is passed as filename while 2.7.7 does. So we
|
|
||||||
// convert DEV_STDIN to "-" for compatibility.
|
|
||||||
const char* nfilename;
|
|
||||||
if(strcmp(filename, DEV_STDIN) == 0) {
|
|
||||||
nfilename = "-";
|
|
||||||
} else {
|
|
||||||
nfilename = filename;
|
|
||||||
}
|
|
||||||
int r = xmlSAXUserParseFile(&mySAXHandler, &sessionData, nfilename);
|
|
||||||
return r == 0 && psm_->finished();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool XmlParser::parseBinaryStream(BinaryStream* bs)
|
ssize_t XmlParser::parseUpdate(const char* data, size_t size)
|
||||||
{
|
{
|
||||||
const size_t bufSize = 4096;
|
if(lastError_ != 0) {
|
||||||
unsigned char buf[bufSize];
|
return lastError_;
|
||||||
ssize_t res = bs->readData(buf, 4, 0);
|
|
||||||
if(res != 4) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
SessionData sessionData(psm_);
|
int rv = xmlParseChunk(ctx_, data, size, 0);
|
||||||
xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt
|
if(rv != 0) {
|
||||||
(&mySAXHandler, &sessionData,
|
return lastError_ = ERR_XML_PARSE;
|
||||||
reinterpret_cast<const char*>(buf), res, 0);
|
} else {
|
||||||
auto_delete<xmlParserCtxtPtr> deleter(ctx, xmlFreeParserCtxt);
|
return size;
|
||||||
off_t readOffset = res;
|
}
|
||||||
while(1) {
|
}
|
||||||
ssize_t res = bs->readData(buf, bufSize, readOffset);
|
|
||||||
if(res == 0) {
|
ssize_t XmlParser::parseFinal(const char* data, size_t size)
|
||||||
break;
|
{
|
||||||
}
|
if(lastError_ != 0) {
|
||||||
if(xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), res, 0) != 0) {
|
return lastError_;
|
||||||
// TODO we need this? Just break is not suffice?
|
}
|
||||||
|
int rv = xmlParseChunk(ctx_, data, size, 1);
|
||||||
|
if(rv != 0) {
|
||||||
|
return lastError_ = ERR_XML_PARSE;
|
||||||
|
} else {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int XmlParser::reset()
|
||||||
|
{
|
||||||
|
// TODO psm must be reset
|
||||||
|
sessionData_.reset();
|
||||||
|
int rv = xmlCtxtResetPush(ctx_, 0, 0, 0, 0);
|
||||||
|
if(rv != 0) {
|
||||||
|
return lastError_ = ERR_RESET;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool parseFile(const std::string& filename, ParserStateMachine* psm)
|
||||||
|
{
|
||||||
|
int fd;
|
||||||
|
if(filename == DEV_STDIN) {
|
||||||
|
fd = STDIN_FILENO;
|
||||||
|
} else {
|
||||||
|
while((fd = a2open(utf8ToWChar(filename).c_str(),
|
||||||
|
O_BINARY | O_RDONLY, OPEN_MODE)) == -1 && fd != EINTR);
|
||||||
|
if(fd == -1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
readOffset += res;
|
|
||||||
}
|
}
|
||||||
xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), 0, 1);
|
XmlParser ps(psm);
|
||||||
return psm_->finished();
|
char buf[4096];
|
||||||
|
ssize_t nread;
|
||||||
|
bool retval = true;
|
||||||
|
while((nread = read(fd, buf, sizeof(buf))) > 0) {
|
||||||
|
if(ps.parseUpdate(buf, nread) < 0) {
|
||||||
|
retval = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(nread == 0 && retval) {
|
||||||
|
if(ps.parseFinal(0, 0) < 0) {
|
||||||
|
retval = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool XmlParser::parseMemory(const char* xml, size_t len)
|
} // namespace xml
|
||||||
{
|
|
||||||
SessionData sessionData(psm_);
|
|
||||||
int r = xmlSAXUserParseMemory(&mySAXHandler, &sessionData, xml, len);
|
|
||||||
return r == 0 && psm_->finished();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace aria2
|
} // namespace aria2
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
/*
|
/*
|
||||||
* aria2 - The high speed download utility
|
* aria2 - The high speed download utility
|
||||||
*
|
*
|
||||||
* Copyright (C) 2011 Tatsuhiro Tsujikawa
|
* Copyright (C) 2012 Tatsuhiro Tsujikawa
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
@ -37,25 +37,56 @@
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
#include <deque>
|
||||||
|
|
||||||
|
#include <libxml/parser.h>
|
||||||
|
|
||||||
namespace aria2 {
|
namespace aria2 {
|
||||||
|
|
||||||
class BinaryStream;
|
|
||||||
class ParserStateMachine;
|
class ParserStateMachine;
|
||||||
|
|
||||||
|
namespace xml {
|
||||||
|
|
||||||
|
enum XmlError {
|
||||||
|
ERR_XML_PARSE = -1,
|
||||||
|
ERR_RESET = -2
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SessionData {
|
||||||
|
std::deque<std::string> charactersStack;
|
||||||
|
ParserStateMachine* psm;
|
||||||
|
SessionData(ParserStateMachine* psm)
|
||||||
|
: psm(psm)
|
||||||
|
{}
|
||||||
|
void reset()
|
||||||
|
{
|
||||||
|
charactersStack.clear();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class XmlParser {
|
class XmlParser {
|
||||||
public:
|
public:
|
||||||
// This object does not delete psm.
|
// This object does not delete psm.
|
||||||
XmlParser(ParserStateMachine* psm);
|
XmlParser(ParserStateMachine* psm);
|
||||||
~XmlParser();
|
~XmlParser();
|
||||||
bool parseFile(const char* filename);
|
ssize_t parseUpdate(const char* data, size_t size);
|
||||||
bool parseBinaryStream(BinaryStream* binaryStream);
|
ssize_t parseFinal(const char* data, size_t size);
|
||||||
bool parseMemory(const char* xml, size_t size);
|
int reset();
|
||||||
private:
|
private:
|
||||||
ParserStateMachine* psm_;
|
ParserStateMachine* psm_;
|
||||||
|
SessionData sessionData_;
|
||||||
|
xmlParserCtxtPtr ctx_;
|
||||||
|
int lastError_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool parseFile(const std::string& filename, ParserStateMachine* psm);
|
||||||
|
|
||||||
|
} // namespace xml
|
||||||
|
|
||||||
} // namespace aria2
|
} // namespace aria2
|
||||||
|
|
||||||
#endif // D_XML2_XML_PARSER_H
|
#endif // D_XML2_XML_PARSER_H
|
||||||
|
|
|
@ -125,7 +125,7 @@ SharedHandle<Metalinker> parseFile
|
||||||
{
|
{
|
||||||
MetalinkParserStateMachine psm;
|
MetalinkParserStateMachine psm;
|
||||||
psm.setBaseUri(baseUri);
|
psm.setBaseUri(baseUri);
|
||||||
if(!XmlParser(&psm).parseFile(filename.c_str())) {
|
if(!xml::parseFile(filename, &psm)) {
|
||||||
throw DL_ABORT_EX2("Could not parse Metalink XML document.",
|
throw DL_ABORT_EX2("Could not parse Metalink XML document.",
|
||||||
error_code::METALINK_PARSE_ERROR);
|
error_code::METALINK_PARSE_ERROR);
|
||||||
}
|
}
|
||||||
|
@ -142,7 +142,24 @@ SharedHandle<Metalinker> parseBinaryStream
|
||||||
{
|
{
|
||||||
MetalinkParserStateMachine psm;
|
MetalinkParserStateMachine psm;
|
||||||
psm.setBaseUri(baseUri);
|
psm.setBaseUri(baseUri);
|
||||||
if(!XmlParser(&psm).parseBinaryStream(bs)) {
|
xml::XmlParser ps(&psm);
|
||||||
|
unsigned char buf[4096];
|
||||||
|
ssize_t nread;
|
||||||
|
off_t offread = 0;
|
||||||
|
bool retval = true;
|
||||||
|
while((nread = bs->readData(buf, sizeof(buf), offread)) > 0) {
|
||||||
|
if(ps.parseUpdate(reinterpret_cast<const char*>(buf), nread) < 0) {
|
||||||
|
retval = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offread += nread;
|
||||||
|
}
|
||||||
|
if(nread == 0 && retval) {
|
||||||
|
if(ps.parseFinal(0, 0) < 0) {
|
||||||
|
retval = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!retval) {
|
||||||
throw DL_ABORT_EX2("Could not parse Metalink XML document.",
|
throw DL_ABORT_EX2("Could not parse Metalink XML document.",
|
||||||
error_code::METALINK_PARSE_ERROR);
|
error_code::METALINK_PARSE_ERROR);
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,7 @@ namespace rpc {
|
||||||
RpcRequest xmlParseMemory(const char* xml, size_t size)
|
RpcRequest xmlParseMemory(const char* xml, size_t size)
|
||||||
{
|
{
|
||||||
XmlRpcRequestParserStateMachine psm;
|
XmlRpcRequestParserStateMachine psm;
|
||||||
if(!XmlParser(&psm).parseMemory(xml, size)) {
|
if(xml::XmlParser(&psm).parseFinal(xml, size) < 0) {
|
||||||
throw DL_ABORT_EX(MSG_CANNOT_PARSE_XML_RPC_REQUEST);
|
throw DL_ABORT_EX(MSG_CANNOT_PARSE_XML_RPC_REQUEST);
|
||||||
}
|
}
|
||||||
SharedHandle<List> params;
|
SharedHandle<List> params;
|
||||||
|
|
Loading…
Reference in New Issue