/* */
#include "XmlParser.h"

// NOTE(review): the angle-bracket header names and all template arguments
// were lost in the mangled source. They are reconstructed here from the
// names this file uses (assert, strcmp, std::deque, libxml2 parser API) --
// please confirm against the original file.
#include <cassert>
#include <cstring>
#include <deque>

#include <libxml/parser.h>

#include "a2io.h"
#include "BinaryStream.h"
#include "ParserStateMachine.h"
#include "A2STR.h"
#include "a2functional.h"
#include "XmlAttr.h"

namespace aria2 {

namespace {
// Per-parse state handed to libxml2 as the SAX "user data" pointer.
struct SessionData {
  // Character data buffers for the currently open elements; the innermost
  // buffered element is at the front.
  std::deque<std::string> charactersStack_;
  // State machine that receives begin/end element events. Not owned.
  ParserStateMachine* psm_;

  explicit SessionData(ParserStateMachine* psm) : psm_(psm) {}
};
} // namespace

namespace {
// SAX2 start-element callback.
//
// Converts libxml2's flat attribute array into XmlAttr objects, forwards
// the event to the state machine and, if the state machine asks for
// character buffering afterwards, opens a new character buffer.
//
// attrs holds 5 pointers per attribute, read below as:
//   [0] localname, [1] prefix (may be null), [2] namespace URI (may be
//   null), [3] start of value, [4] end of value.
void mlStartElement
(void* userData,
 const xmlChar* localname,
 const xmlChar* prefix,
 const xmlChar* nsUri,
 int numNamespaces,
 const xmlChar** namespaces,
 int numAttrs,
 int numDefaulted,
 const xmlChar** attrs)
{
  SessionData* sd = reinterpret_cast<SessionData*>(userData);
  std::vector<XmlAttr> xmlAttrs;
  const char** pattrs = reinterpret_cast<const char**>(attrs);
  for(size_t i = 0, max = numAttrs*5; i < max; i += 5) {
    XmlAttr xmlAttr;
    assert(pattrs[i]);
    xmlAttr.localname = pattrs[i];
    if(pattrs[i+1]) {
      xmlAttr.prefix = pattrs[i+1];
    }
    // The original tested attrs[i+2] here; pattrs aliases the same array,
    // so use it consistently.
    if(pattrs[i+2]) {
      xmlAttr.nsUri = pattrs[i+2];
    }
    xmlAttr.value = pattrs[i+3];
    // The value is delimited by a [begin, end) pointer pair.
    xmlAttr.valueLength = pattrs[i+4]-xmlAttr.value;
    xmlAttrs.push_back(xmlAttr);
  }
  sd->psm_->beginElement
    (reinterpret_cast<const char*>(localname),
     reinterpret_cast<const char*>(prefix),
     reinterpret_cast<const char*>(nsUri),
     xmlAttrs);
  // Queried after beginElement() so that the answer reflects whatever
  // state the state machine is in once the element has been opened.
  if(sd->psm_->needsCharactersBuffering()) {
    sd->charactersStack_.push_front(A2STR::NIL);
  }
}
} // namespace

namespace {
// SAX2 end-element callback.
//
// Pops the character buffer of the element being closed (when buffering
// is active) and passes it, together with the element name, to the state
// machine. When buffering is not active an empty string is passed.
void mlEndElement
(void* userData,
 const xmlChar* localname,
 const xmlChar* prefix,
 const xmlChar* nsUri)
{
  SessionData* sd = reinterpret_cast<SessionData*>(userData);
  std::string characters;
  // The emptiness check guards against front()/pop_front() on an empty
  // deque should the buffering flag and the stack ever get out of step.
  if(sd->psm_->needsCharactersBuffering() &&
     !sd->charactersStack_.empty()) {
    characters = sd->charactersStack_.front();
    sd->charactersStack_.pop_front();
  }
  sd->psm_->endElement
    (reinterpret_cast<const char*>(localname),
     reinterpret_cast<const char*>(prefix),
     reinterpret_cast<const char*>(nsUri),
     characters);
}
} // namespace

namespace {
// SAX characters callback: appends the len bytes starting at ch to the
// innermost character buffer when buffering is active.
void mlCharacters(void* userData, const xmlChar* ch, int len)
{
  SessionData* sd = reinterpret_cast<SessionData*>(userData);
  // Skip the append when no buffer is open; front() on an empty deque is
  // undefined behaviour.
  if(sd->psm_->needsCharactersBuffering() &&
     !sd->charactersStack_.empty()) {
    sd->charactersStack_.front().append(&ch[0], &ch[len]);
  }
}
} // namespace

namespace {
// SAX handler table. Only the characters callback and the SAX2
// namespace-aware start/end element callbacks are installed.
xmlSAXHandler mySAXHandler = {
  0, // internalSubsetSAXFunc
  0, // isStandaloneSAXFunc
  0, // hasInternalSubsetSAXFunc
  0, // hasExternalSubsetSAXFunc
  0, // resolveEntitySAXFunc
  0, // getEntitySAXFunc
  0, // entityDeclSAXFunc
  0, // notationDeclSAXFunc
  0, // attributeDeclSAXFunc
  0, // elementDeclSAXFunc
  0, // unparsedEntityDeclSAXFunc
  0, // setDocumentLocatorSAXFunc
  0, // startDocumentSAXFunc
  0, // endDocumentSAXFunc
  0, // startElementSAXFunc
  0, // endElementSAXFunc
  0, // referenceSAXFunc
  &mlCharacters, // charactersSAXFunc
  0, // ignorableWhitespaceSAXFunc
  0, // processingInstructionSAXFunc
  0, // commentSAXFunc
  0, // warningSAXFunc
  0, // errorSAXFunc
  0, // fatalErrorSAXFunc
  0, // getParameterEntitySAXFunc
  0, // cdataBlockSAXFunc
  0, // externalSubsetSAXFunc
  XML_SAX2_MAGIC, // unsigned int initialized
  0, // void * _private
  &mlStartElement, // startElementNsSAX2Func
  &mlEndElement, // endElementNsSAX2Func
  0, // xmlStructuredErrorFunc
};
} // namespace

XmlParser::XmlParser(ParserStateMachine* psm)
  : psm_(psm)
{}

XmlParser::~XmlParser() {}

// Parses the XML document stored in the file named filename.
// Returns true only if libxml2 reports success and the state machine
// reports that it has finished.
bool XmlParser::parseFile(const char* filename)
{
  SessionData sessionData(psm_);
  // Old libxml2(at least 2.7.6, Ubuntu 10.04LTS) does not read stdin
  // when "/dev/stdin" is passed as filename while 2.7.7 does. So we
  // convert DEV_STDIN to "-" for compatibility.
  const char* nfilename;
  if(strcmp(filename, DEV_STDIN) == 0) {
    nfilename = "-";
  } else {
    nfilename = filename;
  }
  int r = xmlSAXUserParseFile(&mySAXHandler, &sessionData, nfilename);
  return r == 0 && psm_->finished();
}

// Parses the XML document read from bs using libxml2's push parser.
// Returns true only if every chunk (including the terminating one) was
// accepted by libxml2 and the state machine reports that it has finished.
bool XmlParser::parseBinaryStream(BinaryStream* bs)
{
  const size_t bufSize = 4096;
  unsigned char buf[bufSize];
  // The push parser context is created from the first 4 bytes of input;
  // a stream shorter than that is rejected.
  ssize_t res = bs->readData(buf, 4, 0);
  if(res != 4) {
    return false;
  }
  SessionData sessionData(psm_);
  xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt
    (&mySAXHandler, &sessionData,
     reinterpret_cast<const char*>(buf), res, 0);
  if(!ctx) {
    // Context creation failed; nothing can be parsed.
    return false;
  }
  // Frees the parser context on every exit path.
  auto_delete<xmlParserCtxtPtr> deleter(ctx, xmlFreeParserCtxt);
  off_t readOffset = res;
  while(1) {
    ssize_t nread = bs->readData(buf, bufSize, readOffset);
    if(nread < 0) {
      // NOTE(review): assumes a negative return signals a read error --
      // confirm against BinaryStream::readData. Without this check the
      // negative value would be passed on as a chunk size and subtracted
      // from readOffset.
      return false;
    }
    if(nread == 0) {
      break;
    }
    if(xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), nread, 0)
       != 0) {
      // TODO Is the early return needed, or would a break be sufficient?
      return false;
    }
    readOffset += nread;
  }
  // Terminate parsing. The result is checked so that errors reported only
  // at end of input are not silently dropped, matching parseFile() and
  // parseMemory().
  int r = xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), 0, 1);
  return r == 0 && psm_->finished();
}

// Parses the XML document held in the len bytes starting at xml.
// Returns true only if libxml2 reports success and the state machine
// reports that it has finished.
bool XmlParser::parseMemory(const char* xml, size_t len)
{
  SessionData sessionData(psm_);
  int r = xmlSAXUserParseMemory(&mySAXHandler, &sessionData, xml, len);
  return r == 0 && psm_->finished();
}

} // namespace aria2