From acd2af82d07e346ba782f4ec7bb8711c49056c3f Mon Sep 17 00:00:00 2001
From: Tatsuhiro Tsujikawa
Date: Thu, 29 Aug 2013 01:09:15 +0900
Subject: [PATCH] util::htmlEscape: Optimize a bit

The cause of slowness of the first implementation is the memory
allocation overhead and appending character by character. In this
implementation, the output buffer is reserved the same size of
input. This is reasonable because most likely no replacement happens
in practice. And the unmodified region is copied using iterator range
to speed up a bit.
---
 src/util.cc | 47 +++++++++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/src/util.cc b/src/util.cc
index 89cd82b3..e5ad3688 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -1542,32 +1542,31 @@ getNumericNameInfo(const struct sockaddr* sockaddr, socklen_t len)
 
 std::string htmlEscape(const std::string& src)
 {
-  std::string rv(src);
-  std::string::size_type pos = 0;
-  while ((pos = rv.find_first_of("<>&\"'", pos)) != std::string::npos) {
-    auto ch = rv[pos];
-    if (ch == '<') {
-      rv.replace(pos, 1, "&lt;");
-      pos += 4;
-    }
-    else if (ch == '>') {
-      rv.replace(pos, 1, "&gt;");
-      pos += 4;
-    }
-    else if (ch == '&') {
-      rv.replace(pos, 1, "&amp;");
-      pos += 5;
-    }
-    else if (ch == '"') {
-      rv.replace(pos, 1, "&quot;");
-      pos += 6;
-    }
-    else { // '\''
-      rv.replace(pos, 1, "&#39;");
-      pos += 5;
+  std::string dest;
+  dest.reserve(src.size());
+  auto j = std::begin(src);
+  for(auto i = std::begin(src); i != std::end(src); ++i) {
+    char ch = *i;
+    const char *repl;
+    if(ch == '<') {
+      repl = "&lt;";
+    } else if(ch == '>') {
+      repl = "&gt;";
+    } else if(ch == '&') {
+      repl = "&amp;";
+    } else if(ch == '\'') {
+      repl = "&#39;";
+    } else if(ch == '"') {
+      repl = "&quot;";
+    } else {
+      continue;
     }
+    dest.append(j, i);
+    j = i + 1;
+    dest += repl;
   }
-  return rv;
+  dest.append(j, std::end(src));
+  return dest;
 }
 
 std::pair