From d09947d22dbb9e4acbda9e6ce6ba0c20cc87023f Mon Sep 17 00:00:00 2001 From: milipili Date: Sat, 30 May 2015 19:47:44 +0200 Subject: [PATCH] Scintilla: Buffer: fixed invalid read via strlen when loading a file When loading a file via `FileManager::loadFileData`, a fixed-length buffer is filled via `fread`. Then, in some cases, a conversion is done with the help of `Utf8_16_Read`. However, the method `Utf8_16_Read::convert` performs a call to `strlen` on this buffer. This is obviously wrong: `\0` char should be accepted (even if a bit strange) and the buffer is not zero-terminated. The changes merely consist in adding an additional parameter `length` to not have to guess the size of the buffer. --- PowerEditor/src/ScitillaComponent/Buffer.cpp | 18 ++++++++-------- PowerEditor/src/ScitillaComponent/Buffer.h | 5 ++++- PowerEditor/src/Utf8_16.cpp | 22 ++++++++++---------- PowerEditor/src/Utf8_16.h | 8 +++++-- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/PowerEditor/src/ScitillaComponent/Buffer.cpp b/PowerEditor/src/ScitillaComponent/Buffer.cpp index 875bf973a..3c1dc9d85 100644 --- a/PowerEditor/src/ScitillaComponent/Buffer.cpp +++ b/PowerEditor/src/ScitillaComponent/Buffer.cpp @@ -508,11 +508,10 @@ BufferID FileManager::loadFile(const TCHAR * filename, Document doc, int encodin if (encoding == -1) { // 3 formats : WIN_FORMAT, UNIX_FORMAT and MAC_FORMAT - if (UnicodeConvertor.getNewBuf()) + if (nullptr != UnicodeConvertor.getNewBuf()) { - int format = getEOLFormatForm(UnicodeConvertor.getNewBuf()); + int format = getEOLFormatForm(UnicodeConvertor.getNewBuf(), UnicodeConvertor.getNewSize()); buf->setFormat(format == -1?WIN_FORMAT:(formatType)format); - } else { @@ -569,7 +568,7 @@ bool FileManager::reloadBuffer(BufferID id) { if (UnicodeConvertor.getNewBuf()) { - int format = getEOLFormatForm(UnicodeConvertor.getNewBuf()); + int format = getEOLFormatForm(UnicodeConvertor.getNewBuf(), UnicodeConvertor.getNewSize()); buf->setFormat(format == -1?WIN_FORMAT:(formatType)format); } else @@ -1241,7 +1240,7 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea } if (format == -1) - format = getEOLFormatForm(data); + format = getEOLFormatForm(data, lenFile); } else { @@ -1323,14 +1322,15 @@ int FileManager::docLength(Buffer * buffer) const return docLen; } -int FileManager::getEOLFormatForm(const char *data) const +int FileManager::getEOLFormatForm(const char* const data, size_t length) const { - size_t len = strlen(data); - for (size_t i = 0 ; i < len ; i++) + assert(data != nullptr && "invalid buffer for getEOLFormatForm()"); + + for (size_t i = 0; i != length; ++i) { if (data[i] == CR) { - if (i+1 < len && data[i+1] == LF) + if (i+1 < length && data[i+1] == LF) { return int(WIN_FORMAT); } diff --git a/PowerEditor/src/ScitillaComponent/Buffer.h b/PowerEditor/src/ScitillaComponent/Buffer.h index 51c30cba4..a96c8b7e8 100644 --- a/PowerEditor/src/ScitillaComponent/Buffer.h +++ b/PowerEditor/src/ScitillaComponent/Buffer.h @@ -104,7 +104,7 @@ public: void destroyInstance() { delete _pSelf; }; int getFileNameFromBuffer(BufferID id, TCHAR * fn2copy); int docLength(Buffer * buffer) const; - int getEOLFormatForm(const char *data) const; + int getEOLFormatForm(const char* const data, size_t length) const; size_t nextUntitledNewNumber() const; private: @@ -384,6 +384,9 @@ private : if (_canNotify) _pManager->beNotifiedOfBufferChange(this, mask); }; + + Buffer(const Buffer&) { assert(false); } + Buffer& operator = (const Buffer&) { assert(false); return *this; } }; #endif //BUFFER_H diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp index b120c35cf..729427ab2 100644 --- a/PowerEditor/src/Utf8_16.cpp +++ b/PowerEditor/src/Utf8_16.cpp @@ -31,7 +31,8 @@ const Utf8_16::utf8 Utf8_16::k_Boms[][3] = { Utf8_16_Read::Utf8_16_Read() { m_eEncoding = uni8Bit; - m_nBufSize = 0; + m_nAllocatedBufSize = 0; + m_nNewBufSize = 0; m_pNewBuf = NULL; m_bFirstRead = true; } @@ -113,10 +114,9 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) // bugfix by Jens Lorenz static size_t nSkip = 0; - size_t ret = 0; - m_pBuf = (ubyte*)buf; m_nLen = len; + m_nNewBufSize = 0; if (m_bFirstRead == true) { @@ -131,16 +131,16 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) case uni8Bit: case uniCookie: { // Do nothing, pass through - m_nBufSize = 0; + m_nAllocatedBufSize = 0; m_pNewBuf = m_pBuf; - ret = len; + m_nNewBufSize = len; break; } case uniUTF8: { // Pass through after BOM - m_nBufSize = 0; + m_nAllocatedBufSize = 0; m_pNewBuf = m_pBuf + nSkip; - ret = len - nSkip; + m_nNewBufSize = len - nSkip; break; } case uni16BE_NoBOM: @@ -149,13 +149,13 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) case uni16LE: { size_t newSize = len + len / 2 + 1; - if (m_nBufSize != newSize) + if (m_nAllocatedBufSize != newSize) { if (m_pNewBuf) delete [] m_pNewBuf; m_pNewBuf = NULL; m_pNewBuf = new ubyte[newSize]; - m_nBufSize = newSize; + m_nAllocatedBufSize = newSize; } ubyte* pCur = m_pNewBuf; @@ -166,7 +166,7 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) { *pCur++ = m_Iter16.get(); } - ret = pCur - m_pNewBuf; + m_nNewBufSize = pCur - m_pNewBuf; break; } default: @@ -176,7 +176,7 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) // necessary for second calls and more nSkip = 0; - return ret; + return m_nNewBufSize; } diff --git a/PowerEditor/src/Utf8_16.h b/PowerEditor/src/Utf8_16.h index c47d964bd..e9aa102ac 100644 --- a/PowerEditor/src/Utf8_16.h +++ b/PowerEditor/src/Utf8_16.h @@ -112,7 +112,8 @@ public: ~Utf8_16_Read(); size_t convert(char* buf, size_t len); - char* getNewBuf() { return reinterpret_cast(m_pNewBuf); } + const char* getNewBuf() const { return (const char*) m_pNewBuf; } + size_t getNewSize() const { return m_nNewBufSize; } UniMode getEncoding() const { return m_eEncoding; } size_t calcCurPos(size_t pos); @@ -126,7 +127,10 @@ private: UniMode m_eEncoding; ubyte* m_pBuf; ubyte* m_pNewBuf; - size_t m_nBufSize; + // size of the new buffer + size_t m_nNewBufSize; + // size of the previously allocated buffer (if != 0) + size_t m_nAllocatedBufSize; size_t m_nSkip; bool m_bFirstRead; size_t m_nLen;