// Scintilla source code edit control /** @file Document.cxx ** Text document that handles notifications, DBCS, styling, words and end of line. **/ // Copyright 1998-2011 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_CXX11_REGEX #include #endif #include "ScintillaTypes.h" #include "ILoader.h" #include "ILexer.h" #include "Debugging.h" #include "CharacterType.h" #include "CharacterCategoryMap.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" #include "RunStyles.h" #include "CellBuffer.h" #include "PerLine.h" #include "CharClassify.h" #include "Decoration.h" #include "CaseFolder.h" #include "Document.h" #include "RESearch.h" #include "UniConversion.h" #include "ElapsedPeriod.h" using namespace Scintilla; using namespace Scintilla::Internal; LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) { } LexInterface::~LexInterface() noexcept = default; void LexInterface::SetInstance(ILexer5 *instance_) { instance.reset(instance_); pdoc->LexerChanged(); } void LexInterface::Colourise(Sci::Position start, Sci::Position end) { if (pdoc && instance && !performingStyle) { // Protect against reentrance, which may occur, for example, when // fold points are discovered while performing styling and the folding // code looks for child lines which may trigger styling. performingStyle = true; const Sci::Position lengthDoc = pdoc->Length(); if (end == -1) end = lengthDoc; const Sci::Position len = end - start; PLATFORM_ASSERT(len >= 0); PLATFORM_ASSERT(start + len <= lengthDoc); int styleStart = 0; if (start > 0) styleStart = pdoc->StyleAt(start - 1); if (len > 0) { instance->Lex(start, len, styleStart, pdoc); instance->Fold(start, len, styleStart, pdoc); } performingStyle = false; } } LineEndType LexInterface::LineEndTypesSupported() { if (instance) { return static_cast(instance->LineEndTypesSupported()); } return LineEndType::Default; } bool LexInterface::UseContainerLexing() const noexcept { return !instance; } ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { } void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { // Only adjust for multiple actions to avoid instability if (numberActions < 8) return; // Alpha value for exponential smoothing. // Most recent value contributes 25% to smoothed value. constexpr double alpha = 0.25; const double durationOne = durationOfActions / numberActions; duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, minDuration, maxDuration); } double ActionDuration::Duration() const noexcept { return duration; } size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { return std::lround(secondsAllowed / Duration()); } Document::Document(DocumentOption options) : cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)), durationStyleOneByte(0.000001, 0.0000001, 0.00001) { refCount = 0; #ifdef _WIN32 eolMode = EndOfLine::CrLf; #else eolMode = EndOfLine::Lf; #endif dbcsCodePage = CpUtf8; lineEndBitSet = LineEndType::Default; endStyled = 0; styleClock = 0; enteredModification = 0; enteredStyling = 0; enteredReadOnlyCount = 0; insertionSet = false; tabInChars = 8; indentInChars = 0; actualIndentInChars = 8; useTabs = true; tabIndents = true; backspaceUnindents = false; matchesValid = false; perLineData[ldMarkers] = std::make_unique(); perLineData[ldLevels] = std::make_unique(); perLineData[ldState] = std::make_unique(); perLineData[ldMargin] = std::make_unique(); perLineData[ldAnnotation] = std::make_unique(); perLineData[ldEOLAnnotation] = std::make_unique(); decorations = DecorationListCreate(IsLarge()); cb.SetPerLine(this); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); } Document::~Document() { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyDeleted(this, watcher.userData); } } // Increase reference count and return its previous value. int Document::AddRef() { return refCount++; } // Decrease reference count and return its previous value. // Delete the document if reference count reaches zero. int SCI_METHOD Document::Release() { const int curRefCount = --refCount; if (curRefCount == 0) delete this; return curRefCount; } void Document::Init() { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->Init(); } } void Document::InsertLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->InsertLine(line); } } void Document::InsertLines(Sci::Line line, Sci::Line lines) { for (const auto &pl : perLineData) { if (pl) pl->InsertLines(line, lines); } } void Document::RemoveLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->RemoveLine(line); } } LineMarkers *Document::Markers() const noexcept { return dynamic_cast(perLineData[ldMarkers].get()); } LineLevels *Document::Levels() const noexcept { return dynamic_cast(perLineData[ldLevels].get()); } LineState *Document::States() const noexcept { return dynamic_cast(perLineData[ldState].get()); } LineAnnotation *Document::Margins() const noexcept { return dynamic_cast(perLineData[ldMargin].get()); } LineAnnotation *Document::Annotations() const noexcept { return dynamic_cast(perLineData[ldAnnotation].get()); } LineAnnotation *Document::EOLAnnotations() const noexcept { return dynamic_cast(perLineData[ldEOLAnnotation].get()); } LineEndType Document::LineEndTypesSupported() const { if ((CpUtf8 == dbcsCodePage) && pli) return pli->LineEndTypesSupported(); else return LineEndType::Default; } bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(nullptr); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); ModifiedAt(0); // Need to restyle whole document return true; } else { return false; } } bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) { if (lineEndBitSet != lineEndBitSet_) { lineEndBitSet = lineEndBitSet_; const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); if (lineEndBitSetActive != cb.GetLineEndTypes()) { ModifiedAt(0); cb.SetLineEndTypes(lineEndBitSetActive); return true; } else { return false; } } else { return false; } } void Document::SetSavePoint() { cb.SetSavePoint(); NotifySavePoint(true); } void Document::TentativeUndo() { if (!TentativeActive()) return; CheckReadOnly(); if (enteredModification == 0) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.TentativeSteps(); //Platform::DebugPrintf("Steps=%d\n", steps); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Undo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Undo); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Undo, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); } ModificationFlags modFlags = ModificationFlags::Undo; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { modFlags |= ModificationFlags::InsertText; } else if (action.at == ActionType::insert) { modFlags |= ModificationFlags::DeleteText; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified(DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); cb.TentativeCommit(); } enteredModification--; } } int Document::GetMark(Sci::Line line) const noexcept { return Markers()->MarkValue(line); } Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { return Markers()->MarkerNext(lineStart, mask); } int Document::AddMark(Sci::Line line, int markerNum) { if (line >= 0 && line <= LinesTotal()) { const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); return prev; } else { return -1; } } void Document::AddMarkSet(Sci::Line line, int valueSet) { if (line < 0 || line > LinesTotal()) { return; } unsigned int m = valueSet; for (int i = 0; m; i++, m >>= 1) { if (m & 1) Markers()->AddMark(line, i, LinesTotal()); } const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMark(Sci::Line line, int markerNum) { Markers()->DeleteMark(line, markerNum, false); const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMarkFromHandle(int markerHandle) { Markers()->DeleteMarkFromHandle(markerHandle); DocModification mh(ModificationFlags::ChangeMarker); mh.line = -1; NotifyModified(mh); } void Document::DeleteAllMarks(int markerNum) { bool someChanges = false; for (Sci::Line line = 0; line < LinesTotal(); line++) { if (Markers()->DeleteMark(line, markerNum, true)) someChanges = true; } if (someChanges) { DocModification mh(ModificationFlags::ChangeMarker); mh.line = -1; NotifyModified(mh); } } Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { return Markers()->LineFromHandle(markerHandle); } int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { return Markers()->NumberFromLine(line, which); } int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { return Markers()->HandleFromLine(line, which); } Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { return cb.LineStart(line); } bool Document::IsLineStartPosition(Sci::Position position) const { return LineStart(LineFromPosition(position)) == position; } Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { if (line >= LinesTotal() - 1) { return LineStart(line + 1); } else { Sci::Position position = LineStart(line + 1); if (LineEndType::Unicode == cb.GetLineEndTypes()) { const unsigned char bytes[] = { cb.UCharAt(position-3), cb.UCharAt(position-2), cb.UCharAt(position-1), }; if (UTF8IsSeparator(bytes)) { return position - UTF8SeparatorLength; } if (UTF8IsNEL(bytes+1)) { return position - UTF8NELLength; } } position--; // Back over CR or LF // When line terminator is CR+LF, may need to go back one more if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { position--; } return position; } } void SCI_METHOD Document::SetErrorStatus(int status) { // Tell the watchers an error has occurred. for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast(status)); } } Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { return cb.LineFromPosition(pos); } Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { // Avoids casting in callers for this very common function return cb.LineFromPosition(pos); } Sci::Position Document::LineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)); } bool Document::IsLineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)) == position; } bool Document::IsPositionInLineEnd(Sci::Position position) const { return position >= LineEnd(LineFromPosition(position)); } Sci::Position Document::VCHomePosition(Sci::Position position) const { const Sci::Line line = SciLineFromPosition(position); const Sci::Position startPosition = LineStart(line); const Sci::Position endLine = LineEnd(line); Sci::Position startText = startPosition; while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t')) startText++; if (position == startText) return startPosition; else return startText; } Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept { return cb.IndexLineStart(line, lineCharacterIndex); } Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept { return cb.LineFromPositionIndex(pos, lineCharacterIndex); } Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { const Sci::Position posAfter = cb.LineStart(line) + length; if (posAfter >= LengthNoExcept()) { return LinesTotal(); } const Sci::Line lineAfter = SciLineFromPosition(posAfter); if (lineAfter > line) { return lineAfter; } else { // Want to make some progress so return next line return lineAfter + 1; } } int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { const int prev = Levels()->SetLevel(line, level, LinesTotal()); if (prev != level) { DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); mh.foldLevelNow = static_cast(level); mh.foldLevelPrev = static_cast(prev); NotifyModified(mh); } return prev; } int SCI_METHOD Document::GetLevel(Sci_Position line) const { return Levels()->GetLevel(line); } FoldLevel Document::GetFoldLevel(Sci_Position line) const { return static_cast(Levels()->GetLevel(line)); } void Document::ClearLevels() { Levels()->ClearLevels(); } static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept { if (LevelIsWhitespace(levelTry)) return true; else return LevelNumber(levelStart) < LevelNumber(levelTry); } Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional level, Sci::Line lastLine) { const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent)); const Sci::Line maxLine = LinesTotal(); const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1; Sci::Line lineMaxSubord = lineParent; while (lineMaxSubord < maxLine - 1) { EnsureStyledTo(LineStart(lineMaxSubord + 2)); if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1))) break; if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) break; lineMaxSubord++; } if (lineMaxSubord > lineParent) { if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) { // Have chewed up some whitespace that belongs to a parent so seek back if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) { lineMaxSubord--; } } } return lineMaxSubord; } Sci::Line Document::GetFoldParent(Sci::Line line) const { const FoldLevel level = LevelNumberPart(GetFoldLevel(line)); Sci::Line lineLook = line - 1; while ((lineLook > 0) && ( (!LevelIsHeader(GetFoldLevel(lineLook))) || (LevelNumberPart(GetFoldLevel(lineLook)) >= level)) ) { lineLook--; } if (LevelIsHeader(GetFoldLevel(lineLook)) && (LevelNumberPart(GetFoldLevel(lineLook)) < level)) { return lineLook; } else { return -1; } } void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) { const FoldLevel level = GetFoldLevel(line); const Sci::Line lookLastLine = std::max(line, lastLine) + 1; Sci::Line lookLine = line; FoldLevel lookLineLevel = level; FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel); while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) || (LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) { lookLineLevel = GetFoldLevel(--lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); } Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine); if (beginFoldBlock == -1) { highlightDelimiter.Clear(); return; } Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine); Sci::Line firstChangeableLineBefore = -1; if (endFoldBlock < line) { lookLine = beginFoldBlock - 1; lookLineLevel = GetFoldLevel(lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) { if (LevelIsHeader(lookLineLevel)) { if (GetLastChild(lookLine, {}, lookLastLine) == line) { beginFoldBlock = lookLine; endFoldBlock = line; firstChangeableLineBefore = line - 1; } } if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum)) break; lookLineLevel = GetFoldLevel(--lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); } } if (firstChangeableLineBefore == -1) { for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel); lookLine >= beginFoldBlock; lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) { if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) { firstChangeableLineBefore = lookLine; break; } } } if (firstChangeableLineBefore == -1) firstChangeableLineBefore = beginFoldBlock - 1; Sci::Line firstChangeableLineAfter = -1; for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel); lookLine <= endFoldBlock; lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) { if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) { firstChangeableLineAfter = lookLine; break; } } if (firstChangeableLineAfter == -1) firstChangeableLineAfter = endFoldBlock + 1; highlightDelimiter.beginFoldBlock = beginFoldBlock; highlightDelimiter.endFoldBlock = endFoldBlock; highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore; highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter; } Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { return std::clamp(pos, 0, LengthNoExcept()); } bool Document::IsCrLf(Sci::Position pos) const noexcept { if (pos < 0) return false; if (pos >= (LengthNoExcept() - 1)) return false; return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); } int Document::LenChar(Sci::Position pos) const noexcept { if (pos < 0 || pos >= LengthNoExcept()) { // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. return 1; } else if (IsCrLf(pos)) { return 2; } const unsigned char leadByte = cb.UCharAt(pos); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return 1; } if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b < widthCharBytes; b++) { charBytes[b] = cb.UCharAt(pos + b); } const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte return 1; } else { return utf8status & UTF8MaskWidth; } } else { if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) { return 2; } else { return 1; } } } bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { Sci::Position trail = pos; while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) trail--; start = (trail > 0) ? trail-1 : trail; const unsigned char leadByte = cb.UCharAt(start); const int widthCharBytes = UTF8BytesOfLead[leadByte]; if (widthCharBytes == 1) { return false; } else { const int trailBytes = widthCharBytes - 1; const Sci::Position len = pos - start; if (len > trailBytes) // pos too far from lead return false; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (Sci::Position b=1; b= LengthNoExcept()) return LengthNoExcept(); // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept()); if (checkLineEnd && IsCrLf(pos - 1)) { if (moveDir > 0) return pos + 1; else return pos - 1; } if (dbcsCodePage) { if (CpUtf8 == dbcsCodePage) { const unsigned char ch = cb.UCharAt(pos); // If ch is not a trail byte then pos is valid intercharacter position if (UTF8IsTrailByte(ch)) { Sci::Position startUTF = pos; Sci::Position endUTF = pos; if (InGoodUTF8(pos, startUTF, endUTF)) { // ch is a trail byte within a UTF-8 character if (moveDir > 0) pos = endUTF; else pos = startUTF; } // Else invalid UTF-8 so return position of isolated trail byte } } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); if (pos == posStartLine) return pos; // Step back until a non-lead-byte is found. Sci::Position posCheck = pos; while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1))) posCheck--; // Check from known start of character. while (posCheck < pos) { const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1; if (posCheck + mbsize == pos) { return pos; } else if (posCheck + mbsize > pos) { if (moveDir > 0) { return posCheck + mbsize; } else { return posCheck; } } posCheck += mbsize; } } } return pos; } // NextPosition moves between valid positions - it can not handle a position in the middle of a // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. // A \r\n pair is treated as two characters. Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { // If out of range, just return minimum/maximum value. const int increment = (moveDir > 0) ? 1 : -1; if (pos + increment <= 0) return 0; if (pos + increment >= cb.Length()) return cb.Length(); if (dbcsCodePage) { if (CpUtf8 == dbcsCodePage) { if (increment == 1) { // Simple forward movement case so can avoid some checks const unsigned char leadByte = cb.UCharAt(pos); if (UTF8IsAscii(leadByte)) { // Single byte character or invalid pos++; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b 0) { const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1; pos += mbsize; if (pos > cb.Length()) pos = cb.Length(); } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx // http://msdn.microsoft.com/en-us/library/cc194790.aspx if ((pos - 1) <= posStartLine) { return pos - 1; } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { // Should actually be trail byte if (IsDBCSDualByteAt(pos - 2)) { return pos - 2; } else { // Invalid byte pair so treat as one byte wide return pos - 1; } } else { // Otherwise, step back until a non-lead-byte is found. Sci::Position posTemp = pos - 1; while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) ; // Now posTemp+1 must point to the beginning of a character, // so figure out whether we went back an even or an odd // number of bytes and go back 1 or 2 bytes, respectively. const Sci::Position widthLast = ((pos - posTemp) & 1) + 1; if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) { return pos - widthLast; } // Byte before pos may be valid character or may be an invalid second byte return pos - 1; } } } } else { pos += increment; } return pos; } bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { // Returns true if pos changed Sci::Position posNext = NextPosition(pos, moveDir); if (posNext == pos) { return false; } else { pos = posNext; return true; } } Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { if (position >= LengthNoExcept()) { return CharacterExtracted(unicodeReplacementChar, 0); } const unsigned char leadByte = cb.UCharAt(position); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return Sci::invalidPosition; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > Length())) return Sci::invalidPosition; } return pos; } Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { Sci::Position pos = positionStart; if (dbcsCodePage) { const int increment = (characterOffset > 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return Sci::invalidPosition; if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. characterOffset -= increment; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > LengthNoExcept())) return Sci::invalidPosition; } return pos; } int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { int bytesInCharacter = 1; const unsigned char leadByte = cb.UCharAt(position); int character = leadByte; if (dbcsCodePage && !UTF8IsAscii(leadByte)) { if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b= 0x81) && (uch <= 0x9F)) || ((uch >= 0xE0) && (uch <= 0xFC)); // Lead bytes F0 to FC may be a Microsoft addition. case 936: // GBK return (uch >= 0x81) && (uch <= 0xFE); case 949: // Korean Wansung KS C-5601-1987 return (uch >= 0x81) && (uch <= 0xFE); case 950: // Big5 return (uch >= 0x81) && (uch <= 0xFE); case 1361: // Korean Johab KS C-5601-1992 return ((uch >= 0x84) && (uch <= 0xD3)) || ((uch >= 0xD8) && (uch <= 0xDE)) || ((uch >= 0xE0) && (uch <= 0xF9)); } return false; } bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept { const unsigned char trail = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (trail != 0x7F) && ((trail >= 0x40) && (trail <= 0xFC)); case 936: // GBK return (trail != 0x7F) && ((trail >= 0x40) && (trail <= 0xFE)); case 949: // Korean Wansung KS C-5601-1987 return ((trail >= 0x41) && (trail <= 0x5A)) || ((trail >= 0x61) && (trail <= 0x7A)) || ((trail >= 0x81) && (trail <= 0xFE)); case 950: // Big5 return ((trail >= 0x40) && (trail <= 0x7E)) || ((trail >= 0xA1) && (trail <= 0xFE)); case 1361: // Korean Johab KS C-5601-1992 return ((trail >= 0x31) && (trail <= 0x7E)) || ((trail >= 0x81) && (trail <= 0xFE)); } return false; } int Document::DBCSDrawBytes(std::string_view text) const noexcept { if (text.length() <= 1) { return static_cast(text.length()); } if (IsDBCSLeadByteNoExcept(text[0])) { return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1; } else { return 1; } } bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); } // Need to break text into segments near end but taking into account the // encoding to not break inside a UTF-8 or DBCS character and also trying // to avoid breaking inside a pair of combining characters, or inside // ligatures. // TODO: implement grapheme cluster boundaries, // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries. // // The segment length must always be long enough (more than 4 bytes) // so that there will be at least one whole character to make a segment. // For UTF-8, text must consist only of valid whole characters. // In preference order from best to worst: // 1) Break before or after spaces or controls // 2) Break at word and punctuation boundary for better kerning and ligature support // 3) Break after whole character, this may break combining characters size_t Document::SafeSegment(std::string_view text) const noexcept { // check space first as most written language use spaces. for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) { if (IsBreakSpace(*it)) { return it - text.begin(); } } if (!dbcsCodePage || dbcsCodePage == CpUtf8) { // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary. std::string_view::iterator it = text.end() - 1; const bool punctuation = IsPunctuation(*it); do { --it; if (punctuation != IsPunctuation(*it)) { return it - text.begin() + 1; } } while (it != text.begin()); it = text.end() - 1; if (dbcsCodePage) { // for UTF-8 go back to the start of last character. for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) { --it; } } return it - text.begin(); } { // forward iterate for DBCS to find word and punctuation boundary. size_t lastPunctuationBreak = 0; size_t lastEncodingAllowedBreak = 0; CharacterClass ccPrev = CharacterClass::space; for (size_t j = 0; j < text.length();) { const unsigned char ch = text[j]; lastEncodingAllowedBreak = j++; CharacterClass cc = CharacterClass::word; if (UTF8IsAscii(ch)) { if (IsPunctuation(ch)) { cc = CharacterClass::punctuation; } } else { j += IsDBCSLeadByteNoExcept(ch); } if (cc != ccPrev) { ccPrev = cc; lastPunctuationBreak = lastEncodingAllowedBreak; } } return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak; } } EncodingFamily Document::CodePageFamily() const noexcept { if (CpUtf8 == dbcsCodePage) return EncodingFamily::unicode; else if (dbcsCodePage) return EncodingFamily::dbcs; else return EncodingFamily::eightBit; } void Document::ModifiedAt(Sci::Position pos) noexcept { if (endStyled > pos) endStyled = pos; } void Document::CheckReadOnly() { if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { enteredReadOnlyCount++; NotifyModifyAttempt(); enteredReadOnlyCount--; } } // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt. // SetStyleAt does not change the persistent state of a document bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { if (pos < 0) return false; if (len <= 0) return false; if ((pos + len) > LengthNoExcept()) return false; CheckReadOnly(); if (enteredModification != 0) { return false; } else { enteredModification++; if (!cb.IsReadOnly()) { NotifyModified( DocModification( ModificationFlags::BeforeDelete | ModificationFlags::User, pos, len, 0, nullptr)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.DeleteChars(pos, len, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); if ((pos < LengthNoExcept()) || (pos == 0)) ModifiedAt(pos); else ModifiedAt(pos-1); NotifyModified( DocModification( ModificationFlags::DeleteText | ModificationFlags::User | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), pos, len, LinesTotal() - prevLinesTotal, text)); } enteredModification--; } return !cb.IsReadOnly(); } /** * Insert a string with a length. */ Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { if (insertLength <= 0) { return 0; } CheckReadOnly(); // Application may change read only state here if (cb.IsReadOnly()) { return 0; } if (enteredModification != 0) { return 0; } enteredModification++; insertionSet = false; insertion.clear(); NotifyModified( DocModification( ModificationFlags::InsertCheck, position, insertLength, 0, s)); if (insertionSet) { s = insertion.c_str(); insertLength = insertion.length(); } NotifyModified( DocModification( ModificationFlags::BeforeInsert | ModificationFlags::User, position, insertLength, 0, s)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.InsertString(position, s, insertLength, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); ModifiedAt(position); NotifyModified( DocModification( ModificationFlags::InsertText | ModificationFlags::User | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), position, insertLength, LinesTotal() - prevLinesTotal, text)); if (insertionSet) { // Free memory as could be large std::string().swap(insertion); } enteredModification--; return insertLength; } void Document::ChangeInsertion(const char *s, Sci::Position length) { insertionSet = true; insertion.assign(s, length); } int SCI_METHOD Document::AddData(const char *data, Sci_Position length) { try { const Sci::Position position = Length(); InsertString(position, data, length); } catch (std::bad_alloc &) { return static_cast(Status::BadAlloc); } catch (...) { return static_cast(Status::Failure); } return static_cast(Status::Ok); } void * SCI_METHOD Document::ConvertToDocument() { return this; } Sci::Position Document::Undo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartUndo(); //Platform::DebugPrintf("Steps=%d\n", steps); Sci::Position coalescedRemovePos = -1; Sci::Position coalescedRemoveLen = 0; Sci::Position prevRemoveActionPos = -1; Sci::Position prevRemoveActionLen = 0; for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Undo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Undo); dm.token = action.position; NotifyModified(dm); if (!action.mayCoalesce) { coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Undo, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } ModificationFlags modFlags = ModificationFlags::Undo; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { newPos += action.lenData; modFlags |= ModificationFlags::InsertText; if ((coalescedRemoveLen > 0) && (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) { coalescedRemoveLen += action.lenData; newPos = coalescedRemovePos + coalescedRemoveLen; } else { coalescedRemovePos = action.position; coalescedRemoveLen = action.lenData; } prevRemoveActionPos = action.position; prevRemoveActionLen = action.lenData; } else if (action.at == ActionType::insert) { modFlags |= ModificationFlags::DeleteText; coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified(DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); } enteredModification--; } return newPos; } Sci::Position Document::Redo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartRedo(); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetRedoStep(); if (action.at == ActionType::insert) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Redo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Redo); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Redo, action)); } cb.PerformRedoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } ModificationFlags modFlags = ModificationFlags::Redo; if (action.at == ActionType::insert) { newPos += action.lenData; modFlags |= ModificationFlags::InsertText; } else if (action.at == ActionType::remove) { modFlags |= ModificationFlags::DeleteText; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified( DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); } enteredModification--; } return newPos; } void Document::DelChar(Sci::Position pos) { DeleteChars(pos, LenChar(pos)); } void Document::DelCharBack(Sci::Position pos) { if (pos <= 0) { return; } else if (IsCrLf(pos - 2)) { DeleteChars(pos - 2, 2); } else if (dbcsCodePage) { const Sci::Position startChar = NextPosition(pos, -1); DeleteChars(startChar, pos - startChar); } else { DeleteChars(pos - 1, 1); } } static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept { return ((pos / tabSize) + 1) * tabSize; } static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) { std::string indentation; if (!insertSpaces) { while (indent >= tabSize) { indentation += '\t'; indent -= tabSize; } } while (indent > 0) { indentation += ' '; indent--; } return indentation; } int SCI_METHOD Document::GetLineIndentation(Sci_Position line) { int indent = 0; if ((line >= 0) && (line < LinesTotal())) { const Sci::Position lineStart = LineStart(line); const Sci::Position length = Length(); for (Sci::Position i = lineStart; i < length; i++) { const char ch = cb.CharAt(i); if (ch == ' ') indent++; else if (ch == '\t') indent = static_cast(NextTab(indent, tabInChars)); else return indent; } } return indent; } Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) { const int indentOfLine = GetLineIndentation(line); if (indent < 0) indent = 0; if (indent != indentOfLine) { std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs); const Sci::Position thisLineStart = LineStart(line); const Sci::Position indentPos = GetLineIndentPosition(line); UndoGroup ug(this); DeleteChars(thisLineStart, indentPos - thisLineStart); return thisLineStart + InsertString(thisLineStart, linebuf.c_str(), linebuf.length()); } else { return GetLineIndentPosition(line); } } Sci::Position Document::GetLineIndentPosition(Sci::Line line) const { if (line < 0) return 0; Sci::Position pos = LineStart(line); const Sci::Position length = Length(); while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) { pos++; } return pos; } Sci::Position Document::GetColumn(Sci::Position pos) { Sci::Position column = 0; const Sci::Line line = SciLineFromPosition(pos); if ((line >= 0) && (line < LinesTotal())) { for (Sci::Position i = LineStart(line); i < pos;) { const char ch = cb.CharAt(i); if (ch == '\t') { column = NextTab(column, tabInChars); i++; } else if (ch == '\r') { return column; } else if (ch == '\n') { return column; } else if (i >= Length()) { return column; } else if (UTF8IsAscii(ch)) { column++; i++; } else { column++; i = NextPosition(i, 1); } } } return column; } Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept { startPos = MovePositionOutsideChar(startPos, 1, false); endPos = MovePositionOutsideChar(endPos, -1, false); Sci::Position count = 0; Sci::Position i = startPos; while (i < endPos) { count++; i = NextPosition(i, 1); } return count; } Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept { startPos = MovePositionOutsideChar(startPos, 1, false); endPos = MovePositionOutsideChar(endPos, -1, false); Sci::Position count = 0; Sci::Position i = startPos; while (i < endPos) { count++; const Sci::Position next = NextPosition(i, 1); if ((next - i) > 3) count++; i = next; } return count; } Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) { Sci::Position position = LineStart(line); if ((line >= 0) && (line < LinesTotal())) { Sci::Position columnCurrent = 0; while ((columnCurrent < column) && (position < Length())) { const char ch = cb.CharAt(position); if (ch == '\t') { columnCurrent = NextTab(columnCurrent, tabInChars); if (columnCurrent > column) return position; position++; } else if (ch == '\r') { return position; } else if (ch == '\n') { return position; } else { columnCurrent++; position = NextPosition(position, 1); } } } return position; } void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) { // Dedent - suck white space off the front of the line to dedent by equivalent of a tab for (Sci::Line line = lineBottom; line >= lineTop; line--) { const Sci::Position indentOfLine = GetLineIndentation(line); if (forwards) { if (LineStart(line) < LineEnd(line)) { SetLineIndentation(line, indentOfLine + IndentSize()); } } else { SetLineIndentation(line, indentOfLine - IndentSize()); } } } // Convert line endings for a piece of text to a particular mode. // Stop at len or when a NUL is found. std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) { std::string dest; for (size_t i = 0; (i < len) && (s[i]); i++) { if (s[i] == '\n' || s[i] == '\r') { if (eolModeWanted == EndOfLine::Cr) { dest.push_back('\r'); } else if (eolModeWanted == EndOfLine::Lf) { dest.push_back('\n'); } else { // eolModeWanted == EndOfLine::CrLf dest.push_back('\r'); dest.push_back('\n'); } if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) { i++; } } else { dest.push_back(s[i]); } } return dest; } void Document::ConvertLineEnds(EndOfLine eolModeSet) { UndoGroup ug(this); for (Sci::Position pos = 0; pos < Length(); pos++) { if (cb.CharAt(pos) == '\r') { if (cb.CharAt(pos + 1) == '\n') { // CRLF if (eolModeSet == EndOfLine::Cr) { DeleteChars(pos + 1, 1); // Delete the LF } else if (eolModeSet == EndOfLine::Lf) { DeleteChars(pos, 1); // Delete the CR } else { pos++; } } else { // CR if (eolModeSet == EndOfLine::CrLf) { pos += InsertString(pos + 1, "\n", 1); // Insert LF } else if (eolModeSet == EndOfLine::Lf) { pos += InsertString(pos, "\n", 1); // Insert LF DeleteChars(pos, 1); // Delete CR pos--; } } } else if (cb.CharAt(pos) == '\n') { // LF if (eolModeSet == EndOfLine::CrLf) { pos += InsertString(pos, "\r", 1); // Insert CR } else if (eolModeSet == EndOfLine::Cr) { pos += InsertString(pos, "\r", 1); // Insert CR DeleteChars(pos, 1); // Delete LF pos--; } } } } DocumentOption Document::Options() const noexcept { return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) | (cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone); } bool Document::IsWhiteLine(Sci::Line line) const { Sci::Position currentChar = LineStart(line); const Sci::Position endLine = LineEnd(line); while (currentChar < endLine) { if (!IsSpaceOrTab(cb.CharAt(currentChar))) { return false; } ++currentChar; } return true; } Sci::Position Document::ParaUp(Sci::Position pos) const { Sci::Line line = SciLineFromPosition(pos); line--; while (line >= 0 && IsWhiteLine(line)) { // skip empty lines line--; } while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines line--; } line++; return LineStart(line); } Sci::Position Document::ParaDown(Sci::Position pos) const { Sci::Line line = SciLineFromPosition(pos); while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines line++; } while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines line++; } if (line < LinesTotal()) return LineStart(line); else // end of a document return LineEnd(line-1); } CharacterClass Document::WordCharacterClass(unsigned int ch) const { if (dbcsCodePage && (ch >= 0x80)) { if (CpUtf8 == dbcsCodePage) { // Use hard coded Unicode class const CharacterCategory cc = charMap.CategoryFor(ch); switch (cc) { // Separator, Line/Paragraph case ccZl: case ccZp: return CharacterClass::newLine; // Separator, Space case ccZs: // Other case ccCc: case ccCf: case ccCs: case ccCo: case ccCn: return CharacterClass::space; // Letter case ccLu: case ccLl: case ccLt: case ccLm: case ccLo: // Number case ccNd: case ccNl: case ccNo: // Mark - includes combining diacritics case ccMn: case ccMc: case ccMe: return CharacterClass::word; // Punctuation case ccPc: case ccPd: case ccPs: case ccPe: case ccPi: case ccPf: case ccPo: // Symbol case ccSm: case ccSc: case ccSk: case ccSo: return CharacterClass::punctuation; } } else { // Asian DBCS return CharacterClass::word; } } return charClass.GetClass(static_cast(ch)); } /** * Used by commands that want to select whole words. * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. */ Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const { CharacterClass ccStart = CharacterClass::word; if (delta < 0) { if (!onlyWordCharacters) { const CharacterExtracted ce = CharacterBefore(pos); ccStart = WordCharacterClass(ce.character); } while (pos > 0) { const CharacterExtracted ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } else { if (!onlyWordCharacters && pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); ccStart = WordCharacterClass(ce.character); } while (pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } } return MovePositionOutsideChar(pos, delta, true); } /** * Find the start of the next word in either a forward (delta >= 0) or backwards direction * (delta < 0). * This is looking for a transition between character classes although there is also some * additional movement to transit white space. * Used by cursor movement by word commands. */ Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const { if (delta < 0) { while (pos > 0) { const CharacterExtracted ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos -= ce.widthBytes; } if (pos > 0) { CharacterExtracted ce = CharacterBefore(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } } else { CharacterExtracted ce = CharacterAfter(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos += ce.widthBytes; } } return pos; } /** * Find the end of the next word in either a forward (delta >= 0) or backwards direction * (delta < 0). * This is looking for a transition between character classes although there is also some * additional movement to transit white space. * Used by cursor movement by word commands. */ Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const { if (delta < 0) { if (pos > 0) { CharacterExtracted ce = CharacterBefore(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); if (ccStart != CharacterClass::space) { while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos -= ce.widthBytes; } } } else { while (pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos += ce.widthBytes; } if (pos < LengthNoExcept()) { CharacterExtracted ce = CharacterAfter(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } } } return pos; } namespace { constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept { return (cc != ccNext) && (cc == CharacterClass::word || cc == CharacterClass::punctuation); } } /** * Check that the character at the given position is a word or punctuation character and that * the previous character is of a different character class. */ bool Document::IsWordStartAt(Sci::Position pos) const { if (pos >= LengthNoExcept()) return false; if (pos >= 0) { const CharacterExtracted cePos = CharacterAfter(pos); // At start of document, treat as if space before so can be word start const CharacterExtracted cePrev = (pos > 0) ? CharacterBefore(pos) : CharacterExtracted(' ', 1); return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character)); } return true; } /** * Check that the character before the given position is a word or punctuation character and that * the next character is of a different character class. */ bool Document::IsWordEndAt(Sci::Position pos) const { if (pos <= 0) return false; if (pos <= LengthNoExcept()) { // At end of document, treat as if space after so can be word end const CharacterExtracted cePos = (pos < LengthNoExcept()) ? CharacterAfter(pos) : CharacterExtracted(' ', 1); const CharacterExtracted cePrev = CharacterBefore(pos); return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character)); } return true; } /** * Check that the given range is has transitions between character classes at both * ends and where the characters on the inside are word or punctuation characters. */ bool Document::IsWordAt(Sci::Position start, Sci::Position end) const { return (start < end) && IsWordStartAt(start) && IsWordEndAt(end); } bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const { return (!word && !wordStart) || (word && IsWordAt(pos, pos + length)) || (wordStart && IsWordStartAt(pos)); } bool Document::HasCaseFolder() const noexcept { return pcf != nullptr; } void Document::SetCaseFolder(std::unique_ptr pcf_) noexcept { pcf = std::move(pcf_); } Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { const unsigned char leadByte = cb.UCharAt(position); if (UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b=1; b(memchr(view.segment1 + start, ch, range1Length)); if (match) { return match - view.segment1; } start += range1Length; } const char *match2 = static_cast(memchr(view.segment2 + start, ch, length - range1Length)); if (match2) { return match2 - view.segment2; } return -1; } // Equivalent of memcmp over the split view // This does not call memcmp as search texts are commonly too short to overcome the // call overhead. bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept { for (size_t i = 0; i < text.length(); i++) { if (view.CharAt(i + start) != text[i]) { return false; } } return true; } } /** * Find text in document, supporting both forward and backward * searches (just pass minPos > maxPos to do a backward search) * Has not been tested with backwards DBCS searches yet. */ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search, FindOption flags, Sci::Position *length) { if (*length <= 0) return minPos; const bool caseSensitive = FlagSet(flags, FindOption::MatchCase); const bool word = FlagSet(flags, FindOption::WholeWord); const bool wordStart = FlagSet(flags, FindOption::WordStart); const bool regExp = FlagSet(flags, FindOption::RegExp); if (regExp) { if (!regex) regex = std::unique_ptr(CreateRegexSearch(&charClass)); return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length); } else { const bool forward = minPos <= maxPos; const int increment = forward ? 1 : -1; // Range endpoints should not be inside DBCS characters, but just in case, move them. const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false); const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false); // Compute actual search ranges needed const Sci::Position lengthFind = *length; //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind); const Sci::Position limitPos = std::max(startPos, endPos); Sci::Position pos = startPos; if (!forward) { // Back all of a character pos = NextPosition(pos, increment); } const SplitView cbView = cb.AllView(); if (caseSensitive) { const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; const unsigned char charStartSearch = search[0]; if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) { // This is a fast case where there is no need to test byte values to iterate // so becomes the equivalent of a memchr+memcmp loop. // UTF-8 search will not be self-synchronizing when starts with trail byte const std::string_view suffix(search + 1, lengthFind - 1); while (pos < endSearch) { pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch); if (pos < 0) { break; } if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) { return pos; } pos++; } } else { while (forward ? (pos < endSearch) : (pos >= endSearch)) { const unsigned char leadByte = cbView.CharAt(pos); if (leadByte == charStartSearch) { bool found = (pos + lengthFind) <= limitPos; // SplitMatch could be called here but it is slower with g++ -O2 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) { found = cbView.CharAt(pos + indexSearch) == search[indexSearch]; } if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { return pos; } } if (forward && UTF8IsAscii(leadByte)) { pos++; } else { if (dbcsCodePage) { if (!NextCharacter(pos, increment)) { break; } } else { pos += increment; } } } } } else if (CpUtf8 == dbcsCodePage) { constexpr size_t maxFoldingExpansion = 4; std::vector searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1); const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); while (forward ? (pos < endPos) : (pos >= endPos)) { int widthFirstCharacter = 0; Sci::Position posIndexDocument = pos; size_t indexSearch = 0; bool characterMatches = true; for (;;) { const unsigned char leadByte = cbView.CharAt(posIndexDocument); char bytes[UTF8MaxBytes + 1]; int widthChar = 1; if (!UTF8IsAscii(leadByte)) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; bytes[0] = leadByte; for (int b=1; b(bytes), widthCharBytes) & UTF8MaskWidth; } if (!widthFirstCharacter) { widthFirstCharacter = widthChar; } if ((posIndexDocument + widthChar) > limitPos) { break; } size_t lenFlat = 1; if (widthChar == 1) { characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte); } else { char folded[UTF8MaxBytes * maxFoldingExpansion + 1]; lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing assert((indexSearch + lenFlat) <= searchThing.size()); // Does folded match the buffer characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); } if (!characterMatches) { break; } posIndexDocument += widthChar; indexSearch += lenFlat; if (indexSearch >= lenSearch) { break; } } if (characterMatches && (indexSearch == lenSearch)) { if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) { *length = posIndexDocument - pos; return pos; } } if (forward) { pos += widthFirstCharacter; } else { if (!NextCharacter(pos, increment)) { break; } } } } else if (dbcsCodePage) { constexpr size_t maxBytesCharacter = 2; constexpr size_t maxFoldingExpansion = 4; std::vector searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1); const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); while (forward ? (pos < endPos) : (pos >= endPos)) { int widthFirstCharacter = 0; Sci::Position indexDocument = 0; size_t indexSearch = 0; bool characterMatches = true; while (((pos + indexDocument) < limitPos) && (indexSearch < lenSearch)) { const unsigned char leadByte = cbView.CharAt(pos + indexDocument); const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? 2 : 1; if (!widthFirstCharacter) { widthFirstCharacter = widthChar; } if ((pos + indexDocument + widthChar) > limitPos) { break; } size_t lenFlat = 1; if (widthChar == 1) { characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte); } else { char bytes[maxBytesCharacter + 1]; bytes[0] = leadByte; bytes[1] = cbView.CharAt(pos + indexDocument + 1); char folded[maxBytesCharacter * maxFoldingExpansion + 1]; lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing assert((indexSearch + lenFlat) <= searchThing.size()); // Does folded match the buffer characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); } if (!characterMatches) { break; } indexDocument += widthChar; indexSearch += lenFlat; } if (characterMatches && (indexSearch == lenSearch)) { if (MatchesWordOptions(word, wordStart, pos, indexDocument)) { *length = indexDocument; return pos; } } if (forward) { pos += widthFirstCharacter; } else { if (!NextCharacter(pos, increment)) { break; } } } } else { const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; std::vector searchThing(lengthFind + 1); pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); while (forward ? (pos < endSearch) : (pos >= endSearch)) { bool found = (pos + lengthFind) <= limitPos; for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) { const char ch = cbView.CharAt(pos + indexSearch); const char chTest = searchThing[indexSearch]; if (UTF8IsAscii(ch)) { found = chTest == MakeLowerCase(ch); } else { char folded[2]; pcf->Fold(folded, sizeof(folded), &ch, 1); found = folded[0] == chTest; } } if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { return pos; } pos += increment; } } } //Platform::DebugPrintf("Not found\n"); return -1; } const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) { if (regex) return regex->SubstituteByPosition(this, text, length); else return nullptr; } LineCharacterIndexType Document::LineCharacterIndex() const noexcept { return cb.LineCharacterIndex(); } void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) { return cb.AllocateLineCharacterIndex(lineCharacterIndex); } void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) { return cb.ReleaseLineCharacterIndex(lineCharacterIndex); } Sci::Line Document::LinesTotal() const noexcept { return cb.Lines(); } void Document::AllocateLines(Sci::Line lines) { cb.AllocateLines(lines); } void Document::SetDefaultCharClasses(bool includeWordClass) { charClass.SetDefaultCharClasses(includeWordClass); } void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) { charClass.SetCharClasses(chars, newCharClass); } int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const { return charClass.GetCharsOfClass(characterClass, buffer); } void Document::SetCharacterCategoryOptimization(int countCharacters) { charMap.Optimize(countCharacters); } int Document::CharacterCategoryOptimization() const noexcept { return charMap.Size(); } void SCI_METHOD Document::StartStyling(Sci_Position position) { endStyled = position; } bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) { if (enteredStyling != 0) { return false; } else { enteredStyling++; const Sci::Position prevEndStyled = endStyled; if (cb.SetStyleFor(endStyled, length, style)) { const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User, prevEndStyled, length); NotifyModified(mh); } endStyled += length; enteredStyling--; return true; } } bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) { if (enteredStyling != 0) { return false; } else { enteredStyling++; bool didChange = false; Sci::Position startMod = 0; Sci::Position endMod = 0; for (int iPos = 0; iPos < length; iPos++, endStyled++) { PLATFORM_ASSERT(endStyled < Length()); if (cb.SetStyleAt(endStyled, styles[iPos])) { if (!didChange) { startMod = endStyled; } didChange = true; endMod = endStyled; } } if (didChange) { const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User, startMod, endMod - startMod + 1); NotifyModified(mh); } enteredStyling--; return true; } } void Document::EnsureStyledTo(Sci::Position pos) { if ((enteredStyling == 0) && (pos > GetEndStyled())) { IncrementStyleClock(); if (pli && !pli->UseContainerLexing()) { const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled()); const Sci::Position endStyledTo = LineStart(lineEndStyled); pli->Colourise(endStyledTo, pos); } else { // Ask the watchers to style, and stop as soon as one responds. for (std::vector::iterator it = watchers.begin(); (pos > GetEndStyled()) && (it != watchers.end()); ++it) { it->watcher->NotifyStyleNeeded(this, it->userData, pos); } } } } void Document::StyleToAdjustingLineDuration(Sci::Position pos) { const Sci::Position stylingStart = GetEndStyled(); ElapsedPeriod epStyling; EnsureStyledTo(pos); durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration()); } void Document::LexerChanged() { // Tell the watchers the lexer has changed. for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyLexerChanged(this, watcher.userData); } } LexInterface *Document::GetLexInterface() const noexcept { return pli.get(); } void Document::SetLexInterface(std::unique_ptr pLexInterface) noexcept { pli = std::move(pLexInterface); } int SCI_METHOD Document::SetLineState(Sci_Position line, int state) { const int statePrevious = States()->SetLineState(line, state); if (state != statePrevious) { const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), 0, 0, nullptr, static_cast(line)); NotifyModified(mh); } return statePrevious; } int SCI_METHOD Document::GetLineState(Sci_Position line) const { return States()->GetLineState(line); } Sci::Line Document::GetMaxLineState() const noexcept { return States()->GetMaxLineState(); } void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) { const DocModification mh(ModificationFlags::LexerState, start, end-start, 0, nullptr, 0); NotifyModified(mh); } StyledText Document::MarginStyledText(Sci::Line line) const noexcept { const LineAnnotation *pla = Margins(); return StyledText(pla->Length(line), pla->Text(line), pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); } void Document::MarginSetText(Sci::Line line, const char *text) { Margins()->SetText(line, text); const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::MarginSetStyle(Sci::Line line, int style) { Margins()->SetStyle(line, style); NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line), 0, 0, nullptr, line)); } void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) { Margins()->SetStyles(line, styles); NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line), 0, 0, nullptr, line)); } void Document::MarginClearAll() { const Sci::Line maxEditorLine = LinesTotal(); for (Sci::Line l=0; lClearAll(); } StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept { const LineAnnotation *pla = Annotations(); return StyledText(pla->Length(line), pla->Text(line), pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); } void Document::AnnotationSetText(Sci::Line line, const char *text) { if (line >= 0 && line < LinesTotal()) { const Sci::Line linesBefore = AnnotationLines(line); Annotations()->SetText(line, text); const int linesAfter = AnnotationLines(line); DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line), 0, 0, nullptr, line); mh.annotationLinesAdded = linesAfter - linesBefore; NotifyModified(mh); } } void Document::AnnotationSetStyle(Sci::Line line, int style) { if (line >= 0 && line < LinesTotal()) { Annotations()->SetStyle(line, style); const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } } void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) { if (line >= 0 && line < LinesTotal()) { Annotations()->SetStyles(line, styles); } } int Document::AnnotationLines(Sci::Line line) const noexcept { return Annotations()->Lines(line); } void Document::AnnotationClearAll() { if (Annotations()->Empty()) { return; } const Sci::Line maxEditorLine = LinesTotal(); for (Sci::Line l=0; lClearAll(); } StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept { const LineAnnotation *pla = EOLAnnotations(); return StyledText(pla->Length(line), pla->Text(line), pla->MultipleStyles(line), pla->Style(line), pla->Styles(line)); } void Document::EOLAnnotationSetText(Sci::Line line, const char *text) { if (line >= 0 && line < LinesTotal()) { EOLAnnotations()->SetText(line, text); const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } } void Document::EOLAnnotationSetStyle(Sci::Line line, int style) { if (line >= 0 && line < LinesTotal()) { EOLAnnotations()->SetStyle(line, style); const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } } void Document::EOLAnnotationClearAll() { if (EOLAnnotations()->Empty()) { return; } const Sci::Line maxEditorLine = LinesTotal(); for (Sci::Line l=0; lClearAll(); } void Document::IncrementStyleClock() noexcept { styleClock = (styleClock + 1) % 0x100000; } void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) { decorations->SetCurrentIndicator(indicator); } void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) { const FillResult fr = decorations->FillRange( position, value, fillLength); if (fr.changed) { const DocModification mh(ModificationFlags::ChangeIndicator | ModificationFlags::User, fr.position, fr.fillLength); NotifyModified(mh); } } bool Document::AddWatcher(DocWatcher *watcher, void *userData) { const WatcherWithUserData wwud(watcher, userData); std::vector::iterator it = std::find(watchers.begin(), watchers.end(), wwud); if (it != watchers.end()) return false; watchers.push_back(wwud); return true; } bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) noexcept { try { // This can never fail as WatcherWithUserData constructor and == are noexcept // but std::find is not noexcept. std::vector::iterator it = std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData)); if (it != watchers.end()) { watchers.erase(it); return true; } } catch (...) { // Ignore any exception } return false; } void Document::NotifyModifyAttempt() { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyModifyAttempt(this, watcher.userData); } } void Document::NotifySavePoint(bool atSavePoint) { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint); } } void Document::NotifyModified(DocModification mh) { if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) { decorations->InsertSpace(mh.position, mh.length); } else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) { decorations->DeleteRange(mh.position, mh.length); } for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyModified(this, mh, watcher.userData); } } bool Document::IsWordPartSeparator(unsigned int ch) const { return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch); } Sci::Position Document::WordPartLeft(Sci::Position pos) const { if (pos > 0) { pos -= CharacterBefore(pos).widthBytes; CharacterExtracted ceStart = CharacterAfter(pos); if (IsWordPartSeparator(ceStart.character)) { while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) { pos -= CharacterBefore(pos).widthBytes; } } if (pos > 0) { ceStart = CharacterAfter(pos); pos -= CharacterBefore(pos).widthBytes; if (IsLowerCase(ceStart.character)) { while (pos > 0 && IsLowerCase(CharacterAfter(pos).character)) pos -= CharacterBefore(pos).widthBytes; if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsUpperCase(ceStart.character)) { while (pos > 0 && IsUpperCase(CharacterAfter(pos).character)) pos -= CharacterBefore(pos).widthBytes; if (!IsUpperCase(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsADigit(ceStart.character)) { while (pos > 0 && IsADigit(CharacterAfter(pos).character)) pos -= CharacterBefore(pos).widthBytes; if (!IsADigit(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsPunctuation(ceStart.character)) { while (pos > 0 && IsPunctuation(CharacterAfter(pos).character)) pos -= CharacterBefore(pos).widthBytes; if (!IsPunctuation(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsASpace(ceStart.character)) { while (pos > 0 && IsASpace(CharacterAfter(pos).character)) pos -= CharacterBefore(pos).widthBytes; if (!IsASpace(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (!IsASCII(ceStart.character)) { while (pos > 0 && !IsASCII(CharacterAfter(pos).character)) pos -= CharacterBefore(pos).widthBytes; if (IsASCII(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else { pos += CharacterAfter(pos).widthBytes; } } } return pos; } Sci::Position Document::WordPartRight(Sci::Position pos) const { CharacterExtracted ceStart = CharacterAfter(pos); const Sci::Position length = LengthNoExcept(); if (IsWordPartSeparator(ceStart.character)) { while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; ceStart = CharacterAfter(pos); } if (!IsASCII(ceStart.character)) { while (pos < length && !IsASCII(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsLowerCase(ceStart.character)) { while (pos < length && IsLowerCase(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsUpperCase(ceStart.character)) { if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) { pos += CharacterAfter(pos).widthBytes; while (pos < length && IsLowerCase(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else { while (pos < length && IsUpperCase(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character)) pos -= CharacterBefore(pos).widthBytes; } else if (IsADigit(ceStart.character)) { while (pos < length && IsADigit(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsPunctuation(ceStart.character)) { while (pos < length && IsPunctuation(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else if (IsASpace(ceStart.character)) { while (pos < length && IsASpace(CharacterAfter(pos).character)) pos += CharacterAfter(pos).widthBytes; } else { pos += CharacterAfter(pos).widthBytes; } return pos; } Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept { const char sStart = cb.StyleAt(pos); if (delta < 0) { while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos)))) pos--; pos++; } else { while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos)))) pos++; } return pos; } static char BraceOpposite(char ch) noexcept { switch (ch) { case '(': return ')'; case ')': return '('; case '[': return ']'; case ']': return '['; case '{': return '}'; case '}': return '{'; case '<': return '>'; case '>': return '<'; default: return '\0'; } } // TODO: should be able to extend styled region to find matching brace Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept { const char chBrace = CharAt(position); const char chSeek = BraceOpposite(chBrace); if (chSeek == '\0') return - 1; const int styBrace = StyleIndexAt(position); int direction = -1; if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<') direction = 1; int depth = 1; position = useStartPos ? startPos : NextPosition(position, direction); while ((position >= 0) && (position < LengthNoExcept())) { const char chAtPos = CharAt(position); const int styAtPos = StyleIndexAt(position); if ((position > GetEndStyled()) || (styAtPos == styBrace)) { if (chAtPos == chBrace) depth++; if (chAtPos == chSeek) depth--; if (depth == 0) return position; } const Sci::Position positionBeforeMove = position; position = NextPosition(position, direction); if (position == positionBeforeMove) break; } return - 1; } /** * Implementation of RegexSearchBase for the default built-in regular expression engine */ class BuiltinRegex : public RegexSearchBase { public: explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {} Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, bool caseSensitive, bool word, bool wordStart, FindOption flags, Sci::Position *length) override; const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override; private: RESearch search; std::string substituted; }; namespace { /** * RESearchRange keeps track of search range. */ class RESearchRange { public: const Document *doc; int increment; Sci::Position startPos; Sci::Position endPos; Sci::Line lineRangeStart; Sci::Line lineRangeEnd; Sci::Line lineRangeBreak; RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) { increment = (minPos <= maxPos) ? 1 : -1; // Range endpoints should not be inside DBCS characters or between a CR and LF, // but just in case, move them. startPos = doc->MovePositionOutsideChar(minPos, 1, true); endPos = doc->MovePositionOutsideChar(maxPos, 1, true); lineRangeStart = doc->SciLineFromPosition(startPos); lineRangeEnd = doc->SciLineFromPosition(endPos); lineRangeBreak = lineRangeEnd + increment; } Range LineRange(Sci::Line line) const { Range range(doc->LineStart(line), doc->LineEnd(line)); if (increment == 1) { if (line == lineRangeStart) range.start = startPos; if (line == lineRangeEnd) range.end = endPos; } else { if (line == lineRangeEnd) range.start = endPos; if (line == lineRangeStart) range.end = startPos; } return range; } }; // Define a way for the Regular Expression code to access the document class DocumentIndexer : public CharacterIndexer { Document *pdoc; Sci::Position end; public: DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept : pdoc(pdoc_), end(end_) { } DocumentIndexer(const DocumentIndexer &) = delete; DocumentIndexer(DocumentIndexer &&) = delete; DocumentIndexer &operator=(const DocumentIndexer &) = delete; DocumentIndexer &operator=(DocumentIndexer &&) = delete; ~DocumentIndexer() override = default; char CharAt(Sci::Position index) const noexcept override { if (index < 0 || index >= end) return 0; else return pdoc->CharAt(index); } }; #ifndef NO_CXX11_REGEX class ByteIterator { public: using iterator_category = std::bidirectional_iterator_tag; using value_type = char; using difference_type = ptrdiff_t; using pointer = char*; using reference = char&; const Document *doc; Sci::Position position; explicit ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : doc(doc_), position(position_) { } char operator*() const noexcept { return doc->CharAt(position); } ByteIterator &operator++() noexcept { position++; return *this; } ByteIterator operator++(int) noexcept { ByteIterator retVal(*this); position++; return retVal; } ByteIterator &operator--() noexcept { position--; return *this; } bool operator==(const ByteIterator &other) const noexcept { return doc == other.doc && position == other.position; } bool operator!=(const ByteIterator &other) const noexcept { return doc != other.doc || position != other.position; } Sci::Position Pos() const noexcept { return position; } Sci::Position PosRoundUp() const noexcept { return position; } }; // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide. // Would be better to use sizeof(wchar_t) or similar to differentiate // but easier for now to hard-code platforms. // C++11 has char16_t and char32_t but neither Clang nor Visual C++ // appear to allow specializing basic_regex over these. #ifdef _WIN32 #define WCHAR_T_IS_16 1 #else #define WCHAR_T_IS_16 0 #endif #if WCHAR_T_IS_16 // On Windows, report non-BMP characters as 2 separate surrogates as that // matches wregex since it is based on wchar_t. class UTF8Iterator { // These 3 fields determine the iterator position and are used for comparisons const Document *doc; Sci::Position position; size_t characterIndex; // Remaining fields are derived from the determining fields so are excluded in comparisons unsigned int lenBytes; size_t lenCharacters; wchar_t buffered[2]; public: using iterator_category = std::bidirectional_iterator_tag; using value_type = wchar_t; using difference_type = ptrdiff_t; using pointer = wchar_t*; using reference = wchar_t&; explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} { buffered[0] = 0; buffered[1] = 0; if (doc) { ReadCharacter(); } } wchar_t operator*() const noexcept { assert(lenCharacters != 0); return buffered[characterIndex]; } UTF8Iterator &operator++() noexcept { if ((characterIndex + 1) < (lenCharacters)) { characterIndex++; } else { position += lenBytes; ReadCharacter(); characterIndex = 0; } return *this; } UTF8Iterator operator++(int) noexcept { UTF8Iterator retVal(*this); if ((characterIndex + 1) < (lenCharacters)) { characterIndex++; } else { position += lenBytes; ReadCharacter(); characterIndex = 0; } return retVal; } UTF8Iterator &operator--() noexcept { if (characterIndex) { characterIndex--; } else { position = doc->NextPosition(position, -1); ReadCharacter(); characterIndex = lenCharacters - 1; } return *this; } bool operator==(const UTF8Iterator &other) const noexcept { // Only test the determining fields, not the character widths and values derived from this return doc == other.doc && position == other.position && characterIndex == other.characterIndex; } bool operator!=(const UTF8Iterator &other) const noexcept { // Only test the determining fields, not the character widths and values derived from this return doc != other.doc || position != other.position || characterIndex != other.characterIndex; } Sci::Position Pos() const noexcept { return position; } Sci::Position PosRoundUp() const noexcept { if (characterIndex) return position + lenBytes; // Force to end of character else return position; } private: void ReadCharacter() noexcept { const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); lenBytes = charExtracted.widthBytes; if (charExtracted.character == unicodeReplacementChar) { lenCharacters = 1; buffered[0] = static_cast(charExtracted.character); } else { lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered); } } }; #else // On Unix, report non-BMP characters as single characters class UTF8Iterator { const Document *doc; Sci::Position position; public: using iterator_category = std::bidirectional_iterator_tag; using value_type = wchar_t; using difference_type = ptrdiff_t; using pointer = wchar_t*; using reference = wchar_t&; explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept : doc(doc_), position(position_) { } wchar_t operator*() const noexcept { const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); return charExtracted.character; } UTF8Iterator &operator++() noexcept { position = doc->NextPosition(position, 1); return *this; } UTF8Iterator operator++(int) noexcept { UTF8Iterator retVal(*this); position = doc->NextPosition(position, 1); return retVal; } UTF8Iterator &operator--() noexcept { position = doc->NextPosition(position, -1); return *this; } bool operator==(const UTF8Iterator &other) const noexcept { return doc == other.doc && position == other.position; } bool operator!=(const UTF8Iterator &other) const noexcept { return doc != other.doc || position != other.position; } Sci::Position Pos() const noexcept { return position; } Sci::Position PosRoundUp() const noexcept { return position; } }; #endif std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) { std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default; if (!doc->IsLineStartPosition(startPos)) flagsMatch |= std::regex_constants::match_not_bol; if (!doc->IsLineEndPosition(endPos)) flagsMatch |= std::regex_constants::match_not_eol; return flagsMatch; } template bool MatchOnLines(const Document *doc, const Regex ®exp, const RESearchRange &resr, RESearch &search) { std::match_results match; // MSVC and libc++ have problems with ^ and $ matching line ends inside a range. // CRLF line ends are also a problem as ^ and $ only treat LF as a line end. // The std::regex::multiline option was added to C++17 to improve behaviour but // has not been implemented by compiler runtimes with MSVC always in multiline // mode and libc++ and libstdc++ always in single-line mode. // If multiline regex worked well then the line by line iteration could be removed // for the forwards case and replaced with the following 4 lines: #ifdef REGEX_MULTILINE Iterator itStart(doc, resr.startPos); Iterator itEnd(doc, resr.endPos); const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); #else // Line by line. bool matched = false; for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { const Range lineRange = resr.LineRange(line); Iterator itStart(doc, lineRange.start); Iterator itEnd(doc, lineRange.end); std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end); matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); // Check for the last match on this line. if (matched) { if (resr.increment == -1) { while (matched) { Iterator itNext(doc, match[0].second.PosRoundUp()); flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end); std::match_results matchNext; matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch); if (matched) { if (match[0].first == match[0].second) { // Empty match means failure so exit return false; } match = matchNext; } } matched = true; } break; } } #endif if (matched) { for (size_t co = 0; co < match.size() && co < RESearch::MAXTAG; co++) { search.bopat[co] = match[co].first.Pos(); search.eopat[co] = match[co].second.PosRoundUp(); const Sci::Position lenMatch = search.eopat[co] - search.bopat[co]; search.pat[co].resize(lenMatch); for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) { search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]); } } } return matched; } Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, bool caseSensitive, Sci::Position *length, RESearch &search) { const RESearchRange resr(doc, minPos, maxPos); try { //ElapsedPeriod ep; std::regex::flag_type flagsRe = std::regex::ECMAScript; // Flags that appear to have no effect: // | std::regex::collate | std::regex::extended; if (!caseSensitive) flagsRe = flagsRe | std::regex::icase; // Clear the RESearch so can fill in matches search.Clear(); bool matched = false; if (CpUtf8 == doc->dbcsCodePage) { const std::wstring ws = WStringFromUTF8(s); std::wregex regexp; regexp.assign(ws, flagsRe); matched = MatchOnLines(doc, regexp, resr, search); } else { std::regex regexp; regexp.assign(s, flagsRe); matched = MatchOnLines(doc, regexp, resr, search); } Sci::Position posMatch = -1; if (matched) { posMatch = search.bopat[0]; *length = search.eopat[0] - search.bopat[0]; } // Example - search in doc/ScintillaHistory.html for // [[:upper:]]eta[[:space:]] // On MacBook, normally around 1 second but with locale imbued -> 14 seconds. //const double durSearch = ep.Duration(true); //Platform::DebugPrintf("Search:%9.6g \n", durSearch); return posMatch; } catch (std::regex_error &) { // Failed to create regular expression throw RegexError(); } catch (...) { // Failed in some other way return -1; } } #endif } Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s, bool caseSensitive, bool, bool, FindOption flags, Sci::Position *length) { #ifndef NO_CXX11_REGEX if (FlagSet(flags, FindOption::Cxx11RegEx)) { return Cxx11RegexFindText(doc, minPos, maxPos, s, caseSensitive, length, search); } #endif const RESearchRange resr(doc, minPos, maxPos); const bool posix = FlagSet(flags, FindOption::Posix); const char *errmsg = search.Compile(s, *length, caseSensitive, posix); if (errmsg) { return -1; } // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) // Replace first '.' with '-' in each property file variable reference: // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) // Replace: $(\1-\2) Sci::Position pos = -1; Sci::Position lenRet = 0; const bool searchforLineStart = s[0] == '^'; const char searchEnd = s[*length - 1]; const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0'; const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\'); for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { Sci::Position startOfLine = doc->LineStart(line); Sci::Position endOfLine = doc->LineEnd(line); if (resr.increment == 1) { if (line == resr.lineRangeStart) { if ((resr.startPos != startOfLine) && searchforLineStart) continue; // Can't match start of line if start position after start of line startOfLine = resr.startPos; } if (line == resr.lineRangeEnd) { if ((resr.endPos != endOfLine) && searchforLineEnd) continue; // Can't match end of line if end position before end of line endOfLine = resr.endPos; } } else { if (line == resr.lineRangeEnd) { if ((resr.endPos != startOfLine) && searchforLineStart) continue; // Can't match start of line if end position after start of line startOfLine = resr.endPos; } if (line == resr.lineRangeStart) { if ((resr.startPos != endOfLine) && searchforLineEnd) continue; // Can't match end of line if start position before end of line endOfLine = resr.startPos; } } const DocumentIndexer di(doc, endOfLine); int success = search.Execute(di, startOfLine, endOfLine); if (success) { pos = search.bopat[0]; // Ensure only whole characters selected search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false); lenRet = search.eopat[0] - search.bopat[0]; // There can be only one start of a line, so no need to look for last match in line if ((resr.increment == -1) && !searchforLineStart) { // Check for the last match on this line. int repetitions = 1000; // Break out of infinite loop while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { success = search.Execute(di, pos+1, endOfLine); if (success) { if (search.eopat[0] <= minPos) { pos = search.bopat[0]; lenRet = search.eopat[0] - search.bopat[0]; } else { success = 0; } } } } break; } } *length = lenRet; return pos; } const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) { substituted.clear(); const DocumentIndexer di(doc, doc->Length()); search.GrabMatches(di); for (Sci::Position j = 0; j < *length; j++) { if (text[j] == '\\') { if (text[j + 1] >= '0' && text[j + 1] <= '9') { const unsigned int patNum = text[j + 1] - '0'; const Sci::Position len = search.eopat[patNum] - search.bopat[patNum]; if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur substituted.append(search.pat[patNum].c_str(), len); j++; } else { j++; switch (text[j]) { case 'a': substituted.push_back('\a'); break; case 'b': substituted.push_back('\b'); break; case 'f': substituted.push_back('\f'); break; case 'n': substituted.push_back('\n'); break; case 'r': substituted.push_back('\r'); break; case 't': substituted.push_back('\t'); break; case 'v': substituted.push_back('\v'); break; case '\\': substituted.push_back('\\'); break; default: substituted.push_back('\\'); j--; } } } else { substituted.push_back(text[j]); } } *length = substituted.length(); return substituted.c_str(); } #ifndef SCI_OWNREGEX RegexSearchBase *Scintilla::Internal::CreateRegexSearch(CharClassify *charClassTable) { return new BuiltinRegex(charClassTable); } #endif