# BSD 2-Clause License
#
# Apprise - Push Notification Library.
# Copyright (c) 2025, Chris Caron
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Tests for the smart_split, html_adjust and markdown_adjust helpers."""

from apprise import NotifyFormat
from apprise.utils.format import html_adjust, markdown_adjust, smart_split


def test_smart_split_prefers_newlines_over_spaces_and_punctuation() -> None:
    """
    Newlines should win even if there are spaces and punctuation
    before the limit.
    """
    text = "line1\nline2 line3. line4"

    # Long enough to include the newline and some of the next line
    limit = 12
    chunks = smart_split(text, limit, body_format=NotifyFormat.TEXT)

    # First chunk should end immediately after the newline
    assert chunks[0] == "line1\n"

    # Nothing lost
    assert "".join(chunks) == text


def test_smart_split_prefers_spaces_over_hard_split() -> None:
    """
    When there are no newlines, split on the last space/tab before
    falling back to a hard character-limit split.
    """
    text = "word1 word2 word3"

    # Force a split between word2 and word3
    limit = 12  # "word1 word2 " is 12 characters
    chunks = smart_split(text, limit, body_format=NotifyFormat.TEXT)

    assert chunks == ["word1 word2 ", "word3"]
    assert "".join(chunks) == text


def test_smart_split_can_split_after_punctuation_plus_whitespace() -> None:
    """
    Exercise the punctuation+whitespace pattern.

    In practice this collapses to the same split point as the last space,
    but we verify the behaviour.
    """
    text = "Hello world. Again"

    # Force the split around ". "
    # "Hello world. " is 13 characters
    limit = 13
    chunks = smart_split(text, limit, body_format=NotifyFormat.TEXT)

    # First chunk should end at the space after the period
    assert chunks[0] == "Hello world. "
    assert chunks[1] == "Again"
    assert "".join(chunks) == text


def test_smart_split_avoids_splitting_inside_html_entity() -> None:
    """
    In HTML mode we must not end a chunk in the middle of '&...;'.

    We do NOT assert exact chunk values. Instead we assert:
      - TEXT mode can split inside the entity.
      - HTML mode never has a chunk that contains '&' without a matching
        ';' after it in the same chunk.
    """
    # The entity must be spelled out literally; a decoded non-breaking
    # space would leave nothing for the '&' / ';' checks below to find.
    text = "1234&nbsp;5678"
    limit = 8  # without adjustment, we would cut inside '&nbsp;'

    # Plain text mode: allowed to split anywhere
    chunks_text = smart_split(text, limit, body_format=NotifyFormat.TEXT)
    assert "".join(chunks_text) == text

    # Sanity: in TEXT mode we *do* split inside the entity
    assert any(
        "&" in chunk and ";" not in chunk[chunk.find("&") :]
        for chunk in chunks_text
    )

    # HTML mode: entity-aware
    chunks_html = smart_split(text, limit, body_format=NotifyFormat.HTML)
    assert "".join(chunks_html) == text

    # If a chunk contains '&', it must also contain the terminating ';'
    # for that entity within the same chunk.
    for chunk in chunks_html:
        idx = chunk.find("&")
        if idx == -1:
            continue

        semi = chunk.find(";", idx + 1)
        assert semi != -1, f"Chunk ends inside HTML entity: {chunk!r}"


def test_smart_split_avoids_splitting_inside_markdown_link() -> None:
    """
    In MARKDOWN mode, do not split inside [text](url).

    We only require that the full [link](...) lies in a single chunk
    and that any '[' appearing in a chunk has a matching ')' in that
    same chunk.
    """
    link = "[link](https://example.com)"
    text = "AAAA" + link
    limit = len(link)

    chunks = smart_split(text, limit, body_format=NotifyFormat.MARKDOWN)
    assert "".join(chunks) == text

    # Entire link must be contained in one chunk
    assert any(link in chunk for chunk in chunks)

    # If a chunk has '[', it must also contain its closing ')'
    for chunk in chunks:
        idx = chunk.find("[")
        if idx == -1:
            continue

        close_idx = chunk.find(")", idx + 1)
        assert close_idx != -1, (
            f"Markdown link was split inside chunk: {chunk!r}"
        )


def test_smart_split_avoids_splitting_inside_markdown_image() -> None:
    """
    In MARKDOWN mode, do not split inside the [alt](url) of an image.

    The implementation currently splits "AAAA![alt](...)" as:
      - "AAAA!"
      - "[alt](...)"
    which is acceptable, as the [alt](url) part is kept intact.
    """
    image = "![alt](https://example.com/image.png)"
    text = "AAAA" + image
    limit = len(image)

    chunks = smart_split(text, limit, body_format=NotifyFormat.MARKDOWN)
    assert "".join(chunks) == text

    inner = "[alt](https://example.com/image.png)"

    # The [alt](...) portion must appear fully within a single chunk
    assert any(inner in chunk for chunk in chunks)

    # As with links, any '[' in a chunk must have its matching ')' within
    # the same chunk so we never split inside the [alt](url) part.
    for chunk in chunks:
        idx = chunk.find("[")
        if idx == -1:
            continue

        close_idx = chunk.find(")", idx + 1)
        assert close_idx != -1, (
            f"Markdown image was split inside chunk: {chunk!r}"
        )


def test_smart_split_empty_and_none_input() -> None:
    """
    Empty or None input, and a zero limit, should all yield a list
    holding a single empty string.
    """
    assert smart_split("", 10, body_format=NotifyFormat.TEXT) == [""]
    assert smart_split("", 0, body_format=NotifyFormat.TEXT) == [""]
    assert smart_split("content", 0, body_format=NotifyFormat.TEXT) == [""]

    # None short-circuits before len() is called
    assert smart_split(None, 10, body_format=NotifyFormat.TEXT) == [""]


def test_smart_split_html_entity_exact_boundary() -> None:
    """
    Splitting exactly at an HTML entity boundary should not shift the
    split point (no need to "fix up" a perfectly aligned boundary).
    """
    text = "AAAA&nbsp;BBBB"
    limit = len("AAAA&nbsp;")  # split exactly after the entity

    chunks = smart_split(text, limit, body_format=NotifyFormat.HTML)

    # We expect the entity to remain whole in the first chunk
    assert chunks == ["AAAA&nbsp;", "BBBB"]
    assert "".join(chunks) == text


def test_smart_split_markdown_link_exact_boundary() -> None:
    """
    Splitting exactly after a Markdown link should not cause any
    adjustment.
    """
    link = "[link](https://example.com)"
    tail = " TAIL"
    text = link + tail
    limit = len(link)  # split immediately after ')'

    chunks = smart_split(text, limit, body_format=NotifyFormat.MARKDOWN)

    # First chunk is exactly the link, second is the remainder
    assert chunks[0] == link
    assert "".join(chunks) == text

    # Sanity: the link itself is not split across chunks
    assert any(link in chunk for chunk in chunks)


def test_smart_split_whitespace_priority_with_tabs_and_newlines() -> None:
    """
    Exercise newline vs space/tab priority with a mix of whitespace.
    """
    text = "word1\tword2\nword3"

    # Case 1: window ends just before the newline, so only tab is visible.
    limit_without_newline = text.index("\n")  # position of '\n'
    chunks_no_nl = smart_split(
        text, limit_without_newline, body_format=NotifyFormat.TEXT
    )

    # First chunk should end after the tab, since that is the last space/tab
    assert chunks_no_nl[0] == "word1\t"
    assert "".join(chunks_no_nl) == text

    # Case 2: window includes the newline; newline should win over tab.
    limit_with_newline = text.index("\n") + 1
    chunks_with_nl = smart_split(
        text, limit_with_newline, body_format=NotifyFormat.TEXT
    )

    # First chunk should now end after the newline
    assert chunks_with_nl[0] == "word1\tword2\n"
    assert "".join(chunks_with_nl) == text


def test_smart_split_very_short_limit() -> None:
    """
    Very small limits should still split deterministically without loss.
    """
    text = "ABC"
    chunks = smart_split(text, 1, body_format=NotifyFormat.TEXT)

    # One character per chunk
    assert chunks == ["A", "B", "C"]
    assert "".join(chunks) == text


def test_smart_split_very_long_limit() -> None:
    """
    Very large limits (>= len(text)) should return a single chunk.
    """
    text = "A short message for testing"
    chunks = smart_split(text, 10_000, body_format=NotifyFormat.TEXT)

    assert chunks == [text]
    assert "".join(chunks) == text


def test_html_adjust_guard_paths_and_no_entity() -> None:
    """
    Cover the early-return guard in html_adjust and the path where
    there is no '&' at all in the search window.
    """
    text = "abcdef"

    # split_at <= window_start -> early-return unchanged
    assert html_adjust(text, window_start=2, split_at=2) == 2

    # split_at beyond the end of the text -> early-return unchanged
    assert html_adjust(
        text, window_start=0, split_at=len(text) + 5) == len(text) + 5

    # No '&' in window, nothing to adjust
    assert html_adjust(text, window_start=0, split_at=3) == 3


def test_html_adjust_inside_and_at_boundary_of_entity() -> None:
    """
    Exercise the path where html_adjust moves the split back to '&'
    when the split falls inside an entity, and the path where the split
    is exactly at the entity boundary and should not move.
    """
    text = "1234&nbsp;5678"
    # indexes: 0..3 '1234', 4 '&', 5 'n', 6 'b', 7 's', 8 'p', 9 ';', 10 '5'...

    # Split inside '&nbsp;' (at index 8) -> move back to '&' (index 4)
    assert html_adjust(text, window_start=0, split_at=8) == 4

    # Split exactly after the ';' (index 10) -> already outside entity
    assert html_adjust(text, window_start=0, split_at=10) == 10


def test_markdown_adjust_guard_and_no_construct() -> None:
    """
    Cover the guard in markdown_adjust and the case where there is
    no '[' or '!' in the window.
    """
    text = "plain text"

    # split_at <= window_start -> early-return unchanged
    assert markdown_adjust(text, window_start=4, split_at=4) == 4

    # split_at past the end -> early-return unchanged
    assert markdown_adjust(
        text, window_start=0, split_at=len(text) + 3) == len(text) + 3

    # No markdown constructs -> nothing to adjust
    assert markdown_adjust(text, window_start=0, split_at=5) == 5


def test_markdown_adjust_inside_construct_moves_to_start() -> None:
    """
    Exercise the positive path in markdown_adjust where the split
    lands inside a [text](url) construct and the function moves the
    split back to the start of the construct.
    """
    link = "[link](https://example.com)"

    # Choose a split point inside the URL
    split_at = link.index("(") + 3  # somewhere inside "(https..."

    adjusted = markdown_adjust(link, window_start=0, split_at=split_at)

    # Should move back to the '[' at index 0
    assert adjusted == 0


def test_smart_split_markdown_guard_split_at_start_is_reset() -> None:
    """
    Cover the smart_split guard 'if split_at <= start: split_at = orig_split'.

    We force markdown_adjust to move the split back to the window start,
    then verify smart_split resets to the original split so progress is
    still made and chunks join back to the original text.
    """
    text = "[link](https://example.com)"
    limit = 5  # will cause the first soft split to land inside the link

    chunks = smart_split(text, limit, body_format=NotifyFormat.MARKDOWN)

    # We should never get stuck; all chunks must be non-empty
    assert len(chunks) >= 2
    assert all(chunks)

    # Re-joining all chunks must restore the original text
    assert "".join(chunks) == text


def test_smart_split_uses_punctuation_branch_on_rare_whitespace() -> None:
    """
    When punctuation is followed by rare whitespace (vertical tab /
    form feed) and there are no spaces/tabs/newlines, we should use the
    punctuation + whitespace split branch.
    """
    vt = "\x0b"  # vertical tab
    text = f"Hello.{vt}World"

    # Window includes 'Hello.' and the VT
    limit = len("Hello.") + 1

    chunks = smart_split(text, limit, body_format=NotifyFormat.TEXT)
    assert "".join(chunks) == text

    # We expect the first chunk to end after the rare whitespace
    assert chunks[0] == f"Hello.{vt}"
    assert chunks[1] == "World"