mirror of
https://github.com/caronc/apprise.git
synced 2025-12-15 10:04:06 +08:00
373 lines
13 KiB
Python
373 lines
13 KiB
Python
# BSD 2-Clause License
|
|
#
|
|
# Apprise - Push Notification Library.
|
|
# Copyright (c) 2025, Chris Caron <lead2gold@gmail.com>
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
from apprise import NotifyFormat
|
|
from apprise.utils.format import html_adjust, markdown_adjust, smart_split
|
|
|
|
|
|
def test_smart_split_prefers_newlines_over_spaces_and_punctuation():
    """
    A newline inside the window is the preferred break point, even when
    spaces and punctuation are also available before the limit.
    """
    message = "line1\nline2 line3. line4"

    # A 12 character window reaches past the newline into the second line
    pieces = smart_split(message, 12, body_format=NotifyFormat.TEXT)

    # The leading piece stops right after the newline ...
    assert pieces[0] == "line1\n"
    # ... and gluing the pieces back together loses nothing
    assert "".join(pieces) == message
|
|
|
|
|
|
def test_smart_split_prefers_spaces_over_hard_split():
    """
    Without any newline available, the break is expected at the last
    space/tab in the window rather than a hard cut at the character limit.
    """
    message = "word1 word2 word3"
    # The window holds exactly the first two words plus the space trailing
    # each of them (12 characters), forcing the break just before word3
    window = len("word1 word2 ")

    pieces = smart_split(message, window, body_format=NotifyFormat.TEXT)

    expected = ["word1 word2 ", "word3"]
    assert pieces == expected
    assert "".join(pieces) == message
|
|
|
|
|
|
def test_smart_split_can_split_after_punctuation_plus_whitespace():
    """
    Drive the punctuation+whitespace pattern. In practice the resulting
    split point is the same as the last space, but the behaviour is still
    verified here.
    """
    message = "Hello world. Again"

    # Size the window so it ends right after ". " (13 characters)
    window = len("Hello world. ")

    pieces = smart_split(message, window, body_format=NotifyFormat.TEXT)

    # The leading piece stops at the space that follows the period
    assert pieces[0] == "Hello world. "
    assert pieces[1] == "Again"
    assert "".join(pieces) == message
|
|
|
|
|
|
def test_smart_split_avoids_splitting_inside_html_entity() -> None:
    """
    In HTML mode we must not end a chunk in the middle of '&...;'.

    We do NOT assert exact chunk values. Instead we assert:
    - TEXT mode can split inside the entity.
    - HTML mode never has a chunk that contains '&' without a matching ';'
      after it in the same chunk.
    """
    # The entity has to be spelled out literally: with a plain space in its
    # place the input holds no '&' at all and the TEXT-mode sanity check
    # below can never be satisfied.
    text = "1234&nbsp;5678"
    limit = 8  # without adjustment, we would cut inside '&nbsp;'

    # Plain text mode: allowed to split anywhere
    chunks_text = smart_split(text, limit, body_format=NotifyFormat.TEXT)
    assert "".join(chunks_text) == text

    # Sanity: in TEXT mode we *do* split inside the entity, i.e. at least
    # one chunk has an '&' with no ';' anywhere after it
    assert any(
        "&" in chunk and ";" not in chunk[chunk.find("&"):]
        for chunk in chunks_text
    )

    # HTML mode: entity-aware
    chunks_html = smart_split(text, limit, body_format=NotifyFormat.HTML)
    assert "".join(chunks_html) == text

    # If a chunk contains '&', it must also contain the terminating ';'
    # for that entity within the same chunk.
    for chunk in chunks_html:
        idx = chunk.find("&")
        if idx == -1:
            # No entity starts in this chunk; nothing to verify
            continue
        semi = chunk.find(";", idx + 1)
        assert semi != -1, f"Chunk ends inside HTML entity: {chunk!r}"
|
|
|
|
|
|
def test_smart_split_avoids_splitting_inside_markdown_link() -> None:
    """
    MARKDOWN mode must keep a [text](url) construct in one piece.

    The checks are intentionally loose: the whole link has to show up in a
    single chunk, and every chunk holding a '[' must also hold a ')'
    somewhere after it.
    """
    link = "[link](https://example.com)"
    text = "AAAA" + link

    # The limit equals the link length, so the "AAAA" prefix pushes the end
    # of the first window into the middle of the link
    pieces = smart_split(text, len(link), body_format=NotifyFormat.MARKDOWN)
    assert "".join(pieces) == text

    # The complete link lives inside one of the pieces
    assert any(link in piece for piece in pieces)

    # A piece that opens a '[' must close with ')' before it ends
    for piece in pieces:
        opener = piece.find("[")
        if opener != -1:
            closer = piece.find(")", opener + 1)
            assert closer != -1, (
                f"Markdown link was split inside chunk: {piece!r}"
            )
|
|
|
|
|
|
def test_smart_split_avoids_splitting_inside_markdown_image() -> None:
    """
    In MARKDOWN mode, do not split inside the [alt](url) of an image.

    The implementation currently splits "AAAA![alt](...)" as:
    - "AAAA!"
    - "[alt](...)"
    which is acceptable, as the [alt](url) part is kept intact.
    """
    # The image markup must be present literally; an empty string here would
    # make the limit 0 and leave no [alt](url) part to look for.
    image = "![alt](https://example.com/image.png)"
    text = "AAAA" + image
    limit = len(image)

    chunks = smart_split(text, limit, body_format=NotifyFormat.MARKDOWN)
    assert "".join(chunks) == text

    # The image markup without its leading '!'
    inner = "[alt](https://example.com/image.png)"

    # The [alt](...) portion must appear fully within a single chunk
    assert any(inner in chunk for chunk in chunks)

    # As with links, any '[' in a chunk must have its matching ')' within
    # the same chunk so we never split inside the [alt](url) part.
    for chunk in chunks:
        opener = chunk.find("[")
        if opener == -1:
            # No construct starts in this chunk; nothing to verify
            continue
        closer = chunk.find(")", opener + 1)
        assert closer != -1, (
            f"Markdown image was split inside chunk: {chunk!r}"
        )
|
|
|
|
|
|
def test_smart_split_empty_and_none_input() -> None:
    """
    Empty or None content, as well as a zero limit, is expected to produce
    a list holding a single empty string.
    """
    cases = (
        ("", 10),
        ("", 0),
        ("content", 0),
        # None is included to confirm it is tolerated rather than raising
        (None, 10),
    )
    for body, limit in cases:
        assert smart_split(body, limit, body_format=NotifyFormat.TEXT) == [""]
|
|
|
|
|
|
def test_smart_split_html_entity_exact_boundary() -> None:
    """
    Splitting exactly at an HTML entity boundary should not shift the split
    point (no need to "fix up" a perfectly aligned boundary).
    """
    # The entity is written out literally so the limit really does fall
    # right behind its terminating ';'
    text = "AAAA&nbsp;BBBB"
    limit = len("AAAA&nbsp;")  # split exactly after the entity

    chunks = smart_split(text, limit, body_format=NotifyFormat.HTML)

    # We expect the entity to remain whole in the first chunk
    assert chunks == ["AAAA&nbsp;", "BBBB"]
    assert "".join(chunks) == text
|
|
|
|
|
|
def test_smart_split_markdown_link_exact_boundary() -> None:
    """
    A limit that lands right behind a Markdown link is expected to be used
    as-is, without any adjustment.
    """
    link = "[link](https://example.com)"
    text = link + " TAIL"

    # The window ends immediately after the closing ')'
    pieces = smart_split(text, len(link), body_format=NotifyFormat.MARKDOWN)

    # The leading piece is the link itself; the rest follows behind it
    assert pieces[0] == link
    assert "".join(pieces) == text

    # Sanity: the link was not spread over several pieces
    assert any(link in piece for piece in pieces)
|
|
|
|
|
|
def test_smart_split_whitespace_priority_with_tabs_and_newlines() -> None:
    """
    Check how a tab and a newline compete as break points depending on
    which of them falls inside the window.
    """
    text = "word1\tword2\nword3"
    newline_pos = text.index("\n")

    # Case 1: the window stops right before the newline, leaving the tab as
    # the only whitespace in view; the break is expected after the tab.
    tab_only = smart_split(text, newline_pos, body_format=NotifyFormat.TEXT)
    assert tab_only[0] == "word1\t"
    assert "".join(tab_only) == text

    # Case 2: one more character brings the newline into the window, and it
    # is expected to take precedence over the earlier tab.
    with_newline = smart_split(
        text, newline_pos + 1, body_format=NotifyFormat.TEXT
    )
    assert with_newline[0] == "word1\tword2\n"
    assert "".join(with_newline) == text
|
|
|
|
|
|
def test_smart_split_very_short_limit() -> None:
    """
    A limit of one still has to split deterministically and without
    dropping any content.
    """
    message = "ABC"
    pieces = smart_split(message, 1, body_format=NotifyFormat.TEXT)

    # Every character ends up in a piece of its own
    expected = ["A", "B", "C"]
    assert pieces == expected
    assert "".join(pieces) == message
|
|
|
|
|
|
def test_smart_split_very_long_limit() -> None:
    """
    A limit at or beyond the length of the text is expected to yield the
    text back as the only chunk.
    """
    message = "A short message for testing"
    pieces = smart_split(message, 10_000, body_format=NotifyFormat.TEXT)

    # Nothing to split: the one piece is the message itself
    assert pieces == [message]
    assert "".join(pieces) == message
|
|
|
|
|
|
def test_html_adjust_guard_paths_and_no_entity() -> None:
    """
    Call html_adjust with split points it is expected to hand back
    untouched: one that is not past the window start, one beyond the end of
    the text, and one in a window that holds no '&'.
    """
    text = "abcdef"

    # Split point not beyond window_start
    assert html_adjust(text, window_start=2, split_at=2) == 2

    # Split point past the end of the text
    beyond = len(text) + 5
    assert html_adjust(text, window_start=0, split_at=beyond) == beyond

    # No '&' anywhere in the window
    assert html_adjust(text, window_start=0, split_at=3) == 3
|
|
|
|
|
|
def test_html_adjust_inside_and_at_boundary_of_entity() -> None:
    """
    Exercise the path where html_adjust moves the split back to '&' when the
    split falls inside an entity, and the path where the split is exactly at
    the entity boundary and should not move.
    """
    # The entity is written out literally; the expected indexes below only
    # hold when '&' sits at index 4 and ';' at index 9.
    text = "1234&nbsp;5678"
    # indexes: 0..3 '1234', 4 '&', 5 'n', 6 'b', 7 's', 8 'p', 9 ';', 10 '5'...

    # Split inside '&nbsp;' (at index 8) -> move back to '&' (index 4)
    assert html_adjust(text, window_start=0, split_at=8) == 4

    # Split exactly after the ';' (index 10) -> already outside entity
    assert html_adjust(text, window_start=0, split_at=10) == 10
|
|
|
|
|
|
def test_markdown_adjust_guard_and_no_construct() -> None:
    """
    Call markdown_adjust with split points it is expected to hand back
    untouched: one that is not past the window start, one beyond the end of
    the text, and one in a window without any '[' or '!'.
    """
    text = "plain text"

    # Split point not beyond window_start
    assert markdown_adjust(text, window_start=4, split_at=4) == 4

    # Split point past the end of the text
    beyond = len(text) + 3
    assert markdown_adjust(text, window_start=0, split_at=beyond) == beyond

    # Plain text only: no markdown construct to protect
    assert markdown_adjust(text, window_start=0, split_at=5) == 5
|
|
|
|
|
|
def test_markdown_adjust_inside_construct_moves_to_start() -> None:
    """
    Positive path of markdown_adjust: a split point that falls within a
    [text](url) construct is expected to be pulled back to where the
    construct begins.
    """
    link = "[link](https://example.com)"
    # Three characters past the '(' puts the cut inside the URL
    cut = link.index("(") + 3

    # The construct opens with '[' at index 0, so that is where the cut
    # should end up
    assert markdown_adjust(link, window_start=0, split_at=cut) == 0
|
|
|
|
|
|
def test_smart_split_markdown_guard_split_at_start_is_reset() -> None:
    """
    Cover the smart_split guard 'if split_at <= start: split_at = orig_split'.

    The input is a single link with a limit far shorter than the link, so
    the markdown adjustment is driven back to the window start. The test
    then checks that splitting still advances and that the pieces join
    back into the original text.
    """
    message = "[link](https://example.com)"

    # A 5 character window puts the first soft split inside the link
    pieces = smart_split(message, 5, body_format=NotifyFormat.MARKDOWN)

    # Progress must be made: more than one piece, and none of them empty
    assert len(pieces) >= 2
    assert all(pieces)

    # Nothing may be lost or duplicated along the way
    assert "".join(pieces) == message
|
|
|
|
|
|
def test_smart_split_uses_punctuation_branch_on_rare_whitespace() -> None:
    """
    With no space, tab or newline in the text, punctuation followed by a
    rare whitespace character (vertical tab / form feed) is expected to be
    handled by the punctuation + whitespace split branch.
    """
    vertical_tab = "\x0b"
    message = f"Hello.{vertical_tab}World"

    # The window covers 'Hello.' plus the vertical tab that follows it
    window = len("Hello.") + 1

    pieces = smart_split(message, window, body_format=NotifyFormat.TEXT)

    assert "".join(pieces) == message
    # The break is expected right behind the rare whitespace character
    assert pieces[0] == f"Hello.{vertical_tab}"
    assert pieces[1] == "World"
|