From fd0cb3ffcc192bc132ac341cc8c555fd571b5345 Mon Sep 17 00:00:00 2001
From: Chris Caron
+ ',
- re.compile(r'<\s*/pre[^>]*>', re.I): '
',
+ __telegram_escape_html_entries = (
+ # Comments
+ (re.compile(
+ r'\s*\s*',
+ (re.I | re.M | re.S)), '', {}),
# the following tags are not supported
- re.compile(
- r'<\s*(br|p|div|span|body|script|meta|html|font'
- r'|label|iframe|li|ol|ul|source|script)[^>]*>', re.I): '',
+ (re.compile(
+ r'\s*<\s*(!?DOCTYPE|p|div|span|body|script|link|'
+ r'meta|html|font|head|label|form|input|textarea|select|iframe|'
+ r'source|script)([^a-z0-9>][^>]*)?>\s*',
+ (re.I | re.M | re.S)), '', {}),
- re.compile(
- r'<\s*/(span|body|script|meta|html|font'
- r'|label|iframe|ol|ul|source|script)[^>]*>', re.I): '',
-
- # Italic
- re.compile(r'<\s*(caption|em)[^>]*>', re.I): '',
- re.compile(r'<\s*/(caption|em)[^>]*>', re.I): '',
+ # All closing tags to be removed are put here
+ (re.compile(
+ r'\s*<\s*/(span|body|script|meta|html|font|head|'
+ r'label|form|input|textarea|select|ol|ul|link|'
+ r'iframe|source|script)([^a-z0-9>][^>]*)?>\s*',
+ (re.I | re.M | re.S)), '', {}),
# Bold
- re.compile(r'<\s*(h[1-6]|title|strong)[^>]*>', re.I): '',
- re.compile(r'<\s*/(h[1-6]|title|strong)[^>]*>', re.I): '',
+ (re.compile(
+ r'<\s*(strong)([^a-z0-9>][^>]*)?>',
+ (re.I | re.M | re.S)), '', {}),
+ (re.compile(
+ r'<\s*/\s*(strong)([^a-z0-9>][^>]*)?>',
+ (re.I | re.M | re.S)), '', {}),
+ (re.compile(
+ r'\s*<\s*(h[1-6]|title)([^a-z0-9>][^>]*)?>\s*',
+ (re.I | re.M | re.S)), '{}', {'html': '\r\n'}),
+ (re.compile(
+ r'\s*<\s*/\s*(h[1-6]|title)([^a-z0-9>][^>]*)?>\s*',
+ (re.I | re.M | re.S)),
+ '{}', {'html': '
'}),
+
+ # Italic
+ (re.compile(
+ r'<\s*(caption|em)([^a-z0-9>][^>]*)?>',
+ (re.I | re.M | re.S)), '', {}),
+ (re.compile(
+ r'<\s*/\s*(caption|em)([^a-z0-9>][^>]*)?>',
+ (re.I | re.M | re.S)), '', {}),
+
+ # Bullet Lists
+ (re.compile(
+ r'<\s*li([^a-z0-9>][^>]*)?>\s*',
+ (re.I | re.M | re.S)), ' -', {}),
+
+ # convert pre tags to code (supported by Telegram)
+ (re.compile(
+ r'<\s*pre([^a-z0-9>][^>]*)?>',
+ (re.I | re.M | re.S)), '{}', {'html': '\r\n'}),
+ (re.compile(
+ r'<\s*/\s*pre([^a-z0-9>][^>]*)?>',
+ (re.I | re.M | re.S)), '
{}', {'html': '\r\n'}),
+
+ # New Lines
+ (re.compile(
+ r'\s*<\s*/?\s*(ol|ul|br|hr)\s*/?>\s*',
+ (re.I | re.M | re.S)), '\r\n', {}),
+ (re.compile(
+ r'\s*<\s*/\s*(br|p|hr|li|div)([^a-z0-9>][^>]*)?>\s*',
+ (re.I | re.M | re.S)), '\r\n', {}),
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
- re.compile(r'\ ?', re.I): ' ',
+ (re.compile(r'\ ?', re.I), ' ', {}),
# Tabs become 3 spaces
- re.compile(r'\ ?', re.I): ' ',
+ (re.compile(r'\ ?', re.I), ' ', {}),
# Some characters get re-escaped by the Telegram upstream
# service so we need to convert these back,
- re.compile(r'\'?', re.I): '\'',
- re.compile(r'\"?', re.I): '"',
- }
+ (re.compile(r'\'?', re.I), '\'', {}),
+ (re.compile(r'\"?', re.I), '"', {}),
+
+ # New line cleanup
+ (re.compile(r'\r*\n[\r\n]+', re.I), '\r\n', {}),
+ )
# Define our template tokens
template_tokens = dict(NotifyBase.template_tokens, **{
@@ -597,38 +638,19 @@ class NotifyTelegram(NotifyBase):
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML'
- for r, v in self.__telegram_escape_html_dict.items():
- body = r.sub(v, body, re.I)
+ for r, v, m in self.__telegram_escape_html_entries:
+
+ if 'html' in m:
+ # Handle special cases where we need to alter new lines
+ # for presentation purposes
+ v = v.format(m['html'] if body_format in (
+ NotifyFormat.HTML, NotifyFormat.MARKDOWN) else '')
+
+ body = r.sub(v, body)
# Prepare our payload based on HTML or TEXT
payload['text'] = body
- # else: # self.notify_format == NotifyFormat.TEXT:
- # # Use Telegram's HTML mode
- # payload['parse_mode'] = 'HTML'
-
- # # Further html escaping required...
- # telegram_escape_text_dict = {
- # # We need to escape characters that conflict with html
- # # entity blocks (< and >) when displaying text
- # r'>': '>',
- # r'<': '<',
- # r'\&': '&',
- # }
-
- # # Create a regular expression from the dictionary keys
- # text_regex = re.compile("(%s)" % "|".join(
- # map(re.escape, telegram_escape_text_dict.keys())).lower(),
- # re.I)
-
- # # For each match, look-up corresponding value in dictionary
- # body = text_regex.sub( # pragma: no branch
- # lambda mo: telegram_escape_text_dict[
- # mo.string[mo.start():mo.end()]], body)
-
- # # prepare our payload based on HTML or TEXT
- # payload['text'] = body
-
# Create a copy of the chat_ids list
targets = list(self.targets)
while len(targets):
diff --git a/test/test_conversion.py b/test/test_conversion.py
index c6ab6d8a..0908f232 100644
--- a/test/test_conversion.py
+++ b/test/test_conversion.py
@@ -22,7 +22,6 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-
from apprise import NotifyFormat
from apprise.conversion import convert_between
import pytest
diff --git a/test/test_plugin_telegram.py b/test/test_plugin_telegram.py
index a9f191b6..d2fa3c0c 100644
--- a/test/test_plugin_telegram.py
+++ b/test/test_plugin_telegram.py
@@ -625,11 +625,10 @@ def test_plugin_telegram_formating_py3(mock_post):
# Test that everything is escaped properly in a TEXT mode
assert payload['text'] == \
- '🚨 Change detected for <i>Apprise ' \
- 'Test Title</i>\r\n<a href=' \
- '"http://localhost"><i>Apprise Body Title<' \
- '/i></a> had <a href="http://' \
- '127.0.0.1">a change</a>'
+ '🚨 Change detected for <i>Apprise Test Title</i>' \
+ '\r\n<a href="http://localhost"><i>' \
+ 'Apprise Body Title</i></a> had <' \
+ 'a href="http://127.0.0.1">a change</a>'
# Reset our values
mock_post.reset_mock()
@@ -718,8 +717,9 @@ def test_plugin_telegram_formating_py3(mock_post):
# Test that everything is escaped properly in a HTML mode
assert payload['text'] == \
- '🚨 Another Change detected for Apprise Test Title' \
- '\r\nApprise Body Title' \
+ '\r\n🚨 Another Change detected for ' \
+ 'Apprise Test Title\r\n\r\n' \
+ 'Apprise Body Title' \
' had a change\r\n'
# Now we'll test an edge case where a title was defined, but after
@@ -881,11 +881,11 @@ def test_plugin_telegram_formating_py2(mock_post):
# Test that everything is escaped properly in a TEXT mode
assert payload['text'].encode('utf-8') == \
- '\xf0\x9f\x9a\xa8 Change detected for <i>' \
- 'Apprise Test Title</i>\r\n<a ' \
- 'href="http://localhost"><i>Apprise Body ' \
- 'Title</i></a> had <a href="' \
- 'http://127.0.0.1">a change</a>'
+ '\xf0\x9f\x9a\xa8 Change detected for <i>' \
+ 'Apprise Test Title</i>\r\n<' \
+ 'a href="http://localhost"><i>Apprise Body Title' \
+ '</i></a> had <a href="http://127.0.0.1"' \
+ '>a change</a>'
# Reset our values
mock_post.reset_mock()
@@ -969,9 +969,9 @@ def test_plugin_telegram_formating_py2(mock_post):
# Test that everything is escaped properly in a HTML mode
assert payload['text'].encode('utf-8') == \
- '\xf0\x9f\x9a\xa8 Change detected for ' \
- 'Apprise Test Title\r\n' \
- 'Apprise Body Title'\
+ '\r\n\xf0\x9f\x9a\xa8 Change detected for ' \
+ 'Apprise Test Title\r\n\r\n' \
+ 'Apprise Body Title' \
' had a change\r\n'
# Reset our values
@@ -1163,8 +1163,8 @@ def test_plugin_telegram_html_formatting(mock_post):
# Test that everything is escaped properly in a HTML mode
assert payload['text'] == \
- '\'information\'\r\n"This is in Italic"' \
- '\r\n Headings are dropped and converted to bold'
+ '\r\n\'information\'\r\n\r\n"This is in Italic"' \
+ '\r\n Headings are dropped and converted to bold\r\n'
mock_post.reset_mock()
@@ -1177,7 +1177,28 @@ def test_plugin_telegram_html_formatting(mock_post):
assert payload['text'] == \
'<title>'information&apos</title>' \
- '\r\n<em>"This is in Italic"</em' \
- '><br/><h5> &emspHeadings ' \
- 'are dropped and converted to bold<' \
- '/h5>'
+ '\r\n<em>"This is in Italic"</em><' \
+ 'br/><h5> &emspHeadings are ' \
+ 'dropped and converted to bold</h5>'
+
+ # Let's test more complex HTML examples now
+ mock_post.reset_mock()
+
+ test_file_01 = os.path.join(
+ TEST_VAR_DIR, '01_test_example.html')
+ with open(test_file_01) as html_file:
+ assert aobj.notify(
+ body=html_file.read(), body_format=NotifyFormat.HTML)
+
+ # owner has already been looked up, so only one call is made
+ assert mock_post.call_count == 1
+
+ payload = loads(mock_post.call_args_list[0][1]['data'])
+ assert payload['text'] == \
+ '\r\nBootstrap 101 Template\r\nMy Title\r\n' \
+ 'Heading 1\r\n-Bullet 1\r\n-Bullet 2\r\n-Bullet 3\r\n' \
+ '-Bullet 1\r\n-Bullet 2\r\n-Bullet 3\r\nHeading 2\r\n' \
+ 'A div entry\r\nA div entry\r\nA pre entry
\r\n' \
+ 'Heading 3\r\nHeading 4\r\nHeading 5\r\n' \
+ 'Heading 6\r\nA set of text\r\n' \
+ 'Another line after the set of text\r\nMore text\r\nlabel'
diff --git a/test/var/01_test_example.html b/test/var/01_test_example.html
new file mode 100644
index 00000000..07891255
--- /dev/null
+++ b/test/var/01_test_example.html
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+
+ My Title
+
+
+
+
+
+
+ Heading 1
+
+
+
+
+
+
+ A div entry +
A pre entry+ + +
+ A set of text
Another line after the set of text
+