Telegram and HTML title conversion updates (#574)

pull/578/head
Chris Caron 2022-04-23 08:30:45 -04:00 committed by GitHub
parent 1908b26668
commit 8a3acc815d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 177 additions and 67 deletions

View File

@ -546,7 +546,7 @@ class Apprise(object):
conversion_body_map[server.notify_format]) = \ conversion_body_map[server.notify_format]) = \
convert_between( convert_between(
body_format, server.notify_format, body=body, body_format, server.notify_format, body=body,
title=title) title=title, title_format=server.title_format)
if interpret_escapes: if interpret_escapes:
# #

View File

@ -36,7 +36,8 @@ else:
from html.parser import HTMLParser from html.parser import HTMLParser
def convert_between(from_format, to_format, body, title=None): def convert_between(from_format, to_format, body, title=None,
title_format=NotifyFormat.TEXT):
""" """
Converts between different notification formats. If no conversion exists, Converts between different notification formats. If no conversion exists,
or the selected one fails, the original text will be returned. or the selected one fails, the original text will be returned.
@ -60,27 +61,31 @@ def convert_between(from_format, to_format, body, title=None):
title = '' if not title else title title = '' if not title else title
convert = converters.get((from_format, to_format)) convert = converters.get((from_format, to_format))
title, body = convert(title=title, body=body) \ title, body = convert(title=title, body=body, title_format=title_format) \
if convert is not None else (title, body) if convert is not None else (title, body)
return (title, body) return (title, body)
def markdown_to_html(body, title=None): def markdown_to_html(body, title=None, title_format=None):
""" """
Handle Markdown conversions Handle Markdown conversions
""" """
if title_format == NotifyFormat.HTML and title:
# Only convert the title when the target service accepts HTML titles
title = markdown(title)
return ( return (
# Title # Title
'' if not title else markdown(title), '' if not title else title,
# Body # Body
markdown(body), markdown(body),
) )
def text_to_html(body, title=None): def text_to_html(body, title=None, title_format=None):
""" """
Converts a notification body from plain text to HTML. Converts a notification body from plain text to HTML.
""" """
@ -124,7 +129,7 @@ def text_to_html(body, title=None):
lambda x: re_map[x.group()], body))) lambda x: re_map[x.group()], body)))
def html_to_text(body, title=None): def html_to_text(body, title=None, title_format=None):
""" """
Converts a notification body from HTML to plain text. Converts a notification body from HTML to plain text.
""" """

View File

@ -121,6 +121,13 @@ class NotifyBase(BASE_OBJECT):
# automatically placed into the body # automatically placed into the body
title_maxlen = 250 title_maxlen = 250
# Set this to HTML for services that support the conversion of HTML in
# the title. For example, services like Telegram support HTML in the
# title; however, services like Email (where this goes in the Subject line)
# do not (but the body does). By default we do not convert titles, but
# those who wish to override this may do so.
title_format = NotifyFormat.TEXT
# Set the maximum line count; if this is set to anything larger than zero # Set the maximum line count; if this is set to anything larger than zero
# the message (prior to it being sent) will be truncated to this number # the message (prior to it being sent) will be truncated to this number
# of lines. Setting this to zero disables this feature. # of lines. Setting this to zero disables this feature.

View File

@ -105,6 +105,9 @@ class NotifyTelegram(NotifyBase):
# The maximum allowable characters allowed in the body per message # The maximum allowable characters allowed in the body per message
body_maxlen = 4096 body_maxlen = 4096
# Allow the title to support HTML character sets
title_format = NotifyFormat.HTML
# Telegram is limited to sending a maximum of 100 requests per second. # Telegram is limited to sending a maximum of 100 requests per second.
request_rate_per_sec = 0.001 request_rate_per_sec = 0.001
@ -541,7 +544,7 @@ class NotifyTelegram(NotifyBase):
'disable_web_page_preview': not self.preview, 'disable_web_page_preview': not self.preview,
} }
# Prepare Email Message # Prepare Message Body
if self.notify_format == NotifyFormat.MARKDOWN: if self.notify_format == NotifyFormat.MARKDOWN:
payload['parse_mode'] = 'MARKDOWN' payload['parse_mode'] = 'MARKDOWN'
@ -550,7 +553,7 @@ class NotifyTelegram(NotifyBase):
body, body,
) )
else: # TEXT or HTML elif self.notify_format == NotifyFormat.HTML:
# Use Telegram's HTML mode # Use Telegram's HTML mode
payload['parse_mode'] = 'HTML' payload['parse_mode'] = 'HTML'
@ -561,39 +564,48 @@ class NotifyTelegram(NotifyBase):
telegram_escape_html_dict = { telegram_escape_html_dict = {
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported # HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style # See https://core.telegram.org/bots/api#html-style
r'nbsp': ' ', r'\ ?': ' ',
# Tabs become 3 spaces # Tabs become 3 spaces
r'emsp': ' ', r'\ ?': ' ',
# Some characters get re-escaped by the Telegram upstream # Some characters get re-escaped by the Telegram upstream
# service so we need to convert these back, # service so we need to convert these back,
r'apos': '\'', r'\'?': '\'',
r'quot': '"', r'\"?': '"',
# the following tags are not supported
r'<[ \t]*/?(br|p|div|span|body|script|meta|html|font'
r'|label|iframe|li|ol|ul)[^>]*>': '',
# The following characters can be altered to become supported
r'<[ \t]*pre[^>]*>': '<code>',
r'<[ \t]*/pre[^>]*>': '</code>',
# Bold
r'<[ \t]*(h[0-9]+|title|strong)[^>]*>': '<b>',
r'<[ \t]*/(h[0-9]+|title|strong)[^>]*>': '</b>',
# Italic
r'<[ \t]*(caption|em)[^>]*>': '<i>',
r'<[ \t]*/(caption|em)[^>]*>': '</i>',
} }
# Create a regular expression from the dictionary keys for k, v in telegram_escape_html_dict.items():
html_regex = re.compile("&(%s);?" % "|".join( body = re.sub(k, v, body, re.I)
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)
# For each match, look-up corresponding value in dictionary
# we look +1 to ignore the & that does not appear in the index
# we only look at the first 4 characters because we don't want to
# fail on &apos; as it's accepted (along with &apos - no
# semi-colon)
body = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)
if title: if title:
# For each match, look-up corresponding value in dictionary title = re.sub(k, v, title, re.I)
# Indexing is explained above (for how the body is parsed)
title = html_regex.sub( # pragma: no branch # prepare our payload based on HTML or TEXT
lambda mo: telegram_escape_html_dict[ payload['text'] = '{}{}'.format(
mo.string[mo.start():mo.end()][1:5]], title) '<b>{}</b>\r\n'.format(title) if title else '',
body,
)
else: # self.notify_format == NotifyFormat.TEXT:
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML'
if self.notify_format == NotifyFormat.TEXT:
# Further html escaping required... # Further html escaping required...
telegram_escape_text_dict = { telegram_escape_text_dict = {
# We need to escape characters that conflict with html # We need to escape characters that conflict with html

View File

@ -407,7 +407,7 @@ def test_plugin_telegram_general(mock_post):
# Test our payload # Test our payload
assert payload['text'] == \ assert payload['text'] == \
'<b>special characters</b>\r\n<p>\'"This can\'t\t\r\nfail us"\'</p>' '<b>special characters</b>\r\n\'"This can\'t\t\r\nfail us"\''
# Test sending attachments # Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif')) attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
@ -629,10 +629,10 @@ def test_plugin_telegram_formating_py3(mock_post):
# Test that everything is escaped properly in a TEXT mode # Test that everything is escaped properly in a TEXT mode
assert payload['text'] == \ assert payload['text'] == \
'<b>🚨 Change detected for &lt;i&gt;Apprise Test Title&lt;/i&gt;</b>' \ '<b>🚨 Change detected&nbsp;for&nbsp;&lt;i&gt;Apprise&nbsp;Test' \
'\r\n&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body Title' \ '&nbsp;Title&lt;/i&gt;</b>\r\n&lt;a href="http://localhost"&gt;' \
'&lt;/i&gt;&lt;/a&gt; had &lt;a href="http://127.0.0.1"&gt;a change' \ '&lt;i&gt;Apprise Body&nbsp;Title&lt;/i&gt;&lt;/a&gt;&nbsp;had' \
'&lt;/a&gt;' '&nbsp;&lt;a&nbsp;href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'
# Reset our values # Reset our values
mock_post.reset_mock() mock_post.reset_mock()
@ -716,9 +716,9 @@ def test_plugin_telegram_formating_py3(mock_post):
# Test that everything is escaped properly in a HTML mode # Test that everything is escaped properly in a HTML mode
assert payload['text'] == \ assert payload['text'] == \
'<b><p>🚨 Change detected for <em>Apprise Test Title</em></p></b>' \ '<b>🚨 Change detected for <i>Apprise Test Title</i></b>\r\n<i>' \
'\r\n<p><em><a href="http://localhost">Apprise Body Title</a></em> ' \ '<a href="http://localhost">Apprise Body Title</a></i> ' \
'had <a href="http://127.0.0.1">a change</a></p>' 'had <a href="http://127.0.0.1">a change</a>'
@pytest.mark.skipif(sys.version_info.major >= 3, reason="Requires Python 2.x+") @pytest.mark.skipif(sys.version_info.major >= 3, reason="Requires Python 2.x+")
@ -809,11 +809,11 @@ def test_plugin_telegram_formating_py2(mock_post):
# Test that everything is escaped properly in a TEXT mode # Test that everything is escaped properly in a TEXT mode
assert payload['text'].encode('utf-8') == \ assert payload['text'].encode('utf-8') == \
'<b>\xf0\x9f\x9a\xa8 Change detected for &lt;i&gt;' \ '<b>\xf0\x9f\x9a\xa8 Change detected&nbsp;for&nbsp;' \
'Apprise Test Title&lt;/i&gt;</b>\r\n' \ '&lt;i&gt;Apprise&nbsp;Test&nbsp;Title&lt;/i&gt;</b>\r\n' \
'&lt;a href="http://localhost"&gt;&lt;i&gt;' \ '&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body&nbsp;' \
'Apprise Body Title&lt;/i&gt;&lt;/a&gt; had &lt;a ' \ 'Title&lt;/i&gt;&lt;/a&gt;&nbsp;had&nbsp;&lt;a&nbsp;' \
'href="http://127.0.0.1"&gt;a change&lt;/a&gt;' 'href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'
# Reset our values # Reset our values
mock_post.reset_mock() mock_post.reset_mock()
@ -897,10 +897,9 @@ def test_plugin_telegram_formating_py2(mock_post):
# Test that everything is escaped properly in a HTML mode # Test that everything is escaped properly in a HTML mode
assert payload['text'].encode('utf-8') == \ assert payload['text'].encode('utf-8') == \
'<b><p>\xf0\x9f\x9a\xa8 Change detected for ' \ '<b>\xf0\x9f\x9a\xa8 Change detected for <i>Apprise Test Title</i>' \
'<em>Apprise Test Title</em></p></b>\r\n<p><em>' \ '</b>\r\n<i><a href="http://localhost">Apprise Body Title</a></i> ' \
'<a href="http://localhost">Apprise Body Title</a></em>' \ 'had <a href="http://127.0.0.1">a change</a>'
' had <a href="http://127.0.0.1">a change</a></p>'
# Reset our values # Reset our values
mock_post.reset_mock() mock_post.reset_mock()
@ -951,3 +950,90 @@ def test_plugin_telegram_formating_py2(mock_post):
'<b>\xd7\x9b\xd7\x95\xd7\xaa\xd7\xa8\xd7\xaa '\ '<b>\xd7\x9b\xd7\x95\xd7\xaa\xd7\xa8\xd7\xaa '\
'\xd7\xa0\xd7\xa4\xd7\x9c\xd7\x90\xd7\x94</b>\r\n[_[\xd7\x96\xd7\x95 '\ '\xd7\xa0\xd7\xa4\xd7\x9c\xd7\x90\xd7\x94</b>\r\n[_[\xd7\x96\xd7\x95 '\
'\xd7\x94\xd7\x95\xd7\x93\xd7\xa2\xd7\x94](http://localhost)_' '\xd7\x94\xd7\x95\xd7\x93\xd7\xa2\xd7\x94](http://localhost)_'
@mock.patch('requests.post')
def test_plugin_telegram_html_formatting(mock_post):
    """
    NotifyTelegram() HTML Formatting

    Sends the same title/body pair through a Telegram target twice, once
    with an HTML body format and once with a TEXT body format, and checks
    the exact `text` payload handed to the (mocked) `requests.post` call.
    """

    # Don't send anything other than <b>, <i>, <a>, <code> and <pre>

    # Disable Throttling to speed testing
    plugins.NotifyTelegram.request_rate_per_sec = 0

    # Prepare Mock
    mock_post.return_value = requests.Request()
    mock_post.return_value.status_code = requests.codes.ok

    # Simple success response; it is returned for every mocked post,
    # including the bot owner lookup made by the first notification
    mock_post.return_value.content = dumps({
        "ok": True,
        "result": [{
            "update_id": 645421321,
            "message": {
                "message_id": 2,
                "from": {
                    "id": 532389719,
                    "is_bot": False,
                    "first_name": "Chris",
                    "language_code": "en-US"
                },
                "chat": {
                    "id": 532389719,
                    "first_name": "Chris",
                    "type": "private"
                },
                "date": 1519694394,
                "text": "/start",
                "entities": [{
                    "offset": 0,
                    "length": 6,
                    "type": "bot_command",
                }],
            }},
        ],
    })

    aobj = Apprise()
    aobj.add('tgram://123456789:abcdefg_hijklmnop/')

    assert len(aobj) == 1
    assert isinstance(aobj[0], plugins.NotifyTelegram)

    # Test our HTML Conversion; the entities are deliberately written both
    # with and without their trailing semi-colon
    title = '<title>&apos;information&apos</title>'
    body = '<em>&quot;This is in Italic&quot</em><br/>' \
        '<h5>&emsp;&emspHeadings&nbsp;are dropped and' \
        '&nbspconverted to bold</h5>'

    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.HTML)

    # 1 call to look up bot owner, and second for notification
    assert mock_post.call_count == 2

    payload = loads(mock_post.call_args_list[1][1]['data'])

    # Test that everything is escaped properly in a HTML mode
    assert payload['text'] == \
        '<b><b>\'information\'</b></b>\r\n<i>"This is in Italic"</i>' \
        '<b> Headings are dropped and converted to bold</b>'

    mock_post.reset_mock()

    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.TEXT)

    # owner has already been looked up, so only one call is made
    assert mock_post.call_count == 1

    payload = loads(mock_post.call_args_list[0][1]['data'])

    # In TEXT mode the markup is escaped rather than interpreted
    assert payload['text'] == \
        '<b>&lt;title&gt;&amp;apos;information&amp;apos&lt;/title&gt;</b>' \
        '\r\n&lt;em&gt;&amp;quot;This is in&nbsp;Italic&amp;quot&lt;/em&gt;' \
        '&lt;br/&gt;&lt;h5&gt;&amp;emsp;&amp;emspHeadings&amp;nbsp;are' \
        '&nbsp;dropped&nbsp;and&amp;nbspconverted&nbsp;to&nbsp;bold&lt;/h5&gt;'