Telegram escaping completely refactored (#386)

pull/389/head
Chris Caron 2021-05-15 16:08:53 -04:00 committed by GitHub
parent 7f7ee043d9
commit 59aa5f5d10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 75 additions and 25 deletions

View File

@ -524,39 +524,73 @@ class NotifyTelegram(NotifyBase):
body, body,
) )
elif self.notify_format == NotifyFormat.HTML: else: # HTML or TEXT
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML' payload['parse_mode'] = 'HTML'
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported # Telegram's HTML support doesn't like having HTML escaped
# See https://core.telegram.org/bots/api#html-style # characters passed into it. To handle this situation, we need to
body = re.sub('&nbsp;?', ' ', body, re.I) # search the body for these sequences and convert them to the
# output the user expected
# Tabs become 3 spaces telegram_escape_html_dict = {
body = re.sub('&emsp;?', '   ', body, re.I)
if title:
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported # HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style # See https://core.telegram.org/bots/api#html-style
title = re.sub('&nbsp;?', ' ', title, re.I) r'nbsp': ' ',
# Tabs become 3 spaces # Tabs become 3 spaces
title = re.sub('&emsp;?', '   ', title, re.I) r'emsp': '   ',
payload['text'] = '{}{}'.format( # Some characters get re-escaped by the Telegram upstream
'<b>{}</b>\r\n'.format(title) if title else '', # service so we need to convert these back,
body, r'apos': '\'',
) r'quot': '"',
}
else: # pass directly as is... # Create a regular expression from the dictionary keys
payload['parse_mode'] = 'HTML' html_regex = re.compile("&(%s);?" % "|".join(
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)
# Telegram strangely escapes all HTML characters for us already # For each match, look-up corresponding value in dictionary
# but to avoid causing issues with HTML, we escape the < and > # we look +1 to ignore the & that does not appear in the index
# characters # we only look at the first 4 characters because we don't want to
title = re.sub('>', '&gt;', title, re.I) # fail on &apos; as it's accepted (along with &apos - no
title = re.sub('<', '&lt;', title, re.I) # semi-colon)
body = re.sub('>', '&gt;', body, re.I) body = html_regex.sub( # pragma: no branch
body = re.sub('<', '&lt;', body, re.I) lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)
if title:
# For each match, look-up corresponding value in dictionary
# Indexing is explained above (for how the body is parsed)
title = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], title)
if self.notify_format == NotifyFormat.TEXT:
telegram_escape_text_dict = {
# We need to escape characters that conflict with html
# entity blocks (< and >) when displaying text
r'>': '&gt;',
r'<': '&lt;',
}
# Create a regular expression from the dictionary keys
text_regex = re.compile("(%s)" % "|".join(
map(re.escape, telegram_escape_text_dict.keys())).lower(),
re.I)
# For each match, look-up corresponding value in dictionary
body = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], body)
if title:
# For each match, look-up corresponding value in dictionary
title = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], title)
payload['text'] = '{}{}'.format( payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '', '<b>{}</b>\r\n'.format(title) if title else '',

View File

@ -29,6 +29,7 @@ import pytest
import mock import mock
import requests import requests
from json import dumps from json import dumps
from json import loads
from apprise import Apprise from apprise import Apprise
from apprise import AppriseAttachment from apprise import AppriseAttachment
from apprise import AppriseAsset from apprise import AppriseAsset
@ -202,11 +203,26 @@ def test_notify_telegram_plugin(mock_post, mock_get):
}) })
mock_post.return_value.status_code = requests.codes.ok mock_post.return_value.status_code = requests.codes.ok
# Test sending attachments
obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345') obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345')
assert len(obj.targets) == 1 assert len(obj.targets) == 1
assert obj.targets[0] == '12345' assert obj.targets[0] == '12345'
# Test the escaping of characters, since Telegram escapes some characters
# for us and we need to take that into account
mock_post.reset_mock()
body = "<p>\'\"This can't\t\r\nfail&nbsp;us\"\'</p>"
assert obj.notify(
body=body, title='special characters',
notify_type=NotifyType.INFO) is True
assert mock_post.call_count == 1
payload = loads(mock_post.call_args_list[0][1]['data'])
# Our special characters are escaped properly
assert payload['text'] == \
'<b>special characters</b>\r\n&lt;p&gt;'\
'\'"This can\'t\t\r\nfail us"\'&lt;/p&gt;'
# Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif')) attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
assert obj.notify( assert obj.notify(
body='body', title='title', notify_type=NotifyType.INFO, body='body', title='title', notify_type=NotifyType.INFO,