Telegram escaping completely refactored (#386)

pull/389/head
Chris Caron 4 years ago committed by GitHub
parent 7f7ee043d9
commit 59aa5f5d10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -524,39 +524,73 @@ class NotifyTelegram(NotifyBase):
body, body,
) )
elif self.notify_format == NotifyFormat.HTML: else: # HTML or TEXT
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML' payload['parse_mode'] = 'HTML'
# Telegram's HTML support doesn't like having HTML-escaped
# characters passed into it. To handle this situation, we need to
# search the body for these sequences and convert them to the
# output the user expects
telegram_escape_html_dict = {
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported # HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style # See https://core.telegram.org/bots/api#html-style
body = re.sub(' ?', ' ', body, re.I) r'nbsp': ' ',
# Tabs become 3 spaces # Tabs become 3 spaces
body = re.sub(' ?', ' ', body, re.I) r'emsp': ' ',
if title: # Some characters get re-escaped by the Telegram upstream
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported # service so we need to convert these back,
# See https://core.telegram.org/bots/api#html-style r'apos': '\'',
title = re.sub(' ?', ' ', title, re.I) r'quot': '"',
}
# Tabs become 3 spaces # Create a regular expression from the dictionary keys
title = re.sub(' ?', ' ', title, re.I) html_regex = re.compile("&(%s);?" % "|".join(
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)
payload['text'] = '{}{}'.format( # For each match, look-up corresponding value in dictionary
'<b>{}</b>\r\n'.format(title) if title else '', # we look +1 to ignore the & that does not appear in the index
body, # we only look at the first 4 characters because we don't want to
) # fail on &apos; as it's accepted (along with &apos - no
# semi-colon)
body = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)
else: # pass directly as is... if title:
payload['parse_mode'] = 'HTML' # For each match, look-up corresponding value in dictionary
# Indexing is explained above (for how the body is parsed)
title = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], title)
if self.notify_format == NotifyFormat.TEXT:
telegram_escape_text_dict = {
# We need to escape characters that conflict with html
# entity blocks (< and >) when displaying text
r'>': '&gt;',
r'<': '&lt;',
}
# Create a regular expression from the dictionary keys
text_regex = re.compile("(%s)" % "|".join(
map(re.escape, telegram_escape_text_dict.keys())).lower(),
re.I)
# Telegram strangely escapes all HTML characters for us already # For each match, look-up corresponding value in dictionary
# but to avoid causing issues with HTML, we escape the < and > body = text_regex.sub( # pragma: no branch
# characters lambda mo: telegram_escape_text_dict[
title = re.sub('>', '&gt;', title, re.I) mo.string[mo.start():mo.end()]], body)
title = re.sub('<', '&lt;', title, re.I)
body = re.sub('>', '&gt;', body, re.I) if title:
body = re.sub('<', '&lt;', body, re.I) # For each match, look-up corresponding value in dictionary
title = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], title)
payload['text'] = '{}{}'.format( payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '', '<b>{}</b>\r\n'.format(title) if title else '',

@ -29,6 +29,7 @@ import pytest
import mock import mock
import requests import requests
from json import dumps from json import dumps
from json import loads
from apprise import Apprise from apprise import Apprise
from apprise import AppriseAttachment from apprise import AppriseAttachment
from apprise import AppriseAsset from apprise import AppriseAsset
@ -202,11 +203,26 @@ def test_notify_telegram_plugin(mock_post, mock_get):
}) })
mock_post.return_value.status_code = requests.codes.ok mock_post.return_value.status_code = requests.codes.ok
# Test sending attachments
obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345') obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345')
assert len(obj.targets) == 1 assert len(obj.targets) == 1
assert obj.targets[0] == '12345' assert obj.targets[0] == '12345'
# Test the escaping of characters; Telegram escapes some of them for us,
# which we need to take into account
mock_post.reset_mock()
body = "<p>\'\"This can't\t\r\nfail&nbsp;us\"\'</p>"
assert obj.notify(
body=body, title='special characters',
notify_type=NotifyType.INFO) is True
assert mock_post.call_count == 1
payload = loads(mock_post.call_args_list[0][1]['data'])
# Our special characters are escaped properly
assert payload['text'] == \
'<b>special characters</b>\r\n&lt;p&gt;'\
'\'"This can\'t\t\r\nfail us"\'&lt;/p&gt;'
# Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif')) attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
assert obj.notify( assert obj.notify(
body='body', title='title', notify_type=NotifyType.INFO, body='body', title='title', notify_type=NotifyType.INFO,

Loading…
Cancel
Save