Browse Source

Telegram escaping completely refactored (#386)

pull/389/head
Chris Caron 4 years ago committed by GitHub
parent
commit
59aa5f5d10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 86
      apprise/plugins/NotifyTelegram.py
  2. 18
      test/test_telegram.py

86
apprise/plugins/NotifyTelegram.py

@ -524,39 +524,73 @@ class NotifyTelegram(NotifyBase):
body, body,
) )
elif self.notify_format == NotifyFormat.HTML: else: # HTML or TEXT
payload['parse_mode'] = 'HTML'
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
body = re.sub(' ?', ' ', body, re.I)
# Tabs become 3 spaces # Use Telegram's HTML mode
body = re.sub(' ?', ' ', body, re.I) payload['parse_mode'] = 'HTML'
if title: # Telegram's HTML support doesn't like having HTML escaped
# characters passed into it. To handle this situation, we need to
# search the body for these sequences and convert them to the
# output the user expected
telegram_escape_html_dict = {
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported # HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style # See https://core.telegram.org/bots/api#html-style
title = re.sub(' ?', ' ', title, re.I) r'nbsp': ' ',
# Tabs become 3 spaces # Tabs become 3 spaces
title = re.sub(' ?', ' ', title, re.I) r'emsp': ' ',
payload['text'] = '{}{}'.format( # Some characters get re-escaped by the Telegram upstream
'<b>{}</b>\r\n'.format(title) if title else '', # service so we need to convert these back,
body, r'apos': '\'',
) r'quot': '"',
}
# Create a regular expression from the dictionary keys
html_regex = re.compile("&(%s);?" % "|".join(
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)
# For each match, look up the corresponding value in the dictionary.
# We skip the first character of the match (the &) because it is not
# part of the dictionary keys, and we only read the next 4 characters
# so the lookup does not fail on the optional trailing semi-colon
# (both &apos; and &apos - no semi-colon - are accepted)
body = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)
else: # pass directly as is... if title:
payload['parse_mode'] = 'HTML' # For each match, look-up corresponding value in dictionary
# Indexing is explained above (for how the body is parsed)
# Telegram strangely escapes all HTML characters for us already title = html_regex.sub( # pragma: no branch
# but to avoid causing issues with HTML, we escape the < and > lambda mo: telegram_escape_html_dict[
# characters mo.string[mo.start():mo.end()][1:5]], title)
title = re.sub('>', '&gt;', title, re.I)
title = re.sub('<', '&lt;', title, re.I) if self.notify_format == NotifyFormat.TEXT:
body = re.sub('>', '&gt;', body, re.I) telegram_escape_text_dict = {
body = re.sub('<', '&lt;', body, re.I) # We need to escape characters that conflict with html
# entity blocks (< and >) when displaying text
r'>': '&gt;',
r'<': '&lt;',
}
# Create a regular expression from the dictionary keys
text_regex = re.compile("(%s)" % "|".join(
map(re.escape, telegram_escape_text_dict.keys())).lower(),
re.I)
# For each match, look-up corresponding value in dictionary
body = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], body)
if title:
# For each match, look-up corresponding value in dictionary
title = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], title)
payload['text'] = '{}{}'.format( payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '', '<b>{}</b>\r\n'.format(title) if title else '',

18
test/test_telegram.py

@ -29,6 +29,7 @@ import pytest
import mock import mock
import requests import requests
from json import dumps from json import dumps
from json import loads
from apprise import Apprise from apprise import Apprise
from apprise import AppriseAttachment from apprise import AppriseAttachment
from apprise import AppriseAsset from apprise import AppriseAsset
@ -202,11 +203,26 @@ def test_notify_telegram_plugin(mock_post, mock_get):
}) })
mock_post.return_value.status_code = requests.codes.ok mock_post.return_value.status_code = requests.codes.ok
# Test sending attachments
obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345') obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345')
assert len(obj.targets) == 1 assert len(obj.targets) == 1
assert obj.targets[0] == '12345' assert obj.targets[0] == '12345'
# Test the escaping of characters; Telegram escapes some content for us,
# which we need to account for
mock_post.reset_mock()
body = "<p>\'\"This can't\t\r\nfail&nbsp;us\"\'</p>"
assert obj.notify(
body=body, title='special characters',
notify_type=NotifyType.INFO) is True
assert mock_post.call_count == 1
payload = loads(mock_post.call_args_list[0][1]['data'])
# Our special characters are escaped properly
assert payload['text'] == \
'<b>special characters</b>\r\n&lt;p&gt;'\
'\'"This can\'t\t\r\nfail us"\'&lt;/p&gt;'
# Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif')) attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
assert obj.notify( assert obj.notify(
body='body', title='title', notify_type=NotifyType.INFO, body='body', title='title', notify_type=NotifyType.INFO,

Loading…
Cancel
Save