Telegram and HTML title conversion updates (#574)

2022-04-23 08:30:45 -04:00 · 2022-04-23 08:30:45 -04:00 · 8a3acc815d
parent 1908b26668
commit 8a3acc815d
5 changed files with 177 additions and 67 deletions
--- a/apprise/Apprise.py
+++ b/apprise/Apprise.py
@ -546,7 +546,7 @@ class Apprise(object):
                 conversion_body_map[server.notify_format]) = \
                    convert_between(
                        body_format, server.notify_format, body=body,
-                        title=title)
+                        title=title, title_format=server.title_format)
                if interpret_escapes:
                    #
--- a/apprise/conversion.py
+++ b/apprise/conversion.py
@ -36,7 +36,8 @@ else:
    from html.parser import HTMLParser
-def convert_between(from_format, to_format, body, title=None):
+def convert_between(from_format, to_format, body, title=None,
                    title_format=NotifyFormat.TEXT):
    """
    Converts between different notification formats. If no conversion exists,
    or the selected one fails, the original text will be returned.
@ -60,27 +61,31 @@ def convert_between(from_format, to_format, body, title=None):
        title = '' if not title else title
    convert = converters.get((from_format, to_format))
-    title, body = convert(title=title, body=body) \
+    title, body = convert(title=title, body=body, title_format=title_format) \
        if convert is not None else (title, body)
    return (title, body)
-def markdown_to_html(body, title=None):
+def markdown_to_html(body, title=None, title_format=None):
    """
    Handle Markdown conversions
    """
    if title_format == NotifyFormat.HTML and title:
        # Convert the title only when an HTML title format was requested
        title = markdown(title)
    return (
        # Title
-        '' if not title else markdown(title),
+        '' if not title else title,
        # Body
        markdown(body),
    )
-def text_to_html(body, title=None):
+def text_to_html(body, title=None, title_format=None):
    """
    Converts a notification body from plain text to HTML.
    """
@ -124,7 +129,7 @@ def text_to_html(body, title=None):
                lambda x: re_map[x.group()], body)))
-def html_to_text(body, title=None):
+def html_to_text(body, title=None, title_format=None):
    """
    Converts a notification body from HTML to plain text.
    """
--- a/apprise/plugins/NotifyBase.py
+++ b/apprise/plugins/NotifyBase.py
@ -121,6 +121,13 @@ class NotifyBase(BASE_OBJECT):
    # automatically placed into the body
    title_maxlen = 250
    # Set this to HTML for services that support HTML in the title. For
    # example, services like Telegram support HTML in the title, whereas
    # services like Email (where the title goes in the Subject line) do not
    # (although the body does). By default titles are not converted, but
    # plugins may override this to enable it.
    title_format = NotifyFormat.TEXT
    # Set the maximum line count; if this is set to anything larger than zero
    # the message (prior to it being sent) will be truncated to this number
    # of lines. Setting this to zero disables this feature.
--- a/apprise/plugins/NotifyTelegram.py
+++ b/apprise/plugins/NotifyTelegram.py
@ -105,6 +105,9 @@ class NotifyTelegram(NotifyBase):
    # The maximum allowable characters allowed in the body per message
    body_maxlen = 4096
    # Allow the title to support HTML character sets
    title_format = NotifyFormat.HTML
    # Telegram is limited to sending a maximum of 100 requests per second.
    request_rate_per_sec = 0.001
@ -541,7 +544,7 @@ class NotifyTelegram(NotifyBase):
            'disable_web_page_preview': not self.preview,
        }
-        # Prepare Email Message
+        # Prepare Message Body
        if self.notify_format == NotifyFormat.MARKDOWN:
            payload['parse_mode'] = 'MARKDOWN'
@ -550,7 +553,7 @@ class NotifyTelegram(NotifyBase):
                body,
            )
-        else:  # TEXT or HTML
+        elif self.notify_format == NotifyFormat.HTML:
            # Use Telegram's HTML mode
            payload['parse_mode'] = 'HTML'
@ -561,39 +564,48 @@ class NotifyTelegram(NotifyBase):
            telegram_escape_html_dict = {
                # HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
                # See https://core.telegram.org/bots/api#html-style
-                r'nbsp': ' ',
+                r'\&nbsp;?': ' ',
                # Tabs become 3 spaces
-                r'emsp': '   ',
+                r'\&emsp;?': '   ',
                # Some characters get re-escaped by the Telegram upstream
                # service so we need to convert these back,
-                r'apos': '\'',
+                r'\&apos;?': '\'',
-                r'quot': '"',
+                r'\&quot;?': '"',
                # the following tags are not supported
                r'<[ \t]*/?(br|p|div|span|body|script|meta|html|font'
                r'|label|iframe|li|ol|ul)[^>]*>': '',
                # The following characters can be altered to become supported
                r'<[ \t]*pre[^>]*>': '<code>',
                r'<[ \t]*/pre[^>]*>': '</code>',
                # Bold
                r'<[ \t]*(h[0-9]+|title|strong)[^>]*>': '<b>',
                r'<[ \t]*/(h[0-9]+|title|strong)[^>]*>': '</b>',
                # Italic
                r'<[ \t]*(caption|em)[^>]*>': '<i>',
                r'<[ \t]*/(caption|em)[^>]*>': '</i>',
            }
-            # Create a regular expression from the dictionary keys
+            # NOTE: `flags` must be passed by keyword; the 4th positional
+            # argument of re.sub() is `count`, so passing re.I there would
+            # cap replacements at 2 and leave matching case-sensitive.
+            for k, v in telegram_escape_html_dict.items():
-            html_regex = re.compile("&(%s);?" % "|".join(
+                body = re.sub(k, v, body, flags=re.I)
                map(re.escape, telegram_escape_html_dict.keys())).lower(),
                re.I)
            # For each match, look-up corresponding value in dictionary
            # we look +1 to ignore the & that does not appear in the index
            # we only look at the first 4 characters because we don't want to
            # fail on &apos; as it's accepted (along with &apos - no
            # semi-colon)
            body = html_regex.sub(  # pragma: no branch
                lambda mo: telegram_escape_html_dict[
                    mo.string[mo.start():mo.end()][1:5]], body)
                if title:
-                # For each match, look-up corresponding value in dictionary
+                    title = re.sub(k, v, title, flags=re.I)
-                # Indexing is explained above (for how the body is parsed)
+
-                title = html_regex.sub(  # pragma: no branch
+            # prepare our payload based on HTML or TEXT
-                    lambda mo: telegram_escape_html_dict[
+            payload['text'] = '{}{}'.format(
-                        mo.string[mo.start():mo.end()][1:5]], title)
+                '<b>{}</b>\r\n'.format(title) if title else '',
                body,
            )
        else:  # self.notify_format == NotifyFormat.TEXT:
            # Use Telegram's HTML mode
            payload['parse_mode'] = 'HTML'
            if self.notify_format == NotifyFormat.TEXT:
            # Further html escaping required...
            telegram_escape_text_dict = {
                # We need to escape characters that conflict with html
--- a/test/test_plugin_telegram.py
+++ b/test/test_plugin_telegram.py
@ -407,7 +407,7 @@ def test_plugin_telegram_general(mock_post):
    # Test our payload
    assert payload['text'] == \
-        '<b>special characters</b>\r\n<p>\'"This can\'t\t\r\nfail us"\'</p>'
+        '<b>special characters</b>\r\n\'"This can\'t\t\r\nfail us"\''
    # Test sending attachments
    attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
@ -629,10 +629,10 @@ def test_plugin_telegram_formating_py3(mock_post):
    # Test that everything is escaped properly in a TEXT mode
    assert payload['text'] == \
-        '<b>🚨 Change detected for &lt;i&gt;Apprise Test Title&lt;/i&gt;</b>' \
+        '<b>🚨 Change detected&nbsp;for&nbsp;&lt;i&gt;Apprise&nbsp;Test' \
-        '\r\n&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body Title' \
+        '&nbsp;Title&lt;/i&gt;</b>\r\n&lt;a href="http://localhost"&gt;' \
-        '&lt;/i&gt;&lt;/a&gt; had &lt;a href="http://127.0.0.1"&gt;a change' \
+        '&lt;i&gt;Apprise Body&nbsp;Title&lt;/i&gt;&lt;/a&gt;&nbsp;had' \
-        '&lt;/a&gt;'
+        '&nbsp;&lt;a&nbsp;href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'
    # Reset our values
    mock_post.reset_mock()
@ -716,9 +716,9 @@ def test_plugin_telegram_formating_py3(mock_post):
    # Test that everything is escaped properly in a HTML mode
    assert payload['text'] == \
-        '<b><p>🚨 Change detected for <em>Apprise Test Title</em></p></b>' \
+        '<b>🚨 Change detected for <i>Apprise Test Title</i></b>\r\n<i>' \
-        '\r\n<p><em><a href="http://localhost">Apprise Body Title</a></em> ' \
+        '<a href="http://localhost">Apprise Body Title</a></i> ' \
-        'had <a href="http://127.0.0.1">a change</a></p>'
+        'had <a href="http://127.0.0.1">a change</a>'
@pytest.mark.skipif(sys.version_info.major >= 3, reason="Requires Python 2.x+")
@ -809,11 +809,11 @@ def test_plugin_telegram_formating_py2(mock_post):
    # Test that everything is escaped properly in a TEXT mode
    assert payload['text'].encode('utf-8') == \
-        '<b>\xf0\x9f\x9a\xa8 Change detected for &lt;i&gt;' \
+        '<b>\xf0\x9f\x9a\xa8 Change detected&nbsp;for&nbsp;' \
-        'Apprise Test Title&lt;/i&gt;</b>\r\n' \
+        '&lt;i&gt;Apprise&nbsp;Test&nbsp;Title&lt;/i&gt;</b>\r\n' \
-        '&lt;a href="http://localhost"&gt;&lt;i&gt;' \
+        '&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body&nbsp;' \
-        'Apprise Body Title&lt;/i&gt;&lt;/a&gt; had &lt;a ' \
+        'Title&lt;/i&gt;&lt;/a&gt;&nbsp;had&nbsp;&lt;a&nbsp;' \
-        'href="http://127.0.0.1"&gt;a change&lt;/a&gt;'
+        'href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'
    # Reset our values
    mock_post.reset_mock()
@ -897,10 +897,9 @@ def test_plugin_telegram_formating_py2(mock_post):
    # Test that everything is escaped properly in a HTML mode
    assert payload['text'].encode('utf-8') == \
-        '<b><p>\xf0\x9f\x9a\xa8 Change detected for ' \
+        '<b>\xf0\x9f\x9a\xa8 Change detected for <i>Apprise Test Title</i>' \
-        '<em>Apprise Test Title</em></p></b>\r\n<p><em>' \
+        '</b>\r\n<i><a href="http://localhost">Apprise Body Title</a></i> ' \
-        '<a href="http://localhost">Apprise Body Title</a></em>' \
+        'had <a href="http://127.0.0.1">a change</a>'
        ' had <a href="http://127.0.0.1">a change</a></p>'
    # Reset our values
    mock_post.reset_mock()
@ -951,3 +950,90 @@ def test_plugin_telegram_formating_py2(mock_post):
        '<b>\xd7\x9b\xd7\x95\xd7\xaa\xd7\xa8\xd7\xaa '\
        '\xd7\xa0\xd7\xa4\xd7\x9c\xd7\x90\xd7\x94</b>\r\n[_[\xd7\x96\xd7\x95 '\
        '\xd7\x94\xd7\x95\xd7\x93\xd7\xa2\xd7\x94](http://localhost)_'
@mock.patch('requests.post')
 def test_plugin_telegram_html_formatting(mock_post):
    """
    NotifyTelegram() HTML Formatting

    Sends an HTML title and body through a Telegram URL and checks the
    resulting payload text, first with an HTML body format and then with a
    TEXT body format.
    """
    # Don't send anything other than <b>, <i>, <a>, <code> and <pre>
    # Disable Throttling to speed testing
    plugins.NotifyTelegram.request_rate_per_sec = 0
    # Prepare Mock
    mock_post.return_value = requests.Request()
    mock_post.return_value.status_code = requests.codes.ok
    # Placeholder content; it is replaced by the full response just below
    mock_post.return_value.content = '{}'
    # Simple success response
    mock_post.return_value.content = dumps({
        "ok": True,
        "result": [{
            "update_id": 645421321,
            "message": {
                "message_id": 2,
                "from": {
                    "id": 532389719,
                    "is_bot": False,
                    "first_name": "Chris",
                    "language_code": "en-US"
                },
                "chat": {
                    "id": 532389719,
                    "first_name": "Chris",
                    "type": "private"
                },
                "date": 1519694394,
                "text": "/start",
                "entities": [{
                    "offset": 0,
                    "length": 6,
                    "type": "bot_command",
                }],
            }},
        ],
    })
    mock_post.return_value.status_code = requests.codes.ok
    # Load a single Telegram service; no chat id is given in the URL
    aobj = Apprise()
    aobj.add('tgram://123456789:abcdefg_hijklmnop/')
    assert len(aobj) == 1
    assert isinstance(aobj[0], plugins.NotifyTelegram)
    # Test our HTML Conversion; entities are given both with and without
    # their trailing semi-colon
    title = '<title>&apos;information&apos</title>'
    body = '<em>&quot;This is in Italic&quot</em><br/>' \
           '<h5>&emsp;&emspHeadings&nbsp;are dropped and' \
           '&nbspconverted to bold</h5>'
    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.HTML)
    # 1 call to look up bot owner, and second for notification
    assert mock_post.call_count == 2
    payload = loads(mock_post.call_args_list[1][1]['data'])
    # Test that everything is converted properly in HTML mode
    assert payload['text'] == \
        '<b><b>\'information\'</b></b>\r\n<i>"This is in Italic"</i>' \
        '<b>      Headings are dropped and converted to bold</b>'
    mock_post.reset_mock()
    # Send the same content again, this time declared as plain text
    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.TEXT)
    # owner has already been looked up, so only one call is made
    assert mock_post.call_count == 1
    payload = loads(mock_post.call_args_list[0][1]['data'])
    # In TEXT mode the markup is expected to arrive escaped, not interpreted
    assert payload['text'] == \
        '<b>&lt;title&gt;&amp;apos;information&amp;apos&lt;/title&gt;</b>' \
        '\r\n&lt;em&gt;&amp;quot;This is in&nbsp;Italic&amp;quot&lt;/em&gt;' \
        '&lt;br/&gt;&lt;h5&gt;&amp;emsp;&amp;emspHeadings&amp;nbsp;are' \
        '&nbsp;dropped&nbsp;and&amp;nbspconverted&nbsp;to&nbsp;bold&lt;/h5&gt;'