mirror of https://github.com/caronc/apprise
code & test improvements, added more coverage
parent
8d543a5eb3
commit
f725b3ac75
|
@ -101,7 +101,7 @@ class HTMLConverter(HTMLParser, object):
|
||||||
|
|
||||||
# The following tags must start on a new line
|
# The following tags must start on a new line
|
||||||
BLOCK_TAGS = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
BLOCK_TAGS = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||||
'div', 'td', 'th', 'code', 'pre', 'label', 'li',)
|
'div', 'td', 'th', 'pre', 'samp', 'label', 'li',)
|
||||||
|
|
||||||
# the following tags ignore any internal text
|
# the following tags ignore any internal text
|
||||||
IGNORE_TAGS = (
|
IGNORE_TAGS = (
|
||||||
|
@ -216,8 +216,10 @@ class HTMLMarkDownConverter(HTMLConverter):
|
||||||
"""An HTML to markdown converter tuned for email messages."""
|
"""An HTML to markdown converter tuned for email messages."""
|
||||||
|
|
||||||
# Escape markdown characters
|
# Escape markdown characters
|
||||||
MARKDOWN_ESCAPE = re.compile(r'([\\`*_{}[\]<>()#+\-.!|])',
|
MARKDOWN_ESCAPE = re.compile(r'([`*#])', re.DOTALL | re.MULTILINE)
|
||||||
re.DOTALL | re.MULTILINE)
|
|
||||||
|
# Detect line breaks (carriage return and/or newline characters)
|
||||||
|
HAS_CR = re.compile(r'[\r*\n]+', re.DOTALL | re.MULTILINE)
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
@ -225,6 +227,8 @@ class HTMLMarkDownConverter(HTMLConverter):
|
||||||
# Store href value
|
# Store href value
|
||||||
self._link = ""
|
self._link = ""
|
||||||
|
|
||||||
|
self._preserver_cr = False
|
||||||
|
|
||||||
def handle_data(self, data, *args, **kwargs):
|
def handle_data(self, data, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Store our data if it is not on the ignore list
|
Store our data if it is not on the ignore list
|
||||||
|
@ -234,7 +238,8 @@ class HTMLMarkDownConverter(HTMLConverter):
|
||||||
if self._do_store:
|
if self._do_store:
|
||||||
|
|
||||||
# Tidy our whitespace
|
# Tidy our whitespace
|
||||||
content = self.WS_TRIM.sub(' ', data)
|
content = self.WS_TRIM.sub(' ', data) \
|
||||||
|
if not self._preserver_cr else data
|
||||||
content = self.MARKDOWN_ESCAPE.sub(r'\\\1', content)
|
content = self.MARKDOWN_ESCAPE.sub(r'\\\1', content)
|
||||||
|
|
||||||
# Add hyperlink
|
# Add hyperlink
|
||||||
|
@ -287,19 +292,28 @@ class HTMLMarkDownConverter(HTMLConverter):
|
||||||
elif tag == 'h6':
|
elif tag == 'h6':
|
||||||
self._result.append('###### ')
|
self._result.append('###### ')
|
||||||
|
|
||||||
elif tag in ['strong', 'b']:
|
elif tag in ('strong', 'b'):
|
||||||
self._result.append('**')
|
self._result.append('**')
|
||||||
|
|
||||||
elif tag in ['em', 'i']:
|
elif tag in ('em', 'i'):
|
||||||
self._result.append('*')
|
self._result.append('*')
|
||||||
|
|
||||||
elif tag == 'code':
|
elif tag == 'code':
|
||||||
self._result.append('`')
|
self._result.append('`')
|
||||||
|
self._preserver_cr = True
|
||||||
|
|
||||||
|
elif tag in ('pre', 'samp'):
|
||||||
|
self._result.append('```')
|
||||||
|
self._result.append(self.BLOCK_END)
|
||||||
|
self._preserver_cr = True
|
||||||
|
|
||||||
elif tag == 'a':
|
elif tag == 'a':
|
||||||
for name, link in attrs:
|
for name, link in attrs:
|
||||||
if name == 'href':
|
if name == 'href':
|
||||||
self._link = '(' + link + ')'
|
self._link = '(' + link + ')'
|
||||||
|
# Take an early exit for speed (in case there are more
|
||||||
|
# parameters - no need to waste time looking at them)
|
||||||
|
break
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
"""
|
"""
|
||||||
|
@ -311,11 +325,17 @@ class HTMLMarkDownConverter(HTMLConverter):
|
||||||
if tag in self.BLOCK_TAGS:
|
if tag in self.BLOCK_TAGS:
|
||||||
self._result.append(self.BLOCK_END)
|
self._result.append(self.BLOCK_END)
|
||||||
|
|
||||||
if tag in ['strong', 'b']:
|
if tag in ('strong', 'b'):
|
||||||
self._result.append('**')
|
self._result.append('**')
|
||||||
|
|
||||||
elif tag in ['em', 'i']:
|
elif tag in ('em', 'i'):
|
||||||
self._result.append('*')
|
self._result.append('*')
|
||||||
|
|
||||||
elif tag == 'code':
|
elif tag == 'code':
|
||||||
self._result.append('`')
|
self._result.append('`')
|
||||||
|
self._preserver_cr = False
|
||||||
|
|
||||||
|
elif tag in ('pre', 'samp'):
|
||||||
|
self._result.append('```')
|
||||||
|
self._result.append(self.BLOCK_END)
|
||||||
|
self._preserver_cr = False
|
||||||
|
|
|
@ -153,24 +153,24 @@ def test_conversion_html_to_markdown():
|
||||||
"""
|
"""
|
||||||
return convert_between(NotifyFormat.HTML, NotifyFormat.MARKDOWN, body)
|
return convert_between(NotifyFormat.HTML, NotifyFormat.MARKDOWN, body)
|
||||||
|
|
||||||
assert to_markdown("No HTML code here.") == "No HTML code here\."
|
assert to_markdown("No HTML code here.") == "No HTML code here."
|
||||||
|
|
||||||
clist = to_markdown("<ul><li>Lots and lots</li><li>of lists.</li></ul>")
|
clist = to_markdown("<ul><li>Lots and lots</li><li>of lists.</li></ul>")
|
||||||
assert "- Lots and lots" in clist
|
assert "- Lots and lots" in clist
|
||||||
assert "- of lists\." in clist
|
assert "- of lists." in clist
|
||||||
|
|
||||||
assert "> To be or not to be\." == to_markdown(
|
assert "> To be or not to be." == to_markdown(
|
||||||
"<blockquote>To be or not to be.</blockquote>")
|
"<blockquote>To be or not to be.</blockquote>")
|
||||||
|
|
||||||
cspace = to_markdown(
|
cspace = to_markdown(
|
||||||
"<h2>Fancy heading</h2>"
|
"<h2>Fancy heading</h2>"
|
||||||
"<p>And a paragraph too.<br>Plus line break.</p>")
|
"<p>And a paragraph too.<br>Plus line break.</p>")
|
||||||
assert "# Fancy heading" in cspace
|
assert "# Fancy heading" in cspace
|
||||||
assert "And a paragraph too\.\nPlus line break\." in cspace
|
assert "And a paragraph too.\nPlus line break." in cspace
|
||||||
|
|
||||||
assert to_markdown(
|
assert to_markdown(
|
||||||
"<style>body { font: 200%; }</style>"
|
"<style>body { font: 200%; }</style>"
|
||||||
"<p>Some obnoxious text here.</p>") == "Some obnoxious text here\."
|
"<p>Some obnoxious text here.</p>") == "Some obnoxious text here."
|
||||||
|
|
||||||
assert to_markdown(
|
assert to_markdown(
|
||||||
"<p>line 1</p>"
|
"<p>line 1</p>"
|
||||||
|
@ -194,9 +194,18 @@ def test_conversion_html_to_markdown():
|
||||||
|
|
||||||
# the first 2 entries are okay, but the last one is handled as best as it can be
|
# the first 2 entries are okay, but the last one is handled as best as it can be
|
||||||
assert to_markdown(
|
assert to_markdown(
|
||||||
|
"<h1>Heading 1</h1>"
|
||||||
|
"<h2>Heading 2</h2>"
|
||||||
|
"<h3>Heading 3</h3>"
|
||||||
|
"<h4>Heading 4</h4>"
|
||||||
|
"<h5>Heading 5</h5>"
|
||||||
|
"<h6>Heading 6</h6>"
|
||||||
"<p>line 1</>"
|
"<p>line 1</>"
|
||||||
"<p>line 2</gar>"
|
"<p><em>line 2</em></gar>"
|
||||||
"<p>line 3>") == "line 1\nline 2\nline 3\>"
|
"<p>line 3>") == \
|
||||||
|
"# Heading 1\n## Heading 2\n### Heading 3\n" \
|
||||||
|
"#### Heading 4\n##### Heading 5\n###### Heading 6\n" \
|
||||||
|
"line 1\n*line 2*\nline 3>"
|
||||||
|
|
||||||
# Make sure we ignore fields that aren't important to us
|
# Make sure we ignore fields that aren't important to us
|
||||||
assert to_markdown(
|
assert to_markdown(
|
||||||
|
@ -205,6 +214,13 @@ def test_conversion_html_to_markdown():
|
||||||
"Another line without being enclosed") == \
|
"Another line without being enclosed") == \
|
||||||
"line 1\nAnother line without being enclosed"
|
"line 1\nAnother line without being enclosed"
|
||||||
|
|
||||||
|
# Test <code> and <pre>
|
||||||
|
assert to_markdown(
|
||||||
|
"<code>multi-line 1\nmulti-line 2</code>more content"
|
||||||
|
"<pre>multi-line 1\nmulti-line 2</pre>more content") == \
|
||||||
|
'`multi-line 1\nmulti-line 2`more content' \
|
||||||
|
'\n```\nmulti-line 1\nmulti-line 2\n```\nmore content'
|
||||||
|
|
||||||
# Test cases when there are no new lines (we're dealing with just inline
|
# Test cases when there are no new lines (we're dealing with just inline
|
||||||
# entries); an empty entry as well
|
# entries); an empty entry as well
|
||||||
assert to_markdown("<span></span<<span>test</span> "
|
assert to_markdown("<span></span<<span>test</span> "
|
||||||
|
|
Loading…
Reference in New Issue