From 28b67d42f31a48f969dae33a32cf1f481b08543b Mon Sep 17 00:00:00 2001
From: Chris Caron <lead2gold@gmail.com>
Date: Sat, 27 Apr 2019 23:21:35 -0400
Subject: [PATCH] Better handling of multiple URLs delimited by space/comma

---
 apprise/Apprise.py |  5 ++-
 apprise/utils.py   | 26 ++++++++++++
 test/test_api.py   |  3 ++
 test/test_utils.py | 99 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/apprise/Apprise.py b/apprise/Apprise.py
index 0cc9e88f..f729e068 100644
--- a/apprise/Apprise.py
+++ b/apprise/Apprise.py
@@ -33,6 +33,7 @@ from .common import NotifyType
 from .common import NotifyFormat
 from .utils import is_exclusive_match
 from .utils import parse_list
+from .utils import split_urls
 from .utils import GET_SCHEMA_RE
 from .logger import logger
 
@@ -161,7 +162,9 @@ class Apprise(object):
 
         if isinstance(servers, six.string_types):
             # build our server list
-            servers = parse_list(servers)
+            servers = split_urls(servers)
+            if len(servers) == 0:
+                return False
 
         elif isinstance(servers, (ConfigBase, NotifyBase, AppriseConfig)):
             # Go ahead and just add our plugin into our list
diff --git a/apprise/utils.py b/apprise/utils.py
index 923b19d3..c0177d78 100644
--- a/apprise/utils.py
+++ b/apprise/utils.py
@@ -107,6 +107,10 @@ GET_EMAIL_RE = re.compile(
     re.IGNORECASE,
 )
 
+# Regular expression used to distinguish between multiple URLs
+URL_DETECTION_RE = re.compile(
+    r'([a-z0-9]+?:\/\/.*?)[\s,]*(?=$|[a-z0-9]+?:\/\/)', re.I)
+
 
 def is_hostname(hostname):
     """
@@ -463,6 +467,28 @@ def parse_bool(arg, default=False):
     return bool(arg)
 
 
+def split_urls(urls):
+    """
+    Takes a string containing URLs separated by comma's and/or whitespace
+    (newlines included) and returns them as a list; non-strings yield [].
+    """
+
+    try:
+        results = URL_DETECTION_RE.findall(urls)
+
+    except TypeError:
+        results = []
+
+    if results and results[-1][-1] != urls[-1]:
+        # Save a comma (,) found at the very end of the last URL that the
+        # regex passed over. Search only the tail: '.' never crosses a newline
+        # so matching from the start returned None on multi-line input.
+        trailing = re.search(r'[\s,]+\Z', urls)
+        if trailing:
+            results[-1] += trailing.group().rstrip()
+    return results
+
+
 def parse_list(*args):
     """
     Take a string list and break it into a delimited
diff --git a/test/test_api.py b/test/test_api.py
index e3608dd6..03d0c638 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -318,6 +318,9 @@ def test_apprise_tagging(mock_post, mock_get):
     # Create our object
     a = Apprise()
 
+    # An invalid addition can't add the tag
+    assert(a.add('averyinvalidschema://localhost', tag='uhoh') is False)
+
     # Add entry and assign it to a tag called 'awesome'
     assert(a.add('json://localhost/path1/', tag='awesome') is True)
 
diff --git a/test/test_utils.py b/test/test_utils.py
index b1625575..65ff6aec 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -24,6 +24,7 @@
 # THE SOFTWARE.
 
 from __future__ import print_function
+import re
 try:
     # Python 2.7
     from urllib import unquote
@@ -406,8 +407,104 @@ def test_is_email():
     assert utils.is_email(None) is False
 
 
+def test_split_urls():
+    """utils: split_urls() testing """
+    # An empty string yields an empty list
+    results = utils.split_urls('')
+    assert isinstance(results, list)
+    assert len(results) == 0
+
+    # just delimiters
+    results = utils.split_urls(',  ,, , ,,, ')
+    assert isinstance(results, list)
+    assert len(results) == 0
+
+    results = utils.split_urls(',')
+    assert isinstance(results, list)
+    assert len(results) == 0
+
+    results = utils.split_urls(None)
+    assert isinstance(results, list)
+    assert len(results) == 0
+
+    results = utils.split_urls(42)
+    assert isinstance(results, list)
+    assert len(results) == 0
+
+    results = utils.split_urls('this is not a parseable url at all')
+    assert isinstance(results, list)
+    assert len(results) == 0
+
+    # Now test valid URLs
+    results = utils.split_urls('windows://')
+    assert isinstance(results, list)
+    assert len(results) == 1
+    assert 'windows://' in results
+
+    results = utils.split_urls('windows:// gnome://')
+    assert isinstance(results, list)
+    assert len(results) == 2
+    assert 'windows://' in results
+    assert 'gnome://' in results
+
+    # Commas and spaces found inside URLs are ignored
+    urls = [
+        'mailgun://noreply@sandbox.mailgun.org/apikey/?to=test@example.com,'
+        'test2@example.com,, abcd@example.com',
+        'mailgun://noreply@sandbox.another.mailgun.org/apikey/'
+        '?to=hello@example.com,,hmmm@example.com,, abcd@example.com, ,',
+        'windows://',
+    ]
+
+    # Since comma's and whitespace are the delimiters; they won't be
+    # present at the end of the URL; so we just need to write a special
+    # rstrip() as a regular expression to handle whitespace (\s) and comma
+    # delimiter
+    rstrip_re = re.compile(r'[\s,]+$')
+
+    # Since a comma acts as a delimiter, we run a risk of a problem where the
+    # comma exists as part of the URL and is therefore lost if it was found
+    # at the end of it.
+
+    results = utils.split_urls(', '.join(urls))
+    assert isinstance(results, list)
+    assert len(results) == len(urls)
+    for url in urls:
+        assert rstrip_re.sub('', url) in results
+
+    # However if a comma is found at the end of a single url without a new
+    # match to hit, it is saved and not lost
+
+    # The comma at the end of the password will not be lost if we're
+    # dealing with a single entry:
+    url = 'http://hostname?password=,abcd,'
+    results = utils.split_urls(url)
+    assert isinstance(results, list)
+    assert len(results) == 1
+    assert url in results
+
+    # however if we have multiple entries, commas and spaces between
+    # URLs will be lost; only the last URL will not lose the comma
+    urls = [
+        'schema1://hostname?password=,abcd,',
+        'schema2://hostname?password=,abcd,',
+    ]
+    results = utils.split_urls(', '.join(urls))
+    assert isinstance(results, list)
+    assert len(results) == len(urls)
+
+    # No match because the comma is gone in the results entry
+    # schema1://hostname?password=,abcd
+    assert urls[0] not in results
+    assert urls[0][:-1] in results
+
+    # However we wouldn't have lost the comma in the second one:
+    # schema2://hostname?password=,abcd,
+    assert urls[1] in results
+
+
 def test_parse_list():
-    "utils: parse_list() testing """
+    """utils: parse_list() testing """
 
     # A simple single array entry (As str)
     results = utils.parse_list(