From fa2d338568dc2b4ba6468b0dc6f7b53c6f61637a Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sat, 11 May 2024 16:03:01 -0400 Subject: [PATCH] Do not sanitize http:// attachment URLs (#1122) --- apprise/attachment/base.py | 5 +++-- apprise/attachment/http.py | 6 ++---- apprise/url.py | 5 +++-- apprise/utils.py | 13 +++++++++---- test/test_apprise_utils.py | 32 ++++++++++++++++++++++++++++++++ test/test_attach_http.py | 8 ++++++-- 6 files changed, 55 insertions(+), 14 deletions(-) diff --git a/apprise/attachment/base.py b/apprise/attachment/base.py index fe8eb1bd..71e3a4d0 100644 --- a/apprise/attachment/base.py +++ b/apprise/attachment/base.py @@ -315,7 +315,7 @@ class AttachBase(URLBase): "download() is implimented by the child class.") @staticmethod - def parse_url(url, verify_host=True, mimetype_db=None): + def parse_url(url, verify_host=True, mimetype_db=None, sanitize=True): """Parses the URL and returns it broken apart into a dictionary. This is very specific and customized for Apprise. @@ -333,7 +333,8 @@ class AttachBase(URLBase): successful, otherwise None is returned. """ - results = URLBase.parse_url(url, verify_host=verify_host) + results = URLBase.parse_url( + url, verify_host=verify_host, sanitize=sanitize) if not results: # We're done; we failed to parse our url diff --git a/apprise/attachment/http.py b/apprise/attachment/http.py index 28bf55ca..aa075d67 100644 --- a/apprise/attachment/http.py +++ b/apprise/attachment/http.py @@ -296,8 +296,7 @@ class AttachHTTP(AttachBase): """ Tidy memory if open """ - with self._lock: - self.invalidate() + self.invalidate() def url(self, privacy=False, *args, **kwargs): """ @@ -363,8 +362,7 @@ class AttachHTTP(AttachBase): us to re-instantiate this object. """ - results = AttachBase.parse_url(url) - + results = AttachBase.parse_url(url, sanitize=False) if not results: # We're done early as we couldn't load the results return results diff --git a/apprise/url.py b/apprise/url.py index 72fc1b5d..39daec86 100644 --- a/apprise/url.py +++ b/apprise/url.py @@ -744,7 +744,7 @@ class URLBase: @staticmethod def parse_url(url, verify_host=True, plus_to_space=False, - strict_port=False): + strict_port=False, sanitize=True): """Parses the URL and returns it broken apart into a dictionary. This is very specific and customized for Apprise. @@ -765,7 +765,8 @@ class URLBase: results = parse_url( url, default_schema='unknown', verify_host=verify_host, - plus_to_space=plus_to_space, strict_port=strict_port) + plus_to_space=plus_to_space, strict_port=strict_port, + sanitize=sanitize) if not results: # We're done; we failed to parse our url diff --git a/apprise/utils.py b/apprise/utils.py index e1881f31..b33ec074 100644 --- a/apprise/utils.py +++ b/apprise/utils.py @@ -541,7 +541,7 @@ def tidy_path(path): return path -def parse_qsd(qs, simple=False, plus_to_space=False): +def parse_qsd(qs, simple=False, plus_to_space=False, sanitize=True): """ Query String Dictionary Builder @@ -568,6 +568,8 @@ def parse_qsd(qs, simple=False, plus_to_space=False): per normal URL Encoded defininition. Normal URL parsing applies this, but `+` is very actively used character with passwords, api keys, tokens, etc. So Apprise does not do this by default. + + if sanitize is set to False, then kwargs are not placed into lowercase """ # Our return result set: @@ -608,7 +610,7 @@ def parse_qsd(qs, simple=False, plus_to_space=False): # Always Query String Dictionary (qsd) for every entry we have # content is always made lowercase for easy indexing - result['qsd'][key.lower().strip()] = val + result['qsd'][key.lower().strip() if sanitize else key] = val if simple: # move along @@ -636,7 +638,7 @@ def parse_qsd(qs, simple=False, plus_to_space=False): def parse_url(url, default_schema='http', verify_host=True, strict_port=False, - simple=False, plus_to_space=False): + simple=False, plus_to_space=False, sanitize=True): """A function that greatly simplifies the parsing of a url specified by the end user. @@ -691,6 +693,8 @@ def parse_url(url, default_schema='http', verify_host=True, strict_port=False, If the URL can't be parsed then None is returned + If sanitize is set to False, then kwargs are not placed in lowercase + and wrapping whitespace is not removed """ if not isinstance(url, str): @@ -750,7 +754,8 @@ def parse_url(url, default_schema='http', verify_host=True, strict_port=False, # while ensuring that all keys are lowercase if qsdata: result.update(parse_qsd( - qsdata, simple=simple, plus_to_space=plus_to_space)) + qsdata, simple=simple, plus_to_space=plus_to_space, + sanitize=sanitize)) # Now do a proper extraction of data; http:// is just substitued in place # to allow urlparse() to function as expected, we'll swap this back to the diff --git a/test/test_apprise_utils.py b/test/test_apprise_utils.py index 9ff79377..340f78c9 100644 --- a/test/test_apprise_utils.py +++ b/test/test_apprise_utils.py @@ -719,6 +719,38 @@ def test_parse_url_general(): assert result['qsd+'] == {} assert result['qsd:'] == {} + # Sanitizing + result = utils.parse_url( + 'hTTp://hostname/?+KeY=ValueA&-kEy=ValueB&KEY=Value%20+C&:cOlON=YeS', + sanitize=False) + + assert len(result['qsd-']) == 1 + assert len(result['qsd+']) == 1 + assert len(result['qsd']) == 4 + assert len(result['qsd:']) == 1 + + assert result['schema'] == 'http' + assert result['host'] == 'hostname' + assert result['port'] is None + assert result['user'] is None + assert result['password'] is None + assert result['fullpath'] == '/' + assert result['path'] == '/' + assert result['query'] is None + assert result['url'] == 'http://hostname/' + assert '+KeY' in result['qsd'] + assert '-kEy' in result['qsd'] + assert ':cOlON' in result['qsd'] + assert result['qsd:']['cOlON'] == 'YeS' + assert 'key' not in result['qsd'] + assert 'KeY' in result['qsd+'] + assert result['qsd+']['KeY'] == 'ValueA' + assert 'kEy' in result['qsd-'] + assert result['qsd-']['kEy'] == 'ValueB' + assert result['qsd']['KEY'] == 'Value +C' + assert result['qsd']['+KeY'] == result['qsd+']['KeY'] + assert result['qsd']['-kEy'] == result['qsd-']['kEy'] + def test_parse_url_simple(): "utils: parse_url() testing """ diff --git a/test/test_attach_http.py b/test/test_attach_http.py index 5894341d..71e32719 100644 --- a/test/test_attach_http.py +++ b/test/test_attach_http.py @@ -190,7 +190,7 @@ def test_attach_http(mock_get, mock_post): # Test custom url get parameters results = AttachHTTP.parse_url( - 'http://user:pass@localhost/apprise.gif?dl=1&cache=300') + 'http://user:pass@localhost/apprise.gif?DL=1&cache=300') assert isinstance(results, dict) attachment = AttachHTTP(**results) assert isinstance(attachment.url(), str) is True @@ -200,12 +200,16 @@ def test_attach_http(mock_get, mock_post): assert attachment assert mock_get.call_count == 1 assert 'params' in mock_get.call_args_list[0][1] - assert 'dl' in mock_get.call_args_list[0][1]['params'] + assert 'DL' in mock_get.call_args_list[0][1]['params'] # Verify that arguments that are reserved for apprise are not # passed along assert 'cache' not in mock_get.call_args_list[0][1]['params'] + with mock.patch('os.unlink', side_effect=OSError()): + # Test invalidation with exception thrown + attachment.invalidate() + results = AttachHTTP.parse_url( 'http://user:pass@localhost/apprise.gif?+key=value&cache=True') assert isinstance(results, dict)