From 13b0e73a4eed301a75465ef5f1ef8980f4621605 Mon Sep 17 00:00:00 2001 From: "John W. Parent" <45471568+johnwparent@users.noreply.github.com> Date: Sat, 4 Jun 2022 18:06:43 -0400 Subject: Sanitize filepath from URL (#30625) Spack's staging logic constructs a file path based on a URL. The URL may contain characters which are not allowed in valid file paths on the system (e.g. Windows prohibits ':' and '?' among others). This commit adds a function to remove such offending characters (but otherwise preserves the URL string when constructing a file path). --- lib/spack/spack/stage.py | 5 +- lib/spack/spack/test/util/path.py | 157 ++++++++++++++++++++------------------ lib/spack/spack/util/path.py | 36 +++++++++ 3 files changed, 123 insertions(+), 75 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index f289305392..a5d29f2917 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -363,12 +363,13 @@ class Stage(object): def expected_archive_files(self): """Possible archive file paths.""" paths = [] - fnames = [] expanded = True if isinstance(self.default_fetcher, fs.URLFetchStrategy): expanded = self.default_fetcher.expand_archive - fnames.append(os.path.basename(self.default_fetcher.url)) + clean_url = os.path.basename( + sup.sanitize_file_path(self.default_fetcher.url)) + fnames.append(clean_url) if self.mirror_paths: fnames.extend(os.path.basename(x) for x in self.mirror_paths) diff --git a/lib/spack/spack/test/util/path.py b/lib/spack/spack/test/util/path.py index 7d2b81e8b3..11989517f9 100644 --- a/lib/spack/spack/test/util/path.py +++ b/lib/spack/spack/test/util/path.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import os import sys import pytest @@ -12,11 +13,8 @@ import llnl.util.tty as tty import spack.config import spack.util.path as sup -# This module pertains to path string padding manipulation specifically -# which is used for binary caching. This functionality is not supported -# on Windows as of yet. -pytestmark = pytest.mark.skipif(sys.platform == 'win32', - reason="Tests fail on Windows") +is_windows = sys.platform == 'win32' + #: Some lines with lots of placeholders padded_lines = [ @@ -34,74 +32,87 @@ fixed_lines = [ ] -@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) -def test_padding_substitution(padded, fixed): - """Ensure that all padded lines are unpadded correctly.""" - assert fixed == sup.padding_filter(padded) - - -def test_no_substitution(): - """Ensure that a line not containing one full path placeholder is not modified.""" - partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 - assert sup.padding_filter(partial) == partial - - -def test_short_substitution(): - """Ensure that a single placeholder path component is replaced""" - short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 - short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 - assert short_subst == sup.padding_filter(short) - - -def test_partial_substitution(): - """Ensure that a single placeholder path component is replaced""" - short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 - short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 - assert short_subst == sup.padding_filter(short) - - -def test_longest_prefix_re(): - """Test that longest_prefix_re generates correct regular expressions.""" - assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( - "string", capture=True - ) - assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( - "string", capture=False - ) +def test_sanitze_file_path(tmpdir): + """Test filtering illegal characters out of potential file paths""" + # *nix illegal files characters are '/' and none others + illegal_file_path = str(tmpdir) + '//' + 'abcdefghi.txt' + if is_windows: + # Windows has a larger set of illegal characters + illegal_file_path = os.path.join(tmpdir, 'acd?e:f"g|h*i.txt') + real_path = sup.sanitize_file_path(illegal_file_path) + assert real_path == os.path.join(str(tmpdir), 'abcdefghi.txt') -def test_output_filtering(capfd, install_mockery, mutable_config): - """Test filtering padding out of tty messages.""" - long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200) - padding_string = "[padded-to-%d-chars]" % len(long_path) - - # test filtering when padding is enabled - with spack.config.override('config:install_tree', {"padded_length": 256}): - # tty.msg with filtering on the first argument - with sup.filter_padding(): - tty.msg("here is a long path: %s/with/a/suffix" % long_path) - out, err = capfd.readouterr() - assert padding_string in out - - # tty.msg with filtering on a laterargument - with sup.filter_padding(): - tty.msg("here is a long path:", "%s/with/a/suffix" % long_path) - out, err = capfd.readouterr() - assert padding_string in out - - # tty.error with filtering on the first argument - with sup.filter_padding(): - tty.error("here is a long path: %s/with/a/suffix" % long_path) - out, err = capfd.readouterr() - assert padding_string in err - - # tty.error with filtering on a later argument - with sup.filter_padding(): - tty.error("here is a long path:", "%s/with/a/suffix" % long_path) +# This class pertains to path string padding manipulation specifically +# which is used for binary caching. This functionality is not supported +# on Windows as of yet. +@pytest.mark.skipif(is_windows, + reason='Padding funtionality unsupported on Windows') +class TestPathPadding(): + @pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) + def test_padding_substitution(self, padded, fixed): + """Ensure that all padded lines are unpadded correctly.""" + assert fixed == sup.padding_filter(padded) + + def test_no_substitution(self): + """Ensure that a line not containing one full path placeholder + is not modified.""" + partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + assert sup.padding_filter(partial) == partial + + def test_short_substitution(self): + """Ensure that a single placeholder path component is replaced""" + short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + assert short_subst == sup.padding_filter(short) + + def test_partial_substitution(self): + """Ensure that a single placeholder path component is replaced""" + short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + assert short_subst == sup.padding_filter(short) + + def test_longest_prefix_re(self): + """Test that longest_prefix_re generates correct regular expressions.""" + assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( + "string", capture=True + ) + assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( + "string", capture=False + ) + + def test_output_filtering(self, capfd, install_mockery, mutable_config): + """Test filtering padding out of tty messages.""" + long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200) + padding_string = "[padded-to-%d-chars]" % len(long_path) + + # test filtering when padding is enabled + with spack.config.override('config:install_tree', {"padded_length": 256}): + # tty.msg with filtering on the first argument + with sup.filter_padding(): + tty.msg("here is a long path: %s/with/a/suffix" % long_path) + out, err = capfd.readouterr() + assert padding_string in out + + # tty.msg with filtering on a laterargument + with sup.filter_padding(): + tty.msg("here is a long path:", "%s/with/a/suffix" % long_path) + out, err = capfd.readouterr() + assert padding_string in out + + # tty.error with filtering on the first argument + with sup.filter_padding(): + tty.error("here is a long path: %s/with/a/suffix" % long_path) + out, err = capfd.readouterr() + assert padding_string in err + + # tty.error with filtering on a later argument + with sup.filter_padding(): + tty.error("here is a long path:", "%s/with/a/suffix" % long_path) + out, err = capfd.readouterr() + assert padding_string in err + + # test no filtering + tty.msg("here is a long path: %s/with/a/suffix" % long_path) out, err = capfd.readouterr() - assert padding_string in err - - # test no filtering - tty.msg("here is a long path: %s/with/a/suffix" % long_path) - out, err = capfd.readouterr() - assert padding_string not in out + assert padding_string not in out diff --git a/lib/spack/spack/util/path.py b/lib/spack/spack/util/path.py index fd51acbce7..f2b52e57c3 100644 --- a/lib/spack/spack/util/path.py +++ b/lib/spack/spack/util/path.py @@ -87,6 +87,42 @@ def path_to_os_path(*pths): return ret_pths +def sanitize_file_path(pth): + """ + Formats strings to contain only characters that can + be used to generate legal file paths. + + Criteria for legal files based on + https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations + + Args: + pth: string containing path to be created + on the host filesystem + + Return: + sanitized string that can legally be made into a path + """ + # on unix, splitting path by seperators will remove + # instances of illegal characters on join + pth_cmpnts = pth.split(os.path.sep) + + if is_windows: + drive_match = r'[a-zA-Z]:' + is_abs = bool(re.match(drive_match, pth_cmpnts[0])) + drive = pth_cmpnts[0] + os.path.sep if is_abs else '' + pth_cmpnts = pth_cmpnts[1:] if drive else pth_cmpnts + illegal_chars = r'[<>?:"|*\\]' + else: + drive = '/' if not pth_cmpnts[0] else '' + illegal_chars = r'[/]' + + pth = [] + for cmp in pth_cmpnts: + san_cmp = re.sub(illegal_chars, '', cmp) + pth.append(san_cmp) + return drive + os.path.join(*pth) + + def system_path_filter(_func=None, arg_slice=None): """ Filters function arguments to account for platform path separators. -- cgit v1.2.3-70-g09d2