summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJohn W. Parent <45471568+johnwparent@users.noreply.github.com>2022-06-04 18:06:43 -0400
committerGitHub <noreply@github.com>2022-06-04 15:06:43 -0700
commit13b0e73a4eed301a75465ef5f1ef8980f4621605 (patch)
tree278a7b97a9752b2e3fcad2ded787ab340f6b07af /lib
parent03c54aebdd914ada16925db7ca455132c45b8a45 (diff)
downloadspack-13b0e73a4eed301a75465ef5f1ef8980f4621605.tar.gz
spack-13b0e73a4eed301a75465ef5f1ef8980f4621605.tar.bz2
spack-13b0e73a4eed301a75465ef5f1ef8980f4621605.tar.xz
spack-13b0e73a4eed301a75465ef5f1ef8980f4621605.zip
Sanitize filepath from URL (#30625)
Spack's staging logic constructs a file path based on a URL. The URL may contain characters which are not allowed in valid file paths on the system (e.g. Windows prohibits ':' and '?' among others). This commit adds a function to remove such offending characters (but otherwise preserves the URL string when constructing a file path).
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/stage.py5
-rw-r--r--lib/spack/spack/test/util/path.py157
-rw-r--r--lib/spack/spack/util/path.py36
3 files changed, 123 insertions, 75 deletions
diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index f289305392..a5d29f2917 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -363,12 +363,13 @@ class Stage(object):
def expected_archive_files(self):
"""Possible archive file paths."""
paths = []
-
fnames = []
expanded = True
if isinstance(self.default_fetcher, fs.URLFetchStrategy):
expanded = self.default_fetcher.expand_archive
- fnames.append(os.path.basename(self.default_fetcher.url))
+ clean_url = os.path.basename(
+ sup.sanitize_file_path(self.default_fetcher.url))
+ fnames.append(clean_url)
if self.mirror_paths:
fnames.extend(os.path.basename(x) for x in self.mirror_paths)
diff --git a/lib/spack/spack/test/util/path.py b/lib/spack/spack/test/util/path.py
index 7d2b81e8b3..11989517f9 100644
--- a/lib/spack/spack/test/util/path.py
+++ b/lib/spack/spack/test/util/path.py
@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import os
import sys
import pytest
@@ -12,11 +13,8 @@ import llnl.util.tty as tty
import spack.config
import spack.util.path as sup
-# This module pertains to path string padding manipulation specifically
-# which is used for binary caching. This functionality is not supported
-# on Windows as of yet.
-pytestmark = pytest.mark.skipif(sys.platform == 'win32',
- reason="Tests fail on Windows")
+is_windows = sys.platform == 'win32'
+
#: Some lines with lots of placeholders
padded_lines = [
@@ -34,74 +32,87 @@ fixed_lines = [
]
-@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines))
-def test_padding_substitution(padded, fixed):
- """Ensure that all padded lines are unpadded correctly."""
- assert fixed == sup.padding_filter(padded)
-
-
-def test_no_substitution():
- """Ensure that a line not containing one full path placeholder is not modified."""
- partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
- assert sup.padding_filter(partial) == partial
-
-
-def test_short_substitution():
- """Ensure that a single placeholder path component is replaced"""
- short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
- short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
- assert short_subst == sup.padding_filter(short)
-
-
-def test_partial_substitution():
- """Ensure that a single placeholder path component is replaced"""
- short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
- short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
- assert short_subst == sup.padding_filter(short)
-
-
-def test_longest_prefix_re():
- """Test that longest_prefix_re generates correct regular expressions."""
- assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
- "string", capture=True
- )
- assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
- "string", capture=False
- )
+def test_sanitze_file_path(tmpdir):
+ """Test filtering illegal characters out of potential file paths"""
+ # *nix illegal files characters are '/' and none others
+ illegal_file_path = str(tmpdir) + '//' + 'abcdefghi.txt'
+ if is_windows:
+ # Windows has a larger set of illegal characters
+ illegal_file_path = os.path.join(tmpdir, 'a<b>cd?e:f"g|h*i.txt')
+ real_path = sup.sanitize_file_path(illegal_file_path)
+ assert real_path == os.path.join(str(tmpdir), 'abcdefghi.txt')
-def test_output_filtering(capfd, install_mockery, mutable_config):
- """Test filtering padding out of tty messages."""
- long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200)
- padding_string = "[padded-to-%d-chars]" % len(long_path)
-
- # test filtering when padding is enabled
- with spack.config.override('config:install_tree', {"padded_length": 256}):
- # tty.msg with filtering on the first argument
- with sup.filter_padding():
- tty.msg("here is a long path: %s/with/a/suffix" % long_path)
- out, err = capfd.readouterr()
- assert padding_string in out
-
- # tty.msg with filtering on a laterargument
- with sup.filter_padding():
- tty.msg("here is a long path:", "%s/with/a/suffix" % long_path)
- out, err = capfd.readouterr()
- assert padding_string in out
-
- # tty.error with filtering on the first argument
- with sup.filter_padding():
- tty.error("here is a long path: %s/with/a/suffix" % long_path)
- out, err = capfd.readouterr()
- assert padding_string in err
-
- # tty.error with filtering on a later argument
- with sup.filter_padding():
- tty.error("here is a long path:", "%s/with/a/suffix" % long_path)
+# This class pertains to path string padding manipulation specifically
+# which is used for binary caching. This functionality is not supported
+# on Windows as of yet.
+@pytest.mark.skipif(is_windows,
+ reason='Padding funtionality unsupported on Windows')
+class TestPathPadding():
+ @pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines))
+ def test_padding_substitution(self, padded, fixed):
+ """Ensure that all padded lines are unpadded correctly."""
+ assert fixed == sup.padding_filter(padded)
+
+ def test_no_substitution(self):
+ """Ensure that a line not containing one full path placeholder
+ is not modified."""
+ partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
+ assert sup.padding_filter(partial) == partial
+
+ def test_short_substitution(self):
+ """Ensure that a single placeholder path component is replaced"""
+ short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
+ short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
+ assert short_subst == sup.padding_filter(short)
+
+ def test_partial_substitution(self):
+ """Ensure that a single placeholder path component is replaced"""
+ short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
+ short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
+ assert short_subst == sup.padding_filter(short)
+
+ def test_longest_prefix_re(self):
+ """Test that longest_prefix_re generates correct regular expressions."""
+ assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
+ "string", capture=True
+ )
+ assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
+ "string", capture=False
+ )
+
+ def test_output_filtering(self, capfd, install_mockery, mutable_config):
+ """Test filtering padding out of tty messages."""
+ long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200)
+ padding_string = "[padded-to-%d-chars]" % len(long_path)
+
+ # test filtering when padding is enabled
+ with spack.config.override('config:install_tree', {"padded_length": 256}):
+ # tty.msg with filtering on the first argument
+ with sup.filter_padding():
+ tty.msg("here is a long path: %s/with/a/suffix" % long_path)
+ out, err = capfd.readouterr()
+ assert padding_string in out
+
+ # tty.msg with filtering on a laterargument
+ with sup.filter_padding():
+ tty.msg("here is a long path:", "%s/with/a/suffix" % long_path)
+ out, err = capfd.readouterr()
+ assert padding_string in out
+
+ # tty.error with filtering on the first argument
+ with sup.filter_padding():
+ tty.error("here is a long path: %s/with/a/suffix" % long_path)
+ out, err = capfd.readouterr()
+ assert padding_string in err
+
+ # tty.error with filtering on a later argument
+ with sup.filter_padding():
+ tty.error("here is a long path:", "%s/with/a/suffix" % long_path)
+ out, err = capfd.readouterr()
+ assert padding_string in err
+
+ # test no filtering
+ tty.msg("here is a long path: %s/with/a/suffix" % long_path)
out, err = capfd.readouterr()
- assert padding_string in err
-
- # test no filtering
- tty.msg("here is a long path: %s/with/a/suffix" % long_path)
- out, err = capfd.readouterr()
- assert padding_string not in out
+ assert padding_string not in out
diff --git a/lib/spack/spack/util/path.py b/lib/spack/spack/util/path.py
index fd51acbce7..f2b52e57c3 100644
--- a/lib/spack/spack/util/path.py
+++ b/lib/spack/spack/util/path.py
@@ -87,6 +87,42 @@ def path_to_os_path(*pths):
return ret_pths
+def sanitize_file_path(pth):
+ """
+ Formats strings to contain only characters that can
+ be used to generate legal file paths.
+
+ Criteria for legal files based on
+ https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations
+
+ Args:
+ pth: string containing path to be created
+ on the host filesystem
+
+ Return:
+ sanitized string that can legally be made into a path
+ """
+ # on unix, splitting path by seperators will remove
+ # instances of illegal characters on join
+ pth_cmpnts = pth.split(os.path.sep)
+
+ if is_windows:
+ drive_match = r'[a-zA-Z]:'
+ is_abs = bool(re.match(drive_match, pth_cmpnts[0]))
+ drive = pth_cmpnts[0] + os.path.sep if is_abs else ''
+ pth_cmpnts = pth_cmpnts[1:] if drive else pth_cmpnts
+ illegal_chars = r'[<>?:"|*\\]'
+ else:
+ drive = '/' if not pth_cmpnts[0] else ''
+ illegal_chars = r'[/]'
+
+ pth = []
+ for cmp in pth_cmpnts:
+ san_cmp = re.sub(illegal_chars, '', cmp)
+ pth.append(san_cmp)
+ return drive + os.path.join(*pth)
+
+
def system_path_filter(_func=None, arg_slice=None):
"""
Filters function arguments to account for platform path separators.