# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Test Spack's URL handling utility functions."""
import os
import os.path
import urllib.parse
import spack.util.path
import spack.util.url as url_util
def test_url_local_file_path(tmpdir):
# Create a file
path = str(tmpdir.join("hello.txt"))
with open(path, "wb") as f:
f.write(b"hello world")
# Go from path -> url -> path.
roundtrip = url_util.local_file_path(url_util.path_to_file_url(path))
# Verify it's the same file.
assert os.path.samefile(roundtrip, path)
# Test if it accepts urlparse objects
parsed = urllib.parse.urlparse(url_util.path_to_file_url(path))
assert os.path.samefile(url_util.local_file_path(parsed), path)
def test_url_local_file_path_no_file_scheme():
assert url_util.local_file_path("https://example.com/hello.txt") is None
assert url_util.local_file_path("C:\\Program Files\\hello.txt") is None
def test_relative_path_to_file_url(tmpdir):
# Create a file
path = str(tmpdir.join("hello.txt"))
with open(path, "wb") as f:
f.write(b"hello world")
with tmpdir.as_cwd():
roundtrip = url_util.local_file_path(url_util.path_to_file_url("hello.txt"))
assert os.path.samefile(roundtrip, path)
def test_url_join_local_paths():
# Resolve local link against page URL
# wrong:
assert (
url_util.join("s3://bucket/index.html", "../other-bucket/document.txt")
== "s3://bucket/other-bucket/document.txt"
)
# correct - need to specify resolve_href=True:
assert (
url_util.join("s3://bucket/index.html", "../other-bucket/document.txt", resolve_href=True)
== "s3://other-bucket/document.txt"
)
# same as above: make sure several components are joined together correctly
assert (
url_util.join(
# with resolve_href=True, first arg is the base url; can not be
# broken up
"s3://bucket/index.html",
# with resolve_href=True, remaining arguments are the components of
# the local href that needs to be resolved
"..",
"other-bucket",
"document.txt",
resolve_href=True,
)
== "s3://other-bucket/document.txt"
)
# Append local path components to prefix URL
# wrong:
assert (
url_util.join("https://mirror.spack.io/build_cache", "my-package", resolve_href=True)
== "https://mirror.spack.io/my-package"
)
# correct - Need to specify resolve_href=False:
assert (
url_util.join("https://mirror.spack.io/build_cache", "my-package", resolve_href=False)
== "https://mirror.spack.io/build_cache/my-package"
)
# same as above; make sure resolve_href=False is default
assert (
url_util.join("https://mirror.spack.io/build_cache", "my-package")
== "https://mirror.spack.io/build_cache/my-package"
)
# same as above: make sure several components are joined together correctly
assert (
url_util.join(
# with resolve_href=False, first arg is just a prefix. No
# resolution is done. So, there should be no difference between
# join('/a/b/c', 'd/e'),
# join('/a/b', 'c', 'd/e'),
# join('/a', 'b/c', 'd', 'e'), etc.
"https://mirror.spack.io",
"build_cache",
"my-package",
)
== "https://mirror.spack.io/build_cache/my-package"
)
# For s3:// URLs, the "netloc" (bucket) is considered part of the path.
# Make sure join() can cross bucket boundaries in this case.
args = ["s3://bucket/a/b", "new-bucket", "c"]
assert url_util.join(*args) == "s3://bucket/a/b/new-bucket/c"
args.insert(1, "..")
assert url_util.join(*args) == "s3://bucket/a/new-bucket/c"
args.insert(1, "..")
assert url_util.join(*args) == "s3://bucket/new-bucket/c"
# new-bucket is now the "netloc" (bucket name)
args.insert(1, "..")
assert url_util.join(*args) == "s3://new-bucket/c"
def test_url_join_absolute_paths():
# Handling absolute path components is a little tricky. To this end, we
# distinguish "absolute path components", from the more-familiar concept of
# "absolute paths" as they are understood for local filesystem paths.
#
# - All absolute paths are absolute path components. Joining a URL with
# these components has the effect of completely replacing the path of the
# URL with the absolute path. These components do not specify a URL
# scheme, so the scheme of the URL procuced when joining them depend on
# those provided by components that came before it (file:// assumed if no
# such scheme is provided).
# For eaxmple:
p = "/path/to/resource"
# ...is an absolute path
# http:// URL
assert url_util.join("http://example.com/a/b/c", p) == "http://example.com/path/to/resource"
# s3:// URL
# also notice how the netloc is treated as part of the path for s3:// URLs
assert url_util.join("s3://example.com/a/b/c", p) == "s3://path/to/resource"
# - URL components that specify a scheme are always absolute path
# components. Joining a base URL with these components effectively
# discards the base URL and "resets" the joining logic starting at the
# component in question and using it as the new base URL.
# For eaxmple:
p = "http://example.com/path/to"
# ...is an http:// URL
join_result = url_util.join(p, "resource")
assert join_result == "http://example.com/path/to/resource"
# works as if everything before the http:// URL was left out
assert url_util.join("literally", "does", "not", "matter", p, "resource") == join_result
assert url_util.join("file:///a/b/c", "./d") == "file:///a/b/c/d"
# Finally, resolve_href should have no effect for how absolute path
# components are handled because local hrefs can not be absolute path
# components.
args = [
"s3://does",
"not",
"matter",
"http://example.com",
"also",
"does",
"not",
"matter",
"/path",
]
expected = "http://example.com/path"
assert url_util.join(*args, resolve_href=True) == expected
assert url_util.join(*args, resolve_href=False) == expected
# resolve_href only matters for the local path components at the end of the
# argument list.
args[-1] = "/path/to/page"
args.extend(("..", "..", "resource"))
assert url_util.join(*args, resolve_href=True) == "http://example.com/resource"
assert url_util.join(*args, resolve_href=False) == "http://example.com/path/resource"
def test_default_download_name():
url = "https://example.com:1234/path/to/file.txt;params?abc=def#file=blob.tar"
filename = url_util.default_download_filename(url)
assert filename == spack.util.path.sanitize_filename(filename)
def test_default_download_name_dot_dot():
"""Avoid that downloaded files get names computed as ., .. or any hidden file."""
assert url_util.default_download_filename("https://example.com/.") == "_"
assert url_util.default_download_filename("https://example.com/..") == "_."
assert url_util.default_download_filename("https://example.com/.abcdef") == "_abcdef"