summaryrefslogtreecommitdiff
path: root/lib/spack/spack/util/compression.py
diff options
context:
space:
mode:
author: John W. Parent <45471568+johnwparent@users.noreply.github.com> 2022-06-06 21:14:43 -0400
committer: GitHub <noreply@github.com> 2022-06-06 18:14:43 -0700
commit: 5b45df526965335e0e93c845ca5d0ac99da64280 (patch)
tree: e628e93d27c6c9b66537fe9c626dc84c50652302 /lib/spack/spack/util/compression.py
parent: 9d7cc436732eaa21679480219a5c7ed96450eea0 (diff)
download: spack-5b45df526965335e0e93c845ca5d0ac99da64280.tar.gz
spack-5b45df526965335e0e93c845ca5d0ac99da64280.tar.bz2
spack-5b45df526965335e0e93c845ca5d0ac99da64280.tar.xz
spack-5b45df526965335e0e93c845ca5d0ac99da64280.zip
Update decompression support on Windows (#25185)
Most package installations include compressed source files. This adds support for common archive types on Windows:
* Add support for using system 7zip functionality to decompress .Z files when available (and on Windows, use 7zip for .xz archives)
* Default to using built-in Python support for tar/bz2 decompression (note that the Python tar documentation mentions preservation of file permissions)
* Add tests for decompression support
* Extract logic for handling exploding archives (i.e. compressed archives that expand to more than one base file) into an exploding_archive_catch context manager in the filesystem module
Diffstat (limited to 'lib/spack/spack/util/compression.py')
-rw-r--r--lib/spack/spack/util/compression.py255
1 files changed, 234 insertions, 21 deletions
diff --git a/lib/spack/spack/util/compression.py b/lib/spack/spack/util/compression.py
index a5b7db4ec2..9a8eda0c91 100644
--- a/lib/spack/spack/util/compression.py
+++ b/lib/spack/spack/util/compression.py
@@ -5,10 +5,11 @@
import os
import re
+import shutil
import sys
from itertools import product
-from spack.util.executable import which
+from spack.util.executable import CommandNotFoundError, which
# Supported archive extensions.
PRE_EXTS = ["tar", "TAR"]
@@ -22,35 +23,146 @@ ALLOWED_ARCHIVE_TYPES = [".".join(ext) for ext in product(
is_windows = sys.platform == 'win32'
def bz2_support():
    """Report whether the running interpreter can import ``bz2``.

    Returns:
        bool: True if ``import bz2`` succeeds, False if it raises
        ImportError.
    """
    available = True
    try:
        import bz2  # noqa
    except ImportError:
        available = False
    return available
+
+
def gzip_support():
    """Report whether the running interpreter can import ``gzip``.

    Returns:
        bool: True if ``import gzip`` succeeds, False if it raises
        ImportError.
    """
    available = True
    try:
        import gzip  # noqa
    except ImportError:
        available = False
    return available
+
+
def lzma_support():
    """Report whether the running interpreter can import ``lzma``.

    Returns:
        bool: True if ``import lzma`` succeeds, False if it raises
        ImportError.
    """
    available = True
    try:
        import lzma  # noqa # novermin
    except ImportError:
        available = False
    return available
+
+
def tar_support():
    """Report whether the running interpreter can import ``tarfile``.

    Returns:
        bool: True if ``import tarfile`` succeeds, False if it raises
        ImportError.
    """
    available = True
    try:
        import tarfile  # noqa
    except ImportError:
        available = False
    return available
+
+
def allowed_archive(path):
    """Tell whether ``path`` ends with a supported archive extension.

    Args:
        path (str): file name or extension to check; may be empty or None.

    Returns:
        bool: False for a falsy ``path``; otherwise True if ``path`` ends
        with any entry of ``ALLOWED_ARCHIVE_TYPES``.
    """
    # Guard first so that None/"" never reaches str.endswith.
    if not path:
        return False
    return any(path.endswith(suffix) for suffix in ALLOWED_ARCHIVE_TYPES)
+
+
def _untar(archive_file):
    """Extract a tar archive into the current working directory.

    Python's ``tarfile`` module is preferred. The system ``tar``
    executable (invoked as ``tar -oxf <archive>``) is used instead when
    ``tarfile`` cannot be imported, when the archive extension contains
    ``Z``, or when it contains ``xz`` and ``lzma`` is not importable.

    Args:
        archive_file (str): absolute path to the archive to be extracted.
            Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz).

    Returns:
        str: base name of ``archive_file`` with its last extension removed
        (e.g. ``foo.tar.gz`` -> ``foo.tar``).
    """
    # Remove the suffix with splitext. str.strip(ext) would treat ``ext``
    # as a *set of characters* and eat matching characters from both ends
    # of the name (e.g. "libpng.gz" -> "libpn").
    outfile, ext = os.path.splitext(os.path.basename(archive_file))
    uncompress_required = 'Z' in ext
    lzma_needed_and_not_available = 'xz' in ext and not lzma_support()
    if tar_support() and not uncompress_required and \
            not lzma_needed_and_not_available:
        import tarfile
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; confirm that archives reaching this point are trusted.
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(archive_file) as tar:
            tar.extractall()
    else:
        # which(..., required=True) is expected to raise when tar is absent.
        tar = which('tar', required=True)
        tar.add_default_arg('-oxf')
        tar(archive_file)
    return outfile
+
+
def _bunzip2(archive_file):
    """Decompress a bz2 file into the current working directory.

    Python's ``bz2`` module is used when it can be imported. Otherwise the
    archive is copied into the working directory and the system
    ``bunzip2 -q`` executable is run on the copy.

    Args:
        archive_file (str): absolute path to the bz2 archive to be decompressed

    Returns:
        str: path of the decompressed file, i.e. the current working
        directory joined with the archive's base name minus its last
        extension.
    """
    compressed_file_name = os.path.basename(archive_file)
    # splitext removes exactly one suffix. str.strip(ext) would strip any
    # of the characters in ``ext`` from both ends of the name.
    decompressed_file = os.path.splitext(compressed_file_name)[0]
    working_dir = os.getcwd()
    archive_out = os.path.join(working_dir, decompressed_file)
    if bz2_support():
        import bz2
        # Both handles are closed on error, and the payload is streamed
        # rather than read into memory in one piece.
        with bz2.BZ2File(archive_file, mode='rb') as f_bz, \
                open(archive_out, 'wb') as ar:
            shutil.copyfileobj(f_bz, ar)
    else:
        copy_path = os.path.join(working_dir, compressed_file_name)
        shutil.copy(archive_file, copy_path)
        bunzip2 = which('bunzip2', required=True)
        bunzip2.add_default_arg('-q')
        bunzip2(copy_path)
    # Return the output path on both branches, matching the other helpers.
    return archive_out
def _gunzip(archive_file):
    """Decompress a `.gz` file into the current working directory.

    Prefers Python's ``gzip`` module and falls back to the system gzip
    utility via ``_system_gunzip``. Like gunzip, but extracts in the
    current working directory instead of in-place.

    Args:
        archive_file (str): absolute path of the file to be decompressed

    Returns:
        str: path of the decompressed file, i.e. the current working
        directory joined with the archive's base name minus its last
        extension.
    """
    # splitext removes exactly one suffix. str.strip(ext) would strip any
    # of the characters in ``ext`` from both ends (e.g. "libpng.gz" ->
    # "libpn").
    decompressed_file = os.path.splitext(os.path.basename(archive_file))[0]
    destination_abspath = os.path.join(os.getcwd(), decompressed_file)
    if gzip_support():
        import gzip
        # Close the gzip handle deterministically and stream the payload
        # instead of holding the whole decompressed file in memory.
        with gzip.open(archive_file, "rb") as f_in, \
                open(destination_abspath, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
    else:
        _system_gunzip(archive_file)
    return destination_abspath
-def _unzip(archive_file):
- """Try to use Python's zipfile, but extract in the current working
- directory instead of in-place.
def _system_gunzip(archive_file):
    """Decompress a file with the system ``gzip -d`` executable.

    The archive is first copied into the current working directory and
    ``gzip -d`` is run on that copy, so the original is left untouched.

    Args:
        archive_file (str): absolute path of the file to be decompressed

    Returns:
        str: expected path of the decompressed file, i.e. the current
        working directory joined with the archive's base name minus its
        last extension.
    """
    compressed_file = os.path.basename(archive_file)
    # splitext removes exactly one suffix. str.strip(ext) would strip any
    # of the characters in ``ext`` from both ends of the name.
    decompressed_file = os.path.splitext(compressed_file)[0]
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    copy_path = os.path.join(working_dir, compressed_file)
    shutil.copy(archive_file, copy_path)
    # required=True matches the other helpers in this file: a missing
    # executable is reported by which() rather than surfacing later as an
    # AttributeError on None.
    gzip = which("gzip", required=True)
    gzip.add_default_arg("-d")
    gzip(copy_path)
    return destination_abspath
- If unavailable, search for 'unzip' executable on system and use instead
+
+def _unzip(archive_file):
+ """
+ Extract Zipfile, searching for unzip system executable
+ If unavailable, search for 'tar' executable on system and use instead
Args:
archive_file (str): absolute path of the file to be decompressed
"""
+
+ destination_abspath = os.getcwd()
exe = 'unzip'
arg = '-q'
if is_windows:
@@ -59,21 +171,122 @@ def _unzip(archive_file):
unzip = which(exe, required=True)
unzip.add_default_arg(arg)
unzip(archive_file)
+ return destination_abspath
+
def _unZ(archive_file):
    """Decompress a ``.Z`` file with an external tool.

    Dispatches to ``_7zip`` on Windows and to ``_system_gunzip`` on every
    other platform.

    Args:
        archive_file (str): path of the file to be decompressed

    Returns:
        Whatever the selected helper returns.
    """
    if is_windows:
        return _7zip(archive_file)
    return _system_gunzip(archive_file)
+
+
def _lzma_decomp(archive_file):
    """Decompress an lzma compressed file into the working directory.

    Prefers Python's native ``lzma`` module. When that is not importable,
    falls back on command line tooling: ``_7zip`` on Windows and ``_xz``
    elsewhere.

    Args:
        archive_file (str): path of the file to be decompressed

    Returns:
        str: on the native path, the current working directory joined with
        the archive's base name minus its last extension; otherwise
        whatever the fallback helper returns.
    """
    if lzma_support():
        import lzma  # novermin
        # splitext removes exactly one suffix. str.strip(ext) would strip
        # any of the characters in ``ext`` from both ends of the name.
        decompressed_file = os.path.splitext(
            os.path.basename(archive_file))[0]
        archive_out = os.path.join(os.getcwd(), decompressed_file)
        # Open the input first so that a bad archive does not leave an
        # empty output file behind, then stream the payload across.
        with lzma.open(archive_file) as lar, open(archive_out, 'wb') as ar:
            shutil.copyfileobj(lar, ar)
        # This branch previously fell off the end and returned None.
        return archive_out
    if is_windows:
        return _7zip(archive_file)
    return _xz(archive_file)
+
+
def _xz(archive_file):
    """Decompress an lzma compressed .xz file via the xz command line tool.

    The archive is copied into the current working directory and ``xz -d``
    is run on the copy. Available only on Unix.

    Args:
        archive_file (str): path of the file to be decompressed

    Returns:
        str: expected path of the decompressed file, i.e. the current
        working directory joined with the archive's base name minus its
        last extension.

    Raises:
        RuntimeError: when called on Windows.
    """
    if is_windows:
        raise RuntimeError('XZ tool unavailable on Windows')
    compressed_file = os.path.basename(archive_file)
    # splitext removes exactly one suffix. str.strip(ext) would strip any
    # of the characters in ``ext`` from both ends (e.g. "xz-5.2.xz" ->
    # "-5.2").
    decompressed_file = os.path.splitext(compressed_file)[0]
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    copy_path = os.path.join(working_dir, compressed_file)
    shutil.copy(archive_file, copy_path)
    xz = which('xz', required=True)
    xz.add_default_arg('-d')
    xz(copy_path)
    return destination_abspath
+
+
def _7zip(archive_file):
    """Unpack/decompress with the 7z executable (``7z e <archive>``).

    7z is able to handle a number of file extensions; however, it may not
    be available on the system.

    Without 7z, Windows users with certain versions of Python may
    be unable to extract .xz files, and all Windows users will be unable
    to extract .Z files. If 7z cannot be found we fail, and the error
    message tells the user that 7z can be installed via Spack.

    Args:
        archive_file (str): absolute path of file to be unarchived

    Returns:
        str: base name of ``archive_file`` with its last extension removed.

    Raises:
        CommandNotFoundError: if no ``7z`` executable is found.
    """
    # splitext removes exactly one suffix. str.strip(ext) would strip any
    # of the characters in ``ext`` from both ends of the name.
    outfile, ext = os.path.splitext(os.path.basename(archive_file))
    _7z = which('7z')
    if not _7z:
        # Adjacent literals instead of a backslash continuation, which had
        # glued "unavailable," and "unable" together without a space.
        raise CommandNotFoundError(
            "7z unavailable, unable to extract %s files. "
            "7z can be installed via Spack" % ext)
    _7z.add_default_arg('e')
    _7z(archive_file)
    return outfile
+
+
def decompressor_for(path, ext=None):
    """Pick the decompression function matching an archive's extension.

    Args:
        path (str): path of the archive file requiring decompression
        ext (str): Extension of archive file; derived from ``path`` via
            ``extension(path)`` when not given

    Returns:
        callable: one of the module's decompression helpers, each of which
        takes the archive path as its only argument.

    Raises:
        CommandNotFoundError: if the extension is not an allowed archive
            type.
    """
    if not ext:
        ext = extension(path)

    if not allowed_archive(ext):
        raise CommandNotFoundError(
            "Cannot extract archive, "
            "unrecognized file extension: '%s'" % ext)

    if re.match(r'\.?zip$', ext) or path.endswith('.zip'):
        return _unzip

    # Ordered (pattern, handler) pairs; the first pattern matching at the
    # start of ``ext`` wins.
    #  * Z:  Python has no native support of any kind for .Z files, so
    #        external tools are relied upon.
    #  * xz: Python and the platform may lack lzma support; the handler
    #        falls back to 7zip on Windows and the xz tool on Unix.
    single_stream_handlers = (
        (r'gz', _gunzip),
        (r'bz2', _bunzip2),
        (r'Z$', _unZ),
        (r'xz', _lzma_decomp),
    )
    for pattern, handler in single_stream_handlers:
        if re.match(pattern, ext):
            return handler

    # Remaining extensions that mention xz or Z go to 7zip on Windows.
    if is_windows and ('xz' in ext or 'Z' in ext):
        return _7zip

    return _untar
def strip_extension(path):