diff options
author | John W. Parent <45471568+johnwparent@users.noreply.github.com> | 2022-06-06 21:14:43 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-06 18:14:43 -0700 |
commit | 5b45df526965335e0e93c845ca5d0ac99da64280 (patch) | |
tree | e628e93d27c6c9b66537fe9c626dc84c50652302 /lib/spack/spack/util/compression.py | |
parent | 9d7cc436732eaa21679480219a5c7ed96450eea0 (diff) | |
download | spack-5b45df526965335e0e93c845ca5d0ac99da64280.tar.gz spack-5b45df526965335e0e93c845ca5d0ac99da64280.tar.bz2 spack-5b45df526965335e0e93c845ca5d0ac99da64280.tar.xz spack-5b45df526965335e0e93c845ca5d0ac99da64280.zip |
Update decompression support on Windows (#25185)
Most package installations include compressed source files. This
adds support for common archive types on Windows:
* Add support for using system 7zip functionality to decompress .Z
files when available (and on Windows, use 7zip for .xz archives)
* Default to using built-in Python support for tar/bz2 decompression
(note that Python tar documentation mentions preservation of file
permissions)
* Add tests for decompression support
* Extract logic for handling exploding archives (i.e. compressed
archives that expand to more than one base file) into an
exploding_archive_catch context manager in the filesystem module
Diffstat (limited to 'lib/spack/spack/util/compression.py')
-rw-r--r-- | lib/spack/spack/util/compression.py | 255 |
1 file changed, 234 insertions, 21 deletions
diff --git a/lib/spack/spack/util/compression.py b/lib/spack/spack/util/compression.py index a5b7db4ec2..9a8eda0c91 100644 --- a/lib/spack/spack/util/compression.py +++ b/lib/spack/spack/util/compression.py @@ -5,10 +5,11 @@ import os import re +import shutil import sys from itertools import product -from spack.util.executable import which +from spack.util.executable import CommandNotFoundError, which # Supported archive extensions. PRE_EXTS = ["tar", "TAR"] @@ -22,35 +23,146 @@ ALLOWED_ARCHIVE_TYPES = [".".join(ext) for ext in product( is_windows = sys.platform == 'win32' +def bz2_support(): + try: + import bz2 # noqa + return True + except ImportError: + return False + + +def gzip_support(): + try: + import gzip # noqa + return True + except ImportError: + return False + + +def lzma_support(): + try: + import lzma # noqa # novermin + return True + except ImportError: + return False + + +def tar_support(): + try: + import tarfile # noqa + return True + except ImportError: + return False + + def allowed_archive(path): - return any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES) + return False if not path else \ + any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES) + + +def _untar(archive_file): + """ Untar archive. Prefer native Python `tarfile` + but fall back to system utility if there is a failure + to find the native Python module (tar on Unix). + Filters archives through native support gzip and xz + compression formats. + + Args: + archive_file (str): absolute path to the archive to be extracted. + Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz). 
+ """ + _, ext = os.path.splitext(archive_file) + outfile = os.path.basename(archive_file.strip(ext)) + uncompress_required = 'Z' in ext + lzma_required = 'xz' in ext + lzma_needed_and_not_available = not lzma_support() and lzma_required + if tar_support() and not uncompress_required and\ + not lzma_needed_and_not_available: + import tarfile + tar = tarfile.open(archive_file) + tar.extractall() + tar.close() + else: + tar = which('tar', required=True) + tar.add_default_arg('-oxf') + tar(archive_file) + return outfile + + +def _bunzip2(archive_file): + """ Use Python's bz2 module to decompress bz2 compressed archives + Fall back to system utility failing to find Python module `bz2` + + Args: + archive_file (str): absolute path to the bz2 archive to be decompressed + """ + _, ext = os.path.splitext(archive_file) + compressed_file_name = os.path.basename(archive_file) + decompressed_file = os.path.basename(archive_file.strip(ext)) + working_dir = os.getcwd() + archive_out = os.path.join(working_dir, decompressed_file) + copy_path = os.path.join(working_dir, compressed_file_name) + if bz2_support(): + import bz2 + f_bz = bz2.BZ2File(archive_file, mode='rb') + with open(archive_out, 'wb') as ar: + ar.write(f_bz.read()) + f_bz.close() + else: + shutil.copy(archive_file, copy_path) + bunzip2 = which('bunzip2', required=True) + bunzip2.add_default_arg('-q') + return bunzip2(copy_path) + return archive_out def _gunzip(archive_file): - """Like gunzip, but extracts in the current working directory + """ Decompress `.gz` extensions. Prefer native Python `gzip` module. + Failing back to system utility gunzip. + Like gunzip, but extracts in the current working directory instead of in-place. 
Args: archive_file (str): absolute path of the file to be decompressed """ - import gzip - decompressed_file = os.path.basename(archive_file.strip('.gz')) + _, ext = os.path.splitext(archive_file) + decompressed_file = os.path.basename(archive_file.strip(ext)) working_dir = os.getcwd() destination_abspath = os.path.join(working_dir, decompressed_file) - with gzip.open(archive_file, "rb") as f_in: + if gzip_support(): + import gzip + f_in = gzip.open(archive_file, "rb") with open(destination_abspath, "wb") as f_out: f_out.write(f_in.read()) + else: + _system_gunzip(archive_file) + return destination_abspath -def _unzip(archive_file): - """Try to use Python's zipfile, but extract in the current working - directory instead of in-place. +def _system_gunzip(archive_file): + _, ext = os.path.splitext(archive_file) + decompressed_file = os.path.basename(archive_file.strip(ext)) + working_dir = os.getcwd() + destination_abspath = os.path.join(working_dir, decompressed_file) + compressed_file = os.path.basename(archive_file) + copy_path = os.path.join(working_dir, compressed_file) + shutil.copy(archive_file, copy_path) + gzip = which("gzip") + gzip.add_default_arg("-d") + gzip(copy_path) + return destination_abspath - If unavailable, search for 'unzip' executable on system and use instead + +def _unzip(archive_file): + """ + Extract Zipfile, searching for unzip system executable + If unavailable, search for 'tar' executable on system and use instead Args: archive_file (str): absolute path of the file to be decompressed """ + + destination_abspath = os.getcwd() exe = 'unzip' arg = '-q' if is_windows: @@ -59,21 +171,122 @@ def _unzip(archive_file): unzip = which(exe, required=True) unzip.add_default_arg(arg) unzip(archive_file) + return destination_abspath + +def _unZ(archive_file): + if is_windows: + result = _7zip(archive_file) + else: + result = _system_gunzip(archive_file) + return result + + +def _lzma_decomp(archive_file): + """Decompress lzma compressed files. 
Prefer Python native + lzma module, but fall back on command line xz tooling + to find available Python support. This is the xz command + on Unix and 7z on Windows""" + if lzma_support(): + import lzma # novermin + _, ext = os.path.splitext(archive_file) + decompressed_file = os.path.basename(archive_file.strip(ext)) + archive_out = os.path.join(os.getcwd(), decompressed_file) + with open(archive_out, 'wb') as ar: + with lzma.open(archive_file) as lar: + ar.write(lar.read()) + else: + if is_windows: + return _7zip(archive_file) + else: + return _xz(archive_file) + + +def _xz(archive_file): + """Decompress lzma compressed .xz files via xz command line + tool. Available only on Unix + """ + if is_windows: + raise RuntimeError('XZ tool unavailable on Windows') + _, ext = os.path.splitext(archive_file) + decompressed_file = os.path.basename(archive_file.strip(ext)) + working_dir = os.getcwd() + destination_abspath = os.path.join(working_dir, decompressed_file) + compressed_file = os.path.basename(archive_file) + copy_path = os.path.join(working_dir, compressed_file) + shutil.copy(archive_file, copy_path) + xz = which('xz', required=True) + xz.add_default_arg('-d') + xz(copy_path) + return destination_abspath + + +def _7zip(archive_file): + """Unpack/decompress with 7z executable + 7z is able to handle a number file extensions however + it may not be available on system. + + Without 7z, Windows users with certain versions of Python may + be unable to extract .xz files, and all Windows users will be unable + to extract .Z files. If we cannot find 7z either externally or a + Spack installed copy, we fail, but inform the user that 7z can + be installed via `spack install 7zip` + + Args: + archive_file (str): absolute path of file to be unarchived + """ + _, ext = os.path.splitext(archive_file) + outfile = os.path.basename(archive_file.strip(ext)) + _7z = which('7z') + if not _7z: + raise CommandNotFoundError("7z unavailable,\ +unable to extract %s files. 
7z can be installed via Spack" % ext) + _7z.add_default_arg('e') + _7z(archive_file) + return outfile + + +def decompressor_for(path, ext=None): + """Returns a function pointer to appropriate decompression + algorithm based on extension type. + + Args: + path (str): path of the archive file requiring decompression + ext (str): Extension of archive file + """ + if not ext: + ext = extension(path) + + if not allowed_archive(ext): + raise CommandNotFoundError("Cannot extract archive, \ +unrecognized file extension: '%s'" % ext) -def decompressor_for(path, extension=None): - """Get the appropriate decompressor for a path.""" - if ((extension and re.match(r'\.?zip$', extension)) or - path.endswith('.zip')): + if re.match(r'\.?zip$', ext) or path.endswith('.zip'): return _unzip - if extension and re.match(r'gz', extension): + + if re.match(r'gz', ext): return _gunzip - if extension and re.match(r'bz2', extension): - bunzip2 = which('bunzip2', required=True) - return bunzip2 - tar = which('tar', required=True) - tar.add_default_arg('-oxf') - return tar + + if re.match(r'bz2', ext): + return _bunzip2 + + # Python does not have native support + # of any kind for .Z files. In these cases, + # we rely on external tools such as tar, + # 7z, or uncompressZ + if re.match(r'Z$', ext): + return _unZ + + # Python and platform may not have support for lzma + # compression. If no lzma support, use tools available on systems + # 7zip on Windows and the xz tool on Unix systems. + if re.match(r'xz', ext): + return _lzma_decomp + + if ('xz' in ext or 'Z' in ext) and is_windows: + return _7zip + + return _untar def strip_extension(path): |