summaryrefslogtreecommitdiff
path: root/lib/spack/spack/util/compression.py
blob: 44d19bd00c05533cb992fbb8aae35c0b74952092 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import os
import re
from itertools import product

from spack.util.executable import which

# Archive extensions understood by this module.
# PRE_EXTS may be combined with any entry of EXTS (e.g. "tar" + "gz");
# NOTAR_EXTS are only ever used on their own.
PRE_EXTS = ["tar", "TAR"]
EXTS = ["gz", "bz2", "xz", "Z"]
NOTAR_EXTS = ["zip", "tgz", "tbz", "tbz2", "txz"]

# Compound extensions are listed first so that lookups walking this list
# in order hit ".tar.gz" *before* the shorter ".tar" or ".gz".
ALLOWED_ARCHIVE_TYPES = (
    ["%s.%s" % pair for pair in product(PRE_EXTS, EXTS)]
    + PRE_EXTS
    + EXTS
    + NOTAR_EXTS
)


def allowed_archive(path):
    """Tell whether ``path`` ends with one of ``ALLOWED_ARCHIVE_TYPES``.

    This is a plain suffix comparison: the entries are compared as-is,
    without requiring a ``.`` in front of them.

    Args:
        path (str): path or file name to check

    Returns:
        bool: True if any allowed archive type is a suffix of ``path``
    """
    for archive_type in ALLOWED_ARCHIVE_TYPES:
        if path.endswith(archive_type):
            return True
    return False


def _gunzip(archive_file):
    """Like gunzip, but extracts in the current working directory
    instead of in-place.

    The output file is named after the basename of ``archive_file`` with
    a trailing ``.gz`` removed; if the name has no such suffix it is used
    unchanged.

    Args:
        archive_file (str): absolute path of the file to be decompressed
    """
    import gzip
    import shutil

    # Remove only a literal trailing ".gz". str.strip('.gz') treats its
    # argument as a *set* of characters and would also eat any '.', 'g'
    # or 'z' at either end of the name itself ("zlog.gz" -> "lo").
    gz_suffix = '.gz'
    decompressed_file = os.path.basename(archive_file)
    if decompressed_file.endswith(gz_suffix):
        decompressed_file = decompressed_file[:-len(gz_suffix)]

    destination_abspath = os.path.join(os.getcwd(), decompressed_file)
    with gzip.open(archive_file, "rb") as f_in:
        with open(destination_abspath, "wb") as f_out:
            # Copy in chunks so large archives are not held in memory.
            shutil.copyfileobj(f_in, f_out)


def _unzip(archive_file):
    """Extract a zip archive, preferring Python's zipfile module.

    With ``zipfile`` the contents are extracted into the current working
    directory instead of in-place. If ``zipfile`` cannot be imported, the
    external ``unzip`` program is run on the archive with ``-q``.

    Args:
        archive_file (str): absolute path of the file to be decompressed
    """
    # Only the import is guarded, so an ImportError can never be confused
    # with a failure that happens during extraction.
    try:
        from zipfile import ZipFile
    except ImportError:
        unzip = which('unzip', required=True)
        unzip.add_default_arg('-q')
        # This function is used as the decompressor itself, so the
        # external tool has to be run here; handing the executable back
        # to the caller would leave the archive unextracted.
        unzip(archive_file)
        return

    with ZipFile(archive_file, 'r') as zf:
        zf.extractall(os.getcwd())


def decompressor_for(path, extension=None):
    """Pick the callable used to decompress ``path``.

    Selection order:

    1. ``extension`` is ``zip``/``.zip`` or ``path`` ends with ``.zip``:
       the ``_unzip`` function.
    2. ``extension`` starts with ``gz``: the ``_gunzip`` function.
    3. ``extension`` starts with ``bz2``: the ``bunzip2`` executable.
    4. Anything else: the ``tar`` executable with ``-oxf`` as default
       argument.

    Args:
        path (str): path of the archive
        extension (str or None): archive extension, if already known

    Returns:
        A function or an executable looked up with ``which``.
    """
    # A missing/empty extension behaves like one that matches nothing.
    ext = extension or ''

    if re.match(r'\.?zip$', ext) or path.endswith('.zip'):
        return _unzip

    if re.match(r'gz', ext):
        return _gunzip

    if re.match(r'bz2', ext):
        return which('bunzip2', required=True)

    untar = which('tar', required=True)
    untar.add_default_arg('-oxf')
    return untar


def strip_extension(path):
    """Get the part of a path that does not include its compressed
    type extension.

    The entries of ``ALLOWED_ARCHIVE_TYPES`` are tried in order and the
    first one found at the end of ``path`` (preceded by a ``.``) is
    removed together with that dot.

    Args:
        path (str): path or file name to strip

    Returns:
        str: ``path`` without its archive extension, or ``path`` unchanged
        if it ends with none of the allowed types
    """
    for ext in ALLOWED_ARCHIVE_TYPES:
        # Escape the extension: the '.' inside compound types such as
        # "tar.gz" must match a literal dot, not an arbitrary character.
        suffix = r'\.%s$' % re.escape(ext)
        stripped, count = re.subn(suffix, "", path)
        if count:
            return stripped
    return path


def extension(path):
    """Get the archive extension for a path.

    A trailing ``/download`` component on sourceforge.net / sf.net paths
    is dropped before the extension is looked up.

    Args:
        path (str): path or URL of the archive

    Returns:
        str or None: the first entry of ``ALLOWED_ARCHIVE_TYPES`` that
        ``path`` ends with (preceded by a ``.``), or None if there is none

    Raises:
        ValueError: if ``path`` is None
    """
    if path is None:
        raise ValueError("Can't call extension() on None")

    # Strip sourceforge suffix. Dots in the host names are escaped so
    # they only match a literal '.'.
    if re.search(r'((?:sourceforge\.net|sf\.net)/.*)/download$', path):
        path = os.path.dirname(path)

    for t in ALLOWED_ARCHIVE_TYPES:
        # Escape the extension so the '.' in compound types such as
        # "tar.gz" is matched literally.
        if re.search(r'\.%s$' % re.escape(t), path):
            return t
    return None