path: root/lib/spack/spack/ci.py

                                                                         

# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import base64
import copy
import json
import os
import re
import shutil
import stat
import tempfile
import zipfile

from six import iteritems
from six.moves.urllib.error import HTTPError, URLError
from six.moves.urllib.parse import urlencode
from six.moves.urllib.request import HTTPHandler, Request, build_opener

import llnl.util.filesystem as fs
import llnl.util.tty as tty

import spack
import spack.binary_distribution as bindist
import spack.compilers as compilers
import spack.config as cfg
import spack.environment as ev
import spack.main
import spack.mirror
import spack.paths
import spack.repo
import spack.util.executable as exe
import spack.util.gpg as gpg_util
import spack.util.spack_yaml as syaml
import spack.util.web as web_util
from spack.error import SpackError
from spack.spec import Spec

JOB_RETRY_CONDITIONS = [
    'always',
]

TEMP_STORAGE_MIRROR_NAME = 'ci_temporary_mirror'
SPACK_RESERVED_TAGS = ["public", "protected", "notary"]

spack_gpg = spack.main.SpackCommand('gpg')
spack_compiler = spack.main.SpackCommand('compiler')


class TemporaryDirectory(object):
    def __init__(self):
        self.temporary_directory = tempfile.mkdtemp()

    def __enter__(self):
        return self.temporary_directory

    def __exit__(self, exc_type, exc_value, exc_traceback):
        shutil.rmtree(self.temporary_directory)
        return False


def _create_buildgroup(opener, headers, url, project, group_name, group_type):
    data = {
        "newbuildgroup": group_name,
        "project": project,
        "type": group_type
    }

    enc_data = json.dumps(data).encode('utf-8')

    request = Request(url, data=enc_data, headers=headers)

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200 and response_code != 201:
        msg = 'Creating buildgroup failed (response code = {0}'.format(
            response_code)
        tty.warn(msg)
        return None

    response_text = response.read()
    response_json = json.loads(response_text)
    build_group_id = response_json['id']

    return build_group_id


def _populate_buildgroup(job_names, group_name, project, site,
                         credentials, cdash_url):
    url = "{0}/api/v1/buildgroup.php".format(cdash_url)

    headers = {
        'Authorization': 'Bearer {0}'.format(credentials),
        'Content-Type': 'application/json',
    }

    opener = build_opener(HTTPHandler)

    parent_group_id = _create_buildgroup(
        opener, headers, url, project, group_name, 'Daily')
    group_id = _create_buildgroup(
        opener, headers, url, project, 'Latest {0}'.format(group_name),
        'Latest')

    if not parent_group_id or not group_id:
        msg = 'Failed to create or retrieve buildgroups for {0}'.format(
            group_name)
        tty.warn(msg)
        return

    data = {
        'project': project,
        'buildgroupid': group_id,
        'dynamiclist': [{
            'match': name,
            'parentgroupid': parent_group_id,
            'site': site
        } for name in job_names]
    }

    enc_data = json.dumps(data).encode('utf-8')

    request = Request(url, data=enc_data, headers=headers)
    request.get_method = lambda: 'PUT'

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200:
        msg = 'Error response code ({0}) in _populate_buildgroup'.format(
            response_code)
        tty.warn(msg)


def _is_main_phase(phase_name):
    return True if phase_name == 'specs' else False


def get_job_name(phase, strip_compiler, spec, osarch, build_group):
    """ Given the necessary parts, format the gitlab job name

    Arguments:
        phase (str): Either 'specs' for the main phase, or the name of a
            bootstrapping phase
        strip_compiler (bool): Should compiler be stripped from job name
        spec (spack.spec.Spec): Spec job will build
        osarch: Architecture TODO: (this is a spack.spec.ArchSpec,
            but sphinx doesn't recognize the type and fails).
        build_group (str): Name of build group this job belongs to (a CDash
        notion)

    Returns: The job name
    """
    item_idx = 0
    format_str = ''
    format_args = []

    if phase:
        format_str += '({{{0}}})'.format(item_idx)
        format_args.append(phase)
        item_idx += 1

    format_str += ' {{{0}}}'.format(item_idx)
    format_args.append(spec.name)
    item_idx += 1

    format_str += '/{{{0}}}'.format(item_idx)
    format_args.append(spec.dag_hash(7))
    item_idx += 1

    format_str += ' {{{0}}}'.format(item_idx)
    format_args.append(spec.version)
    item_idx += 1

    if _is_main_phase(phase) is True or strip_compiler is False:
        format_str += ' {{{0}}}'.format(item_idx)
        format_args.append(spec.compiler)
        item_idx += 1

    format_str += ' {{{0}}}'.format(item_idx)
    format_args.append(osarch)
    item_idx += 1

    if build_group:
        format_str += ' {{{0}}}'.format(item_idx)
        format_args.append(build_group)
        item_idx += 1

    return format_str.format(*format_args)


def _get_cdash_build_name(spec, build_group):
    return '{0}@{1}%{2} arch={3} ({4})'.format(
        spec.name, spec.version, spec.compiler, spec.architecture, build_group)


def _remove_reserved_tags(tags):
    """Convenience function to strip reserved tags from jobs"""
    return [tag for tag in tags if tag not in SPACK_RESERVED_TAGS]


def _get_spec_string(spec):
    format_elements = [
        '{name}{@version}',
        '{%compiler}',
    ]

    if spec.architecture:
        format_elements.append(' {arch=architecture}')

    return spec.format(''.join(format_elements))


def _format_root_spec(spec, main_phase, strip_compiler):
    if main_phase is False and strip_compiler is True:
        return '{0}@{1} arch={2}'.format(
            spec.name, spec.version, spec.architecture)
    else:
        return spec.dag_hash()


def _spec_deps_key(s):
    return '{0}/{1}'.format(s.name, s.dag_hash(7))


def _add_dependency(spec_label, dep_label, deps):
    if spec_label == dep_label:
        return
    if spec_label not in deps:
        deps[spec_label] = set()
    deps[spec_label].add(dep_label)


def _get_spec_dependencies(specs, deps, spec_labels, check_index_only=False,
                           mirrors_to_check=None):
    spec_deps_obj = _compute_spec_deps(specs, check_index_only=check_index_only,
                                       mirrors_to_check=mirrors_to_check)

    if spec_deps_obj:
        dependencies = spec_deps_obj['dependencies']
        specs = spec_deps_obj['specs']

        for entry in specs:
            spec_labels[entry['label']] = {
                'spec': Spec(entry['spec']),
                'rootSpec': entry['root_spec'],
                'needs_rebuild': entry['needs_rebuild'],
            }

        for entry in dependencies:
            _add_dependency(entry['spec'], entry['depends'], deps)


def stage_spec_jobs(specs, check_index_only=False, mirrors_to_check=None):
    """Take a set of release specs and generate a list of "stages", where the
        jobs in any stage are dependent only on jobs in previous stages.  This
        allows us to maximize build parallelism within the gitlab-ci framework.

    Arguments:
        specs (Iterable): Specs to build
        check_index_only (bool): Regardless of whether DAG pruning is enabled,
            all configured mirrors are searched to see if binaries for specs
            are up to date on those mirrors.  This flag limits that search to
            the binary cache indices on those mirrors to speed the process up,
            even though there is no garantee the index is up to date.
        mirrors_to_checK: Optional mapping giving mirrors to check instead of
            any configured mirrors.

    Returns: A tuple of information objects describing the specs, dependencies
        and stages:

        spec_labels: A dictionary mapping the spec labels which are made of
            (pkg-name/hash-prefix), to objects containing "rootSpec" and "spec"
            keys.  The root spec is the spec of which this spec is a dependency
            and the spec is the formatted spec string for this spec.

        deps: A dictionary where the keys should also have appeared as keys in
            the spec_labels dictionary, and the values are the set of
            dependencies for that spec.

        stages: An ordered list of sets, each of which contains all the jobs to
            built in that stage.  The jobs are expressed in the same format as
            the keys in the spec_labels and deps objects.

    """

    # The convenience method below, "_remove_satisfied_deps()", does not modify
    # the "deps" parameter.  Instead, it returns a new dictionary where only
    # dependencies which have not yet been satisfied are included in the
    # return value.
    def _remove_satisfied_deps(deps, satisfied_list):
        new_deps = {}

        for key, value in iteritems(deps):
            new_value = set([v for v in value if v not in satisfied_list])
            if new_value:
                new_deps[key] = new_value

        return new_deps

    deps = {}
    spec_labels = {}

    _get_spec_dependencies(specs, deps, spec_labels, check_index_only=check_index_only,
                           mirrors_to_check=mirrors_to_check)

    # Save the original deps, as we need to return them at the end of the
    # function.  In the while loop below, the "dependencies" variable is
    # overwritten rather than being modified each time through the loop,
    # thus preserving the original value of "deps" saved here.
    dependencies = deps
    unstaged = set(spec_labels.keys())
    stages = []

    while dependencies:
        dependents = set(dependencies.keys())
        next_stage = unstaged.difference(dependents)
        stages.append(next_stage)
        unstaged.difference_update(next_stage)
        # Note that "dependencies" is a dictionary mapping each dependent
        # package to the set of not-yet-handled dependencies.  The final step
        # below removes all the dependencies that are handled by this stage.
        dependencies = _remove_satisfied_deps(dependencies, next_stage)

    if unstaged:
        stages.append(unstaged.copy())

    return spec_labels, deps, stages


def _print_staging_summary(spec_labels, dependencies, stages):
    if not stages:
        return

    tty.msg('  Staging summary ([x] means a job needs rebuilding):')
    for stage_index, stage in enumerate(stages):
        tty.msg('    stage {0} ({1} jobs):'.format(stage_index, len(stage)))

        for job in sorted(stage):
            s = spec_labels[job]['spec']
            tty.msg('      [{1}] {0} -> {2}'.format(
                job,
                'x' if spec_labels[job]['needs_rebuild'] else ' ',
                _get_spec_string(s)))


def _compute_spec_deps(spec_list, check_index_only=False, mirrors_to_check=None):
    """
    Computes all the dependencies for the spec(s) and generates a JSON
    object which provides both a list of unique spec names as well as a
    comprehensive list of all the edges in the dependency graph.  For
    example, given a single spec like 'readline@7.0', this function
    generates the following JSON object:

    .. code-block:: JSON

       {
           "dependencies": [
               {
                   "depends": "readline/ip6aiun",
                   "spec": "readline/ip6aiun"
               },
               {
                   "depends": "ncurses/y43rifz",
                   "spec": "readline/ip6aiun"
               },
               {
                   "depends": "ncurses/y43rifz",
                   "spec": "readline/ip6aiun"
               },
               {
                   "depends": "pkgconf/eg355zb",
                   "spec": "ncurses/y43rifz"
               },
               {
                   "depends": "pkgconf/eg355zb",
                   "spec": "readline/ip6aiun"
               }
           ],
           "specs": [
               {
                 "root_spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-...",
                 "spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-highs...",
                 "label": "readline/ip6aiun"
               },
               {
                 "root_spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-...",
                 "spec": "ncurses@6.1%apple-clang@9.1.0 arch=darwin-highsi...",
                 "label": "ncurses/y43rifz"
               },
               {
                 "root_spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-...",
                 "spec": "pkgconf@1.5.4%apple-clang@9.1.0 arch=darwin-high...",
                 "label": "pkgconf/eg355zb"
               }
           ]
       }

    """
    spec_labels = {}

    specs = []
    dependencies = []

    def append_dep(s, d):
        dependencies.append({
            'spec': s,
            'depends': d,
        })

    for spec in spec_list:
        root_spec = spec

        for s in spec.traverse(deptype=all):
            if s.external:
                tty.msg('Will not stage external pkg: {0}'.format(s))
                continue

            up_to_date_mirrors = bindist.get_mirrors_for_spec(
                spec=s, mirrors_to_check=mirrors_to_check, index_only=check_index_only)

            skey = _spec_deps_key(s)
            spec_labels[skey] = {
                'spec': _get_spec_string(s),
                'root': root_spec,
                'needs_rebuild': not up_to_date_mirrors,
            }

            for d in s.dependencies(deptype=all):
                dkey = _spec_deps_key(d)
                if d.external:
                    tty.msg('Will not stage external dep: {0}'.format(d))
                    continue

                append_dep(skey, dkey)

    for spec_label, spec_holder in spec_labels.items():
        specs.append({
            'label': spec_label,
            'spec': spec_holder['spec'],
            'root_spec': spec_holder['root'],
            'needs_rebuild': spec_holder['needs_rebuild'],
        })

    deps_json_obj = {
        'specs': specs,
        'dependencies': dependencies,
    }

    return deps_json_obj


def _spec_matches(spec, match_string):
    return spec.satisfies(match_string)


def _copy_attributes(attrs_list, src_dict, dest_dict):
    for runner_attr in attrs_list:
        if runner_attr in src_dict:
            if runner_attr in dest_dict and runner_attr == 'tags':
                # For 'tags', we combine the lists of tags, while
                # avoiding duplicates
                for tag in src_dict[runner_attr]:
                    if tag not in dest_dict[runner_attr]:
                        dest_dict[runner_attr].append(tag)
            elif runner_attr in dest_dict and runner_attr == 'variables':
                # For 'variables', we merge the dictionaries.  Any conflicts
                # (i.e. 'runner-attributes' has same variable key as the
                # higher level) we resolve by keeping the more specific
                # 'runner-attributes' version.
                for src_key, src_val in src_dict[runner_attr].items():
                    dest_dict[runner_attr][src_key] = copy.deepcopy(
                        src_dict[runner_attr][src_key])
            else:
                dest_dict[runner_attr] = copy.deepcopy(src_dict[runner_attr])


def _find_matching_config(spec, gitlab_ci):
    runner_attributes = {}
    overridable_attrs = [
        'image',
        'tags',
        'variables',
        'before_script',
        'script',
        'after_script',
    ]

    _copy_attributes(overridable_attrs, gitlab_ci, runner_attributes)

    ci_mappings = gitlab_ci['mappings']
    for ci_mapping in ci_mappings:
        for match_string in ci_mapping['match']:
            if _spec_matches(spec, match_string):
                if 'runner-attributes' in ci_mapping:
                    _copy_attributes(overridable_attrs,
                                     ci_mapping['runner-attributes'],
                                     runner_attributes)
                return runner_attributes
    else:
        return None

    return runner_attributes


def _pkg_name_from_spec_label(spec_label):
    return spec_label[:spec_label.index('/')]


def _format_job_needs(phase_name, strip_compilers, dep_jobs,
                      osname, build_group, prune_dag, stage_spec_dict,
                      enable_artifacts_buildcache):
    needs_list = []
    for dep_job in dep_jobs:
        dep_spec_key = _spec_deps_key(dep_job)
        dep_spec_info = stage_spec_dict[dep_spec_key]

        if not prune_dag or dep_spec_info['needs_rebuild']:
            needs_list.append({
                'job': get_job_name(phase_name,
                                    strip_compilers,
                                    dep_job,
                                    dep_job.architecture,
                                    build_group),
                'artifacts': enable_artifacts_buildcache,
            })
    return needs_list


def get_change_revisions():
    """If this is a git repo get the revisions to use when checking
    for changed packages and spack core modules."""
    git_dir = os.path.join(spack.paths.prefix, '.git')
    if os.path.exists(git_dir) and os.path.isdir(git_dir):
        # TODO: This will only find changed packages from the last
        # TODO: commit.  While this may work for single merge commits
        # TODO: when merging the topic branch into the base, it will
        # TODO: require more thought outside of that narrow case.
        return 'HEAD^', 'HEAD'
    return None, None


def get_stack_changed(env_path, rev1='HEAD^', rev2='HEAD'):
    """Given an environment manifest path and two revisions to compare, return
    whether or not the stack was changed.  Returns True if the environment
    manifest changed between the provided revisions (or additionally if the
    `.gitlab-ci.yml` file itself changed).  Returns False otherwise."""
    git = exe.which("git")
    if git:
        with fs.working_dir(spack.paths.prefix):
            git_log = git("diff", "--name-only", rev1, rev2,
                          output=str, error=os.devnull,
                          fail_on_error=False).strip()
            lines = [] if not git_log else re.split(r'\s+', git_log)

            for path in lines:
                if '.gitlab-ci.yml' in path or path in env_path:
                    tty.debug('env represented by {0} changed'.format(
                        env_path))
                    tty.debug('touched file: {0}'.format(path))
                    return True
    return False


def compute_affected_packages(rev1='HEAD^', rev2='HEAD'):
    """Determine which packages were added, removed or changed
    between rev1 and rev2, and return the names as a set"""
    return spack.repo.get_all_package_diffs('ARC', rev1=rev1, rev2=rev2)


def get_spec_filter_list(env, affected_pkgs, dependencies=True, dependents=True):
    """Given a list of package names, and assuming an active and
       concretized environment, return a set of concrete specs from
       the environment corresponding to any of the affected pkgs (or
       optionally to any of their dependencies/dependents).

    Arguments:

        env (spack.environment.Environment): Active concrete environment
        affected_pkgs (List[str]): Affected package names
        dependencies (bool): Include dependencies of affected packages
        dependents (bool): Include dependents of affected pacakges

    Returns:

        A list of concrete specs from the active environment including
        those associated with affected packages, and possible their
        dependencies and dependents as well.
    """
    affected_specs = set()
    all_concrete_specs = env.all_specs()
    tty.debug('All concrete environment specs:')
    for s in all_concrete_specs:
        tty.debug('  {0}/{1}'.format(s.name, s.dag_hash()[:7]))
    for pkg in affected_pkgs:
        env_matches = [s for s in all_concrete_specs if s.name == pkg]
        for match in env_matches:
            affected_specs.add(match)
            if dependencies:
                affected_specs.update(match.traverse(direction='children', root=False))
            if dependents:
                affected_specs.update(match.traverse(direction='parents', root=False))
    return affected_specs


def generate_gitlab_ci_yaml(env, print_summary, output_file,
                            prune_dag=False, check_index_only=False,
                            run_optimizer=False, use_dependencies=False,
                            artifacts_root=None, remote_mirror_override=None):
    """ Generate a gitlab yaml file to run a dynamic child pipeline from
        the spec matrix in the active environment.

    Arguments:
        env (spack.environment.Environment): Activated environment object
            which must contain a gitlab-ci section describing how to map
            specs to runners
        print_summary (bool): Should we print a summary of all the jobs in
            the stages in which they were placed.
        output_file (str): File path where generated file should be written
        prune_dag (bool): If True, do not generate jobs for specs already
            exist built on the mirror.
        check_index_only (bool): If True, attempt to fetch the mirror index
            and only use that to determine whether built specs on the mirror
            this mode results in faster yaml generation time). Otherwise, also
            check each spec directly by url (useful if there is no index or it
            might be out of date).
        run_optimizer (bool): If True, post-process the generated yaml to try
            try to reduce the size (attempts to collect repeated configuration
            and replace with definitions).)
        use_dependencies (bool): If true, use "dependencies" rather than "needs"
            ("needs" allows DAG scheduling).  Useful if gitlab instance cannot
            be configured to handle more than a few "needs" per job.
        artifacts_root (str): Path where artifacts like logs, environment
            files (spack.yaml, spack.lock), etc should be written.  GitLab
            requires this to be within the project directory.
        remote_mirror_override (str): Typically only needed when one spack.yaml
            is used to populate several mirrors with binaries, based on some
            criteria.  Spack protected pipelines populate different mirrors based
            on branch name, facilitated by this option.
    """
    with spack.concretize.disable_compiler_existence_check():
        with env.write_transaction():
            env.concretize()
            env.write()

    yaml_root = ev.config_dict(env.yaml)

    if 'gitlab-ci' not in yaml_root:
        tty.die('Environment yaml does not have "gitlab-ci" section')

    gitlab_ci = yaml_root['gitlab-ci']

    build_group = None
    enable_cdash_reporting = False
    cdash_auth_token = None

    if 'cdash' in yaml_root:
        enable_cdash_reporting = True
        ci_cdash = yaml_root['cdash']
        build_group = ci_cdash['build-group']
        cdash_url = ci_cdash['url']
        cdash_project = ci_cdash['project']
        cdash_site = ci_cdash['site']

        if 'SPACK_CDASH_AUTH_TOKEN' in os.environ:
            tty.verbose("Using CDash auth token from environment")
            cdash_auth_token = os.environ.get('SPACK_CDASH_AUTH_TOKEN')

    prune_untouched_packages = os.environ.get('SPACK_PRUNE_UNTOUCHED', None)
    if prune_untouched_packages:
        # Requested to prune untouched packages, but assume we won't do that
        # unless we're actually in a git repo.
        prune_untouched_packages = False
        rev1, rev2 = get_change_revisions()
        tty.debug('Got following revisions: rev1={0}, rev2={1}'.format(rev1, rev2))
        if rev1 and rev2:
            # If the stack file itself did not change, proceed with pruning
            if not get_stack_changed(env.manifest_path, rev1, rev2):
                prune_untouched_packages = True
                affected_pkgs = compute_affected_packages(rev1, rev2)
                tty.debug('affected pkgs:')
                for p in affected_pkgs:
                    tty.debug('  {0}'.format(p))
                affected_specs = get_spec_filter_list(env, affected_pkgs)
                tty.debug('all affected specs:')
                for s in affected_specs:
                    tty.debug('  {0}'.format(s.name))

    # Downstream jobs will "need" (depend on, for both scheduling and
    # artifacts, which include spack.lock file) this pipeline generation
    # job by both name and pipeline id.  If those environment variables
    # do not exist, then maybe this is just running in a shell, in which
    # case, there is no expectation gitlab will ever run the generated
    # pipeline and those environment variables do not matter.
    generate_job_name = os.environ.get('CI_JOB_NAME', 'job-does-not-exist')
    parent_pipeline_id = os.environ.get('CI_PIPELINE_ID', 'pipeline-does-not-exist')

    # Values: "spack_pull_request", "spack_protected_branch", or not set
    spack_pipeline_type = os.environ.get('SPACK_PIPELINE_TYPE', None)

    spack_buildcache_copy = os.environ.get('SPACK_COPY_BUILDCACHE', None)

    if 'mirrors' not in yaml_root or len(yaml_root['mirrors'].values()) < 1:
        tty.die('spack ci generate requires an env containing a mirror')

    ci_mirrors = yaml_root['mirrors']
    mirror_urls = [url for url in ci_mirrors.values()]
    remote_mirror_url = mirror_urls[0]

    # Check for a list of "known broken" specs that we should not bother
    # trying to build.
    broken_specs_url = ''
    known_broken_specs_encountered = []
    if 'broken-specs-url' in gitlab_ci:
        broken_specs_url = gitlab_ci['broken-specs-url']

    enable_artifacts_buildcache = False
    if 'enable-artifacts-buildcache' in gitlab_ci:
        enable_artifacts_buildcache = gitlab_ci['enable-artifacts-buildcache']

    rebuild_index_enabled = True
    if 'rebuild-index' in gitlab_ci and gitlab_ci['rebuild-index'] is False:
        rebuild_index_enabled = False

    temp_storage_url_prefix = None
    if 'temporary-storage-url-prefix' in gitlab_ci:
        temp_storage_url_prefix = gitlab_ci['temporary-storage-url-prefix']

    bootstrap_specs = []
    phases = []
    if 'bootstrap' in gitlab_ci:
        for phase in gitlab_ci['bootstrap']:
            try:
                phase_name = phase.get('name')
                strip_compilers = phase.get('compiler-agnostic')
            except AttributeError:
                phase_name = phase
                strip_compilers = False
            phases.append({
                'name': phase_name,
                'strip-compilers': strip_compilers,
            })

            for bs in env.spec_lists[phase_name]:
                bootstrap_specs.append({
                    'spec': bs,
                    'phase-name': phase_name,
                    'strip-compilers': strip_compilers,
                })

    phases.append({
        'name': 'specs',
        'strip-compilers': False,
    })

    # If a remote mirror override (alternate buildcache destination) was
    # specified, add it here in case it has already built hashes we might
    # generate.
    mirrors_to_check = None
    if remote_mirror_override:
        if spack_pipeline_type == 'spack_protected_branch':
            # Overriding the main mirror in this case might result
            # in skipping jobs on a release pipeline because specs are
            # up to date in develop.  Eventually we want to notice and take
            # advantage of this by scheduling a job to copy the spec from
            # develop to the release, but until we have that, this makes
            # sure we schedule a rebuild job if the spec isn't already in
            # override mirror.
            mirrors_to_check = {
                'override': remote_mirror_override
            }

        # If we have a remote override and we want generate pipeline using
        # --check-index-only, then the override mirror needs to be added to
        # the configured mirrors when bindist.update() is run, or else we
        # won't fetch its index and include in our local cache.
        spack.mirror.add(
            'ci_pr_mirror', remote_mirror_override, cfg.default_modify_scope())

    pipeline_artifacts_dir = artifacts_root
    if not pipeline_artifacts_dir:
        proj_dir = os.environ.get('CI_PROJECT_DIR', os.getcwd())
        pipeline_artifacts_dir = os.path.join(proj_dir, 'jobs_scratch_dir')

    pipeline_artifacts_dir = os.path.abspath(pipeline_artifacts_dir)
    concrete_env_dir = os.path.join(
        pipeline_artifacts_dir, 'concrete_environment')

    # Now that we've added the mirrors we know about, they should be properly
    # reflected in the environment manifest file, so copy that into the
    # concrete environment directory, along with the spack.lock file.
    if not os.path.exists(concrete_env_dir):
        os.makedirs(concrete_env_dir)
    shutil.copyfile(env.manifest_path,
                    os.path.join(concrete_env_dir, 'spack.yaml'))
    shutil.copyfile(env.lock_path,
                    os.path.join(concrete_env_dir, 'spack.lock'))

    job_log_dir = os.path.join(pipeline_artifacts_dir, 'logs')
    job_repro_dir = os.path.join(pipeline_artifacts_dir, 'reproduction')
    local_mirror_dir = os.path.join(pipeline_artifacts_dir, 'mirror')
    user_artifacts_dir = os.path.join(pipeline_artifacts_dir, 'user_data')

    # We communicate relative paths to the downstream jobs to avoid issues in
    # situations where the CI_PROJECT_DIR varies between the pipeline
    # generation job and the rebuild jobs.  This can happen when gitlab
    # checks out the project into a runner-specific directory, for example,
    # and different runners are picked for generate and rebuild jobs.
    ci_project_dir = os.environ.get('CI_PROJECT_DIR')
    rel_artifacts_root = os.path.relpath(
        pipeline_artifacts_dir, ci_project_dir)
    rel_concrete_env_dir = os.path.relpath(
        concrete_env_dir, ci_project_dir)
    rel_job_log_dir = os.path.relpath(
        job_log_dir, ci_project_dir)
    rel_job_repro_dir = os.path.relpath(
        job_repro_dir, ci_project_dir)
    rel_local_mirror_dir = os.path.relpath(
        local_mirror_dir, ci_project_dir)
    rel_user_artifacts_dir = os.path.relpath(
        user_artifacts_dir, ci_project_dir)

    # Speed up staging by first fetching binary indices from all mirrors
    # (including the override mirror we may have just added above).
    try:
        bindist.binary_index.update()
    except bindist.FetchCacheError as e:
        tty.error(e)

    staged_phases = {}
    try:
        for phase in phases:
            phase_name = phase['name']
            if phase_name == 'specs':
                # Anything in the "specs" of the environment are already
                # concretized by the block at the top of this method, so we
                # only need to find the concrete versions, and then avoid
                # re-concretizing them needlessly later on.
                concrete_phase_specs = [
                    concrete for abstract, concrete in env.concretized_specs()
                    if abstract in env.spec_lists[phase_name]
                ]
            else:
                # Any specs lists in other definitions (but not in the
                # "specs") of the environment are not yet concretized so we
                # have to concretize them explicitly here.
                concrete_phase_specs = env.spec_lists[phase_name]
                with spack.concretize.disable_compiler_existence_check():
                    for phase_spec in concrete_phase_specs:
                        phase_spec.concretize()
            staged_phases[phase_name] = stage_spec_jobs(
                concrete_phase_specs,
                check_index_only=check_index_only,
                mirrors_to_check=mirrors_to_check)
    finally:
        # Clean up remote mirror override if enabled
        if remote_mirror_override:
            spack.mirror.remove('ci_pr_mirror', cfg.default_modify_scope())

    all_job_names = []
    output_object = {}
    job_id = 0
    stage_id = 0

    stage_names = []

    max_length_needs = 0
    max_needs_job = ''

    # If this is configured, spack will fail "spack ci generate" if it
    # generates any hash which exists under the broken specs url.
    broken_spec_urls = None
    if broken_specs_url:
        if broken_specs_url.startswith('http'):
            # To make checking each spec against the list faster, we require
            # a url protocol that allows us to iterate the url in advance.
            tty.msg('Cannot use an http(s) url for broken specs, ignoring')
        else:
            broken_spec_urls = web_util.list_url(broken_specs_url)

    before_script, after_script = None, None
    for phase in phases:
        phase_name = phase['name']
        strip_compilers = phase['strip-compilers']

        main_phase = _is_main_phase(phase_name)
        spec_labels, dependencies, stages = staged_phases[phase_name]

        for stage_jobs in stages:
            stage_name = 'stage-{0}'.format(stage_id)
            stage_names.append(stage_name)
            stage_id += 1

            for spec_label in stage_jobs:
                spec_record = spec_labels[spec_label]
                root_spec = spec_record['rootSpec']
                pkg_name = _pkg_name_from_spec_label(spec_label)
                release_spec = root_spec[pkg_name]
                release_spec_dag_hash = release_spec.dag_hash()

                if prune_untouched_packages:
                    if release_spec not in affected_specs:
                        tty.debug('Pruning {0}, untouched by change.'.format(
                            release_spec.name))
                        spec_record['needs_rebuild'] = False
                        continue

                runner_attribs = _find_matching_config(
                    release_spec, gitlab_ci)

                if not runner_attribs:
                    tty.warn('No match found for {0}, skipping it'.format(
                        release_spec))
                    continue

                tags = [tag for tag in runner_attribs['tags']]

                if spack_pipeline_type is not None:
                    # For spack pipelines "public" and "protected" are reserved tags
                    tags = _remove_reserved_tags(tags)
                    if spack_pipeline_type == 'spack_protected_branch':
                        tags.extend(['aws', 'protected'])
                    elif spack_pipeline_type == 'spack_pull_request':
                        tags.extend(['public'])

                variables = {}
                if 'variables' in runner_attribs:
                    variables.update(runner_attribs['variables'])

                image_name = None
                image_entry = None
                if 'image' in runner_attribs:
                    build_image = runner_attribs['image']
                    try:
                        image_name = build_image.get('name')
                        entrypoint = build_image.get('entrypoint')
                        image_entry = [p for p in entrypoint]
                    except AttributeError:
                        image_name = build_image

                job_script = ['spack env activate --without-view .']

                if artifacts_root:
                    job_script.insert(0, 'cd {0}'.format(concrete_env_dir))

                job_script.extend([
                    'spack ci rebuild'
                ])

                if 'script' in runner_attribs:
                    job_script = [s for s in runner_attribs['script']]

                before_script = None
                if 'before_script' in runner_attribs:
                    before_script = [
                        s for s in runner_attribs['before_script']
                    ]

                after_script = None
                if 'after_script' in runner_attribs:
                    after_script = [s for s in runner_attribs['after_script']]

                osname = str(release_spec.architecture)
                job_name = get_job_name(phase_name, strip_compilers,
                                        release_spec, osname, build_group)

                compiler_action = 'NONE'
                if len(phases) > 1:
                    compiler_action = 'FIND_ANY'
                    if _is_main_phase(phase_name):
                        compiler_action = 'INSTALL_MISSING'

                job_vars = {
                    'SPACK_ROOT_SPEC': _format_root_spec(
                        root_spec, main_phase, strip_compilers),
                    'SPACK_JOB_SPEC_DAG_HASH': release_spec_dag_hash,
                    'SPACK_JOB_SPEC_PKG_NAME': release_spec.name,
                    'SPACK_COMPILER_ACTION': compiler_action
                }

                job_dependencies = []
                if spec_label in dependencies:
                    if enable_artifacts_buildcache:
                        # Get dependencies transitively, so they're all
                        # available in the artifacts buildcache.
                        dep_jobs = [
                            d for d in release_spec.traverse(deptype=all,
                                                             root=False)
                        ]
                    else:
                        # In this case, "needs" is only used for scheduling
                        # purposes, so we only get the direct dependencies.
                        dep_jobs = []
                        for dep_label in dependencies[spec_label]:
                            dep_pkg = _pkg_name_from_spec_label(dep_label)
                            dep_root = spec_labels[dep_label]['rootSpec']
                            dep_jobs.append(dep_root[dep_pkg])

                    job_dependencies.extend(
                        _format_job_needs(phase_name, strip_compilers,
                                          dep_jobs, osname, build_group,
                                          prune_dag, spec_labels,
                                          enable_artifacts_buildcache))

                rebuild_spec = spec_record['needs_rebuild']

                # This next section helps gitlab make sure the right
                # bootstrapped compiler exists in the artifacts buildcache by
                # creating an artificial dependency between this spec and its
                # compiler.  So, if we are in the main phase, and if the
                # compiler we are supposed to use is listed in any of the
                # bootstrap spec lists, then we will add more dependencies to
                # the job (that compiler and maybe it's dependencies as well).
                if _is_main_phase(phase_name):
                    spec_arch_family = (release_spec.architecture
                                                    .target
                                                    .microarchitecture
                                                    .family)
                    compiler_pkg_spec = compilers.pkg_spec_for_compiler(
                        release_spec.compiler)
                    for bs in bootstrap_specs:
                        c_spec = bs['spec']
                        bs_arch = c_spec.architecture
                        bs_arch_family = (bs_arch.target
                                                 .microarchitecture
                                                 .family)
                        if (c_spec.satisfies(compiler_pkg_spec) and
                            bs_arch_family == spec_arch_family):
                            # We found the bootstrap compiler this release spec
                            # should be built with, so for DAG scheduling
                            # purposes, we will at least add the compiler spec
                            # to the jobs "needs".  But if artifact buildcache
                            # is enabled, we'll have to add all transtive deps
                            # of the compiler as well.

                            # Here we check whether the bootstrapped compiler
                            # needs to be rebuilt.  Until compilers are proper
                            # dependencies, we artificially force the spec to
                            # be rebuilt if the compiler targeted to build it
                            # needs to be rebuilt.
                            bs_specs, _, _ = staged_phases[bs['phase-name']]
                            c_spec_key = _spec_deps_key(c_spec)
                            rbld_comp = bs_specs[c_spec_key]['needs_rebuild']
                            rebuild_spec = rebuild_spec or rbld_comp
                            # Also update record so dependents do not fail to
                            # add this spec to their "needs"
                            spec_record['needs_rebuild'] = rebuild_spec

                            dep_jobs = [c_spec]
                            if enable_artifacts_buildcache:
                                dep_jobs = [
                                    d for d in c_spec.traverse(deptype=all)
                                ]

                            job_dependencies.extend(
                                _format_job_needs(bs['phase-name'],
                                                  bs['strip-compilers'],
                                                  dep_jobs,
                                                  str(bs_arch),
                                                  build_group,
                                                  prune_dag,
                                                  bs_specs,
                                                  enable_artifacts_buildcache))
                        else:
                            debug_msg = ''.join([
                                'Considered compiler {0} for spec ',
                                '{1}, but rejected it either because it was ',
                                'not the compiler required by the spec, or ',
                                'because the target arch families of the ',
                                'spec and the compiler did not match'
                            ]).format(c_spec, release_spec)
                            tty.debug(debug_msg)

                if prune_dag and not rebuild_spec:
                    tty.debug('Pruning {0}, does not need rebuild.'.format(
                        release_spec.name))
                    continue

                if (broken_spec_urls is not None and
                        release_spec_dag_hash in broken_spec_urls):
                    known_broken_specs_encountered.append('{0} ({1})'.format(
                        release_spec, release_spec_dag_hash))

                if artifacts_root:
                    job_dependencies.append({
                        'job': generate_job_name,
                        'pipeline': '{0}'.format(parent_pipeline_id)
                    })

                job_vars['SPACK_SPEC_NEEDS_REBUILD'] = str(rebuild_spec)

                if enable_cdash_reporting:
                    cdash_build_name = _get_cdash_build_name(
                        release_spec, build_group)
                    all_job_names.append(cdash_build_name)
                    job_vars['SPACK_CDASH_BUILD_NAME'] = cdash_build_name

                variables.update(job_vars)

                artifact_paths = [
                    rel_job_log_dir,
                    rel_job_repro_dir,
                    rel_user_artifacts_dir
                ]

                if enable_artifacts_buildcache:
                    bc_root = os.path.join(
                        local_mirror_dir, 'build_cache')
                    artifact_paths.extend([os.path.join(bc_root, p) for p in [
                        bindist.tarball_name(release_spec, '.spec.json'),
                        bindist.tarball_directory_name(release_spec),
                    ]])

                job_object = {
                    'stage': stage_name,
                    'variables': variables,
                    'script': job_script,
                    'tags': tags,
                    'artifacts': {
                        'paths': artifact_paths,
                        'when': 'always',
                    },
                    'needs': sorted(job_dependencies, key=lambda d: d['job']),
                    'retry': {
                        'max': 2,
                        'when': JOB_RETRY_CONDITIONS,
                    },
                    'interruptible': True
                }

                length_needs = len(job_dependencies)
                if length_needs > max_length_needs:
                    max_length_needs = length_needs
                    max_needs_job = job_name

                if before_script:
                    job_object['before_script'] = before_script

                if after_script:
                    job_object['after_script'] = after_script

                if image_name:
                    job_object['image'] = image_name
                    if image_entry is not None:
                        job_object['image'] = {
                            'name': image_name,
                            'entrypoint': image_entry,
                        }

                output_object[job_name] = job_object
                job_id += 1

    if print_summary:
        for phase in phases:
            phase_name = phase['name']
            tty.msg('Stages for phase "{0}"'.format(phase_name))
            phase_stages = staged_phases[phase_name]
            _print_staging_summary(*phase_stages)

    tty.debug('{0} build jobs generated in {1} stages'.format(
        job_id, stage_id))

    if job_id > 0:
        tty.debug('The max_needs_job is {0}, with {1} needs'.format(
            max_needs_job, max_length_needs))

    # Use "all_job_names" to populate the build group for this set
    if enable_cdash_reporting and cdash_auth_token:
        try:
            _populate_buildgroup(all_job_names, build_group, cdash_project,
                                 cdash_site, cdash_auth_token, cdash_url)
        except (SpackError, HTTPError, URLError) as err:
            tty.warn('Problem populating buildgroup: {0}'.format(err))
    else:
        tty.warn('Unable to populate buildgroup without CDash credentials')

    service_job_config = None
    if 'service-job-attributes' in gitlab_ci:
        service_job_config = gitlab_ci['service-job-attributes']

    default_attrs = [
        'image',
        'tags',
        'variables',
        'before_script',
        # 'script',
        'after_script',
    ]

    service_job_retries = {
        'max': 2,
        'when': [
            'runner_system_failure',
            'stuck_or_timeout_failure'
        ]
    }

    if job_id > 0:
        if temp_storage_url_prefix:
            # There were some rebuild jobs scheduled, so we will need to
            # schedule a job to clean up the temporary storage location
            # associated with this pipeline.
            stage_names.append('cleanup-temp-storage')
            cleanup_job = {}

            if service_job_config:
                _copy_attributes(default_attrs,
                                 service_job_config,
                                 cleanup_job)

            if 'tags' in cleanup_job:
                service_tags = _remove_reserved_tags(cleanup_job['tags'])
                cleanup_job['tags'] = service_tags

            cleanup_job['stage'] = 'cleanup-temp-storage'
            cleanup_job['script'] = [
                'spack -d mirror destroy --mirror-url {0}/$CI_PIPELINE_ID'.format(
                    temp_storage_url_prefix)
            ]
            cleanup_job['when'] = 'always'
            cleanup_job['retry'] = service_job_retries
            cleanup_job['interruptible'] = True

            output_object['cleanup'] = cleanup_job

        if ('signing-job-attributes' in gitlab_ci and
                spack_pipeline_type == 'spack_protected_branch'):
            # External signing: generate a job to check and sign binary pkgs
            stage_names.append('stage-sign-pkgs')
            signing_job_config = gitlab_ci['signing-job-attributes']
            signing_job = {}

            signing_job_attrs_to_copy = [
                'image',
                'tags',
                'variables',
                'before_script',
                'script',
                'after_script',
            ]

            _copy_attributes(signing_job_attrs_to_copy,
                             signing_job_config,
                             signing_job)

            signing_job_tags = []
            if 'tags' in signing_job:
                signing_job_tags = _remove_reserved_tags(signing_job['tags'])

            for tag in ['aws', 'protected', 'notary']:
                if tag not in signing_job_tags:
                    signing_job_tags.append(tag)
            signing_job['tags'] = signing_job_tags

            signing_job['stage'] = 'stage-sign-pkgs'
            signing_job['when'] = 'always'
            signing_job['retry'] = {
                'max': 2,
                'when': ['always']
            }
            signing_job['interruptible'] = True

            output_object['sign-pkgs'] = signing_job

        if spack_buildcache_copy:
            # Generate a job to copy the contents from wherever the builds are getting
            # pushed to the url specified in the "SPACK_BUILDCACHE_COPY" environment
            # variable.
            src_url = remote_mirror_override or remote_mirror_url
            dest_url = spack_buildcache_copy

            stage_names.append('stage-copy-buildcache')
            copy_job = {
                'stage': 'stage-copy-buildcache',
                'tags': ['spack', 'public', 'medium', 'aws', 'x86_64'],
                'image': 'ghcr.io/spack/python-aws-bash:0.0.1',
                'when': 'on_success',
                'interruptible': True,
                'retry': service_job_retries,
                'script': [
                    '. ./share/spack/setup-env.sh',
                    'spack --version',
                    'aws s3 sync --exclude *index.json* --exclude *pgp* {0} {1}'.format(
                        src_url, dest_url)
                ]
            }

            output_object['copy-mirror'] = copy_job

        if rebuild_index_enabled:
            # Add a final job to regenerate the index
            stage_names.append('stage-rebuild-index')
            final_job = {}

            if service_job_config:
                _copy_attributes(default_attrs,
                                 service_job_config,
                                 final_job)

            if 'tags' in final_job:
                service_tags = _remove_reserved_tags(final_job['tags'])
                final_job['tags'] = service_tags

            index_target_mirror = mirror_urls[0]
            if remote_mirror_override:
                index_target_mirror = remote_mirror_override

            final_job['stage'] = 'stage-rebuild-index'
            final_job['script'] = [
                'spack buildcache update-index --keys -d {0}'.format(
                    index_target_mirror)
            ]
            final_job['when'] = 'always'
            final_job['retry'] = service_job_retries
            final_job['interruptible'] = True

            output_object['rebuild-index'] = final_job

        output_object['stages'] = stage_names

        # Capture the version of spack used to generate the pipeline, transform it
        # into a value that can be passed to "git checkout", and save it in a
        # global yaml variable
        spack_version = spack.main.get_version()
        version_to_clone = None
        v_match = re.match(r"^\d+\.\d+\.\d+$", spack_version)
        if v_match:
            version_to_clone = 'v{0}'.format(v_match.group(0))
        else:
            v_match = re.match(r"^[^-]+-[^-]+-([a-f\d]+)$", spack_version)
            if v_match:
                version_to_clone = v_match.group(1)
            else:
                version_to_clone = spack_version

        output_object['variables'] = {
            'SPACK_ARTIFACTS_ROOT': rel_artifacts_root,
            'SPACK_CONCRETE_ENV_DIR': rel_concrete_env_dir,
            'SPACK_VERSION': spack_version,
            'SPACK_CHECKOUT_VERSION': version_to_clone,
            'SPACK_REMOTE_MIRROR_URL': remote_mirror_url,
            'SPACK_JOB_LOG_DIR': rel_job_log_dir,
            'SPACK_JOB_REPRO_DIR': rel_job_repro_dir,
            'SPACK_LOCAL_MIRROR_DIR': rel_local_mirror_dir,
            'SPACK_PIPELINE_TYPE': str(spack_pipeline_type)
        }

        if remote_mirror_override:
            (output_object['variables']
                          ['SPACK_REMOTE_MIRROR_OVERRIDE']) = remote_mirror_override

        spack_stack_name = os.environ.get('SPACK_CI_STACK_NAME', None)
        if spack_stack_name:
            output_object['variables']['SPACK_CI_STACK_NAME'] = spack_stack_name

        sorted_output = {}
        for output_key, output_value in sorted(output_object.items()):
            sorted_output[output_key] = output_value

        # TODO(opadron): remove this or refactor
        if run_optimizer:
            import spack.ci_optimization as ci_opt
            sorted_output = ci_opt.optimizer(sorted_output)

        # TODO(opadron): remove this or refactor
        if use_dependencies:
            import spack.ci_needs_workaround as cinw
            sorted_output = cinw.needs_to_dependencies(sorted_output)
    else:
        # No jobs were generated
        tty.debug('No specs to rebuild, generating no-op job')
        noop_job = {}

        if service_job_config:
            _copy_attributes(default_attrs,
                             service_job_config,
                             noop_job)

        if 'script' not in noop_job:
            noop_job['script'] = [
                'echo "All specs already up to date, nothing to rebuild."',
            ]

        noop_job['retry'] = service_job_retries

        sorted_output = {'no-specs-to-rebuild': noop_job}

    if known_broken_specs_encountered:
        error_msg = (
            'Pipeline generation failed due to the presence of the '
            'following specs that are known to be broken in develop:\n')
        for broken_spec in known_broken_specs_encountered:
            error_msg += '* {0}\n'.format(broken_spec)
        tty.die(error_msg)

    with open(output_file, 'w') as outf:
        outf.write(syaml.dump_config(sorted_output, default_flow_style=True))


def _url_encode_string(input_string):
    encoded_keyval = urlencode({'donotcare': input_string})
    eq_idx = encoded_keyval.find('=') + 1
    encoded_value = encoded_keyval[eq_idx:]
    return encoded_value


def import_signing_key(base64_signing_key):
    """ Given Base64-encoded gpg key, decode and import it to use for
        signing packages.

    Arguments:
        base64_signing_key (str): A gpg key including the secret key,
            armor-exported and base64 encoded, so it can be stored in a
            gitlab CI variable.  For an example of how to generate such
            a key, see:

        https://github.com/spack/spack-infrastructure/blob/main/gitlab-docker/files/gen-key
    """
    if not base64_signing_key:
        tty.warn('No key found for signing/verifying packages')
        return

    tty.debug('ci.import_signing_key() will attempt to import a key')

    # This command has the side-effect of creating the directory referred
    # to as GNUPGHOME in setup_environment()
    list_output = spack_gpg('list', output=str)

    tty.debug('spack gpg list:')
    tty.debug(list_output)

    decoded_key = base64.b64decode(base64_signing_key)
    if isinstance(decoded_key, bytes):
        decoded_key = decoded_key.decode('utf8')

    with TemporaryDirectory() as tmpdir:
        sign_key_path = os.path.join(tmpdir, 'signing_key')
        with open(sign_key_path, 'w') as fd:
            fd.write(decoded_key)

        key_import_output = spack_gpg('trust', sign_key_path, output=str)
        tty.debug('spack gpg trust {0}'.format(sign_key_path))
        tty.debug(key_import_output)

    # Now print the keys we have for verifying and signing
    trusted_keys_output = spack_gpg('list', '--trusted', output=str)
    signing_keys_output = spack_gpg('list', '--signing', output=str)

    tty.debug('spack gpg list --trusted')
    tty.debug(trusted_keys_output)
    tty.debug('spack gpg list --signing')
    tty.debug(signing_keys_output)


def can_sign_binaries():
    """ Utility method to determine if this spack instance is capable of
        signing binary packages.  This is currently only possible if the
        spack gpg keystore contains exactly one secret key."""
    return len(gpg_util.signing_keys()) == 1


def can_verify_binaries():
    """ Utility method to determin if this spack instance is capable (at
        least in theory) of verifying signed binaries."""
    return len(gpg_util.public_keys()) >= 1


def configure_compilers(compiler_action, scope=None):
    """ Depending on the compiler_action parameter, either turn on the
        install_missing_compilers config option, or find spack compilers,
        or do nothing.  This is used from rebuild jobs in bootstrapping
        pipelines, where in the bootsrapping phase we would pass
        FIND_ANY in case of compiler-agnostic bootstrapping, while in the
        spec building phase we would pass INSTALL_MISSING in order to get
        spack to use the compiler which was built in the previous phase and
        is now sitting in the binary mirror.

    Arguments:
        compiler_action (str): 'FIND_ANY', 'INSTALL_MISSING' have meanings
            described above.  Any other value essentially results in a no-op.
        scope (spack.config.ConfigScope): Optional.  The scope in which to look for
            compilers, in case 'FIND_ANY' was provided.
    """
    if compiler_action == 'INSTALL_MISSING':
        tty.debug('Make sure bootstrapped compiler will be installed')
        config = cfg.get('config')
        config['install_missing_compilers'] = True
        cfg.set('config', config)
    elif compiler_action == 'FIND_ANY':
        tty.debug('Just find any available compiler')
        find_args = ['find']
        if scope:
            find_args.extend(['--scope', scope])
        output = spack_compiler(*find_args)
        tty.debug('spack compiler find')
        tty.debug(output)
        output = spack_compiler('list')
        tty.debug('spack compiler list')
        tty.debug(output)
    else:
        tty.debug('No compiler action to be taken')

    return None


def get_concrete_specs(env, root_spec, job_name, compiler_action):
    """ Build a dictionary of concrete specs relevant to a particular
        rebuild job.  This includes the root spec and the spec to be
        rebuilt (which could be the same).

    Arguments:

        env (spack.environment.Environment): Activated spack environment
            used to get concrete root spec by hash in case compiler_action
            is anthing other than FIND_ANY.
        root_spec (str): If compiler_action is FIND_ANY root_spec is
            a string representation which can be turned directly into
            a spec, otherwise, it's a hash used to index the activated
            spack environment.
        job_name (str): Name of package to be built, used to index the
            concrete root spec and produce the concrete spec to be
            built.
        compiler_action (str): Determines how to interpret the root_spec
            parameter, either as a string representation as a hash.

    Returns:

    .. code-block:: JSON

       {
           "root": "<spec>",
           "<job-pkg-name>": "<spec>",
        }

    """
    spec_map = {
        'root': None,
    }

    if compiler_action == 'FIND_ANY':
        # This corresponds to a bootstrapping phase where we need to
        # rely on any available compiler to build the package (i.e. the
        # compiler needed to be stripped from the spec when we generated
        # the job), and thus we need to concretize the root spec again.
        tty.debug('About to concretize {0}'.format(root_spec))
        concrete_root = Spec(root_spec).concretized()
        tty.debug('Resulting concrete root: {0}'.format(concrete_root))
    else:
        # in this case, either we're relying on Spack to install missing
        # compiler bootstrapped in a previous phase, or else we only had one
        # phase (like a site which already knows what compilers are available
        # on it's runners), so we don't want to concretize that root spec
        # again.  The reason we take this path in the first case (bootstrapped
        # compiler), is that we can't concretize a spec at this point if we're
        # going to ask spack to "install_missing_compilers".
        concrete_root = env.specs_by_hash[root_spec]

    spec_map['root'] = concrete_root
    spec_map[job_name] = concrete_root[job_name]

    return spec_map


def _push_mirror_contents(env, specfile_path, sign_binaries, mirror_url):
    """Unchecked version of the public API, for easier mocking"""
    unsigned = not sign_binaries
    tty.debug('Creating buildcache ({0})'.format(
        'unsigned' if unsigned else 'signed'))
    hashes = env.all_hashes() if env else None
    matches = spack.store.specfile_matches(specfile_path, hashes=hashes)
    push_url = spack.mirror.push_url_from_mirror_url(mirror_url)
    spec_kwargs = {'include_root': True, 'include_dependencies': False}
    kwargs = {
        'force': True,
        'allow_root': True,
        'unsigned': unsigned
    }
    bindist.push(matches, push_url, spec_kwargs, **kwargs)


def push_mirror_contents(env, specfile_path, mirror_url, sign_binaries):
    """ Push one or more binary packages to the mirror.

    Arguments:

        env (spack.environment.Environment): Optional environment.  If
            provided, it is used to make sure binary package to push
            exists in the environment.
        specfile_path (str): Path to spec.json corresponding to built pkg
            to push.
        mirror_url (str): Base url of target mirror
        sign_binaries (bool): If True, spack will attempt to sign binary
            package before pushing.
    """
    try:
        _push_mirror_contents(env, specfile_path, sign_binaries, mirror_url)
    except Exception as inst:
        # If the mirror we're pushing to is on S3 and there's some
        # permissions problem, for example, we can't just target
        # that exception type here, since users of the
        # `spack ci rebuild' may not need or want any dependency
        # on boto3.  So we use the first non-boto exception type
        # in the heirarchy:
        #     boto3.exceptions.S3UploadFailedError
        #     boto3.exceptions.Boto3Error
        #     Exception
        #     BaseException
        #     object
        err_msg = 'Error msg: {0}'.format(inst)
        if any(x in err_msg for x in ['Access Denied', 'InvalidAccessKeyId']):
            tty.msg('Permission problem writing to {0}'.format(
                mirror_url))
            tty.msg(err_msg)
        else:
            raise inst


def copy_stage_logs_to_artifacts(job_spec, job_log_dir):
    """ Looks for spack-build-out.txt in the stage directory of the given
        job_spec, and attempts to copy the file into the directory given
        by job_log_dir.

    Arguments:

        job_spec (spack.spec.Spec): Spec associated with spack install log
        job_log_dir (str): Path into which build log should be copied
    """
    try:
        pkg_cls = spack.repo.path.get_pkg_class(job_spec.name)
        job_pkg = pkg_cls(job_spec)
        tty.debug('job package: {0.fullname}'.format(job_pkg))
        stage_dir = job_pkg.stage.path
        tty.debug('stage dir: {0}'.format(stage_dir))
        build_out_src = os.path.join(stage_dir, 'spack-build-out.txt')
        build_out_dst = os.path.join(
            job_log_dir, 'spack-build-out.txt')
        tty.debug('Copying build log ({0}) to artifacts ({1})'.format(
            build_out_src, build_out_dst))
        shutil.copyfile(build_out_src, build_out_dst)
    except Exception as inst:
        msg = ('Unable to copy build logs from stage to artifacts '
               'due to exception: {0}').format(inst)
        tty.error(msg)


def download_and_extract_artifacts(url, work_dir):
    """ Look for gitlab artifacts.zip at the given url, and attempt to download
        and extract the contents into the given work_dir

    Arguments:

        url (str): Complete url to artifacts.zip file
        work_dir (str): Path to destination where artifacts should be extracted
    """
    tty.msg('Fetching artifacts from: {0}\n'.format(url))

    headers = {
        'Content-Type': 'application/zip',
    }

    token = os.environ.get('GITLAB_PRIVATE_TOKEN', None)
    if token:
        headers['PRIVATE-TOKEN'] = token

    opener = build_opener(HTTPHandler)

    request = Request(url, headers=headers)
    request.get_method = lambda: 'GET'

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200:
        msg = 'Error response code ({0}) in reproduce_ci_job'.format(
            response_code)
        raise SpackError(msg)

    artifacts_zip_path = os.path.join(work_dir, 'artifacts.zip')

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    with open(artifacts_zip_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)

    zip_file = zipfile.ZipFile(artifacts_zip_path)
    zip_file.extractall(work_dir)
    zip_file.close()

    os.remove(artifacts_zip_path)


def get_spack_info():
    """ If spack is running from a git repo, return the most recent git log
        entry, otherwise, return a string containing the spack version. """
    git_path = os.path.join(spack.paths.prefix, ".git")
    if os.path.exists(git_path):
        git = exe.which("git")
        if git:
            with fs.working_dir(spack.paths.prefix):
                git_log = git("log", "-1",
                              output=str, error=os.devnull,
                              fail_on_error=False)

            return git_log

    return 'no git repo, use spack {0}'.format(spack.spack_version)


def setup_spack_repro_version(repro_dir, checkout_commit, merge_commit=None):
    """ Look in the local spack clone to find the checkout_commit, and if
        provided, the merge_commit given as arguments.  If those commits can
        be found locally, then clone spack and attempt to recreate a merge
        commit with the same parent commits as tested in gitlab.  This looks
        something like 1) git clone repo && cd repo 2) git checkout
        <checkout_commit> 3) git merge <merge_commit>.  If there is no
        merge_commit provided, then skip step (3).

    Arguments:

        repro_dir (str): Location where spack should be cloned
        checkout_commit (str): SHA of PR branch commit
        merge_commit (str): SHA of target branch parent

    Returns: True if git repo state was successfully recreated, or False
        otherwise.
    """
    # figure out the path to the spack git version being used for the
    # reproduction
    print('checkout_commit: {0}'.format(checkout_commit))
    print('merge_commit: {0}'.format(merge_commit))

    dot_git_path = os.path.join(spack.paths.prefix, ".git")
    if not os.path.exists(dot_git_path):
        tty.error('Unable to find the path to your local spack clone')
        return False

    spack_git_path = spack.paths.prefix

    git = exe.which("git")
    if not git:
        tty.error("reproduction of pipeline job requires git")
        return False

    # Check if we can find the tested commits in your local spack repo
    with fs.working_dir(spack_git_path):
        git("log", "-1", checkout_commit, output=str, error=os.devnull,
            fail_on_error=False)

        if git.returncode != 0:
            tty.error('Missing commit: {0}'.format(checkout_commit))
            return False

        if merge_commit:
            git("log", "-1", merge_commit, output=str, error=os.devnull,
                fail_on_error=False)

            if git.returncode != 0:
                tty.error('Missing commit: {0}'.format(merge_commit))
                return False

    # Next attempt to clone your local spack repo into the repro dir
    with fs.working_dir(repro_dir):
        clone_out = git("clone", spack_git_path, "spack",
                        output=str, error=os.devnull,
                        fail_on_error=False)

        if git.returncode != 0:
            tty.error('Unable to clone your local spack repo:')
            tty.msg(clone_out)
            return False

    # Finally, attempt to put the cloned repo into the same state used during
    # the pipeline build job
    repro_spack_path = os.path.join(repro_dir, 'spack')
    with fs.working_dir(repro_spack_path):
        co_out = git("checkout", checkout_commit,
                     output=str, error=os.devnull,
                     fail_on_error=False)

        if git.returncode != 0:
            tty.error('Unable to checkout {0}'.format(checkout_commit))
            tty.msg(co_out)
            return False

        if merge_commit:
            merge_out = git("-c", "user.name=cirepro", "-c",
                            "user.email=user@email.org", "merge",
                            "--no-edit", merge_commit,
                            output=str, error=os.devnull,
                            fail_on_error=False)

            if git.returncode != 0:
                tty.error('Unable to merge {0}'.format(merge_commit))
                tty.msg(merge_out)
                return False

    return True


def reproduce_ci_job(url, work_dir):
    """ Given a url to gitlab artifacts.zip from a failed 'spack ci rebuild' job,
        attempt to setup an environment in which the failure can be reproduced
        locally.  This entails the following:

        First download and extract artifacts.  Then look through those artifacts
        to glean some information needed for the reproduer (e.g. one of the
        artifacts contains information about the version of spack tested by
        gitlab, another is the generated pipeline yaml containing details
        of the job like the docker image used to run it).  The output of this
        function is a set of printed instructions for running docker and then
        commands to run to reproduce the build once inside the container.
    """
    download_and_extract_artifacts(url, work_dir)

    lock_file = fs.find(work_dir, 'spack.lock')[0]
    concrete_env_dir = os.path.dirname(lock_file)

    tty.debug('Concrete environment directory: {0}'.format(
        concrete_env_dir))

    yaml_files = fs.find(work_dir, ['*.yaml', '*.yml'])

    tty.debug('yaml files:')
    for yaml_file in yaml_files:
        tty.debug('  {0}'.format(yaml_file))

    pipeline_yaml = None

    # Try to find the dynamically generated pipeline yaml file in the
    # reproducer.  If the user did not put it in the artifacts root,
    # but rather somewhere else and exported it as an artifact from
    # that location, we won't be able to find it.
    for yf in yaml_files:
        with open(yf) as y_fd:
            yaml_obj = syaml.load(y_fd)
            if 'variables' in yaml_obj and 'stages' in yaml_obj:
                pipeline_yaml = yaml_obj

    if pipeline_yaml:
        tty.debug('\n{0} is likely your pipeline file'.format(yf))

    # Find the install script in the unzipped artifacts and make it executable
    install_script = fs.find(work_dir, 'install.sh')[0]
    st = os.stat(install_script)
    os.chmod(install_script, st.st_mode | stat.S_IEXEC)

    # Find the repro details file.  This just includes some values we wrote
    # during `spack ci rebuild` to make reproduction easier.  E.g. the job
    # name is written here so we can easily find the configuration of the
    # job from the generated pipeline file.
    repro_file = fs.find(work_dir, 'repro.json')[0]
    repro_details = None
    with open(repro_file) as fd:
        repro_details = json.load(fd)

    repro_dir = os.path.dirname(repro_file)
    rel_repro_dir = repro_dir.replace(work_dir, '').lstrip(os.path.sep)

    # Find the spack info text file that should contain the git log
    # of the HEAD commit used during the CI build
    spack_info_file = fs.find(work_dir, 'spack_info.txt')[0]
    with open(spack_info_file) as fd:
        spack_info = fd.read()

    # Access the specific job configuration
    job_name = repro_details['job_name']
    job_yaml = None

    if job_name in pipeline_yaml:
        job_yaml = pipeline_yaml[job_name]

    if job_yaml:
        tty.debug('Found job:')
        tty.debug(job_yaml)

    job_image = None
    setup_result = False
    if 'image' in job_yaml:
        job_image_elt = job_yaml['image']
        if 'name' in job_image_elt:
            job_image = job_image_elt['name']
        else:
            job_image = job_image_elt
        tty.msg('Job ran with the following image: {0}'.format(job_image))

        # Because we found this job was run with a docker image, so we will try
        # to print a "docker run" command that bind-mounts the directory where
        # we extracted the artifacts.

        # Destination of bind-mounted reproduction directory.  It makes for a
        # more faithful reproducer if everything appears to run in the same
        # absolute path used during the CI build.
        mount_as_dir = '/work'
        if repro_details:
            mount_as_dir = repro_details['ci_project_dir']
            mounted_repro_dir = os.path.join(mount_as_dir, rel_repro_dir)

        # We will also try to clone spack from your local checkout and
        # reproduce the state present during the CI build, and put that into
        # the bind-mounted reproducer directory.

        # Regular expressions for parsing that HEAD commit.  If the pipeline
        # was on the gitlab spack mirror, it will have been a merge commit made by
        # gitub and pushed by the sync script.  If the pipeline was run on some
        # environment repo, then the tested spack commit will likely have been
        # a regular commit.
        commit_1 = None
        commit_2 = None
        commit_regex = re.compile(r"commit\s+([^\s]+)")
        merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)")

        # Try the more specific merge commit regex first
        m = merge_commit_regex.search(spack_info)
        if m:
            # This was a merge commit and we captured the parents
            commit_1 = m.group(1)
            commit_2 = m.group(2)
        else:
            # Not a merge commit, just get the commit sha
            m = commit_regex.search(spack_info)
            if m:
                commit_1 = m.group(1)

        setup_result = False
        if commit_1:
            if commit_2:
                setup_result = setup_spack_repro_version(
                    work_dir, commit_2, merge_commit=commit_1)
            else:
                setup_result = setup_spack_repro_version(work_dir, commit_1)

        if not setup_result:
            setup_msg = """
        This can happen if the spack you are using to run this command is not a git
        repo, or if it is a git repo, but it does not have the commits needed to
        recreate the tested merge commit.  If you are trying to reproduce a spack
        PR pipeline job failure, try fetching the latest develop commits from
        mainline spack and make sure you have the most recent commit of the PR
        branch in your local spack repo.  Then run this command again.
        Alternatively, you can also manually clone spack if you know the version
        you want to test.
            """
            tty.error('Failed to automatically setup the tested version of spack '
                      'in your local reproduction directory.')
            print(setup_msg)

    # In cases where CI build was run on a shell runner, it might be useful
    # to see what tags were applied to the job so the user knows what shell
    # runner was used.  But in that case in general, we cannot do nearly as
    # much to set up the reproducer.
    job_tags = None
    if 'tags' in job_yaml:
        job_tags = job_yaml['tags']
        tty.msg('Job ran with the following tags: {0}'.format(job_tags))

    inst_list = []

    # Finally, print out some instructions to reproduce the build
    if job_image:
        inst_list.append('\nRun the following command:\n\n')
        inst_list.append('    $ docker run --rm -v {0}:{1} -ti {2}\n'.format(
            work_dir, mount_as_dir, job_image))
        inst_list.append('\nOnce inside the container:\n\n')
    else:
        inst_list.append('\nOnce on the tagged runner:\n\n')

    if not setup_result:
        inst_list.append('    - Clone spack and acquire tested commit\n')
        inst_list.append('{0}'.format(spack_info))
        spack_root = '<spack-clone-path>'
    else:
        spack_root = '{0}/spack'.format(mount_as_dir)

    inst_list.append('    - Activate the environment\n\n')
    inst_list.append('        $ source {0}/share/spack/setup-env.sh\n'.format(
        spack_root))
    inst_list.append(
        '        $ spack env activate --without-view {0}\n\n'.format(
            mounted_repro_dir if job_image else repro_dir))
    inst_list.append('    - Run the install script\n\n')
    inst_list.append('        $ {0}\n'.format(
        os.path.join(mounted_repro_dir, 'install.sh')
        if job_image else install_script))

    print(''.join(inst_list))
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import base64
import copy
import json
import os
import re
import shutil
import stat
import tempfile
import zipfile

from six import iteritems
from six.moves.urllib.error import HTTPError, URLError
from six.moves.urllib.parse import urlencode
from six.moves.urllib.request import HTTPHandler, Request, build_opener

import llnl.util.filesystem as fs
import llnl.util.tty as tty

import spack
import spack.binary_distribution as bindist
import spack.compilers as compilers
import spack.config as cfg
import spack.environment as ev
import spack.main
import spack.mirror
import spack.paths
import spack.repo
import spack.util.executable as exe
import spack.util.gpg as gpg_util
import spack.util.spack_yaml as syaml
import spack.util.web as web_util
from spack.error import SpackError
from spack.spec import Spec

JOB_RETRY_CONDITIONS = [
    'always',
]

TEMP_STORAGE_MIRROR_NAME = 'ci_temporary_mirror'
SPACK_RESERVED_TAGS = ["public", "protected", "notary"]

spack_gpg = spack.main.SpackCommand('gpg')
spack_compiler = spack.main.SpackCommand('compiler')


class TemporaryDirectory(object):
    def __init__(self):
        self.temporary_directory = tempfile.mkdtemp()

    def __enter__(self):
        return self.temporary_directory

    def __exit__(self, exc_type, exc_value, exc_traceback):
        shutil.rmtree(self.temporary_directory)
        return False


def _create_buildgroup(opener, headers, url, project, group_name, group_type):
    data = {
        "newbuildgroup": group_name,
        "project": project,
        "type": group_type
    }

    enc_data = json.dumps(data).encode('utf-8')

    request = Request(url, data=enc_data, headers=headers)

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200 and response_code != 201:
        msg = 'Creating buildgroup failed (response code = {0}'.format(
            response_code)
        tty.warn(msg)
        return None

    response_text = response.read()
    response_json = json.loads(response_text)
    build_group_id = response_json['id']

    return build_group_id


def _populate_buildgroup(job_names, group_name, project, site,
                         credentials, cdash_url):
    url = "{0}/api/v1/buildgroup.php".format(cdash_url)

    headers = {
        'Authorization': 'Bearer {0}'.format(credentials),
        'Content-Type': 'application/json',
    }

    opener = build_opener(HTTPHandler)

    parent_group_id = _create_buildgroup(
        opener, headers, url, project, group_name, 'Daily')
    group_id = _create_buildgroup(
        opener, headers, url, project, 'Latest {0}'.format(group_name),
        'Latest')

    if not parent_group_id or not group_id:
        msg = 'Failed to create or retrieve buildgroups for {0}'.format(
            group_name)
        tty.warn(msg)
        return

    data = {
        'project': project,
        'buildgroupid': group_id,
        'dynamiclist': [{
            'match': name,
            'parentgroupid': parent_group_id,
            'site': site
        } for name in job_names]
    }

    enc_data = json.dumps(data).encode('utf-8')

    request = Request(url, data=enc_data, headers=headers)
    request.get_method = lambda: 'PUT'

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200:
        msg = 'Error response code ({0}) in _populate_buildgroup'.format(
            response_code)
        tty.warn(msg)


def _is_main_phase(phase_name):
    return True if phase_name == 'specs' else False


def get_job_name(phase, strip_compiler, spec, osarch, build_group):
    """ Given the necessary parts, format the gitlab job name

    Arguments:
        phase (str): Either 'specs' for the main phase, or the name of a
            bootstrapping phase
        strip_compiler (bool): Should compiler be stripped from job name
        spec (spack.spec.Spec): Spec job will build
        osarch: Architecture TODO: (this is a spack.spec.ArchSpec,
            but sphinx doesn't recognize the type and fails).
        build_group (str): Name of build group this job belongs to (a CDash
        notion)

    Returns: The job name
    """
    item_idx = 0
    format_str = ''
    format_args = []

    if phase:
        format_str += '({{{0}}})'.format(item_idx)
        format_args.append(phase)
        item_idx += 1

    format_str += ' {{{0}}}'.format(item_idx)
    format_args.append(spec.name)
    item_idx += 1

    format_str += '/{{{0}}}'.format(item_idx)
    format_args.append(spec.dag_hash(7))
    item_idx += 1

    format_str += ' {{{0}}}'.format(item_idx)
    format_args.append(spec.version)
    item_idx += 1

    if _is_main_phase(phase) is True or strip_compiler is False:
        format_str += ' {{{0}}}'.format(item_idx)
        format_args.append(spec.compiler)
        item_idx += 1

    format_str += ' {{{0}}}'.format(item_idx)
    format_args.append(osarch)
    item_idx += 1

    if build_group:
        format_str += ' {{{0}}}'.format(item_idx)
        format_args.append(build_group)
        item_idx += 1

    return format_str.format(*format_args)


def _get_cdash_build_name(spec, build_group):
    return '{0}@{1}%{2} arch={3} ({4})'.format(
        spec.name, spec.version, spec.compiler, spec.architecture, build_group)


def _remove_reserved_tags(tags):
    """Convenience function to strip reserved tags from jobs"""
    return [tag for tag in tags if tag not in SPACK_RESERVED_TAGS]


def _get_spec_string(spec):
    format_elements = [
        '{name}{@version}',
        '{%compiler}',
    ]

    if spec.architecture:
        format_elements.append(' {arch=architecture}')

    return spec.format(''.join(format_elements))


def _format_root_spec(spec, main_phase, strip_compiler):
    if main_phase is False and strip_compiler is True:
        return '{0}@{1} arch={2}'.format(
            spec.name, spec.version, spec.architecture)
    else:
        return spec.dag_hash()


def _spec_deps_key(s):
    return '{0}/{1}'.format(s.name, s.dag_hash(7))


def _add_dependency(spec_label, dep_label, deps):
    if spec_label == dep_label:
        return
    if spec_label not in deps:
        deps[spec_label] = set()
    deps[spec_label].add(dep_label)


def _get_spec_dependencies(specs, deps, spec_labels, check_index_only=False,
                           mirrors_to_check=None):
    spec_deps_obj = _compute_spec_deps(specs, check_index_only=check_index_only,
                                       mirrors_to_check=mirrors_to_check)

    if spec_deps_obj:
        dependencies = spec_deps_obj['dependencies']
        specs = spec_deps_obj['specs']

        for entry in specs:
            spec_labels[entry['label']] = {
                'spec': Spec(entry['spec']),
                'rootSpec': entry['root_spec'],
                'needs_rebuild': entry['needs_rebuild'],
            }

        for entry in dependencies:
            _add_dependency(entry['spec'], entry['depends'], deps)


def stage_spec_jobs(specs, check_index_only=False, mirrors_to_check=None):
    """Take a set of release specs and generate a list of "stages", where the
        jobs in any stage are dependent only on jobs in previous stages.  This
        allows us to maximize build parallelism within the gitlab-ci framework.

    Arguments:
        specs (Iterable): Specs to build
        check_index_only (bool): Regardless of whether DAG pruning is enabled,
            all configured mirrors are searched to see if binaries for specs
            are up to date on those mirrors.  This flag limits that search to
            the binary cache indices on those mirrors to speed the process up,
            even though there is no garantee the index is up to date.
        mirrors_to_checK: Optional mapping giving mirrors to check instead of
            any configured mirrors.

    Returns: A tuple of information objects describing the specs, dependencies
        and stages:

        spec_labels: A dictionary mapping the spec labels which are made of
            (pkg-name/hash-prefix), to objects containing "rootSpec" and "spec"
            keys.  The root spec is the spec of which this spec is a dependency
            and the spec is the formatted spec string for this spec.

        deps: A dictionary where the keys should also have appeared as keys in
            the spec_labels dictionary, and the values are the set of
            dependencies for that spec.

        stages: An ordered list of sets, each of which contains all the jobs to
            built in that stage.  The jobs are expressed in the same format as
            the keys in the spec_labels and deps objects.

    """

    # The convenience method below, "_remove_satisfied_deps()", does not modify
    # the "deps" parameter.  Instead, it returns a new dictionary where only
    # dependencies which have not yet been satisfied are included in the
    # return value.
    def _remove_satisfied_deps(deps, satisfied_list):
        new_deps = {}

        for key, value in iteritems(deps):
            new_value = set([v for v in value if v not in satisfied_list])
            if new_value:
                new_deps[key] = new_value

        return new_deps

    deps = {}
    spec_labels = {}

    _get_spec_dependencies(specs, deps, spec_labels, check_index_only=check_index_only,
                           mirrors_to_check=mirrors_to_check)

    # Save the original deps, as we need to return them at the end of the
    # function.  In the while loop below, the "dependencies" variable is
    # overwritten rather than being modified each time through the loop,
    # thus preserving the original value of "deps" saved here.
    dependencies = deps
    unstaged = set(spec_labels.keys())
    stages = []

    while dependencies:
        dependents = set(dependencies.keys())
        next_stage = unstaged.difference(dependents)
        stages.append(next_stage)
        unstaged.difference_update(next_stage)
        # Note that "dependencies" is a dictionary mapping each dependent
        # package to the set of not-yet-handled dependencies.  The final step
        # below removes all the dependencies that are handled by this stage.
        dependencies = _remove_satisfied_deps(dependencies, next_stage)

    if unstaged:
        stages.append(unstaged.copy())

    return spec_labels, deps, stages


def _print_staging_summary(spec_labels, dependencies, stages):
    if not stages:
        return

    tty.msg('  Staging summary ([x] means a job needs rebuilding):')
    for stage_index, stage in enumerate(stages):
        tty.msg('    stage {0} ({1} jobs):'.format(stage_index, len(stage)))

        for job in sorted(stage):
            s = spec_labels[job]['spec']
            tty.msg('      [{1}] {0} -> {2}'.format(
                job,
                'x' if spec_labels[job]['needs_rebuild'] else ' ',
                _get_spec_string(s)))


def _compute_spec_deps(spec_list, check_index_only=False, mirrors_to_check=None):
    """
    Computes all the dependencies for the spec(s) and generates a JSON
    object which provides both a list of unique spec names as well as a
    comprehensive list of all the edges in the dependency graph.  For
    example, given a single spec like 'readline@7.0', this function
    generates the following JSON object:

    .. code-block:: JSON

       {
           "dependencies": [
               {
                   "depends": "readline/ip6aiun",
                   "spec": "readline/ip6aiun"
               },
               {
                   "depends": "ncurses/y43rifz",
                   "spec": "readline/ip6aiun"
               },
               {
                   "depends": "ncurses/y43rifz",
                   "spec": "readline/ip6aiun"
               },
               {
                   "depends": "pkgconf/eg355zb",
                   "spec": "ncurses/y43rifz"
               },
               {
                   "depends": "pkgconf/eg355zb",
                   "spec": "readline/ip6aiun"
               }
           ],
           "specs": [
               {
                 "root_spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-...",
                 "spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-highs...",
                 "label": "readline/ip6aiun"
               },
               {
                 "root_spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-...",
                 "spec": "ncurses@6.1%apple-clang@9.1.0 arch=darwin-highsi...",
                 "label": "ncurses/y43rifz"
               },
               {
                 "root_spec": "readline@7.0%apple-clang@9.1.0 arch=darwin-...",
                 "spec": "pkgconf@1.5.4%apple-clang@9.1.0 arch=darwin-high...",
                 "label": "pkgconf/eg355zb"
               }
           ]
       }

    """
    spec_labels = {}

    specs = []
    dependencies = []

    def append_dep(s, d):
        dependencies.append({
            'spec': s,
            'depends': d,
        })

    for spec in spec_list:
        root_spec = spec

        for s in spec.traverse(deptype=all):
            if s.external:
                tty.msg('Will not stage external pkg: {0}'.format(s))
                continue

            up_to_date_mirrors = bindist.get_mirrors_for_spec(
                spec=s, mirrors_to_check=mirrors_to_check, index_only=check_index_only)

            skey = _spec_deps_key(s)
            spec_labels[skey] = {
                'spec': _get_spec_string(s),
                'root': root_spec,
                'needs_rebuild': not up_to_date_mirrors,
            }

            for d in s.dependencies(deptype=all):
                dkey = _spec_deps_key(d)
                if d.external:
                    tty.msg('Will not stage external dep: {0}'.format(d))
                    continue

                append_dep(skey, dkey)

    for spec_label, spec_holder in spec_labels.items():
        specs.append({
            'label': spec_label,
            'spec': spec_holder['spec'],
            'root_spec': spec_holder['root'],
            'needs_rebuild': spec_holder['needs_rebuild'],
        })

    deps_json_obj = {
        'specs': specs,
        'dependencies': dependencies,
    }

    return deps_json_obj


def _spec_matches(spec, match_string):
    return spec.satisfies(match_string)


def _copy_attributes(attrs_list, src_dict, dest_dict):
    for runner_attr in attrs_list:
        if runner_attr in src_dict:
            if runner_attr in dest_dict and runner_attr == 'tags':
                # For 'tags', we combine the lists of tags, while
                # avoiding duplicates
                for tag in src_dict[runner_attr]:
                    if tag not in dest_dict[runner_attr]:
                        dest_dict[runner_attr].append(tag)
            elif runner_attr in dest_dict and runner_attr == 'variables':
                # For 'variables', we merge the dictionaries.  Any conflicts
                # (i.e. 'runner-attributes' has same variable key as the
                # higher level) we resolve by keeping the more specific
                # 'runner-attributes' version.
                for src_key, src_val in src_dict[runner_attr].items():
                    dest_dict[runner_attr][src_key] = copy.deepcopy(
                        src_dict[runner_attr][src_key])
            else:
                dest_dict[runner_attr] = copy.deepcopy(src_dict[runner_attr])


def _find_matching_config(spec, gitlab_ci):
    runner_attributes = {}
    overridable_attrs = [
        'image',
        'tags',
        'variables',
        'before_script',
        'script',
        'after_script',
    ]

    _copy_attributes(overridable_attrs, gitlab_ci, runner_attributes)

    ci_mappings = gitlab_ci['mappings']
    for ci_mapping in ci_mappings:
        for match_string in ci_mapping['match']:
            if _spec_matches(spec, match_string):
                if 'runner-attributes' in ci_mapping:
                    _copy_attributes(overridable_attrs,
                                     ci_mapping['runner-attributes'],
                                     runner_attributes)
                return runner_attributes
    else:
        return None

    return runner_attributes


def _pkg_name_from_spec_label(spec_label):
    return spec_label[:spec_label.index('/')]


def _format_job_needs(phase_name, strip_compilers, dep_jobs,
                      osname, build_group, prune_dag, stage_spec_dict,
                      enable_artifacts_buildcache):
    needs_list = []
    for dep_job in dep_jobs:
        dep_spec_key = _spec_deps_key(dep_job)
        dep_spec_info = stage_spec_dict[dep_spec_key]

        if not prune_dag or dep_spec_info['needs_rebuild']:
            needs_list.append({
                'job': get_job_name(phase_name,
                                    strip_compilers,
                                    dep_job,
                                    dep_job.architecture,
                                    build_group),
                'artifacts': enable_artifacts_buildcache,
            })
    return needs_list


def get_change_revisions():
    """If this is a git repo get the revisions to use when checking
    for changed packages and spack core modules."""
    git_dir = os.path.join(spack.paths.prefix, '.git')
    if os.path.exists(git_dir) and os.path.isdir(git_dir):
        # TODO: This will only find changed packages from the last
        # TODO: commit.  While this may work for single merge commits
        # TODO: when merging the topic branch into the base, it will
        # TODO: require more thought outside of that narrow case.
        return 'HEAD^', 'HEAD'
    return None, None


def get_stack_changed(env_path, rev1='HEAD^', rev2='HEAD'):
    """Given an environment manifest path and two revisions to compare, return
    whether or not the stack was changed.  Returns True if the environment
    manifest changed between the provided revisions (or additionally if the
    `.gitlab-ci.yml` file itself changed).  Returns False otherwise."""
    git = exe.which("git")
    if git:
        with fs.working_dir(spack.paths.prefix):
            git_log = git("diff", "--name-only", rev1, rev2,
                          output=str, error=os.devnull,
                          fail_on_error=False).strip()
            lines = [] if not git_log else re.split(r'\s+', git_log)

            for path in lines:
                if '.gitlab-ci.yml' in path or path in env_path:
                    tty.debug('env represented by {0} changed'.format(
                        env_path))
                    tty.debug('touched file: {0}'.format(path))
                    return True
    return False


def compute_affected_packages(rev1='HEAD^', rev2='HEAD'):
    """Determine which packages were added, removed or changed
    between rev1 and rev2, and return the names as a set"""
    return spack.repo.get_all_package_diffs('ARC', rev1=rev1, rev2=rev2)


def get_spec_filter_list(env, affected_pkgs, dependencies=True, dependents=True):
    """Given a list of package names, and assuming an active and
       concretized environment, return a set of concrete specs from
       the environment corresponding to any of the affected pkgs (or
       optionally to any of their dependencies/dependents).

    Arguments:

        env (spack.environment.Environment): Active concrete environment
        affected_pkgs (List[str]): Affected package names
        dependencies (bool): Include dependencies of affected packages
        dependents (bool): Include dependents of affected pacakges

    Returns:

        A list of concrete specs from the active environment including
        those associated with affected packages, and possible their
        dependencies and dependents as well.
    """
    affected_specs = set()
    all_concrete_specs = env.all_specs()
    tty.debug('All concrete environment specs:')
    for s in all_concrete_specs:
        tty.debug('  {0}/{1}'.format(s.name, s.dag_hash()[:7]))
    for pkg in affected_pkgs:
        env_matches = [s for s in all_concrete_specs if s.name == pkg]
        for match in env_matches:
            affected_specs.add(match)
            if dependencies:
                affected_specs.update(match.traverse(direction='children', root=False))
            if dependents:
                affected_specs.update(match.traverse(direction='parents', root=False))
    return affected_specs


def generate_gitlab_ci_yaml(env, print_summary, output_file,
                            prune_dag=False, check_index_only=False,
                            run_optimizer=False, use_dependencies=False,
                            artifacts_root=None, remote_mirror_override=None):
    """ Generate a gitlab yaml file to run a dynamic child pipeline from
        the spec matrix in the active environment.

    Arguments:
        env (spack.environment.Environment): Activated environment object
            which must contain a gitlab-ci section describing how to map
            specs to runners
        print_summary (bool): Should we print a summary of all the jobs in
            the stages in which they were placed.
        output_file (str): File path where generated file should be written
        prune_dag (bool): If True, do not generate jobs for specs already
            exist built on the mirror.
        check_index_only (bool): If True, attempt to fetch the mirror index
            and only use that to determine whether built specs on the mirror
            this mode results in faster yaml generation time). Otherwise, also
            check each spec directly by url (useful if there is no index or it
            might be out of date).
        run_optimizer (bool): If True, post-process the generated yaml to try
            try to reduce the size (attempts to collect repeated configuration
            and replace with definitions).)
        use_dependencies (bool): If true, use "dependencies" rather than "needs"
            ("needs" allows DAG scheduling).  Useful if gitlab instance cannot
            be configured to handle more than a few "needs" per job.
        artifacts_root (str): Path where artifacts like logs, environment
            files (spack.yaml, spack.lock), etc should be written.  GitLab
            requires this to be within the project directory.
        remote_mirror_override (str): Typically only needed when one spack.yaml
            is used to populate several mirrors with binaries, based on some
            criteria.  Spack protected pipelines populate different mirrors based
            on branch name, facilitated by this option.
    """
    with spack.concretize.disable_compiler_existence_check():
        with env.write_transaction():
            env.concretize()
            env.write()

    yaml_root = ev.config_dict(env.yaml)

    if 'gitlab-ci' not in yaml_root:
        tty.die('Environment yaml does not have "gitlab-ci" section')

    gitlab_ci = yaml_root['gitlab-ci']

    build_group = None
    enable_cdash_reporting = False
    cdash_auth_token = None

    if 'cdash' in yaml_root:
        enable_cdash_reporting = True
        ci_cdash = yaml_root['cdash']
        build_group = ci_cdash['build-group']
        cdash_url = ci_cdash['url']
        cdash_project = ci_cdash['project']
        cdash_site = ci_cdash['site']

        if 'SPACK_CDASH_AUTH_TOKEN' in os.environ:
            tty.verbose("Using CDash auth token from environment")
            cdash_auth_token = os.environ.get('SPACK_CDASH_AUTH_TOKEN')

    prune_untouched_packages = os.environ.get('SPACK_PRUNE_UNTOUCHED', None)
    if prune_untouched_packages:
        # Requested to prune untouched packages, but assume we won't do that
        # unless we're actually in a git repo.
        prune_untouched_packages = False
        rev1, rev2 = get_change_revisions()
        tty.debug('Got following revisions: rev1={0}, rev2={1}'.format(rev1, rev2))
        if rev1 and rev2:
            # If the stack file itself did not change, proceed with pruning
            if not get_stack_changed(env.manifest_path, rev1, rev2):
                prune_untouched_packages = True
                affected_pkgs = compute_affected_packages(rev1, rev2)
                tty.debug('affected pkgs:')
                for p in affected_pkgs:
                    tty.debug('  {0}'.format(p))
                affected_specs = get_spec_filter_list(env, affected_pkgs)
                tty.debug('all affected specs:')
                for s in affected_specs:
                    tty.debug('  {0}'.format(s.name))

    # Downstream jobs will "need" (depend on, for both scheduling and
    # artifacts, which include spack.lock file) this pipeline generation
    # job by both name and pipeline id.  If those environment variables
    # do not exist, then maybe this is just running in a shell, in which
    # case, there is no expectation gitlab will ever run the generated
    # pipeline and those environment variables do not matter.
    generate_job_name = os.environ.get('CI_JOB_NAME', 'job-does-not-exist')
    parent_pipeline_id = os.environ.get('CI_PIPELINE_ID', 'pipeline-does-not-exist')

    # Values: "spack_pull_request", "spack_protected_branch", or not set
    spack_pipeline_type = os.environ.get('SPACK_PIPELINE_TYPE', None)

    spack_buildcache_copy = os.environ.get('SPACK_COPY_BUILDCACHE', None)

    if 'mirrors' not in yaml_root or len(yaml_root['mirrors'].values()) < 1:
        tty.die('spack ci generate requires an env containing a mirror')

    ci_mirrors = yaml_root['mirrors']
    mirror_urls = [url for url in ci_mirrors.values()]
    remote_mirror_url = mirror_urls[0]

    # Check for a list of "known broken" specs that we should not bother
    # trying to build.
    broken_specs_url = ''
    known_broken_specs_encountered = []
    if 'broken-specs-url' in gitlab_ci:
        broken_specs_url = gitlab_ci['broken-specs-url']

    enable_artifacts_buildcache = False
    if 'enable-artifacts-buildcache' in gitlab_ci:
        enable_artifacts_buildcache = gitlab_ci['enable-artifacts-buildcache']

    rebuild_index_enabled = True
    if 'rebuild-index' in gitlab_ci and gitlab_ci['rebuild-index'] is False:
        rebuild_index_enabled = False

    temp_storage_url_prefix = None
    if 'temporary-storage-url-prefix' in gitlab_ci:
        temp_storage_url_prefix = gitlab_ci['temporary-storage-url-prefix']

    bootstrap_specs = []
    phases = []
    if 'bootstrap' in gitlab_ci:
        for phase in gitlab_ci['bootstrap']:
            try:
                phase_name = phase.get('name')
                strip_compilers = phase.get('compiler-agnostic')
            except AttributeError:
                phase_name = phase
                strip_compilers = False
            phases.append({
                'name': phase_name,
                'strip-compilers': strip_compilers,
            })

            for bs in env.spec_lists[phase_name]:
                bootstrap_specs.append({
                    'spec': bs,
                    'phase-name': phase_name,
                    'strip-compilers': strip_compilers,
                })

    phases.append({
        'name': 'specs',
        'strip-compilers': False,
    })

    # If a remote mirror override (alternate buildcache destination) was
    # specified, add it here in case it has already built hashes we might
    # generate.
    mirrors_to_check = None
    if remote_mirror_override:
        if spack_pipeline_type == 'spack_protected_branch':
            # Overriding the main mirror in this case might result
            # in skipping jobs on a release pipeline because specs are
            # up to date in develop.  Eventually we want to notice and take
            # advantage of this by scheduling a job to copy the spec from
            # develop to the release, but until we have that, this makes
            # sure we schedule a rebuild job if the spec isn't already in
            # override mirror.
            mirrors_to_check = {
                'override': remote_mirror_override
            }

        # If we have a remote override and we want generate pipeline using
        # --check-index-only, then the override mirror needs to be added to
        # the configured mirrors when bindist.update() is run, or else we
        # won't fetch its index and include in our local cache.
        spack.mirror.add(
            'ci_pr_mirror', remote_mirror_override, cfg.default_modify_scope())

    pipeline_artifacts_dir = artifacts_root
    if not pipeline_artifacts_dir:
        proj_dir = os.environ.get('CI_PROJECT_DIR', os.getcwd())
        pipeline_artifacts_dir = os.path.join(proj_dir, 'jobs_scratch_dir')

    pipeline_artifacts_dir = os.path.abspath(pipeline_artifacts_dir)
    concrete_env_dir = os.path.join(
        pipeline_artifacts_dir, 'concrete_environment')

    # Now that we've added the mirrors we know about, they should be properly
    # reflected in the environment manifest file, so copy that into the
    # concrete environment directory, along with the spack.lock file.
    if not os.path.exists(concrete_env_dir):
        os.makedirs(concrete_env_dir)
    shutil.copyfile(env.manifest_path,
                    os.path.join(concrete_env_dir, 'spack.yaml'))
    shutil.copyfile(env.lock_path,
                    os.path.join(concrete_env_dir, 'spack.lock'))

    job_log_dir = os.path.join(pipeline_artifacts_dir, 'logs')
    job_repro_dir = os.path.join(pipeline_artifacts_dir, 'reproduction')
    local_mirror_dir = os.path.join(pipeline_artifacts_dir, 'mirror')
    user_artifacts_dir = os.path.join(pipeline_artifacts_dir, 'user_data')

    # We communicate relative paths to the downstream jobs to avoid issues in
    # situations where the CI_PROJECT_DIR varies between the pipeline
    # generation job and the rebuild jobs.  This can happen when gitlab
    # checks out the project into a runner-specific directory, for example,
    # and different runners are picked for generate and rebuild jobs.
    ci_project_dir = os.environ.get('CI_PROJECT_DIR')
    rel_artifacts_root = os.path.relpath(
        pipeline_artifacts_dir, ci_project_dir)
    rel_concrete_env_dir = os.path.relpath(
        concrete_env_dir, ci_project_dir)
    rel_job_log_dir = os.path.relpath(
        job_log_dir, ci_project_dir)
    rel_job_repro_dir = os.path.relpath(
        job_repro_dir, ci_project_dir)
    rel_local_mirror_dir = os.path.relpath(
        local_mirror_dir, ci_project_dir)
    rel_user_artifacts_dir = os.path.relpath(
        user_artifacts_dir, ci_project_dir)

    # Speed up staging by first fetching binary indices from all mirrors
    # (including the override mirror we may have just added above).
    try:
        bindist.binary_index.update()
    except bindist.FetchCacheError as e:
        tty.error(e)

    staged_phases = {}
    try:
        for phase in phases:
            phase_name = phase['name']
            if phase_name == 'specs':
                # Anything in the "specs" of the environment are already
                # concretized by the block at the top of this method, so we
                # only need to find the concrete versions, and then avoid
                # re-concretizing them needlessly later on.
                concrete_phase_specs = [
                    concrete for abstract, concrete in env.concretized_specs()
                    if abstract in env.spec_lists[phase_name]
                ]
            else:
                # Any specs lists in other definitions (but not in the
                # "specs") of the environment are not yet concretized so we
                # have to concretize them explicitly here.
                concrete_phase_specs = env.spec_lists[phase_name]
                with spack.concretize.disable_compiler_existence_check():
                    for phase_spec in concrete_phase_specs:
                        phase_spec.concretize()
            staged_phases[phase_name] = stage_spec_jobs(
                concrete_phase_specs,
                check_index_only=check_index_only,
                mirrors_to_check=mirrors_to_check)
    finally:
        # Clean up remote mirror override if enabled
        if remote_mirror_override:
            spack.mirror.remove('ci_pr_mirror', cfg.default_modify_scope())

    all_job_names = []
    output_object = {}
    job_id = 0
    stage_id = 0

    stage_names = []

    max_length_needs = 0
    max_needs_job = ''

    # If this is configured, spack will fail "spack ci generate" if it
    # generates any hash which exists under the broken specs url.
    broken_spec_urls = None
    if broken_specs_url:
        if broken_specs_url.startswith('http'):
            # To make checking each spec against the list faster, we require
            # a url protocol that allows us to iterate the url in advance.
            tty.msg('Cannot use an http(s) url for broken specs, ignoring')
        else:
            broken_spec_urls = web_util.list_url(broken_specs_url)

    before_script, after_script = None, None
    for phase in phases:
        phase_name = phase['name']
        strip_compilers = phase['strip-compilers']

        main_phase = _is_main_phase(phase_name)
        spec_labels, dependencies, stages = staged_phases[phase_name]

        for stage_jobs in stages:
            stage_name = 'stage-{0}'.format(stage_id)
            stage_names.append(stage_name)
            stage_id += 1

            for spec_label in stage_jobs:
                spec_record = spec_labels[spec_label]
                root_spec = spec_record['rootSpec']
                pkg_name = _pkg_name_from_spec_label(spec_label)
                release_spec = root_spec[pkg_name]
                release_spec_dag_hash = release_spec.dag_hash()

                if prune_untouched_packages:
                    if release_spec not in affected_specs:
                        tty.debug('Pruning {0}, untouched by change.'.format(
                            release_spec.name))
                        spec_record['needs_rebuild'] = False
                        continue

                runner_attribs = _find_matching_config(
                    release_spec, gitlab_ci)

                if not runner_attribs:
                    tty.warn('No match found for {0}, skipping it'.format(
                        release_spec))
                    continue

                tags = [tag for tag in runner_attribs['tags']]

                if spack_pipeline_type is not None:
                    # For spack pipelines "public" and "protected" are reserved tags
                    tags = _remove_reserved_tags(tags)
                    if spack_pipeline_type == 'spack_protected_branch':
                        tags.extend(['aws', 'protected'])
                    elif spack_pipeline_type == 'spack_pull_request':
                        tags.extend(['public'])

                variables = {}
                if 'variables' in runner_attribs:
                    variables.update(runner_attribs['variables'])

                image_name = None
                image_entry = None
                if 'image' in runner_attribs:
                    build_image = runner_attribs['image']
                    try:
                        image_name = build_image.get('name')
                        entrypoint = build_image.get('entrypoint')
                        image_entry = [p for p in entrypoint]
                    except AttributeError:
                        image_name = build_image

                job_script = ['spack env activate --without-view .']

                if artifacts_root:
                    job_script.insert(0, 'cd {0}'.format(concrete_env_dir))

                job_script.extend([
                    'spack ci rebuild'
                ])

                if 'script' in runner_attribs:
                    job_script = [s for s in runner_attribs['script']]

                before_script = None
                if 'before_script' in runner_attribs:
                    before_script = [
                        s for s in runner_attribs['before_script']
                    ]

                after_script = None
                if 'after_script' in runner_attribs:
                    after_script = [s for s in runner_attribs['after_script']]

                osname = str(release_spec.architecture)
                job_name = get_job_name(phase_name, strip_compilers,
                                        release_spec, osname, build_group)

                compiler_action = 'NONE'
                if len(phases) > 1:
                    compiler_action = 'FIND_ANY'
                    if _is_main_phase(phase_name):
                        compiler_action = 'INSTALL_MISSING'

                job_vars = {
                    'SPACK_ROOT_SPEC': _format_root_spec(
                        root_spec, main_phase, strip_compilers),
                    'SPACK_JOB_SPEC_DAG_HASH': release_spec_dag_hash,
                    'SPACK_JOB_SPEC_PKG_NAME': release_spec.name,
                    'SPACK_COMPILER_ACTION': compiler_action
                }

                job_dependencies = []
                if spec_label in dependencies:
                    if enable_artifacts_buildcache:
                        # Get dependencies transitively, so they're all
                        # available in the artifacts buildcache.
                        dep_jobs = [
                            d for d in release_spec.traverse(deptype=all,
                                                             root=False)
                        ]
                    else:
                        # In this case, "needs" is only used for scheduling
                        # purposes, so we only get the direct dependencies.
                        dep_jobs = []
                        for dep_label in dependencies[spec_label]:
                            dep_pkg = _pkg_name_from_spec_label(dep_label)
                            dep_root = spec_labels[dep_label]['rootSpec']
                            dep_jobs.append(dep_root[dep_pkg])

                    job_dependencies.extend(
                        _format_job_needs(phase_name, strip_compilers,
                                          dep_jobs, osname, build_group,
                                          prune_dag, spec_labels,
                                          enable_artifacts_buildcache))

                rebuild_spec = spec_record['needs_rebuild']

                # This next section helps gitlab make sure the right
                # bootstrapped compiler exists in the artifacts buildcache by
                # creating an artificial dependency between this spec and its
                # compiler.  So, if we are in the main phase, and if the
                # compiler we are supposed to use is listed in any of the
                # bootstrap spec lists, then we will add more dependencies to
                # the job (that compiler and maybe it's dependencies as well).
                if _is_main_phase(phase_name):
                    spec_arch_family = (release_spec.architecture
                                                    .target
                                                    .microarchitecture
                                                    .family)
                    compiler_pkg_spec = compilers.pkg_spec_for_compiler(
                        release_spec.compiler)
                    for bs in bootstrap_specs:
                        c_spec = bs['spec']
                        bs_arch = c_spec.architecture
                        bs_arch_family = (bs_arch.target
                                                 .microarchitecture
                                                 .family)
                        if (c_spec.satisfies(compiler_pkg_spec) and
                            bs_arch_family == spec_arch_family):
                            # We found the bootstrap compiler this release spec
                            # should be built with, so for DAG scheduling
                            # purposes, we will at least add the compiler spec
                            # to the jobs "needs".  But if artifact buildcache
                            # is enabled, we'll have to add all transtive deps
                            # of the compiler as well.

                            # Here we check whether the bootstrapped compiler
                            # needs to be rebuilt.  Until compilers are proper
                            # dependencies, we artificially force the spec to
                            # be rebuilt if the compiler targeted to build it
                            # needs to be rebuilt.
                            bs_specs, _, _ = staged_phases[bs['phase-name']]
                            c_spec_key = _spec_deps_key(c_spec)
                            rbld_comp = bs_specs[c_spec_key]['needs_rebuild']
                            rebuild_spec = rebuild_spec or rbld_comp
                            # Also update record so dependents do not fail to
                            # add this spec to their "needs"
                            spec_record['needs_rebuild'] = rebuild_spec

                            dep_jobs = [c_spec]
                            if enable_artifacts_buildcache:
                                dep_jobs = [
                                    d for d in c_spec.traverse(deptype=all)
                                ]

                            job_dependencies.extend(
                                _format_job_needs(bs['phase-name'],
                                                  bs['strip-compilers'],
                                                  dep_jobs,
                                                  str(bs_arch),
                                                  build_group,
                                                  prune_dag,
                                                  bs_specs,
                                                  enable_artifacts_buildcache))
                        else:
                            debug_msg = ''.join([
                                'Considered compiler {0} for spec ',
                                '{1}, but rejected it either because it was ',
                                'not the compiler required by the spec, or ',
                                'because the target arch families of the ',
                                'spec and the compiler did not match'
                            ]).format(c_spec, release_spec)
                            tty.debug(debug_msg)

                if prune_dag and not rebuild_spec:
                    tty.debug('Pruning {0}, does not need rebuild.'.format(
                        release_spec.name))
                    continue

                if (broken_spec_urls is not None and
                        release_spec_dag_hash in broken_spec_urls):
                    known_broken_specs_encountered.append('{0} ({1})'.format(
                        release_spec, release_spec_dag_hash))

                if artifacts_root:
                    job_dependencies.append({
                        'job': generate_job_name,
                        'pipeline': '{0}'.format(parent_pipeline_id)
                    })

                job_vars['SPACK_SPEC_NEEDS_REBUILD'] = str(rebuild_spec)

                if enable_cdash_reporting:
                    cdash_build_name = _get_cdash_build_name(
                        release_spec, build_group)
                    all_job_names.append(cdash_build_name)
                    job_vars['SPACK_CDASH_BUILD_NAME'] = cdash_build_name

                variables.update(job_vars)

                artifact_paths = [
                    rel_job_log_dir,
                    rel_job_repro_dir,
                    rel_user_artifacts_dir
                ]

                if enable_artifacts_buildcache:
                    bc_root = os.path.join(
                        local_mirror_dir, 'build_cache')
                    artifact_paths.extend([os.path.join(bc_root, p) for p in [
                        bindist.tarball_name(release_spec, '.spec.json'),
                        bindist.tarball_directory_name(release_spec),
                    ]])

                job_object = {
                    'stage': stage_name,
                    'variables': variables,
                    'script': job_script,
                    'tags': tags,
                    'artifacts': {
                        'paths': artifact_paths,
                        'when': 'always',
                    },
                    'needs': sorted(job_dependencies, key=lambda d: d['job']),
                    'retry': {
                        'max': 2,
                        'when': JOB_RETRY_CONDITIONS,
                    },
                    'interruptible': True
                }

                length_needs = len(job_dependencies)
                if length_needs > max_length_needs:
                    max_length_needs = length_needs
                    max_needs_job = job_name

                if before_script:
                    job_object['before_script'] = before_script

                if after_script:
                    job_object['after_script'] = after_script

                if image_name:
                    job_object['image'] = image_name
                    if image_entry is not None:
                        job_object['image'] = {
                            'name': image_name,
                            'entrypoint': image_entry,
                        }

                output_object[job_name] = job_object
                job_id += 1

    if print_summary:
        for phase in phases:
            phase_name = phase['name']
            tty.msg('Stages for phase "{0}"'.format(phase_name))
            phase_stages = staged_phases[phase_name]
            _print_staging_summary(*phase_stages)

    tty.debug('{0} build jobs generated in {1} stages'.format(
        job_id, stage_id))

    if job_id > 0:
        tty.debug('The max_needs_job is {0}, with {1} needs'.format(
            max_needs_job, max_length_needs))

    # Use "all_job_names" to populate the build group for this set
    if enable_cdash_reporting and cdash_auth_token:
        try:
            _populate_buildgroup(all_job_names, build_group, cdash_project,
                                 cdash_site, cdash_auth_token, cdash_url)
        except (SpackError, HTTPError, URLError) as err:
            tty.warn('Problem populating buildgroup: {0}'.format(err))
    else:
        tty.warn('Unable to populate buildgroup without CDash credentials')

    service_job_config = None
    if 'service-job-attributes' in gitlab_ci:
        service_job_config = gitlab_ci['service-job-attributes']

    default_attrs = [
        'image',
        'tags',
        'variables',
        'before_script',
        # 'script',
        'after_script',
    ]

    service_job_retries = {
        'max': 2,
        'when': [
            'runner_system_failure',
            'stuck_or_timeout_failure'
        ]
    }

    if job_id > 0:
        if temp_storage_url_prefix:
            # There were some rebuild jobs scheduled, so we will need to
            # schedule a job to clean up the temporary storage location
            # associated with this pipeline.
            stage_names.append('cleanup-temp-storage')
            cleanup_job = {}

            if service_job_config:
                _copy_attributes(default_attrs,
                                 service_job_config,
                                 cleanup_job)

            if 'tags' in cleanup_job:
                service_tags = _remove_reserved_tags(cleanup_job['tags'])
                cleanup_job['tags'] = service_tags

            cleanup_job['stage'] = 'cleanup-temp-storage'
            cleanup_job['script'] = [
                'spack -d mirror destroy --mirror-url {0}/$CI_PIPELINE_ID'.format(
                    temp_storage_url_prefix)
            ]
            cleanup_job['when'] = 'always'
            cleanup_job['retry'] = service_job_retries
            cleanup_job['interruptible'] = True

            output_object['cleanup'] = cleanup_job

        if ('signing-job-attributes' in gitlab_ci and
                spack_pipeline_type == 'spack_protected_branch'):
            # External signing: generate a job to check and sign binary pkgs
            stage_names.append('stage-sign-pkgs')
            signing_job_config = gitlab_ci['signing-job-attributes']
            signing_job = {}

            signing_job_attrs_to_copy = [
                'image',
                'tags',
                'variables',
                'before_script',
                'script',
                'after_script',
            ]

            _copy_attributes(signing_job_attrs_to_copy,
                             signing_job_config,
                             signing_job)

            signing_job_tags = []
            if 'tags' in signing_job:
                signing_job_tags = _remove_reserved_tags(signing_job['tags'])

            for tag in ['aws', 'protected', 'notary']:
                if tag not in signing_job_tags:
                    signing_job_tags.append(tag)
            signing_job['tags'] = signing_job_tags

            signing_job['stage'] = 'stage-sign-pkgs'
            signing_job['when'] = 'always'
            signing_job['retry'] = {
                'max': 2,
                'when': ['always']
            }
            signing_job['interruptible'] = True

            output_object['sign-pkgs'] = signing_job

        if spack_buildcache_copy:
            # Generate a job to copy the contents from wherever the builds are getting
            # pushed to the url specified in the "SPACK_BUILDCACHE_COPY" environment
            # variable.
            src_url = remote_mirror_override or remote_mirror_url
            dest_url = spack_buildcache_copy

            stage_names.append('stage-copy-buildcache')
            copy_job = {
                'stage': 'stage-copy-buildcache',
                'tags': ['spack', 'public', 'medium', 'aws', 'x86_64'],
                'image': 'ghcr.io/spack/python-aws-bash:0.0.1',
                'when': 'on_success',
                'interruptible': True,
                'retry': service_job_retries,
                'script': [
                    '. ./share/spack/setup-env.sh',
                    'spack --version',
                    'aws s3 sync --exclude *index.json* --exclude *pgp* {0} {1}'.format(
                        src_url, dest_url)
                ]
            }

            output_object['copy-mirror'] = copy_job

        if rebuild_index_enabled:
            # Add a final job to regenerate the index
            stage_names.append('stage-rebuild-index')
            final_job = {}

            if service_job_config:
                _copy_attributes(default_attrs,
                                 service_job_config,
                                 final_job)

            if 'tags' in final_job:
                service_tags = _remove_reserved_tags(final_job['tags'])
                final_job['tags'] = service_tags

            index_target_mirror = mirror_urls[0]
            if remote_mirror_override:
                index_target_mirror = remote_mirror_override

            final_job['stage'] = 'stage-rebuild-index'
            final_job['script'] = [
                'spack buildcache update-index --keys -d {0}'.format(
                    index_target_mirror)
            ]
            final_job['when'] = 'always'
            final_job['retry'] = service_job_retries
            final_job['interruptible'] = True

            output_object['rebuild-index'] = final_job

        output_object['stages'] = stage_names

        # Capture the version of spack used to generate the pipeline, transform it
        # into a value that can be passed to "git checkout", and save it in a
        # global yaml variable
        spack_version = spack.main.get_version()
        version_to_clone = None
        v_match = re.match(r"^\d+\.\d+\.\d+$", spack_version)
        if v_match:
            version_to_clone = 'v{0}'.format(v_match.group(0))
        else:
            v_match = re.match(r"^[^-]+-[^-]+-([a-f\d]+)$", spack_version)
            if v_match:
                version_to_clone = v_match.group(1)
            else:
                version_to_clone = spack_version

        output_object['variables'] = {
            'SPACK_ARTIFACTS_ROOT': rel_artifacts_root,
            'SPACK_CONCRETE_ENV_DIR': rel_concrete_env_dir,
            'SPACK_VERSION': spack_version,
            'SPACK_CHECKOUT_VERSION': version_to_clone,
            'SPACK_REMOTE_MIRROR_URL': remote_mirror_url,
            'SPACK_JOB_LOG_DIR': rel_job_log_dir,
            'SPACK_JOB_REPRO_DIR': rel_job_repro_dir,
            'SPACK_LOCAL_MIRROR_DIR': rel_local_mirror_dir,
            'SPACK_PIPELINE_TYPE': str(spack_pipeline_type)
        }

        if remote_mirror_override:
            (output_object['variables']
                          ['SPACK_REMOTE_MIRROR_OVERRIDE']) = remote_mirror_override

        spack_stack_name = os.environ.get('SPACK_CI_STACK_NAME', None)
        if spack_stack_name:
            output_object['variables']['SPACK_CI_STACK_NAME'] = spack_stack_name

        sorted_output = {}
        for output_key, output_value in sorted(output_object.items()):
            sorted_output[output_key] = output_value

        # TODO(opadron): remove this or refactor
        if run_optimizer:
            import spack.ci_optimization as ci_opt
            sorted_output = ci_opt.optimizer(sorted_output)

        # TODO(opadron): remove this or refactor
        if use_dependencies:
            import spack.ci_needs_workaround as cinw
            sorted_output = cinw.needs_to_dependencies(sorted_output)
    else:
        # No jobs were generated
        tty.debug('No specs to rebuild, generating no-op job')
        noop_job = {}

        if service_job_config:
            _copy_attributes(default_attrs,
                             service_job_config,
                             noop_job)

        if 'script' not in noop_job:
            noop_job['script'] = [
                'echo "All specs already up to date, nothing to rebuild."',
            ]

        noop_job['retry'] = service_job_retries

        sorted_output = {'no-specs-to-rebuild': noop_job}

    if known_broken_specs_encountered:
        error_msg = (
            'Pipeline generation failed due to the presence of the '
            'following specs that are known to be broken in develop:\n')
        for broken_spec in known_broken_specs_encountered:
            error_msg += '* {0}\n'.format(broken_spec)
        tty.die(error_msg)

    with open(output_file, 'w') as outf:
        outf.write(syaml.dump_config(sorted_output, default_flow_style=True))


def _url_encode_string(input_string):
    encoded_keyval = urlencode({'donotcare': input_string})
    eq_idx = encoded_keyval.find('=') + 1
    encoded_value = encoded_keyval[eq_idx:]
    return encoded_value


def import_signing_key(base64_signing_key):
    """ Given Base64-encoded gpg key, decode and import it to use for
        signing packages.

    Arguments:
        base64_signing_key (str): A gpg key including the secret key,
            armor-exported and base64 encoded, so it can be stored in a
            gitlab CI variable.  For an example of how to generate such
            a key, see:

        https://github.com/spack/spack-infrastructure/blob/main/gitlab-docker/files/gen-key
    """
    if not base64_signing_key:
        tty.warn('No key found for signing/verifying packages')
        return

    tty.debug('ci.import_signing_key() will attempt to import a key')

    # This command has the side-effect of creating the directory referred
    # to as GNUPGHOME in setup_environment()
    list_output = spack_gpg('list', output=str)

    tty.debug('spack gpg list:')
    tty.debug(list_output)

    decoded_key = base64.b64decode(base64_signing_key)
    if isinstance(decoded_key, bytes):
        decoded_key = decoded_key.decode('utf8')

    with TemporaryDirectory() as tmpdir:
        sign_key_path = os.path.join(tmpdir, 'signing_key')
        with open(sign_key_path, 'w') as fd:
            fd.write(decoded_key)

        key_import_output = spack_gpg('trust', sign_key_path, output=str)
        tty.debug('spack gpg trust {0}'.format(sign_key_path))
        tty.debug(key_import_output)

    # Now print the keys we have for verifying and signing
    trusted_keys_output = spack_gpg('list', '--trusted', output=str)
    signing_keys_output = spack_gpg('list', '--signing', output=str)

    tty.debug('spack gpg list --trusted')
    tty.debug(trusted_keys_output)
    tty.debug('spack gpg list --signing')
    tty.debug(signing_keys_output)


def can_sign_binaries():
    """ Utility method to determine if this spack instance is capable of
        signing binary packages.  This is currently only possible if the
        spack gpg keystore contains exactly one secret key."""
    return len(gpg_util.signing_keys()) == 1


def can_verify_binaries():
    """ Utility method to determin if this spack instance is capable (at
        least in theory) of verifying signed binaries."""
    return len(gpg_util.public_keys()) >= 1


def configure_compilers(compiler_action, scope=None):
    """ Depending on the compiler_action parameter, either turn on the
        install_missing_compilers config option, or find spack compilers,
        or do nothing.  This is used from rebuild jobs in bootstrapping
        pipelines, where in the bootsrapping phase we would pass
        FIND_ANY in case of compiler-agnostic bootstrapping, while in the
        spec building phase we would pass INSTALL_MISSING in order to get
        spack to use the compiler which was built in the previous phase and
        is now sitting in the binary mirror.

    Arguments:
        compiler_action (str): 'FIND_ANY', 'INSTALL_MISSING' have meanings
            described above.  Any other value essentially results in a no-op.
        scope (spack.config.ConfigScope): Optional.  The scope in which to look for
            compilers, in case 'FIND_ANY' was provided.
    """
    if compiler_action == 'INSTALL_MISSING':
        tty.debug('Make sure bootstrapped compiler will be installed')
        config = cfg.get('config')
        config['install_missing_compilers'] = True
        cfg.set('config', config)
    elif compiler_action == 'FIND_ANY':
        tty.debug('Just find any available compiler')
        find_args = ['find']
        if scope:
            find_args.extend(['--scope', scope])
        output = spack_compiler(*find_args)
        tty.debug('spack compiler find')
        tty.debug(output)
        output = spack_compiler('list')
        tty.debug('spack compiler list')
        tty.debug(output)
    else:
        tty.debug('No compiler action to be taken')

    return None


def get_concrete_specs(env, root_spec, job_name, compiler_action):
    """ Build a dictionary of concrete specs relevant to a particular
        rebuild job.  This includes the root spec and the spec to be
        rebuilt (which could be the same).

    Arguments:

        env (spack.environment.Environment): Activated spack environment
            used to get concrete root spec by hash in case compiler_action
            is anthing other than FIND_ANY.
        root_spec (str): If compiler_action is FIND_ANY root_spec is
            a string representation which can be turned directly into
            a spec, otherwise, it's a hash used to index the activated
            spack environment.
        job_name (str): Name of package to be built, used to index the
            concrete root spec and produce the concrete spec to be
            built.
        compiler_action (str): Determines how to interpret the root_spec
            parameter, either as a string representation as a hash.

    Returns:

    .. code-block:: JSON

       {
           "root": "<spec>",
           "<job-pkg-name>": "<spec>",
        }

    """
    spec_map = {
        'root': None,
    }

    if compiler_action == 'FIND_ANY':
        # This corresponds to a bootstrapping phase where we need to
        # rely on any available compiler to build the package (i.e. the
        # compiler needed to be stripped from the spec when we generated
        # the job), and thus we need to concretize the root spec again.
        tty.debug('About to concretize {0}'.format(root_spec))
        concrete_root = Spec(root_spec).concretized()
        tty.debug('Resulting concrete root: {0}'.format(concrete_root))
    else:
        # in this case, either we're relying on Spack to install missing
        # compiler bootstrapped in a previous phase, or else we only had one
        # phase (like a site which already knows what compilers are available
        # on it's runners), so we don't want to concretize that root spec
        # again.  The reason we take this path in the first case (bootstrapped
        # compiler), is that we can't concretize a spec at this point if we're
        # going to ask spack to "install_missing_compilers".
        concrete_root = env.specs_by_hash[root_spec]

    spec_map['root'] = concrete_root
    spec_map[job_name] = concrete_root[job_name]

    return spec_map


def _push_mirror_contents(env, specfile_path, sign_binaries, mirror_url):
    """Unchecked version of the public API, for easier mocking"""
    unsigned = not sign_binaries
    tty.debug('Creating buildcache ({0})'.format(
        'unsigned' if unsigned else 'signed'))
    hashes = env.all_hashes() if env else None
    matches = spack.store.specfile_matches(specfile_path, hashes=hashes)
    push_url = spack.mirror.push_url_from_mirror_url(mirror_url)
    spec_kwargs = {'include_root': True, 'include_dependencies': False}
    kwargs = {
        'force': True,
        'allow_root': True,
        'unsigned': unsigned
    }
    bindist.push(matches, push_url, spec_kwargs, **kwargs)


def push_mirror_contents(env, specfile_path, mirror_url, sign_binaries):
    """ Push one or more binary packages to the mirror.

    Arguments:

        env (spack.environment.Environment): Optional environment.  If
            provided, it is used to make sure binary package to push
            exists in the environment.
        specfile_path (str): Path to spec.json corresponding to built pkg
            to push.
        mirror_url (str): Base url of target mirror
        sign_binaries (bool): If True, spack will attempt to sign binary
            package before pushing.
    """
    try:
        _push_mirror_contents(env, specfile_path, sign_binaries, mirror_url)
    except Exception as inst:
        # If the mirror we're pushing to is on S3 and there's some
        # permissions problem, for example, we can't just target
        # that exception type here, since users of the
        # `spack ci rebuild' may not need or want any dependency
        # on boto3.  So we use the first non-boto exception type
        # in the heirarchy:
        #     boto3.exceptions.S3UploadFailedError
        #     boto3.exceptions.Boto3Error
        #     Exception
        #     BaseException
        #     object
        err_msg = 'Error msg: {0}'.format(inst)
        if any(x in err_msg for x in ['Access Denied', 'InvalidAccessKeyId']):
            tty.msg('Permission problem writing to {0}'.format(
                mirror_url))
            tty.msg(err_msg)
        else:
            raise inst


def copy_stage_logs_to_artifacts(job_spec, job_log_dir):
    """ Looks for spack-build-out.txt in the stage directory of the given
        job_spec, and attempts to copy the file into the directory given
        by job_log_dir.

    Arguments:

        job_spec (spack.spec.Spec): Spec associated with spack install log
        job_log_dir (str): Path into which build log should be copied
    """
    try:
        pkg_cls = spack.repo.path.get_pkg_class(job_spec.name)
        job_pkg = pkg_cls(job_spec)
        tty.debug('job package: {0.fullname}'.format(job_pkg))
        stage_dir = job_pkg.stage.path
        tty.debug('stage dir: {0}'.format(stage_dir))
        build_out_src = os.path.join(stage_dir, 'spack-build-out.txt')
        build_out_dst = os.path.join(
            job_log_dir, 'spack-build-out.txt')
        tty.debug('Copying build log ({0}) to artifacts ({1})'.format(
            build_out_src, build_out_dst))
        shutil.copyfile(build_out_src, build_out_dst)
    except Exception as inst:
        msg = ('Unable to copy build logs from stage to artifacts '
               'due to exception: {0}').format(inst)
        tty.error(msg)


def download_and_extract_artifacts(url, work_dir):
    """ Look for gitlab artifacts.zip at the given url, and attempt to download
        and extract the contents into the given work_dir

    Arguments:

        url (str): Complete url to artifacts.zip file
        work_dir (str): Path to destination where artifacts should be extracted
    """
    tty.msg('Fetching artifacts from: {0}\n'.format(url))

    headers = {
        'Content-Type': 'application/zip',
    }

    token = os.environ.get('GITLAB_PRIVATE_TOKEN', None)
    if token:
        headers['PRIVATE-TOKEN'] = token

    opener = build_opener(HTTPHandler)

    request = Request(url, headers=headers)
    request.get_method = lambda: 'GET'

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200:
        msg = 'Error response code ({0}) in reproduce_ci_job'.format(
            response_code)
        raise SpackError(msg)

    artifacts_zip_path = os.path.join(work_dir, 'artifacts.zip')

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    with open(artifacts_zip_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)

    zip_file = zipfile.ZipFile(artifacts_zip_path)
    zip_file.extractall(work_dir)
    zip_file.close()

    os.remove(artifacts_zip_path)


def get_spack_info():
    """ If spack is running from a git repo, return the most recent git log
        entry, otherwise, return a string containing the spack version. """
    git_path = os.path.join(spack.paths.prefix, ".git")
    if os.path.exists(git_path):
        git = exe.which("git")
        if git:
            with fs.working_dir(spack.paths.prefix):
                git_log = git("log", "-1",
                              output=str, error=os.devnull,
                              fail_on_error=False)

            return git_log

    return 'no git repo, use spack {0}'.format(spack.spack_version)


def setup_spack_repro_version(repro_dir, checkout_commit, merge_commit=None):
    """ Look in the local spack clone to find the checkout_commit, and if
        provided, the merge_commit given as arguments.  If those commits can
        be found locally, then clone spack and attempt to recreate a merge
        commit with the same parent commits as tested in gitlab.  This looks
        something like 1) git clone repo && cd repo 2) git checkout
        <checkout_commit> 3) git merge <merge_commit>.  If there is no
        merge_commit provided, then skip step (3).

    Arguments:

        repro_dir (str): Location where spack should be cloned
        checkout_commit (str): SHA of PR branch commit
        merge_commit (str): SHA of target branch parent

    Returns: True if git repo state was successfully recreated, or False
        otherwise.
    """
    # figure out the path to the spack git version being used for the
    # reproduction
    print('checkout_commit: {0}'.format(checkout_commit))
    print('merge_commit: {0}'.format(merge_commit))

    dot_git_path = os.path.join(spack.paths.prefix, ".git")
    if not os.path.exists(dot_git_path):
        tty.error('Unable to find the path to your local spack clone')
        return False

    spack_git_path = spack.paths.prefix

    git = exe.which("git")
    if not git:
        tty.error("reproduction of pipeline job requires git")
        return False

    # Check if we can find the tested commits in your local spack repo
    with fs.working_dir(spack_git_path):
        git("log", "-1", checkout_commit, output=str, error=os.devnull,
            fail_on_error=False)

        if git.returncode != 0:
            tty.error('Missing commit: {0}'.format(checkout_commit))
            return False

        if merge_commit:
            git("log", "-1", merge_commit, output=str, error=os.devnull,
                fail_on_error=False)

            if git.returncode != 0:
                tty.error('Missing commit: {0}'.format(merge_commit))
                return False

    # Next attempt to clone your local spack repo into the repro dir
    with fs.working_dir(repro_dir):
        clone_out = git("clone", spack_git_path, "spack",
                        output=str, error=os.devnull,
                        fail_on_error=False)

        if git.returncode != 0:
            tty.error('Unable to clone your local spack repo:')
            tty.msg(clone_out)
            return False

    # Finally, attempt to put the cloned repo into the same state used during
    # the pipeline build job
    repro_spack_path = os.path.join(repro_dir, 'spack')
    with fs.working_dir(repro_spack_path):
        co_out = git("checkout", checkout_commit,
                     output=str, error=os.devnull,
                     fail_on_error=False)

        if git.returncode != 0:
            tty.error('Unable to checkout {0}'.format(checkout_commit))
            tty.msg(co_out)
            return False

        if merge_commit:
            merge_out = git("-c", "user.name=cirepro", "-c",
                            "user.email=user@email.org", "merge",
                            "--no-edit", merge_commit,
                            output=str, error=os.devnull,
                            fail_on_error=False)

            if git.returncode != 0:
                tty.error('Unable to merge {0}'.format(merge_commit))
                tty.msg(merge_out)
                return False

    return True


def reproduce_ci_job(url, work_dir):
    """ Given a url to gitlab artifacts.zip from a failed 'spack ci rebuild' job,
        attempt to setup an environment in which the failure can be reproduced
        locally.  This entails the following:

        First download and extract artifacts.  Then look through those artifacts
        to glean some information needed for the reproduer (e.g. one of the
        artifacts contains information about the version of spack tested by
        gitlab, another is the generated pipeline yaml containing details
        of the job like the docker image used to run it).  The output of this
        function is a set of printed instructions for running docker and then
        commands to run to reproduce the build once inside the container.
    """
    download_and_extract_artifacts(url, work_dir)

    lock_file = fs.find(work_dir, 'spack.lock')[0]
    concrete_env_dir = os.path.dirname(lock_file)

    tty.debug('Concrete environment directory: {0}'.format(
        concrete_env_dir))

    yaml_files = fs.find(work_dir, ['*.yaml', '*.yml'])

    tty.debug('yaml files:')
    for yaml_file in yaml_files:
        tty.debug('  {0}'.format(yaml_file))

    pipeline_yaml = None

    # Try to find the dynamically generated pipeline yaml file in the
    # reproducer.  If the user did not put it in the artifacts root,
    # but rather somewhere else and exported it as an artifact from
    # that location, we won't be able to find it.
    for yf in yaml_files:
        with open(yf) as y_fd:
            yaml_obj = syaml.load(y_fd)
            if 'variables' in yaml_obj and 'stages' in yaml_obj:
                pipeline_yaml = yaml_obj

    if pipeline_yaml:
        tty.debug('\n{0} is likely your pipeline file'.format(yf))

    # Find the install script in the unzipped artifacts and make it executable
    install_script = fs.find(work_dir, 'install.sh')[0]
    st = os.stat(install_script)
    os.chmod(install_script, st.st_mode | stat.S_IEXEC)

    # Find the repro details file.  This just includes some values we wrote
    # during `spack ci rebuild` to make reproduction easier.  E.g. the job
    # name is written here so we can easily find the configuration of the
    # job from the generated pipeline file.
    repro_file = fs.find(work_dir, 'repro.json')[0]
    repro_details = None
    with open(repro_file) as fd:
        repro_details = json.load(fd)

    repro_dir = os.path.dirname(repro_file)
    rel_repro_dir = repro_dir.replace(work_dir, '').lstrip(os.path.sep)

    # Find the spack info text file that should contain the git log
    # of the HEAD commit used during the CI build
    spack_info_file = fs.find(work_dir, 'spack_info.txt')[0]
    with open(spack_info_file) as fd:
        spack_info = fd.read()

    # Access the specific job configuration
    job_name = repro_details['job_name']
    job_yaml = None

    if job_name in pipeline_yaml:
        job_yaml = pipeline_yaml[job_name]

    if job_yaml:
        tty.debug('Found job:')
        tty.debug(job_yaml)

    job_image = None
    setup_result = False
    if 'image' in job_yaml:
        job_image_elt = job_yaml['image']
        if 'name' in job_image_elt:
            job_image = job_image_elt['name']
        else:
            job_image = job_image_elt
        tty.msg('Job ran with the following image: {0}'.format(job_image))

        # Because we found this job was run with a docker image, so we will try
        # to print a "docker run" command that bind-mounts the directory where
        # we extracted the artifacts.

        # Destination of bind-mounted reproduction directory.  It makes for a
        # more faithful reproducer if everything appears to run in the same
        # absolute path used during the CI build.
        mount_as_dir = '/work'
        if repro_details:
            mount_as_dir = repro_details['ci_project_dir']
            mounted_repro_dir = os.path.join(mount_as_dir, rel_repro_dir)

        # We will also try to clone spack from your local checkout and
        # reproduce the state present during the CI build, and put that into
        # the bind-mounted reproducer directory.

        # Regular expressions for parsing that HEAD commit.  If the pipeline
        # was on the gitlab spack mirror, it will have been a merge commit made by
        # gitub and pushed by the sync script.  If the pipeline was run on some
        # environment repo, then the tested spack commit will likely have been
        # a regular commit.
        commit_1 = None
        commit_2 = None
        commit_regex = re.compile(r"commit\s+([^\s]+)")
        merge_commit_regex = re.compile(r"Merge\s+([^\s]+)\s+into\s+([^\s]+)")

        # Try the more specific merge commit regex first
        m = merge_commit_regex.search(spack_info)
        if m:
            # This was a merge commit and we captured the parents
            commit_1 = m.group(1)
            commit_2 = m.group(2)
        else:
            # Not a merge commit, just get the commit sha
            m = commit_regex.search(spack_info)
            if m:
                commit_1 = m.group(1)

        setup_result = False
        if commit_1:
            if commit_2:
                setup_result = setup_spack_repro_version(
                    work_dir, commit_2, merge_commit=commit_1)
            else:
                setup_result = setup_spack_repro_version(work_dir, commit_1)

        if not setup_result:
            setup_msg = """
        This can happen if the spack you are using to run this command is not a git
        repo, or if it is a git repo, but it does not have the commits needed to
        recreate the tested merge commit.  If you are trying to reproduce a spack
        PR pipeline job failure, try fetching the latest develop commits from
        mainline spack and make sure you have the most recent commit of the PR
        branch in your local spack repo.  Then run this command again.
        Alternatively, you can also manually clone spack if you know the version
        you want to test.
            """
            tty.error('Failed to automatically setup the tested version of spack '
                      'in your local reproduction directory.')
            print(setup_msg)

    # In cases where CI build was run on a shell runner, it might be useful
    # to see what tags were applied to the job so the user knows what shell
    # runner was used.  But in that case in general, we cannot do nearly as
    # much to set up the reproducer.
    job_tags = None
    if 'tags' in job_yaml:
        job_tags = job_yaml['tags']
        tty.msg('Job ran with the following tags: {0}'.format(job_tags))

    inst_list = []

    # Finally, print out some instructions to reproduce the build
    if job_image:
        inst_list.append('\nRun the following command:\n\n')
        inst_list.append('    $ docker run --rm -v {0}:{1} -ti {2}\n'.format(
            work_dir, mount_as_dir, job_image))
        inst_list.append('\nOnce inside the container:\n\n')
    else:
        inst_list.append('\nOnce on the tagged runner:\n\n')

    if not setup_result:
        inst_list.append('    - Clone spack and acquire tested commit\n')
        inst_list.append('{0}'.format(spack_info))
        spack_root = '<spack-clone-path>'
    else:
        spack_root = '{0}/spack'.format(mount_as_dir)

    inst_list.append('    - Activate the environment\n\n')
    inst_list.append('        $ source {0}/share/spack/setup-env.sh\n'.format(
        spack_root))
    inst_list.append(
        '        $ spack env activate --without-view {0}\n\n'.format(
            mounted_repro_dir if job_image else repro_dir))
    inst_list.append('    - Run the install script\n\n')
    inst_list.append('        $ {0}\n'.format(
        os.path.join(mounted_repro_dir, 'install.sh')
        if job_image else install_script))

    print(''.join(inst_list))