From eb67497020f64de7a10f683eda0177a637b4ccf0 Mon Sep 17 00:00:00 2001
From: "Adam J. Stewart" <ajstewart426@gmail.com>
Date: Thu, 22 Dec 2022 11:31:40 -0600
Subject: ML CI: Linux x86_64 (#34299)

* ML CI: Linux x86_64

* Update comments

* Rename again

* Rename comments

* Update to match other arches

* No compiler

* Compiler was wrong anyway

* Faster TF
---
 share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml  | 106 ++++++-------
 .../cloud_pipelines/stacks/ml-cpu/spack.yaml       | 155 -------------------
 .../cloud_pipelines/stacks/ml-cuda/spack.yaml      | 158 -------------------
 .../stacks/ml-linux-x86_64-cpu/spack.yaml          | 164 ++++++++++++++++++++
 .../stacks/ml-linux-x86_64-cuda/spack.yaml         | 167 ++++++++++++++++++++
 .../stacks/ml-linux-x86_64-rocm/spack.yaml         | 172 +++++++++++++++++++++
 .../cloud_pipelines/stacks/ml-rocm/spack.yaml      | 163 -------------------
 7 files changed, 556 insertions(+), 529 deletions(-)
 delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml
 delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml
 create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml
 create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml
 create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
 delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml

(limited to 'share')

diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
index 8122cd7f35..8800387436 100644
--- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
+++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
@@ -760,122 +760,122 @@ tutorial-protected-build:
     - artifacts: True
       job: tutorial-protected-generate
 
-########################################
-# Machine Learning (CPU)
-########################################
-.ml-cpu:
+#######################################
+# Machine Learning - Linux x86_64 (CPU)
+#######################################
+.ml-linux-x86_64-cpu:
   variables:
-    SPACK_CI_STACK_NAME: ml-cpu
+    SPACK_CI_STACK_NAME: ml-linux-x86_64-cpu
 
-.ml-cpu-generate:
-  extends: .ml-cpu
+.ml-linux-x86_64-cpu-generate:
+  extends: .ml-linux-x86_64-cpu
   image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
   tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
 
-ml-cpu-pr-generate:
-  extends: [ ".ml-cpu-generate", ".pr-generate"]
+ml-linux-x86_64-cpu-pr-generate:
+  extends: [ ".ml-linux-x86_64-cpu-generate", ".pr-generate"]
 
-ml-cpu-protected-generate:
-  extends: [ ".ml-cpu-generate", ".protected-generate"]
+ml-linux-x86_64-cpu-protected-generate:
+  extends: [ ".ml-linux-x86_64-cpu-generate", ".protected-generate"]
 
-ml-cpu-pr-build:
-  extends: [ ".ml-cpu", ".pr-build" ]
+ml-linux-x86_64-cpu-pr-build:
+  extends: [ ".ml-linux-x86_64-cpu", ".pr-build" ]
   trigger:
     include:
       - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
-        job: ml-cpu-pr-generate
+        job: ml-linux-x86_64-cpu-pr-generate
     strategy: depend
   needs:
     - artifacts: True
-      job: ml-cpu-pr-generate
+      job: ml-linux-x86_64-cpu-pr-generate
 
-ml-cpu-protected-build:
-  extends: [ ".ml-cpu", ".protected-build" ]
+ml-linux-x86_64-cpu-protected-build:
+  extends: [ ".ml-linux-x86_64-cpu", ".protected-build" ]
   trigger:
     include:
       - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
-        job: ml-cpu-protected-generate
+        job: ml-linux-x86_64-cpu-protected-generate
     strategy: depend
   needs:
     - artifacts: True
-      job: ml-cpu-protected-generate
+      job: ml-linux-x86_64-cpu-protected-generate
 
 ########################################
-# Machine Learning (CUDA)
+# Machine Learning - Linux x86_64 (CUDA)
 ########################################
-.ml-cuda:
+.ml-linux-x86_64-cuda:
   variables:
-    SPACK_CI_STACK_NAME: ml-cuda
+    SPACK_CI_STACK_NAME: ml-linux-x86_64-cuda
 
-.ml-cuda-generate:
-  extends: .ml-cuda
+.ml-linux-x86_64-cuda-generate:
+  extends: .ml-linux-x86_64-cuda
   image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
   tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
 
-ml-cuda-pr-generate:
-  extends: [ ".ml-cuda-generate", ".pr-generate"]
+ml-linux-x86_64-cuda-pr-generate:
+  extends: [ ".ml-linux-x86_64-cuda-generate", ".pr-generate"]
 
-ml-cuda-protected-generate:
-  extends: [ ".ml-cuda-generate", ".protected-generate"]
+ml-linux-x86_64-cuda-protected-generate:
+  extends: [ ".ml-linux-x86_64-cuda-generate", ".protected-generate"]
 
-ml-cuda-pr-build:
-  extends: [ ".ml-cuda", ".pr-build" ]
+ml-linux-x86_64-cuda-pr-build:
+  extends: [ ".ml-linux-x86_64-cuda", ".pr-build" ]
   trigger:
     include:
       - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
-        job: ml-cuda-pr-generate
+        job: ml-linux-x86_64-cuda-pr-generate
     strategy: depend
   needs:
     - artifacts: True
-      job: ml-cuda-pr-generate
+      job: ml-linux-x86_64-cuda-pr-generate
 
-ml-cuda-protected-build:
-  extends: [ ".ml-cuda", ".protected-build" ]
+ml-linux-x86_64-cuda-protected-build:
+  extends: [ ".ml-linux-x86_64-cuda", ".protected-build" ]
   trigger:
     include:
       - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
-        job: ml-cuda-protected-generate
+        job: ml-linux-x86_64-cuda-protected-generate
     strategy: depend
   needs:
     - artifacts: True
-      job: ml-cuda-protected-generate
+      job: ml-linux-x86_64-cuda-protected-generate
 
 ########################################
-# Machine Learning (ROCm)
+# Machine Learning - Linux x86_64 (ROCm)
 ########################################
-.ml-rocm:
+.ml-linux-x86_64-rocm:
   variables:
-    SPACK_CI_STACK_NAME: ml-rocm
+    SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
 
-.ml-rocm-generate:
-  extends: .ml-rocm
+.ml-linux-x86_64-rocm-generate:
+  extends: .ml-linux-x86_64-rocm
   image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
   tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
 
-ml-rocm-pr-generate:
-  extends: [ ".ml-rocm-generate", ".pr-generate"]
+ml-linux-x86_64-rocm-pr-generate:
+  extends: [ ".ml-linux-x86_64-rocm-generate", ".pr-generate"]
 
-ml-rocm-protected-generate:
-  extends: [ ".ml-rocm-generate", ".protected-generate"]
+ml-linux-x86_64-rocm-protected-generate:
+  extends: [ ".ml-linux-x86_64-rocm-generate", ".protected-generate"]
 
-ml-rocm-pr-build:
-  extends: [ ".ml-rocm", ".pr-build" ]
+ml-linux-x86_64-rocm-pr-build:
+  extends: [ ".ml-linux-x86_64-rocm", ".pr-build" ]
   trigger:
     include:
       - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
-        job: ml-rocm-pr-generate
+        job: ml-linux-x86_64-rocm-pr-generate
     strategy: depend
   needs:
     - artifacts: True
-      job: ml-rocm-pr-generate
+      job: ml-linux-x86_64-rocm-pr-generate
 
-ml-rocm-protected-build:
-  extends: [ ".ml-rocm", ".protected-build" ]
+ml-linux-x86_64-rocm-protected-build:
+  extends: [ ".ml-linux-x86_64-rocm", ".protected-build" ]
   trigger:
     include:
       - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
-        job: ml-rocm-protected-generate
+        job: ml-linux-x86_64-rocm-protected-generate
     strategy: depend
   needs:
     - artifacts: True
-      job: ml-rocm-protected-generate
+      job: ml-linux-x86_64-rocm-protected-generate
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml
deleted file mode 100644
index ec2ac30b8a..0000000000
--- a/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml
+++ /dev/null
@@ -1,155 +0,0 @@
-spack:
-  view: false
-
-  concretizer:
-    reuse: false
-    unify: false
-
-  config:
-    build_jobs: 32
-    concretizer: clingo
-    install_tree:
-      root: /home/software/spack
-      padded_length: 384
-      projections:
-        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
-
-  packages:
-    all:
-      compiler: [gcc@11.2.0]
-      target: [x86_64_v3]
-      variants: ~cuda~rocm
-
-  specs:
-    # Horovod
-    - py-horovod
-
-    # Hugging Face
-    - py-transformers
-
-    # JAX
-    - py-jax
-    - py-jaxlib
-
-    # Keras
-    - py-keras
-    - py-keras-applications
-    - py-keras-preprocessing
-    - py-keras2onnx
-
-    # PyTorch
-    - py-botorch
-    - py-efficientnet-pytorch
-    - py-gpytorch
-    - py-kornia
-    - py-pytorch-gradual-warmup-lr
-    - py-pytorch-lightning
-    - py-segmentation-models-pytorch
-    - py-timm
-    - py-torch
-    - py-torch-cluster
-    - py-torch-geometric
-    - py-torch-nvidia-apex
-    - py-torch-scatter
-    - py-torch-sparse
-    - py-torch-spline-conv
-    - py-torchaudio
-    - py-torchdata
-    - py-torchfile
-    - py-torchgeo
-    - py-torchmeta
-    - py-torchmetrics
-    - py-torchtext
-    - py-torchvision
-    - py-vector-quantize-pytorch
-
-    # scikit-learn
-    - py-scikit-learn
-    - py-scikit-learn-extra
-
-    # TensorBoard
-    - py-tensorboard
-    - py-tensorboard-data-server
-    - py-tensorboard-plugin-wit
-    - py-tensorboardx
-
-    # TensorFlow
-    - py-tensorflow
-    - py-tensorflow-datasets
-    - py-tensorflow-estimator
-    - py-tensorflow-hub
-    - py-tensorflow-metadata
-    - py-tensorflow-probability
-
-    # XGBoost
-    - py-xgboost
-    # - r-xgboost
-    - xgboost
-
-  mirrors: { "mirror": "s3://spack-binaries/develop/ml-cpu" }
-
-  gitlab-ci:
-    script:
-      - uname -a || true
-      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
-      - nproc
-      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
-      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
-      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
-      - . "./share/spack/setup-env.sh"
-      - spack --version
-      - spack arch
-      - cd ${SPACK_CONCRETE_ENV_DIR}
-      - spack env activate --without-view .
-      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
-      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
-      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
-      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
-      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
-    after_script:
-      - cat /proc/loadavg || true
-
-    match_behavior: first
-    mappings:
-      - match:
-          - llvm
-          - py-torch
-        runner-attributes:
-          tags: [ "spack", "huge", "x86_64_v4" ]
-          variables:
-            CI_JOB_SIZE: huge
-            KUBERNETES_CPU_REQUEST: 11000m
-            KUBERNETES_MEMORY_REQUEST: 42G
-      - match:
-          - "@:"
-        runner-attributes:
-          tags: [ "spack", "large", "x86_64_v4" ]
-          variables:
-            CI_JOB_SIZE: large
-            KUBERNETES_CPU_REQUEST: 8000m
-            KUBERNETES_MEMORY_REQUEST: 12G
-
-    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
-
-    broken-specs-url: "s3://spack-binaries/broken-specs"
-
-    service-job-attributes:
-      before_script:
-        - . "./share/spack/setup-env.sh"
-        - spack --version
-      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
-      tags: ["spack", "public", "x86_64_v4"]
-
-    signing-job-attributes:
-      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
-      tags: ["spack", "aws"]
-      script:
-        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
-        - /sign.sh
-        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
-
-  cdash:
-    build-group: Machine Learning
-    url: https://cdash.spack.io
-    project: Spack Testing
-    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml
deleted file mode 100644
index d0c9823c40..0000000000
--- a/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-spack:
-  view: false
-
-  concretizer:
-    reuse: false
-    unify: false
-
-  config:
-    build_jobs: 32
-    concretizer: clingo
-    install_tree:
-      root: /home/software/spack
-      padded_length: 384
-      projections:
-        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
-
-  packages:
-    all:
-      compiler: [gcc@11.2.0]
-      target: [x86_64_v3]
-      variants: ~rocm+cuda cuda_arch=80
-    llvm:
-      # https://github.com/spack/spack/issues/27999
-      require: ~cuda
-
-  specs:
-    # Horovod
-    - py-horovod
-
-    # Hugging Face
-    - py-transformers
-
-    # JAX
-    - py-jax
-    - py-jaxlib
-
-    # Keras
-    - py-keras
-    - py-keras-applications
-    - py-keras-preprocessing
-    - py-keras2onnx
-
-    # PyTorch
-    - py-botorch
-    - py-efficientnet-pytorch
-    - py-gpytorch
-    - py-kornia
-    - py-pytorch-gradual-warmup-lr
-    - py-pytorch-lightning
-    - py-segmentation-models-pytorch
-    - py-timm
-    - py-torch
-    - py-torch-cluster
-    - py-torch-geometric
-    - py-torch-nvidia-apex
-    - py-torch-scatter
-    - py-torch-sparse
-    - py-torch-spline-conv
-    - py-torchaudio
-    - py-torchdata
-    - py-torchfile
-    - py-torchgeo
-    - py-torchmeta
-    - py-torchmetrics
-    - py-torchtext
-    - py-torchvision
-    - py-vector-quantize-pytorch
-
-    # scikit-learn
-    - py-scikit-learn
-    - py-scikit-learn-extra
-
-    # TensorBoard
-    - py-tensorboard
-    - py-tensorboard-data-server
-    - py-tensorboard-plugin-wit
-    - py-tensorboardx
-
-    # TensorFlow
-    - py-tensorflow
-    - py-tensorflow-datasets
-    - py-tensorflow-estimator
-    - py-tensorflow-hub
-    - py-tensorflow-metadata
-    - py-tensorflow-probability
-
-    # XGBoost
-    - py-xgboost
-    # - r-xgboost
-    - xgboost
-
-  mirrors: { "mirror": "s3://spack-binaries/develop/ml-cuda" }
-
-  gitlab-ci:
-    script:
-      - uname -a || true
-      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
-      - nproc
-      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
-      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
-      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
-      - . "./share/spack/setup-env.sh"
-      - spack --version
-      - spack arch
-      - cd ${SPACK_CONCRETE_ENV_DIR}
-      - spack env activate --without-view .
-      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
-      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
-      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
-      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
-      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
-    after_script:
-      - cat /proc/loadavg || true
-
-    match_behavior: first
-    mappings:
-      - match:
-          - llvm
-          - py-torch
-        runner-attributes:
-          tags: [ "spack", "huge", "x86_64_v4" ]
-          variables:
-            CI_JOB_SIZE: huge
-            KUBERNETES_CPU_REQUEST: 11000m
-            KUBERNETES_MEMORY_REQUEST: 42G
-      - match:
-          - "@:"
-        runner-attributes:
-          tags: [ "spack", "large", "x86_64_v4" ]
-          variables:
-            CI_JOB_SIZE: large
-            KUBERNETES_CPU_REQUEST: 8000m
-            KUBERNETES_MEMORY_REQUEST: 12G
-
-    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
-
-    broken-specs-url: "s3://spack-binaries/broken-specs"
-
-    service-job-attributes:
-      before_script:
-        - . "./share/spack/setup-env.sh"
-        - spack --version
-      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
-      tags: ["spack", "public", "x86_64_v4"]
-
-    signing-job-attributes:
-      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
-      tags: ["spack", "aws"]
-      script:
-        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
-        - /sign.sh
-        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
-
-  cdash:
-    build-group: Machine Learning
-    url: https://cdash.spack.io
-    project: Spack Testing
-    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml
new file mode 100644
index 0000000000..bfd5020cea
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml
@@ -0,0 +1,164 @@
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: false
+
+  config:
+    build_jobs: 32
+    concretizer: clingo
+    install_tree:
+      root: /home/software/spack
+      padded_length: 384
+      projections:
+        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
+
+  packages:
+    all:
+      target: [x86_64_v3]
+      variants: ~cuda~rocm
+
+  definitions:
+    - packages:
+      # Horovod
+      - py-horovod
+
+      # Hugging Face
+      - py-transformers
+
+      # JAX
+      - py-jax
+      - py-jaxlib
+
+      # Keras
+      - py-keras
+      - py-keras-applications
+      - py-keras-preprocessing
+      - py-keras2onnx
+
+      # PyTorch
+      - py-botorch
+      - py-efficientnet-pytorch
+      - py-gpytorch
+      - py-kornia
+      - py-pytorch-gradual-warmup-lr
+      - py-pytorch-lightning
+      - py-segmentation-models-pytorch
+      - py-timm
+      - py-torch
+      - py-torch-cluster
+      - py-torch-geometric
+      - py-torch-nvidia-apex
+      - py-torch-scatter
+      - py-torch-sparse
+      - py-torch-spline-conv
+      - py-torchaudio
+      - py-torchdata
+      - py-torchfile
+      - py-torchgeo
+      - py-torchmeta
+      - py-torchmetrics
+      - py-torchtext
+      - py-torchvision
+      - py-vector-quantize-pytorch
+
+      # scikit-learn
+      - py-scikit-learn
+      - py-scikit-learn-extra
+
+      # TensorBoard
+      - py-tensorboard
+      - py-tensorboard-data-server
+      - py-tensorboard-plugin-wit
+      - py-tensorboardx
+
+      # TensorFlow
+      - py-tensorflow
+      - py-tensorflow-datasets
+      - py-tensorflow-estimator
+      - py-tensorflow-hub
+      - py-tensorflow-metadata
+      - py-tensorflow-probability
+
+      # XGBoost
+      - py-xgboost
+      # - r-xgboost
+      - xgboost
+
+    - arch:
+      - target=x86_64_v3
+
+  specs:
+    - matrix:
+      - [$packages]
+      - [$arch]
+
+  mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cpu" }
+
+  gitlab-ci:
+    script:
+      - uname -a || true
+      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
+      - nproc
+      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
+      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
+      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
+      - . "./share/spack/setup-env.sh"
+      - spack --version
+      - spack arch
+      - cd ${SPACK_CONCRETE_ENV_DIR}
+      - spack env activate --without-view .
+      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
+      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
+      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
+      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
+      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
+    after_script:
+      - cat /proc/loadavg || true
+
+    match_behavior: first
+    mappings:
+      - match:
+          - llvm
+          - py-tensorflow
+          - py-torch
+        runner-attributes:
+          tags: [ "spack", "huge", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: huge
+            KUBERNETES_CPU_REQUEST: 11000m
+            KUBERNETES_MEMORY_REQUEST: 42G
+      - match:
+          - "@:"
+        runner-attributes:
+          tags: [ "spack", "large", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: large
+            KUBERNETES_CPU_REQUEST: 8000m
+            KUBERNETES_MEMORY_REQUEST: 12G
+
+    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+
+    broken-specs-url: "s3://spack-binaries/broken-specs"
+
+    service-job-attributes:
+      before_script:
+        - . "./share/spack/setup-env.sh"
+        - spack --version
+      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+      tags: ["spack", "public", "x86_64_v4"]
+
+    signing-job-attributes:
+      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
+      tags: ["spack", "aws"]
+      script:
+        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
+        - /sign.sh
+        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
+
+  cdash:
+    build-group: Machine Learning
+    url: https://cdash.spack.io
+    project: Spack Testing
+    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml
new file mode 100644
index 0000000000..4ab8e3698b
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml
@@ -0,0 +1,167 @@
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: false
+
+  config:
+    build_jobs: 32
+    concretizer: clingo
+    install_tree:
+      root: /home/software/spack
+      padded_length: 384
+      projections:
+        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
+
+  packages:
+    all:
+      target: [x86_64_v3]
+      variants: ~rocm+cuda cuda_arch=80
+    llvm:
+      # https://github.com/spack/spack/issues/27999
+      require: ~cuda
+
+  definitions:
+    - packages:
+      # Horovod
+      - py-horovod
+
+      # Hugging Face
+      - py-transformers
+
+      # JAX
+      - py-jax
+      - py-jaxlib
+
+      # Keras
+      - py-keras
+      - py-keras-applications
+      - py-keras-preprocessing
+      - py-keras2onnx
+
+      # PyTorch
+      - py-botorch
+      - py-efficientnet-pytorch
+      - py-gpytorch
+      - py-kornia
+      - py-pytorch-gradual-warmup-lr
+      - py-pytorch-lightning
+      - py-segmentation-models-pytorch
+      - py-timm
+      - py-torch
+      - py-torch-cluster
+      - py-torch-geometric
+      - py-torch-nvidia-apex
+      - py-torch-scatter
+      - py-torch-sparse
+      - py-torch-spline-conv
+      - py-torchaudio
+      - py-torchdata
+      - py-torchfile
+      - py-torchgeo
+      - py-torchmeta
+      - py-torchmetrics
+      - py-torchtext
+      - py-torchvision
+      - py-vector-quantize-pytorch
+
+      # scikit-learn
+      - py-scikit-learn
+      - py-scikit-learn-extra
+
+      # TensorBoard
+      - py-tensorboard
+      - py-tensorboard-data-server
+      - py-tensorboard-plugin-wit
+      - py-tensorboardx
+
+      # TensorFlow
+      - py-tensorflow
+      - py-tensorflow-datasets
+      - py-tensorflow-estimator
+      - py-tensorflow-hub
+      - py-tensorflow-metadata
+      - py-tensorflow-probability
+
+      # XGBoost
+      - py-xgboost
+      # - r-xgboost
+      - xgboost
+
+    - arch:
+      - target=x86_64_v3
+
+  specs:
+    - matrix:
+      - [$packages]
+      - [$arch]
+
+  mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cuda" }
+
+  gitlab-ci:
+    script:
+      - uname -a || true
+      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
+      - nproc
+      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
+      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
+      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
+      - . "./share/spack/setup-env.sh"
+      - spack --version
+      - spack arch
+      - cd ${SPACK_CONCRETE_ENV_DIR}
+      - spack env activate --without-view .
+      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
+      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
+      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
+      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
+      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
+    after_script:
+      - cat /proc/loadavg || true
+
+    match_behavior: first
+    mappings:
+      - match:
+          - llvm
+          - py-tensorflow
+          - py-torch
+        runner-attributes:
+          tags: [ "spack", "huge", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: huge
+            KUBERNETES_CPU_REQUEST: 11000m
+            KUBERNETES_MEMORY_REQUEST: 42G
+      - match:
+          - "@:"
+        runner-attributes:
+          tags: [ "spack", "large", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: large
+            KUBERNETES_CPU_REQUEST: 8000m
+            KUBERNETES_MEMORY_REQUEST: 12G
+
+    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+
+    broken-specs-url: "s3://spack-binaries/broken-specs"
+
+    service-job-attributes:
+      before_script:
+        - . "./share/spack/setup-env.sh"
+        - spack --version
+      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+      tags: ["spack", "public", "x86_64_v4"]
+
+    signing-job-attributes:
+      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
+      tags: ["spack", "aws"]
+      script:
+        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
+        - /sign.sh
+        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
+
+  cdash:
+    build-group: Machine Learning
+    url: https://cdash.spack.io
+    project: Spack Testing
+    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
new file mode 100644
index 0000000000..29cf0aabe0
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
@@ -0,0 +1,172 @@
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: false
+
+  config:
+    build_jobs: 32
+    concretizer: clingo
+    install_tree:
+      root: /home/software/spack
+      padded_length: 384
+      projections:
+        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
+
+  packages:
+    all:
+      target: [x86_64_v3]
+      variants: ~cuda+rocm amdgpu_target=gfx90a
+    gl:
+      require: "osmesa"
+    py-torch:
+      # Does not yet support Spack-installed ROCm
+      require: ~rocm
+
+  definitions:
+    - packages:
+      # Horovod
+      - py-horovod
+
+      # Hugging Face
+      - py-transformers
+
+      # JAX
+      - py-jax
+      - py-jaxlib
+
+      # Keras
+      - py-keras
+      - py-keras-applications
+      - py-keras-preprocessing
+      - py-keras2onnx
+
+      # PyTorch
+      # Does not yet support Spack-installed ROCm
+      # - py-botorch
+      # - py-efficientnet-pytorch
+      # - py-gpytorch
+      # - py-kornia
+      # - py-pytorch-gradual-warmup-lr
+      # - py-pytorch-lightning
+      # - py-segmentation-models-pytorch
+      # - py-timm
+      # - py-torch
+      # - py-torch-cluster
+      # - py-torch-geometric
+      # - py-torch-nvidia-apex
+      # - py-torch-scatter
+      # - py-torch-sparse
+      # - py-torch-spline-conv
+      # - py-torchaudio
+      # - py-torchdata
+      # - py-torchfile
+      # - py-torchgeo
+      # - py-torchmeta
+      # - py-torchmetrics
+      # - py-torchtext
+      # - py-torchvision
+      # - py-vector-quantize-pytorch
+
+      # scikit-learn
+      - py-scikit-learn
+      - py-scikit-learn-extra
+
+      # TensorBoard
+      - py-tensorboard
+      - py-tensorboard-data-server
+      - py-tensorboard-plugin-wit
+      - py-tensorboardx
+
+      # TensorFlow
+      - py-tensorflow
+      - py-tensorflow-datasets
+      - py-tensorflow-estimator
+      - py-tensorflow-hub
+      - py-tensorflow-metadata
+      - py-tensorflow-probability
+
+      # XGBoost
+      - py-xgboost
+      # - r-xgboost
+      - xgboost
+
+    - arch:
+      - target=x86_64_v3
+
+  specs:
+    - matrix:
+      - [$packages]
+      - [$arch]
+
+  mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-rocm" }
+
+  gitlab-ci:
+    script:
+      - uname -a || true
+      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
+      - nproc
+      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
+      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
+      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
+      - . "./share/spack/setup-env.sh"
+      - spack --version
+      - spack arch
+      - cd ${SPACK_CONCRETE_ENV_DIR}
+      - spack env activate --without-view .
+      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
+      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
+      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
+      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
+      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
+    after_script:
+      - cat /proc/loadavg || true
+
+    match_behavior: first
+    mappings:
+      - match:
+          - llvm
+          - llvm-amdgpu
+          - py-tensorflow
+          - py-torch
+          - rocblas
+        runner-attributes:
+          tags: [ "spack", "huge", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: huge
+            KUBERNETES_CPU_REQUEST: 11000m
+            KUBERNETES_MEMORY_REQUEST: 42G
+      - match:
+          - "@:"
+        runner-attributes:
+          tags: [ "spack", "large", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: large
+            KUBERNETES_CPU_REQUEST: 8000m
+            KUBERNETES_MEMORY_REQUEST: 12G
+
+    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+
+    broken-specs-url: "s3://spack-binaries/broken-specs"
+
+    service-job-attributes:
+      before_script:
+        - . "./share/spack/setup-env.sh"
+        - spack --version
+      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+      tags: ["spack", "public", "x86_64_v4"]
+
+    signing-job-attributes:
+      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
+      tags: ["spack", "aws"]
+      script:
+        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
+        - /sign.sh
+        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
+
+  cdash:
+    build-group: Machine Learning
+    url: https://cdash.spack.io
+    project: Spack Testing
+    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml
deleted file mode 100644
index 601327ec7e..0000000000
--- a/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml
+++ /dev/null
@@ -1,163 +0,0 @@
-spack:
-  view: false
-
-  concretizer:
-    reuse: false
-    unify: false
-
-  config:
-    build_jobs: 32
-    concretizer: clingo
-    install_tree:
-      root: /home/software/spack
-      padded_length: 384
-      projections:
-        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
-
-  packages:
-    all:
-      compiler: [gcc@11.2.0]
-      target: [x86_64_v3]
-      variants: ~cuda+rocm amdgpu_target=gfx90a
-    gl:
-      require: "osmesa"
-    py-torch:
-      # Does not yet support Spack-installed ROCm
-      require: ~rocm
-
-  specs:
-    # Horovod
-    - py-horovod
-
-    # Hugging Face
-    - py-transformers
-
-    # JAX
-    - py-jax
-    - py-jaxlib
-
-    # Keras
-    - py-keras
-    - py-keras-applications
-    - py-keras-preprocessing
-    - py-keras2onnx
-
-    # PyTorch
-    # Does not yet support Spack-install ROCm
-    # - py-botorch
-    # - py-efficientnet-pytorch
-    # - py-gpytorch
-    # - py-kornia
-    # - py-pytorch-gradual-warmup-lr
-    # - py-pytorch-lightning
-    # - py-segmentation-models-pytorch
-    # - py-timm
-    # - py-torch
-    # - py-torch-cluster
-    # - py-torch-geometric
-    # - py-torch-nvidia-apex
-    # - py-torch-scatter
-    # - py-torch-sparse
-    # - py-torch-spline-conv
-    # - py-torchaudio
-    # - py-torchdata
-    # - py-torchfile
-    # - py-torchgeo
-    # - py-torchmeta
-    # - py-torchmetrics
-    # - py-torchtext
-    # - py-torchvision
-    # - py-vector-quantize-pytorch
-
-    # scikit-learn
-    - py-scikit-learn
-    - py-scikit-learn-extra
-
-    # TensorBoard
-    - py-tensorboard
-    - py-tensorboard-data-server
-    - py-tensorboard-plugin-wit
-    - py-tensorboardx
-
-    # TensorFlow
-    - py-tensorflow
-    - py-tensorflow-datasets
-    - py-tensorflow-estimator
-    - py-tensorflow-hub
-    - py-tensorflow-metadata
-    - py-tensorflow-probability
-
-    # XGBoost
-    - py-xgboost
-    # - r-xgboost
-    - xgboost
-
-  mirrors: { "mirror": "s3://spack-binaries/develop/ml-rocm" }
-
-  gitlab-ci:
-    script:
-      - uname -a || true
-      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
-      - nproc
-      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
-      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
-      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
-      - . "./share/spack/setup-env.sh"
-      - spack --version
-      - spack arch
-      - cd ${SPACK_CONCRETE_ENV_DIR}
-      - spack env activate --without-view .
-      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
-      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
-      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
-      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
-      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
-    after_script:
-      - cat /proc/loadavg || true
-
-    match_behavior: first
-    mappings:
-      - match:
-          - llvm-amdgpu
-          - llvm
-          - py-torch
-          - rocblas
-        runner-attributes:
-          tags: [ "spack", "huge", "x86_64_v4" ]
-          variables:
-            CI_JOB_SIZE: huge
-            KUBERNETES_CPU_REQUEST: 11000m
-            KUBERNETES_MEMORY_REQUEST: 42G
-      - match:
-          - "@:"
-        runner-attributes:
-          tags: [ "spack", "large", "x86_64_v4" ]
-          variables:
-            CI_JOB_SIZE: large
-            KUBERNETES_CPU_REQUEST: 8000m
-            KUBERNETES_MEMORY_REQUEST: 12G
-
-    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
-
-    broken-specs-url: "s3://spack-binaries/broken-specs"
-
-    service-job-attributes:
-      before_script:
-        - . "./share/spack/setup-env.sh"
-        - spack --version
-      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
-      tags: ["spack", "public", "x86_64_v4"]
-
-    signing-job-attributes:
-      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
-      tags: ["spack", "aws"]
-      script:
-        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
-        - /sign.sh
-        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
-
-  cdash:
-    build-group: Machine Learning
-    url: https://cdash.spack.io
-    project: Spack Testing
-    site: Cloud Gitlab Infrastructure
-- 
cgit v1.2.3-70-g09d2