summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: afzpatel <122491982+afzpatel@users.noreply.github.com> 2024-07-24 10:16:15 -0400
committer: GitHub <noreply@github.com> 2024-07-24 16:16:15 +0200
commit: e529a454eb2a84388b3bab6154cf47ac1ecb6ef6 (patch)
tree: 4bd9575d08268ca216e342c645fabc0e2f7f22a0
parent: 1b5dc396e3591011098814a460171f3834e95757 (diff)
download: spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.tar.gz
spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.tar.bz2
spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.tar.xz
spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.zip
CI: add ML ROCm stack (#45302)
* add ML ROCm stack
* add suggested changes
* remove py-torch and py-tensorflow-estimator
* add TF_ROCM_AMDGPU_TARGETS env variable and remove packages from pipeline
* remove py-jax and py-xgboost
-rw-r--r-- share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml | 23
-rw-r--r-- share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml | 93
-rw-r--r-- var/spack/repos/builtin/packages/py-tensorflow/package.py | 1
3 files changed, 117 insertions, 0 deletions
diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
index f81a45a0b5..ce201d402d 100644
--- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
+++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
@@ -726,6 +726,29 @@ ml-linux-x86_64-cuda-build:
- artifacts: True
job: ml-linux-x86_64-cuda-generate
+########################################
+# Machine Learning - Linux x86_64 (ROCm)
+########################################
+.ml-linux-x86_64-rocm:
+ extends: [ ".linux_x86_64_v3" ]
+ variables:
+ SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
+
+ml-linux-x86_64-rocm-generate:
+ extends: [ ".generate-x86_64", .ml-linux-x86_64-rocm, ".tags-x86_64_v4" ]
+ image: ghcr.io/spack/ubuntu-22.04:v2024-05-07
+
+ml-linux-x86_64-rocm-build:
+ extends: [ ".build", ".ml-linux-x86_64-rocm" ]
+ trigger:
+ include:
+ - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+ job: ml-linux-x86_64-rocm-generate
+ strategy: depend
+ needs:
+ - artifacts: True
+ job: ml-linux-x86_64-rocm-generate
+
#########################################
# Machine Learning - Darwin aarch64 (MPS)
#########################################
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
new file mode 100644
index 0000000000..7a236b136d
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
@@ -0,0 +1,93 @@
+spack:
+ view: false
+ packages:
+ all:
+ require:
+ - target=x86_64_v3
+ - ~cuda
+ - +rocm
+ - amdgpu_target=gfx90a
+ gl:
+ require: "osmesa"
+ mpi:
+ require: openmpi
+
+ specs:
+ # Horovod
+ # - py-horovod
+
+ # Hugging Face
+ - py-transformers
+
+ # JAX
+ # Does not yet support Spack-installed ROCm
+ # - py-jax
+ # - py-jaxlib
+
+ # Keras
+ - py-keras backend=tensorflow
+ # - py-keras backend=jax
+ # - py-keras backend=torch
+ - py-keras-applications
+ - py-keras-preprocessing
+ - py-keras2onnx
+
+ # PyTorch
+ # Does not yet support Spack-installed ROCm
+ # - py-botorch
+ # - py-efficientnet-pytorch
+ # - py-gpytorch
+ # - py-kornia
+ # - py-lightning
+ # - py-pytorch-gradual-warmup-lr
+ # - py-pytorch-lightning
+ # - py-segmentation-models-pytorch
+ # - py-timm
+ # - py-torch
+ # - py-torch-cluster
+ # - py-torch-geometric
+ # - py-torch-nvidia-apex
+ # - py-torch-scatter
+ # - py-torch-sparse
+ # - py-torch-spline-conv
+ # - py-torchaudio
+ # - py-torchdata
+ # - py-torchfile
+ # - py-torchgeo
+ # - py-torchmetrics
+ # - py-torchtext
+ # - py-torchvision
+ # - py-vector-quantize-pytorch
+
+ # scikit-learn
+ - py-scikit-learn
+ - py-scikit-learn-extra
+
+ # TensorBoard
+ - py-tensorboard
+ - py-tensorboard-data-server
+ - py-tensorboard-plugin-wit
+ - py-tensorboardx
+
+ # TensorFlow
+ - py-tensorflow
+ - py-tensorflow-datasets
+ # version 2.16 is not available
+ # - py-tensorflow-estimator
+ - py-tensorflow-hub
+ - py-tensorflow-metadata
+ - py-tensorflow-probability
+
+ # XGBoost
+ # Does not yet support Spack-installed ROCm
+ # - py-xgboost
+
+ ci:
+ pipeline-gen:
+ - build-job:
+ image:
+ name: ghcr.io/spack/ubuntu-22.04:v2024-05-07
+ entrypoint: ['']
+
+ cdash:
+ build-group: Machine Learning
diff --git a/var/spack/repos/builtin/packages/py-tensorflow/package.py b/var/spack/repos/builtin/packages/py-tensorflow/package.py
index fd152cb89b..1d86e81fd0 100644
--- a/var/spack/repos/builtin/packages/py-tensorflow/package.py
+++ b/var/spack/repos/builtin/packages/py-tensorflow/package.py
@@ -562,6 +562,7 @@ class PyTensorflow(Package, CudaPackage, ROCmPackage, PythonExtension):
for pkg_dep in rocm_dependencies:
pkg_dep_cap = pkg_dep.upper().replace("-", "_")
env.set(f"{pkg_dep_cap}_PATH", spec[pkg_dep].prefix)
+ env.set("TF_ROCM_AMDGPU_TARGETS", ",".join(self.spec.variants["amdgpu_target"].value))
else:
env.set("TF_NEED_ROCM", "0")