diff options
author | afzpatel <122491982+afzpatel@users.noreply.github.com> | 2024-07-24 10:16:15 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-24 16:16:15 +0200 |
commit | e529a454eb2a84388b3bab6154cf47ac1ecb6ef6 (patch) | |
tree | 4bd9575d08268ca216e342c645fabc0e2f7f22a0 | |
parent | 1b5dc396e3591011098814a460171f3834e95757 (diff) | |
download | spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.tar.gz spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.tar.bz2 spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.tar.xz spack-e529a454eb2a84388b3bab6154cf47ac1ecb6ef6.zip |
CI: add ML ROCm stack (#45302)
* add ML ROCm stack
* add suggested changes
* remove py-torch and py-tensorflow-estimator
* add TF_ROCM_AMDGPU_TARGETS env variable and remove packages from pipeline
* remove py-jax and py-xgboost
3 files changed, 117 insertions, 0 deletions
diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
index f81a45a0b5..ce201d402d 100644
--- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
+++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
@@ -726,6 +726,29 @@ ml-linux-x86_64-cuda-build:
     - artifacts: True
       job: ml-linux-x86_64-cuda-generate
 
+########################################
+# Machine Learning - Linux x86_64 (ROCm)
+########################################
+.ml-linux-x86_64-rocm:
+  extends: [ ".linux_x86_64_v3" ]
+  variables:
+    SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm
+
+ml-linux-x86_64-rocm-generate:
+  extends: [ ".generate-x86_64", .ml-linux-x86_64-rocm, ".tags-x86_64_v4" ]
+  image: ghcr.io/spack/ubuntu-22.04:v2024-05-07
+
+ml-linux-x86_64-rocm-build:
+  extends: [ ".build", ".ml-linux-x86_64-rocm" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-linux-x86_64-rocm-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-linux-x86_64-rocm-generate
+
 #########################################
 # Machine Learning - Darwin aarch64 (MPS)
 #########################################
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
new file mode 100644
index 0000000000..7a236b136d
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml
@@ -0,0 +1,93 @@
+spack:
+  view: false
+  packages:
+    all:
+      require:
+      - target=x86_64_v3
+      - ~cuda
+      - +rocm
+      - amdgpu_target=gfx90a
+    gl:
+      require: "osmesa"
+    mpi:
+      require: openmpi
+
+  specs:
+  # Horovod
+  # - py-horovod
+
+  # Hugging Face
+  - py-transformers
+
+  # JAX
+  # Does not yet support Spack-installed ROCm
+  # - py-jax
+  # - py-jaxlib
+
+  # Keras
+  - py-keras backend=tensorflow
+  # - py-keras backend=jax
+  # - py-keras backend=torch
+  - py-keras-applications
+  - py-keras-preprocessing
+  - py-keras2onnx
+
+  # PyTorch
+  # Does not yet support Spack-installed ROCm
+  # - py-botorch
+  # - py-efficientnet-pytorch
+  # - py-gpytorch
+  # - py-kornia
+  # - py-lightning
+  # - py-pytorch-gradual-warmup-lr
+  # - py-pytorch-lightning
+  # - py-segmentation-models-pytorch
+  # - py-timm
+  # - py-torch
+  # - py-torch-cluster
+  # - py-torch-geometric
+  # - py-torch-nvidia-apex
+  # - py-torch-scatter
+  # - py-torch-sparse
+  # - py-torch-spline-conv
+  # - py-torchaudio
+  # - py-torchdata
+  # - py-torchfile
+  # - py-torchgeo
+  # - py-torchmetrics
+  # - py-torchtext
+  # - py-torchvision
+  # - py-vector-quantize-pytorch
+
+  # scikit-learn
+  - py-scikit-learn
+  - py-scikit-learn-extra
+
+  # TensorBoard
+  - py-tensorboard
+  - py-tensorboard-data-server
+  - py-tensorboard-plugin-wit
+  - py-tensorboardx
+
+  # TensorFlow
+  - py-tensorflow
+  - py-tensorflow-datasets
+  # version 2.16 is not available
+  # - py-tensorflow-estimator
+  - py-tensorflow-hub
+  - py-tensorflow-metadata
+  - py-tensorflow-probability
+
+  # XGBoost
+  # Does not yet support Spack-installed ROCm
+  # - py-xgboost
+
+  ci:
+    pipeline-gen:
+    - build-job:
+        image:
+          name: ghcr.io/spack/ubuntu-22.04:v2024-05-07
+          entrypoint: ['']
+
+  cdash:
+    build-group: Machine Learning
diff --git a/var/spack/repos/builtin/packages/py-tensorflow/package.py b/var/spack/repos/builtin/packages/py-tensorflow/package.py
index fd152cb89b..1d86e81fd0 100644
--- a/var/spack/repos/builtin/packages/py-tensorflow/package.py
+++ b/var/spack/repos/builtin/packages/py-tensorflow/package.py
@@ -562,6 +562,7 @@ class PyTensorflow(Package, CudaPackage, ROCmPackage, PythonExtension):
             for pkg_dep in rocm_dependencies:
                 pkg_dep_cap = pkg_dep.upper().replace("-", "_")
                 env.set(f"{pkg_dep_cap}_PATH", spec[pkg_dep].prefix)
+            env.set("TF_ROCM_AMDGPU_TARGETS", ",".join(self.spec.variants["amdgpu_target"].value))
         else:
             env.set("TF_NEED_ROCM", "0")
 