diff options
author | Adam J. Stewart <ajstewart426@gmail.com> | 2024-10-28 10:30:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-28 10:30:07 +0100 |
commit | 32ce278a51cf046a2e3034ffb5f9b69a84361e61 (patch) | |
tree | 31ac0371ed2d0976b5cf2e69dc3ff01fe6e8c389 /share | |
parent | e83536de380e66a8419825ada3231be79da7853a (diff) | |
download | spack-32ce278a51cf046a2e3034ffb5f9b69a84361e61.tar.gz spack-32ce278a51cf046a2e3034ffb5f9b69a84361e61.tar.bz2 spack-32ce278a51cf046a2e3034ffb5f9b69a84361e61.tar.xz spack-32ce278a51cf046a2e3034ffb5f9b69a84361e61.zip |
ML CI: Linux aarch64 (#39666)
* ML CI: Linux aarch64
* Add config files
* No aarch64 tag
* Don't specify image
* Use amazonlinux image
Co-authored-by: kwryankrattiger <80296582+kwryankrattiger@users.noreply.github.com>
* Update and require
* GCC is too old
* Fix some builds
* xgboost doesn't support old GCC + cuda
* Run on newer Ubuntu
* Remove mxnet
* Try aarch64 range
* Use main branch
* Conflict applies to all targets
* cuda only required when +cuda
* Use tagged version
* Comment out tf-estimator
* Add ROCm, use newer Ubuntu
* Remove ROCm
---------
Co-authored-by: kwryankrattiger <80296582+kwryankrattiger@users.noreply.github.com>
Diffstat (limited to 'share')
3 files changed, 222 insertions, 0 deletions
```diff
diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
index 8997ae5b24..f082b3b413 100644
--- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
+++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
@@ -735,6 +735,52 @@ ml-linux-x86_64-rocm-build:
     - artifacts: True
       job: ml-linux-x86_64-rocm-generate
 
+########################################
+# Machine Learning - Linux aarch64 (CPU)
+########################################
+.ml-linux-aarch64-cpu:
+  extends: [ ".linux_aarch64" ]
+  variables:
+    SPACK_CI_STACK_NAME: ml-linux-aarch64-cpu
+
+ml-linux-aarch64-cpu-generate:
+  extends: [ ".generate-aarch64", .ml-linux-aarch64-cpu ]
+  image: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
+
+ml-linux-aarch64-cpu-build:
+  extends: [ ".build", ".ml-linux-aarch64-cpu" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-linux-aarch64-cpu-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-linux-aarch64-cpu-generate
+
+#########################################
+# Machine Learning - Linux aarch64 (CUDA)
+#########################################
+.ml-linux-aarch64-cuda:
+  extends: [ ".linux_aarch64" ]
+  variables:
+    SPACK_CI_STACK_NAME: ml-linux-aarch64-cuda
+
+ml-linux-aarch64-cuda-generate:
+  extends: [ ".generate-aarch64", .ml-linux-aarch64-cuda ]
+  image: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
+
+ml-linux-aarch64-cuda-build:
+  extends: [ ".build", ".ml-linux-aarch64-cuda" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-linux-aarch64-cuda-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-linux-aarch64-cuda-generate
+
 #########################################
 # Machine Learning - Darwin aarch64 (MPS)
 #########################################
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-aarch64-cpu/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-aarch64-cpu/spack.yaml
new file mode 100644
index 0000000000..23ed6aa665
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-aarch64-cpu/spack.yaml
@@ -0,0 +1,85 @@
+spack:
+  view: false
+  packages:
+    all:
+      require:
+      - target=aarch64
+      - ~cuda
+      - ~rocm
+    mpi:
+      require: openmpi
+
+  specs:
+  # Horovod
+  - py-horovod
+
+  # Hugging Face
+  - py-transformers
+
+  # JAX
+  - py-jax
+  - py-jaxlib
+
+  # Keras
+  - py-keras backend=tensorflow
+  - py-keras backend=jax
+  - py-keras backend=torch
+  - py-keras-applications
+  - py-keras-preprocessing
+  - py-keras2onnx
+
+  # PyTorch
+  - py-botorch
+  - py-efficientnet-pytorch
+  - py-gpytorch
+  - py-kornia
+  - py-lightning
+  - py-pytorch-gradual-warmup-lr
+  - py-pytorch-lightning
+  - py-segmentation-models-pytorch
+  - py-timm
+  - py-torch
+  - py-torch-cluster
+  - py-torch-geometric
+  - py-torch-nvidia-apex
+  - py-torch-scatter
+  - py-torch-sparse
+  - py-torch-spline-conv
+  - py-torchaudio
+  - py-torchdata
+  - py-torchfile
+  - py-torchgeo
+  - py-torchmetrics
+  - py-torchtext
+  - py-torchvision
+  - py-vector-quantize-pytorch
+
+  # scikit-learn
+  - py-scikit-learn
+  - py-scikit-learn-extra
+
+  # TensorBoard
+  - py-tensorboard
+  - py-tensorboard-data-server
+  - py-tensorboard-plugin-wit
+  - py-tensorboardx
+
+  # TensorFlow
+  - py-tensorflow
+  - py-tensorflow-datasets
+  - py-tensorflow-hub
+  - py-tensorflow-metadata
+  - py-tensorflow-probability
+
+  # XGBoost
+  - py-xgboost
+
+  ci:
+    pipeline-gen:
+    - build-job:
+        image:
+          name: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
+          entrypoint: ['']
+
+  cdash:
+    build-group: Machine Learning
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-aarch64-cuda/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-aarch64-cuda/spack.yaml
new file mode 100644
index 0000000000..47f4eda0f1
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-aarch64-cuda/spack.yaml
@@ -0,0 +1,91 @@
+spack:
+  view: false
+  packages:
+    all:
+      require:
+      - target=aarch64
+      - ~rocm
+      - +cuda
+      - cuda_arch=80
+    llvm:
+      # https://github.com/spack/spack/issues/27999
+      require: ~cuda
+    mpi:
+      require: openmpi
+
+  specs:
+  # Horovod
+  - py-horovod
+
+  # Hugging Face
+  - py-transformers
+
+  # JAX
+  - py-jax
+  - py-jaxlib
+
+  # Keras
+  - py-keras backend=tensorflow
+  - py-keras backend=jax
+  - py-keras backend=torch
+  - py-keras-applications
+  - py-keras-preprocessing
+  - py-keras2onnx
+
+  # PyTorch
+  - py-botorch
+  - py-efficientnet-pytorch
+  - py-gpytorch
+  - py-kornia
+  - py-lightning
+  - py-pytorch-gradual-warmup-lr
+  - py-pytorch-lightning
+  - py-segmentation-models-pytorch
+  - py-timm
+  - py-torch
+  - py-torch-cluster
+  - py-torch-geometric
+  - py-torch-nvidia-apex
+  - py-torch-scatter
+  - py-torch-sparse
+  - py-torch-spline-conv
+  - py-torchaudio
+  - py-torchdata
+  - py-torchfile
+  - py-torchgeo
+  - py-torchmetrics
+  # torchtext requires older pytorch, which requires older cuda, which doesn't support newer GCC
+  # - py-torchtext
+  - py-torchvision
+  - py-vector-quantize-pytorch
+
+  # scikit-learn
+  - py-scikit-learn
+  - py-scikit-learn-extra
+
+  # TensorBoard
+  - py-tensorboard
+  - py-tensorboard-data-server
+  - py-tensorboard-plugin-wit
+  - py-tensorboardx
+
+  # TensorFlow
+  - py-tensorflow
+  - py-tensorflow-datasets
+  - py-tensorflow-hub
+  - py-tensorflow-metadata
+  - py-tensorflow-probability
+
+  # XGBoost
+  # xgboost requires older cuda, which doesn't support newer GCC
+  # - py-xgboost
+
+  ci:
+    pipeline-gen:
+    - build-job:
+        image:
+          name: ghcr.io/spack/ubuntu-24.04:v2024-09-05-v2
+          entrypoint: ['']
+
+  cdash:
+    build-group: Machine Learning
```