From eb67497020f64de7a10f683eda0177a637b4ccf0 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Thu, 22 Dec 2022 11:31:40 -0600 Subject: ML CI: Linux x86_64 (#34299) * ML CI: Linux x86_64 * Update comments * Rename again * Rename comments * Update to match other arches * No compiler * Compiler was wrong anyway * Faster TF --- share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml | 106 ++++++------- .../cloud_pipelines/stacks/ml-cpu/spack.yaml | 155 ------------------- .../cloud_pipelines/stacks/ml-cuda/spack.yaml | 158 ------------------- .../stacks/ml-linux-x86_64-cpu/spack.yaml | 164 ++++++++++++++++++++ .../stacks/ml-linux-x86_64-cuda/spack.yaml | 167 ++++++++++++++++++++ .../stacks/ml-linux-x86_64-rocm/spack.yaml | 172 +++++++++++++++++++++ .../cloud_pipelines/stacks/ml-rocm/spack.yaml | 163 ------------------- 7 files changed, 556 insertions(+), 529 deletions(-) delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml (limited to 'share') diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml index 8122cd7f35..8800387436 100644 --- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml +++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml @@ -760,122 +760,122 @@ tutorial-protected-build: - artifacts: True job: tutorial-protected-generate -######################################## -# Machine Learning (CPU) -######################################## -.ml-cpu: +####################################### +# Machine Learning - Linux x86_64 (CPU) +####################################### +.ml-linux-x86_64-cpu: variables: - SPACK_CI_STACK_NAME: ml-cpu + SPACK_CI_STACK_NAME: ml-linux-x86_64-cpu -.ml-cpu-generate: - extends: .ml-cpu +.ml-linux-x86_64-cpu-generate: + extends: .ml-linux-x86_64-cpu image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21 tags: ["spack", "aws", "public", "medium", "x86_64_v4"] -ml-cpu-pr-generate: - extends: [ ".ml-cpu-generate", ".pr-generate"] +ml-linux-x86_64-cpu-pr-generate: + extends: [ ".ml-linux-x86_64-cpu-generate", ".pr-generate"] -ml-cpu-protected-generate: - extends: [ ".ml-cpu-generate", ".protected-generate"] +ml-linux-x86_64-cpu-protected-generate: + extends: [ ".ml-linux-x86_64-cpu-generate", ".protected-generate"] -ml-cpu-pr-build: - extends: [ ".ml-cpu", ".pr-build" ] +ml-linux-x86_64-cpu-pr-build: + extends: [ ".ml-linux-x86_64-cpu", ".pr-build" ] trigger: include: - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - job: ml-cpu-pr-generate + job: ml-linux-x86_64-cpu-pr-generate strategy: depend needs: - artifacts: True - job: ml-cpu-pr-generate + job: ml-linux-x86_64-cpu-pr-generate -ml-cpu-protected-build: - extends: [ ".ml-cpu", ".protected-build" ] +ml-linux-x86_64-cpu-protected-build: + extends: [ ".ml-linux-x86_64-cpu", ".protected-build" ] trigger: include: - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - job: ml-cpu-protected-generate + job: ml-linux-x86_64-cpu-protected-generate strategy: depend needs: - artifacts: True - job: ml-cpu-protected-generate + job: ml-linux-x86_64-cpu-protected-generate ######################################## -# Machine Learning (CUDA) +# Machine Learning - Linux x86_64 (CUDA) ######################################## -.ml-cuda: +.ml-linux-x86_64-cuda: variables: - SPACK_CI_STACK_NAME: ml-cuda + SPACK_CI_STACK_NAME: ml-linux-x86_64-cuda -.ml-cuda-generate: - extends: .ml-cuda +.ml-linux-x86_64-cuda-generate: + extends: .ml-linux-x86_64-cuda image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21 tags: ["spack", "aws", "public", "medium", "x86_64_v4"] -ml-cuda-pr-generate: - extends: [ ".ml-cuda-generate", ".pr-generate"] +ml-linux-x86_64-cuda-pr-generate: + extends: [ ".ml-linux-x86_64-cuda-generate", ".pr-generate"] -ml-cuda-protected-generate: - extends: [ ".ml-cuda-generate", ".protected-generate"] +ml-linux-x86_64-cuda-protected-generate: + extends: [ ".ml-linux-x86_64-cuda-generate", ".protected-generate"] -ml-cuda-pr-build: - extends: [ ".ml-cuda", ".pr-build" ] +ml-linux-x86_64-cuda-pr-build: + extends: [ ".ml-linux-x86_64-cuda", ".pr-build" ] trigger: include: - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - job: ml-cuda-pr-generate + job: ml-linux-x86_64-cuda-pr-generate strategy: depend needs: - artifacts: True - job: ml-cuda-pr-generate + job: ml-linux-x86_64-cuda-pr-generate -ml-cuda-protected-build: - extends: [ ".ml-cuda", ".protected-build" ] +ml-linux-x86_64-cuda-protected-build: + extends: [ ".ml-linux-x86_64-cuda", ".protected-build" ] trigger: include: - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - job: ml-cuda-protected-generate + job: ml-linux-x86_64-cuda-protected-generate strategy: depend needs: - artifacts: True - job: ml-cuda-protected-generate + job: ml-linux-x86_64-cuda-protected-generate ######################################## -# Machine Learning (ROCm) +# Machine Learning - Linux x86_64 (ROCm) ######################################## -.ml-rocm: +.ml-linux-x86_64-rocm: variables: - SPACK_CI_STACK_NAME: ml-rocm + SPACK_CI_STACK_NAME: ml-linux-x86_64-rocm -.ml-rocm-generate: - extends: .ml-rocm +.ml-linux-x86_64-rocm-generate: + extends: .ml-linux-x86_64-rocm image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21 tags: ["spack", "aws", "public", "medium", "x86_64_v4"] -ml-rocm-pr-generate: - extends: [ ".ml-rocm-generate", ".pr-generate"] +ml-linux-x86_64-rocm-pr-generate: + extends: [ ".ml-linux-x86_64-rocm-generate", ".pr-generate"] -ml-rocm-protected-generate: - extends: [ ".ml-rocm-generate", ".protected-generate"] +ml-linux-x86_64-rocm-protected-generate: + extends: [ ".ml-linux-x86_64-rocm-generate", ".protected-generate"] -ml-rocm-pr-build: - extends: [ ".ml-rocm", ".pr-build" ] +ml-linux-x86_64-rocm-pr-build: + extends: [ ".ml-linux-x86_64-rocm", ".pr-build" ] trigger: include: - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - job: ml-rocm-pr-generate + job: ml-linux-x86_64-rocm-pr-generate strategy: depend needs: - artifacts: True - job: ml-rocm-pr-generate + job: ml-linux-x86_64-rocm-pr-generate -ml-rocm-protected-build: - extends: [ ".ml-rocm", ".protected-build" ] +ml-linux-x86_64-rocm-protected-build: + extends: [ ".ml-linux-x86_64-rocm", ".protected-build" ] trigger: include: - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml - job: ml-rocm-protected-generate + job: ml-linux-x86_64-rocm-protected-generate strategy: depend needs: - artifacts: True - job: ml-rocm-protected-generate + job: ml-linux-x86_64-rocm-protected-generate diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml deleted file mode 100644 index ec2ac30b8a..0000000000 --- a/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml +++ /dev/null @@ -1,155 +0,0 @@ -spack: - view: false - - concretizer: - reuse: false - unify: false - - config: - build_jobs: 32 - concretizer: clingo - install_tree: - root: /home/software/spack - padded_length: 384 - projections: - all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}" - - packages: - all: - compiler: [gcc@11.2.0] - target: [x86_64_v3] - variants: ~cuda~rocm - - specs: - # Horovod - - py-horovod - - # Hugging Face - - py-transformers - - # JAX - - py-jax - - py-jaxlib - - # Keras - - py-keras - - py-keras-applications - - py-keras-preprocessing - - py-keras2onnx - - # PyTorch - - py-botorch - - py-efficientnet-pytorch - - py-gpytorch - - py-kornia - - py-pytorch-gradual-warmup-lr - - py-pytorch-lightning - - py-segmentation-models-pytorch - - py-timm - - py-torch - - py-torch-cluster - - py-torch-geometric - - py-torch-nvidia-apex - - py-torch-scatter - - py-torch-sparse - - py-torch-spline-conv - - py-torchaudio - - py-torchdata - - py-torchfile - - py-torchgeo - - py-torchmeta - - py-torchmetrics - - py-torchtext - - py-torchvision - - py-vector-quantize-pytorch - - # scikit-learn - - py-scikit-learn - - py-scikit-learn-extra - - # TensorBoard - - py-tensorboard - - py-tensorboard-data-server - - py-tensorboard-plugin-wit - - py-tensorboardx - - # TensorFlow - - py-tensorflow - - py-tensorflow-datasets - - py-tensorflow-estimator - - py-tensorflow-hub - - py-tensorflow-metadata - - py-tensorflow-probability - - # XGBoost - - py-xgboost - # - r-xgboost - - xgboost - - mirrors: { "mirror": "s3://spack-binaries/develop/ml-cpu" } - - gitlab-ci: - script: - - uname -a || true - - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true - - nproc - - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz - - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet - - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null - - . "./share/spack/setup-env.sh" - - spack --version - - spack arch - - cd ${SPACK_CONCRETE_ENV_DIR} - - spack env activate --without-view . - - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" - - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data - - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi - - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi - - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) - after_script: - - cat /proc/loadavg || true - - match_behavior: first - mappings: - - match: - - llvm - - py-torch - runner-attributes: - tags: [ "spack", "huge", "x86_64_v4" ] - variables: - CI_JOB_SIZE: huge - KUBERNETES_CPU_REQUEST: 11000m - KUBERNETES_MEMORY_REQUEST: 42G - - match: - - "@:" - runner-attributes: - tags: [ "spack", "large", "x86_64_v4" ] - variables: - CI_JOB_SIZE: large - KUBERNETES_CPU_REQUEST: 8000m - KUBERNETES_MEMORY_REQUEST: 12G - - image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } - - broken-specs-url: "s3://spack-binaries/broken-specs" - - service-job-attributes: - before_script: - - . "./share/spack/setup-env.sh" - - spack --version - image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } - tags: ["spack", "public", "x86_64_v4"] - - signing-job-attributes: - image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } - tags: ["spack", "aws"] - script: - - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp - - /sign.sh - - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache - - cdash: - build-group: Machine Learning - url: https://cdash.spack.io - project: Spack Testing - site: Cloud Gitlab Infrastructure diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml deleted file mode 100644 index d0c9823c40..0000000000 --- a/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml +++ /dev/null @@ -1,158 +0,0 @@ -spack: - view: false - - concretizer: - reuse: false - unify: false - - config: - build_jobs: 32 - concretizer: clingo - install_tree: - root: /home/software/spack - padded_length: 384 - projections: - all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}" - - packages: - all: - compiler: [gcc@11.2.0] - target: [x86_64_v3] - variants: ~rocm+cuda cuda_arch=80 - llvm: - # https://github.com/spack/spack/issues/27999 - require: ~cuda - - specs: - # Horovod - - py-horovod - - # Hugging Face - - py-transformers - - # JAX - - py-jax - - py-jaxlib - - # Keras - - py-keras - - py-keras-applications - - py-keras-preprocessing - - py-keras2onnx - - # PyTorch - - py-botorch - - py-efficientnet-pytorch - - py-gpytorch - - py-kornia - - py-pytorch-gradual-warmup-lr - - py-pytorch-lightning - - py-segmentation-models-pytorch - - py-timm - - py-torch - - py-torch-cluster - - py-torch-geometric - - py-torch-nvidia-apex - - py-torch-scatter - - py-torch-sparse - - py-torch-spline-conv - - py-torchaudio - - py-torchdata - - py-torchfile - - py-torchgeo - - py-torchmeta - - py-torchmetrics - - py-torchtext - - py-torchvision - - py-vector-quantize-pytorch - - # scikit-learn - - py-scikit-learn - - py-scikit-learn-extra - - # TensorBoard - - py-tensorboard - - py-tensorboard-data-server - - py-tensorboard-plugin-wit - - py-tensorboardx - - # TensorFlow - - py-tensorflow - - py-tensorflow-datasets - - py-tensorflow-estimator - - py-tensorflow-hub - - py-tensorflow-metadata - - py-tensorflow-probability - - # XGBoost - - py-xgboost - # - r-xgboost - - xgboost - - mirrors: { "mirror": "s3://spack-binaries/develop/ml-cuda" } - - gitlab-ci: - script: - - uname -a || true - - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true - - nproc - - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz - - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet - - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null - - . "./share/spack/setup-env.sh" - - spack --version - - spack arch - - cd ${SPACK_CONCRETE_ENV_DIR} - - spack env activate --without-view . - - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" - - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data - - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi - - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi - - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) - after_script: - - cat /proc/loadavg || true - - match_behavior: first - mappings: - - match: - - llvm - - py-torch - runner-attributes: - tags: [ "spack", "huge", "x86_64_v4" ] - variables: - CI_JOB_SIZE: huge - KUBERNETES_CPU_REQUEST: 11000m - KUBERNETES_MEMORY_REQUEST: 42G - - match: - - "@:" - runner-attributes: - tags: [ "spack", "large", "x86_64_v4" ] - variables: - CI_JOB_SIZE: large - KUBERNETES_CPU_REQUEST: 8000m - KUBERNETES_MEMORY_REQUEST: 12G - - image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } - - broken-specs-url: "s3://spack-binaries/broken-specs" - - service-job-attributes: - before_script: - - . "./share/spack/setup-env.sh" - - spack --version - image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } - tags: ["spack", "public", "x86_64_v4"] - - signing-job-attributes: - image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } - tags: ["spack", "aws"] - script: - - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp - - /sign.sh - - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache - - cdash: - build-group: Machine Learning - url: https://cdash.spack.io - project: Spack Testing - site: Cloud Gitlab Infrastructure diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml new file mode 100644 index 0000000000..bfd5020cea --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cpu/spack.yaml @@ -0,0 +1,164 @@ +spack: + view: false + + concretizer: + reuse: false + unify: false + + config: + build_jobs: 32 + concretizer: clingo + install_tree: + root: /home/software/spack + padded_length: 384 + projections: + all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}" + + packages: + all: + target: [x86_64_v3] + variants: ~cuda~rocm + + definitions: + - packages: + # Horovod + - py-horovod + + # Hugging Face + - py-transformers + + # JAX + - py-jax + - py-jaxlib + + # Keras + - py-keras + - py-keras-applications + - py-keras-preprocessing + - py-keras2onnx + + # PyTorch + - py-botorch + - py-efficientnet-pytorch + - py-gpytorch + - py-kornia + - py-pytorch-gradual-warmup-lr + - py-pytorch-lightning + - py-segmentation-models-pytorch + - py-timm + - py-torch + - py-torch-cluster + - py-torch-geometric + - py-torch-nvidia-apex + - py-torch-scatter + - py-torch-sparse + - py-torch-spline-conv + - py-torchaudio + - py-torchdata + - py-torchfile + - py-torchgeo + - py-torchmeta + - py-torchmetrics + - py-torchtext + - py-torchvision + - py-vector-quantize-pytorch + + # scikit-learn + - py-scikit-learn + - py-scikit-learn-extra + + # TensorBoard + - py-tensorboard + - py-tensorboard-data-server + - py-tensorboard-plugin-wit + - py-tensorboardx + + # TensorFlow + - py-tensorflow + - py-tensorflow-datasets + - py-tensorflow-estimator + - py-tensorflow-hub + - py-tensorflow-metadata + - py-tensorflow-probability + + # XGBoost + - py-xgboost + # - r-xgboost + - xgboost + + - arch: + - target=x86_64_v3 + + specs: + - matrix: + - [$packages] + - [$arch] + + mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cpu" } + + gitlab-ci: + script: + - uname -a || true + - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true + - nproc + - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz + - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet + - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null + - . "./share/spack/setup-env.sh" + - spack --version + - spack arch + - cd ${SPACK_CONCRETE_ENV_DIR} + - spack env activate --without-view . + - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" + - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data + - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi + - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi + - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + after_script: + - cat /proc/loadavg || true + + match_behavior: first + mappings: + - match: + - llvm + - py-tensorflow + - py-torch + runner-attributes: + tags: [ "spack", "huge", "x86_64_v4" ] + variables: + CI_JOB_SIZE: huge + KUBERNETES_CPU_REQUEST: 11000m + KUBERNETES_MEMORY_REQUEST: 42G + - match: + - "@:" + runner-attributes: + tags: [ "spack", "large", "x86_64_v4" ] + variables: + CI_JOB_SIZE: large + KUBERNETES_CPU_REQUEST: 8000m + KUBERNETES_MEMORY_REQUEST: 12G + + image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } + + broken-specs-url: "s3://spack-binaries/broken-specs" + + service-job-attributes: + before_script: + - . "./share/spack/setup-env.sh" + - spack --version + image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } + tags: ["spack", "public", "x86_64_v4"] + + signing-job-attributes: + image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } + tags: ["spack", "aws"] + script: + - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp + - /sign.sh + - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache + + cdash: + build-group: Machine Learning + url: https://cdash.spack.io + project: Spack Testing + site: Cloud Gitlab Infrastructure diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml new file mode 100644 index 0000000000..4ab8e3698b --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-cuda/spack.yaml @@ -0,0 +1,167 @@ +spack: + view: false + + concretizer: + reuse: false + unify: false + + config: + build_jobs: 32 + concretizer: clingo + install_tree: + root: /home/software/spack + padded_length: 384 + projections: + all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}" + + packages: + all: + target: [x86_64_v3] + variants: ~rocm+cuda cuda_arch=80 + llvm: + # https://github.com/spack/spack/issues/27999 + require: ~cuda + + definitions: + - packages: + # Horovod + - py-horovod + + # Hugging Face + - py-transformers + + # JAX + - py-jax + - py-jaxlib + + # Keras + - py-keras + - py-keras-applications + - py-keras-preprocessing + - py-keras2onnx + + # PyTorch + - py-botorch + - py-efficientnet-pytorch + - py-gpytorch + - py-kornia + - py-pytorch-gradual-warmup-lr + - py-pytorch-lightning + - py-segmentation-models-pytorch + - py-timm + - py-torch + - py-torch-cluster + - py-torch-geometric + - py-torch-nvidia-apex + - py-torch-scatter + - py-torch-sparse + - py-torch-spline-conv + - py-torchaudio + - py-torchdata + - py-torchfile + - py-torchgeo + - py-torchmeta + - py-torchmetrics + - py-torchtext + - py-torchvision + - py-vector-quantize-pytorch + + # scikit-learn + - py-scikit-learn + - py-scikit-learn-extra + + # TensorBoard + - py-tensorboard + - py-tensorboard-data-server + - py-tensorboard-plugin-wit + - py-tensorboardx + + # TensorFlow + - py-tensorflow + - py-tensorflow-datasets + - py-tensorflow-estimator + - py-tensorflow-hub + - py-tensorflow-metadata + - py-tensorflow-probability + + # XGBoost + - py-xgboost + # - r-xgboost + - xgboost + + - arch: + - target=x86_64_v3 + + specs: + - matrix: + - [$packages] + - [$arch] + + mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-cuda" } + + gitlab-ci: + script: + - uname -a || true + - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true + - nproc + - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz + - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet + - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null + - . "./share/spack/setup-env.sh" + - spack --version + - spack arch + - cd ${SPACK_CONCRETE_ENV_DIR} + - spack env activate --without-view . + - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" + - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data + - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi + - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi + - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + after_script: + - cat /proc/loadavg || true + + match_behavior: first + mappings: + - match: + - llvm + - py-tensorflow + - py-torch + runner-attributes: + tags: [ "spack", "huge", "x86_64_v4" ] + variables: + CI_JOB_SIZE: huge + KUBERNETES_CPU_REQUEST: 11000m + KUBERNETES_MEMORY_REQUEST: 42G + - match: + - "@:" + runner-attributes: + tags: [ "spack", "large", "x86_64_v4" ] + variables: + CI_JOB_SIZE: large + KUBERNETES_CPU_REQUEST: 8000m + KUBERNETES_MEMORY_REQUEST: 12G + + image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } + + broken-specs-url: "s3://spack-binaries/broken-specs" + + service-job-attributes: + before_script: + - . "./share/spack/setup-env.sh" + - spack --version + image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } + tags: ["spack", "public", "x86_64_v4"] + + signing-job-attributes: + image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } + tags: ["spack", "aws"] + script: + - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp + - /sign.sh + - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache + + cdash: + build-group: Machine Learning + url: https://cdash.spack.io + project: Spack Testing + site: Cloud Gitlab Infrastructure diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml new file mode 100644 index 0000000000..29cf0aabe0 --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-linux-x86_64-rocm/spack.yaml @@ -0,0 +1,172 @@ +spack: + view: false + + concretizer: + reuse: false + unify: false + + config: + build_jobs: 32 + concretizer: clingo + install_tree: + root: /home/software/spack + padded_length: 384 + projections: + all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}" + + packages: + all: + target: [x86_64_v3] + variants: ~cuda+rocm amdgpu_target=gfx90a + gl: + require: "osmesa" + py-torch: + # Does not yet support Spack-installed ROCm + require: ~rocm + + definitions: + - packages: + # Horovod + - py-horovod + + # Hugging Face + - py-transformers + + # JAX + - py-jax + - py-jaxlib + + # Keras + - py-keras + - py-keras-applications + - py-keras-preprocessing + - py-keras2onnx + + # PyTorch + # Does not yet support Spack-install ROCm + # - py-botorch + # - py-efficientnet-pytorch + # - py-gpytorch + # - py-kornia + # - py-pytorch-gradual-warmup-lr + # - py-pytorch-lightning + # - py-segmentation-models-pytorch + # - py-timm + # - py-torch + # - py-torch-cluster + # - py-torch-geometric + # - py-torch-nvidia-apex + # - py-torch-scatter + # - py-torch-sparse + # - py-torch-spline-conv + # - py-torchaudio + # - py-torchdata + # - py-torchfile + # - py-torchgeo + # - py-torchmeta + # - py-torchmetrics + # - py-torchtext + # - py-torchvision + # - py-vector-quantize-pytorch + + # scikit-learn + - py-scikit-learn + - py-scikit-learn-extra + + # TensorBoard + - py-tensorboard + - py-tensorboard-data-server + - py-tensorboard-plugin-wit + - py-tensorboardx + + # TensorFlow + - py-tensorflow + - py-tensorflow-datasets + - py-tensorflow-estimator + - py-tensorflow-hub + - py-tensorflow-metadata + - py-tensorflow-probability + + # XGBoost + - py-xgboost + # - r-xgboost + - xgboost + + - arch: + - target=x86_64_v3 + + specs: + - matrix: + - [$packages] + - [$arch] + + mirrors: { "mirror": "s3://spack-binaries/develop/ml-linux-x86_64-rocm" } + + gitlab-ci: + script: + - uname -a || true + - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true + - nproc + - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz + - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet + - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null + - . "./share/spack/setup-env.sh" + - spack --version + - spack arch + - cd ${SPACK_CONCRETE_ENV_DIR} + - spack env activate --without-view . + - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" + - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data + - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi + - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi + - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + after_script: + - cat /proc/loadavg || true + + match_behavior: first + mappings: + - match: + - llvm + - llvm-amdgpu + - py-tensorflow + - py-torch + - rocblas + runner-attributes: + tags: [ "spack", "huge", "x86_64_v4" ] + variables: + CI_JOB_SIZE: huge + KUBERNETES_CPU_REQUEST: 11000m + KUBERNETES_MEMORY_REQUEST: 42G + - match: + - "@:" + runner-attributes: + tags: [ "spack", "large", "x86_64_v4" ] + variables: + CI_JOB_SIZE: large + KUBERNETES_CPU_REQUEST: 8000m + KUBERNETES_MEMORY_REQUEST: 12G + + image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } + + broken-specs-url: "s3://spack-binaries/broken-specs" + + service-job-attributes: + before_script: + - . "./share/spack/setup-env.sh" + - spack --version + image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } + tags: ["spack", "public", "x86_64_v4"] + + signing-job-attributes: + image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } + tags: ["spack", "aws"] + script: + - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp + - /sign.sh + - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache + + cdash: + build-group: Machine Learning + url: https://cdash.spack.io + project: Spack Testing + site: Cloud Gitlab Infrastructure diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml deleted file mode 100644 index 601327ec7e..0000000000 --- a/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml +++ /dev/null @@ -1,163 +0,0 @@ -spack: - view: false - - concretizer: - reuse: false - unify: false - - config: - build_jobs: 32 - concretizer: clingo - install_tree: - root: /home/software/spack - padded_length: 384 - projections: - all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}" - - packages: - all: - compiler: [gcc@11.2.0] - target: [x86_64_v3] - variants: ~cuda+rocm amdgpu_target=gfx90a - gl: - require: "osmesa" - py-torch: - # Does not yet support Spack-installed ROCm - require: ~rocm - - specs: - # Horovod - - py-horovod - - # Hugging Face - - py-transformers - - # JAX - - py-jax - - py-jaxlib - - # Keras - - py-keras - - py-keras-applications - - py-keras-preprocessing - - py-keras2onnx - - # PyTorch - # Does not yet support Spack-install ROCm - # - py-botorch - # - py-efficientnet-pytorch - # - py-gpytorch - # - py-kornia - # - py-pytorch-gradual-warmup-lr - # - py-pytorch-lightning - # - py-segmentation-models-pytorch - # - py-timm - # - py-torch - # - py-torch-cluster - # - py-torch-geometric - # - py-torch-nvidia-apex - # - py-torch-scatter - # - py-torch-sparse - # - py-torch-spline-conv - # - py-torchaudio - # - py-torchdata - # - py-torchfile - # - py-torchgeo - # - py-torchmeta - # - py-torchmetrics - # - py-torchtext - # - py-torchvision - # - py-vector-quantize-pytorch - - # scikit-learn - - py-scikit-learn - - py-scikit-learn-extra - - # TensorBoard - - py-tensorboard - - py-tensorboard-data-server - - py-tensorboard-plugin-wit - - py-tensorboardx - - # TensorFlow - - py-tensorflow - - py-tensorflow-datasets - - py-tensorflow-estimator - - py-tensorflow-hub - - py-tensorflow-metadata - - py-tensorflow-probability - - # XGBoost - - py-xgboost - # - r-xgboost - - xgboost - - mirrors: { "mirror": "s3://spack-binaries/develop/ml-rocm" } - - gitlab-ci: - script: - - uname -a || true - - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true - - nproc - - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz - - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet - - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null - - . "./share/spack/setup-env.sh" - - spack --version - - spack arch - - cd ${SPACK_CONCRETE_ENV_DIR} - - spack env activate --without-view . - - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" - - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data - - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi - - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi - - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) - after_script: - - cat /proc/loadavg || true - - match_behavior: first - mappings: - - match: - - llvm-amdgpu - - llvm - - py-torch - - rocblas - runner-attributes: - tags: [ "spack", "huge", "x86_64_v4" ] - variables: - CI_JOB_SIZE: huge - KUBERNETES_CPU_REQUEST: 11000m - KUBERNETES_MEMORY_REQUEST: 42G - - match: - - "@:" - runner-attributes: - tags: [ "spack", "large", "x86_64_v4" ] - variables: - CI_JOB_SIZE: large - KUBERNETES_CPU_REQUEST: 8000m - KUBERNETES_MEMORY_REQUEST: 12G - - image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } - - broken-specs-url: "s3://spack-binaries/broken-specs" - - service-job-attributes: - before_script: - - . "./share/spack/setup-env.sh" - - spack --version - image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] } - tags: ["spack", "public", "x86_64_v4"] - - signing-job-attributes: - image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } - tags: ["spack", "aws"] - script: - - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp - - /sign.sh - - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache - - cdash: - build-group: Machine Learning - url: https://cdash.spack.io - project: Spack Testing - site: Cloud Gitlab Infrastructure -- cgit v1.2.3-70-g09d2