From 2f8cea2792322e5bc39bc2444f24862dd425c4d7 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Thu, 25 May 2023 00:12:54 -0500 Subject: Add macOS ML CI stacks (#36586) * Add macOS ML CI stacks * torchmeta is no longer maintained and requires ancient PyTorch * Add MXNet * update darwin aarch64 stacks * add darwin-aarch64 scoped config.yaml * remove unnecessary cleanup job * fix specifications * fix labels * fix labels * fix indent on tags specification * no tags for trigger jobs * try overriding tags in stack spack.yaml * do not use CI_STACK_CONFIG_SCOPES * incorporate config:install_tree:root: overrides and compiler defs * copy relevant ci-scoped config settings directly into stack spack.yaml * remove build-job-remove * spack ci generate: add debug flag * include cdash config directly in stack spack.yaml * customize build-job script section to avoid absolute paths * add any-job specification * tags: use aarch64-macos instead of aarch64 * generate tags: use aarch64-macos instead of aarch64 * do not add morepadding * use shared mirror; comment out known failures * remove any-job * nproc || true * comment out specs failing due to bazel from cache codesign issue --------- Co-authored-by: eugeneswalker --- share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml | 172 +++++++-------------- .../configs/darwin/aarch64/compilers.yaml | 27 ++++ .../configs/darwin/aarch64/config.yaml | 3 + .../gitlab/cloud_pipelines/configs/darwin/ci.yaml | 14 -- .../cloud_pipelines/stacks/e4s-mac/spack.yaml | 40 ----- .../stacks/ml-darwin-aarch64-mps/spack.yaml | 139 +++++++++++++++++ 6 files changed, 223 insertions(+), 172 deletions(-) create mode 100644 share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/compilers.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/config.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/configs/darwin/ci.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/e4s-mac/spack.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/ml-darwin-aarch64-mps/spack.yaml (limited to 'share') diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml index 3415a5f20d..50f51d9eeb 100644 --- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml +++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml @@ -128,6 +128,38 @@ default: extends: [ ".base-job", ".generate-base" ] tags: ["spack", "public", "medium", "x86_64"] +.darwin-generate-base: + stage: generate + script: + - export SPACK_DISABLE_LOCAL_CONFIG=1 + - export SPACK_USER_CACHE_PATH=$(pwd)/_user_cache + - uname -a || true + - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true + - nproc || true + - . "./share/spack/setup-env.sh" + - spack --version + - cd share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME} + - spack env activate --without-view . + - spack -d ci generate --check-index-only + --buildcache-destination "${SPACK_BUILDCACHE_DESTINATION}" + --artifacts-root "${CI_PROJECT_DIR}/jobs_scratch_dir" + --output-file "${CI_PROJECT_DIR}/jobs_scratch_dir/cloud-ci-pipeline.yml" + after_script: + - cat /proc/loadavg || true + artifacts: + paths: + - "${CI_PROJECT_DIR}/jobs_scratch_dir" + interruptible: true + timeout: 60 minutes + retry: + max: 2 + when: + - always + +.darwin-generate: + extends: [ ".base-job", ".darwin-generate-base" ] + + .generate-deprecated: extends: [ ".base-job" ] stage: generate @@ -236,124 +268,6 @@ protected-publish: # - artifacts: True # job: my-super-cool-stack-generate -######################################## -# E4S Mac Stack -# -# With no near-future plans to have -# protected aws runners running mac -# builds, it seems best to decouple -# them from the rest of the stacks for -# the time being. This way they can -# still run on UO runners and be signed -# using the previous approach. -######################################## -# .e4s-mac: -# variables: -# SPACK_CI_STACK_NAME: e4s-mac -# allow_failure: True - -# .mac-pr: -# only: -# - /^pr[\d]+_.*$/ -# - /^github\/pr[\d]+_.*$/ -# variables: -# SPACK_BUILDCACHE_DESTINATION: "s3://spack-binaries-prs/${CI_COMMIT_REF_NAME}" -# SPACK_PRUNE_UNTOUCHED: "True" - -# .mac-protected: -# only: -# - /^develop$/ -# - /^releases\/v.*/ -# - /^v.*/ -# - /^github\/develop$/ -# variables: -# SPACK_BUILDCACHE_DESTINATION: "s3://spack-binaries/${CI_COMMIT_REF_NAME}/${SPACK_CI_STACK_NAME}" - -# .mac-pr-build: -# extends: [ ".mac-pr", ".build" ] -# variables: -# AWS_ACCESS_KEY_ID: ${PR_MIRRORS_AWS_ACCESS_KEY_ID} -# AWS_SECRET_ACCESS_KEY: ${PR_MIRRORS_AWS_SECRET_ACCESS_KEY} -# .mac-protected-build: -# extends: [ ".mac-protected", ".build" ] -# variables: -# AWS_ACCESS_KEY_ID: ${PROTECTED_MIRRORS_AWS_ACCESS_KEY_ID} -# AWS_SECRET_ACCESS_KEY: ${PROTECTED_MIRRORS_AWS_SECRET_ACCESS_KEY} -# SPACK_SIGNING_KEY: ${PACKAGE_SIGNING_KEY} - -# e4s-mac-pr-generate: -# extends: [".e4s-mac", ".mac-pr"] -# stage: generate -# script: -# - tmp="$(mktemp -d)"; export SPACK_USER_CONFIG_PATH="$tmp"; export SPACK_USER_CACHE_PATH="$tmp" -# - . "./share/spack/setup-env.sh" -# - spack --version -# - cd share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME} -# - spack env activate --without-view . -# - spack ci generate --check-index-only -# --buildcache-destination "${SPACK_BUILDCACHE_DESTINATION}" -# --artifacts-root "${CI_PROJECT_DIR}/jobs_scratch_dir" -# --output-file "${CI_PROJECT_DIR}/jobs_scratch_dir/cloud-ci-pipeline.yml" -# artifacts: -# paths: -# - "${CI_PROJECT_DIR}/jobs_scratch_dir" -# tags: -# - lambda -# interruptible: true -# retry: -# max: 2 -# when: -# - runner_system_failure -# - stuck_or_timeout_failure -# timeout: 60 minutes - -# e4s-mac-protected-generate: -# extends: [".e4s-mac", ".mac-protected"] -# stage: generate -# script: -# - tmp="$(mktemp -d)"; export SPACK_USER_CONFIG_PATH="$tmp"; export SPACK_USER_CACHE_PATH="$tmp" -# - . "./share/spack/setup-env.sh" -# - spack --version -# - cd share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME} -# - spack env activate --without-view . -# - spack ci generate --check-index-only -# --artifacts-root "${CI_PROJECT_DIR}/jobs_scratch_dir" -# --output-file "${CI_PROJECT_DIR}/jobs_scratch_dir/cloud-ci-pipeline.yml" -# artifacts: -# paths: -# - "${CI_PROJECT_DIR}/jobs_scratch_dir" -# tags: -# - omicron -# interruptible: true -# retry: -# max: 2 -# when: -# - runner_system_failure -# - stuck_or_timeout_failure -# timeout: 60 minutes - -# e4s-mac-pr-build: -# extends: [ ".e4s-mac", ".mac-pr-build" ] -# trigger: -# include: -# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml -# job: e4s-mac-pr-generate -# strategy: depend -# needs: -# - artifacts: True -# job: e4s-mac-pr-generate - -# e4s-mac-protected-build: -# extends: [ ".e4s-mac", ".mac-protected-build" ] -# trigger: -# include: -# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml -# job: e4s-mac-protected-generate -# strategy: depend -# needs: -# - artifacts: True -# job: e4s-mac-protected-generate - ######################################## # E4S pipeline ######################################## @@ -762,6 +676,28 @@ ml-linux-x86_64-rocm-build: - artifacts: True job: ml-linux-x86_64-rocm-generate +######################################## +# Machine Learning - Darwin aarch64 (MPS) +######################################## +.ml-darwin-aarch64-mps: + variables: + SPACK_CI_STACK_NAME: ml-darwin-aarch64-mps + +ml-darwin-aarch64-mps-generate: + tags: [ "macos-ventura", "apple-clang-14", "aarch64-macos" ] + extends: [ ".ml-darwin-aarch64-mps", ".darwin-generate"] + +ml-darwin-aarch64-mps-build: + extends: [ ".ml-darwin-aarch64-mps", ".build" ] + trigger: + include: + - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml + job: ml-darwin-aarch64-mps-generate + strategy: depend + needs: + - artifacts: True + job: ml-darwin-aarch64-mps-generate + ######################################## # Deprecated CI testing ######################################## diff --git a/share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/compilers.yaml b/share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/compilers.yaml new file mode 100644 index 0000000000..d5a0130341 --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/compilers.yaml @@ -0,0 +1,27 @@ +compilers: +- compiler: + spec: apple-clang@14.0.0 + paths: + cc: /usr/bin/clang + cxx: /usr/bin/clang++ + f77: /opt/homebrew/bin/gfortran + fc: /opt/homebrew/bin/gfortran + flags: {} + operating_system: ventura + target: aarch64 + modules: [] + environment: {} + extra_rpaths: [] +- compiler: + spec: gcc@12.2.0 + paths: + cc: /opt/homebrew/bin/gcc-12 + cxx: /opt/homebrew/bin/g++-12 + f77: /opt/homebrew/bin/gfortran-12 + fc: /opt/homebrew/bin/gfortran-12 + flags: {} + operating_system: ventura + target: aarch64 + modules: [] + environment: {} + extra_rpaths: [] diff --git a/share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/config.yaml b/share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/config.yaml new file mode 100644 index 0000000000..fcbe195a4f --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/configs/darwin/aarch64/config.yaml @@ -0,0 +1,3 @@ +config: + install_tree: + root: $spack/opt/spack diff --git a/share/spack/gitlab/cloud_pipelines/configs/darwin/ci.yaml b/share/spack/gitlab/cloud_pipelines/configs/darwin/ci.yaml deleted file mode 100644 index f689e0c37a..0000000000 --- a/share/spack/gitlab/cloud_pipelines/configs/darwin/ci.yaml +++ /dev/null @@ -1,14 +0,0 @@ -ci: - pipeline-gen: - - build-job: - script: | - - tmp="$(mktemp -d)"; export SPACK_USER_CONFIG_PATH="$tmp"; export SPACK_USER_CACHE_PATH="$tmp" - - . "./share/spack/setup-env.sh" - - spack --version - - spack arch - - cd ${SPACK_CONCRETE_ENV_DIR} - - spack env activate --without-view . - - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'" - - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data - - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) - tags: ["lambda"] diff --git a/share/spack/gitlab/cloud_pipelines/stacks/e4s-mac/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/e4s-mac/spack.yaml deleted file mode 100644 index 88b081d2eb..0000000000 --- a/share/spack/gitlab/cloud_pipelines/stacks/e4s-mac/spack.yaml +++ /dev/null @@ -1,40 +0,0 @@ -spack: - view: false - packages: - all: - compiler: [apple-clang@13.1.6] - target: [m1] - - definitions: - - easy_specs: - - berkeley-db - - ncurses - - gcc - - py-jupyterlab - - py-scipy - - py-matplotlib - - py-pandas - - - arch: - - '%apple-clang@13.1.6 target=m1' - - specs: - - - matrix: - - - $easy_specs - - - $arch - - mirrors: { "mirror": "s3://spack-binaries/develop/e4s-mac" } - - ci: - pipeline-gen: - - cleanup-job: - before_script: | - - export SPACK_USER_CACHE_PATH=$(pwd)/.spack-user-cache - - export SPACK_USER_CONFIG_PATH=$(pwd)/.spack-user-config - - . "./share/spack/setup-env.sh" - - spack --version - tags: [lambda] - - cdash: - build-group: E4S Mac diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-darwin-aarch64-mps/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-darwin-aarch64-mps/spack.yaml new file mode 100644 index 0000000000..8545a8bbcc --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-darwin-aarch64-mps/spack.yaml @@ -0,0 +1,139 @@ +spack: + view: false + + concretizer: + unify: false + reuse: false + + config: + concretizer: clingo + db_lock_timeout: 120 + install_tree: + root: $spack/opt/spack + padded_length: 256 + projections: + all: '{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}' + + packages: + all: + require: target=aarch64 + variants: +mps~cuda~rocm + mpi: + require: openmpi + + specs: + # Hugging Face + - py-transformers + + # JAX + - py-jax + # - py-jaxlib # bazel codesign + + # Keras + - py-keras-applications + - py-keras-preprocessing + - py-keras2onnx + # - py-keras # bazel codesign + + # MXNet + - mxnet + + # PyTorch + - py-botorch + - py-gpytorch + - py-pytorch-gradual-warmup-lr + - py-segmentation-models-pytorch + - py-timm + - py-torch + - py-torch-cluster + - py-torch-geometric + - py-torch-sparse + - py-torchdata + - py-torchfile + - py-torchgeo + - py-torchvision + + # scikit-learn + - py-scikit-learn + - py-scikit-learn-extra + + # TensorBoard + - py-tensorboard + - py-tensorboard-data-server + - py-tensorboard-plugin-wit + - py-tensorboardx + + # TensorFlow + # - py-tensorflow # bazel codesign + # - py-tensorflow-datasets # bazel codesign + # - py-tensorflow-hub # bazel codesign + # - py-tensorflow-metadata # bazel codesign + # - py-tensorflow-estimator # bazel codesign + # - py-tensorflow-probability # py-dm-tree due to bazel codesign + + # XGBoost + - py-xgboost + - xgboost + + # ERRORS + # - py-efficientnet-pytorch # py-torch + # - py-horovod # py-torch + # - py-kornia # py-torch + # - py-lightning # py-torch + # - py-pytorch-lightning # py-torch + # - py-torch-nvidia-apex # py-torch + # - py-torch-scatter # py-torch + # - py-torch-spline-conv # py-torch + # - py-torchaudio # py-torchaudio + # - py-torchmetrics # py-torch + # - py-torchtext # py-torchtext + # - py-vector-quantize-pytorch # py-torch + # - r-xgboost # r + + mirrors: { "mirror": "s3://spack-binaries/develop/ml-darwin-aarch64-cpu" } + + ci: + pipeline-gen: + - build-job-remove: + image: no-image + tags: [spack, public] + - build-job: + tags: [ "macos-ventura", "apple-clang-14", "aarch64-macos" ] + script:: + - - spack compiler find + - cd ${SPACK_CONCRETE_ENV_DIR} + - spack env activate --without-view . + - if [ -n "$SPACK_BUILD_JOBS" ]; then spack config add "config:build_jobs:$SPACK_BUILD_JOBS"; fi + - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data + # AWS runners mount E4S public key (verification), UO runners mount public/private (signing/verification) + - if [[ -r /mnt/key/e4s.gpg ]]; then spack gpg trust /mnt/key/e4s.gpg; fi + # UO runners mount intermediate ci public key (verification), AWS runners mount public/private (signing/verification) + - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi + - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi + - spack --color=always --backtrace ci rebuild --tests > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2) + after_script: + - - cat /proc/loadavg || true + - signing-job: + image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] } + tags: ["aws"] + script: + - - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp + - /sign.sh + - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache + - aws s3 cp /tmp/public_keys ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache/_pgp --recursive --exclude "*" --include "*.pub" + - any-job: + image: "ghcr.io/spack/e4s-ubuntu-18.04:v2021-10-18" + tags: ["spack"] + before_script: + - - uname -a || true + - grep -E "vendor|model name" /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true + - nproc || true + - - . "./share/spack/setup-env.sh" + - spack --version + - spack arch + + cdash: + build-group: Machine Learning MPS + url: https://cdash.spack.io + project: Spack Testing + site: Cloud Gitlab Infrastructure -- cgit v1.2.3-70-g09d2