Add Machine Learning CI stacks (CPU, CUDA, ROCm) to the cloud pipelines.

For each stack, .gitlab-ci.yml gains a hidden `.ml-<stack>` template that
sets SPACK_CI_STACK_NAME, a hidden `.ml-<stack>-generate` template (image and
runner tags), PR/protected generate jobs, and PR/protected build jobs that
trigger the generated child pipeline. Each stack also gets its own
environment file under stacks/ml-<stack>/spack.yaml.

NOTE(review): the line structure and YAML indentation below were
reconstructed after whitespace was lost; confirm against the upstream commit
before applying.

4 files changed, 553 insertions, 0 deletions

diff --git a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
index fcfa925549..17c16e16d6 100644
--- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
+++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml
@@ -749,3 +749,123 @@ tutorial-protected-build:
   needs:
     - artifacts: True
       job: tutorial-protected-generate
+
+########################################
+# Machine Learning (CPU)
+########################################
+.ml-cpu:
+  variables:
+    SPACK_CI_STACK_NAME: ml-cpu
+
+.ml-cpu-generate:
+  extends: .ml-cpu
+  image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
+  tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
+
+ml-cpu-pr-generate:
+  extends: [ ".ml-cpu-generate", ".pr-generate"]
+
+ml-cpu-protected-generate:
+  extends: [ ".ml-cpu-generate", ".protected-generate"]
+
+ml-cpu-pr-build:
+  extends: [ ".ml-cpu", ".pr-build" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-cpu-pr-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-cpu-pr-generate
+
+ml-cpu-protected-build:
+  extends: [ ".ml-cpu", ".protected-build" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-cpu-protected-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-cpu-protected-generate
+
+########################################
+# Machine Learning (CUDA)
+########################################
+.ml-cuda:
+  variables:
+    SPACK_CI_STACK_NAME: ml-cuda
+
+.ml-cuda-generate:
+  extends: .ml-cuda
+  image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
+  tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
+
+ml-cuda-pr-generate:
+  extends: [ ".ml-cuda-generate", ".pr-generate"]
+
+ml-cuda-protected-generate:
+  extends: [ ".ml-cuda-generate", ".protected-generate"]
+
+ml-cuda-pr-build:
+  extends: [ ".ml-cuda", ".pr-build" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-cuda-pr-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-cuda-pr-generate
+
+ml-cuda-protected-build:
+  extends: [ ".ml-cuda", ".protected-build" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-cuda-protected-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-cuda-protected-generate
+
+########################################
+# Machine Learning (ROCm)
+########################################
+.ml-rocm:
+  variables:
+    SPACK_CI_STACK_NAME: ml-rocm
+
+.ml-rocm-generate:
+  extends: .ml-rocm
+  image: ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21
+  tags: ["spack", "aws", "public", "medium", "x86_64_v4"]
+
+ml-rocm-pr-generate:
+  extends: [ ".ml-rocm-generate", ".pr-generate"]
+
+ml-rocm-protected-generate:
+  extends: [ ".ml-rocm-generate", ".protected-generate"]
+
+ml-rocm-pr-build:
+  extends: [ ".ml-rocm", ".pr-build" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-rocm-pr-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-rocm-pr-generate
+
+ml-rocm-protected-build:
+  extends: [ ".ml-rocm", ".protected-build" ]
+  trigger:
+    include:
+      - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml
+        job: ml-rocm-protected-generate
+    strategy: depend
+  needs:
+    - artifacts: True
+      job: ml-rocm-protected-generate
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml
new file mode 100644
index 0000000000..a687a6928e
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-cpu/spack.yaml
@@ -0,0 +1,142 @@
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: false
+
+  config:
+    concretizer: clingo
+    install_tree:
+      root: /home/software/spack
+      padded_length: 384
+      projections:
+        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
+
+  packages:
+    all:
+      compiler: [gcc@11.2.0]
+      target: [x86_64_v4]
+      variants: ~cuda~rocm
+
+  specs:
+    # Horovod
+    - py-horovod
+
+    # JAX
+    # https://github.com/google/jax/issues/12614
+    # - py-jax
+    # - py-jaxlib
+
+    # Keras
+    - py-keras
+    - py-keras-applications
+    - py-keras-preprocessing
+    - py-keras2onnx
+
+    # PyTorch
+    - py-botorch
+    - py-efficientnet-pytorch
+    - py-gpytorch
+    - py-kornia
+    - py-pytorch-gradual-warmup-lr
+    - py-pytorch-lightning
+    - py-segmentation-models-pytorch
+    - py-timm
+    - py-torch
+    - py-torch-cluster
+    - py-torch-geometric
+    # https://github.com/NVIDIA/apex/issues/1498
+    # - py-torch-nvidia-apex
+    - py-torch-scatter
+    - py-torch-sparse
+    - py-torch-spline-conv
+    - py-torchaudio
+    - py-torchdata
+    - py-torchfile
+    - py-torchgeo
+    - py-torchmeta
+    - py-torchmetrics
+    - py-torchtext
+    - py-torchvision
+    - py-vector-quantize-pytorch
+
+    # scikit-learn
+    - py-scikit-learn
+    - py-scikit-learn-extra
+
+    # TensorBoard
+    - py-tensorboard
+    - py-tensorboard-data-server
+    - py-tensorboard-plugin-wit
+    - py-tensorboardx
+
+    # TensorFlow
+    - py-tensorflow
+    - py-tensorflow-datasets
+    - py-tensorflow-estimator
+    - py-tensorflow-hub
+    - py-tensorflow-metadata
+    - py-tensorflow-probability
+
+    # XGBoost
+    - py-xgboost
+    # - r-xgboost
+    - xgboost
+
+  mirrors: { "mirror": "s3://spack-binaries/develop/ml-cpu" }
+
+  gitlab-ci:
+    script:
+      - . "./share/spack/setup-env.sh"
+      - spack --version
+      - cd ${SPACK_CONCRETE_ENV_DIR}
+      - spack env activate --without-view .
+      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
+      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
+      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
+      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
+      - spack -d ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
+
+    mappings:
+      - match:
+          - llvm
+        runner-attributes:
+          tags: [ "spack", "huge", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: huge
+            KUBERNETES_CPU_REQUEST: 11000m
+            KUBERNETES_MEMORY_REQUEST: 42G
+      - match:
+          - "@:"
+        runner-attributes:
+          tags: [ "spack", "large", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: large
+            KUBERNETES_CPU_REQUEST: 8000m
+            KUBERNETES_MEMORY_REQUEST: 12G
+
+    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+
+    broken-specs-url: "s3://spack-binaries/broken-specs"
+
+    service-job-attributes:
+      before_script:
+        - . "./share/spack/setup-env.sh"
+        - spack --version
+      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+      tags: ["spack", "public", "x86_64_v4"]
+
+    signing-job-attributes:
+      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
+      tags: ["spack", "aws"]
+      script:
+        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
+        - /sign.sh
+        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
+
+  cdash:
+    build-group: Machine Learning
+    url: https://cdash.spack.io
+    project: Spack Testing
+    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml
new file mode 100644
index 0000000000..eb37168665
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-cuda/spack.yaml
@@ -0,0 +1,144 @@
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: false
+
+  config:
+    concretizer: clingo
+    install_tree:
+      root: /home/software/spack
+      padded_length: 384
+      projections:
+        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
+
+  packages:
+    all:
+      compiler: [gcc@11.2.0]
+      target: [x86_64_v4]
+      variants: ~rocm+cuda cuda_arch=80
+    llvm:
+      # https://github.com/spack/spack/issues/27999
+      require: ~cuda
+
+  specs:
+    # Horovod
+    - py-horovod
+
+    # JAX
+    # https://github.com/google/jax/issues/12614
+    # - py-jax
+    # - py-jaxlib
+
+    # Keras
+    - py-keras
+    - py-keras-applications
+    - py-keras-preprocessing
+    - py-keras2onnx
+
+    # PyTorch
+    - py-botorch
+    - py-efficientnet-pytorch
+    - py-gpytorch
+    - py-kornia
+    - py-pytorch-gradual-warmup-lr
+    - py-pytorch-lightning
+    - py-segmentation-models-pytorch
+    - py-timm
+    - py-torch
+    - py-torch-cluster
+    - py-torch-geometric
+    - py-torch-nvidia-apex
+    - py-torch-scatter
+    - py-torch-sparse
+    - py-torch-spline-conv
+    - py-torchaudio
+    - py-torchdata
+    - py-torchfile
+    - py-torchgeo
+    - py-torchmeta
+    - py-torchmetrics
+    - py-torchtext
+    - py-torchvision
+    - py-vector-quantize-pytorch
+
+    # scikit-learn
+    - py-scikit-learn
+    - py-scikit-learn-extra
+
+    # TensorBoard
+    - py-tensorboard
+    - py-tensorboard-data-server
+    - py-tensorboard-plugin-wit
+    - py-tensorboardx
+
+    # TensorFlow
+    - py-tensorflow
+    - py-tensorflow-datasets
+    - py-tensorflow-estimator
+    - py-tensorflow-hub
+    - py-tensorflow-metadata
+    - py-tensorflow-probability
+
+    # XGBoost
+    - py-xgboost
+    # - r-xgboost
+    - xgboost
+
+  mirrors: { "mirror": "s3://spack-binaries/develop/ml-cuda" }
+
+  gitlab-ci:
+    script:
+      - . "./share/spack/setup-env.sh"
+      - spack --version
+      - cd ${SPACK_CONCRETE_ENV_DIR}
+      - spack env activate --without-view .
+      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
+      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
+      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
+      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
+      - spack -d ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
+
+    mappings:
+      - match:
+          - llvm
+        runner-attributes:
+          tags: [ "spack", "huge", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: huge
+            KUBERNETES_CPU_REQUEST: 11000m
+            KUBERNETES_MEMORY_REQUEST: 42G
+      - match:
+          - "@:"
+        runner-attributes:
+          tags: [ "spack", "large", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: large
+            KUBERNETES_CPU_REQUEST: 8000m
+            KUBERNETES_MEMORY_REQUEST: 12G
+
+    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+
+    broken-specs-url: "s3://spack-binaries/broken-specs"
+
+    service-job-attributes:
+      before_script:
+        - . "./share/spack/setup-env.sh"
+        - spack --version
+      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+      tags: ["spack", "public", "x86_64_v4"]
+
+    signing-job-attributes:
+      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
+      tags: ["spack", "aws"]
+      script:
+        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
+        - /sign.sh
+        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
+
+  cdash:
+    build-group: Machine Learning
+    url: https://cdash.spack.io
+    project: Spack Testing
+    site: Cloud Gitlab Infrastructure
diff --git a/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml b/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml
new file mode 100644
index 0000000000..c437b170e4
--- /dev/null
+++ b/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml
@@ -0,0 +1,147 @@
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: false
+
+  config:
+    concretizer: clingo
+    install_tree:
+      root: /home/software/spack
+      padded_length: 384
+      projections:
+        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"
+
+  packages:
+    all:
+      compiler: [gcc@11.2.0]
+      target: [x86_64_v4]
+      variants: ~cuda+rocm amdgpu_target=gfx90a
+    gl:
+      require: "osmesa"
+    py-torch:
+      # Does not yet support Spack-installed ROCm
+      require: ~rocm
+
+  specs:
+    # Horovod
+    - py-horovod
+
+    # JAX
+    # https://github.com/google/jax/issues/12614
+    # - py-jax
+    # - py-jaxlib
+
+    # Keras
+    - py-keras
+    - py-keras-applications
+    - py-keras-preprocessing
+    - py-keras2onnx
+
+    # PyTorch
+    # Does not yet support Spack-installed ROCm
+    # - py-botorch
+    # - py-efficientnet-pytorch
+    # - py-gpytorch
+    # - py-kornia
+    # - py-pytorch-gradual-warmup-lr
+    # - py-pytorch-lightning
+    # - py-segmentation-models-pytorch
+    # - py-timm
+    # - py-torch
+    # - py-torch-cluster
+    # - py-torch-geometric
+    # - py-torch-nvidia-apex
+    # - py-torch-scatter
+    # - py-torch-sparse
+    # - py-torch-spline-conv
+    # - py-torchaudio
+    # - py-torchdata
+    # - py-torchfile
+    # - py-torchgeo
+    # - py-torchmeta
+    # - py-torchmetrics
+    # - py-torchtext
+    # - py-torchvision
+    # - py-vector-quantize-pytorch
+
+    # scikit-learn
+    - py-scikit-learn
+    - py-scikit-learn-extra
+
+    # TensorBoard
+    - py-tensorboard
+    - py-tensorboard-data-server
+    - py-tensorboard-plugin-wit
+    - py-tensorboardx
+
+    # TensorFlow
+    - py-tensorflow
+    - py-tensorflow-datasets
+    - py-tensorflow-estimator
+    - py-tensorflow-hub
+    - py-tensorflow-metadata
+    - py-tensorflow-probability
+
+    # XGBoost
+    - py-xgboost
+    # - r-xgboost
+    - xgboost
+
+  mirrors: { "mirror": "s3://spack-binaries/develop/ml-rocm" }
+
+  gitlab-ci:
+    script:
+      - . "./share/spack/setup-env.sh"
+      - spack --version
+      - cd ${SPACK_CONCRETE_ENV_DIR}
+      - spack env activate --without-view .
+      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
+      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
+      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
+      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
+      - spack -d ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
+
+    mappings:
+      - match:
+          - llvm
+        runner-attributes:
+          tags: [ "spack", "huge", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: huge
+            KUBERNETES_CPU_REQUEST: 11000m
+            KUBERNETES_MEMORY_REQUEST: 42G
+      - match:
+          - "@:"
+        runner-attributes:
+          tags: [ "spack", "large", "x86_64_v4" ]
+          variables:
+            CI_JOB_SIZE: large
+            KUBERNETES_CPU_REQUEST: 8000m
+            KUBERNETES_MEMORY_REQUEST: 12G
+
+    image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+
+    broken-specs-url: "s3://spack-binaries/broken-specs"
+
+    service-job-attributes:
+      before_script:
+        - . "./share/spack/setup-env.sh"
+        - spack --version
+      image: { "name": "ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21", "entrypoint": [""] }
+      tags: ["spack", "public", "x86_64_v4"]
+
+    signing-job-attributes:
+      image: { "name": "ghcr.io/spack/notary:latest", "entrypoint": [""] }
+      tags: ["spack", "aws"]
+      script:
+        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
+        - /sign.sh
+        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache
+
+  cdash:
+    build-group: Machine Learning
+    url: https://cdash.spack.io
+    project: Spack Testing
+    site: Cloud Gitlab Infrastructure