# path: root/share/spack/gitlab/cloud_pipelines/stacks/ml-rocm/spack.yaml
# blob: f1c5fa39567bc50773111aed9124c78c5bd47e8a
spack:
  # No environment view is generated for this stack.
  view: false

  # Concretizer behaviour for this environment.
  concretizer:
    # Package reuse is switched off for this stack.
    reuse: false
    # Unified concretization of the root specs is switched off.
    unify: false

  # General Spack configuration for this environment.
  config:
    # Concretizer implementation to use.
    concretizer: clingo
    # Where and how packages are laid out on disk.
    install_tree:
      root: /home/software/spack
      # Spack `padded_length` setting: install paths are padded to this length.
      padded_length: 384
      projections:
        # Directory layout template applied to all packages.
        all: "{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}"

  # Per-package build preferences and requirements.
  packages:
    # Preferences listed under `all` are not tied to a single package.
    all:
      compiler:
        - gcc@11.2.0
      target:
        - x86_64_v3
      # CUDA off, ROCm on, built for the gfx90a AMD GPU target.
      variants: '~cuda+rocm amdgpu_target=gfx90a'
    gl:
      require: 'osmesa'
    py-torch:
      # Does not yet support Spack-installed ROCm
      require: '~rocm'

  # Root specs built by this stack, grouped by framework.
  # Commented-out entries are currently disabled; the reason is given at the
  # top of the group where one is known.
  specs:
    # Horovod
    - py-horovod

    # Hugging Face
    - py-transformers

    # JAX
    # Disabled, see https://github.com/google/jax/issues/12614
    # - py-jax
    # - py-jaxlib

    # Keras
    - py-keras
    - py-keras-applications
    - py-keras-preprocessing
    - py-keras2onnx

    # PyTorch
    # Disabled: does not yet support Spack-installed ROCm
    # - py-botorch
    # - py-efficientnet-pytorch
    # - py-gpytorch
    # - py-kornia
    # - py-pytorch-gradual-warmup-lr
    # - py-pytorch-lightning
    # - py-segmentation-models-pytorch
    # - py-timm
    # - py-torch
    # - py-torch-cluster
    # - py-torch-geometric
    # - py-torch-nvidia-apex
    # - py-torch-scatter
    # - py-torch-sparse
    # - py-torch-spline-conv
    # - py-torchaudio
    # - py-torchdata
    # - py-torchfile
    # - py-torchgeo
    # - py-torchmeta
    # - py-torchmetrics
    # - py-torchtext
    # - py-torchvision
    # - py-vector-quantize-pytorch

    # scikit-learn
    - py-scikit-learn
    - py-scikit-learn-extra

    # TensorBoard
    - py-tensorboard
    - py-tensorboard-data-server
    - py-tensorboard-plugin-wit
    - py-tensorboardx

    # TensorFlow
    - py-tensorflow
    - py-tensorflow-datasets
    - py-tensorflow-estimator
    - py-tensorflow-hub
    - py-tensorflow-metadata
    - py-tensorflow-probability

    # XGBoost
    - py-xgboost
    # - r-xgboost
    - xgboost

  # Mirror named "mirror", pointing at this stack's S3 location.
  mirrors:
    mirror: 's3://spack-binaries/develop/ml-rocm'

  gitlab-ci:
    # Shell steps for the job, in order.
    script:
      # Print system information; `|| true` keeps a failure here from
      # failing the job.
      - uname -a || true
      # Print the unique CPU vendor/model lines, falling back to the first ten
      # lines of /proc/cpuinfo; never fails the job.
      - grep -E 'vendor|model name' /proc/cpuinfo 2>/dev/null | sort -u || head -n10 /proc/cpuinfo 2>/dev/null || true
      # Download a GNU Make 4.3 binary tarball, verify its SHA-256 checksum,
      # and unpack only bin/make under /usr (tar's stderr is discarded).
      - curl -Lfs 'https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz' -o gmake.tar.gz
      - printf 'fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz' | sha256sum --check --strict --quiet
      - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null
      # Load Spack's shell integration and report its version and architecture.
      - . "./share/spack/setup-env.sh"
      - spack --version
      - spack arch
      # Enter the concrete environment directory and activate it without a view.
      - cd ${SPACK_CONCRETE_ENV_DIR}
      - spack env activate --without-view .
      # Add an install-tree projection for the package named by
      # SPACK_JOB_SPEC_PKG_NAME that prefixes its path with "morepadding/".
      - spack config add "config:install_tree:projections:${SPACK_JOB_SPEC_PKG_NAME}:'morepadding/{architecture}/{compiler.name}-{compiler.version}/{name}-{version}-{hash}'"
      # Directory that receives the captured stdout/stderr below.
      - mkdir -p ${SPACK_ARTIFACTS_ROOT}/user_data
      # Trust each GPG key only if its file is readable.
      - if [[ -r /mnt/key/intermediate_ci_signing_key.gpg ]]; then spack gpg trust /mnt/key/intermediate_ci_signing_key.gpg; fi
      - if [[ -r /mnt/key/spack_public_key.gpg ]]; then spack gpg trust /mnt/key/spack_public_key.gpg; fi
      # Run the rebuild; stdout and stderr are each copied to a file via tee
      # while still being written to the job's own stdout/stderr.
      - spack --color=always --backtrace ci rebuild > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)

    # With `first`, the earliest mapping whose `match` list fits a spec is
    # the one used, so the order of the entries below matters.
    match_behavior: first
    mappings:
      # llvm is sent to the "huge" runner class.
      - match:
          - llvm
        runner-attributes:
          tags:
            - spack
            - huge
            - x86_64_v4
          variables:
            CI_JOB_SIZE: huge
            KUBERNETES_CPU_REQUEST: 11000m
            KUBERNETES_MEMORY_REQUEST: 42G
      # Fallback entry: the open-ended version range "@:" is used as the
      # match for everything not handled above ("large" runner class).
      - match:
          - '@:'
        runner-attributes:
          tags:
            - spack
            - large
            - x86_64_v4
          variables:
            CI_JOB_SIZE: large
            KUBERNETES_CPU_REQUEST: 8000m
            KUBERNETES_MEMORY_REQUEST: 12G

    # Container image for the jobs; the entrypoint is overridden with a
    # single empty string.
    image:
      name: 'ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21'
      entrypoint:
        - ''

    # S3 location used for "broken specs" bookkeeping.
    # NOTE(review): presumably where specs known to fail are recorded -
    # confirm against the Spack pipelines documentation.
    broken-specs-url: "s3://spack-binaries/broken-specs"

    # Attributes for the pipeline's service jobs.
    service-job-attributes:
      # Load Spack's shell integration and print its version first.
      before_script:
        - . "./share/spack/setup-env.sh"
        - spack --version
      # Same image and empty-string entrypoint as the top-level `image` entry.
      image:
        name: 'ghcr.io/spack/e4s-amazonlinux-2:v2022-03-21'
        entrypoint:
          - ''
      tags:
        - spack
        - public
        - x86_64_v4

    # Attributes for the signing job.
    signing-job-attributes:
      # NOTE(review): this image uses the floating `latest` tag, unlike the
      # date-pinned build image - confirm that is intended.
      image:
        name: 'ghcr.io/spack/notary:latest'
        entrypoint:
          - ''
      tags:
        - spack
        - aws
      script:
        # Copy only files matching *spec.json* from the mirror's build_cache
        # into /tmp.
        - aws s3 sync --exclude "*" --include "*spec.json*" ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache /tmp
        # Run the signing script shipped in the image.
        - /sign.sh
        # Copy only files matching *spec.json.sig* from /tmp back to the
        # mirror's build_cache.
        - aws s3 sync --exclude "*" --include "*spec.json.sig*" /tmp ${SPACK_REMOTE_MIRROR_OVERRIDE}/build_cache

  # CDash reporting target: server URL, project, build group and site name.
  cdash:
    build-group: Machine Learning
    url: https://cdash.spack.io
    project: Spack Testing
    site: Cloud Gitlab Infrastructure