From b71661eaa6792e12e9a50ebfb58b5123b3aba8e9 Mon Sep 17 00:00:00 2001 From: Cory Bloor Date: Mon, 11 Apr 2022 07:08:28 -0600 Subject: rocmlibs: cleanup amdgpu_target variant behaviour (#28907) * Use auto_or_any_combination_of for AMD GPU architecture variant * Make consistent rocmlibs amdgpu_target variants * Propagate amdgpu_target variant * Fix amdgpu_target=auto * Fix amdgpu_target=auto setting HCC_AMDGPU_TARGET * Tighten HIP CMake requirements Co-authored-by: Teodor Nikolov --- var/spack/repos/builtin/packages/hip/package.py | 9 +-- var/spack/repos/builtin/packages/rccl/package.py | 8 +++ .../repos/builtin/packages/rocalution/package.py | 24 +++++-- .../repos/builtin/packages/rocblas/package.py | 76 ++++++++++------------ var/spack/repos/builtin/packages/rocfft/package.py | 24 +++---- .../repos/builtin/packages/rocprim/package.py | 8 +++ .../repos/builtin/packages/rocrand/package.py | 8 +++ .../repos/builtin/packages/rocsolver/package.py | 21 +++--- .../repos/builtin/packages/rocsparse/package.py | 15 ++++- 9 files changed, 123 insertions(+), 70 deletions(-) diff --git a/var/spack/repos/builtin/packages/hip/package.py b/var/spack/repos/builtin/packages/hip/package.py index dec070e97e..432efa5bb9 100644 --- a/var/spack/repos/builtin/packages/hip/package.py +++ b/var/spack/repos/builtin/packages/hip/package.py @@ -37,7 +37,8 @@ class Hip(CMakePackage): variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') - depends_on('cmake@3:', type='build') + depends_on('cmake@3.16.8:', type='build', when='@4.5.0:') + depends_on('cmake@3.4.3:', type='build') depends_on('perl@5.10:', type=('build', 'run')) depends_on('gl@4.5:') @@ -251,9 +252,9 @@ class Hip(CMakePackage): self.set_variables(env) if 'amdgpu_target' in dependent_spec.variants: - arch = dependent_spec.variants['amdgpu_target'].value - if arch != 'none': - env.set('HCC_AMDGPU_TARGET', ','.join(arch)) + arch = dependent_spec.variants['amdgpu_target'] + if 'none' not in arch and 'auto' not in arch: + env.set('HCC_AMDGPU_TARGET', ','.join(arch.value)) def setup_dependent_run_environment(self, env, dependent_spec): self.setup_dependent_build_environment(env, dependent_spec) diff --git a/var/spack/repos/builtin/packages/rccl/package.py b/var/spack/repos/builtin/packages/rccl/package.py index 51c23b8143..f52362b2ce 100644 --- a/var/spack/repos/builtin/packages/rccl/package.py +++ b/var/spack/repos/builtin/packages/rccl/package.py @@ -36,6 +36,11 @@ class Rccl(CMakePackage): version('3.7.0', sha256='8273878ff71aac2e7adf5cc8562d2933034c6c6b3652f88fbe3cd4f2691036e3', deprecated=True) version('3.5.0', sha256='290b57a66758dce47d0bfff3f5f8317df24764e858af67f60ddcdcadb9337253', deprecated=True) + amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', + 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+', + 'gfx1030') + + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') patch('0001-Fix-numactl-path-issue.patch', when='@3.7.0:4.3.2') @@ -79,6 +84,9 @@ class Rccl(CMakePackage): self.spec['numactl'].prefix )) + if 'auto' not in self.spec.variants['amdgpu_target']: + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) + if self.spec.satisfies('^cmake@3.21.0:3.21.2'): args.append(self.define('__skip_rocmclang', 'ON')) diff --git a/var/spack/repos/builtin/packages/rocalution/package.py b/var/spack/repos/builtin/packages/rocalution/package.py index 9e92c8d938..52976367c2 100644 --- a/var/spack/repos/builtin/packages/rocalution/package.py +++ b/var/spack/repos/builtin/packages/rocalution/package.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import itertools from spack import * @@ -36,6 +37,11 @@ class Rocalution(CMakePackage): version('3.7.0', sha256='4d6b20aaaac3bafb7ec084d684417bf578349203b0f9f54168f669e3ec5699f8', deprecated=True) version('3.5.0', sha256='be2f78c10c100d7fd9df5dd2403a44700219c2cbabaacf2ea50a6e2241df7bfe', deprecated=True) + amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', + 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+', + 'gfx1030') + + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') depends_on('cmake@3.5:', type='build') @@ -43,9 +49,14 @@ class Rocalution(CMakePackage): '4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0', '5.0.2']: depends_on('hip@' + ver, when='@' + ver) - depends_on('rocblas@' + ver, when='@' + ver) - depends_on('rocprim@' + ver, when='@' + ver) - depends_on('rocsparse@' + ver, when='@' + ver) + for tgt in itertools.chain(['auto'], amdgpu_targets): + rocblas_tgt = tgt if tgt != 'gfx900:xnack-' else 'gfx900' + depends_on('rocblas@{0} amdgpu_target={1}'.format(ver, rocblas_tgt), + when='@{0} amdgpu_target={1}'.format(ver, tgt)) + depends_on('rocprim@{0} amdgpu_target={1}'.format(ver, tgt), + when='@{0} amdgpu_target={1}'.format(ver, tgt)) + depends_on('rocsparse@{0} amdgpu_target={1}'.format(ver, tgt), + when='@{0} amdgpu_target={1}'.format(ver, tgt)) depends_on('comgr@' + ver, when='@' + ver) depends_on('llvm-amdgpu@' + ver, type='build', when='@' + ver) depends_on('rocm-cmake@' + ver, type='build', when='@' + ver) @@ -53,7 +64,9 @@ class Rocalution(CMakePackage): for ver in ['3.9.0', '3.10.0', '4.0.0', '4.1.0', '4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0', '5.0.2']: - depends_on('rocrand@' + ver, when='@' + ver) + for tgt in itertools.chain(['auto'], amdgpu_targets): + depends_on('rocrand@{0} amdgpu_target={1}'.format(ver, tgt), + when='@{0} amdgpu_target={1}'.format(ver, tgt)) def setup_build_environment(self, env): env.set('CXX', self.spec['hip'].hipcc) @@ -76,6 +89,9 @@ class Rocalution(CMakePackage): self.define('BUILD_CLIENTS_SAMPLES', 'OFF') ] + if 'auto' not in self.spec.variants['amdgpu_target']: + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) + if self.spec.satisfies('^cmake@3.21.0:3.21.2'): args.append(self.define('__skip_rocmclang', 'ON')) diff --git a/var/spack/repos/builtin/packages/rocblas/package.py b/var/spack/repos/builtin/packages/rocblas/package.py index 2da655dc8f..04665b641c 100644 --- a/var/spack/repos/builtin/packages/rocblas/package.py +++ b/var/spack/repos/builtin/packages/rocblas/package.py @@ -31,27 +31,28 @@ class Rocblas(CMakePackage): version('3.7.0', sha256='9425db5f8e8b6f7fb172d09e2a360025b63a4e54414607709efc5acb28819642', deprecated=True) version('3.5.0', sha256='8560fabef7f13e8d67da997de2295399f6ec595edfd77e452978c140d5f936f0', deprecated=True) - tensile_architecture = ('all', 'gfx906', 'gfx908', 'gfx803', 'gfx900', - 'gfx906:xnack-', 'gfx908:xnack-', 'gfx90a:xnack+', - 'gfx90a:xnack-', 'gfx1010', 'gfx1011', - 'gfx1012', 'gfx1030') + amdgpu_targets = ('gfx906', 'gfx908', 'gfx803', 'gfx900', + 'gfx906:xnack-', 'gfx908:xnack-', 'gfx90a:xnack+', + 'gfx90a:xnack-', 'gfx1010', 'gfx1011', + 'gfx1012', 'gfx1030') - variant('tensile_architecture', default='all', values=tensile_architecture, multi=True) + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) + variant('tensile', default=True, description='Use Tensile as a backend') variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') # gfx906, gfx908,gfx803,gfx900 are valid for @:4.0.0 # gfx803,gfx900,gfx:xnack-,gfx908:xnack- are valid gpus for @4.1.0:4.2.0 # gfx803 till gfx1030 are valid gpus for @4.3.0: - conflicts('tensile_architecture=gfx906', when='@4.0.1:') - conflicts('tensile_architecture=gfx908', when='@4.0.1:') - conflicts('tensile_architecture=gfx906:xnack-', when='@:4.0.0') - conflicts('tensile_architecture=gfx908:xnack-', when='@:4.0.0') - conflicts('tensile_architecture=gfx90a:xnack+', when='@:4.2.1') - conflicts('tensile_architecture=gfx90a:xnack-', when='@:4.2.1') - conflicts('tensile_architecture=gfx1010', when='@:4.2.1') - conflicts('tensile_architecture=gfx1011', when='@:4.2.1') - conflicts('tensile_architecture=gfx1012', when='@:4.2.1') - conflicts('tensile_architecture=gfx1030', when='@:4.2.1') + conflicts('amdgpu_target=gfx906', when='@4.0.1:') + conflicts('amdgpu_target=gfx908', when='@4.0.1:') + conflicts('amdgpu_target=gfx906:xnack-', when='@:4.0.0') + conflicts('amdgpu_target=gfx908:xnack-', when='@:4.0.0') + conflicts('amdgpu_target=gfx90a:xnack+', when='@:4.2.1') + conflicts('amdgpu_target=gfx90a:xnack-', when='@:4.2.1') + conflicts('amdgpu_target=gfx1010', when='@:4.2.1') + conflicts('amdgpu_target=gfx1011', when='@:4.2.1') + conflicts('amdgpu_target=gfx1012', when='@:4.2.1') + conflicts('amdgpu_target=gfx1030', when='@:4.2.1') depends_on('cmake@3.16.8:', type='build', when='@4.2.0:') depends_on('cmake@3.8:', type='build', when='@3.9.0:') @@ -109,7 +110,7 @@ class Rocblas(CMakePackage): resource(name='Tensile', git='https://github.com/ROCmSoftwarePlatform/Tensile.git', commit=t_commit, - when=t_version) + when='{} +tensile'.format(t_version)) # Status: https://github.com/ROCmSoftwarePlatform/Tensile/commit/a488f7dadba34f84b9658ba92ce9ec5a0615a087 # Not yet landed in 3.7.0, nor 3.8.0. @@ -120,45 +121,36 @@ class Rocblas(CMakePackage): def setup_build_environment(self, env): env.set('CXX', self.spec['hip'].hipcc) - def get_gpulist_for_tensile_support(self): - arch = self.spec.variants['tensile_architecture'].value - if arch[0] == 'all': - if self.spec.satisfies('@:4.2.1'): - arch_value = self.tensile_architecture[0] - elif self.spec.satisfies('@4.3.0:'): - arch_value = self.tensile_architecture[3:] - return arch_value - else: - return arch - def cmake_args(self): - tensile = join_path(self.stage.source_path, 'Tensile') args = [ self.define('BUILD_CLIENTS_TESTS', self.run_tests and '@4.2.0:' in self.spec), self.define('BUILD_CLIENTS_BENCHMARKS', 'OFF'), self.define('BUILD_CLIENTS_SAMPLES', 'OFF'), self.define('RUN_HEADER_TESTING', 'OFF'), - self.define('BUILD_WITH_TENSILE', 'ON'), - self.define('Tensile_TEST_LOCAL_PATH', tensile), - self.define('Tensile_COMPILER', 'hipcc'), - self.define('Tensile_LOGIC', 'asm_full'), - self.define('Tensile_CODE_OBJECT_VERSION', 'V3'), - self.define('BUILD_WITH_TENSILE_HOST', '@3.7.0:' in self.spec) + self.define_from_variant('BUILD_WITH_TENSILE', 'tensile'), ] if self.run_tests: args.append(self.define('LINK_BLIS', 'OFF')) - if '@3.7.0:' in self.spec: - args.append(self.define('Tensile_LIBRARY_FORMAT', 'msgpack')) + arch_define_name = 'AMDGPU_TARGETS' + if '+tensile' in self.spec: + tensile_path = join_path(self.stage.source_path, 'Tensile') + args += [ + self.define('Tensile_TEST_LOCAL_PATH', tensile_path), + self.define('Tensile_COMPILER', 'hipcc'), + self.define('Tensile_LOGIC', 'asm_full'), + self.define('Tensile_CODE_OBJECT_VERSION', 'V3'), + self.define('BUILD_WITH_TENSILE_HOST', '@3.7.0:' in self.spec) + ] + if self.spec.satisfies('@3.7.0:'): + args.append(self.define('Tensile_LIBRARY_FORMAT', 'msgpack')) + if self.spec.satisfies('@:4.2.0'): + arch_define_name = 'Tensile_ARCHITECTURE' # See https://github.com/ROCmSoftwarePlatform/rocBLAS/commit/c1895ba4bb3f4f5947f3818ebd155cf71a27b634 - if self.spec.satisfies('@:4.2.0'): - args.append(self.define('Tensile_ARCHITECTURE', - self.get_gpulist_for_tensile_support())) - else: - args.append(self.define('AMDGPU_TARGETS', - self.get_gpulist_for_tensile_support())) + if 'auto' not in self.spec.variants['amdgpu_target']: + args.append(self.define_from_variant(arch_define_name, 'amdgpu_target')) # See https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1196 if self.spec.satisfies('^cmake@3.21.0:3.21.2'): diff --git a/var/spack/repos/builtin/packages/rocfft/package.py b/var/spack/repos/builtin/packages/rocfft/package.py index 75fcc68387..a8f1448ff8 100644 --- a/var/spack/repos/builtin/packages/rocfft/package.py +++ b/var/spack/repos/builtin/packages/rocfft/package.py @@ -32,14 +32,14 @@ class Rocfft(CMakePackage): version('3.5.0', sha256='629f02cfecb7de5ad2517b6a8aac6ed4de60d3a9c620413c4d9db46081ac2c88', deprecated=True) amdgpu_targets = ( - 'none', 'gfx701', 'gfx801', 'gfx802', 'gfx803', + 'gfx701', 'gfx801', 'gfx802', 'gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx1010', 'gfx1011', 'gfx1012' ) variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') - variant('amdgpu_target', default='gfx701', multi=True, values=amdgpu_targets) - variant('amdgpu_target_sram_ecc', default='none', multi=True, values=amdgpu_targets) + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) + variant('amdgpu_target_sram_ecc', values=auto_or_any_combination_of(*amdgpu_targets)) depends_on('cmake@3:', type='build') depends_on('python@3:', type='build', when='@5.0.0:') @@ -58,20 +58,22 @@ class Rocfft(CMakePackage): def cmake_args(self): args = [] - tgt = self.spec.variants['amdgpu_target'].value + tgt = self.spec.variants['amdgpu_target'] - if tgt[0] != 'none': + if 'auto' not in tgt: if '@:3.8.0' in self.spec: - args.append(self.define('CMAKE_CXX_FLAGS', - '--amdgpu-target={0}'.format(",".join(tgt)))) + args.append(self.define( + 'CMAKE_CXX_FLAGS', + '--amdgpu-target={0}'.format(",".join(tgt.value)))) else: - args.append(self.define('AMDGPU_TARGETS', ";".join(tgt))) + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) # From version 3.9 and above we have AMDGPU_TARGETS_SRAM_ECC - tgt_sram = self.spec.variants['amdgpu_target_sram_ecc'].value + tgt_sram = self.spec.variants['amdgpu_target_sram_ecc'] - if tgt_sram[0] != 'none' and self.spec.satisfies('@3.9.0:4.0.0'): - args.append(self.define('AMDGPU_TARGETS_SRAM_ECC', ";".join(tgt_sram))) + if 'auto' not in tgt_sram and self.spec.satisfies('@3.9.0:4.0.0'): + args.append(self.define_from_variant( + 'AMDGPU_TARGETS_SRAM_ECC', 'amdgpu_target_sram_ecc')) # See https://github.com/ROCmSoftwarePlatform/rocFFT/issues/322 if self.spec.satisfies('^cmake@3.21.0:3.21.2'): diff --git a/var/spack/repos/builtin/packages/rocprim/package.py b/var/spack/repos/builtin/packages/rocprim/package.py index 25abc29cfb..3c4674463b 100644 --- a/var/spack/repos/builtin/packages/rocprim/package.py +++ b/var/spack/repos/builtin/packages/rocprim/package.py @@ -30,6 +30,11 @@ class Rocprim(CMakePackage): version('3.7.0', sha256='225209a0cbd003c241821c8a9192cec5c07c7f1a6ab7da296305fc69f5f6d365', deprecated=True) version('3.5.0', sha256='29302dbeb27ae88632aa1be43a721f03e7e597c329602f9ca9c9c530c1def40d', deprecated=True) + amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', + 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+', + 'gfx1030') + + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') depends_on('cmake@3:', type='build') @@ -56,6 +61,9 @@ class Rocprim(CMakePackage): self.define('BUILD_EXAMPLE', 'OFF') ] + if 'auto' not in self.spec.variants['amdgpu_target']: + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) + if self.spec.satisfies('^cmake@3.21.0:3.21.2'): args.append(self.define('__skip_rocmclang', 'ON')) diff --git a/var/spack/repos/builtin/packages/rocrand/package.py b/var/spack/repos/builtin/packages/rocrand/package.py index 9b0ef65cde..5d24fd387a 100644 --- a/var/spack/repos/builtin/packages/rocrand/package.py +++ b/var/spack/repos/builtin/packages/rocrand/package.py @@ -34,6 +34,11 @@ class Rocrand(CMakePackage): version('3.7.0', sha256='5e43fe07afe2c7327a692b3b580875bae6e6ee790e044c053fffafbfcbc14860', deprecated=True) version('3.5.0', sha256='592865a45e7ef55ad9d7eddc8082df69eacfd2c1f3e9c57810eb336b15cd5732', deprecated=True) + amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', 'gfx908:xnack-', + 'gfx90a:xnack-', 'gfx90a:xnack+', + 'gfx1030') + + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') depends_on('cmake@3.10.2:', type='build', when='@4.5.0:') @@ -95,6 +100,9 @@ class Rocrand(CMakePackage): self.define('BUILD_TEST', self.run_tests) ] + if 'auto' not in self.spec.variants['amdgpu_target']: + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) + if self.spec.satisfies('^cmake@3.21.0:3.21.2'): args.append(self.define('__skip_rocmclang', 'ON')) diff --git a/var/spack/repos/builtin/packages/rocsolver/package.py b/var/spack/repos/builtin/packages/rocsolver/package.py index d056751069..9ddc8b3454 100644 --- a/var/spack/repos/builtin/packages/rocsolver/package.py +++ b/var/spack/repos/builtin/packages/rocsolver/package.py @@ -3,6 +3,8 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import itertools + from spack import * @@ -17,10 +19,10 @@ class Rocsolver(CMakePackage): maintainers = ['srekolam', 'arjun-raj-kuppala', 'haampie'] amdgpu_targets = ( - 'none', 'gfx803', 'gfx900', 'gfx906:xnack-', 'gfx908:xnack-', + 'gfx803', 'gfx900', 'gfx906:xnack-', 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+', 'gfx1010', 'gfx1011', 'gfx1012', 'gfx1030' ) - variant('amdgpu_target', default='gfx906:xnack-', multi=True, values=amdgpu_targets) + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) variant('optimal', default=True, description='This option improves performance at the cost of increased binary \ size and compile time by adding specialized kernels \ @@ -60,11 +62,12 @@ class Rocsolver(CMakePackage): '4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0', '5.0.2']: depends_on('hip@' + ver, when='@' + ver) - depends_on('rocblas@' + ver, when='@' + ver) + for tgt in itertools.chain(['auto'], amdgpu_targets): + depends_on('rocblas@{0} amdgpu_target={1}'.format(ver, tgt), + when='@{0} amdgpu_target={1}'.format(ver, tgt)) depends_on('rocm-cmake@' + ver, type='build', when='@' + ver) def cmake_args(self): - tgt = self.spec.variants['amdgpu_target'].value args = [ self.define('BUILD_CLIENTS_SAMPLES', 'OFF'), self.define('BUILD_CLIENTS_TESTS', self.run_tests), @@ -80,12 +83,14 @@ class Rocsolver(CMakePackage): if self.spec.satisfies('@3.7.0:'): args.append(self.define_from_variant('OPTIMAL', 'optimal')) - if tgt[0] != 'none': + tgt = self.spec.variants['amdgpu_target'] + if 'auto' not in tgt: if '@:3.8.0' in self.spec: - args.append(self.define('CMAKE_CXX_FLAGS', - '--amdgpu-target={0}'.format(",".join(tgt)))) + args.append(self.define( + 'CMAKE_CXX_FLAGS', + '--amdgpu-target={0}'.format(",".join(tgt.value)))) else: - args.append(self.define('AMDGPU_TARGETS', ";".join(tgt))) + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) if self.spec.satisfies('^cmake@3.21.0:3.21.2'): args.append(self.define('__skip_rocmclang', 'ON')) diff --git a/var/spack/repos/builtin/packages/rocsparse/package.py b/var/spack/repos/builtin/packages/rocsparse/package.py index 106aca0809..f9c56551b4 100644 --- a/var/spack/repos/builtin/packages/rocsparse/package.py +++ b/var/spack/repos/builtin/packages/rocsparse/package.py @@ -3,6 +3,8 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import itertools + from spack import * @@ -19,6 +21,11 @@ class Rocsparse(CMakePackage): maintainers = ['srekolam', 'arjun-raj-kuppala'] + amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', 'gfx908:xnack-', + 'gfx90a:xnack-', 'gfx90a:xnack+', + 'gfx1030') + + variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets)) variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type') version('5.0.2', sha256='c9d9e1b7859e1c5aa5050f5dfdf86245cbd7c1296c0ce60d9ca5f3e22a9b748b') @@ -41,7 +48,9 @@ class Rocsparse(CMakePackage): for ver in ['3.5.0', '3.7.0', '3.8.0', '3.9.0', '3.10.0', '4.0.0', '4.1.0', '4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0', '5.0.2']: depends_on('hip@' + ver, when='@' + ver) - depends_on('rocprim@' + ver, when='@' + ver) + for tgt in itertools.chain(['auto'], amdgpu_targets): + depends_on('rocprim@{0} amdgpu_target={1}'.format(ver, tgt), + when='@{0} amdgpu_target={1}'.format(ver, tgt)) depends_on('rocm-cmake@' + ver, type='build', when='@' + ver) def setup_build_environment(self, env): @@ -53,6 +62,10 @@ class Rocsparse(CMakePackage): self.define('BUILD_CLIENTS_TESTS', 'OFF'), self.define('BUILD_CLIENTS_BENCHMARKS', 'OFF') ] + + if 'auto' not in self.spec.variants['amdgpu_target']: + args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target')) + if self.spec.satisfies('^cmake@3.21.0:3.21.2'): args.append(self.define('__skip_rocmclang', 'ON')) -- cgit v1.2.3-60-g2f50