summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cory Bloor <Cordell.Bloor@amd.com> 2022-04-11 07:08:28 -0600
committer GitHub <noreply@github.com> 2022-04-11 15:08:28 +0200
commit b71661eaa6792e12e9a50ebfb58b5123b3aba8e9 (patch)
tree 8ccb12be90cb57d01bfe99ad741ebcbef6eb629f
parent f110d0848a550ef4932132094d2cc626552b452b (diff)
download spack-b71661eaa6792e12e9a50ebfb58b5123b3aba8e9.tar.gz
spack-b71661eaa6792e12e9a50ebfb58b5123b3aba8e9.tar.bz2
spack-b71661eaa6792e12e9a50ebfb58b5123b3aba8e9.tar.xz
spack-b71661eaa6792e12e9a50ebfb58b5123b3aba8e9.zip
rocmlibs: cleanup amdgpu_target variant behaviour (#28907)
* Use auto_or_any_combination_of for AMD GPU architecture variant
* Make consistent rocmlibs amdgpu_target variants
* Propagate amdgpu_target variant
* Fix amdgpu_target=auto
* Fix amdgpu_target=auto setting HCC_AMDGPU_TARGET
* Tighten HIP CMake requirements

Co-authored-by: Teodor Nikolov <teodor.nikolov22@gmail.com>
-rw-r--r-- var/spack/repos/builtin/packages/hip/package.py | 9
-rw-r--r-- var/spack/repos/builtin/packages/rccl/package.py | 8
-rw-r--r-- var/spack/repos/builtin/packages/rocalution/package.py | 24
-rw-r--r-- var/spack/repos/builtin/packages/rocblas/package.py | 76
-rw-r--r-- var/spack/repos/builtin/packages/rocfft/package.py | 24
-rw-r--r-- var/spack/repos/builtin/packages/rocprim/package.py | 8
-rw-r--r-- var/spack/repos/builtin/packages/rocrand/package.py | 8
-rw-r--r-- var/spack/repos/builtin/packages/rocsolver/package.py | 21
-rw-r--r-- var/spack/repos/builtin/packages/rocsparse/package.py | 15
9 files changed, 123 insertions, 70 deletions
diff --git a/var/spack/repos/builtin/packages/hip/package.py b/var/spack/repos/builtin/packages/hip/package.py
index dec070e97e..432efa5bb9 100644
--- a/var/spack/repos/builtin/packages/hip/package.py
+++ b/var/spack/repos/builtin/packages/hip/package.py
@@ -37,7 +37,8 @@ class Hip(CMakePackage):
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
- depends_on('cmake@3:', type='build')
+ depends_on('cmake@3.16.8:', type='build', when='@4.5.0:')
+ depends_on('cmake@3.4.3:', type='build')
depends_on('perl@5.10:', type=('build', 'run'))
depends_on('gl@4.5:')
@@ -251,9 +252,9 @@ class Hip(CMakePackage):
self.set_variables(env)
if 'amdgpu_target' in dependent_spec.variants:
- arch = dependent_spec.variants['amdgpu_target'].value
- if arch != 'none':
- env.set('HCC_AMDGPU_TARGET', ','.join(arch))
+ arch = dependent_spec.variants['amdgpu_target']
+ if 'none' not in arch and 'auto' not in arch:
+ env.set('HCC_AMDGPU_TARGET', ','.join(arch.value))
def setup_dependent_run_environment(self, env, dependent_spec):
self.setup_dependent_build_environment(env, dependent_spec)
diff --git a/var/spack/repos/builtin/packages/rccl/package.py b/var/spack/repos/builtin/packages/rccl/package.py
index 51c23b8143..f52362b2ce 100644
--- a/var/spack/repos/builtin/packages/rccl/package.py
+++ b/var/spack/repos/builtin/packages/rccl/package.py
@@ -36,6 +36,11 @@ class Rccl(CMakePackage):
version('3.7.0', sha256='8273878ff71aac2e7adf5cc8562d2933034c6c6b3652f88fbe3cd4f2691036e3', deprecated=True)
version('3.5.0', sha256='290b57a66758dce47d0bfff3f5f8317df24764e858af67f60ddcdcadb9337253', deprecated=True)
+ amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-',
+ 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+',
+ 'gfx1030')
+
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
patch('0001-Fix-numactl-path-issue.patch', when='@3.7.0:4.3.2')
@@ -79,6 +84,9 @@ class Rccl(CMakePackage):
self.spec['numactl'].prefix
))
+ if 'auto' not in self.spec.variants['amdgpu_target']:
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
+
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
args.append(self.define('__skip_rocmclang', 'ON'))
diff --git a/var/spack/repos/builtin/packages/rocalution/package.py b/var/spack/repos/builtin/packages/rocalution/package.py
index 9e92c8d938..52976367c2 100644
--- a/var/spack/repos/builtin/packages/rocalution/package.py
+++ b/var/spack/repos/builtin/packages/rocalution/package.py
@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import itertools
from spack import *
@@ -36,6 +37,11 @@ class Rocalution(CMakePackage):
version('3.7.0', sha256='4d6b20aaaac3bafb7ec084d684417bf578349203b0f9f54168f669e3ec5699f8', deprecated=True)
version('3.5.0', sha256='be2f78c10c100d7fd9df5dd2403a44700219c2cbabaacf2ea50a6e2241df7bfe', deprecated=True)
+ amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-',
+ 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+',
+ 'gfx1030')
+
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
depends_on('cmake@3.5:', type='build')
@@ -43,9 +49,14 @@ class Rocalution(CMakePackage):
'4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0',
'5.0.2']:
depends_on('hip@' + ver, when='@' + ver)
- depends_on('rocblas@' + ver, when='@' + ver)
- depends_on('rocprim@' + ver, when='@' + ver)
- depends_on('rocsparse@' + ver, when='@' + ver)
+ for tgt in itertools.chain(['auto'], amdgpu_targets):
+ rocblas_tgt = tgt if tgt != 'gfx900:xnack-' else 'gfx900'
+ depends_on('rocblas@{0} amdgpu_target={1}'.format(ver, rocblas_tgt),
+ when='@{0} amdgpu_target={1}'.format(ver, tgt))
+ depends_on('rocprim@{0} amdgpu_target={1}'.format(ver, tgt),
+ when='@{0} amdgpu_target={1}'.format(ver, tgt))
+ depends_on('rocsparse@{0} amdgpu_target={1}'.format(ver, tgt),
+ when='@{0} amdgpu_target={1}'.format(ver, tgt))
depends_on('comgr@' + ver, when='@' + ver)
depends_on('llvm-amdgpu@' + ver, type='build', when='@' + ver)
depends_on('rocm-cmake@' + ver, type='build', when='@' + ver)
@@ -53,7 +64,9 @@ class Rocalution(CMakePackage):
for ver in ['3.9.0', '3.10.0', '4.0.0', '4.1.0', '4.2.0',
'4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0',
'5.0.2']:
- depends_on('rocrand@' + ver, when='@' + ver)
+ for tgt in itertools.chain(['auto'], amdgpu_targets):
+ depends_on('rocrand@{0} amdgpu_target={1}'.format(ver, tgt),
+ when='@{0} amdgpu_target={1}'.format(ver, tgt))
def setup_build_environment(self, env):
env.set('CXX', self.spec['hip'].hipcc)
@@ -76,6 +89,9 @@ class Rocalution(CMakePackage):
self.define('BUILD_CLIENTS_SAMPLES', 'OFF')
]
+ if 'auto' not in self.spec.variants['amdgpu_target']:
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
+
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
args.append(self.define('__skip_rocmclang', 'ON'))
diff --git a/var/spack/repos/builtin/packages/rocblas/package.py b/var/spack/repos/builtin/packages/rocblas/package.py
index 2da655dc8f..04665b641c 100644
--- a/var/spack/repos/builtin/packages/rocblas/package.py
+++ b/var/spack/repos/builtin/packages/rocblas/package.py
@@ -31,27 +31,28 @@ class Rocblas(CMakePackage):
version('3.7.0', sha256='9425db5f8e8b6f7fb172d09e2a360025b63a4e54414607709efc5acb28819642', deprecated=True)
version('3.5.0', sha256='8560fabef7f13e8d67da997de2295399f6ec595edfd77e452978c140d5f936f0', deprecated=True)
- tensile_architecture = ('all', 'gfx906', 'gfx908', 'gfx803', 'gfx900',
- 'gfx906:xnack-', 'gfx908:xnack-', 'gfx90a:xnack+',
- 'gfx90a:xnack-', 'gfx1010', 'gfx1011',
- 'gfx1012', 'gfx1030')
+ amdgpu_targets = ('gfx906', 'gfx908', 'gfx803', 'gfx900',
+ 'gfx906:xnack-', 'gfx908:xnack-', 'gfx90a:xnack+',
+ 'gfx90a:xnack-', 'gfx1010', 'gfx1011',
+ 'gfx1012', 'gfx1030')
- variant('tensile_architecture', default='all', values=tensile_architecture, multi=True)
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
+ variant('tensile', default=True, description='Use Tensile as a backend')
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
# gfx906, gfx908,gfx803,gfx900 are valid for @:4.0.0
# gfx803,gfx900,gfx:xnack-,gfx908:xnack- are valid gpus for @4.1.0:4.2.0
# gfx803 till gfx1030 are valid gpus for @4.3.0:
- conflicts('tensile_architecture=gfx906', when='@4.0.1:')
- conflicts('tensile_architecture=gfx908', when='@4.0.1:')
- conflicts('tensile_architecture=gfx906:xnack-', when='@:4.0.0')
- conflicts('tensile_architecture=gfx908:xnack-', when='@:4.0.0')
- conflicts('tensile_architecture=gfx90a:xnack+', when='@:4.2.1')
- conflicts('tensile_architecture=gfx90a:xnack-', when='@:4.2.1')
- conflicts('tensile_architecture=gfx1010', when='@:4.2.1')
- conflicts('tensile_architecture=gfx1011', when='@:4.2.1')
- conflicts('tensile_architecture=gfx1012', when='@:4.2.1')
- conflicts('tensile_architecture=gfx1030', when='@:4.2.1')
+ conflicts('amdgpu_target=gfx906', when='@4.0.1:')
+ conflicts('amdgpu_target=gfx908', when='@4.0.1:')
+ conflicts('amdgpu_target=gfx906:xnack-', when='@:4.0.0')
+ conflicts('amdgpu_target=gfx908:xnack-', when='@:4.0.0')
+ conflicts('amdgpu_target=gfx90a:xnack+', when='@:4.2.1')
+ conflicts('amdgpu_target=gfx90a:xnack-', when='@:4.2.1')
+ conflicts('amdgpu_target=gfx1010', when='@:4.2.1')
+ conflicts('amdgpu_target=gfx1011', when='@:4.2.1')
+ conflicts('amdgpu_target=gfx1012', when='@:4.2.1')
+ conflicts('amdgpu_target=gfx1030', when='@:4.2.1')
depends_on('cmake@3.16.8:', type='build', when='@4.2.0:')
depends_on('cmake@3.8:', type='build', when='@3.9.0:')
@@ -109,7 +110,7 @@ class Rocblas(CMakePackage):
resource(name='Tensile',
git='https://github.com/ROCmSoftwarePlatform/Tensile.git',
commit=t_commit,
- when=t_version)
+ when='{} +tensile'.format(t_version))
# Status: https://github.com/ROCmSoftwarePlatform/Tensile/commit/a488f7dadba34f84b9658ba92ce9ec5a0615a087
# Not yet landed in 3.7.0, nor 3.8.0.
@@ -120,45 +121,36 @@ class Rocblas(CMakePackage):
def setup_build_environment(self, env):
env.set('CXX', self.spec['hip'].hipcc)
- def get_gpulist_for_tensile_support(self):
- arch = self.spec.variants['tensile_architecture'].value
- if arch[0] == 'all':
- if self.spec.satisfies('@:4.2.1'):
- arch_value = self.tensile_architecture[0]
- elif self.spec.satisfies('@4.3.0:'):
- arch_value = self.tensile_architecture[3:]
- return arch_value
- else:
- return arch
-
def cmake_args(self):
- tensile = join_path(self.stage.source_path, 'Tensile')
args = [
self.define('BUILD_CLIENTS_TESTS',
self.run_tests and '@4.2.0:' in self.spec),
self.define('BUILD_CLIENTS_BENCHMARKS', 'OFF'),
self.define('BUILD_CLIENTS_SAMPLES', 'OFF'),
self.define('RUN_HEADER_TESTING', 'OFF'),
- self.define('BUILD_WITH_TENSILE', 'ON'),
- self.define('Tensile_TEST_LOCAL_PATH', tensile),
- self.define('Tensile_COMPILER', 'hipcc'),
- self.define('Tensile_LOGIC', 'asm_full'),
- self.define('Tensile_CODE_OBJECT_VERSION', 'V3'),
- self.define('BUILD_WITH_TENSILE_HOST', '@3.7.0:' in self.spec)
+ self.define_from_variant('BUILD_WITH_TENSILE', 'tensile'),
]
if self.run_tests:
args.append(self.define('LINK_BLIS', 'OFF'))
- if '@3.7.0:' in self.spec:
- args.append(self.define('Tensile_LIBRARY_FORMAT', 'msgpack'))
+ arch_define_name = 'AMDGPU_TARGETS'
+ if '+tensile' in self.spec:
+ tensile_path = join_path(self.stage.source_path, 'Tensile')
+ args += [
+ self.define('Tensile_TEST_LOCAL_PATH', tensile_path),
+ self.define('Tensile_COMPILER', 'hipcc'),
+ self.define('Tensile_LOGIC', 'asm_full'),
+ self.define('Tensile_CODE_OBJECT_VERSION', 'V3'),
+ self.define('BUILD_WITH_TENSILE_HOST', '@3.7.0:' in self.spec)
+ ]
+ if self.spec.satisfies('@3.7.0:'):
+ args.append(self.define('Tensile_LIBRARY_FORMAT', 'msgpack'))
+ if self.spec.satisfies('@:4.2.0'):
+ arch_define_name = 'Tensile_ARCHITECTURE'
# See https://github.com/ROCmSoftwarePlatform/rocBLAS/commit/c1895ba4bb3f4f5947f3818ebd155cf71a27b634
- if self.spec.satisfies('@:4.2.0'):
- args.append(self.define('Tensile_ARCHITECTURE',
- self.get_gpulist_for_tensile_support()))
- else:
- args.append(self.define('AMDGPU_TARGETS',
- self.get_gpulist_for_tensile_support()))
+ if 'auto' not in self.spec.variants['amdgpu_target']:
+ args.append(self.define_from_variant(arch_define_name, 'amdgpu_target'))
# See https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1196
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
diff --git a/var/spack/repos/builtin/packages/rocfft/package.py b/var/spack/repos/builtin/packages/rocfft/package.py
index 75fcc68387..a8f1448ff8 100644
--- a/var/spack/repos/builtin/packages/rocfft/package.py
+++ b/var/spack/repos/builtin/packages/rocfft/package.py
@@ -32,14 +32,14 @@ class Rocfft(CMakePackage):
version('3.5.0', sha256='629f02cfecb7de5ad2517b6a8aac6ed4de60d3a9c620413c4d9db46081ac2c88', deprecated=True)
amdgpu_targets = (
- 'none', 'gfx701', 'gfx801', 'gfx802', 'gfx803',
+ 'gfx701', 'gfx801', 'gfx802', 'gfx803',
'gfx900', 'gfx906', 'gfx908', 'gfx1010',
'gfx1011', 'gfx1012'
)
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
- variant('amdgpu_target', default='gfx701', multi=True, values=amdgpu_targets)
- variant('amdgpu_target_sram_ecc', default='none', multi=True, values=amdgpu_targets)
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
+ variant('amdgpu_target_sram_ecc', values=auto_or_any_combination_of(*amdgpu_targets))
depends_on('cmake@3:', type='build')
depends_on('python@3:', type='build', when='@5.0.0:')
@@ -58,20 +58,22 @@ class Rocfft(CMakePackage):
def cmake_args(self):
args = []
- tgt = self.spec.variants['amdgpu_target'].value
+ tgt = self.spec.variants['amdgpu_target']
- if tgt[0] != 'none':
+ if 'auto' not in tgt:
if '@:3.8.0' in self.spec:
- args.append(self.define('CMAKE_CXX_FLAGS',
- '--amdgpu-target={0}'.format(",".join(tgt))))
+ args.append(self.define(
+ 'CMAKE_CXX_FLAGS',
+ '--amdgpu-target={0}'.format(",".join(tgt.value))))
else:
- args.append(self.define('AMDGPU_TARGETS', ";".join(tgt)))
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
# From version 3.9 and above we have AMDGPU_TARGETS_SRAM_ECC
- tgt_sram = self.spec.variants['amdgpu_target_sram_ecc'].value
+ tgt_sram = self.spec.variants['amdgpu_target_sram_ecc']
- if tgt_sram[0] != 'none' and self.spec.satisfies('@3.9.0:4.0.0'):
- args.append(self.define('AMDGPU_TARGETS_SRAM_ECC', ";".join(tgt_sram)))
+ if 'auto' not in tgt_sram and self.spec.satisfies('@3.9.0:4.0.0'):
+ args.append(self.define_from_variant(
+ 'AMDGPU_TARGETS_SRAM_ECC', 'amdgpu_target_sram_ecc'))
# See https://github.com/ROCmSoftwarePlatform/rocFFT/issues/322
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
diff --git a/var/spack/repos/builtin/packages/rocprim/package.py b/var/spack/repos/builtin/packages/rocprim/package.py
index 25abc29cfb..3c4674463b 100644
--- a/var/spack/repos/builtin/packages/rocprim/package.py
+++ b/var/spack/repos/builtin/packages/rocprim/package.py
@@ -30,6 +30,11 @@ class Rocprim(CMakePackage):
version('3.7.0', sha256='225209a0cbd003c241821c8a9192cec5c07c7f1a6ab7da296305fc69f5f6d365', deprecated=True)
version('3.5.0', sha256='29302dbeb27ae88632aa1be43a721f03e7e597c329602f9ca9c9c530c1def40d', deprecated=True)
+ amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-',
+ 'gfx908:xnack-', 'gfx90a:xnack-', 'gfx90a:xnack+',
+ 'gfx1030')
+
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
depends_on('cmake@3:', type='build')
@@ -56,6 +61,9 @@ class Rocprim(CMakePackage):
self.define('BUILD_EXAMPLE', 'OFF')
]
+ if 'auto' not in self.spec.variants['amdgpu_target']:
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
+
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
args.append(self.define('__skip_rocmclang', 'ON'))
diff --git a/var/spack/repos/builtin/packages/rocrand/package.py b/var/spack/repos/builtin/packages/rocrand/package.py
index 9b0ef65cde..5d24fd387a 100644
--- a/var/spack/repos/builtin/packages/rocrand/package.py
+++ b/var/spack/repos/builtin/packages/rocrand/package.py
@@ -34,6 +34,11 @@ class Rocrand(CMakePackage):
version('3.7.0', sha256='5e43fe07afe2c7327a692b3b580875bae6e6ee790e044c053fffafbfcbc14860', deprecated=True)
version('3.5.0', sha256='592865a45e7ef55ad9d7eddc8082df69eacfd2c1f3e9c57810eb336b15cd5732', deprecated=True)
+ amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', 'gfx908:xnack-',
+ 'gfx90a:xnack-', 'gfx90a:xnack+',
+ 'gfx1030')
+
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
depends_on('cmake@3.10.2:', type='build', when='@4.5.0:')
@@ -95,6 +100,9 @@ class Rocrand(CMakePackage):
self.define('BUILD_TEST', self.run_tests)
]
+ if 'auto' not in self.spec.variants['amdgpu_target']:
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
+
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
args.append(self.define('__skip_rocmclang', 'ON'))
diff --git a/var/spack/repos/builtin/packages/rocsolver/package.py b/var/spack/repos/builtin/packages/rocsolver/package.py
index d056751069..9ddc8b3454 100644
--- a/var/spack/repos/builtin/packages/rocsolver/package.py
+++ b/var/spack/repos/builtin/packages/rocsolver/package.py
@@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import itertools
+
from spack import *
@@ -17,10 +19,10 @@ class Rocsolver(CMakePackage):
maintainers = ['srekolam', 'arjun-raj-kuppala', 'haampie']
amdgpu_targets = (
- 'none', 'gfx803', 'gfx900', 'gfx906:xnack-', 'gfx908:xnack-',
+ 'gfx803', 'gfx900', 'gfx906:xnack-', 'gfx908:xnack-',
'gfx90a:xnack-', 'gfx90a:xnack+', 'gfx1010', 'gfx1011', 'gfx1012', 'gfx1030'
)
- variant('amdgpu_target', default='gfx906:xnack-', multi=True, values=amdgpu_targets)
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
variant('optimal', default=True,
description='This option improves performance at the cost of increased binary \
size and compile time by adding specialized kernels \
@@ -60,11 +62,12 @@ class Rocsolver(CMakePackage):
'4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0',
'5.0.2']:
depends_on('hip@' + ver, when='@' + ver)
- depends_on('rocblas@' + ver, when='@' + ver)
+ for tgt in itertools.chain(['auto'], amdgpu_targets):
+ depends_on('rocblas@{0} amdgpu_target={1}'.format(ver, tgt),
+ when='@{0} amdgpu_target={1}'.format(ver, tgt))
depends_on('rocm-cmake@' + ver, type='build', when='@' + ver)
def cmake_args(self):
- tgt = self.spec.variants['amdgpu_target'].value
args = [
self.define('BUILD_CLIENTS_SAMPLES', 'OFF'),
self.define('BUILD_CLIENTS_TESTS', self.run_tests),
@@ -80,12 +83,14 @@ class Rocsolver(CMakePackage):
if self.spec.satisfies('@3.7.0:'):
args.append(self.define_from_variant('OPTIMAL', 'optimal'))
- if tgt[0] != 'none':
+ tgt = self.spec.variants['amdgpu_target']
+ if 'auto' not in tgt:
if '@:3.8.0' in self.spec:
- args.append(self.define('CMAKE_CXX_FLAGS',
- '--amdgpu-target={0}'.format(",".join(tgt))))
+ args.append(self.define(
+ 'CMAKE_CXX_FLAGS',
+ '--amdgpu-target={0}'.format(",".join(tgt.value))))
else:
- args.append(self.define('AMDGPU_TARGETS', ";".join(tgt)))
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
args.append(self.define('__skip_rocmclang', 'ON'))
diff --git a/var/spack/repos/builtin/packages/rocsparse/package.py b/var/spack/repos/builtin/packages/rocsparse/package.py
index 106aca0809..f9c56551b4 100644
--- a/var/spack/repos/builtin/packages/rocsparse/package.py
+++ b/var/spack/repos/builtin/packages/rocsparse/package.py
@@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import itertools
+
from spack import *
@@ -19,6 +21,11 @@ class Rocsparse(CMakePackage):
maintainers = ['srekolam', 'arjun-raj-kuppala']
+ amdgpu_targets = ('gfx803', 'gfx900:xnack-', 'gfx906:xnack-', 'gfx908:xnack-',
+ 'gfx90a:xnack-', 'gfx90a:xnack+',
+ 'gfx1030')
+
+ variant('amdgpu_target', values=auto_or_any_combination_of(*amdgpu_targets))
variant('build_type', default='Release', values=("Release", "Debug", "RelWithDebInfo"), description='CMake build type')
version('5.0.2', sha256='c9d9e1b7859e1c5aa5050f5dfdf86245cbd7c1296c0ce60d9ca5f3e22a9b748b')
@@ -41,7 +48,9 @@ class Rocsparse(CMakePackage):
for ver in ['3.5.0', '3.7.0', '3.8.0', '3.9.0', '3.10.0', '4.0.0', '4.1.0',
'4.2.0', '4.3.0', '4.3.1', '4.5.0', '4.5.2', '5.0.0', '5.0.2']:
depends_on('hip@' + ver, when='@' + ver)
- depends_on('rocprim@' + ver, when='@' + ver)
+ for tgt in itertools.chain(['auto'], amdgpu_targets):
+ depends_on('rocprim@{0} amdgpu_target={1}'.format(ver, tgt),
+ when='@{0} amdgpu_target={1}'.format(ver, tgt))
depends_on('rocm-cmake@' + ver, type='build', when='@' + ver)
def setup_build_environment(self, env):
@@ -53,6 +62,10 @@ class Rocsparse(CMakePackage):
self.define('BUILD_CLIENTS_TESTS', 'OFF'),
self.define('BUILD_CLIENTS_BENCHMARKS', 'OFF')
]
+
+ if 'auto' not in self.spec.variants['amdgpu_target']:
+ args.append(self.define_from_variant('AMDGPU_TARGETS', 'amdgpu_target'))
+
if self.spec.satisfies('^cmake@3.21.0:3.21.2'):
args.append(self.define('__skip_rocmclang', 'ON'))