diff options
author | Cory Bloor <Cordell.Bloor@amd.com> | 2022-09-12 10:19:59 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-12 09:19:59 -0700 |
commit | 13d872592eb5b50fa4a4b107c6f9bf20f689f34b (patch) | |
tree | 6a3fbffcc80545ddeefc40b7e7739e61e421de3b /var | |
parent | 5dc1a9f2140013b72c57c00bcf3523ed1accb02b (diff) | |
download | spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.tar.gz spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.tar.bz2 spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.tar.xz spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.zip |
rocalution: fix compilation for Navi 1x and 2x (#32586)
Diffstat (limited to 'var')
-rw-r--r-- | var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch | 79 | ||||
-rw-r--r-- | var/spack/repos/builtin/packages/rocalution/package.py | 2 |
2 files changed, 81 insertions, 0 deletions
diff --git a/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch b/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch new file mode 100644 index 0000000000..6e96e64774 --- /dev/null +++ b/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch @@ -0,0 +1,79 @@ +From 9bdff9b0897360a60d21a686f7b988f924aea825 Mon Sep 17 00:00:00 2001 +From: DorianRudolph <dorianrudo97@googlemail.com> +Date: Fri, 9 Sep 2022 07:09:25 +0200 +Subject: [PATCH] fix compilation for gfx1031 (#150) + +* fix compilation for gfx1031 + +* Improve guards for arch-specific instructions + +Default to using the fallback implementation and only use +__hip_move_dpp on platforms that are known to support the +necessary intrinsics. + +rocALUTION can also be compiled with CXXFLAGS=-DROCALUTION_USE_MOVE_DPP=0 +to force the use of the fallback implementation (or with the value 1 to +force the use of the __hip_move_dpp implementation). + +This change fixes the compilation error: + + Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+ + +when building for unsupported Navi 1x and Navi 2x GPUs. 
+ +Co-authored-by: Cordell Bloor <Cordell.Bloor@amd.com> +--- + src/base/hip/hip_utils.hpp | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/src/base/hip/hip_utils.hpp b/src/base/hip/hip_utils.hpp +index 830f9a5d..5ec4cd00 100644 +--- a/src/base/hip/hip_utils.hpp ++++ b/src/base/hip/hip_utils.hpp +@@ -37,6 +37,18 @@ + #include <hip/hip_complex.h> + #endif + ++#ifndef ROCALUTION_USE_MOVE_DPP ++#if defined(__gfx803__) || \ ++ defined(__gfx900__) || \ ++ defined(__gfx906__) || \ ++ defined(__gfx908__) || \ ++ defined(__gfx90a__) ++#define ROCALUTION_USE_MOVE_DPP 1 ++#else ++#define ROCALUTION_USE_MOVE_DPP 0 ++#endif ++#endif ++ + #define ROCBLAS_HANDLE(handle) *static_cast<rocblas_handle*>(handle) + #define ROCSPARSE_HANDLE(handle) *static_cast<rocsparse_handle*>(handle) + +@@ -206,7 +218,7 @@ namespace rocalution + + __device__ int __llvm_amdgcn_readlane(int index, int offset) __asm("llvm.amdgcn.readlane"); + +-#ifndef __gfx1030__ ++#if ROCALUTION_USE_MOVE_DPP + template <unsigned int WFSIZE> + static __device__ __forceinline__ void wf_reduce_sum(int* sum) + { +@@ -223,7 +235,7 @@ namespace rocalution + if(WFSIZE > 32) + *sum += __hip_move_dpp(*sum, 0x143, 0xc, 0xf, 0); + } +-#else ++#else /* ROCALUTION_USE_MOVE_DPP */ + template <unsigned int WFSIZE> + static __device__ __forceinline__ void wf_reduce_sum(int* sum) + { +@@ -232,7 +244,7 @@ namespace rocalution + *sum += __shfl_xor(*sum, i); + } + } +-#endif ++#endif /* ROCALUTION_USE_MOVE_DPP */ + + template <unsigned int WF_SIZE> + static __device__ __forceinline__ void wf_reduce_sum(float* sum) + diff --git a/var/spack/repos/builtin/packages/rocalution/package.py b/var/spack/repos/builtin/packages/rocalution/package.py index 95cd325822..6b6fb7ef99 100644 --- a/var/spack/repos/builtin/packages/rocalution/package.py +++ b/var/spack/repos/builtin/packages/rocalution/package.py @@ -152,6 +152,8 @@ class Rocalution(CMakePackage): # This fix is added to address the compilation 
failure and it is # already taken in 5.2.3 rocm release. patch("0003-fix-compilation-for-rocalution-5.2.0.patch", when="@5.2.0:") + # Fix build for most Radeon 5000 and Radeon 6000 series GPUs. + patch("0004-fix-navi-1x.patch", when="@5.2.0:") def check(self): exe = join_path(self.build_directory, "clients", "staging", "rocalution-test") |