From 13d872592eb5b50fa4a4b107c6f9bf20f689f34b Mon Sep 17 00:00:00 2001 From: Cory Bloor Date: Mon, 12 Sep 2022 10:19:59 -0600 Subject: rocalution: fix compilation for Navi 1x and 2x (#32586) --- .../packages/rocalution/0004-fix-navi-1x.patch | 79 ++++++++++++++++++++++ .../repos/builtin/packages/rocalution/package.py | 2 + 2 files changed, 81 insertions(+) create mode 100644 var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch diff --git a/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch b/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch new file mode 100644 index 0000000000..6e96e64774 --- /dev/null +++ b/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch @@ -0,0 +1,79 @@ +From 9bdff9b0897360a60d21a686f7b988f924aea825 Mon Sep 17 00:00:00 2001 +From: DorianRudolph +Date: Fri, 9 Sep 2022 07:09:25 +0200 +Subject: [PATCH] fix compilation for gfx1031 (#150) + +* fix compilation for gfx1031 + +* Improve guards for arch-specific instructions + +Default to using the fallback implementation and only use +__hip_move_dpp on platforms that are known to support the +necessary intrinsics. + +rocALUTION can also be compiled with CXXFLAGS=-DROCALUTION_USE_MOVE_DPP=0 +to force the use of the fallback implementation (or with the value 1 to +force the use of the __hip_move_dpp implementation). + +This change fixes the compilation error: + + Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+ + +when building for unsupported Navi 1x and Navi 2x GPUs. 
+ +Co-authored-by: Cordell Bloor +--- + src/base/hip/hip_utils.hpp | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/src/base/hip/hip_utils.hpp b/src/base/hip/hip_utils.hpp +index 830f9a5d..5ec4cd00 100644 +--- a/src/base/hip/hip_utils.hpp ++++ b/src/base/hip/hip_utils.hpp +@@ -37,6 +37,18 @@ + #include + #endif + ++#ifndef ROCALUTION_USE_MOVE_DPP ++#if defined(__gfx803__) || \ ++ defined(__gfx900__) || \ ++ defined(__gfx906__) || \ ++ defined(__gfx908__) || \ ++ defined(__gfx90a__) ++#define ROCALUTION_USE_MOVE_DPP 1 ++#else ++#define ROCALUTION_USE_MOVE_DPP 0 ++#endif ++#endif ++ + #define ROCBLAS_HANDLE(handle) *static_cast(handle) + #define ROCSPARSE_HANDLE(handle) *static_cast(handle) + +@@ -206,7 +218,7 @@ namespace rocalution + + __device__ int __llvm_amdgcn_readlane(int index, int offset) __asm("llvm.amdgcn.readlane"); + +-#ifndef __gfx1030__ ++#if ROCALUTION_USE_MOVE_DPP + template + static __device__ __forceinline__ void wf_reduce_sum(int* sum) + { +@@ -223,7 +235,7 @@ namespace rocalution + if(WFSIZE > 32) + *sum += __hip_move_dpp(*sum, 0x143, 0xc, 0xf, 0); + } +-#else ++#else /* ROCALUTION_USE_MOVE_DPP */ + template + static __device__ __forceinline__ void wf_reduce_sum(int* sum) + { +@@ -232,7 +244,7 @@ namespace rocalution + *sum += __shfl_xor(*sum, i); + } + } +-#endif ++#endif /* ROCALUTION_USE_MOVE_DPP */ + + template + static __device__ __forceinline__ void wf_reduce_sum(float* sum) + diff --git a/var/spack/repos/builtin/packages/rocalution/package.py b/var/spack/repos/builtin/packages/rocalution/package.py index 95cd325822..6b6fb7ef99 100644 --- a/var/spack/repos/builtin/packages/rocalution/package.py +++ b/var/spack/repos/builtin/packages/rocalution/package.py @@ -152,6 +152,8 @@ class Rocalution(CMakePackage): # This fix is added to address the compilation failure and it is # already taken in 5.2.3 rocm release. 
patch("0003-fix-compilation-for-rocalution-5.2.0.patch", when="@5.2.0:") + # Fix build for most Radeon 5000 and Radeon 6000 series GPUs. + patch("0004-fix-navi-1x.patch", when="@5.2.0:") def check(self): exe = join_path(self.build_directory, "clients", "staging", "rocalution-test") -- cgit v1.2.3-70-g09d2