summaryrefslogtreecommitdiff
path: root/var
diff options
context:
space:
mode:
author Cory Bloor <Cordell.Bloor@amd.com> 2022-09-12 10:19:59 -0600
committer GitHub <noreply@github.com> 2022-09-12 09:19:59 -0700
commit 13d872592eb5b50fa4a4b107c6f9bf20f689f34b (patch)
tree 6a3fbffcc80545ddeefc40b7e7739e61e421de3b /var
parent 5dc1a9f2140013b72c57c00bcf3523ed1accb02b (diff)
download spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.tar.gz
spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.tar.bz2
spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.tar.xz
spack-13d872592eb5b50fa4a4b107c6f9bf20f689f34b.zip
rocalution: fix compilation for Navi 1x and 2x (#32586)
Diffstat (limited to 'var')
-rw-r--r-- var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch 79
-rw-r--r-- var/spack/repos/builtin/packages/rocalution/package.py 2
2 files changed, 81 insertions, 0 deletions
diff --git a/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch b/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch
new file mode 100644
index 0000000000..6e96e64774
--- /dev/null
+++ b/var/spack/repos/builtin/packages/rocalution/0004-fix-navi-1x.patch
@@ -0,0 +1,79 @@
+From 9bdff9b0897360a60d21a686f7b988f924aea825 Mon Sep 17 00:00:00 2001
+From: DorianRudolph <dorianrudo97@googlemail.com>
+Date: Fri, 9 Sep 2022 07:09:25 +0200
+Subject: [PATCH] fix compilation for gfx1031 (#150)
+
+* fix compilation for gfx1031
+
+* Improve guards for arch-specific instructions
+
+Default to using the fallback implementation and only use
+__hip_move_dpp on platforms that are known to support the
+necessary intrinsics.
+
+rocALUTION can also be compiled with CXXFLAGS=-DROCALUTION_USE_MOVE_DPP=0
+to force the use of the fallback implementation (or with the value 1 to
+force the use of the __hip_move_dpp implementation).
+
+This change fixes the compilation error:
+
+ Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
+
+when building for unsupported Navi 1x and Navi 2x GPUs.
+
+Co-authored-by: Cordell Bloor <Cordell.Bloor@amd.com>
+---
+ src/base/hip/hip_utils.hpp | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/src/base/hip/hip_utils.hpp b/src/base/hip/hip_utils.hpp
+index 830f9a5d..5ec4cd00 100644
+--- a/src/base/hip/hip_utils.hpp
++++ b/src/base/hip/hip_utils.hpp
+@@ -37,6 +37,18 @@
+ #include <hip/hip_complex.h>
+ #endif
+
++#ifndef ROCALUTION_USE_MOVE_DPP
++#if defined(__gfx803__) || \
++ defined(__gfx900__) || \
++ defined(__gfx906__) || \
++ defined(__gfx908__) || \
++ defined(__gfx90a__)
++#define ROCALUTION_USE_MOVE_DPP 1
++#else
++#define ROCALUTION_USE_MOVE_DPP 0
++#endif
++#endif
++
+ #define ROCBLAS_HANDLE(handle) *static_cast<rocblas_handle*>(handle)
+ #define ROCSPARSE_HANDLE(handle) *static_cast<rocsparse_handle*>(handle)
+
+@@ -206,7 +218,7 @@ namespace rocalution
+
+ __device__ int __llvm_amdgcn_readlane(int index, int offset) __asm("llvm.amdgcn.readlane");
+
+-#ifndef __gfx1030__
++#if ROCALUTION_USE_MOVE_DPP
+ template <unsigned int WFSIZE>
+ static __device__ __forceinline__ void wf_reduce_sum(int* sum)
+ {
+@@ -223,7 +235,7 @@ namespace rocalution
+ if(WFSIZE > 32)
+ *sum += __hip_move_dpp(*sum, 0x143, 0xc, 0xf, 0);
+ }
+-#else
++#else /* ROCALUTION_USE_MOVE_DPP */
+ template <unsigned int WFSIZE>
+ static __device__ __forceinline__ void wf_reduce_sum(int* sum)
+ {
+@@ -232,7 +244,7 @@ namespace rocalution
+ *sum += __shfl_xor(*sum, i);
+ }
+ }
+-#endif
++#endif /* ROCALUTION_USE_MOVE_DPP */
+
+ template <unsigned int WF_SIZE>
+ static __device__ __forceinline__ void wf_reduce_sum(float* sum)
+
diff --git a/var/spack/repos/builtin/packages/rocalution/package.py b/var/spack/repos/builtin/packages/rocalution/package.py
index 95cd325822..6b6fb7ef99 100644
--- a/var/spack/repos/builtin/packages/rocalution/package.py
+++ b/var/spack/repos/builtin/packages/rocalution/package.py
@@ -152,6 +152,8 @@ class Rocalution(CMakePackage):
# This fix is added to address the compilation failure and it is
# already taken in 5.2.3 rocm release.
patch("0003-fix-compilation-for-rocalution-5.2.0.patch", when="@5.2.0:")
+ # Fix build for most Radeon 5000 and Radeon 6000 series GPUs.
+ patch("0004-fix-navi-1x.patch", when="@5.2.0:")
def check(self):
exe = join_path(self.build_directory, "clients", "staging", "rocalution-test")