summaryrefslogtreecommitdiff
path: root/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch
blob: ab84b91c6177d3019d031637f50d8cb3bd9fa9b9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
From: Cory Bloor <Cordell.Bloor@amd.com>
Date: Fri, 8 Jul 2022 20:53:32 -0600
Subject: [PATCH] Improve guards for arch-specific instructions (#368)

When choosing between a specialized implementation that uses
architecture-specific functionality and a generic fallback, it is
usually preferable to make the fallback the default. This will give the
software the best possible chance of functioning without modification
on future hardware.

Of course, the library will still need code updates to function
optimally on hardware released after the software was written.

rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
to force the use of the fallback implementation. Or with the value 1 to
force the use of the specialized __hip_move_dpp implementation.

This change fixes the compilation error:

    Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+

when building for unsupported Navi 1x and Navi 2x GPUs as was
reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
---
 library/src/include/common.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/library/src/include/common.h b/library/src/include/common.h
index 6a4654af..975c5f7d 100644
--- a/library/src/include/common.h
+++ b/library/src/include/common.h
@@ -34,6 +34,18 @@
 
 // clang-format off
 
+#ifndef ROCSPARSE_USE_MOVE_DPP
+#if defined(__gfx803__) || \
+    defined(__gfx900__) || \
+    defined(__gfx906__) || \
+    defined(__gfx908__) || \
+    defined(__gfx90a__)
+#define ROCSPARSE_USE_MOVE_DPP 1
+#else
+#define ROCSPARSE_USE_MOVE_DPP 0
+#endif
+#endif
+
 // BSR indexing macros
 #define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
 #define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
     if(BLOCKSIZE >   1) { if(i <   1 && i +   1 < BLOCKSIZE) { data[i] = min(data[i], data[i +   1]); } __syncthreads(); }
 }
 
-#ifndef __gfx1030__
+#if ROCSPARSE_USE_MOVE_DPP
 // DPP-based wavefront reduction maximum
 template <unsigned int WFSIZE>
 __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
     sum = temp_sum.val;
     return sum;
 }
-#else
+#else /* ROCSPARSE_USE_MOVE_DPP */
 template <unsigned int WFSIZE>
 __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
 {
@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
 
     return sum;
 }
-#endif
+#endif /* ROCSPARSE_USE_MOVE_DPP */
 
 // DPP-based complex float wavefront reduction sum
 template <unsigned int WFSIZE>