summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch 32
-rw-r--r-- var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch 8
-rw-r--r-- var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch 78
-rw-r--r-- var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch 78
-rw-r--r-- var/spack/repos/builtin/packages/rocsparse/package.py 175
5 files changed, 364 insertions, 7 deletions
diff --git a/var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch b/var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch
new file mode 100644
index 0000000000..cd1fe0d21f
--- /dev/null
+++ b/var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch
@@ -0,0 +1,32 @@
+--- a/clients/tests/CMakeLists.txt
++++ b/clients/tests/CMakeLists.txt
+@@ -33,6 +33,7 @@ if(NOT EXISTS "${CMAKE_MATRICES_DIR}")
+ # Download.
+ #
+ set(CMAKE_MATRICES_DIR ${PROJECT_BINARY_DIR}/matrices CACHE STRING "Matrices directory.")
++ file(MAKE_DIRECTORY ${CMAKE_MATRICES_DIR})
+
+ if(NOT TARGET rocsparse)
+ set(CONVERT_SOURCE ${CMAKE_SOURCE_DIR}/../deps/convert.cpp CACHE STRING "Convert tool mtx2csr.")
+diff --git a/cmake/ClientMatrices.cmake b/cmake/ClientMatrices.cmake
+index 871ebc4..b7fb7bc 100644
+--- a/cmake/ClientMatrices.cmake
++++ b/cmake/ClientMatrices.cmake
+@@ -99,6 +99,7 @@ foreach(i RANGE 0 ${len1})
+
+ # Download test matrices if not already downloaded
+ if(NOT EXISTS "${CMAKE_MATRICES_DIR}/${mat}.csr")
++ if(NOT ROCSPARSE_MTX_DIR)
+ # First try user specified mirror, if available
+ if(DEFINED ENV{ROCSPARSE_TEST_MIRROR} AND NOT $ENV{ROCSPARSE_TEST_MIRROR} STREQUAL "")
+ message("-- Downloading and extracting test matrix ${m}.tar.gz from user specified test mirror: $ENV{ROCSPARSE_TEST_MIRROR}")
+@@ -149,6 +150,9 @@ foreach(i RANGE 0 ${len1})
+ WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
+
+ file(RENAME ${CMAKE_MATRICES_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx)
++ else()
++ file(RENAME ${ROCSPARSE_MTX_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx)
++ endif()
+ execute_process(COMMAND ${PROJECT_BINARY_DIR}/mtx2csr.exe ${mat}.mtx ${mat}.csr
+ WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
+ # TODO: add 'COMMAND_ERROR_IS_FATAL ANY' once cmake supported version is 3.19
diff --git a/var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch b/var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch
new file mode 100644
index 0000000000..c0cd38a8fe
--- /dev/null
+++ b/var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch
@@ -0,0 +1,8 @@
+--- a/clients/common/rocsparse_gentest.py
++++ b/clients/common/rocsparse_gentest.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/python3
++#!/usr/bin/env python3
+
+ # ########################################################################
+ # Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
diff --git a/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch
new file mode 100644
index 0000000000..ab84b91c61
--- /dev/null
+++ b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch
@@ -0,0 +1,78 @@
+From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
+From: Cory Bloor <Cordell.Bloor@amd.com>
+Date: Fri, 8 Jul 2022 20:53:32 -0600
+Subject: [PATCH] Improve guards for arch-specific instructions (#368)
+
+When choosing between a specialized implementation that uses
+architecture-specific functionality and a generic fallback, it is
+usually preferable to make the fallback the default. This will give the
+software the best possible chance of functioning without modification
+on future hardware.
+
+Of course, the library will still need code updates to function
+optimally on hardware released after the software was written.
+
+rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
+to force the use of the fallback implementation. Or with the value 1 to
+force the use of the specialized __hip_move_dpp implementation.
+
+This change fixes the compilation error:
+
+ Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
+
+when building for unsupported Navi 1x and Navi 2x GPUs as was
+reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
+---
+ library/src/include/common.h | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/library/src/include/common.h b/library/src/include/common.h
+index 6a4654af..975c5f7d 100644
+--- a/library/src/include/common.h
++++ b/library/src/include/common.h
+@@ -34,6 +34,18 @@
+
+ // clang-format off
+
++#ifndef ROCSPARSE_USE_MOVE_DPP
++#if defined(__gfx803__) || \
++ defined(__gfx900__) || \
++ defined(__gfx906__) || \
++ defined(__gfx908__) || \
++ defined(__gfx90a__)
++#define ROCSPARSE_USE_MOVE_DPP 1
++#else
++#define ROCSPARSE_USE_MOVE_DPP 0
++#endif
++#endif
++
+ // BSR indexing macros
+ #define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
+ #define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
+@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
+ if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
+ }
+
+-#ifndef __gfx1030__
++#if ROCSPARSE_USE_MOVE_DPP
+ // DPP-based wavefront reduction maximum
+ template <unsigned int WFSIZE>
+ __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
+@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
+ sum = temp_sum.val;
+ return sum;
+ }
+-#else
++#else /* ROCSPARSE_USE_MOVE_DPP */
+ template <unsigned int WFSIZE>
+ __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
+ {
+@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
+
+ return sum;
+ }
+-#endif
++#endif /* ROCSPARSE_USE_MOVE_DPP */
+
+ // DPP-based complex float wavefront reduction sum
+ template <unsigned int WFSIZE>
diff --git a/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch
new file mode 100644
index 0000000000..52bbe8336f
--- /dev/null
+++ b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch
@@ -0,0 +1,78 @@
+From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
+From: Cory Bloor <Cordell.Bloor@amd.com>
+Date: Fri, 8 Jul 2022 20:53:32 -0600
+Subject: [PATCH] Improve guards for arch-specific instructions (#368)
+
+When choosing between a specialized implementation that uses
+architecture-specific functionality and a generic fallback, it is
+usually preferable to make the fallback the default. This will give the
+software the best possible chance of functioning without modification
+on future hardware.
+
+Of course, the library will still need code updates to function
+optimally on hardware released after the software was written.
+
+rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
+to force the use of the fallback implementation. Or with the value 1 to
+force the use of the specialized __hip_move_dpp implementation.
+
+This change fixes the compilation error:
+
+ Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
+
+when building for unsupported Navi 1x and Navi 2x GPUs as was
+reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
+---
+ library/src/include/common.h | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/library/src/include/common.h b/library/src/include/common.h
+index 6a4654af..975c5f7d 100644
+--- a/library/src/include/common.h
++++ b/library/src/include/common.h
+@@ -34,6 +34,18 @@
+
+ // clang-format off
+
++#ifndef ROCSPARSE_USE_MOVE_DPP
++#if defined(__gfx803__) || \
++ defined(__gfx900__) || \
++ defined(__gfx906__) || \
++ defined(__gfx908__) || \
++ defined(__gfx90a__)
++#define ROCSPARSE_USE_MOVE_DPP 1
++#else
++#define ROCSPARSE_USE_MOVE_DPP 0
++#endif
++#endif
++
+ // BSR indexing macros
+ #define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
+ #define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
+@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
+ if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
+ }
+
+-#if (!defined(__gfx1030__)) && (!defined(__gfx1011__))
++#if ROCSPARSE_USE_MOVE_DPP
+ // DPP-based wavefront reduction maximum
+ template <unsigned int WFSIZE>
+ __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
+@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
+ sum = temp_sum.val;
+ return sum;
+ }
+-#else
++#else /* ROCSPARSE_USE_MOVE_DPP */
+ template <unsigned int WFSIZE>
+ __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
+ {
+@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
+
+ return sum;
+ }
+-#endif
++#endif /* ROCSPARSE_USE_MOVE_DPP */
+
+ // DPP-based complex float wavefront reduction sum
+ template <unsigned int WFSIZE>
diff --git a/var/spack/repos/builtin/packages/rocsparse/package.py b/var/spack/repos/builtin/packages/rocsparse/package.py
index 66e2983676..765dc23bff 100644
--- a/var/spack/repos/builtin/packages/rocsparse/package.py
+++ b/var/spack/repos/builtin/packages/rocsparse/package.py
@@ -3,7 +3,6 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import itertools
import re
from spack.package import *
@@ -33,6 +32,7 @@ class Rocsparse(CMakePackage):
values=("Release", "Debug", "RelWithDebInfo"),
description="CMake build type",
)
+ variant("test", default=False, description="Build rocsparse-test client")
version("5.2.0", sha256="7ed929af16d2502135024a6463997d9a95f03899b8a33aa95db7029575c89572")
version("5.1.3", sha256="ef9641045b36c9aacc87e4fe7717b41b1e29d97e21432678dce7aca633a8edc2")
@@ -114,13 +114,173 @@ class Rocsparse(CMakePackage):
"5.2.0",
]:
depends_on("hip@" + ver, when="@" + ver)
- for tgt in itertools.chain(["auto"], amdgpu_targets):
- depends_on(
- "rocprim@{0} amdgpu_target={1}".format(ver, tgt),
- when="@{0} amdgpu_target={1}".format(ver, tgt),
- )
+ depends_on("rocprim@" + ver, when="@" + ver)
depends_on("rocm-cmake@%s:" % ver, type="build", when="@" + ver)
+ # Add option so Spack can manage downloaded test matrices as resources.
+ patch("0001-set-mtx-directory.patch", when="@4.5.0: +test")
+ # Enable use of Spack-provided Python.
+ patch("0002-fix-gentest-shebang.patch", when="@4.5.0: +test")
+ # Fix build for most Radeon 5000 and Radeon 6000 series GPUs.
+ patch("0003-fix-navi-1x-rocm-4.5.patch", when="@4.5.0:5.1")
+ patch("0003-fix-navi-1x-rocm-5.2.patch", when="@5.2.0:")
+
+ depends_on("googletest@1.11.0:", when="@5.1.0: +test")
+ depends_on("googletest@1.10.0:", when="+test")
+ depends_on("python@3:", type="build", when="+test")
+ depends_on("py-pyyaml", type="build", when="+test")
+
+ with when("+test"):
+ resource(
+ name="amazon0312",
+ url="https://sparse.tamu.edu/MM/SNAP/amazon0312.tar.gz",
+ sha256="75ffd36b33675856f370f508d53e6197caa972ac52929991db7dc4198bd64910",
+ destination="mtx",
+ )
+ resource(
+ name="Chebyshev4",
+ url="https://sparse.tamu.edu/MM/Muite/Chebyshev4.tar.gz",
+ sha256="82553d73281587ea70e5faa427910e979524412c89e59ada7fa47a97142ae8a6",
+ destination="mtx",
+ )
+ resource(
+ name="sme3Dc",
+ url="https://sparse.tamu.edu/MM/FEMLAB/sme3Dc.tar.gz",
+ sha256="82f03904849cceea0af1b9975942717527ecc5e87a98cfddea78ffbe7e7c076d",
+ destination="mtx",
+ )
+ resource(
+ name="webbase-1M",
+ url="https://sparse.tamu.edu/MM/Williams/webbase-1M.tar.gz",
+ sha256="17a0391cdd966350b2b41f32aaf8e6684f3c55f25eb68c4be088f44f728a3ed4",
+ destination="mtx",
+ )
+ resource(
+ name="rma10",
+ url="https://sparse.tamu.edu/MM/Bova/rma10.tar.gz",
+ sha256="50db8d278d371531b3dd0638444d47a77f3a3e189663993a857861dbc34c5e3f",
+ destination="mtx",
+ )
+ resource(
+ name="bibd_22_8",
+ url="https://sparse.tamu.edu/MM/JGD_BIBD/bibd_22_8.tar.gz",
+ sha256="534b5210662d1b5b14a3938671501189685d12abf9f2a206778508345181014c",
+ destination="mtx",
+ )
+ resource(
+ name="mac_econ_fwd500",
+ url="https://sparse.tamu.edu/MM/Williams/mac_econ_fwd500.tar.gz",
+ sha256="0dec2952b2908e3d59e4179289245db7f2c84f9e5e6543e818491deed5978f82",
+ destination="mtx",
+ )
+ resource(
+ name="mc2depi",
+ url="https://sparse.tamu.edu/MM/Williams/mc2depi.tar.gz",
+ sha256="c02fef86efdd4f4322487e7472697a3d30b084ede1021e6d6889b347d3f6b268",
+ destination="mtx",
+ )
+ resource(
+ name="scircuit",
+ url="https://sparse.tamu.edu/MM/Hamm/scircuit.tar.gz",
+ sha256="227d4c98e51c8af49c07f89929c62f2523e115e81b672e7f306185ea92c2996f",
+ destination="mtx",
+ )
+ resource(
+ name="ASIC_320k",
+ url="https://sparse.tamu.edu/MM/Sandia/ASIC_320k.tar.gz",
+ sha256="d0d4ac477f641c8372d7347bc262ffcbde017f50fb17bb1a1539c98dd3440145",
+ destination="mtx",
+ )
+ resource(
+ name="bmwcra_1",
+ url="https://sparse.tamu.edu/MM/GHS_psdef/bmwcra_1.tar.gz",
+ sha256="31467b319f3d4e8a8fc3a320344650bee14b285755b13ee29264b7a488b3d222",
+ destination="mtx",
+ )
+ resource(
+ name="nos1",
+ url="https://sparse.tamu.edu/MM/HB/nos1.tar.gz",
+ sha256="7e64dc2408890e85a60dbd2ad048963c74625cc3037dbdff9647d30844a52674",
+ destination="mtx",
+ )
+ resource(
+ name="nos2",
+ url="https://sparse.tamu.edu/MM/HB/nos2.tar.gz",
+ sha256="7439318b969e8cad0e96f154937a35256374bb8f0e16ed7ecc3a5219f8dc903b",
+ destination="mtx",
+ )
+ resource(
+ name="nos3",
+ url="https://sparse.tamu.edu/MM/HB/nos3.tar.gz",
+ sha256="7dd62179bbcaeb693c774712a8d70b97316364983f1cbf06cecb3900da8954a5",
+ destination="mtx",
+ )
+ resource(
+ name="nos4",
+ url="https://sparse.tamu.edu/MM/HB/nos4.tar.gz",
+ sha256="ec2323a5195db153fd6ae32ff537b22eb47f08e73949754b71f8d4104358f10f",
+ destination="mtx",
+ )
+ resource(
+ name="nos5",
+ url="https://sparse.tamu.edu/MM/HB/nos5.tar.gz",
+ sha256="dd67e906b0392cfbbe5a01a1f1a569c50875cbf88249a31721fb87519666a342",
+ destination="mtx",
+ )
+ resource(
+ name="nos6",
+ url="https://sparse.tamu.edu/MM/HB/nos6.tar.gz",
+ sha256="a0301c38ed91b849571303db581205cfae113565a7938eaa1a7466320f0d03c4",
+ destination="mtx",
+ )
+ resource(
+ name="nos7",
+ url="https://sparse.tamu.edu/MM/HB/nos7.tar.gz",
+ sha256="c5d8d99bf4b54ee45e2f45d78530e3787f2e9670c000a68ad986a3b923e9e5ae",
+ destination="mtx",
+ )
+ resource(
+ name="shipsec1",
+ url="https://sparse.tamu.edu/MM/DNVS/shipsec1.tar.gz",
+ sha256="d021889affed5429f85b606900f76870d0b1b1aefd92529cc6f43bf9d7ef0eb1",
+ destination="mtx",
+ )
+ resource(
+ name="mplate",
+ url="https://sparse.tamu.edu/MM/Cote/mplate.tar.gz",
+ sha256="647b848343e423a24e05d3a3d462fa6b77958e362aadf70e9bb51bd420730df2",
+ destination="mtx",
+ )
+ resource(
+ name="qc2534",
+ url="https://sparse.tamu.edu/MM/Bai/qc2534.tar.gz",
+ sha256="591c54ceee70222909353d2a400dd9819e3432143b2c25b6c4ffa262b8e397c8",
+ destination="mtx",
+ )
+ resource(
+ name="Chevron2",
+ url="https://sparse.tamu.edu/MM/Chevron/Chevron2.tar.gz",
+ sha256="9334b61c25958f5221fd114e9698c11ac0ec57a0432150731d3fe80033da3026",
+ destination="mtx",
+ )
+ resource(
+ name="Chevron3",
+ url="https://sparse.tamu.edu/MM/Chevron/Chevron3.tar.gz",
+ sha256="5679292ba86defedb0a6afc25274948521ace7ca90fc765265be11ca6eaaaee4",
+ destination="mtx",
+ )
+ resource(
+ name="Chevron4",
+ url="https://sparse.tamu.edu/MM/Chevron/Chevron4.tar.gz",
+ sha256="2ac9dc0d8d38cbf4a62089c74e53aea87edbb3f0b553b77b27c70df70e1d17d5",
+ destination="mtx",
+ )
+
+ def check(self):
+ if self.spec.satisfies("+test"):
+ exe = join_path(self.build_directory, "clients", "staging", "rocsparse-test")
+ self.run_test(exe, options=["--gtest_filter=*quick*:*pre_checkin*-*known_bug*"])
+
def setup_build_environment(self, env):
env.set("CXX", self.spec["hip"].hipcc)
@@ -138,8 +298,9 @@ class Rocsparse(CMakePackage):
def cmake_args(self):
args = [
self.define("BUILD_CLIENTS_SAMPLES", "OFF"),
- self.define("BUILD_CLIENTS_TESTS", "OFF"),
+ self.define_from_variant("BUILD_CLIENTS_TESTS", "test"),
self.define("BUILD_CLIENTS_BENCHMARKS", "OFF"),
+ self.define("ROCSPARSE_MTX_DIR", join_path(self.stage.source_path, "mtx")),
]
if "auto" not in self.spec.variants["amdgpu_target"]: