diff options
5 files changed, 364 insertions, 7 deletions
diff --git a/var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch b/var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch new file mode 100644 index 0000000000..cd1fe0d21f --- /dev/null +++ b/var/spack/repos/builtin/packages/rocsparse/0001-set-mtx-directory.patch @@ -0,0 +1,32 @@ +--- a/clients/tests/CMakeLists.txt ++++ b/clients/tests/CMakeLists.txt +@@ -33,6 +33,7 @@ if(NOT EXISTS "${CMAKE_MATRICES_DIR}") + # Download. + # + set(CMAKE_MATRICES_DIR ${PROJECT_BINARY_DIR}/matrices CACHE STRING "Matrices directory.") ++ file(MAKE_DIRECTORY ${CMAKE_MATRICES_DIR}) + + if(NOT TARGET rocsparse) + set(CONVERT_SOURCE ${CMAKE_SOURCE_DIR}/../deps/convert.cpp CACHE STRING "Convert tool mtx2csr.") +diff --git a/cmake/ClientMatrices.cmake b/cmake/ClientMatrices.cmake +index 871ebc4..b7fb7bc 100644 +--- a/cmake/ClientMatrices.cmake ++++ b/cmake/ClientMatrices.cmake +@@ -99,6 +99,7 @@ foreach(i RANGE 0 ${len1}) + + # Download test matrices if not already downloaded + if(NOT EXISTS "${CMAKE_MATRICES_DIR}/${mat}.csr") ++ if(NOT ROCSPARSE_MTX_DIR) + # First try user specified mirror, if available + if(DEFINED ENV{ROCSPARSE_TEST_MIRROR} AND NOT $ENV{ROCSPARSE_TEST_MIRROR} STREQUAL "") + message("-- Downloading and extracting test matrix ${m}.tar.gz from user specified test mirror: $ENV{ROCSPARSE_TEST_MIRROR}") +@@ -149,6 +150,9 @@ foreach(i RANGE 0 ${len1}) + WORKING_DIRECTORY ${CMAKE_MATRICES_DIR}) + + file(RENAME ${CMAKE_MATRICES_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx) ++ else() ++ file(RENAME ${ROCSPARSE_MTX_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx) ++ endif() + execute_process(COMMAND ${PROJECT_BINARY_DIR}/mtx2csr.exe ${mat}.mtx ${mat}.csr + WORKING_DIRECTORY ${CMAKE_MATRICES_DIR}) + # TODO: add 'COMMAND_ERROR_IS_FATAL ANY' once cmake supported version is 3.19 diff --git a/var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch 
b/var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch new file mode 100644 index 0000000000..c0cd38a8fe --- /dev/null +++ b/var/spack/repos/builtin/packages/rocsparse/0002-fix-gentest-shebang.patch @@ -0,0 +1,8 @@ +--- a/clients/common/rocsparse_gentest.py ++++ b/clients/common/rocsparse_gentest.py +@@ -1,4 +1,4 @@ +-#!/usr/bin/python3 ++#!/usr/bin/env python3 + + # ######################################################################## + # Copyright (c) 2019-2021 Advanced Micro Devices, Inc. diff --git a/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch new file mode 100644 index 0000000000..ab84b91c61 --- /dev/null +++ b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-4.5.patch @@ -0,0 +1,78 @@ +From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001 +From: Cory Bloor <Cordell.Bloor@amd.com> +Date: Fri, 8 Jul 2022 20:53:32 -0600 +Subject: [PATCH] Improve guards for arch-specific instructions (#368) + +When choosing between a specialized implementation that uses +architecture-specific functionality and a generic fallback, it is +usually preferable to make the fallback the default. This will give the +software the best possible chance of functioning without modification +on future hardware. + +Of course, the library will still need code updates to function +optimally on hardware released after the software was written. + +rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0 +to force the use of the fallback implementation. Or with the value 1 to +force the use of the specialized __hip_move_dpp implementation. 
+ +This change fixes the compilation error: + + Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+ + +when building for unsupported Navi 1x and Navi 2x GPUs as was +reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250 +--- + library/src/include/common.h | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/library/src/include/common.h b/library/src/include/common.h +index 6a4654af..975c5f7d 100644 +--- a/library/src/include/common.h ++++ b/library/src/include/common.h +@@ -34,6 +34,18 @@ + + // clang-format off + ++#ifndef ROCSPARSE_USE_MOVE_DPP ++#if defined(__gfx803__) || \ ++ defined(__gfx900__) || \ ++ defined(__gfx906__) || \ ++ defined(__gfx908__) || \ ++ defined(__gfx90a__) ++#define ROCSPARSE_USE_MOVE_DPP 1 ++#else ++#define ROCSPARSE_USE_MOVE_DPP 0 ++#endif ++#endif ++ + // BSR indexing macros + #define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj)) + #define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj)) +@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data) + if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); } + } + +-#ifndef __gfx1030__ ++#if ROCSPARSE_USE_MOVE_DPP + // DPP-based wavefront reduction maximum + template <unsigned int WFSIZE> + __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum) +@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum) + sum = temp_sum.val; + return sum; + } +-#else ++#else /* ROCSPARSE_USE_MOVE_DPP */ + template <unsigned int WFSIZE> + __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum) + { +@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum) + + return sum; + } +-#endif ++#endif /* ROCSPARSE_USE_MOVE_DPP */ + + // DPP-based complex float 
wavefront reduction sum + template <unsigned int WFSIZE> diff --git a/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch new file mode 100644 index 0000000000..52bbe8336f --- /dev/null +++ b/var/spack/repos/builtin/packages/rocsparse/0003-fix-navi-1x-rocm-5.2.patch @@ -0,0 +1,78 @@ +From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001 +From: Cory Bloor <Cordell.Bloor@amd.com> +Date: Fri, 8 Jul 2022 20:53:32 -0600 +Subject: [PATCH] Improve guards for arch-specific instructions (#368) + +When choosing between a specialized implementation that uses +architecture-specific functionality and a generic fallback, it is +usually preferable to make the fallback the default. This will give the +software the best possible chance of functioning without modification +on future hardware. + +Of course, the library will still need code updates to function +optimally on hardware released after the software was written. + +rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0 +to force the use of the fallback implementation. Or with the value 1 to +force the use of the specialized __hip_move_dpp implementation. 
+ +This change fixes the compilation error: + + Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+ + +when building for unsupported Navi 1x and Navi 2x GPUs as was +reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250 +--- + library/src/include/common.h | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/library/src/include/common.h b/library/src/include/common.h +index 6a4654af..975c5f7d 100644 +--- a/library/src/include/common.h ++++ b/library/src/include/common.h +@@ -34,6 +34,18 @@ + + // clang-format off + ++#ifndef ROCSPARSE_USE_MOVE_DPP ++#if defined(__gfx803__) || \ ++ defined(__gfx900__) || \ ++ defined(__gfx906__) || \ ++ defined(__gfx908__) || \ ++ defined(__gfx90a__) ++#define ROCSPARSE_USE_MOVE_DPP 1 ++#else ++#define ROCSPARSE_USE_MOVE_DPP 0 ++#endif ++#endif ++ + // BSR indexing macros + #define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj)) + #define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj)) +@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data) + if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); } + } + +-#if (!defined(__gfx1030__)) && (!defined(__gfx1011__)) ++#if ROCSPARSE_USE_MOVE_DPP + // DPP-based wavefront reduction maximum + template <unsigned int WFSIZE> + __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum) +@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum) + sum = temp_sum.val; + return sum; + } +-#else ++#else /* ROCSPARSE_USE_MOVE_DPP */ + template <unsigned int WFSIZE> + __device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum) + { +@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum) + + return sum; + } +-#endif ++#endif /* ROCSPARSE_USE_MOVE_DPP 
*/ + + // DPP-based complex float wavefront reduction sum + template <unsigned int WFSIZE> diff --git a/var/spack/repos/builtin/packages/rocsparse/package.py b/var/spack/repos/builtin/packages/rocsparse/package.py index 66e2983676..765dc23bff 100644 --- a/var/spack/repos/builtin/packages/rocsparse/package.py +++ b/var/spack/repos/builtin/packages/rocsparse/package.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -import itertools import re from spack.package import * @@ -33,6 +32,7 @@ class Rocsparse(CMakePackage): values=("Release", "Debug", "RelWithDebInfo"), description="CMake build type", ) + variant("test", default=False, description="Build rocsparse-test client") version("5.2.0", sha256="7ed929af16d2502135024a6463997d9a95f03899b8a33aa95db7029575c89572") version("5.1.3", sha256="ef9641045b36c9aacc87e4fe7717b41b1e29d97e21432678dce7aca633a8edc2") @@ -114,13 +114,173 @@ class Rocsparse(CMakePackage): "5.2.0", ]: depends_on("hip@" + ver, when="@" + ver) - for tgt in itertools.chain(["auto"], amdgpu_targets): - depends_on( - "rocprim@{0} amdgpu_target={1}".format(ver, tgt), - when="@{0} amdgpu_target={1}".format(ver, tgt), - ) + depends_on("rocprim@" + ver, when="@" + ver) depends_on("rocm-cmake@%s:" % ver, type="build", when="@" + ver) + # Add option so Spack can manage downloaded test matrices as resources. + patch("0001-set-mtx-directory.patch", when="@4.5.0: +test") + # Enable use of Spack-provided Python. + patch("0002-fix-gentest-shebang.patch", when="@4.5.0: +test") + # Fix build for most Radeon 5000 and Radeon 6000 series GPUs. 
+ patch("0003-fix-navi-1x-rocm-4.5.patch", when="@4.5.0:5.1") + patch("0003-fix-navi-1x-rocm-5.2.patch", when="@5.2.0:") + + depends_on("googletest@1.11.0:", when="@5.1.0: +test") + depends_on("googletest@1.10.0:", when="+test") + depends_on("python@3:", type="build", when="+test") + depends_on("py-pyyaml", type="build", when="+test") + + with when("+test"): + resource( + name="amazon0312", + url="https://sparse.tamu.edu/MM/SNAP/amazon0312.tar.gz", + sha256="75ffd36b33675856f370f508d53e6197caa972ac52929991db7dc4198bd64910", + destination="mtx", + ) + resource( + name="Chebyshev4", + url="https://sparse.tamu.edu/MM/Muite/Chebyshev4.tar.gz", + sha256="82553d73281587ea70e5faa427910e979524412c89e59ada7fa47a97142ae8a6", + destination="mtx", + ) + resource( + name="sme3Dc", + url="https://sparse.tamu.edu/MM/FEMLAB/sme3Dc.tar.gz", + sha256="82f03904849cceea0af1b9975942717527ecc5e87a98cfddea78ffbe7e7c076d", + destination="mtx", + ) + resource( + name="webbase-1M", + url="https://sparse.tamu.edu/MM/Williams/webbase-1M.tar.gz", + sha256="17a0391cdd966350b2b41f32aaf8e6684f3c55f25eb68c4be088f44f728a3ed4", + destination="mtx", + ) + resource( + name="rma10", + url="https://sparse.tamu.edu/MM/Bova/rma10.tar.gz", + sha256="50db8d278d371531b3dd0638444d47a77f3a3e189663993a857861dbc34c5e3f", + destination="mtx", + ) + resource( + name="bibd_22_8", + url="https://sparse.tamu.edu/MM/JGD_BIBD/bibd_22_8.tar.gz", + sha256="534b5210662d1b5b14a3938671501189685d12abf9f2a206778508345181014c", + destination="mtx", + ) + resource( + name="mac_econ_fwd500", + url="https://sparse.tamu.edu/MM/Williams/mac_econ_fwd500.tar.gz", + sha256="0dec2952b2908e3d59e4179289245db7f2c84f9e5e6543e818491deed5978f82", + destination="mtx", + ) + resource( + name="mc2depi", + url="https://sparse.tamu.edu/MM/Williams/mc2depi.tar.gz", + sha256="c02fef86efdd4f4322487e7472697a3d30b084ede1021e6d6889b347d3f6b268", + destination="mtx", + ) + resource( + name="scircuit", + 
url="https://sparse.tamu.edu/MM/Hamm/scircuit.tar.gz", + sha256="227d4c98e51c8af49c07f89929c62f2523e115e81b672e7f306185ea92c2996f", + destination="mtx", + ) + resource( + name="ASIC_320k", + url="https://sparse.tamu.edu/MM/Sandia/ASIC_320k.tar.gz", + sha256="d0d4ac477f641c8372d7347bc262ffcbde017f50fb17bb1a1539c98dd3440145", + destination="mtx", + ) + resource( + name="bmwcra_1", + url="https://sparse.tamu.edu/MM/GHS_psdef/bmwcra_1.tar.gz", + sha256="31467b319f3d4e8a8fc3a320344650bee14b285755b13ee29264b7a488b3d222", + destination="mtx", + ) + resource( + name="nos1", + url="https://sparse.tamu.edu/MM/HB/nos1.tar.gz", + sha256="7e64dc2408890e85a60dbd2ad048963c74625cc3037dbdff9647d30844a52674", + destination="mtx", + ) + resource( + name="nos2", + url="https://sparse.tamu.edu/MM/HB/nos2.tar.gz", + sha256="7439318b969e8cad0e96f154937a35256374bb8f0e16ed7ecc3a5219f8dc903b", + destination="mtx", + ) + resource( + name="nos3", + url="https://sparse.tamu.edu/MM/HB/nos3.tar.gz", + sha256="7dd62179bbcaeb693c774712a8d70b97316364983f1cbf06cecb3900da8954a5", + destination="mtx", + ) + resource( + name="nos4", + url="https://sparse.tamu.edu/MM/HB/nos4.tar.gz", + sha256="ec2323a5195db153fd6ae32ff537b22eb47f08e73949754b71f8d4104358f10f", + destination="mtx", + ) + resource( + name="nos5", + url="https://sparse.tamu.edu/MM/HB/nos5.tar.gz", + sha256="dd67e906b0392cfbbe5a01a1f1a569c50875cbf88249a31721fb87519666a342", + destination="mtx", + ) + resource( + name="nos6", + url="https://sparse.tamu.edu/MM/HB/nos6.tar.gz", + sha256="a0301c38ed91b849571303db581205cfae113565a7938eaa1a7466320f0d03c4", + destination="mtx", + ) + resource( + name="nos7", + url="https://sparse.tamu.edu/MM/HB/nos7.tar.gz", + sha256="c5d8d99bf4b54ee45e2f45d78530e3787f2e9670c000a68ad986a3b923e9e5ae", + destination="mtx", + ) + resource( + name="shipsec1", + url="https://sparse.tamu.edu/MM/DNVS/shipsec1.tar.gz", + sha256="d021889affed5429f85b606900f76870d0b1b1aefd92529cc6f43bf9d7ef0eb1", + destination="mtx", + ) + 
resource( + name="mplate", + url="https://sparse.tamu.edu/MM/Cote/mplate.tar.gz", + sha256="647b848343e423a24e05d3a3d462fa6b77958e362aadf70e9bb51bd420730df2", + destination="mtx", + ) + resource( + name="qc2534", + url="https://sparse.tamu.edu/MM/Bai/qc2534.tar.gz", + sha256="591c54ceee70222909353d2a400dd9819e3432143b2c25b6c4ffa262b8e397c8", + destination="mtx", + ) + resource( + name="Chevron2", + url="https://sparse.tamu.edu/MM/Chevron/Chevron2.tar.gz", + sha256="9334b61c25958f5221fd114e9698c11ac0ec57a0432150731d3fe80033da3026", + destination="mtx", + ) + resource( + name="Chevron3", + url="https://sparse.tamu.edu/MM/Chevron/Chevron3.tar.gz", + sha256="5679292ba86defedb0a6afc25274948521ace7ca90fc765265be11ca6eaaaee4", + destination="mtx", + ) + resource( + name="Chevron4", + url="https://sparse.tamu.edu/MM/Chevron/Chevron4.tar.gz", + sha256="2ac9dc0d8d38cbf4a62089c74e53aea87edbb3f0b553b77b27c70df70e1d17d5", + destination="mtx", + ) + + def check(self): + if self.spec.satisfies("+test"): + exe = join_path(self.build_directory, "clients", "staging", "rocsparse-test") + self.run_test(exe, options=["--gtest_filter=*quick*:*pre_checkin*-*known_bug*"]) + def setup_build_environment(self, env): env.set("CXX", self.spec["hip"].hipcc) @@ -138,8 +298,9 @@ class Rocsparse(CMakePackage): def cmake_args(self): args = [ self.define("BUILD_CLIENTS_SAMPLES", "OFF"), - self.define("BUILD_CLIENTS_TESTS", "OFF"), + self.define_from_variant("BUILD_CLIENTS_TESTS", "test"), self.define("BUILD_CLIENTS_BENCHMARKS", "OFF"), + self.define("ROCSPARSE_MTX_DIR", join_path(self.stage.source_path, "mtx")), ] if "auto" not in self.spec.variants["amdgpu_target"]: |