summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author afzpatel <122491982+afzpatel@users.noreply.github.com> 2024-10-17 12:34:34 -0400
committer GitHub <noreply@github.com> 2024-10-17 18:34:34 +0200
commit 962262a1d3d2ca8db50fa82ec5684ba85b2a8c99 (patch)
tree 72248203a9c4a7e463d6cf89d1cb9e74f8ec6a6f
parent adaa0a4863a810fa99d392a96c372f8be45e81f4 (diff)
download spack-962262a1d3d2ca8db50fa82ec5684ba85b2a8c99.tar.gz
spack-962262a1d3d2ca8db50fa82ec5684ba85b2a8c99.tar.bz2
spack-962262a1d3d2ca8db50fa82ec5684ba85b2a8c99.tar.xz
spack-962262a1d3d2ca8db50fa82ec5684ba85b2a8c99.zip
llvm-amdgpu and composable-kernel: fix build failures (#46891)
-rw-r--r-- var/spack/repos/builtin/packages/composable-kernel/0001-mark-kernels-maybe-unused.patch 88
-rw-r--r-- var/spack/repos/builtin/packages/composable-kernel/package.py 6
-rw-r--r-- var/spack/repos/builtin/packages/llvm-amdgpu/package.py 5
3 files changed, 96 insertions, 3 deletions
diff --git a/var/spack/repos/builtin/packages/composable-kernel/0001-mark-kernels-maybe-unused.patch b/var/spack/repos/builtin/packages/composable-kernel/0001-mark-kernels-maybe-unused.patch
new file mode 100644
index 0000000000..f2fbc24f61
--- /dev/null
+++ b/var/spack/repos/builtin/packages/composable-kernel/0001-mark-kernels-maybe-unused.patch
@@ -0,0 +1,88 @@
+diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
+index f4f496fc10..d9e300b737 100644
+--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
+@@ -47,12 +47,12 @@ __global__ void
+ #endif
+ kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3(
+ typename GridwiseGemm::Argument karg,
+- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
+- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
+- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
++ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
++ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
++ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
+ c_grid_desc_mblock_mperblock_nblock_nperblock,
+- const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
+- const index_t num_k_per_block)
++ [[maybe_unused]] const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
++ [[maybe_unused]] const index_t num_k_per_block)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
+ defined(__gfx94__))
+@@ -103,12 +103,12 @@ __global__ void
+ #endif
+ kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3_2lds(
+ typename GridwiseGemm::Argument karg,
+- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
+- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
+- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
++ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
++ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
++ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
+ c_grid_desc_mblock_mperblock_nblock_nperblock,
+- const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
+- const index_t num_k_per_block)
++ [[maybe_unused]] const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
++ [[maybe_unused]] const index_t num_k_per_block)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
+ defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
+diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
+index 415ae3d496..a4d4a01a01 100644
+--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp
+@@ -69,14 +69,15 @@ __global__ void
+ #if CK_USE_LAUNCH_BOUNDS
+ __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, MinimumOccupancy)
+ #endif
+- kernel_grouped_conv_fwd_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg,
+- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
+- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
+- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
+- c_grid_desc_mblock_mperblock_nblock_nperblock,
+- const ComputePtrOffset compute_ptr_offset_of_groups,
+- const ComputePtrOffset compute_ptr_offset_of_n,
+- const index_t groups_count)
++ kernel_grouped_conv_fwd_xdl_cshuffle_v3(
++ typename GridwiseGemm::Argument karg,
++ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
++ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
++ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
++ c_grid_desc_mblock_mperblock_nblock_nperblock,
++ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_groups,
++ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_n,
++ [[maybe_unused]] const index_t groups_count)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
+ // offset base pointer for each work-group
+@@ -132,13 +133,13 @@ __global__ void
+ #endif
+ kernel_grouped_conv_fwd_xdl_cshuffle_v3_2lds(
+ typename GridwiseGemm::Argument karg,
+- const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
+- const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
+- const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
++ [[maybe_unused]] const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1,
++ [[maybe_unused]] const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1,
++ [[maybe_unused]] const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
+ c_grid_desc_mblock_mperblock_nblock_nperblock,
+- const ComputePtrOffset compute_ptr_offset_of_groups,
+- const ComputePtrOffset compute_ptr_offset_of_n,
+- const index_t groups_count)
++ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_groups,
++ [[maybe_unused]] const ComputePtrOffset compute_ptr_offset_of_n,
++ [[maybe_unused]] const index_t groups_count)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
+ // offset base pointer for each work-group
diff --git a/var/spack/repos/builtin/packages/composable-kernel/package.py b/var/spack/repos/builtin/packages/composable-kernel/package.py
index fcb8dc682d..d666737cb3 100644
--- a/var/spack/repos/builtin/packages/composable-kernel/package.py
+++ b/var/spack/repos/builtin/packages/composable-kernel/package.py
@@ -78,6 +78,10 @@ class ComposableKernel(CMakePackage):
depends_on("llvm-amdgpu@" + ver, when="@" + ver)
depends_on("rocm-cmake@" + ver, when="@" + ver, type="build")
+ # Build is breaking on warning, -Werror, -Wunused-parameter. The patch is part of:
+ # https://github.com/ROCm/composable_kernel/commit/959073842c0db839d45d565eb260fd018c996ce4
+ patch("0001-mark-kernels-maybe-unused.patch", when="@6.2")
+
def setup_build_environment(self, env):
env.set("CXX", self.spec["hip"].hipcc)
@@ -101,6 +105,8 @@ class ComposableKernel(CMakePackage):
args.append(self.define("CMAKE_POSITION_INDEPENDENT_CODE", "ON"))
if self.spec.satisfies("@:5.7"):
args.append(self.define("CMAKE_CXX_FLAGS", "-O3"))
+ if self.spec.satisfies("@6.2:"):
+ args.append(self.define("BUILD_DEV", "OFF"))
return args
def build(self, spec, prefix):
diff --git a/var/spack/repos/builtin/packages/llvm-amdgpu/package.py b/var/spack/repos/builtin/packages/llvm-amdgpu/package.py
index bfa175a8df..3e39a4445b 100644
--- a/var/spack/repos/builtin/packages/llvm-amdgpu/package.py
+++ b/var/spack/repos/builtin/packages/llvm-amdgpu/package.py
@@ -20,7 +20,7 @@ class LlvmAmdgpu(CMakePackage, CompilerPackage):
executables = [r"amdclang", r"amdclang\+\+", r"amdflang", r"clang.*", r"flang.*", "llvm-.*"]
generator("ninja")
- maintainers("srekolam", "renjithravindrankannath", "haampie")
+ maintainers("srekolam", "renjithravindrankannath", "haampie", "afzpatel")
license("Apache-2.0")
@@ -319,6 +319,5 @@ class LlvmAmdgpu(CMakePackage, CompilerPackage):
def setup_dependent_build_environment(self, env, dependent_spec):
for root, _, files in os.walk(self.spec["llvm-amdgpu"].prefix):
if "libclang_rt.asan-x86_64.so" in files:
- asan_lib_path = root
- env.prepend_path("LD_LIBRARY_PATH", asan_lib_path)
+ env.prepend_path("LD_LIBRARY_PATH", root)
env.prune_duplicate_paths("LD_LIBRARY_PATH")