summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--var/spack/repos/builtin/packages/rocblas/0006-Guard-use-of-OpenMP-to-make-it-optional-5.4.patch414
-rw-r--r--var/spack/repos/builtin/packages/rocblas/package.py2
2 files changed, 415 insertions, 1 deletions
diff --git a/var/spack/repos/builtin/packages/rocblas/0006-Guard-use-of-OpenMP-to-make-it-optional-5.4.patch b/var/spack/repos/builtin/packages/rocblas/0006-Guard-use-of-OpenMP-to-make-it-optional-5.4.patch
new file mode 100644
index 0000000000..c70da5a855
--- /dev/null
+++ b/var/spack/repos/builtin/packages/rocblas/0006-Guard-use-of-OpenMP-to-make-it-optional-5.4.patch
@@ -0,0 +1,414 @@
+From bfd22e90c29a6975ff8f55fa968833763816e579 Mon Sep 17 00:00:00 2001
+From: Renjith Ravindran <Renjith.RavindranKannath@amd.com>
+Date: Wed, 26 Apr 2023 15:11:02 +0000
+Subject: [PATCH] Guard use of OpenMP to make it optional
+
+---
+ clients/common/blis_interface.cpp | 3 +-
+ clients/common/cblas_interface.cpp | 14 ++++++--
+ clients/include/rocblas_init.hpp | 52 ++++++++++++++++++++++++++++++
+ clients/include/utility.hpp | 2 ++
+ clients/samples/example_openmp.cpp | 8 ++++-
+ 5 files changed, 75 insertions(+), 4 deletions(-)
+
+diff --git a/clients/common/blis_interface.cpp b/clients/common/blis_interface.cpp
+index da7aef3..76832f6 100644
+--- a/clients/common/blis_interface.cpp
++++ b/clients/common/blis_interface.cpp
+@@ -21,8 +21,9 @@
+ * ************************************************************************ */
+
+ #include "blis.h"
++#ifdef _OPENMP
+ #include "omp.h"
+-
++#endif
+ void setup_blis()
+ {
+ #ifndef WIN32
+diff --git a/clients/common/cblas_interface.cpp b/clients/common/cblas_interface.cpp
+index 91d3681..81aebf6 100644
+--- a/clients/common/cblas_interface.cpp
++++ b/clients/common/cblas_interface.cpp
+@@ -23,8 +23,9 @@
+ #include "rocblas_vector.hpp"
+ #include "utility.hpp"
+ #include <bitset>
++#ifdef _OPENMP
+ #include <omp.h>
+-
++#endif
+ /*
+ * ===========================================================================
+ * level 1 BLAS
+@@ -461,8 +462,9 @@ void cblas_geam_helper(rocblas_operation transA,
+ rocblas_int inc2_A = transA == rocblas_operation_none ? lda : 1;
+ rocblas_int inc1_B = transB == rocblas_operation_none ? 1 : ldb;
+ rocblas_int inc2_B = transB == rocblas_operation_none ? ldb : 1;
+-
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(rocblas_int i = 0; i < M; i++)
+ {
+ for(rocblas_int j = 0; j < N; j++)
+@@ -895,7 +897,9 @@ void cblas_herkx(rocblas_fill uplo,
+ {
+ if(uplo == rocblas_fill_upper)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(int j = 0; j < n; ++j)
+ {
+ for(int i = 0; i <= j; i++)
+@@ -917,7 +921,9 @@ void cblas_herkx(rocblas_fill uplo,
+ }
+ else // lower
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(int j = 0; j < n; ++j)
+ {
+ for(int i = j; i < n; i++)
+@@ -942,7 +948,9 @@ void cblas_herkx(rocblas_fill uplo,
+ {
+ if(uplo == rocblas_fill_upper)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(int j = 0; j < n; ++j)
+ {
+ for(int i = 0; i <= j; i++)
+@@ -966,7 +974,9 @@ void cblas_herkx(rocblas_fill uplo,
+ }
+ else // lower
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(int j = 0; j < n; ++j)
+ {
+ for(int i = j; i < n; i++)
+diff --git a/clients/include/rocblas_init.hpp b/clients/include/rocblas_init.hpp
+index ae2dd6d..ee812e6 100644
+--- a/clients/include/rocblas_init.hpp
++++ b/clients/include/rocblas_init.hpp
+@@ -29,7 +29,9 @@
+ #include "rocblas_random.hpp"
+ #include <cinttypes>
+ #include <iostream>
++#ifdef _OPENMP
+ #include <omp.h>
++#endif
+ #include <vector>
+
+ //!
+@@ -70,7 +72,9 @@ void rocblas_init_matrix_alternating_sign(rocblas_check_matrix_type matrix_type,
+ if(matrix_type == rocblas_client_general_matrix)
+ {
+ for(size_t b = 0; b < batch_count; b++)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -81,7 +85,9 @@ void rocblas_init_matrix_alternating_sign(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_triangular_matrix)
+ {
+ for(size_t b = 0; b < batch_count; b++)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -107,7 +113,9 @@ void rocblas_init_matrix_alternating_sign(rocblas_check_matrix_type matrix_type,
+
+ if(matrix_type == rocblas_client_general_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -117,7 +125,9 @@ void rocblas_init_matrix_alternating_sign(rocblas_check_matrix_type matrix_type,
+ }
+ else if(matrix_type == rocblas_client_triangular_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -133,7 +143,9 @@ void rocblas_init_matrix_alternating_sign(rocblas_check_matrix_type matrix_type,
+ template <typename T>
+ void rocblas_init_vector_alternating_sign(T rand_gen(), T* x, size_t N, size_t incx)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t j = 0; j < N; ++j)
+ {
+ auto value = rand_gen();
+@@ -159,7 +171,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ if(matrix_type == rocblas_client_general_matrix)
+ {
+ for(size_t b = 0; b < batch_count; b++)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ A[i + j * lda + b * stride] = rand_gen();
+@@ -167,7 +181,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_hermitian_matrix)
+ {
+ for(size_t b = 0; b < batch_count; ++b)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -194,7 +210,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_symmetric_matrix)
+ {
+ for(size_t b = 0; b < batch_count; ++b)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -221,7 +239,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_triangular_matrix)
+ {
+ for(size_t b = 0; b < batch_count; b++)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -246,14 +266,18 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ auto lda = hA.lda();
+ if(matrix_type == rocblas_client_general_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ A[i + j * lda] = rand_gen();
+ }
+ else if(matrix_type == rocblas_client_hermitian_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -279,7 +303,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ }
+ else if(matrix_type == rocblas_client_symmetric_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -305,7 +331,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ }
+ else if(matrix_type == rocblas_client_triangular_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -323,7 +351,9 @@ void rocblas_init_matrix(rocblas_check_matrix_type matrix_type,
+ template <typename T>
+ void rocblas_init_vector(T rand_gen(), T* x, size_t N, size_t incx)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t j = 0; j < N; ++j)
+ x[j * incx] = rand_gen();
+ }
+@@ -346,7 +376,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ if(matrix_type == rocblas_client_general_matrix)
+ {
+ for(size_t b = 0; b < batch_count; b++)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ A[i + j * lda + b * stride] = T(seedReset ? cos(i + j * lda + b * stride)
+@@ -355,7 +387,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_hermitian_matrix)
+ {
+ for(size_t b = 0; b < batch_count; ++b)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -384,7 +418,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_symmetric_matrix)
+ {
+ for(size_t b = 0; b < batch_count; ++b)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -412,7 +448,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ else if(matrix_type == rocblas_client_triangular_matrix)
+ {
+ for(size_t b = 0; b < batch_count; b++)
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -443,14 +481,18 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+
+ if(matrix_type == rocblas_client_general_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ A[i + j * lda] = T(seedReset ? cos(i + j * lda) : sin(i + j * lda));
+ }
+ else if(matrix_type == rocblas_client_hermitian_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -477,7 +519,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ }
+ else if(matrix_type == rocblas_client_symmetric_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < N; ++i)
+ for(size_t j = 0; j <= i; ++j)
+ {
+@@ -503,7 +547,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ }
+ else if(matrix_type == rocblas_client_triangular_matrix)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t i = 0; i < M; ++i)
+ for(size_t j = 0; j < N; ++j)
+ {
+@@ -524,7 +570,9 @@ void rocblas_init_matrix_trig(rocblas_check_matrix_type matrix_type,
+ template <typename T>
+ void rocblas_init_vector_trig(T* x, size_t N, size_t incx, bool seedReset = false)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t j = 0; j < N; ++j)
+ x[j * incx] = T(seedReset ? cos(j * incx) : sin(j * incx));
+ }
+@@ -803,7 +851,9 @@ void rocblas_copy_matrix(const T* A,
+ {
+ size_t stride_offset_a = i_batch * stridea;
+ size_t stride_offset_b = i_batch * strideb;
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t j = 0; j < N; ++j)
+ {
+ size_t offset_a = stride_offset_a + j * lda;
+@@ -822,7 +872,9 @@ void rocblas_copy_matrix(
+
+ for(size_t i_batch = 0; i_batch < batch_count; i_batch++)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(size_t j = 0; j < N; ++j)
+ {
+ size_t offset_a = j * lda;
+diff --git a/clients/include/utility.hpp b/clients/include/utility.hpp
+index 7635426..34f8f2a 100644
+--- a/clients/include/utility.hpp
++++ b/clients/include/utility.hpp
+@@ -280,7 +280,9 @@ inline void regular_to_packed(bool upper, const T* A, T* AP, rocblas_int n)
+ template <typename U>
+ inline void regular_to_packed(bool upper, U& h_A, U& h_AP, rocblas_int n)
+ {
++#ifdef _OPENMP
+ #pragma omp parallel for
++#endif
+ for(rocblas_int batch_index = 0; batch_index < h_A.batch_count(); ++batch_index)
+ {
+ auto* AP = h_AP[batch_index];
+diff --git a/clients/samples/example_openmp.cpp b/clients/samples/example_openmp.cpp
+index f62dae6..8cc5f2e 100644
+--- a/clients/samples/example_openmp.cpp
++++ b/clients/samples/example_openmp.cpp
+@@ -1,5 +1,5 @@
+ /* ************************************************************************
+- * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved.
++ * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+@@ -42,7 +42,9 @@
+ #include <cstdlib>
+ #include <hip/hip_runtime.h>
+ #include <iostream>
++#ifdef _OPENMP
+ #include <omp.h>
++#endif
+ #include <vector>
+
+ #define NUM_THREADS 4
+@@ -100,7 +102,9 @@ int main()
+
+ // 1st parallel rocblas routine call : scal x
+ // spawn openmp threads
++#ifdef _OPENMP
+ #pragma omp parallel private(thread_id)
++#endif
+ {
+
+ thread_id = omp_get_thread_num(); // thread_id from 0,...,NUM_THREADS-1
+@@ -118,7 +122,9 @@ int main()
+
+ // 2nd parallel rocblas routine call : copy x to y
+ // spawn openmp threads
++#ifdef _OPENMP
+ #pragma omp parallel private(thread_id)
++#endif
+ {
+
+ thread_id = omp_get_thread_num(); // thread_id from 0,...,NUM_THREADS-1
+--
+2.17.1
+
diff --git a/var/spack/repos/builtin/packages/rocblas/package.py b/var/spack/repos/builtin/packages/rocblas/package.py
index b8f3816e37..ba92d43aec 100644
--- a/var/spack/repos/builtin/packages/rocblas/package.py
+++ b/var/spack/repos/builtin/packages/rocblas/package.py
@@ -135,7 +135,6 @@ class Rocblas(CMakePackage):
depends_on("googletest@1.10.0:", type="test")
depends_on("netlib-lapack@3.7.1:", type="test")
- depends_on("llvm-amdgpu +openmp", type="test")
def check(self):
if "@4.2.0:" in self.spec:
@@ -238,6 +237,7 @@ class Rocblas(CMakePackage):
patch("0003-Fix-rocblas-gentest.patch", when="@4.2.0:5.1")
# Finding Python package and set command python as python3
patch("0004-Find-python.patch", when="@5.2.0:")
+ patch("0006-Guard-use-of-OpenMP-to-make-it-optional-5.4.patch", when="@5.4:")
def setup_build_environment(self, env):
env.set("CXX", self.spec["hip"].hipcc)