summaryrefslogtreecommitdiff
path: root/var/spack/repos/builtin/packages/hip/0017-Set-PARAMETERS_MIN_ALIGNMENT-to-the-native-alignment.patch
blob: cabc64fd2a671c1cc445a0def1d154b07ecd4e04 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
diff --git a/clr/rocclr/platform/kernel.hpp b/clr/rocclr/platform/kernel.hpp
index 8cb3b7f..d441b18 100644
--- a/clr/rocclr/platform/kernel.hpp
+++ b/clr/rocclr/platform/kernel.hpp
@@ -159,7 +159,7 @@ class KernelParameters : protected HeapObject {
         deviceKernelArgs_(false) {
     totalSize_ = signature.paramsSize() + (signature.numMemories() +
         signature.numSamplers() + signature.numQueues()) * sizeof(void*);
-    values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), 16);
+    values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), PARAMETERS_MIN_ALIGNMENT);
     memoryObjOffset_ = signature_.paramsSize();
     memoryObjects_ = reinterpret_cast<amd::Memory**>(values_ + memoryObjOffset_);
     samplerObjOffset_ = memoryObjOffset_ + signature_.numMemories() * sizeof(amd::Memory*);
@@ -183,7 +183,7 @@ class KernelParameters : protected HeapObject {
         execNewVcop_(rhs.execNewVcop_),
         execPfpaVcop_(rhs.execPfpaVcop_),
         deviceKernelArgs_(false) {
-    values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), 16);
+    values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), PARAMETERS_MIN_ALIGNMENT);
     memoryObjOffset_ = signature_.paramsSize();
     memoryObjects_ = reinterpret_cast<amd::Memory**>(values_ + memoryObjOffset_);
     samplerObjOffset_ = memoryObjOffset_ + signature_.numMemories() * sizeof(amd::Memory*);
@@ -220,7 +220,7 @@ class KernelParameters : protected HeapObject {
   //! Allocate memory for this instance as well as the required storage for
   //  the values_, defined_, and rawPointer_ arrays.
   void* operator new(size_t size, const KernelSignature& signature) {
-    size_t requiredSize = alignUp(size, 16) + signature.paramsSize() +
+    size_t requiredSize = alignUp(size, PARAMETERS_MIN_ALIGNMENT) + signature.paramsSize() +
       (signature.numMemories() + signature.numSamplers() + signature.numQueues()) *
        sizeof(void*);
     return AlignedMemory::allocate(requiredSize, PARAMETERS_MIN_ALIGNMENT);
diff --git a/clr/rocclr/utils/flags.hpp b/clr/rocclr/utils/flags.hpp
index df12fe6..88848e5 100644
--- a/clr/rocclr/utils/flags.hpp
+++ b/clr/rocclr/utils/flags.hpp
@@ -52,7 +52,7 @@ debug(size_t, CPU_MEMORY_GUARD_PAGE_SIZE, 64,                                 \
         "Size in KB of CPU memory guard page")                                \
 debug(size_t, CPU_MEMORY_ALIGNMENT_SIZE, 256,                                 \
         "Size in bytes for the default alignment for guarded memory on CPU")  \
-debug(size_t, PARAMETERS_MIN_ALIGNMENT, 16,                                   \
+debug(size_t, PARAMETERS_MIN_ALIGNMENT, NATIVE_ALIGNMENT_SIZE,                \
         "Minimum alignment required for the abstract parameters stack")       \
 debug(size_t, MEMOBJ_BASE_ADDR_ALIGN, 4*Ki,                                   \
         "Alignment of the base address of any allocate memory object")        \
diff --git a/clr/rocclr/utils/macros.hpp b/clr/rocclr/utils/macros.hpp
index 02fef75..e2110eb 100644
--- a/clr/rocclr/utils/macros.hpp
+++ b/clr/rocclr/utils/macros.hpp
@@ -126,6 +126,14 @@
 #define IS_WINDOWS false
 #endif
 
+#if defined(__AVX512F__)
+#define NATIVE_ALIGNMENT_SIZE 64
+#elif defined(__AVX__)
+#define NATIVE_ALIGNMENT_SIZE 32
+#else
+#define NATIVE_ALIGNMENT_SIZE 16
+#endif
+
 #define IF_LEFT_true(x) x
 #define IF_LEFT_false(x)
 #define IF_RIGHT_true(x)