1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
diff --git a/clr/rocclr/platform/kernel.hpp b/clr/rocclr/platform/kernel.hpp
index 8cb3b7f..d441b18 100644
--- a/clr/rocclr/platform/kernel.hpp
+++ b/clr/rocclr/platform/kernel.hpp
@@ -159,7 +159,7 @@ class KernelParameters : protected HeapObject {
deviceKernelArgs_(false) {
totalSize_ = signature.paramsSize() + (signature.numMemories() +
signature.numSamplers() + signature.numQueues()) * sizeof(void*);
- values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), 16);
+ values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), PARAMETERS_MIN_ALIGNMENT);
memoryObjOffset_ = signature_.paramsSize();
memoryObjects_ = reinterpret_cast<amd::Memory**>(values_ + memoryObjOffset_);
samplerObjOffset_ = memoryObjOffset_ + signature_.numMemories() * sizeof(amd::Memory*);
@@ -183,7 +183,7 @@ class KernelParameters : protected HeapObject {
execNewVcop_(rhs.execNewVcop_),
execPfpaVcop_(rhs.execPfpaVcop_),
deviceKernelArgs_(false) {
- values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), 16);
+ values_ = reinterpret_cast<address>(this) + alignUp(sizeof(KernelParameters), PARAMETERS_MIN_ALIGNMENT);
memoryObjOffset_ = signature_.paramsSize();
memoryObjects_ = reinterpret_cast<amd::Memory**>(values_ + memoryObjOffset_);
samplerObjOffset_ = memoryObjOffset_ + signature_.numMemories() * sizeof(amd::Memory*);
@@ -220,7 +220,7 @@ class KernelParameters : protected HeapObject {
//! Allocate memory for this instance as well as the required storage for
// the values_, defined_, and rawPointer_ arrays.
void* operator new(size_t size, const KernelSignature& signature) {
- size_t requiredSize = alignUp(size, 16) + signature.paramsSize() +
+ size_t requiredSize = alignUp(size, PARAMETERS_MIN_ALIGNMENT) + signature.paramsSize() +
(signature.numMemories() + signature.numSamplers() + signature.numQueues()) *
sizeof(void*);
return AlignedMemory::allocate(requiredSize, PARAMETERS_MIN_ALIGNMENT);
diff --git a/clr/rocclr/utils/flags.hpp b/clr/rocclr/utils/flags.hpp
index df12fe6..88848e5 100644
--- a/clr/rocclr/utils/flags.hpp
+++ b/clr/rocclr/utils/flags.hpp
@@ -52,7 +52,7 @@ debug(size_t, CPU_MEMORY_GUARD_PAGE_SIZE, 64, \
"Size in KB of CPU memory guard page") \
debug(size_t, CPU_MEMORY_ALIGNMENT_SIZE, 256, \
"Size in bytes for the default alignment for guarded memory on CPU") \
-debug(size_t, PARAMETERS_MIN_ALIGNMENT, 16, \
+debug(size_t, PARAMETERS_MIN_ALIGNMENT, NATIVE_ALIGNMENT_SIZE, \
"Minimum alignment required for the abstract parameters stack") \
debug(size_t, MEMOBJ_BASE_ADDR_ALIGN, 4*Ki, \
"Alignment of the base address of any allocate memory object") \
diff --git a/clr/rocclr/utils/macros.hpp b/clr/rocclr/utils/macros.hpp
index 02fef75..e2110eb 100644
--- a/clr/rocclr/utils/macros.hpp
+++ b/clr/rocclr/utils/macros.hpp
@@ -126,6 +126,14 @@
#define IS_WINDOWS false
#endif
+#if defined(__AVX512F__)  // NATIVE_ALIGNMENT_SIZE: alignment in bytes, picked by the preprocessor from the SIMD ISA enabled at compile time (not runtime CPU detection)
+#define NATIVE_ALIGNMENT_SIZE 64  // AVX-512F enabled: 64 bytes = 512 bits
+#elif defined(__AVX__)  // checked only when AVX-512F is not enabled
+#define NATIVE_ALIGNMENT_SIZE 32  // AVX enabled: 32 bytes = 256 bits
+#else  // neither __AVX512F__ nor __AVX__ is defined for this translation unit
+#define NATIVE_ALIGNMENT_SIZE 16  // fallback; same value PARAMETERS_MIN_ALIGNMENT was hard-coded to before
+#endif  // NATIVE_ALIGNMENT_SIZE selection
+
#define IF_LEFT_true(x) x
#define IF_LEFT_false(x)
#define IF_RIGHT_true(x)
|