aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gil Pitney <gil.pitney@linaro.org> 2014-11-21 18:16:58 -0800
committer Gil Pitney <gil.pitney@linaro.org> 2014-11-21 18:16:58 -0800
commit 183e63662bf50e5cb18e49db7a122f38c6a43f7b (patch)
tree 829cf75d51047def3207f1d1a2054bd55faa0a4e
parent cc4318ce5886355ad1b06d5489820db6a3a2a044 (diff)
Fixed kernel stub argument marshalling code to ensure proper alignment
Previously, shamrock was generating load instructions in the kernel stub with a strict type alignment, which was not being followed by the argument marshalling code. This resulted in a NEON vld1 instruction failing in the kernel stub, and trashing a base register which was not 16-byte aligned. Now the marshalling code calculates the proper alignment for each argument, and is based on a buffer aligned to double16 to begin with. With this patch, all the vector sub-tests of the Khronos basic_parameter_types test now pass. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r-- src/core/cpu/kernel.cpp 16
1 files changed, 10 insertions, 6 deletions
diff --git a/src/core/cpu/kernel.cpp b/src/core/cpu/kernel.cpp
index e81391f..49e4dcc 100644
--- a/src/core/cpu/kernel.cpp
+++ b/src/core/cpu/kernel.cpp
@@ -290,10 +290,11 @@ size_t CPUKernel::typeOffset(size_t &offset, size_t type_len)
// Align offset to stype_len
type_len = next_power_of_two(type_len);
- size_t mask = ~(type_len - 1);
+ size_t mask = (type_len - 1);
- while (rs & mask != rs)
- rs++;
+ if (rs&mask) {
+ rs += (type_len - rs%type_len);
+ }
// Where to try to place the next value
offset = rs + type_len;
@@ -566,9 +567,8 @@ void *CPUKernelWorkGroup::callArgs(std::vector<void *> &locals_to_free)
CPUKernel::typeOffset(args_size, arg->valueSize() * arg->vecDim());
}
- rs = std::malloc(args_size);
-
- if (!rs)
+ int retval = posix_memalign(&rs, 128, args_size); // align for type double16 size.
+ if (retval || !rs)
return NULL;
size_t arg_offset = 0;
@@ -649,6 +649,10 @@ bool CPUKernelWorkGroup::run()
std::vector<void *> locals_to_free;
llvm::Function *kernel_func = p_kernel->callFunction();
+#if 0 // Let's see the stub's IR:
+ kernel_func->dump();
+#endif
+
if (!kernel_func)
return false;