diff options
author | Gil Pitney <gil.pitney@linaro.org> | 2014-11-21 18:16:58 -0800 |
---|---|---|
committer | Gil Pitney <gil.pitney@linaro.org> | 2014-11-21 18:16:58 -0800 |
commit | 183e63662bf50e5cb18e49db7a122f38c6a43f7b (patch) | |
tree | 829cf75d51047def3207f1d1a2054bd55faa0a4e | |
parent | cc4318ce5886355ad1b06d5489820db6a3a2a044 (diff) |
Fixed kernel stub argument marshalling code to ensure proper alignment
Previously, shamrock was generating load instructions in the kernel stub with
a strict type alignment, which was not being followed by the argument
marshalling code.
This resulted in a NEON vld1 instruction failing in the kernel stub,
and trashing a base register which was not 16-byte aligned.
Now the marshalling code calculates the proper alignment for each argument,
and is based on a buffer aligned to double16 to begin with.
With this patch, all the vector sub-tests of the Khronos basic_parameter_types
test now pass.
Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r-- | src/core/cpu/kernel.cpp | 16 |
1 files changed, 10 insertions, 6 deletions
diff --git a/src/core/cpu/kernel.cpp b/src/core/cpu/kernel.cpp index e81391f..49e4dcc 100644 --- a/src/core/cpu/kernel.cpp +++ b/src/core/cpu/kernel.cpp @@ -290,10 +290,11 @@ size_t CPUKernel::typeOffset(size_t &offset, size_t type_len) // Align offset to stype_len type_len = next_power_of_two(type_len); - size_t mask = ~(type_len - 1); + size_t mask = (type_len - 1); - while (rs & mask != rs) - rs++; + if (rs&mask) { + rs += (type_len - rs%type_len); + } // Where to try to place the next value offset = rs + type_len; @@ -566,9 +567,8 @@ void *CPUKernelWorkGroup::callArgs(std::vector<void *> &locals_to_free) CPUKernel::typeOffset(args_size, arg->valueSize() * arg->vecDim()); } - rs = std::malloc(args_size); - - if (!rs) + int retval = posix_memalign(&rs, 128, args_size); // align for type double16 size. + if (retval || !rs) return NULL; size_t arg_offset = 0; @@ -649,6 +649,10 @@ bool CPUKernelWorkGroup::run() std::vector<void *> locals_to_free; llvm::Function *kernel_func = p_kernel->callFunction(); +#if 0 // Let's see the stub's IR: + kernel_func->dump(); +#endif + if (!kernel_func) return false; |