aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGil Pitney <gil.pitney@linaro.org>2015-04-29 16:55:55 +0000
committerGil Pitney <gil.pitney@linaro.org>2015-04-29 16:55:55 +0000
commit7dd00e516e90d5ad84aa6ebedf7fc2bfea25247f (patch)
tree733ef242e3cca9d00279b3e451629b9b86e6e0ce
parent6e94d7f24bf1d4c15fc21003289ad968a240d8b3 (diff)
Added printf builtin for v1.2
This adds an OpenCL C file taken from pocl, with some minor tweaks. Per the Khronos v1.2 test_printf test case, this enables all of the 57 sub tests to pass, with two exceptions: *** Testing printf for vector *** 0)testing printf("%2.2v4hlf",(1.0f,2.0f,3.0f,4.0f)) *** FAILED *** 4)testing printf("%v2ld",(12345678,98765432)) *** FAILED *** Some debugging indicates a possible issue involving va_args and floating point types, which becomes apparent when passing vectors of floats to a variadic function. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
-rw-r--r--include/cpu.h3
-rw-r--r--src/builtins/CMakeLists.txt29
-rw-r--r--src/builtins/Makefile16
-rw-r--r--src/builtins/_kernel_c.h315
-rw-r--r--src/builtins/pocl_types.h75
-rw-r--r--src/builtins/printf.c476
-rw-r--r--src/core/cpu/builtins.cpp14
7 files changed, 919 insertions, 9 deletions
diff --git a/include/cpu.h b/include/cpu.h
index 1e8380f..0f74f1f 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -267,7 +267,8 @@ TERNARY_VEC_DECL(uint, uint, mad_sat)
TERNARY_VEC_DECL(long, long, mad_sat)
TERNARY_VEC_DECL(ulong, ulong, mad_sat)
-int printf(__constant char* _format, ...);
+int _cl_printf(__constant char* restrict _format, ...);
+#define printf _cl_printf
void *memcpy(void *dst, const void * src, uint size);
_CLC_DECL size_t get_local_id (uint dim);
diff --git a/src/builtins/CMakeLists.txt b/src/builtins/CMakeLists.txt
index d91c775..078dc20 100644
--- a/src/builtins/CMakeLists.txt
+++ b/src/builtins/CMakeLists.txt
@@ -1,9 +1,13 @@
if (SHAMROCK_BUILD)
-set(CUSTOM_COMMAND ${CLANG_EXECUTABLE} -cc1 -emit-llvm-bc -x cl -O2 -fno-builtin -nobuiltininc -Fvisibility=protected -ffake-address-space-map -cl-std=CL1.2 -ffp-contract=off )
+set(CUSTOM_COMMAND_C ${CLANG_EXECUTABLE} -cc1 -emit-llvm-bc -O2 -fno-builtin -nobuiltininc -Fvisibility=protected -ffake-address-space-map -cl-std=CL1.2 -ffp-contract=off )
+
+set(CUSTOM_COMMAND ${CUSTOM_COMMAND_C} -x cl )
+
FILE(GLOB CL_SOURCES ${CLC_BUILTINS_DIR}/*.cl)
-#MESSAGE(STATUS "CL_SOURCES: ${CL_SOURCES}" )
+FILE(GLOB C_SOURCES ${CLC_BUILTINS_DIR}/*.c)
+#MESSAGE(STATUS "C_SOURCES: ${C_SOURCES}" )
set(BC_SOURCES)
foreach(f ${CL_SOURCES})
@@ -18,6 +22,27 @@ foreach(f ${CL_SOURCES})
COMMENT "Generating ${bc}")
list(APPEND BC_SOURCES ${bc})
endforeach()
+
+
+MESSAGE( STATUS "LLVM_LIB_DIR: ${LLVM_LIB_DIR}")
+MESSAGE( STATUS "LLVM_VERSION: ${LLVM_VERSION}")
+set(CLANG_INCLUDE_DIR "${LLVM_LIB_DIR}/clang/${LLVM_VERSION}/include" )
+MESSAGE( STATUS "CLANG_INCLUDE_DIR: ${CLANG_INCLUDE_DIR}")
+
+foreach(f ${C_SOURCES})
+ get_filename_component(fn ${f} NAME_WE)
+ #MESSAGE(STATUS "C_SOURCE: ${f}" )
+ set(bc ${CMAKE_CURRENT_BINARY_DIR}/${fn}.bc)
+ add_custom_command(OUTPUT ${bc}
+ COMMAND ${CUSTOM_COMMAND_C}
+ -I${OCL_BUILTINS_DIR}/include
+ -I${CLANG_INCLUDE_DIR}
+ -o ${bc} ${f}
+ DEPENDS ${f}
+ COMMENT "Generating ${bc}")
+ list(APPEND BC_SOURCES ${bc})
+endforeach()
+
#MESSAGE( STATUS "BC_SOURCES: ${BC_SOURCES}")
add_custom_target(generate_bc_files DEPENDS ${BC_SOURCES})
diff --git a/src/builtins/Makefile b/src/builtins/Makefile
index 1d3349b..b9191dd 100644
--- a/src/builtins/Makefile
+++ b/src/builtins/Makefile
@@ -1,21 +1,29 @@
CLANG = clang
-CLANG_CFLAGS = -cc1 -emit-llvm-bc -x cl -O2 -fno-builtin -nobuiltininc
+CLANG_CFLAGS = -cc1 -emit-llvm-bc -O2 -fno-builtin -nobuiltininc
CLANG_CFLAGS += -Fvisibility=protected -cl-std=CL1.2 -ffp-contract=off
-CLANG_CFLAGS += -I../../include
+CLANG_CFLAGS += -I../../include -I/opt/llvm/lib/clang/3.6.0/include
+
+CLANG_CL_FLAGS += $(CLANG_CFLAGS) -x cl
CL_FILES = $(wildcard *.cl)
+C_FILES = $(wildcard *.c)
BYTECODE := ${CL_FILES:.cl=.bc}
+BYTECODE_FROM_C := ${C_FILES:.c=.bc}
all: builtins.lib
-builtins.lib: $(BYTECODE)
+builtins.lib: $(BYTECODE) $(BYTECODE_FROM_C)
@echo $@ Linking bytecode modules
llvm-link -o $@ $^
-%.bc: %.cl
+%.bc: %.c
@echo $< Parsing
@$(CLANG) $(CLANG_CFLAGS) $< -o $@
+%.bc: %.cl
+ @echo $< Parsing
+ @$(CLANG) $(CLANG_CL_FLAGS) $< -o $@
+
%.ll: %.bc
@echo $< Disassembling
llvm-dis $<
diff --git a/src/builtins/_kernel_c.h b/src/builtins/_kernel_c.h
new file mode 100644
index 0000000..dc03bd8
--- /dev/null
+++ b/src/builtins/_kernel_c.h
@@ -0,0 +1,315 @@
+/* pocl/_kernel_c.h - C compatible OpenCL types and runtime library
+ functions declarations.
+
+ Copyright (c) 2011 Universidad Rey Juan Carlos
+ Copyright (c) 2011-2013 Pekka Jääskeläinen / TUT
+ Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
+ Perimeter Institute for Theoretical Physics
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+/**
+ * Header that can be included in C-compiled implementations of
+ * built-in functions to introduce the OpenCL C compatible types etc.
+ */
+#ifndef _KERNEL_C_H
+#define _KERNEL_C_H
+
+#include "pocl_types.h"
+
+/* Function/type attributes supported by Clang/SPIR */
+#if __has_attribute(__always_inline__)
+# define _CL_ALWAYSINLINE __attribute__((__always_inline__))
+#else
+# define _CL_ALWAYSINLINE
+#endif
+#if __has_attribute(__noinline__)
+# define _CL_NOINLINE __attribute__((__noinline__))
+#else
+# define _CL_NOINLINE
+#endif
+#if __has_attribute(__overloadable__)
+# define _CL_OVERLOADABLE __attribute__((__overloadable__))
+#else
+# define _CL_OVERLOADABLE
+#endif
+#if (__clang_major__ == 3) && (__clang_minor__ >= 2)
+/* This causes an error with Clang 3.1: */
+/* #if __has_attribute(__const__) */
+# define _CL_READNONE __attribute__((__const__))
+#else
+# define _CL_READNONE
+#endif
+#if __has_attribute(__pure__)
+# define _CL_READONLY __attribute__((__pure__))
+#else
+# define _CL_READONLY
+#endif
+#if __has_attribute(__unavailable__)
+# define _CL_UNAVAILABLE __attribute__((__unavailable__))
+#else
+# define _CL_UNAVAILABLE
+#endif
+
+typedef char char2 __attribute__((__ext_vector_type__(2)));
+typedef char char3 __attribute__((__ext_vector_type__(3)));
+typedef char char4 __attribute__((__ext_vector_type__(4)));
+typedef char char8 __attribute__((__ext_vector_type__(8)));
+typedef char char16 __attribute__((__ext_vector_type__(16)));
+
+typedef uchar uchar2 __attribute__((__ext_vector_type__(2)));
+typedef uchar uchar3 __attribute__((__ext_vector_type__(3)));
+typedef uchar uchar4 __attribute__((__ext_vector_type__(4)));
+typedef uchar uchar8 __attribute__((__ext_vector_type__(8)));
+typedef uchar uchar16 __attribute__((__ext_vector_type__(16)));
+
+typedef short short2 __attribute__((__ext_vector_type__(2)));
+typedef short short3 __attribute__((__ext_vector_type__(3)));
+typedef short short4 __attribute__((__ext_vector_type__(4)));
+typedef short short8 __attribute__((__ext_vector_type__(8)));
+typedef short short16 __attribute__((__ext_vector_type__(16)));
+
+typedef ushort ushort2 __attribute__((__ext_vector_type__(2)));
+typedef ushort ushort3 __attribute__((__ext_vector_type__(3)));
+typedef ushort ushort4 __attribute__((__ext_vector_type__(4)));
+typedef ushort ushort8 __attribute__((__ext_vector_type__(8)));
+typedef ushort ushort16 __attribute__((__ext_vector_type__(16)));
+
+typedef int int2 __attribute__((__ext_vector_type__(2)));
+typedef int int3 __attribute__((__ext_vector_type__(3)));
+typedef int int4 __attribute__((__ext_vector_type__(4)));
+typedef int int8 __attribute__((__ext_vector_type__(8)));
+typedef int int16 __attribute__((__ext_vector_type__(16)));
+
+typedef uint uint2 __attribute__((__ext_vector_type__(2)));
+typedef uint uint3 __attribute__((__ext_vector_type__(3)));
+typedef uint uint4 __attribute__((__ext_vector_type__(4)));
+typedef uint uint8 __attribute__((__ext_vector_type__(8)));
+typedef uint uint16 __attribute__((__ext_vector_type__(16)));
+
+#if defined(__CBUILD__) && defined(cl_khr_fp16)
+/* NOTE: the Clang's __fp16 does not work robustly in C mode,
+ it might produce invalid code at least with half vectors.
+ Using the native 'half' type in OpenCL C mode works better. */
+typedef __fp16 half;
+#endif
+
+#ifdef cl_khr_fp16
+typedef half half2 __attribute__((__ext_vector_type__(2)));
+typedef half half3 __attribute__((__ext_vector_type__(3)));
+typedef half half4 __attribute__((__ext_vector_type__(4)));
+typedef half half8 __attribute__((__ext_vector_type__(8)));
+typedef half half16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+typedef float float2 __attribute__((__ext_vector_type__(2)));
+typedef float float3 __attribute__((__ext_vector_type__(3)));
+typedef float float4 __attribute__((__ext_vector_type__(4)));
+typedef float float8 __attribute__((__ext_vector_type__(8)));
+typedef float float16 __attribute__((__ext_vector_type__(16)));
+
+#ifdef cl_khr_fp64
+# ifndef __CBUILD__
+# pragma OPENCL EXTENSION cl_khr_fp64 : enable
+# endif
+typedef double double2 __attribute__((__ext_vector_type__(2)));
+typedef double double3 __attribute__((__ext_vector_type__(3)));
+typedef double double4 __attribute__((__ext_vector_type__(4)));
+typedef double double8 __attribute__((__ext_vector_type__(8)));
+typedef double double16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+#ifdef cl_khr_int64
+typedef long long2 __attribute__((__ext_vector_type__(2)));
+typedef long long3 __attribute__((__ext_vector_type__(3)));
+typedef long long4 __attribute__((__ext_vector_type__(4)));
+typedef long long8 __attribute__((__ext_vector_type__(8)));
+typedef long long16 __attribute__((__ext_vector_type__(16)));
+
+typedef ulong ulong2 __attribute__((__ext_vector_type__(2)));
+typedef ulong ulong3 __attribute__((__ext_vector_type__(3)));
+typedef ulong ulong4 __attribute__((__ext_vector_type__(4)));
+typedef ulong ulong8 __attribute__((__ext_vector_type__(8)));
+typedef ulong ulong16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+/* Image support */
+
+/* Starting from Clang 3.3 the image and sampler types are detected
+ as opaque types by the frontend. In order to define
+ the default builtins we use C functions which require
+ the typedefs to the actual underlying types. Clang 3.2
+ uses the typedefs throughout, as the types are not detected
+ by the frontend. */
+#if !defined(_CL_HAS_IMAGE_ACCESS)
+typedef int sampler_t;
+
+/* Since some built-ins have different return types
+ * (e.g. get_image_dim returns an int2 for 2D images and arrays,
+ * but an int4 for 3D images) we want each image type to
+ * point to a different type which is actually always the same.
+ * We do this by making it pointer to structs whose only element is a
+ * dev_image_t. The structs are not anonymous to allow identification
+ * by name.
+ */
+typedef struct _pocl_image2d_t { dev_image_t base; }* image2d_t;
+typedef struct _pocl_image3d_t { dev_image_t base; }* image3d_t;
+typedef struct _pocl_image1d_t { dev_image_t base; }* image1d_t;
+typedef struct _pocl_image1d_buffer_t { dev_image_t base; }* image1d_buffer_t;
+typedef struct _pocl_image2d_array_t { dev_image_t base; }* image2d_array_t;
+typedef struct _pocl_image1d_array_t { dev_image_t base; }* image1d_array_t;
+#endif
+
+
+/* cl_channel_order */
+#define CL_R 0x10B0
+#define CL_A 0x10B1
+#define CL_RG 0x10B2
+#define CL_RA 0x10B3
+#define CL_RGB 0x10B4
+#define CL_RGBA 0x10B5
+#define CL_BGRA 0x10B6
+#define CL_ARGB 0x10B7
+#define CL_INTENSITY 0x10B8
+#define CL_LUMINANCE 0x10B9
+#define CL_Rx 0x10BA
+#define CL_RGx 0x10BB
+#define CL_RGBx 0x10BC
+#define CL_DEPTH 0x10BD
+#define CL_DEPTH_STENCIL 0x10BE
+
+/* cl_channel_type */
+#define CL_SNORM_INT8 0x10D0
+#define CL_SNORM_INT16 0x10D1
+#define CL_UNORM_INT8 0x10D2
+#define CL_UNORM_INT16 0x10D3
+#define CL_UNORM_SHORT_565 0x10D4
+#define CL_UNORM_SHORT_555 0x10D5
+#define CL_UNORM_INT_101010 0x10D6
+#define CL_SIGNED_INT8 0x10D7
+#define CL_SIGNED_INT16 0x10D8
+#define CL_SIGNED_INT32 0x10D9
+#define CL_UNSIGNED_INT8 0x10DA
+#define CL_UNSIGNED_INT16 0x10DB
+#define CL_UNSIGNED_INT32 0x10DC
+#define CL_HALF_FLOAT 0x10DD
+#define CL_FLOAT 0x10DE
+#define CL_UNORM_INT24 0x10DF
+
+/* cl_addressing_mode */
+#define CLK_ADDRESS_NONE 0x00
+#define CLK_ADDRESS_MIRRORED_REPEAT 0x01
+#define CLK_ADDRESS_REPEAT 0x02
+#define CLK_ADDRESS_CLAMP_TO_EDGE 0x03
+#define CLK_ADDRESS_CLAMP 0x04
+
+/* cl_sampler_info */
+#define CLK_NORMALIZED_COORDS_FALSE 0x00
+#define CLK_NORMALIZED_COORDS_TRUE 0x08
+
+/* filter_mode */
+#define CLK_FILTER_NEAREST 0x00
+#define CLK_FILTER_LINEAR 0x10
+
+//#ifdef _CL_HAS_IMAGE_ACCESS
+
+float4 _CL_OVERLOADABLE read_imagef (image2d_t image, sampler_t sampler,
+ int2 coord);
+
+float4 _CL_OVERLOADABLE read_imagef (image2d_t image, sampler_t sampler,
+ float2 coord);
+
+uint4 _CL_OVERLOADABLE read_imageui (image2d_t image, sampler_t sampler,
+ int2 coord);
+
+uint4 _CL_OVERLOADABLE read_imageui (image2d_t image, sampler_t sampler,
+ int4 coord);
+
+uint4 _CL_OVERLOADABLE read_imageui (image3d_t image, sampler_t sampler,
+ int4 coord);
+
+int4 _CL_OVERLOADABLE read_imagei (image2d_t image, sampler_t sampler,
+ int2 coord);
+
+
+void _CL_OVERLOADABLE write_imagei (image2d_t image, int2 coord, int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image2d_t image, int2 coord, uint4 color);
+
+
+
+void _CL_OVERLOADABLE write_imagef (image2d_t image, int2 coord,
+ float4 color);
+/* not implemented
+void _CL_OVERLOADABLE write_imagef (image2d_array_t image, int4 coord,
+ float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image2d_array_t image, int4 coord,
+ int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image2d_array_t image, int4 coord,
+ uint4 color);
+
+void _CL_OVERLOADABLE write_imagef (image1d_t image, int coord,
+ float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image1d_t image, int coord,
+ int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image1d_t image, int coord,
+ uint4 color);
+
+void _CL_OVERLOADABLE write_imagef (image1d_buffer_t image, int coord,
+ float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image1d_buffer_t image, int coord,
+ int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image1d_buffer_t image, int coord,
+ uint4 color);
+
+void _CL_OVERLOADABLE write_imagef (image1d_array_t image, int2 coord,
+ float4 color);
+
+void _CL_OVERLOADABLE write_imagei (image1d_array_t image, int2 coord,
+ int4 color);
+
+void _CL_OVERLOADABLE write_imageui (image1d_array_t image, int2 coord,
+ uint4 color);
+
+void _CL_OVERLOADABLE write_imageui (image3d_t image, int4 coord,
+ uint4 color);
+*/
+int _CL_OVERLOADABLE get_image_width (image1d_t image);
+int _CL_OVERLOADABLE get_image_width (image2d_t image);
+int _CL_OVERLOADABLE get_image_width (image3d_t image);
+
+int _CL_OVERLOADABLE get_image_height (image1d_t image);
+int _CL_OVERLOADABLE get_image_height (image2d_t image);
+int _CL_OVERLOADABLE get_image_height (image3d_t image);
+
+int _CL_OVERLOADABLE get_image_depth (image1d_t image);
+int _CL_OVERLOADABLE get_image_depth (image2d_t image);
+int _CL_OVERLOADABLE get_image_depth (image3d_t image);
+
+int2 _CL_OVERLOADABLE get_image_dim (image2d_t image);
+int2 _CL_OVERLOADABLE get_image_dim (image2d_array_t image);
+int4 _CL_OVERLOADABLE get_image_dim (image3d_t image);
+
+#endif
diff --git a/src/builtins/pocl_types.h b/src/builtins/pocl_types.h
new file mode 100644
index 0000000..3f280cf
--- /dev/null
+++ b/src/builtins/pocl_types.h
@@ -0,0 +1,75 @@
+// Scalar type definitions
+
+//#include "pocl_features.h"
+
+#if 0 // GP: ???
+#if defined cl_khr_fp64 && !defined cl_khr_int64
+# error "cl_khr_fp64 requires cl_khr_int64"
+#endif
+#endif
+
+
+/* Disable undefined datatypes */
+
+/* The definitions below intentionally lead to errors if these types
+ are used when they are not available in the language. This prevents
+ accidentally using them if the compiler does not disable these
+ types, but only e.g. defines them with an incorrect size.*/
+
+#ifndef cl_khr_int64
+typedef struct error_undefined_type_long error_undefined_type_long;
+# define long error_undefined_type_long
+typedef struct error_undefined_type_ulong error_undefined_type_ulong;
+# define ulong error_undefined_type_ulong
+#endif
+
+#ifndef cl_khr_fp16
+typedef struct error_undefined_type_half error_undefined_type_half;
+# define half error_undefined_type_half
+#endif
+
+#ifndef cl_khr_fp64
+typedef struct error_undefined_type_double error_undefined_type_double;
+# define double error_undefined_type_double
+#endif
+
+
+/* Define unsigned datatypes */
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+#ifdef cl_khr_int64
+typedef unsigned long ulong;
+#endif
+
+/* Define pointer helper types */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef ptrdiff_t intptr_t;
+typedef size_t uintptr_t;
+
+
+/* Image types.
+ * Note: there is a duplicate definition in
+ * lib/CL/devices/dev_image.h - keep in sync?
+ */
+typedef int dev_sampler_t;
+
+typedef struct dev_image_t { /* image descriptor; wrapped as the sole `base` member of the _pocl_image*_t structs in _kernel_c.h */
+ void* data;
+ int width;
+ int height;
+ int depth;
+ int image_array_size;
+ int row_pitch;
+ int slice_pitch;
+ int num_mip_levels; /* maybe not needed */
+ int num_samples; /* maybe not needed */
+ int order; /* presumably a cl_channel_order value (CL_R, CL_RGBA, ...) - TODO confirm */
+ int data_type; /* presumably a cl_channel_type value (CL_FLOAT, ...) - TODO confirm */
+ int num_channels;
+ int elem_size;
+} dev_image_t;
+
diff --git a/src/builtins/printf.c b/src/builtins/printf.c
new file mode 100644
index 0000000..196373c
--- /dev/null
+++ b/src/builtins/printf.c
@@ -0,0 +1,476 @@
+/* OpenCL built-in library: printf()
+
+ Copyright (c) 2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
+ Perimeter Institute for Theoretical Physics
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+// Make the C99 printf visible again
+#undef printf
+
+// GP: Add the OpenCL types for "C":
+#define cl_khr_fp64
+#include "_kernel_c.h"
+
+#include <limits.h>
+#include <stdarg.h>
+#include <stdbool.h>
+
+// We implement the OpenCL printf by calling the C99 printf. This is
+// not very efficient, but is easy to implement.
+int printf(const char* restrict fmt, ...);
+int snprintf(char* restrict str, size_t size, const char* restrict fmt, ...);
+
+// For debugging
+void debug_ptr(void * arg);
+
+// Use as: DEBUG_PRINTF((fmt, args...)) -- note double parentheses!
+//#define DEBUG_PRINTF(args) printf args
+#define DEBUG_PRINTF(args) ((void)0)
+
+// Conversion flags
+typedef struct { /* one bool per conversion flag character parsed by _cl_printf */
+ bool left; /* '-' seen; helpers emit "-" into the host format */
+ bool plus; /* '+' seen; helpers emit "+" into the host format */
+ bool space; /* ' ' seen; helpers emit " " into the host format */
+ bool alt; /* '#' seen; helpers emit "#" into the host format */
+ bool zero; /* '0' seen; helpers emit "0" into the host format */
+} flags_t;
+
+
+
+// Helper routines to output integers
+
+#define INT_CONV_char "hh"
+#define INT_CONV_short "h"
+#define INT_CONV_int ""
+#define INT_CONV_long "ll" // C99 printf uses "ll" for int64_t
+
+// DEFINE_PRINT_INTS(WIDTH) defines _cl_print_ints_<WIDTH>(), which prints
+// `n` integers of C type WIDTH read from the array `vals`.
+//
+// The helper rebuilds a host (C99) printf format from the parsed pieces:
+//   %[flags][field_width][.precision]<INT_CONV_WIDTH><conv>
+// "%.0d" is used for the numbers so that a value of 0 produces no digits,
+// i.e. an unset field width is simply omitted. The precision (and its
+// leading ".") is emitted only when precision != -1.
+// Elements after the first are preceded by "," on the output.
+//
+// NOTE(review): for WIDTH == long the length modifier is "ll", while the
+// argument passed is a `long`; this only matches where `long` is 64 bits
+// wide in this compilation mode -- confirm for the target.
+#define DEFINE_PRINT_INTS(WIDTH)                                        \
+  void _cl_print_ints_##WIDTH(flags_t flags, int field_width, int precision, \
+                              char conv, const void* vals, int n)       \
+  {                                                                     \
+    DEBUG_PRINTF(("[printf:ints:n=%d]\n", n));                          \
+    char outfmt[1000];                                                  \
+    snprintf(outfmt, sizeof outfmt,                                     \
+             "%%%s%s%s%s%s%.0d%s%.0d" INT_CONV_##WIDTH "%c",            \
+             flags.left ? "-" : "",                                     \
+             flags.plus ? "+" : "",                                     \
+             flags.space ? " " : "",                                    \
+             flags.alt ? "#" : "",                                      \
+             flags.zero ? "0" : "",                                     \
+             field_width,                                               \
+             precision != -1 ? "." : "",                                \
+             precision != -1 ? precision : 0,                           \
+             conv);                                                     \
+    DEBUG_PRINTF(("[printf:ints:outfmt=%s]\n", outfmt));                \
+    for (int d=0; d<n; ++d) {                                           \
+      DEBUG_PRINTF(("[printf:ints:d=%d]\n", d));                        \
+      if (d != 0) printf(",");                                          \
+      printf(outfmt, ((const WIDTH*)vals)[d]);                          \
+    }                                                                   \
+    DEBUG_PRINTF(("[printf:ints:done]\n"));                             \
+  }
+
+DEFINE_PRINT_INTS(char)
+DEFINE_PRINT_INTS(short)
+DEFINE_PRINT_INTS(int)
+#ifdef cl_khr_int64
+DEFINE_PRINT_INTS(long)
+#endif
+
+#undef DEFINE_PRINT_INTS
+
+
+
+// Helper routines to output floats
+
+// Defined in OpenCL
+float __attribute__((overloadable)) vload_half(size_t offset, const half *p);
+
+// Note: To simplify implementation, we print double values with %lf,
+// although %f would suffice as well
+#define FLOAT_CONV_half "h"
+#define FLOAT_CONV_float ""
+#define FLOAT_CONV_double "l"
+#define FLOAT_GET_half(ptr) vload_half(0, ptr)
+#define FLOAT_GET_float(ptr) (*(ptr))
+#define FLOAT_GET_double(ptr) (*(ptr))
+
+// DEFINE_PRINT_FLOATS(WIDTH) defines _cl_print_floats_<WIDTH>(), which
+// prints `n` floating-point values of type WIDTH read from the array `vals`.
+//
+// The host (C99) printf format is rebuilt as
+//   %[flags][field_width][.precision]<FLOAT_CONV_WIDTH><conv>
+// using "%.0d" so that a field width of 0 is omitted; the precision is only
+// emitted when precision != -1. Each element is fetched through
+// FLOAT_GET_<WIDTH> and elements after the first are preceded by ",".
+//
+// The debug_ptr() calls are hooks for inspecting the format buffer and each
+// element address in a debugger; they are kept as in the original.
+#define DEFINE_PRINT_FLOATS(WIDTH)                                      \
+  void _cl_print_floats_##WIDTH(flags_t flags, int field_width, int precision, \
+                                char conv, const void* vals, int n)     \
+  {                                                                     \
+    char outfmt[1000];                                                  \
+    DEBUG_PRINTF(("[printf:floats:n=%d]\n", n));                        \
+    snprintf(outfmt, sizeof outfmt,                                     \
+             "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c",          \
+             flags.left ? "-" : "",                                     \
+             flags.plus ? "+" : "",                                     \
+             flags.space ? " " : "",                                    \
+             flags.alt ? "#" : "",                                      \
+             flags.zero ? "0" : "",                                     \
+             field_width,                                               \
+             precision != -1 ? "." : "",                                \
+             precision != -1 ? precision : 0,                           \
+             conv);                                                     \
+    DEBUG_PRINTF(("[printf:floats:outfmt=%s]\n", outfmt));              \
+    debug_ptr((void *)outfmt);                                          \
+    for (int d=0; d<n; ++d) {                                           \
+      DEBUG_PRINTF(("[printf:floats:d=%d]\n", d));                      \
+      if (d != 0) printf(",");                                          \
+      debug_ptr((void *)((const WIDTH*)vals+d));                        \
+      printf(outfmt, FLOAT_GET_##WIDTH((const WIDTH*)vals+d));          \
+    }                                                                   \
+    DEBUG_PRINTF(("[printf:floats:done]\n"));                           \
+  }
+
+#ifdef cl_khr_fp16
+DEFINE_PRINT_FLOATS(half)
+#endif
+DEFINE_PRINT_FLOATS(float)
+#ifdef cl_khr_fp64
+DEFINE_PRINT_FLOATS(double)
+#endif
+
+#undef DEFINE_PRINT_FLOATS
+
+
+
+// Helper routines to output characters, strings, and pointers
+
+/* Print one character through the host printf.
+   flags       - only flags.left is honoured ("-" in the host format)
+   field_width - minimum width; 0 means "not given" and is omitted
+   val         - the character, already promoted to int by the caller */
+void _cl_print_char(flags_t flags, int field_width, int val)
+{
+  DEBUG_PRINTF(("[printf:char]\n"));
+  const char* const justify = flags.left ? "-" : "";
+  char host_fmt[1000];
+  /* "%.0d" prints nothing for 0, so an unset width leaves just "%c" */
+  snprintf(host_fmt, sizeof host_fmt, "%%%s%.0dc", justify, field_width);
+  DEBUG_PRINTF(("[printf:char:outfmt=%s]\n", host_fmt));
+  printf(host_fmt, val);
+  DEBUG_PRINTF(("[printf:char:done]\n"));
+}
+
+/* Print a string through the host printf.
+   flags       - only flags.left is honoured ("-" in the host format)
+   field_width - minimum width; 0 means "not given" and is omitted
+   precision   - maximum number of characters to print, or a negative
+                 value when no precision was given
+   val         - the string to print
+
+   The host format is "%[-][width]s" without a precision and
+   "%[-][width].<precision>s" with one. Previously the field width was
+   dropped whenever a precision was present (e.g. "%5.1s" was printed as
+   "%.1s"); both are now forwarded. */
+void _cl_print_string(flags_t flags, int field_width, int precision, const char* val)
+{
+  DEBUG_PRINTF(("[printf:string]\n"));
+  char outfmt[1000];
+  if (precision < 0) {
+    /* "%.0d" prints nothing for 0, so an unset width is omitted */
+    snprintf(outfmt, sizeof outfmt,
+             "%%%s%.0ds",
+             flags.left ? "-" : "",
+             field_width);
+  } else {
+    snprintf(outfmt, sizeof outfmt,
+             "%%%s%.0d.%ds",
+             flags.left ? "-" : "",
+             field_width,
+             precision);
+  }
+  DEBUG_PRINTF(("[printf:string:outfmt=%s]\n", outfmt));
+  debug_ptr((void *)outfmt);
+  printf(outfmt, val);
+  DEBUG_PRINTF(("[printf:string:done]\n"));
+}
+
+/* Print a pointer value through the host printf ("%p").
+   flags       - only flags.left is honoured ("-" in the host format)
+   field_width - minimum width; 0 means "not given" and is omitted
+   val         - the pointer to print
+
+   The debug trace labels used to read "[printf:char...]" (copied from
+   _cl_print_char); they now identify this helper. */
+void _cl_print_pointer(flags_t flags, int field_width, const void* val)
+{
+  DEBUG_PRINTF(("[printf:pointer]\n"));
+  char outfmt[1000];
+  /* "%.0d" prints nothing for 0, so an unset width leaves just "%p" */
+  snprintf(outfmt, sizeof outfmt,
+           "%%%s%.0dp",
+           flags.left ? "-" : "",
+           field_width);
+  DEBUG_PRINTF(("[printf:pointer:outfmt=%s]\n", outfmt));
+  printf(outfmt, val);
+  DEBUG_PRINTF(("[printf:pointer:done]\n"));
+}
+
+
+
+// The OpenCL printf routine.
+
+// The implementation is straightforward:
+// - walk through the format string
+// - when a variable should be output, parse flags, field width,
+// precision, vector specifier, length, and conversion specifier
+// - call a helper routine to perform the actual output
+// - the helper routine is based on calling C99 printf, and constructs
+// a format string via snprintf
+// - if there is an error during parsing, a "goto error" aborts the
+// routine, returning -1
+
+#define OCL_CONSTANT_AS __attribute__((address_space(3)))
+/* OpenCL printf entry point (cpu.h maps the kernel-visible `printf` onto
+   this symbol).
+
+   format - format string in the constant address space
+   ...    - the values to print; vector arguments are fetched as vectors
+
+   Walks the format string; literal characters are printed one by one, and
+   each conversion is parsed as
+     %[flags][width][.precision][vN][hh|h|hl|l]<conv>
+   and handed to one of the _cl_print_* helpers.
+
+   Returns 0 on success. On a malformed or unsupported conversion it prints
+   "(printf format string error)" and returns -1; output produced before
+   the error has already been written.
+
+   A length modifier that has no implementation for the requested
+   conversion (for example "%hhf", or "%ld" when cl_khr_int64 is not
+   defined) is treated as a format error. These cases were previously
+   marked __builtin_unreachable() although a kernel's format string can
+   reach them. */
+int _cl_printf(const OCL_CONSTANT_AS char* restrict format, ...)
+{
+  DEBUG_PRINTF(("[printf:format=%s]\n", format));
+  va_list ap;
+  va_start(ap, format);
+
+  char ch = *format;
+  while (ch) {
+    if (ch == '%') {
+      ch = *++format;
+
+      if (ch == '%') {
+        DEBUG_PRINTF(("[printf:%%]\n"));
+        printf("%%"); // literal %
+        ch = *++format;
+      } else {
+        DEBUG_PRINTF(("[printf:arg]\n"));
+        // Flags (each flag may appear at most once)
+        flags_t flags;
+        flags.left = false;
+        flags.plus = false;
+        flags.space = false;
+        flags.alt = false;
+        flags.zero = false;
+        for (;;) {
+          switch (ch) {
+          case '-': if (flags.left) goto error; flags.left = true; break;
+          case '+': if (flags.plus) goto error; flags.plus = true; break;
+          case ' ': if (flags.space) goto error; flags.space = true; break;
+          case '#': if (flags.alt) goto error; flags.alt = true; break;
+          case '0': if (flags.zero) goto error; flags.zero = true; break;
+          default: goto flags_done;
+          }
+          ch = *++format;
+        }
+      flags_done:;
+        DEBUG_PRINTF(("[printf:flags:left=%d,plus=%d,space=%d,alt=%d,zero=%d]\n",
+                      flags.left, flags.plus, flags.space, flags.alt, flags.zero));
+
+        // Field width (decimal, no leading zero, overflow-checked)
+        int field_width = 0;
+        while (ch >= '0' && ch <= '9') {
+          if (ch == '0' && field_width == 0) goto error;
+          if (field_width > (INT_MAX - 9) / 10) goto error;
+          field_width = 10 * field_width + (ch - '0');
+          ch = *++format;
+        }
+        DEBUG_PRINTF(("[printf:width=%d]\n", field_width));
+
+        // Precision (-1 means "not given"; a bare "." means 0)
+        int precision = -1;
+        if (ch == '.') {
+          ch = *++format;
+          precision = 0;
+          while (ch >= '0' && ch <= '9') {
+            if (precision > (INT_MAX - 9) / 10) goto error;
+            precision = 10 * precision + (ch - '0');
+            ch = *++format;
+          }
+        }
+        DEBUG_PRINTF(("[printf:precision=%d]\n", precision));
+
+        // Vector specifier ("vN" with N in {2,3,4,8,16})
+        int vector_length = 0;
+        if (ch == 'v') {
+          ch = *++format;
+          while (ch >= '0' && ch <= '9') {
+            if (ch == '0' && vector_length == 0) goto error;
+            if (vector_length > (INT_MAX - 9) / 10) goto error;
+            vector_length = 10 * vector_length + (ch - '0');
+            ch = *++format;
+          }
+          if (! (vector_length == 2 ||
+                 vector_length == 3 ||
+                 vector_length == 4 ||
+                 vector_length == 8 ||
+                 vector_length == 16)) goto error;
+        }
+        DEBUG_PRINTF(("[printf:vector_length=%d]\n", vector_length));
+
+        // Length modifier (stored as the element size in bytes, 0 = none)
+        int length = 0; // default
+        if (ch == 'h') {
+          ch = *++format;
+          if (ch == 'h') {
+            ch = *++format;
+            length = 1; // "hh" -> char
+          } else if (ch == 'l') {
+            ch = *++format;
+            length = 4; // "hl" -> int or float
+          } else {
+            length = 2; // "h" -> short
+          }
+        } else if (ch == 'l') {
+          ch = *++format;
+          length = 8; // "l" -> long
+        }
+        // Vectors require a length modifier; "hl" is only valid with vectors
+        if (vector_length > 0 && length == 0) goto error;
+        if (vector_length == 0 && length == 4) goto error;
+        if (vector_length == 0) vector_length = 1;
+        DEBUG_PRINTF(("[printf:length=%d]\n", length));
+
+        // Conversion specifier
+        switch (ch) {
+
+          // Output integers
+        case 'd':
+        case 'i':
+        case 'o':
+        case 'u':
+        case 'x':
+        case 'X':
+
+          // vector_length was validated above, so the inner default really
+          // is unreachable
+#define CALL_PRINT_INTS(WIDTH, PROMOTED_WIDTH)                          \
+          {                                                             \
+            WIDTH##16 val;                                              \
+            switch (vector_length) {                                    \
+            default: __builtin_unreachable();                           \
+            case 1: val.s0 = va_arg(ap, PROMOTED_WIDTH); break;         \
+            case 2: val.s01 = va_arg(ap, WIDTH##2); break;              \
+            case 3: val.s012 = va_arg(ap, WIDTH##3); break;             \
+            case 4: val.s0123 = va_arg(ap, WIDTH##4); break;            \
+            case 8: val.lo = va_arg(ap, WIDTH##8); break;               \
+            case 16: val = va_arg(ap, WIDTH##16); break;                \
+            }                                                           \
+            _cl_print_ints_##WIDTH(flags, field_width, precision,       \
+                                   ch, &val, vector_length);            \
+          }
+
+          DEBUG_PRINTF(("[printf:int:conversion=%c]\n", ch));
+          switch (length) {
+            // Reachable from the format string, e.g. "l" when
+            // cl_khr_int64 is not defined: report a format error
+          default: goto error;
+          case 1: CALL_PRINT_INTS(char, int); break;
+          case 2: CALL_PRINT_INTS(short, int); break;
+          case 0:
+          case 4: CALL_PRINT_INTS(int, int); break;
+#ifdef cl_khr_int64
+          case 8: CALL_PRINT_INTS(long, long); break;
+#endif
+          }
+
+#undef CALL_PRINT_INTS
+
+          break;
+
+          // Output floats
+        case 'f':
+        case 'F':
+        case 'e':
+        case 'E':
+        case 'g':
+        case 'G':
+        case 'a':
+        case 'A':
+
+          // The tmp/debug_ptr sequence only exposes the first four lanes to
+          // a debugger; for vector_length < 4 the extra lanes are unset
+#define CALL_PRINT_FLOATS(WIDTH, PROMOTED_WIDTH)                        \
+          {                                                             \
+            WIDTH##16 val;                                              \
+            switch (vector_length) {                                    \
+            default: __builtin_unreachable();                           \
+            case 1: val.s0 = va_arg(ap, PROMOTED_WIDTH); break;         \
+            case 2: val.s01 = va_arg(ap, WIDTH##2); break;              \
+            case 3: val.s012 = va_arg(ap, WIDTH##3); break;             \
+            case 4: val.s0123 = va_arg(ap, WIDTH##4); break;            \
+            case 8: val.lo = va_arg(ap, WIDTH##8); break;               \
+            case 16: val = va_arg(ap, WIDTH##16); break;                \
+            }                                                           \
+            float tmp;\
+            tmp = val.s0; \
+            debug_ptr((void *)&tmp); \
+            tmp = val.s1; \
+            debug_ptr((void *)&tmp); \
+            tmp = val.s2; \
+            debug_ptr((void *)&tmp); \
+            tmp = val.s3; \
+            debug_ptr((void *)&tmp); \
+            _cl_print_floats_##WIDTH(flags, field_width, precision,     \
+                                     ch, &val, vector_length);          \
+          }
+
+          DEBUG_PRINTF(("[printf:float:conversion=%c]\n", ch));
+          switch (length) {
+            // Reachable from the format string, e.g. "hh", or "h" when
+            // cl_khr_fp16 is not defined: report a format error
+          default: goto error;
+#ifdef cl_khr_fp16
+            // case 2: CALL_PRINT_FLOATS(half, double); break;
+          case 2: goto error; // not yet implemented
+#endif
+          case 0:
+            // Note: with double support, width 0 falls through to the
+            // double case; without it, nothing is printed and no
+            // argument is consumed
+#ifdef cl_khr_fp64
+          case 8: CALL_PRINT_FLOATS(double, double); break;
+          case 4: CALL_PRINT_FLOATS(float, double); break;
+#else
+            break;
+#endif
+          }
+
+#undef CALL_PRINT_FLOATS
+
+          break;
+
+          // Output a character
+        case 'c': {
+          DEBUG_PRINTF(("[printf:char]\n"));
+          if (flags.plus || flags.space || flags.alt || flags.zero) goto error;
+          DEBUG_PRINTF(("[printf:char1]\n"));
+          if (precision != -1) goto error;
+          DEBUG_PRINTF(("[printf:char2]\n"));
+          if (vector_length != 1) goto error;
+          DEBUG_PRINTF(("[printf:char3]\n"));
+          if (length != 0) goto error;
+          DEBUG_PRINTF(("[printf:char4]\n"));
+          int val = va_arg(ap, int);
+          _cl_print_char(flags, field_width, val);
+          break;
+        }
+
+          // Output a string
+        case 's': {
+          if (flags.plus || flags.space || flags.alt || flags.zero) goto error;
+          if (vector_length != 1) goto error;
+          if (length != 0) goto error;
+          const char* val = va_arg(ap, const char*);
+          // GP: Note: v1.2 Khronos test_printf tests for "%.1s", so need to check precision
+          _cl_print_string(flags, field_width, precision, val);
+          break;
+        }
+
+          // Output a pointer
+        case 'p': {
+          if (flags.plus || flags.space || flags.alt || flags.zero) goto error;
+          if (precision != -1) goto error;
+          if (vector_length != 1) goto error;
+          if (length != 0) goto error;
+          const void* val = va_arg(ap, const void*);
+          _cl_print_pointer(flags, field_width, val);
+          break;
+        }
+
+        default: goto error;
+        }
+        ch = *++format;
+
+      } // not a literal %
+
+    } else {
+      DEBUG_PRINTF(("[printf:literal]\n"));
+      printf("%c", ch);
+      ch = *++format;
+    }
+  }
+
+  va_end(ap);
+  DEBUG_PRINTF(("[printf:done]\n"));
+  return 0;
+
+ error:;
+  va_end(ap);
+  DEBUG_PRINTF(("[printf:error]\n"));
+  printf("(printf format string error)");
+  return -1;
+}
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index 137d34e..df8fd3c 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -411,6 +411,14 @@ static void unimplemented_stub()
{
}
+void debug_ptr(void * arg) // debugger hook: set a breakpoint here and inspect the locals; no result is returned or stored
+{
+ char *s = (char *)arg; // arg viewed as a character pointer
+ float f = *(float *)arg; // arg's first bytes read as a float
+ double d = *(double *)arg; // NOTE(review): reads sizeof(double) bytes, but printf.c also passes the address of a single float and of a char buffer - confirm the over-read/alignment is acceptable on the target
+ int i = *(int *)arg; // arg's first bytes read as an int
+}
+
void *getBuiltin(const std::string &name)
{
if (name == "get_global_id")
@@ -465,8 +473,10 @@ void *getBuiltin(const std::string &name)
else if (name == "__cpu_read_imageuif")
return (void *)&read_imageuif;
- else if (name == "debug")
- return (void *)&printf;
+ // Generic hook to set debugger bpt to inspect stack variable passed as (void *)
+ else if (name == "debug_ptr")
+ return(void *)&debug_ptr;
+
else if (name == "__aeabi_unwind_cpp_pr0")
return (void *)&dummy_fxn;
else if (name == "__aeabi_unwind_cpp_pr1")