aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGenOpenCL
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGenOpenCL')
-rw-r--r--test/CodeGenOpenCL/address-spaces-mangling.cl10
-rw-r--r--test/CodeGenOpenCL/amdgcn-automatic-variable.cl2
-rw-r--r--test/CodeGenOpenCL/amdgpu-alignment.cl70
-rw-r--r--test/CodeGenOpenCL/amdgpu-env-amdgcn.cl2
-rw-r--r--test/CodeGenOpenCL/amdgpu-features.cl6
-rw-r--r--test/CodeGenOpenCL/atomic-ops.cl26
-rw-r--r--test/CodeGenOpenCL/blocks.cl34
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl28
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn-interp.cl34
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn.cl12
-rw-r--r--test/CodeGenOpenCL/cl20-device-side-enqueue.cl60
-rw-r--r--test/CodeGenOpenCL/constant-addr-space-globals.cl2
-rw-r--r--test/CodeGenOpenCL/images.cl1
-rw-r--r--test/CodeGenOpenCL/printf.cl20
-rw-r--r--test/CodeGenOpenCL/unroll-hint.cl26
-rw-r--r--test/CodeGenOpenCL/visibility.cl128
16 files changed, 325 insertions, 136 deletions
diff --git a/test/CodeGenOpenCL/address-spaces-mangling.cl b/test/CodeGenOpenCL/address-spaces-mangling.cl
index b6e6b87d9e..50622f0991 100644
--- a/test/CodeGenOpenCL/address-spaces-mangling.cl
+++ b/test/CodeGenOpenCL/address-spaces-mangling.cl
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN10 %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN20 %s
-// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN10 %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN20 %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG10" %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG10" %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s
// We check that the address spaces are mangled the same in both versions of OpenCL
// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
@@ -14,7 +14,7 @@ __attribute__((overloadable))
void ff(int *arg) { }
// ASMANG10: @_Z2ffPi
// ASMANG20: @_Z2ffPU3AS4i
-// NOASMANG10: @_Z2ffPi
+// NOASMANG10: @_Z2ffPU9CLprivatei
// NOASMANG20: @_Z2ffPU9CLgenerici
// OCL-20-DAG: @_Z2ffPU3AS4i
// OCL-12-DAG: @_Z2ffPi
diff --git a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index 59f38f80dc..7216cb5174 100644
--- a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -42,7 +42,7 @@ void func2(void) {
// CL20: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 8
int *lp1 = &lv1;
- // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i32 0, i32 0
+ // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i64 0, i64 0
// CL12: store i32 addrspace(5)* %[[arraydecay]], i32 addrspace(5)* addrspace(5)* %lp2, align 4
// CL20: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %[[arraydecay]] to i32*
// CL20: store i32* %[[r1]], i32* addrspace(5)* %lp2, align 8
diff --git a/test/CodeGenOpenCL/amdgpu-alignment.cl b/test/CodeGenOpenCL/amdgpu-alignment.cl
index b5dc47adbc..3241da612f 100644
--- a/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -92,48 +92,48 @@ typedef double __attribute__((ext_vector_type(16))) double16;
// CHECK-LABEL: @local_memory_alignment_global(
-// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1
-// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2
+// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
-// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4
-// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8
-// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16
-// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2
-// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16
+// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
-// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8
-// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16
-// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32
-// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4
-// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32
+// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
-// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16
-// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32
-// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64
-// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8
-// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64
+// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
-// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32
-// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64
-// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128
-// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2
-// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128
+// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
-// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8
-// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16
-// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32
-// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4
-// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
-// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16
-// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32
-// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64
-// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8
-// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
-// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32
-// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64
-// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128
kernel void local_memory_alignment_global()
{
volatile local char lds_i8[4];
diff --git a/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
index bcb00be8c8..4a91652ae1 100644
--- a/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
+++ b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
-// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
void foo(void) {}
diff --git a/test/CodeGenOpenCL/amdgpu-features.cl b/test/CodeGenOpenCL/amdgpu-features.cl
index 7aac4d3a36..bbfcf096ab 100644
--- a/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/test/CodeGenOpenCL/amdgpu-features.cl
@@ -10,9 +10,9 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
// GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl
index 160f7fbd52..88f2e0d0ea 100644
--- a/test/CodeGenOpenCL/atomic-ops.cl
+++ b/test/CodeGenOpenCL/atomic-ops.cl
@@ -41,7 +41,7 @@ void fi1(atomic_int *i) {
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
- // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
+ // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}
@@ -83,7 +83,7 @@ void fi3(atomic_int *i, atomic_uint *ui) {
bool fi4(atomic_int *i) {
// CHECK-LABEL: @fi4(
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire
// CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
// CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
// CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -109,7 +109,7 @@ void fi5(atomic_int *i, int scope) {
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[opencl_subgroup]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[continue]]:
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
@@ -141,21 +141,21 @@ void fi6(atomic_int *i, int order, int scope) {
// CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[MON_WG]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic
// CHECK: [[MON_DEV]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic
// CHECK: [[MON_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} monotonic
// CHECK: [[MON_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic
// CHECK: [[ACQ_WG]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire
// CHECK: [[ACQ_DEV]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire
// CHECK: [[ACQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} acquire
// CHECK: [[ACQ_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire
// CHECK: [[SEQ_WG]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: [[SEQ_DEV]]:
@@ -163,19 +163,19 @@ void fi6(atomic_int *i, int order, int scope) {
// CHECK: [[SEQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
// CHECK: [[SEQ_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
int x = __opencl_atomic_load(i, order, scope);
}
float ff1(global atomic_float *d) {
// CHECK-LABEL: @ff1
- // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+ // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
void ff2(atomic_float *d) {
// CHECK-LABEL: @ff2
- // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+ // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release
__opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
@@ -198,7 +198,7 @@ void atomic_init_foo()
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
- // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+ // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
// CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
index 675240c6f0..c3e26855df 100644
--- a/test/CodeGenOpenCL/blocks.cl
+++ b/test/CodeGenOpenCL/blocks.cl
@@ -35,31 +35,23 @@ void foo(){
// SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
// SPIR: %[[i_value:.*]] = load i32, i32* %i
// SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
// AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
// AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
// AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
// AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
int (^ block_B)(void) = ^{
return i;
@@ -98,6 +90,12 @@ int get42() {
return blockArgFunc(^{return 42;});
}
+// COMMON-LABEL: define {{.*}}@call_block
+// COMMON: call {{.*}}@__call_block_block_invoke
+int call_block() {
+ return ^int(int num) { return num; } (11);
+}
+
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
index e2c03a471b..a82fcbd758 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
@@ -12,24 +12,24 @@ kernel void builtins_amdgcn_dl_insts_err(
half2 v2hA, half2 v2hB, float fC,
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
- fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}}
- fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}}
+ fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
+ fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
- siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}}
- siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}}
+ siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
+ siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
- uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}}
- uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}}
+ uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}}
+ uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}}
- siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}}
- siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}}
+ siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
+ siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
- uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}}
- uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}}
+ uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
+ uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
- siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}}
- siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}}
+ siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
+ siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
- uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}}
- uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}}
+ uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
+ uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
}
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-interp.cl b/test/CodeGenOpenCL/builtins-amdgcn-interp.cl
new file mode 100644
index 0000000000..39d913e902
--- /dev/null
+++ b/test/CodeGenOpenCL/builtins-amdgcn-interp.cl
@@ -0,0 +1,34 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: test_interp_f16
+// CHECK: call float @llvm.amdgcn.interp.p1.f16
+// CHECK: call half @llvm.amdgcn.interp.p2.f16
+// CHECK: call float @llvm.amdgcn.interp.p1.f16
+// CHECK: call half @llvm.amdgcn.interp.p2.f16
+void test_interp_f16(global half* out, float i, float j, int m0)
+{
+ float p1_0 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, false, m0);
+ half p2_0 = __builtin_amdgcn_interp_p2_f16(p1_0, j, 2, 3, false, m0);
+ float p1_1 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, true, m0);
+ half p2_1 = __builtin_amdgcn_interp_p2_f16(p1_1, j, 2, 3, true, m0);
+ *out = p2_0 + p2_1;
+}
+
+// CHECK-LABEL: test_interp_f32
+// CHECK: call float @llvm.amdgcn.interp.p1
+// CHECK: call float @llvm.amdgcn.interp.p2
+void test_interp_f32(global float* out, float i, float j, int m0)
+{
+ float p1 = __builtin_amdgcn_interp_p1(i, 1, 4, m0);
+ *out = __builtin_amdgcn_interp_p2(p1, j, 1, 4, m0);
+}
+
+// CHECK-LABEL: test_interp_mov
+// CHECK: call float @llvm.amdgcn.interp.mov
+void test_interp_mov(global float* out, float i, float j, int m0)
+{
+ *out = __builtin_amdgcn_interp_mov(2, 3, 4, m0);
+}
diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl
index dc7f480209..6b7ea52dab 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -536,6 +536,18 @@ void test_s_getpc(global ulong* out)
*out = __builtin_amdgcn_s_getpc();
}
+// CHECK-LABEL: @test_ds_append_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false)
+kernel void test_ds_append_lds(global int* out, local int* ptr) {
+ *out = __builtin_amdgcn_ds_append(ptr);
+}
+
+// CHECK-LABEL: @test_ds_consume_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false)
+kernel void test_ds_consume_lds(global int* out, local int* ptr) {
+ *out = __builtin_amdgcn_ds_consume(ptr);
+}
+
// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
// CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 473219478a..8d77c18e7a 100644
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
// For anonymous blocks without captures, emit block literals as global variable.
// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -107,8 +107,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
});
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
- // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* {{%[0-9]+}}, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
- enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, 0,
+ // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* null, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
+ enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
^(void) {
return;
});
@@ -165,7 +165,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+ // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8*
@@ -192,7 +192,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+ // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8*
@@ -300,21 +300,19 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// Emits global block literal [[BLG8]] and invoke function [[INVG8]].
// The full type of these expressions are long (and repeated elsewhere), so we
// capture it as part of the regex for convenience and clarity.
- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
void (^const block_A)(void) = ^{
return;
};
// Emits global block literal [[BLG9]] and invoke function [[INVG9]].
- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
void (^const block_B)(local void *) = ^(local void *a) {
return;
};
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -333,20 +331,40 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
unsigned size = get_kernel_work_group_size(block_A);
// Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
+ // Make sure that the block invoke function is resolved correctly after a sequence of assignments.
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
+ bl_t b1 = block_G;
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
+ bl_t b2 = b1;
+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
+ // COMMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
+ b2(0);
+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ size = get_kernel_preferred_work_group_size_multiple(b2);
+
void (^block_C)(void) = ^{
callee(i, a);
};
-
// Emits block literal on stack and block kernel [[INVLK3]].
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
diff --git a/test/CodeGenOpenCL/constant-addr-space-globals.cl b/test/CodeGenOpenCL/constant-addr-space-globals.cl
index 5fcf117dde..47e180c690 100644
--- a/test/CodeGenOpenCL/constant-addr-space-globals.cl
+++ b/test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -26,6 +26,6 @@ kernel void k(void) {
constant int var1 = 1;
- // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i32 0, i32 0)
+ // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i64 0, i64 0)
foo(&var1, arr1, arr2, arr3);
}
diff --git a/test/CodeGenOpenCL/images.cl b/test/CodeGenOpenCL/images.cl
index eb054eceb5..baa9197847 100644
--- a/test/CodeGenOpenCL/images.cl
+++ b/test/CodeGenOpenCL/images.cl
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - -cl-std=c++ | FileCheck %s
__attribute__((overloadable)) void read_image(read_only image1d_t img_ro);
__attribute__((overloadable)) void read_image(write_only image1d_t img_wo);
diff --git a/test/CodeGenOpenCL/printf.cl b/test/CodeGenOpenCL/printf.cl
index 346f6c35ba..fc139d776d 100644
--- a/test/CodeGenOpenCL/printf.cl
+++ b/test/CodeGenOpenCL/printf.cl
@@ -12,28 +12,26 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)))
// ALL-LABEL: @test_printf_float2(
-// FP64: %conv = fpext <2 x float> %0 to <2 x double>
-// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv)
+// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0)
-// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0)
+
+// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0)
kernel void test_printf_float2(float2 arg) {
- printf("%v2f", arg);
+ printf("%v2hlf", arg);
}
// ALL-LABEL: @test_printf_half2(
-// FP64: %conv = fpext <2 x half> %0 to <2 x double>
-// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2
+// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0)
-// NOFP64: %conv = fpext <2 x half> %0 to <2 x float>
-// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %conv) #2
+// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0)
kernel void test_printf_half2(half2 arg) {
- printf("%v2f", arg);
+ printf("%v2hf", arg);
}
#ifdef cl_khr_fp64
// FP64-LABEL: @test_printf_double2(
-// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %0) #2
+// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.2, i32 0, i32 0), <2 x double> %0)
kernel void test_printf_double2(double2 arg) {
- printf("%v2f", arg);
+ printf("%v2lf", arg);
}
#endif
diff --git a/test/CodeGenOpenCL/unroll-hint.cl b/test/CodeGenOpenCL/unroll-hint.cl
index 6a9ba87a5e..0f84450a1a 100644
--- a/test/CodeGenOpenCL/unroll-hint.cl
+++ b/test/CodeGenOpenCL/unroll-hint.cl
@@ -18,12 +18,12 @@ void for_disable()
// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_DISABLE:.*]]
}
-void for_full()
+void for_enable()
{
-// CHECK-LABEL: for_full
+// CHECK-LABEL: for_enable
__attribute__((opencl_unroll_hint))
for( int i = 0; i < 1000; ++i);
-// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_FULL:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_ENABLE:.*]]
}
/*** while ***/
@@ -45,13 +45,13 @@ void while_disable()
// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_DISABLE:.*]]
}
-void while_full()
+void while_enable()
{
-// CHECK-LABEL: while_full
+// CHECK-LABEL: while_enable
int i = 1000;
__attribute__((opencl_unroll_hint))
while(i-->0);
-// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_FULL:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_ENABLE:.*]]
}
/*** do ***/
@@ -73,13 +73,13 @@ void do_disable()
// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_DISABLE:.*]]
}
-void do_full()
+void do_enable()
{
-// CHECK-LABEL: do_full
+// CHECK-LABEL: do_enable
int i = 1000;
__attribute__((opencl_unroll_hint))
do {} while(i--> 0);
-// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_FULL:.*]]
+// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_ENABLE:.*]]
}
@@ -87,11 +87,11 @@ void do_full()
// CHECK: ![[COUNT]] = !{!"llvm.loop.unroll.count", i32 8}
// CHECK: ![[FOR_DISABLE]] = distinct !{![[FOR_DISABLE]], ![[DISABLE:.*]]}
// CHECK: ![[DISABLE]] = !{!"llvm.loop.unroll.disable"}
-// CHECK: ![[FOR_FULL]] = distinct !{![[FOR_FULL]], ![[FULL:.*]]}
-// CHECK: ![[FULL]] = !{!"llvm.loop.unroll.full"}
+// CHECK: ![[FOR_ENABLE]] = distinct !{![[FOR_ENABLE]], ![[ENABLE:.*]]}
+// CHECK: ![[ENABLE]] = !{!"llvm.loop.unroll.enable"}
// CHECK: ![[WHILE_COUNT]] = distinct !{![[WHILE_COUNT]], ![[COUNT]]}
// CHECK: ![[WHILE_DISABLE]] = distinct !{![[WHILE_DISABLE]], ![[DISABLE]]}
-// CHECK: ![[WHILE_FULL]] = distinct !{![[WHILE_FULL]], ![[FULL]]}
+// CHECK: ![[WHILE_ENABLE]] = distinct !{![[WHILE_ENABLE]], ![[ENABLE]]}
// CHECK: ![[DO_COUNT]] = distinct !{![[DO_COUNT]], ![[COUNT]]}
// CHECK: ![[DO_DISABLE]] = distinct !{![[DO_DISABLE]], ![[DISABLE]]}
-// CHECK: ![[DO_FULL]] = distinct !{![[DO_FULL]], ![[FULL]]}
+// CHECK: ![[DO_ENABLE]] = distinct !{![[DO_ENABLE]], ![[ENABLE]]}
diff --git a/test/CodeGenOpenCL/visibility.cl b/test/CodeGenOpenCL/visibility.cl
new file mode 100644
index 0000000000..8ce8017d06
--- /dev/null
+++ b/test/CodeGenOpenCL/visibility.cl
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility default -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-DEFAULT %s
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility protected -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-PROTECTED %s
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility hidden -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-HIDDEN %s
+
+// REQUIRES: amdgpu-registered-target
+
+// FVIS-DEFAULT: @glob = local_unnamed_addr
+// FVIS-PROTECTED: @glob = protected local_unnamed_addr
+// FVIS-HIDDEN: @glob = hidden local_unnamed_addr
+int glob = 0;
+// FVIS-DEFAULT: @glob_hidden = hidden local_unnamed_addr
+// FVIS-PROTECTED: @glob_hidden = hidden local_unnamed_addr
+// FVIS-HIDDEN: @glob_hidden = hidden local_unnamed_addr
+__attribute__((visibility("hidden"))) int glob_hidden = 0;
+// FVIS-DEFAULT: @glob_protected = protected local_unnamed_addr
+// FVIS-PROTECTED: @glob_protected = protected local_unnamed_addr
+// FVIS-HIDDEN: @glob_protected = protected local_unnamed_addr
+__attribute__((visibility("protected"))) int glob_protected = 0;
+// FVIS-DEFAULT: @glob_default = local_unnamed_addr
+// FVIS-PROTECTED: @glob_default = local_unnamed_addr
+// FVIS-HIDDEN: @glob_default = local_unnamed_addr
+__attribute__((visibility("default"))) int glob_default = 0;
+
+// FVIS-DEFAULT: @ext = external local_unnamed_addr
+// FVIS-PROTECTED: @ext = external protected local_unnamed_addr
+// FVIS-HIDDEN: @ext = external hidden local_unnamed_addr
+extern int ext;
+// FVIS-DEFAULT: @ext_hidden = external hidden local_unnamed_addr
+// FVIS-PROTECTED: @ext_hidden = external hidden local_unnamed_addr
+// FVIS-HIDDEN: @ext_hidden = external hidden local_unnamed_addr
+__attribute__((visibility("hidden"))) extern int ext_hidden;
+// FVIS-DEFAULT: @ext_protected = external protected local_unnamed_addr
+// FVIS-PROTECTED: @ext_protected = external protected local_unnamed_addr
+// FVIS-HIDDEN: @ext_protected = external protected local_unnamed_addr
+__attribute__((visibility("protected"))) extern int ext_protected;
+// FVIS-DEFAULT: @ext_default = external local_unnamed_addr
+// FVIS-PROTECTED: @ext_default = external local_unnamed_addr
+// FVIS-HIDDEN: @ext_default = external local_unnamed_addr
+__attribute__((visibility("default"))) extern int ext_default;
+
+// FVIS-DEFAULT: define amdgpu_kernel void @kern()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern()
+kernel void kern() {}
+// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_hidden()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_hidden()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_hidden()
+__attribute__((visibility("hidden"))) kernel void kern_hidden() {}
+// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_protected()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_protected()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_protected()
+__attribute__((visibility("protected"))) kernel void kern_protected() {}
+// FVIS-DEFAULT: define amdgpu_kernel void @kern_default()
+// FVIS-PROTECTED: define amdgpu_kernel void @kern_default()
+// FVIS-HIDDEN: define amdgpu_kernel void @kern_default()
+__attribute__((visibility("default"))) kernel void kern_default() {}
+
+// FVIS-DEFAULT: define void @func()
+// FVIS-PROTECTED: define protected void @func()
+// FVIS-HIDDEN: define hidden void @func()
+void func() {}
+// FVIS-DEFAULT: define hidden void @func_hidden()
+// FVIS-PROTECTED: define hidden void @func_hidden()
+// FVIS-HIDDEN: define hidden void @func_hidden()
+__attribute__((visibility("hidden"))) void func_hidden() {}
+// FVIS-DEFAULT: define protected void @func_protected()
+// FVIS-PROTECTED: define protected void @func_protected()
+// FVIS-HIDDEN: define protected void @func_protected()
+__attribute__((visibility("protected"))) void func_protected() {}
+// FVIS-DEFAULT: define void @func_default()
+// FVIS-PROTECTED: define void @func_default()
+// FVIS-HIDDEN: define void @func_default()
+__attribute__((visibility("default"))) void func_default() {}
+
+extern kernel void ext_kern();
+__attribute__((visibility("hidden"))) extern kernel void ext_kern_hidden();
+__attribute__((visibility("protected"))) extern kernel void ext_kern_protected();
+__attribute__((visibility("default"))) extern kernel void ext_kern_default();
+
+extern void ext_func();
+__attribute__((visibility("hidden"))) extern void ext_func_hidden();
+__attribute__((visibility("protected"))) extern void ext_func_protected();
+__attribute__((visibility("default"))) extern void ext_func_default();
+
+void use() {
+ glob = ext + ext_hidden + ext_protected + ext_default;
+ ext_kern();
+ ext_kern_hidden();
+ ext_kern_protected();
+ ext_kern_default();
+ ext_func();
+ ext_func_hidden();
+ ext_func_protected();
+ ext_func_default();
+}
+
+// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
+
+// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
+
+// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
+
+// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
+
+
+// FVIS-DEFAULT: declare void @ext_func()
+// FVIS-PROTECTED: declare protected void @ext_func()
+// FVIS-HIDDEN: declare hidden void @ext_func()
+
+// FVIS-DEFAULT: declare hidden void @ext_func_hidden()
+// FVIS-PROTECTED: declare hidden void @ext_func_hidden()
+// FVIS-HIDDEN: declare hidden void @ext_func_hidden()
+
+// FVIS-DEFAULT: declare protected void @ext_func_protected()
+// FVIS-PROTECTED: declare protected void @ext_func_protected()
+// FVIS-HIDDEN: declare protected void @ext_func_protected()
+
+// FVIS-DEFAULT: declare void @ext_func_default()
+// FVIS-PROTECTED: declare void @ext_func_default()
+// FVIS-HIDDEN: declare void @ext_func_default()