diff options
Diffstat (limited to 'test/CodeGenOpenCL')
-rw-r--r-- | test/CodeGenOpenCL/address-spaces-mangling.cl | 10 | ||||
-rw-r--r-- | test/CodeGenOpenCL/amdgcn-automatic-variable.cl | 2 | ||||
-rw-r--r-- | test/CodeGenOpenCL/amdgpu-alignment.cl | 70 | ||||
-rw-r--r-- | test/CodeGenOpenCL/amdgpu-env-amdgcn.cl | 2 | ||||
-rw-r--r-- | test/CodeGenOpenCL/amdgpu-features.cl | 6 | ||||
-rw-r--r-- | test/CodeGenOpenCL/atomic-ops.cl | 26 | ||||
-rw-r--r-- | test/CodeGenOpenCL/blocks.cl | 34 | ||||
-rw-r--r-- | test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl | 28 | ||||
-rw-r--r-- | test/CodeGenOpenCL/builtins-amdgcn-interp.cl | 34 | ||||
-rw-r--r-- | test/CodeGenOpenCL/builtins-amdgcn.cl | 12 | ||||
-rw-r--r-- | test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 60 | ||||
-rw-r--r-- | test/CodeGenOpenCL/constant-addr-space-globals.cl | 2 | ||||
-rw-r--r-- | test/CodeGenOpenCL/images.cl | 1 | ||||
-rw-r--r-- | test/CodeGenOpenCL/printf.cl | 20 | ||||
-rw-r--r-- | test/CodeGenOpenCL/unroll-hint.cl | 26 | ||||
-rw-r--r-- | test/CodeGenOpenCL/visibility.cl | 128 |
16 files changed, 325 insertions, 136 deletions
diff --git a/test/CodeGenOpenCL/address-spaces-mangling.cl b/test/CodeGenOpenCL/address-spaces-mangling.cl index b6e6b87d9e..50622f0991 100644 --- a/test/CodeGenOpenCL/address-spaces-mangling.cl +++ b/test/CodeGenOpenCL/address-spaces-mangling.cl @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN10 %s -// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN20 %s -// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN10 %s -// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN20 %s +// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG10" %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s +// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG10" %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s // We check that the address spaces are mangled the same in both version of OpenCL // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s @@ -14,7 +14,7 @@ __attribute__((overloadable)) void ff(int *arg) { } // ASMANG10: @_Z2ffPi // ASMANG20: @_Z2ffPU3AS4i -// NOASMANG10: @_Z2ffPi +// NOASMANG10: @_Z2ffPU9CLprivatei // NOASMANG20: @_Z2ffPU9CLgenerici // OCL-20-DAG: @_Z2ffPU3AS4i // OCL-12-DAG: @_Z2ffPi diff --git a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl index 59f38f80dc..7216cb5174 100644 --- a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +++ b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl @@ -42,7 +42,7 @@ void func2(void) { // CL20: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 8 int *lp1 = &lv1; - // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i32 0, i32 0 + // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i64 0, i64 0 // CL12: store i32 addrspace(5)* %[[arraydecay]], i32 addrspace(5)* addrspace(5)* %lp2, align 4 // CL20: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %[[arraydecay]] to i32* // CL20: store i32* %[[r1]], i32* addrspace(5)* %lp2, align 8 diff --git a/test/CodeGenOpenCL/amdgpu-alignment.cl b/test/CodeGenOpenCL/amdgpu-alignment.cl index b5dc47adbc..3241da612f 100644 --- a/test/CodeGenOpenCL/amdgpu-alignment.cl +++ b/test/CodeGenOpenCL/amdgpu-alignment.cl @@ -92,48 +92,48 @@ typedef double __attribute__((ext_vector_type(16))) double16; // CHECK-LABEL: @local_memory_alignment_global( -// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1 -// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2 +// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1 +// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2 // CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4 -// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4 -// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8 -// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16 -// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2 -// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4 +// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4 +// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8 +// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16 +// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2 +// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4 // CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8 -// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8 -// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16 -// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32 -// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4 -// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8 +// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8 +// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16 +// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32 +// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4 +// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8 // CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16 -// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16 -// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32 -// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64 -// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8 -// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16 +// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16 +// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32 +// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64 +// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8 +// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16 // CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32 -// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32 -// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64 -// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128 -// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2 -// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4 +// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32 +// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64 +// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128 +// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2 +// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4 // CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8 -// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8 -// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16 -// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32 -// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4 -// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8 +// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8 +// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16 +// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32 +// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4 +// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8 // CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16 -// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16 -// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32 -// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64 -// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8 -// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16 +// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16 +// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32 +// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64 +// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8 +// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16 // CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32 -// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32 -// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64 -// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128 +// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32 +// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64 +// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128 kernel void local_memory_alignment_global() { volatile local char lds_i8[4]; diff --git a/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl index bcb00be8c8..4a91652ae1 100644 --- a/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" void foo(void) {} diff --git a/test/CodeGenOpenCL/amdgpu-features.cl b/test/CodeGenOpenCL/amdgpu-features.cl index 7aac4d3a36..bbfcf096ab 100644 --- a/test/CodeGenOpenCL/amdgpu-features.cl +++ b/test/CodeGenOpenCL/amdgpu-features.cl @@ -10,9 +10,9 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" -// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" -// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts" +// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime" // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals" // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals" // GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals" diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl index 160f7fbd52..88f2e0d0ea 100644 --- a/test/CodeGenOpenCL/atomic-ops.cl +++ b/test/CodeGenOpenCL/atomic-ops.cl @@ -41,7 +41,7 @@ void fi1(atomic_int *i) { // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); - // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); } @@ -83,7 +83,7 @@ void fi3(atomic_int *i, atomic_uint *ui) { bool fi4(atomic_int *i) { // CHECK-LABEL: @fi4( - // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire + // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] @@ -109,7 +109,7 @@ void fi5(atomic_int *i, int scope) { // CHECK: load atomic i32, i32* %{{.*}} seq_cst // CHECK: br label %[[continue]] // CHECK: [[opencl_subgroup]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst // CHECK: br label %[[continue]] // CHECK: [[continue]]: int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); @@ -141,21 +141,21 @@ void fi6(atomic_int *i, int order, int scope) { // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] // CHECK-NEXT: ] // CHECK: [[MON_WG]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic // CHECK: [[MON_DEV]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic // CHECK: [[MON_ALL]]: // CHECK: load atomic i32, i32* %{{.*}} monotonic // CHECK: [[MON_SUB]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic // CHECK: [[ACQ_WG]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire // CHECK: [[ACQ_DEV]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire // CHECK: [[ACQ_ALL]]: // CHECK: load atomic i32, i32* %{{.*}} acquire // CHECK: [[ACQ_SUB]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire // CHECK: [[SEQ_WG]]: // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst // CHECK: [[SEQ_DEV]]: @@ -163,19 +163,19 @@ void fi6(atomic_int *i, int order, int scope) { // CHECK: [[SEQ_ALL]]: // CHECK: load atomic i32, i32* %{{.*}} seq_cst // CHECK: [[SEQ_SUB]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst int x = __opencl_atomic_load(i, order, scope); } float ff1(global atomic_float *d) { // CHECK-LABEL: @ff1 - // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic + // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); } void ff2(atomic_float *d) { // CHECK-LABEL: @ff2 - // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release + // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); } @@ -198,7 +198,7 @@ void atomic_init_foo() // CHECK-LABEL: @failureOrder void failureOrder(atomic_int *ptr, int *ptr2) { - // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic + // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl index 675240c6f0..c3e26855df 100644 --- a/test/CodeGenOpenCL/blocks.cl +++ b/test/CodeGenOpenCL/blocks.cl @@ -35,31 +35,23 @@ void foo(){ // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 // SPIR: %[[i_value:.*]] = load i32, i32* %i // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], - // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* - // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* - // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], - // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] - // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* - // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 + // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic* + // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* + // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], + // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* - // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] - // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* - // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) + // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], - // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* - // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* - // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], - // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] - // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* - // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 + // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)* + // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* + // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], + // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* - // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] - // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* - // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) + // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) int (^ block_B)(void) = ^{ return i; @@ -98,6 +90,12 @@ int get42() { return blockArgFunc(^{return 42;}); } +// COMMON-LABEL: define {{.*}}@call_block +// call {{.*}}@__call_block_block_invoke +int call_block() { + return ^int(int num) { return num; } (11); +} + // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size" // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align" diff --git a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl index e2c03a471b..a82fcbd758 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl @@ -12,24 +12,24 @@ kernel void builtins_amdgcn_dl_insts_err( half2 v2hA, half2 v2hB, float fC, short2 v2ssA, short2 v2ssB, int siA, int siB, int siC, ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) { - fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}} - fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}} + fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}} + fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}} - siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}} - siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}} + siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}} + siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}} - uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}} - uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}} + uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}} + uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}} - siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}} - siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}} + siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}} + siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}} - uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}} - uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}} + uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}} + uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}} - siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}} - siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}} + siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}} + siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}} - uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}} - uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}} + uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}} + uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}} } diff --git a/test/CodeGenOpenCL/builtins-amdgcn-interp.cl b/test/CodeGenOpenCL/builtins-amdgcn-interp.cl new file mode 100644 index 0000000000..39d913e902 --- /dev/null +++ b/test/CodeGenOpenCL/builtins-amdgcn-interp.cl @@ -0,0 +1,34 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// CHECK-LABEL: test_interp_f16 +// CHECK: call float @llvm.amdgcn.interp.p1.f16 +// CHECK: call half @llvm.amdgcn.interp.p2.f16 +// CHECK: call float @llvm.amdgcn.interp.p1.f16 +// CHECK: call half @llvm.amdgcn.interp.p2.f16 +void test_interp_f16(global half* out, float i, float j, int m0) +{ + float p1_0 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, false, m0); + half p2_0 = __builtin_amdgcn_interp_p2_f16(p1_0, j, 2, 3, false, m0); + float p1_1 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, true, m0); + half p2_1 = __builtin_amdgcn_interp_p2_f16(p1_1, j, 2, 3, true, m0); + *out = p2_0 + p2_1; +} + +// CHECK-LABEL: test_interp_f32 +// CHECK: call float @llvm.amdgcn.interp.p1 +// CHECK: call float @llvm.amdgcn.interp.p2 +void test_interp_f32(global float* out, float i, float j, int m0) +{ + float p1 = __builtin_amdgcn_interp_p1(i, 1, 4, m0); + *out = __builtin_amdgcn_interp_p2(p1, j, 1, 4, m0); +} + +// CHECK-LABEL: test_interp_mov +// CHECK: call float @llvm.amdgcn.interp.mov +void test_interp_mov(global float* out, float i, float j, int m0) +{ + *out = __builtin_amdgcn_interp_mov(2, 3, 4, m0); +} diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl index dc7f480209..6b7ea52dab 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -536,6 +536,18 @@ void test_s_getpc(global ulong* out) *out = __builtin_amdgcn_s_getpc(); } +// CHECK-LABEL: @test_ds_append_lds( +// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_append_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_append(ptr); +} + +// CHECK-LABEL: @test_ds_consume_lds( +// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_consume_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_consume(ptr); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl index 473219478a..8d77c18e7a 100644 --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t; // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } -// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) +// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*) // For anonymous blocks without captures, emit block literals as global variable. // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } @@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke - // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* - // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* + // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic* + // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic* + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), @@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke - // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* + // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic* + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), @@ -107,8 +107,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { }); // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events - // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* {{%[0-9]+}}, %opencl.clk_event_t{{.*}}* addrspace(4)* null, - enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, 0, + // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* null, %opencl.clk_event_t{{.*}}* addrspace(4)* null, + enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0, ^(void) { return; }); @@ -165,7 +165,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG3]] and block kernel [[INVGK3]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 + // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8* @@ -192,7 +192,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG4]] and block kernel [[INVGK4]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 + // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8* @@ -300,21 +300,19 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. // The full type of these expressions are long (and repeated elsewhere), so we // capture it as part of the regex for convenience and clarity. - // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A void (^const block_A)(void) = ^{ return; }; // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. - // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B void (^const block_B)(local void *) = ^(local void *a) { return; }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. @@ -333,20 +331,40 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { unsigned size = get_kernel_work_group_size(block_A); // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); + // Make sure that block invoke function is resolved correctly after sequence of assignements. + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1, + bl_t b1 = block_G; + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2, + bl_t b2 = b1; + // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) + // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null) + b2(0); + // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. + // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + size = get_kernel_preferred_work_group_size_multiple(b2); + void (^block_C)(void) = ^{ callee(i, a); }; - // Emits block literal on stack and block kernel [[INVLK3]]. // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), @@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) +// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) -// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) diff --git a/test/CodeGenOpenCL/constant-addr-space-globals.cl b/test/CodeGenOpenCL/constant-addr-space-globals.cl index 5fcf117dde..47e180c690 100644 --- a/test/CodeGenOpenCL/constant-addr-space-globals.cl +++ b/test/CodeGenOpenCL/constant-addr-space-globals.cl @@ -26,6 +26,6 @@ kernel void k(void) { constant int var1 = 1; - // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i32 0, i32 0) + // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i64 0, i64 0) foo(&var1, arr1, arr2, arr3); } diff --git a/test/CodeGenOpenCL/images.cl b/test/CodeGenOpenCL/images.cl index eb054eceb5..baa9197847 100644 --- a/test/CodeGenOpenCL/images.cl +++ b/test/CodeGenOpenCL/images.cl @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - -cl-std=c++ | FileCheck %s __attribute__((overloadable)) void read_image(read_only image1d_t img_ro); __attribute__((overloadable)) void read_image(write_only image1d_t img_wo); diff --git a/test/CodeGenOpenCL/printf.cl b/test/CodeGenOpenCL/printf.cl index 346f6c35ba..fc139d776d 100644 --- a/test/CodeGenOpenCL/printf.cl +++ b/test/CodeGenOpenCL/printf.cl @@ -12,28 +12,26 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) // ALL-LABEL: @test_printf_float2( -// FP64: %conv = fpext <2 x float> %0 to <2 x double> -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) -// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) + +// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) kernel void test_printf_float2(float2 arg) { - printf("%v2f", arg); + printf("%v2hlf", arg); } // ALL-LABEL: @test_printf_half2( -// FP64: %conv = fpext <2 x half> %0 to <2 x double> -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2 +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0) -// NOFP64: %conv = fpext <2 x half> %0 to <2 x float> -// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %conv) #2 +// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0) kernel void test_printf_half2(half2 arg) { - printf("%v2f", arg); + printf("%v2hf", arg); } #ifdef cl_khr_fp64 // FP64-LABEL: @test_printf_double2( -// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %0) #2 +// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.2, i32 0, i32 0), <2 x double> %0) kernel void test_printf_double2(double2 arg) { - printf("%v2f", arg); + printf("%v2lf", arg); } #endif diff --git a/test/CodeGenOpenCL/unroll-hint.cl b/test/CodeGenOpenCL/unroll-hint.cl index 6a9ba87a5e..0f84450a1a 100644 --- a/test/CodeGenOpenCL/unroll-hint.cl +++ b/test/CodeGenOpenCL/unroll-hint.cl @@ -18,12 +18,12 @@ void for_disable() // CHECK: br label %{{.*}}, !llvm.loop ![[FOR_DISABLE:.*]] } -void for_full() +void for_enable() { -// CHECK-LABEL: for_full +// CHECK-LABEL: for_enable __attribute__((opencl_unroll_hint)) for( int i = 0; i < 1000; ++i); -// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_FULL:.*]] +// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_ENABLE:.*]] } /*** while ***/ @@ -45,13 +45,13 @@ void while_disable() // CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_DISABLE:.*]] } -void while_full() +void while_enable() { -// CHECK-LABEL: while_full +// CHECK-LABEL: while_enable int i = 1000; __attribute__((opencl_unroll_hint)) while(i-->0); -// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_FULL:.*]] +// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_ENABLE:.*]] } /*** do ***/ @@ -73,13 +73,13 @@ void do_disable() // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_DISABLE:.*]] } -void do_full() +void do_enable() { -// CHECK-LABEL: do_full +// CHECK-LABEL: do_enable int i = 1000; __attribute__((opencl_unroll_hint)) do {} while(i--> 0); -// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_FULL:.*]] +// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_ENABLE:.*]] } @@ -87,11 +87,11 @@ void do_full() // CHECK: ![[COUNT]] = !{!"llvm.loop.unroll.count", i32 8} // CHECK: ![[FOR_DISABLE]] = distinct !{![[FOR_DISABLE]], ![[DISABLE:.*]]} // CHECK: ![[DISABLE]] = !{!"llvm.loop.unroll.disable"} -// CHECK: ![[FOR_FULL]] = distinct !{![[FOR_FULL]], ![[FULL:.*]]} -// CHECK: ![[FULL]] = !{!"llvm.loop.unroll.full"} +// CHECK: ![[FOR_ENABLE]] = distinct !{![[FOR_ENABLE]], ![[ENABLE:.*]]} +// CHECK: ![[ENABLE]] = !{!"llvm.loop.unroll.enable"} // CHECK: ![[WHILE_COUNT]] = distinct !{![[WHILE_COUNT]], ![[COUNT]]} // CHECK: ![[WHILE_DISABLE]] = distinct !{![[WHILE_DISABLE]], ![[DISABLE]]} -// CHECK: ![[WHILE_FULL]] = distinct !{![[WHILE_FULL]], ![[FULL]]} +// CHECK: ![[WHILE_ENABLE]] = distinct !{![[WHILE_ENABLE]], ![[ENABLE]]} // CHECK: ![[DO_COUNT]] = distinct !{![[DO_COUNT]], ![[COUNT]]} // CHECK: ![[DO_DISABLE]] = distinct !{![[DO_DISABLE]], ![[DISABLE]]} -// CHECK: ![[DO_FULL]] = distinct !{![[DO_FULL]], ![[FULL]]} +// CHECK: ![[DO_ENABLE]] = distinct !{![[DO_ENABLE]], ![[ENABLE]]} diff --git a/test/CodeGenOpenCL/visibility.cl b/test/CodeGenOpenCL/visibility.cl new file mode 100644 index 0000000000..8ce8017d06 --- /dev/null +++ b/test/CodeGenOpenCL/visibility.cl @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility default -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-DEFAULT %s +// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility protected -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-PROTECTED %s +// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility hidden -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-HIDDEN %s + +// REQUIRES: amdgpu-registered-target + +// FVIS-DEFAULT: @glob = local_unnamed_addr +// FVIS-PROTECTED: @glob = protected local_unnamed_addr +// FVIS-HIDDEN: @glob = hidden local_unnamed_addr +int glob = 0; +// FVIS-DEFAULT: @glob_hidden = hidden local_unnamed_addr +// FVIS-PROTECTED: @glob_hidden = hidden local_unnamed_addr +// FVIS-HIDDEN: @glob_hidden = hidden local_unnamed_addr +__attribute__((visibility("hidden"))) int glob_hidden = 0; +// FVIS-DEFAULT: @glob_protected = protected local_unnamed_addr +// FVIS-PROTECTED: @glob_protected = protected local_unnamed_addr +// FVIS-HIDDEN: @glob_protected = protected local_unnamed_addr +__attribute__((visibility("protected"))) int glob_protected = 0; +// FVIS-DEFAULT: @glob_default = local_unnamed_addr +// FVIS-PROTECTED: @glob_default = local_unnamed_addr +// FVIS-HIDDEN: @glob_default = local_unnamed_addr +__attribute__((visibility("default"))) int glob_default = 0; + +// FVIS-DEFAULT: @ext = external local_unnamed_addr +// FVIS-PROTECTED: @ext = external protected local_unnamed_addr +// FVIS-HIDDEN: @ext = external hidden local_unnamed_addr +extern int ext; +// FVIS-DEFAULT: @ext_hidden = external hidden local_unnamed_addr +// FVIS-PROTECTED: @ext_hidden = external hidden local_unnamed_addr +// FVIS-HIDDEN: @ext_hidden = external hidden local_unnamed_addr +__attribute__((visibility("hidden"))) extern int ext_hidden; +// FVIS-DEFAULT: @ext_protected = external protected local_unnamed_addr +// FVIS-PROTECTED: @ext_protected = external protected local_unnamed_addr +// FVIS-HIDDEN: @ext_protected = external protected local_unnamed_addr +__attribute__((visibility("protected"))) extern int ext_protected; +// FVIS-DEFAULT: @ext_default = external local_unnamed_addr +// FVIS-PROTECTED: @ext_default = external local_unnamed_addr +// FVIS-HIDDEN: @ext_default = external local_unnamed_addr +__attribute__((visibility("default"))) extern int ext_default; + +// FVIS-DEFAULT: define amdgpu_kernel void @kern() +// FVIS-PROTECTED: define protected amdgpu_kernel void @kern() +// FVIS-HIDDEN: define protected amdgpu_kernel void @kern() +kernel void kern() {} +// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_hidden() +// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_hidden() +// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_hidden() +__attribute__((visibility("hidden"))) kernel void kern_hidden() {} +// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_protected() +// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_protected() +// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_protected() +__attribute__((visibility("protected"))) kernel void kern_protected() {} +// FVIS-DEFAULT: define amdgpu_kernel void @kern_default() +// FVIS-PROTECTED: define amdgpu_kernel void @kern_default() +// FVIS-HIDDEN: define amdgpu_kernel void @kern_default() +__attribute__((visibility("default"))) kernel void kern_default() {} + +// FVIS-DEFAULT: define void @func() +// FVIS-PROTECTED: define protected void @func() +// FVIS-HIDDEN: define hidden void @func() +void func() {} +// FVIS-DEFAULT: define hidden void @func_hidden() +// FVIS-PROTECTED: define hidden void @func_hidden() +// FVIS-HIDDEN: define hidden void @func_hidden() +__attribute__((visibility("hidden"))) void func_hidden() {} +// FVIS-DEFAULT: define protected void @func_protected() +// FVIS-PROTECTED: define protected void @func_protected() +// FVIS-HIDDEN: define protected void @func_protected() +__attribute__((visibility("protected"))) void func_protected() {} +// FVIS-DEFAULT: define void @func_default() +// FVIS-PROTECTED: define void @func_default() +// FVIS-HIDDEN: define void @func_default() +__attribute__((visibility("default"))) void func_default() {} + +extern kernel void ext_kern(); +__attribute__((visibility("hidden"))) extern kernel void ext_kern_hidden(); +__attribute__((visibility("protected"))) extern kernel void ext_kern_protected(); +__attribute__((visibility("default"))) extern kernel void ext_kern_default(); + +extern void ext_func(); +__attribute__((visibility("hidden"))) extern void ext_func_hidden(); +__attribute__((visibility("protected"))) extern void ext_func_protected(); +__attribute__((visibility("default"))) extern void ext_func_default(); + +void use() { + glob = ext + ext_hidden + ext_protected + ext_default; + ext_kern(); + ext_kern_hidden(); + ext_kern_protected(); + ext_kern_default(); + ext_func(); + ext_func_hidden(); + ext_func_protected(); + ext_func_default(); +} + +// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern() +// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern() +// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern() + +// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden() +// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden() +// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden() + +// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected() +// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected() +// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected() + +// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default() +// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default() +// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default() + + +// FVIS-DEFAULT: declare void @ext_func() +// FVIS-PROTECTED: declare protected void @ext_func() +// FVIS-HIDDEN: declare hidden void @ext_func() + +// FVIS-DEFAULT: declare hidden void @ext_func_hidden() +// FVIS-PROTECTED: declare hidden void @ext_func_hidden() +// FVIS-HIDDEN: declare hidden void @ext_func_hidden() + +// FVIS-DEFAULT: declare protected void @ext_func_protected() +// FVIS-PROTECTED: declare protected void @ext_func_protected() +// FVIS-HIDDEN: declare protected void @ext_func_protected() + +// FVIS-DEFAULT: declare void @ext_func_default() +// FVIS-PROTECTED: declare void @ext_func_default() +// FVIS-HIDDEN: declare void @ext_func_default() |