Diffstat (limited to 'test/CodeGen')
-rw-r--r-- test/CodeGen/Inputs/pgotestir.proftext | 2
-rw-r--r-- test/CodeGen/Inputs/pgotestir_cs.proftext | 2
-rw-r--r-- test/CodeGen/aarch64-neon-fp16fml.c | 144
-rw-r--r-- test/CodeGen/aarch64-neon-intrinsics.c | 6
-rw-r--r-- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 17
-rw-r--r-- test/CodeGen/aarch64-vpcs.c | 4
-rw-r--r-- test/CodeGen/alloc-align-attr.c | 36
-rw-r--r-- test/CodeGen/alloc-size.c | 228
-rw-r--r-- test/CodeGen/annotations-builtin.c | 3
-rw-r--r-- test/CodeGen/annotations-var.c | 11
-rw-r--r-- test/CodeGen/arm-target-features.c | 1
-rw-r--r-- test/CodeGen/arm64-crc32.c | 19
-rw-r--r-- test/CodeGen/arm64-microsoft-arguments.cpp | 208
-rw-r--r-- test/CodeGen/arm64-microsoft-status-reg.cpp | 68
-rw-r--r-- test/CodeGen/arm64-microsoft-struct-align.cpp | 27
-rw-r--r-- test/CodeGen/arm64-mte.c | 110
-rw-r--r-- test/CodeGen/armv7k-abi.c | 2
-rw-r--r-- test/CodeGen/asan-new-pm.ll | 30
-rw-r--r-- test/CodeGen/asm-inout.c | 9
-rw-r--r-- test/CodeGen/attr-callback.c | 28
-rw-r--r-- test/CodeGen/attr-cpuspecific.c | 6
-rw-r--r-- test/CodeGen/attr-msp430.c | 10
-rw-r--r-- test/CodeGen/attr-speculative-load-hardening.cpp | 18
-rw-r--r-- test/CodeGen/attr-speculative-load-hardening.m | 9
-rw-r--r-- test/CodeGen/attr-target-x86-mmx.c | 2
-rw-r--r-- test/CodeGen/attr-target-x86.c | 16
-rw-r--r-- test/CodeGen/attr-target-x87-softfp.c | 4
-rw-r--r-- test/CodeGen/avx-builtins.c | 1
-rw-r--r-- test/CodeGen/avx-cmp-builtins.c | 8
-rw-r--r-- test/CodeGen/avx-shuffle-builtins.c | 6
-rw-r--r-- test/CodeGen/avx2-builtins.c | 16
-rw-r--r-- test/CodeGen/avx512-kconstraints-att_inline_asm.c | 82
-rw-r--r-- test/CodeGen/avx512bf16-builtins.c | 74
-rw-r--r-- test/CodeGen/avx512bw-builtins.c | 50
-rw-r--r-- test/CodeGen/avx512cdintrin.c | 16
-rw-r--r-- test/CodeGen/avx512dq-builtins.c | 48
-rw-r--r-- test/CodeGen/avx512f-builtins.c | 427
-rw-r--r-- test/CodeGen/avx512vbmi2-builtins.c | 16
-rw-r--r-- test/CodeGen/avx512vl-builtins.c | 160
-rw-r--r-- test/CodeGen/avx512vlbf16-builtins.c | 163
-rw-r--r-- test/CodeGen/avx512vlbw-builtins.c | 68
-rw-r--r-- test/CodeGen/avx512vlcd-builtins.c | 32
-rw-r--r-- test/CodeGen/avx512vldq-builtins.c | 16
-rw-r--r-- test/CodeGen/avx512vlvbmi2-builtins.c | 32
-rw-r--r-- test/CodeGen/bitscan-builtins.c | 33
-rw-r--r-- test/CodeGen/blocks-1.c | 11
-rw-r--r-- test/CodeGen/builtin-constant-p.c | 8
-rw-r--r-- test/CodeGen/builtin-expect.c | 17
-rw-r--r-- test/CodeGen/builtin-sponentry.c | 8
-rw-r--r-- test/CodeGen/builtins-arm64.c | 27
-rw-r--r-- test/CodeGen/builtins-msp430.c | 10
-rw-r--r-- test/CodeGen/builtins-nvptx-mma.cu | 755
-rw-r--r-- test/CodeGen/builtins-nvptx-mma.py | 343
-rw-r--r-- test/CodeGen/builtins-ppc-cache.c | 47
-rw-r--r-- test/CodeGen/builtins-ppc.c | 13
-rw-r--r-- test/CodeGen/builtins-wasm.c | 34
-rw-r--r-- test/CodeGen/builtins-x86.c | 2
-rw-r--r-- test/CodeGen/builtins.c | 5
-rw-r--r-- test/CodeGen/callback_annotated.c | 71
-rw-r--r-- test/CodeGen/callback_openmp.c | 28
-rw-r--r-- test/CodeGen/callback_pthread_create.c | 41
-rw-r--r-- test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp | 4
-rw-r--r-- test/CodeGen/catch-undef-behavior.c | 2
-rw-r--r-- test/CodeGen/complex-math.c | 4
-rw-r--r-- test/CodeGen/compound-literal.c | 15
-rw-r--r-- test/CodeGen/const-init.c | 6
-rw-r--r-- test/CodeGen/construction-vtable-visibility.cpp | 16
-rw-r--r-- test/CodeGen/cspgo-instrumentation.c | 41
-rw-r--r-- test/CodeGen/cspgo-instrumentation_lto.c | 44
-rw-r--r-- test/CodeGen/cspgo-instrumentation_thinlto.c | 52
-rw-r--r-- test/CodeGen/debug-info-codeview-heapallocsite.c | 23
-rw-r--r-- test/CodeGen/debug-label-inline.c | 28
-rw-r--r-- test/CodeGen/debug-label.c | 16
-rw-r--r-- test/CodeGen/dllexport-1.c | 24
-rw-r--r-- test/CodeGen/exceptions-seh-finally.c | 2
-rw-r--r-- test/CodeGen/inline-asm-x86-flag-output.c | 376
-rw-r--r-- test/CodeGen/microsoft-no-common-align.c | 3
-rw-r--r-- test/CodeGen/ms-intrinsics-rotations.c | 21
-rw-r--r-- test/CodeGen/ms-intrinsics.c | 34
-rw-r--r-- test/CodeGen/ms-setjmp.c | 8
-rw-r--r-- test/CodeGen/ms-volatile-aarch64.c | 13
-rw-r--r-- test/CodeGen/ms-volatile-arm.c | 13
-rw-r--r-- test/CodeGen/ms-x86-intrinsics.c | 34
-rw-r--r-- test/CodeGen/msp430-align.c | 23
-rw-r--r-- test/CodeGen/msp430-fp-elim.c | 19
-rw-r--r-- test/CodeGen/msp430-reloc.c | 30
-rw-r--r-- test/CodeGen/mult-alt-generic.c | 8
-rw-r--r-- test/CodeGen/object-size.c | 439
-rw-r--r-- test/CodeGen/object-size.cpp | 14
-rw-r--r-- test/CodeGen/opt-record-MIR.c | 11
-rw-r--r-- test/CodeGen/opt-record.c | 5
-rw-r--r-- test/CodeGen/padding-init.c | 51
-rw-r--r-- test/CodeGen/pass-object-size.c | 75
-rw-r--r-- test/CodeGen/pgo-instrumentation.c | 20
-rw-r--r-- test/CodeGen/popcnt-builtins.c | 31
-rw-r--r-- test/CodeGen/powerpc_types.c | 2
-rw-r--r-- test/CodeGen/ppc-mmintrin.c | 1262
-rw-r--r-- test/CodeGen/ppc64-dwarf.c | 235
-rw-r--r-- test/CodeGen/riscv32-ilp32-abi.c | 53
-rw-r--r-- test/CodeGen/riscv32-ilp32-ilp32f-abi.c | 53
-rw-r--r-- test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c (renamed from test/CodeGen/riscv32-abi.c) | 23
-rw-r--r-- test/CodeGen/riscv64-lp64-abi.c | 32
-rw-r--r-- test/CodeGen/riscv64-lp64-lp64f-abi.c | 32
-rw-r--r-- test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c (renamed from test/CodeGen/riscv64-abi.c) | 11
-rw-r--r-- test/CodeGen/rot-intrinsics.c | 120
-rw-r--r-- test/CodeGen/sanitize-atomic-int-overflow.c | 33
-rw-r--r-- test/CodeGen/set-visibility-for-decls.c | 42
-rw-r--r-- test/CodeGen/sparcv9-dwarf.c | 176
-rw-r--r-- test/CodeGen/spir-half-type.cpp | 2
-rw-r--r-- test/CodeGen/split-debug-filename.c | 4
-rw-r--r-- test/CodeGen/split-debug-single-file.c | 4
-rw-r--r-- test/CodeGen/sse-builtins.c | 1
-rw-r--r-- test/CodeGen/sse2-builtins.c | 17
-rw-r--r-- test/CodeGen/target-builtin-noerror.c | 2
-rw-r--r-- test/CodeGen/target-data.c | 14
-rw-r--r-- test/CodeGen/thinlto-debug-pm.c | 15
-rw-r--r-- test/CodeGen/thinlto-distributed-cfi-devirt.ll | 4
-rw-r--r-- test/CodeGen/thinlto-split-dwarf.c | 4
-rw-r--r-- test/CodeGen/ubsan-asan-noreturn.c | 21
-rw-r--r-- test/CodeGen/unreachable-ret.c | 23
-rw-r--r-- test/CodeGen/wasm-import-module.c | 11
-rw-r--r-- test/CodeGen/wasm-import-name.c | 11
-rw-r--r-- test/CodeGen/x86-64-inline-asm.c | 17
-rw-r--r-- test/CodeGen/x86-bswap.c | 29
-rw-r--r-- test/CodeGen/x86-crc-builtins.c | 30
-rw-r--r-- test/CodeGen/x86-vec-struct-packing.c | 227
-rw-r--r-- test/CodeGen/x86_32-xsave.c | 54
-rw-r--r-- test/CodeGen/x86_64-xsave.c | 72
-rw-r--r-- test/CodeGen/xop-builtins-cmp.c | 176
-rw-r--r-- test/CodeGen/xop-builtins.c | 24
130 files changed, 6580 insertions, 1449 deletions
diff --git a/test/CodeGen/Inputs/pgotestir.proftext b/test/CodeGen/Inputs/pgotestir.proftext
new file mode 100644
index 0000000000..05ab839db0
--- /dev/null
+++ b/test/CodeGen/Inputs/pgotestir.proftext
@@ -0,0 +1,2 @@
+# IR level Instrumentation Flag
+:ir
diff --git a/test/CodeGen/Inputs/pgotestir_cs.proftext b/test/CodeGen/Inputs/pgotestir_cs.proftext
new file mode 100644
index 0000000000..a9aca5b9df
--- /dev/null
+++ b/test/CodeGen/Inputs/pgotestir_cs.proftext
@@ -0,0 +1,2 @@
+# IR level Instrumentation Flag with CS
+:csir
diff --git a/test/CodeGen/aarch64-neon-fp16fml.c b/test/CodeGen/aarch64-neon-fp16fml.c
index ad3dd9c226..3436d8b212 100644
--- a/test/CodeGen/aarch64-neon-fp16fml.c
+++ b/test/CodeGen/aarch64-neon-fp16fml.c
@@ -9,188 +9,188 @@
// Vector form
-float32x2_t test_vfmlal_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlal_low_u32(a, b, c);
+ return vfmlal_low_f16(a, b, c);
}
-float32x2_t test_vfmlsl_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlsl_low_u32(a, b, c);
+ return vfmlsl_low_f16(a, b, c);
}
-float32x2_t test_vfmlal_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlal_high_u32(a, b, c);
+ return vfmlal_high_f16(a, b, c);
}
-float32x2_t test_vfmlsl_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlsl_high_u32(a, b, c);
+ return vfmlsl_high_f16(a, b, c);
}
-float32x4_t test_vfmlalq_low_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_low_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlalq_low_u32(a, b, c);
+ return vfmlalq_low_f16(a, b, c);
}
-float32x4_t test_vfmlslq_low_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_low_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlslq_low_u32(a, b, c);
+ return vfmlslq_low_f16(a, b, c);
}
-float32x4_t test_vfmlalq_high_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_high_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlalq_high_u32(a, b, c);
+ return vfmlalq_high_f16(a, b, c);
}
-float32x4_t test_vfmlslq_high_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_high_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlslq_high_u32(a, b, c);
+ return vfmlslq_high_f16(a, b, c);
}
// Indexed form
-float32x2_t test_vfmlal_lane_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_lane_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_lane_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlal_lane_low_u32(a, b, c, 0);
+ return vfmlal_lane_low_f16(a, b, c, 0);
}
-float32x2_t test_vfmlal_lane_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_lane_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_lane_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlal_lane_high_u32(a, b, c, 1);
+ return vfmlal_lane_high_f16(a, b, c, 1);
}
-float32x4_t test_vfmlalq_lane_low_u32(float32x4_t a, float16x8_t b, float16x4_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_lane_low_u32(<4 x float> %a, <8 x half> %b, <4 x half> %c)
+float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_lane_low_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlalq_lane_low_u32(a, b, c, 2);
+ return vfmlalq_lane_low_f16(a, b, c, 2);
}
-float32x4_t test_vfmlalq_lane_high_u32(float32x4_t a, float16x8_t b, float16x4_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_lane_high_u32(<4 x float> %a, <8 x half> %b, <4 x half> %c)
+float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_lane_high_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlalq_lane_high_u32(a, b, c, 3);
+ return vfmlalq_lane_high_f16(a, b, c, 3);
}
-float32x2_t test_vfmlal_laneq_low_u32(float32x2_t a, float16x4_t b, float16x8_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_laneq_low_u32(<2 x float> %a, <4 x half> %b, <8 x half> %c)
+float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_laneq_low_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlal_laneq_low_u32(a, b, c, 4);
+ return vfmlal_laneq_low_f16(a, b, c, 4);
}
-float32x2_t test_vfmlal_laneq_high_u32(float32x2_t a, float16x4_t b, float16x8_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlal_laneq_high_u32(<2 x float> %a, <4 x half> %b, <8 x half> %c)
+float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlal_laneq_high_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlal_laneq_high_u32(a, b, c, 5);
+ return vfmlal_laneq_high_f16(a, b, c, 5);
}
-float32x4_t test_vfmlalq_laneq_low_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_laneq_low_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_laneq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlalq_laneq_low_u32(a, b, c, 6);
+ return vfmlalq_laneq_low_f16(a, b, c, 6);
}
-float32x4_t test_vfmlalq_laneq_high_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlalq_laneq_high_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlalq_laneq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlalq_laneq_high_u32(a, b, c, 7);
+ return vfmlalq_laneq_high_f16(a, b, c, 7);
}
-float32x2_t test_vfmlsl_lane_low_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_lane_low_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_lane_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlsl_lane_low_u32(a, b, c, 0);
+ return vfmlsl_lane_low_f16(a, b, c, 0);
}
-float32x2_t test_vfmlsl_lane_high_u32(float32x2_t a, float16x4_t b, float16x4_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_lane_high_u32(<2 x float> %a, <4 x half> %b, <4 x half> %c)
+float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_lane_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlsl_lane_high_u32(a, b, c, 1);
+ return vfmlsl_lane_high_f16(a, b, c, 1);
}
-float32x4_t test_vfmlslq_lane_low_u32(float32x4_t a, float16x8_t b, float16x4_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_lane_low_u32(<4 x float> %a, <8 x half> %b, <4 x half> %c)
+float32x4_t test_vfmlslq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_lane_low_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlslq_lane_low_u32(a, b, c, 2);
+ return vfmlslq_lane_low_f16(a, b, c, 2);
}
-float32x4_t test_vfmlslq_lane_high_u32(float32x4_t a, float16x8_t b, float16x4_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_lane_high_u32(<4 x float> %a, <8 x half> %b, <4 x half> %c)
+float32x4_t test_vfmlslq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_lane_high_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlslq_lane_high_u32(a, b, c, 3);
+ return vfmlslq_lane_high_f16(a, b, c, 3);
}
-float32x2_t test_vfmlsl_laneq_low_u32(float32x2_t a, float16x4_t b, float16x8_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_laneq_low_u32(<2 x float> %a, <4 x half> %b, <8 x half> %c)
+float32x2_t test_vfmlsl_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_laneq_low_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlsl_laneq_low_u32(a, b, c, 4);
+ return vfmlsl_laneq_low_f16(a, b, c, 4);
}
-float32x2_t test_vfmlsl_laneq_high_u32(float32x2_t a, float16x4_t b, float16x8_t c) {
-// CHECK-LABEL: define <2 x float> @test_vfmlsl_laneq_high_u32(<2 x float> %a, <4 x half> %b, <8 x half> %c)
+float32x2_t test_vfmlsl_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
+// CHECK-LABEL: define <2 x float> @test_vfmlsl_laneq_high_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
// CHECK: ret <2 x float> [[RESULT]]
- return vfmlsl_laneq_high_u32(a, b, c, 5);
+ return vfmlsl_laneq_high_f16(a, b, c, 5);
}
-float32x4_t test_vfmlslq_laneq_low_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_laneq_low_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlslq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_laneq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlslq_laneq_low_u32(a, b, c, 6);
+ return vfmlslq_laneq_low_f16(a, b, c, 6);
}
-float32x4_t test_vfmlslq_laneq_high_u32(float32x4_t a, float16x8_t b, float16x8_t c) {
-// CHECK-LABEL: define <4 x float> @test_vfmlslq_laneq_high_u32(<4 x float> %a, <8 x half> %b, <8 x half> %c)
+float32x4_t test_vfmlslq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
+// CHECK-LABEL: define <4 x float> @test_vfmlslq_laneq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
// CHECK: ret <4 x float> [[RESULT]]
- return vfmlslq_laneq_high_u32(a, b, c, 7);
+ return vfmlslq_laneq_high_f16(a, b, c, 7);
}
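
For reference, a minimal sketch of what the renamed intrinsics look like in user code (the wrapper name is illustrative; assumes arm_neon.h and a target built with -march=armv8.2-a+fp16fml):

  #include <arm_neon.h>

  /* Accumulate the widening product of the low halves of x and y into acc.
     After this rename, the _f16 suffix replaces the old, incorrect _u32 one. */
  float32x2_t fmlal_acc(float32x2_t acc, float16x4_t x, float16x4_t y) {
    return vfmlal_low_f16(acc, x, y);
  }
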
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index 40e39912be..9a5b3a9f18 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -4411,7 +4411,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b)
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
@@ -4475,7 +4475,7 @@ uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
// CHECK-LABEL: @test_vpaddq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VPADDQ_V2_I]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
@@ -4485,7 +4485,7 @@ float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
// CHECK-LABEL: @test_vpaddq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b)
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VPADDQ_V2_I]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
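
The change here is only in which LLVM intrinsic the floating-point pairwise adds lower to (faddp instead of the integer addp); source-level usage is unchanged. A minimal sketch (wrapper name is illustrative; assumes arm_neon.h on AArch64):

  #include <arm_neon.h>

  /* Result lanes are a0+a1 and b0+b1; per the updated CHECK lines this now
     selects llvm.aarch64.neon.faddp.v2f32. */
  float32x2_t pairwise_sum(float32x2_t a, float32x2_t b) {
    return vpadd_f32(a, b);
  }
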
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index e1a2e3fb92..a84445b62a 100644
--- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -736,14 +736,14 @@ float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
}
// CHECK-LABEL: test_vpadd_f16
-// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[ADD]]
float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
return vpadd_f16(a, b);
}
// CHECK-LABEL: test_vpaddq_f16
-// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[ADD]]
float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
return vpaddq_f16(a, b);
@@ -1618,3 +1618,16 @@ float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
return vtrn2q_f16(a, b);
}
+// CHECK-LABEL: @test_vduph_laneq_f16(
+// CHECK: [[V:%.*]] = extractelement <8 x half> [[V2:%.*]], i32 7
+// CHECK-NEXT: ret half [[V]]
+float16_t test_vduph_laneq_f16(float16x8_t vec) {
+ return vduph_laneq_f16(vec, 7);
+}
+
+// CHECK-LABEL: @test_vduph_lane_f16(
+// CHECK: [[V:%.*]] = extractelement <4 x half> [[V2:%.*]], i32 3
+// CHECK-NEXT: ret half [[V]]
+float16_t test_vduph_lane_f16(float16x4_t vec) {
+ return vduph_lane_f16(vec, 3);
+}
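
A sketch of the newly covered scalar lane-read intrinsics (wrapper name is illustrative; assumes arm_neon.h with the v8.2-A half-precision extensions enabled):

  #include <arm_neon.h>

  /* vduph_lane_f16 reads one half-precision lane as a scalar; as the CHECK
     lines above verify, it compiles to a single extractelement. */
  float16_t lane3(float16x4_t v) {
    return vduph_lane_f16(v, 3);
  }
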
diff --git a/test/CodeGen/aarch64-vpcs.c b/test/CodeGen/aarch64-vpcs.c
index 0fc2e96511..a9edb7490c 100644
--- a/test/CodeGen/aarch64-vpcs.c
+++ b/test/CodeGen/aarch64-vpcs.c
@@ -2,7 +2,7 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -x c++ -o - %s | FileCheck %s -check-prefix=CHECKCXX
// RUN: %clang_cc1 -triple i686-pc-linux-gnu -verify %s
-void __attribute__((aarch64_vector_pcs)) f(int *); // expected-warning {{calling convention 'aarch64_vector_pcs' ignored for this target}}
+void __attribute__((aarch64_vector_pcs)) f(int *); // expected-warning {{'aarch64_vector_pcs' calling convention ignored for this target}}
// CHECKC: define void @g(
// CHECKCXX: define void @_Z1gPi(
@@ -16,7 +16,7 @@ void g(int *a) {
// CHECKC: declare aarch64_vector_pcs void @f(
// CHECKCXX: declare aarch64_vector_pcs void @_Z1fPi
-void __attribute__((aarch64_vector_pcs)) h(int *a){ // expected-warning {{calling convention 'aarch64_vector_pcs' ignored for this target}}
+void __attribute__((aarch64_vector_pcs)) h(int *a){ // expected-warning {{'aarch64_vector_pcs' calling convention ignored for this target}}
// CHECKC: define aarch64_vector_pcs void @h(
// CHECKCXX: define aarch64_vector_pcs void @_Z1hPi(
f(a);
diff --git a/test/CodeGen/alloc-align-attr.c b/test/CodeGen/alloc-align-attr.c
index b7cfcf76d4..6294450d04 100644
--- a/test/CodeGen/alloc-align-attr.c
+++ b/test/CodeGen/alloc-align-attr.c
@@ -6,11 +6,9 @@ __INT32_TYPE__*m1(__INT32_TYPE__ i) __attribute__((alloc_align(1)));
__INT32_TYPE__ test1(__INT32_TYPE__ a) {
// CHECK: define i32 @test1
return *m1(a);
-// CHECK: call i32* @m1(i32 [[PARAM1:%[^\)]+]])
-// CHECK: [[ALIGNCAST1:%.+]] = sext i32 [[PARAM1]] to i64
-// CHECK: [[ISPOS1:%.+]] = icmp sgt i64 [[ALIGNCAST1]], 0
-// CHECK: [[POSMASK1:%.+]] = sub i64 [[ALIGNCAST1]], 1
-// CHECK: [[MASK1:%.+]] = select i1 [[ISPOS1]], i64 [[POSMASK1]], i64 0
+// CHECK: call i32* @m1(i32 [[PARAM1:%[^\)]+]])
+// CHECK: [[ALIGNCAST1:%.+]] = zext i32 [[PARAM1]] to i64
+// CHECK: [[MASK1:%.+]] = sub i64 [[ALIGNCAST1]], 1
// CHECK: [[PTRINT1:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR1:%.+]] = and i64 [[PTRINT1]], [[MASK1]]
// CHECK: [[MASKCOND1:%.+]] = icmp eq i64 [[MASKEDPTR1]], 0
@@ -22,10 +20,8 @@ __INT32_TYPE__ test2(__SIZE_TYPE__ a) {
return *m1(a);
// CHECK: [[CONV2:%.+]] = trunc i64 %{{.+}} to i32
// CHECK: call i32* @m1(i32 [[CONV2]])
-// CHECK: [[ALIGNCAST2:%.+]] = sext i32 [[CONV2]] to i64
-// CHECK: [[ISPOS2:%.+]] = icmp sgt i64 [[ALIGNCAST2]], 0
-// CHECK: [[POSMASK2:%.+]] = sub i64 [[ALIGNCAST2]], 1
-// CHECK: [[MASK2:%.+]] = select i1 [[ISPOS2]], i64 [[POSMASK2]], i64 0
+// CHECK: [[ALIGNCAST2:%.+]] = zext i32 [[CONV2]] to i64
+// CHECK: [[MASK2:%.+]] = sub i64 [[ALIGNCAST2]], 1
// CHECK: [[PTRINT2:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR2:%.+]] = and i64 [[PTRINT2]], [[MASK2]]
// CHECK: [[MASKCOND2:%.+]] = icmp eq i64 [[MASKEDPTR2]], 0
@@ -39,9 +35,7 @@ __INT32_TYPE__ test3(__INT32_TYPE__ a) {
return *m2(a);
// CHECK: [[CONV3:%.+]] = sext i32 %{{.+}} to i64
// CHECK: call i32* @m2(i64 [[CONV3]])
-// CHECK: [[ISPOS3:%.+]] = icmp sgt i64 [[CONV3]], 0
-// CHECK: [[POSMASK3:%.+]] = sub i64 [[CONV3]], 1
-// CHECK: [[MASK3:%.+]] = select i1 [[ISPOS3]], i64 [[POSMASK3]], i64 0
+// CHECK: [[MASK3:%.+]] = sub i64 [[CONV3]], 1
// CHECK: [[PTRINT3:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR3:%.+]] = and i64 [[PTRINT3]], [[MASK3]]
// CHECK: [[MASKCOND3:%.+]] = icmp eq i64 [[MASKEDPTR3]], 0
@@ -52,10 +46,8 @@ __INT32_TYPE__ test3(__INT32_TYPE__ a) {
__INT32_TYPE__ test4(__SIZE_TYPE__ a) {
// CHECK: define i32 @test4
return *m2(a);
-// CHECK: call i32* @m2(i64 [[PARAM4:%[^\)]+]])
-// CHECK: [[ISPOS4:%.+]] = icmp sgt i64 [[PARAM4]], 0
-// CHECK: [[POSMASK4:%.+]] = sub i64 [[PARAM4]], 1
-// CHECK: [[MASK4:%.+]] = select i1 [[ISPOS4]], i64 [[POSMASK4]], i64 0
+// CHECK: call i32* @m2(i64 [[PARAM4:%[^\)]+]])
+// CHECK: [[MASK4:%.+]] = sub i64 [[PARAM4]], 1
// CHECK: [[PTRINT4:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR4:%.+]] = and i64 [[PTRINT4]], [[MASK4]]
// CHECK: [[MASKCOND4:%.+]] = icmp eq i64 [[MASKEDPTR4]], 0
@@ -72,11 +64,9 @@ __INT32_TYPE__ test5(__int128_t a) {
// CHECK: define i32 @test5
struct Empty e;
return *m3(e, a);
-// CHECK: call i32* @m3(i64 %{{.*}}, i64 %{{.*}})
+// CHECK: call i32* @m3(i64 %{{.*}}, i64 %{{.*}})
// CHECK: [[ALIGNCAST5:%.+]] = trunc i128 %{{.*}} to i64
-// CHECK: [[ISPOS5:%.+]] = icmp sgt i64 [[ALIGNCAST5]], 0
-// CHECK: [[POSMASK5:%.+]] = sub i64 [[ALIGNCAST5]], 1
-// CHECK: [[MASK5:%.+]] = select i1 [[ISPOS5]], i64 [[POSMASK5]], i64 0
+// CHECK: [[MASK5:%.+]] = sub i64 [[ALIGNCAST5]], 1
// CHECK: [[PTRINT5:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR5:%.+]] = and i64 [[PTRINT5]], [[MASK5]]
// CHECK: [[MASKCOND5:%.+]] = icmp eq i64 [[MASKEDPTR5]], 0
@@ -88,11 +78,9 @@ __INT32_TYPE__ test6(__int128_t a) {
// CHECK: define i32 @test6
struct MultiArgs e;
return *m4(e, a);
-// CHECK: call i32* @m4(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+// CHECK: call i32* @m4(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
// CHECK: [[ALIGNCAST6:%.+]] = trunc i128 %{{.*}} to i64
-// CHECK: [[ISPOS6:%.+]] = icmp sgt i64 [[ALIGNCAST6]], 0
-// CHECK: [[POSMASK6:%.+]] = sub i64 [[ALIGNCAST6]], 1
-// CHECK: [[MASK6:%.+]] = select i1 [[ISPOS6]], i64 [[POSMASK6]], i64 0
+// CHECK: [[MASK6:%.+]] = sub i64 [[ALIGNCAST6]], 1
// CHECK: [[PTRINT6:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR6:%.+]] = and i64 [[PTRINT6]], [[MASK6]]
// CHECK: [[MASKCOND6:%.+]] = icmp eq i64 [[MASKEDPTR6]], 0
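
The updated CHECK lines reflect that the alignment argument is now treated as an unsigned power of two, so the runtime mask is simply zext(align) - 1 with no sign test or select. A minimal sketch of the source pattern these tests exercise (the declaration is hypothetical, modeled on the test's m1):

  /* m is assumed to return memory aligned to its first argument. */
  int *m(int align) __attribute__((alloc_align(1)));

  int load(int align) {
    /* For the dereference, clang emits: mask = zext(align) - 1, then
       assumes (ptrtoint(p) & mask) == 0. */
    int *p = m(align);
    return *p;
  }
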
diff --git a/test/CodeGen/alloc-size.c b/test/CodeGen/alloc-size.c
index 1c98b6874d..16cc0fe118 100644
--- a/test/CodeGen/alloc-size.c
+++ b/test/CodeGen/alloc-size.c
@@ -1,4 +1,11 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -DDYNAMIC -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+
+#ifdef DYNAMIC
+#define OBJECT_SIZE_BUILTIN __builtin_dynamic_object_size
+#else
+#define OBJECT_SIZE_BUILTIN __builtin_object_size
+#endif
#define NULL ((void *)0)
@@ -20,86 +27,86 @@ void *my_calloc(size_t, size_t) __attribute__((alloc_size(1, 2)));
void test1() {
void *const vp = my_malloc(100);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 0);
+ gi = OBJECT_SIZE_BUILTIN(vp, 0);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 1);
+ gi = OBJECT_SIZE_BUILTIN(vp, 1);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 2);
+ gi = OBJECT_SIZE_BUILTIN(vp, 2);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 3);
+ gi = OBJECT_SIZE_BUILTIN(vp, 3);
void *const arr = my_calloc(100, 5);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr, 0);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr, 1);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr, 2);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr, 3);
// CHECK: store i32 100
- gi = __builtin_object_size(my_malloc(100), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_malloc(100), 0);
// CHECK: store i32 100
- gi = __builtin_object_size(my_malloc(100), 1);
+ gi = OBJECT_SIZE_BUILTIN(my_malloc(100), 1);
// CHECK: store i32 100
- gi = __builtin_object_size(my_malloc(100), 2);
+ gi = OBJECT_SIZE_BUILTIN(my_malloc(100), 2);
// CHECK: store i32 100
- gi = __builtin_object_size(my_malloc(100), 3);
+ gi = OBJECT_SIZE_BUILTIN(my_malloc(100), 3);
// CHECK: store i32 500
- gi = __builtin_object_size(my_calloc(100, 5), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_calloc(100, 5), 0);
// CHECK: store i32 500
- gi = __builtin_object_size(my_calloc(100, 5), 1);
+ gi = OBJECT_SIZE_BUILTIN(my_calloc(100, 5), 1);
// CHECK: store i32 500
- gi = __builtin_object_size(my_calloc(100, 5), 2);
+ gi = OBJECT_SIZE_BUILTIN(my_calloc(100, 5), 2);
// CHECK: store i32 500
- gi = __builtin_object_size(my_calloc(100, 5), 3);
+ gi = OBJECT_SIZE_BUILTIN(my_calloc(100, 5), 3);
void *const zeroPtr = my_malloc(0);
// CHECK: store i32 0
- gi = __builtin_object_size(zeroPtr, 0);
+ gi = OBJECT_SIZE_BUILTIN(zeroPtr, 0);
// CHECK: store i32 0
- gi = __builtin_object_size(my_malloc(0), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_malloc(0), 0);
void *const zeroArr1 = my_calloc(0, 1);
void *const zeroArr2 = my_calloc(1, 0);
// CHECK: store i32 0
- gi = __builtin_object_size(zeroArr1, 0);
+ gi = OBJECT_SIZE_BUILTIN(zeroArr1, 0);
// CHECK: store i32 0
- gi = __builtin_object_size(zeroArr2, 0);
+ gi = OBJECT_SIZE_BUILTIN(zeroArr2, 0);
// CHECK: store i32 0
- gi = __builtin_object_size(my_calloc(1, 0), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_calloc(1, 0), 0);
// CHECK: store i32 0
- gi = __builtin_object_size(my_calloc(0, 1), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_calloc(0, 1), 0);
}
// CHECK-LABEL: @test2
void test2() {
void *const vp = my_malloc(gi);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(vp, 0);
+ gi = OBJECT_SIZE_BUILTIN(vp, 0);
void *const arr1 = my_calloc(gi, 1);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr1, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr1, 0);
void *const arr2 = my_calloc(1, gi);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr2, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr2, 0);
}
// CHECK-LABEL: @test3
void test3() {
char *const buf = (char *)my_calloc(100, 5);
// CHECK: store i32 500
- gi = __builtin_object_size(buf, 0);
+ gi = OBJECT_SIZE_BUILTIN(buf, 0);
// CHECK: store i32 500
- gi = __builtin_object_size(buf, 1);
+ gi = OBJECT_SIZE_BUILTIN(buf, 1);
// CHECK: store i32 500
- gi = __builtin_object_size(buf, 2);
+ gi = OBJECT_SIZE_BUILTIN(buf, 2);
// CHECK: store i32 500
- gi = __builtin_object_size(buf, 3);
+ gi = OBJECT_SIZE_BUILTIN(buf, 3);
}
struct Data {
@@ -113,41 +120,41 @@ struct Data {
void test5() {
struct Data *const data = my_malloc(sizeof(*data));
// CHECK: store i32 48
- gi = __builtin_object_size(data, 0);
+ gi = OBJECT_SIZE_BUILTIN(data, 0);
// CHECK: store i32 48
- gi = __builtin_object_size(data, 1);
+ gi = OBJECT_SIZE_BUILTIN(data, 1);
// CHECK: store i32 48
- gi = __builtin_object_size(data, 2);
+ gi = OBJECT_SIZE_BUILTIN(data, 2);
// CHECK: store i32 48
- gi = __builtin_object_size(data, 3);
+ gi = OBJECT_SIZE_BUILTIN(data, 3);
// CHECK: store i32 40
- gi = __builtin_object_size(&data->t[1], 0);
+ gi = OBJECT_SIZE_BUILTIN(&data->t[1], 0);
// CHECK: store i32 36
- gi = __builtin_object_size(&data->t[1], 1);
+ gi = OBJECT_SIZE_BUILTIN(&data->t[1], 1);
// CHECK: store i32 40
- gi = __builtin_object_size(&data->t[1], 2);
+ gi = OBJECT_SIZE_BUILTIN(&data->t[1], 2);
// CHECK: store i32 36
- gi = __builtin_object_size(&data->t[1], 3);
+ gi = OBJECT_SIZE_BUILTIN(&data->t[1], 3);
struct Data *const arr = my_calloc(sizeof(*data), 2);
// CHECK: store i32 96
- gi = __builtin_object_size(arr, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr, 0);
// CHECK: store i32 96
- gi = __builtin_object_size(arr, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr, 1);
// CHECK: store i32 96
- gi = __builtin_object_size(arr, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr, 2);
// CHECK: store i32 96
- gi = __builtin_object_size(arr, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr, 3);
// CHECK: store i32 88
- gi = __builtin_object_size(&arr->t[1], 0);
+ gi = OBJECT_SIZE_BUILTIN(&arr->t[1], 0);
// CHECK: store i32 36
- gi = __builtin_object_size(&arr->t[1], 1);
+ gi = OBJECT_SIZE_BUILTIN(&arr->t[1], 1);
// CHECK: store i32 88
- gi = __builtin_object_size(&arr->t[1], 2);
+ gi = OBJECT_SIZE_BUILTIN(&arr->t[1], 2);
// CHECK: store i32 36
- gi = __builtin_object_size(&arr->t[1], 3);
+ gi = OBJECT_SIZE_BUILTIN(&arr->t[1], 3);
}
// CHECK-LABEL: @test6
@@ -156,13 +163,13 @@ void test6() {
// so when we know the source of the allocation.
struct Data *const data = my_malloc(sizeof(*data) + 10);
// CHECK: store i32 11
- gi = __builtin_object_size(data->end, 0);
+ gi = OBJECT_SIZE_BUILTIN(data->end, 0);
// CHECK: store i32 11
- gi = __builtin_object_size(data->end, 1);
+ gi = OBJECT_SIZE_BUILTIN(data->end, 1);
// CHECK: store i32 11
- gi = __builtin_object_size(data->end, 2);
+ gi = OBJECT_SIZE_BUILTIN(data->end, 2);
// CHECK: store i32 11
- gi = __builtin_object_size(data->end, 3);
+ gi = OBJECT_SIZE_BUILTIN(data->end, 3);
struct Data *const arr = my_calloc(sizeof(*arr) + 5, 3);
// AFAICT, GCC treats malloc and calloc identically. So, we should do the
@@ -178,67 +185,67 @@ void test6() {
// or "allocate M smaller `Data`s with extra padding".
// CHECK: store i32 112
- gi = __builtin_object_size(arr->end, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr->end, 0);
// CHECK: store i32 112
- gi = __builtin_object_size(arr->end, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr->end, 1);
// CHECK: store i32 112
- gi = __builtin_object_size(arr->end, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr->end, 2);
// CHECK: store i32 112
- gi = __builtin_object_size(arr->end, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr->end, 3);
// CHECK: store i32 112
- gi = __builtin_object_size(arr[0].end, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr[0].end, 0);
// CHECK: store i32 112
- gi = __builtin_object_size(arr[0].end, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr[0].end, 1);
// CHECK: store i32 112
- gi = __builtin_object_size(arr[0].end, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr[0].end, 2);
// CHECK: store i32 112
- gi = __builtin_object_size(arr[0].end, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr[0].end, 3);
// CHECK: store i32 64
- gi = __builtin_object_size(arr[1].end, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr[1].end, 0);
// CHECK: store i32 64
- gi = __builtin_object_size(arr[1].end, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr[1].end, 1);
// CHECK: store i32 64
- gi = __builtin_object_size(arr[1].end, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr[1].end, 2);
// CHECK: store i32 64
- gi = __builtin_object_size(arr[1].end, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr[1].end, 3);
// CHECK: store i32 16
- gi = __builtin_object_size(arr[2].end, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr[2].end, 0);
// CHECK: store i32 16
- gi = __builtin_object_size(arr[2].end, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr[2].end, 1);
// CHECK: store i32 16
- gi = __builtin_object_size(arr[2].end, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr[2].end, 2);
// CHECK: store i32 16
- gi = __builtin_object_size(arr[2].end, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr[2].end, 3);
}
// CHECK-LABEL: @test7
void test7() {
struct Data *const data = my_malloc(sizeof(*data) + 5);
// CHECK: store i32 9
- gi = __builtin_object_size(data->pad, 0);
+ gi = OBJECT_SIZE_BUILTIN(data->pad, 0);
// CHECK: store i32 3
- gi = __builtin_object_size(data->pad, 1);
+ gi = OBJECT_SIZE_BUILTIN(data->pad, 1);
// CHECK: store i32 9
- gi = __builtin_object_size(data->pad, 2);
+ gi = OBJECT_SIZE_BUILTIN(data->pad, 2);
// CHECK: store i32 3
- gi = __builtin_object_size(data->pad, 3);
+ gi = OBJECT_SIZE_BUILTIN(data->pad, 3);
}
// CHECK-LABEL: @test8
void test8() {
// Non-const pointers aren't currently supported.
void *buf = my_calloc(100, 5);
- // CHECK: @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(buf, 0);
+ // CHECK: @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(buf, 0);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(buf, 1);
+ gi = OBJECT_SIZE_BUILTIN(buf, 1);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(buf, 2);
+ gi = OBJECT_SIZE_BUILTIN(buf, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(buf, 3);
+ gi = OBJECT_SIZE_BUILTIN(buf, 3);
}
// CHECK-LABEL: @test9
@@ -249,11 +256,11 @@ void test9() {
short *const buf2 = ((short*)(my_malloc(100)));
// CHECK: store i32 100
- gi = __builtin_object_size(buf0, 0);
+ gi = OBJECT_SIZE_BUILTIN(buf0, 0);
// CHECK: store i32 100
- gi = __builtin_object_size(buf1, 0);
+ gi = OBJECT_SIZE_BUILTIN(buf1, 0);
// CHECK: store i32 100
- gi = __builtin_object_size(buf2, 0);
+ gi = OBJECT_SIZE_BUILTIN(buf2, 0);
}
// CHECK-LABEL: @test10
@@ -261,36 +268,36 @@ void test10() {
// Yay overflow
short *const arr = my_calloc((size_t)-1 / 2 + 1, 2);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr, 0);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr, 1);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(arr, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr, 3);
// As an implementation detail, CharUnits can't handle numbers greater than or
// equal to 2**63. Realistically, this shouldn't be a problem, but we should
// be sure we don't emit crazy results for this case.
short *const buf = my_malloc((size_t)-1);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(buf, 0);
+ gi = OBJECT_SIZE_BUILTIN(buf, 0);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(buf, 1);
+ gi = OBJECT_SIZE_BUILTIN(buf, 1);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(buf, 2);
+ gi = OBJECT_SIZE_BUILTIN(buf, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(buf, 3);
+ gi = OBJECT_SIZE_BUILTIN(buf, 3);
short *const arr_big = my_calloc((size_t)-1 / 2 - 1, 2);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr_big, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr_big, 0);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr_big, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr_big, 1);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr_big, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr_big, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(arr_big, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr_big, 3);
}
void *my_tiny_malloc(char) __attribute__((alloc_size(1)));
@@ -300,25 +307,25 @@ void *my_tiny_calloc(char, char) __attribute__((alloc_size(1, 2)));
void test11() {
void *const vp = my_tiny_malloc(100);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 0);
+ gi = OBJECT_SIZE_BUILTIN(vp, 0);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 1);
+ gi = OBJECT_SIZE_BUILTIN(vp, 1);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 2);
+ gi = OBJECT_SIZE_BUILTIN(vp, 2);
// CHECK: store i32 100
- gi = __builtin_object_size(vp, 3);
+ gi = OBJECT_SIZE_BUILTIN(vp, 3);
// N.B. This causes char overflow, but not size_t overflow, so it should be
// supported.
void *const arr = my_tiny_calloc(100, 5);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr, 0);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 1);
+ gi = OBJECT_SIZE_BUILTIN(arr, 1);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 2);
+ gi = OBJECT_SIZE_BUILTIN(arr, 2);
// CHECK: store i32 500
- gi = __builtin_object_size(arr, 3);
+ gi = OBJECT_SIZE_BUILTIN(arr, 3);
}
void *my_signed_malloc(long) __attribute__((alloc_size(1)));
@@ -327,26 +334,35 @@ void *my_signed_calloc(long, long) __attribute__((alloc_size(1, 2)));
// CHECK-LABEL: @test12
void test12() {
// CHECK: store i32 100
- gi = __builtin_object_size(my_signed_malloc(100), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_signed_malloc(100), 0);
// CHECK: store i32 500
- gi = __builtin_object_size(my_signed_calloc(100, 5), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_signed_calloc(100, 5), 0);
void *const vp = my_signed_malloc(-2);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(vp, 0);
+ gi = OBJECT_SIZE_BUILTIN(vp, 0);
// N.B. These get lowered to -1 because the function calls may have
// side-effects, and we can't determine the objectsize.
// CHECK: store i32 -1
- gi = __builtin_object_size(my_signed_malloc(-2), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_signed_malloc(-2), 0);
void *const arr1 = my_signed_calloc(-2, 1);
void *const arr2 = my_signed_calloc(1, -2);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr1, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr1, 0);
// CHECK: @llvm.objectsize
- gi = __builtin_object_size(arr2, 0);
+ gi = OBJECT_SIZE_BUILTIN(arr2, 0);
// CHECK: store i32 -1
- gi = __builtin_object_size(my_signed_calloc(1, -2), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_signed_calloc(1, -2), 0);
// CHECK: store i32 -1
- gi = __builtin_object_size(my_signed_calloc(-2, 1), 0);
+ gi = OBJECT_SIZE_BUILTIN(my_signed_calloc(-2, 1), 0);
+}
+
+void *alloc_uchar(unsigned char) __attribute__((alloc_size(1)));
+
+// CHECK-LABEL: @test13
+void test13() {
+ // If 128 were incorrectly seen as negative, the result would become -1.
+ // CHECK: store i32 128,
+ gi = OBJECT_SIZE_BUILTIN(alloc_uchar(128), 0);
}
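
The new -DDYNAMIC RUN line re-runs every case with __builtin_dynamic_object_size, which is allowed to produce a runtime value where __builtin_object_size must fold to a constant. A minimal sketch of the macro pattern the test adopts (my_malloc mirrors the test's own attributed declaration):

  #ifdef DYNAMIC
  #define OBJECT_SIZE_BUILTIN __builtin_dynamic_object_size
  #else
  #define OBJECT_SIZE_BUILTIN __builtin_object_size
  #endif

  void *my_malloc(unsigned long) __attribute__((alloc_size(1)));

  unsigned long known_size(void) {
    /* Folds to 100 under either builtin: alloc_size(1) pins the size. */
    return OBJECT_SIZE_BUILTIN(my_malloc(100), 0);
  }
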
diff --git a/test/CodeGen/annotations-builtin.c b/test/CodeGen/annotations-builtin.c
index 8a3b3ffcec..e6dd3587af 100644
--- a/test/CodeGen/annotations-builtin.c
+++ b/test/CodeGen/annotations-builtin.c
@@ -43,4 +43,7 @@ int main(int argc, char **argv) {
// CHECK: call i32 @llvm.annotation.i32
// CHECK: inttoptr {{.*}} to i8**
return 0;
+
+ int after_return = __builtin_annotation(argc, "annotation_a");
+// CHECK-NOT: call i32 @llvm.annotation.i32
}
diff --git a/test/CodeGen/annotations-var.c b/test/CodeGen/annotations-var.c
index 6e8ad34c65..3dc6dca3e7 100644
--- a/test/CodeGen/annotations-var.c
+++ b/test/CodeGen/annotations-var.c
@@ -39,10 +39,19 @@ void local(void) {
// LOCAL-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 33)
}
+void local_after_return(void) {
+ return;
+ int localvar __attribute__((annotate("localvar_after_return"))) = 3;
+// Test we are not emitting instructions like bitcast or call outside of a basic block.
+// LOCAL-LABEL: define void @local_after_return()
+// LOCAL: [[LOCALVAR:%.*]] = alloca i32,
+// LOCAL-NEXT: ret void
+}
+
void undef(void) {
int undefvar __attribute__((annotate("undefvar_ann_0")));
// UNDEF-LABEL: define void @undef()
// UNDEF: [[UNDEFVAR:%.*]] = alloca i32,
// UNDEF-NEXT: [[T0:%.*]] = bitcast i32* [[UNDEFVAR]] to i8*
-// UNDEF-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 43)
+// UNDEF-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 52)
}
diff --git a/test/CodeGen/arm-target-features.c b/test/CodeGen/arm-target-features.c
index ca574cc05d..f58d37824a 100644
--- a/test/CodeGen/arm-target-features.c
+++ b/test/CodeGen/arm-target-features.c
@@ -31,6 +31,7 @@
// CHECK-BASIC-V8: "target-features"="+armv8-a,+crc,+crypto,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,+thumb-mode"
// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+dotprod,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode"
// RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM
diff --git a/test/CodeGen/arm64-crc32.c b/test/CodeGen/arm64-crc32.c
index 2d913fb123..26d69a23b6 100644
--- a/test/CodeGen/arm64-crc32.c
+++ b/test/CodeGen/arm64-crc32.c
@@ -1,54 +1,57 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple arm64-none-linux-gnu \
// RUN: -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-windows \
+// RUN: -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+#include <stdint.h>
-int crc32b(int a, char b)
+uint32_t crc32b(uint32_t a, uint8_t b)
{
return __builtin_arm_crc32b(a,b);
// CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32
// CHECK: call i32 @llvm.aarch64.crc32b(i32 %a, i32 [[T0]])
}
-int crc32cb(int a, char b)
+uint32_t crc32cb(uint32_t a, uint8_t b)
{
return __builtin_arm_crc32cb(a,b);
// CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32
// CHECK: call i32 @llvm.aarch64.crc32cb(i32 %a, i32 [[T0]])
}
-int crc32h(int a, short b)
+uint32_t crc32h(uint32_t a, uint16_t b)
{
return __builtin_arm_crc32h(a,b);
// CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32
// CHECK: call i32 @llvm.aarch64.crc32h(i32 %a, i32 [[T0]])
}
-int crc32ch(int a, short b)
+uint32_t crc32ch(uint32_t a, uint16_t b)
{
return __builtin_arm_crc32ch(a,b);
// CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32
// CHECK: call i32 @llvm.aarch64.crc32ch(i32 %a, i32 [[T0]])
}
-int crc32w(int a, int b)
+uint32_t crc32w(uint32_t a, uint32_t b)
{
return __builtin_arm_crc32w(a,b);
// CHECK: call i32 @llvm.aarch64.crc32w(i32 %a, i32 %b)
}
-int crc32cw(int a, int b)
+uint32_t crc32cw(uint32_t a, uint32_t b)
{
return __builtin_arm_crc32cw(a,b);
// CHECK: call i32 @llvm.aarch64.crc32cw(i32 %a, i32 %b)
}
-int crc32d(int a, long b)
+uint32_t crc32d(uint32_t a, uint64_t b)
{
return __builtin_arm_crc32d(a,b);
// CHECK: call i32 @llvm.aarch64.crc32x(i32 %a, i64 %b)
}
-int crc32cd(int a, long b)
+uint32_t crc32cd(uint32_t a, uint64_t b)
{
return __builtin_arm_crc32cd(a,b);
// CHECK: call i32 @llvm.aarch64.crc32cx(i32 %a, i64 %b)
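
Retyping the wrappers with stdint.h types matches the unsigned fixed-width semantics of the CRC32 builtins and lets the same file build for the new aarch64-windows RUN line. A quick sketch (function name is illustrative; assumes a CRC-capable AArch64 target):

  #include <stdint.h>

  /* One word of a CRC-32C computation; lowers to llvm.aarch64.crc32cw. */
  uint32_t crc32c_word(uint32_t crc, uint32_t data) {
    return __builtin_arm_crc32cw(crc, data);
  }
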
diff --git a/test/CodeGen/arm64-microsoft-arguments.cpp b/test/CodeGen/arm64-microsoft-arguments.cpp
index 3ef468880a..356bd8c974 100644
--- a/test/CodeGen/arm64-microsoft-arguments.cpp
+++ b/test/CodeGen/arm64-microsoft-arguments.cpp
@@ -1,25 +1,203 @@
// RUN: %clang_cc1 -triple aarch64-windows -ffreestanding -emit-llvm -O0 \
// RUN: -x c++ -o - %s | FileCheck %s
-struct pod { int a, b, c, d, e; };
+// Pass and return for type size <= 8 bytes.
+// CHECK: define {{.*}} i64 @{{.*}}f1{{.*}}()
+// CHECK: call i64 {{.*}}func1{{.*}}(i64 %3)
+struct S1 {
+ int a[2];
+};
+
+S1 func1(S1 x);
+S1 f1() {
+ S1 x;
+ return func1(x);
+}
+
+// Pass and return type size <= 16 bytes.
+// CHECK: define {{.*}} [2 x i64] @{{.*}}f2{{.*}}()
+// CHECK: call [2 x i64] {{.*}}func2{{.*}}([2 x i64] %3)
+struct S2 {
+ int a[4];
+};
+
+S2 func2(S2 x);
+S2 f2() {
+ S2 x;
+ return func2(x);
+}
+
+// Pass and return for type size > 16 bytes.
+// CHECK: define {{.*}} void @{{.*}}f3{{.*}}(%struct.S3* noalias sret %agg.result)
+// CHECK: call void {{.*}}func3{{.*}}(%struct.S3* sret %agg.result, %struct.S3* %agg.tmp)
+struct S3 {
+ int a[5];
+};
+
+S3 func3(S3 x);
+S3 f3() {
+ S3 x;
+ return func3(x);
+}
+
+// Pass and return aggregate (of size < 16 bytes) with non-trivial destructor.
+// Passed directly but returned indirectly.
+// CHECK: define {{.*}} void {{.*}}f4{{.*}}(%struct.S4* inreg noalias sret %agg.result)
+// CHECK: call void {{.*}}func4{{.*}}(%struct.S4* inreg sret %agg.result, [2 x i64] %4)
+struct S4 {
+ int a[3];
+ ~S4();
+};
+
+S4 func4(S4 x);
+S4 f4() {
+ S4 x;
+ return func4(x);
+}
+
+// Pass and return from instance method called from instance method.
+// CHECK: define {{.*}} void @{{.*}}bar@Q1{{.*}}(%class.Q1* %this, %class.P1* inreg noalias sret %agg.result)
+// CHECK: call void {{.*}}foo@P1{{.*}}(%class.P1* %ref.tmp, %class.P1* inreg sret %agg.result, i8 %0)
+
+class P1 {
+public:
+ P1 foo(P1 x);
+};
+
+class Q1 {
+public:
+ P1 bar();
+};
+
+P1 Q1::bar() {
+ P1 p1;
+ return P1().foo(p1);
+}
+
+// Pass and return from instance method called from free function.
+// CHECK: define {{.*}} void {{.*}}bar{{.*}}()
+// CHECK: call void {{.*}}foo@P2{{.*}}(%class.P2* %ref.tmp, %class.P2* inreg sret %retval, i8 %0)
+class P2 {
+public:
+ P2 foo(P2 x);
+};
+
+P2 bar() {
+ P2 p2;
+ return P2().foo(p2);
+}
-struct non_pod {
- int a;
- non_pod() {}
+// Pass and return an object with a user-provided constructor (passed directly,
+// returned indirectly)
+// CHECK: define {{.*}} void @{{.*}}f5{{.*}}(%struct.S5* inreg noalias sret %agg.result)
+// CHECK: call void {{.*}}func5{{.*}}(%struct.S5* inreg sret %agg.result, i64 {{.*}})
+struct S5 {
+ S5();
+ int x;
};
-struct pod s;
-struct non_pod t;
+S5 func5(S5 x);
+S5 f5() {
+ S5 x;
+ return func5(x);
+}
-struct pod bar() { return s; }
-struct non_pod foo() { return t; }
-// CHECK: define {{.*}} void @{{.*}}bar{{.*}}(%struct.pod* noalias sret %agg.result)
-// CHECK: define {{.*}} void @{{.*}}foo{{.*}}(%struct.non_pod* noalias %agg.result)
+// Pass and return an object with a non-trivial explicitly defaulted constructor
+// (passed directly, returned directly)
+// CHECK: define {{.*}} i64 @"?f6@@YA?AUS6@@XZ"()
+// CHECK: call i64 {{.*}}func6{{.*}}(i64 {{.*}})
+struct S6a {
+ S6a();
+};
+
+struct S6 {
+ S6() = default;
+ S6a x;
+};
+
+S6 func6(S6 x);
+S6 f6() {
+ S6 x;
+ return func6(x);
+}
+
+// Pass and return an object with a non-trivial implicitly defaulted constructor
+// (passed directly, returned directly)
+// CHECK: define {{.*}} i64 @"?f7@@YA?AUS7@@XZ"()
+// CHECK: call i64 {{.*}}func7{{.*}}(i64 {{.*}})
+struct S7 {
+ S6a x;
+};
+
+S7 func7(S7 x);
+S7 f7() {
+ S7 x;
+ return func7(x);
+}
+
+struct S8a {
+ ~S8a();
+};
+
+// Pass and return an object with a non-trivial default destructor (passed
+// directly, returned indirectly)
+struct S8 {
+ S8a x;
+ int y;
+};
+
+// CHECK: define {{.*}} void {{.*}}?f8{{.*}}(%struct.S8* inreg noalias sret {{.*}})
+// CHECK: call void {{.*}}func8{{.*}}(%struct.S8* inreg sret {{.*}}, i64 {{.*}})
+S8 func8(S8 x);
+S8 f8() {
+ S8 x;
+ return func8(x);
+}
+
+
+// Pass and return an object with a non-trivial copy-assignment operator and
+// a trivial copy constructor (passed directly, returned indirectly)
+// CHECK: define {{.*}} void @"?f9@@YA?AUS9@@XZ"(%struct.S9* inreg noalias sret {{.*}})
+// CHECK: call void {{.*}}func9{{.*}}(%struct.S9* inreg sret {{.*}}, i64 {{.*}})
+struct S9 {
+ S9& operator=(const S9&);
+ int x;
+};
+S9 func9(S9 x);
+S9 f9() {
+ S9 x;
+ S9 y = x;
+ x = y;
+ return func9(x);
+}
-// Check instance methods.
-struct pod2 { int x; };
-struct Baz { pod2 baz(); };
+// Pass and return an object with a base class (passed directly, returned
+// indirectly).
+// CHECK: define dso_local void {{.*}}f10{{.*}}(%struct.S10* inreg noalias sret {{.*}})
+// CHECK: call void {{.*}}func10{{.*}}(%struct.S10* inreg sret {{.*}}, [2 x i64] {{.*}})
+struct S10 : public S1 {
+ int x;
+};
+
+S10 func10(S10 x);
+S10 f10() {
+ S10 x;
+ return func10(x);
+}
+
+
+// Pass and return a non-aggregate object larger than 128 bits (passed
+// indirectly, returned indirectly)
+// CHECK: define dso_local void {{.*}}f11{{.*}}(%struct.S11* inreg noalias sret {{.*}})
+// CHECK: call void {{.*}}func11{{.*}}(%struct.S11* inreg sret {{.*}}, %struct.S11* {{.*}})
+struct S11 {
+ virtual void f();
+ int a[5];
+};
-int qux() { return Baz().baz().x; }
-// CHECK: declare {{.*}} void @{{.*}}baz@Baz{{.*}}(%struct.Baz*, %struct.pod2*)
+S11 func11(S11 x);
+S11 f11() {
+ S11 x;
+ return func11(x);
+}
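
Taken together, the cases above pin down the Windows ARM64 C++ calling-convention behavior this test exercises. A condensed restatement, as a hedged reading of the CHECK lines rather than an authoritative ABI reference:

```cpp
// Summary of the lowering the checks above expect on aarch64-windows-msvc:
struct Small   { int a[2]; };              // <= 8 bytes: passed/returned as i64
struct Medium  { int a[4]; };              // <= 16 bytes: as [2 x i64]
struct Big     { int a[5]; };              // > 16 bytes: indirect on both sides (sret)
struct NonTriv { int a[3]; ~NonTriv(); };  // non-trivial special member: still
                                           // passed directly when small, but
                                           // always returned via inreg sret
```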
diff --git a/test/CodeGen/arm64-microsoft-status-reg.cpp b/test/CodeGen/arm64-microsoft-status-reg.cpp
index eb59bae50f..ee8439cbec 100644
--- a/test/CodeGen/arm64-microsoft-status-reg.cpp
+++ b/test/CodeGen/arm64-microsoft-status-reg.cpp
@@ -23,88 +23,112 @@
#define ARM64_TPIDRRO_EL0 ARM64_SYSREG(3,3,13, 0,3) // Thread ID Register, User Read Only [CP15_TPIDRURO]
#define ARM64_TPIDR_EL1 ARM64_SYSREG(3,0,13, 0,4) // Thread ID Register, Privileged Only [CP15_TPIDRPRW]
-void check_ReadWriteStatusReg(int v) {
- int ret;
+// From intrin.h
+__int64 _ReadStatusReg(int);
+void _WriteStatusReg(int, __int64);
+
+void check_ReadWriteStatusReg(__int64 v) {
+ __int64 ret;
ret = _ReadStatusReg(ARM64_CNTVCT);
// CHECK-ASM: mrs x8, CNTVCT_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD2:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_PMCCNTR_EL0);
// CHECK-ASM: mrs x8, PMCCNTR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD3:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD3:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_PMSELR_EL0);
// CHECK-ASM: mrs x8, PMSELR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD4:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD4:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_PMXEVCNTR_EL0);
// CHECK-ASM: mrs x8, PMXEVCNTR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD5:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD5:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(0));
// CHECK-ASM: mrs x8, PMEVCNTR0_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD6:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD6:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(1));
// CHECK-ASM: mrs x8, PMEVCNTR1_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD7:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD7:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(30));
// CHECK-ASM: mrs x8, PMEVCNTR30_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD8:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD8:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_TPIDR_EL0);
// CHECK-ASM: mrs x8, TPIDR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD9:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD9:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_TPIDRRO_EL0);
// CHECK-ASM: mrs x8, TPIDRRO_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD10:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD10:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
ret = _ReadStatusReg(ARM64_TPIDR_EL1);
// CHECK-ASM: mrs x8, TPIDR_EL1
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD11:.*]])
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD11:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
_WriteStatusReg(ARM64_CNTVCT, v);
// CHECK-ASM: msr S3_3_C14_C0_2, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD2:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD2:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_PMCCNTR_EL0, v);
// CHECK-ASM: msr PMCCNTR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD3:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD3:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_PMSELR_EL0, v);
// CHECK-ASM: msr PMSELR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD4:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD4:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_PMXEVCNTR_EL0, v);
// CHECK-ASM: msr PMXEVCNTR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD5:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD5:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_PMXEVCNTRn_EL0(0), v);
// CHECK-ASM: msr PMEVCNTR0_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD6:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD6:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_PMXEVCNTRn_EL0(1), v);
// CHECK-ASM: msr PMEVCNTR1_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD7:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD7:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_PMXEVCNTRn_EL0(30), v);
// CHECK-ASM: msr PMEVCNTR30_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD8:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD8:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_TPIDR_EL0, v);
// CHECK-ASM: msr TPIDR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD9:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD9:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_TPIDRRO_EL0, v);
// CHECK-ASM: msr TPIDRRO_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD10:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD10:.*]], i64 %[[VAR]])
_WriteStatusReg(ARM64_TPIDR_EL1, v);
// CHECK-ASM: msr TPIDR_EL1, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD11:.*]], i64 {{%.*}})
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD11:.*]], i64 %[[VAR]])
}
// CHECK-IR: ![[MD2]] = !{!"3:3:14:0:2"}
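
For reference, ARM64_SYSREG (used by the defines in the context above) packs the five system-register fields into the intrinsic's immediate. A sketch of the usual winnt.h definition, quoted from memory and therefore an assumption to verify against the SDK header:

```cpp
// Only the low bit of op0 is encoded; clang reconstructs the full op0 when
// emitting the "op0:op1:crn:crm:op2" register metadata checked above.
#define ARM64_SYSREG(op0, op1, crn, crm, op2)            \
    (((op0 & 1) << 14) | ((op1 & 7) << 11) |             \
     ((crn & 15) << 7) | ((crm & 15) << 3) | ((op2 & 7) << 0))

// Example: ARM64_CNTVCT == ARM64_SYSREG(3, 3, 14, 0, 2), which is why
// ![[MD2]] reads "3:3:14:0:2".
```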
diff --git a/test/CodeGen/arm64-microsoft-struct-align.cpp b/test/CodeGen/arm64-microsoft-struct-align.cpp
new file mode 100644
index 0000000000..4076c3ca34
--- /dev/null
+++ b/test/CodeGen/arm64-microsoft-struct-align.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple aarch64-windows -ffreestanding -emit-llvm -O0 \
+// RUN: -x c++ -o - %s | FileCheck %s
+
+struct size1 { char str[1]; };
+struct size2 { char str[2]; };
+struct size7 { char str[7]; };
+struct size8 { char str[8]; };
+struct size63 { char str[63]; };
+struct size64 { char str[64]; };
+
+struct size1 s1;
+// CHECK: @"?s1@@3Usize1@@A" = dso_local global %struct.size1 zeroinitializer, align 1
+
+struct size2 s2;
+// CHECK: @"?s2@@3Usize2@@A" = dso_local global %struct.size2 zeroinitializer, align 4
+
+struct size7 s7;
+// CHECK: @"?s7@@3Usize7@@A" = dso_local global %struct.size7 zeroinitializer, align 4
+
+struct size8 s8;
+// CHECK: @"?s8@@3Usize8@@A" = dso_local global %struct.size8 zeroinitializer, align 8
+
+struct size63 s63;
+// CHECK: @"?s63@@3Usize63@@A" = dso_local global %struct.size63 zeroinitializer, align 8
+
+struct size64 s64;
+// CHECK: @"?s64@@3Usize64@@A" = dso_local global %struct.size64 zeroinitializer, align 16
diff --git a/test/CodeGen/arm64-mte.c b/test/CodeGen/arm64-mte.c
new file mode 100644
index 0000000000..675c37ee4e
--- /dev/null
+++ b/test/CodeGen/arm64-mte.c
@@ -0,0 +1,110 @@
+// Test Memory Tagging Extension (MTE) intrinsics.
+// RUN: %clang_cc1 -triple aarch64-none-linux-eabi -target-feature +mte -O3 -S -emit-llvm -o - %s | FileCheck %s
+#include <stddef.h>
+#include <arm_acle.h>
+
+// CHECK-LABEL: define i32* @create_tag1
+int *create_tag1(int *a, unsigned b) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = zext i32 %b to i64
+// CHECK: [[T2:%[0-9]+]] = tail call i8* @llvm.aarch64.irg(i8* [[T0]], i64 [[T1]])
+// CHECK: bitcast i8* [[T2]] to i32*
+ return __arm_mte_create_random_tag(a,b);
+}
+
+// CHECK-LABEL: define i16* @create_tag2
+short *create_tag2(short *a, unsigned b) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i16* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = zext i32 %b to i64
+// CHECK: [[T2:%[0-9]+]] = tail call i8* @llvm.aarch64.irg(i8* [[T0]], i64 [[T1]])
+// CHECK: bitcast i8* [[T2]] to i16*
+ return __arm_mte_create_random_tag(a,b);
+}
+
+// CHECK-LABEL: define i8* @create_tag3
+char *create_tag3(char *a, unsigned b) {
+// CHECK: [[T1:%[0-9]+]] = zext i32 %b to i64
+// CHECK: [[T2:%[0-9]+]] = tail call i8* @llvm.aarch64.irg(i8* %a, i64 [[T1]])
+// CHECK: ret i8* [[T2:%[0-9]+]]
+ return __arm_mte_create_random_tag(a,b);
+}
+
+// CHECK-LABEL: define i8* @increment_tag1
+char *increment_tag1(char *a) {
+// CHECK: call i8* @llvm.aarch64.addg(i8* %a, i64 3)
+ return __arm_mte_increment_tag(a,3);
+}
+
+// CHECK-LABEL: define i16* @increment_tag2
+short *increment_tag2(short *a) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i16* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = tail call i8* @llvm.aarch64.addg(i8* [[T0]], i64 3)
+// CHECK: [[T2:%[0-9]+]] = bitcast i8* [[T1]] to i16*
+ return __arm_mte_increment_tag(a,3);
+}
+
+// CHECK-LABEL: define i32 @exclude_tag
+unsigned exclude_tag(int *a, unsigned m) {
+// CHECK: [[T0:%[0-9]+]] = zext i32 %m to i64
+// CHECK: [[T1:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: [[T2:%[0-9]+]] = tail call i64 @llvm.aarch64.gmi(i8* [[T1]], i64 [[T0]])
+// CHECK: trunc i64 [[T2]] to i32
+ return __arm_mte_exclude_tag(a, m);
+}
+
+// CHECK-LABEL: define i32* @get_tag1
+int *get_tag1(int *a) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = tail call i8* @llvm.aarch64.ldg(i8* [[T0]], i8* [[T0]])
+// CHECK: [[T2:%[0-9]+]] = bitcast i8* [[T1]] to i32*
+ return __arm_mte_get_tag(a);
+}
+
+// CHECK-LABEL: define i16* @get_tag2
+short *get_tag2(short *a) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i16* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = tail call i8* @llvm.aarch64.ldg(i8* [[T0]], i8* [[T0]])
+// CHECK: [[T2:%[0-9]+]] = bitcast i8* [[T1]] to i16*
+ return __arm_mte_get_tag(a);
+}
+
+// CHECK-LABEL: define void @set_tag1
+void set_tag1(int *a) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: tail call void @llvm.aarch64.stg(i8* [[T0]], i8* [[T0]])
+ __arm_mte_set_tag(a);
+}
+
+// CHECK-LABEL: define i64 @subtract_pointers
+ptrdiff_t subtract_pointers(int *a, int *b) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = bitcast i32* %b to i8*
+// CHECK: [[T2:%[0-9]+]] = tail call i64 @llvm.aarch64.subp(i8* [[T0]], i8* [[T1]])
+// CHECK: ret i64 [[T2]]
+ return __arm_mte_ptrdiff(a, b);
+}
+
+// CHECK-LABEL: define i64 @subtract_pointers_null_1
+ptrdiff_t subtract_pointers_null_1(int *a) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = tail call i64 @llvm.aarch64.subp(i8* [[T0]], i8* null)
+// CHECK: ret i64 [[T1]]
+ return __arm_mte_ptrdiff(a, NULL);
+}
+
+// CHECK-LABEL: define i64 @subtract_pointers_null_2
+ptrdiff_t subtract_pointers_null_2(int *a) {
+// CHECK: [[T0:%[0-9]+]] = bitcast i32* %a to i8*
+// CHECK: [[T1:%[0-9]+]] = tail call i64 @llvm.aarch64.subp(i8* null, i8* [[T0]])
+// CHECK: ret i64 [[T1]]
+ return __arm_mte_ptrdiff(NULL, a);
+}
+
+// Check that the i1 comparison result is zero-extended to the int return type
+// CHECK-LABEL: define i32 @subtract_pointers4
+int subtract_pointers4(void* a, void *b) {
+// CHECK: [[T0:%[0-9]+]] = tail call i64 @llvm.aarch64.subp(i8* %a, i8* %b)
+// CHECK-NEXT: %cmp = icmp slt i64 [[T0]], 1
+// CHECK-NEXT: = zext i1 %cmp to i32
+ return __arm_mte_ptrdiff(a,b) <= 0;
+}
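
A minimal sketch of how these intrinsics compose in practice, assuming an MTE-enabled target, a 16-byte-aligned pointer covering one tag granule, and the type-generic ACLE signatures (the casts keep the sketch valid either way); this helper is illustrative, not part of the test:

```cpp
#include <arm_acle.h>

// Tag one granule and access it through the tagged pointer.
int *tag_granule(int *p) {
  int *t = (int *)__arm_mte_create_random_tag(p, 0); // irg: random tag, no exclusions
  __arm_mte_set_tag(t);                              // stg: commit tag for t's granule
  *t = 42;                                           // accesses must use the tagged pointer
  return (int *)__arm_mte_get_tag(t);                // ldg: reload the tag from memory
}
```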
diff --git a/test/CodeGen/armv7k-abi.c b/test/CodeGen/armv7k-abi.c
index 9b57de8727..c17bc9d2f2 100644
--- a/test/CodeGen/armv7k-abi.c
+++ b/test/CodeGen/armv7k-abi.c
@@ -4,7 +4,7 @@
// Make sure 64 and 128 bit types are naturally aligned by the v7k ABI:
-// CHECK: target datalayout = "e-m:o-p:32:32-i64:64-a:0:32-n32-S128"
+// CHECK: target datalayout = "e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128"
typedef struct {
float arr[4];
diff --git a/test/CodeGen/asan-new-pm.ll b/test/CodeGen/asan-new-pm.ll
new file mode 100644
index 0000000000..72bcf055a6
--- /dev/null
+++ b/test/CodeGen/asan-new-pm.ll
@@ -0,0 +1,30 @@
+; Test that ASan runs with the new pass manager
+; RUN: %clang_cc1 -S -emit-llvm -o - -fexperimental-new-pass-manager -fsanitize=address %s | FileCheck %s --check-prefixes=CHECK,LTO,THINLTO
+; RUN: %clang_cc1 -S -emit-llvm -o - -fexperimental-new-pass-manager -fsanitize=address -flto %s | FileCheck %s --check-prefixes=CHECK,LTO
+; RUN: %clang_cc1 -S -emit-llvm -o - -fexperimental-new-pass-manager -fsanitize=address -flto=thin %s | FileCheck %s --check-prefixes=CHECK,THINLTO
+; RUN: %clang_cc1 -S -emit-llvm -o - -O1 -fexperimental-new-pass-manager -fsanitize=address %s | FileCheck %s --check-prefixes=CHECK,LTO,THINLTO
+; RUN: %clang_cc1 -S -emit-llvm -o - -O1 -fexperimental-new-pass-manager -fsanitize=address -flto %s | FileCheck %s --check-prefixes=CHECK,LTO
+; RUN: %clang_cc1 -S -emit-llvm -o - -O1 -fexperimental-new-pass-manager -fsanitize=address -flto=thin %s | FileCheck %s --check-prefixes=CHECK,THINLTO
+
+target triple = "x86_64-unknown-unknown"
+
; CHECK-DAG: @llvm.global_ctors = {{.*}}@asan.module_ctor
+
+define i32 @test_load(i32* %a) sanitize_address {
+entry:
+ %tmp1 = load i32, i32* %a, align 4
+ ret i32 %tmp1
+}
+
+; CHECK: __asan_init
+
; CHECK-DAG: define internal void @asan.module_ctor() {
+; CHECK: {{.*}} call void @__asan_init()
+; CHECK: {{.*}} call void @__asan_version_mismatch_check_v8()
+; CHECK: ret void
+; CHECK: }
+
; CHECK-DAG: __asan_version_mismatch_check_v8
+
+; This is not used in ThinLTO
; LTO-DAG: __asan_report_load4
diff --git a/test/CodeGen/asm-inout.c b/test/CodeGen/asm-inout.c
index e5da5c5e93..411f6fadac 100644
--- a/test/CodeGen/asm-inout.c
+++ b/test/CodeGen/asm-inout.c
@@ -46,3 +46,12 @@ __m64 test5(__m64 __A, __m64 __B) {
asm ("pmulhuw %1, %0\n\t" : "+y" (__A) : "y" (__B));
return __A;
}
+
+// CHECK: @test6
+int test6(void) {
+ typedef unsigned char __attribute__((vector_size(8))) _m64u8;
+ _m64u8 __attribute__((aligned(16))) Mu8_0, __attribute__((aligned(16))) Mu8_1;
+ // CHECK: call x86_mmx asm "nop", "=y,0,~{dirflag},~{fpsr},~{flags}"(x86_mmx %1)
+ asm ("nop" : "=y"(Mu8_1 ) : "0"(Mu8_0 ));
+ return 0;
+}
diff --git a/test/CodeGen/attr-callback.c b/test/CodeGen/attr-callback.c
new file mode 100644
index 0000000000..5d96b83528
--- /dev/null
+++ b/test/CodeGen/attr-callback.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+
+void cb0(void);
+
+// CHECK-DAG: !callback ![[cid0:[0-9]+]] void @no_args
+__attribute__((callback(1))) void no_args(void (*callback)(void));
+
+// CHECK-DAG: @args_1({{[^#]*#[0-9]+}} !callback ![[cid1:[0-9]+]]
+__attribute__((callback(1, 2, 3))) void args_1(void (*callback)(int, double), int a, double b) { no_args(cb0); }
+
+// CHECK-DAG: !callback ![[cid2:[0-9]+]] void @args_2a
+__attribute__((callback(2, 3, 3))) void args_2a(int a, void (*callback)(double, double), double b);
+// CHECK-DAG: !callback ![[cid2]] void @args_2b
+__attribute__((callback(callback, b, b))) void args_2b(int a, void (*callback)(double, double), double b);
+
+// CHECK-DAG: void @args_3a({{[^#]*#[0-9]+}} !callback ![[cid3:[0-9]+]]
+__attribute__((callback(2, -1, -1))) void args_3a(int a, void (*callback)(double, double), double b) { args_2a(a, callback, b); }
+// CHECK-DAG: void @args_3b({{[^#]*#[0-9]+}} !callback ![[cid3]]
+__attribute__((callback(callback, __, __))) void args_3b(int a, void (*callback)(double, double), double b) { args_2b(a, callback, b); }
+
+// CHECK-DAG: ![[cid0]] = !{![[cid0b:[0-9]+]]}
+// CHECK-DAG: ![[cid0b]] = !{i64 0, i1 false}
+// CHECK-DAG: ![[cid1]] = !{![[cid1b:[0-9]+]]}
+// CHECK-DAG: ![[cid1b]] = !{i64 0, i64 1, i64 2, i1 false}
+// CHECK-DAG: ![[cid2]] = !{![[cid2b:[0-9]+]]}
+// CHECK-DAG: ![[cid2b]] = !{i64 1, i64 2, i64 2, i1 false}
+// CHECK-DAG: ![[cid3]] = !{![[cid3b:[0-9]+]]}
+// CHECK-DAG: ![[cid3b]] = !{i64 1, i64 -1, i64 -1, i1 false}
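
Reading the encoding back out of the metadata: `callback(K, A1, ...)` marks 1-based parameter K as the callee and A1... as the arguments forwarded to it (`-1`/`__` for unknown); the emitted tuples are 0-based with a trailing var-args flag. A minimal sketch following that pattern:

```cpp
// broker() invokes cb(data) once; callback(1, 2) tells the optimizer that
// parameter 1 is the callee and parameter 2 is forwarded as its payload.
// By the pattern above, the expected tuple is !{i64 0, i64 1, i1 false}.
__attribute__((callback(1, 2)))
void broker(void (*cb)(void *), void *data) { cb(data); }
```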
diff --git a/test/CodeGen/attr-cpuspecific.c b/test/CodeGen/attr-cpuspecific.c
index d6c99648cb..2c5e411ce3 100644
--- a/test/CodeGen/attr-cpuspecific.c
+++ b/test/CodeGen/attr-cpuspecific.c
@@ -254,6 +254,6 @@ int DispatchFirst(void) {return 1;}
// WINDOWS: define dso_local i32 @DispatchFirst.B
// WINDOWS: ret i32 1
-// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
+// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
diff --git a/test/CodeGen/attr-msp430.c b/test/CodeGen/attr-msp430.c
new file mode 100644
index 0000000000..e8b6d0d0fa
--- /dev/null
+++ b/test/CodeGen/attr-msp430.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple msp430-unknown-unknown -emit-llvm < %s| FileCheck %s
+
+__attribute__((interrupt(1))) void foo(void) {}
+// CHECK: @llvm.used
+// CHECK-SAME: @foo
+
+// CHECK: define msp430_intrcc void @foo() #0
+// CHECK: attributes #0
+// CHECK-SAME: noinline
+// CHECK-SAME: "interrupt"="1"
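
For context, the attribute argument selects the MSP430 interrupt-vector slot; a hedged usage sketch (the vector number here is arbitrary):

```cpp
// As the checks above verify, the attribute switches the handler to the
// msp430_intrcc calling convention and anchors it in @llvm.used so the
// linker cannot discard it.
__attribute__((interrupt(2))) void timer_isr(void) { /* acknowledge IRQ */ }
```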
diff --git a/test/CodeGen/attr-speculative-load-hardening.cpp b/test/CodeGen/attr-speculative-load-hardening.cpp
deleted file mode 100644
index e2eb805cbb..0000000000
--- a/test/CodeGen/attr-speculative-load-hardening.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: %clang_cc1 -std=c++11 -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
-// RUN: %clang_cc1 -std=c++11 -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
-//
-// Check that we set the attribute on each function.
-
-[[clang::speculative_load_hardening]]
-int test1() {
- return 42;
-}
-
-int __attribute__((speculative_load_hardening)) test2() {
- return 42;
-}
-// CHECK1: @{{.*}}test1{{.*}}[[SLH1:#[0-9]+]]
-// CHECK1: attributes [[SLH1]] = { {{.*}}speculative_load_hardening{{.*}} }
-
-// CHECK2: @{{.*}}test2{{.*}}[[SLH2:#[0-9]+]]
-// CHECK2: attributes [[SLH2]] = { {{.*}}speculative_load_hardening{{.*}} }
diff --git a/test/CodeGen/attr-speculative-load-hardening.m b/test/CodeGen/attr-speculative-load-hardening.m
deleted file mode 100644
index 2de945b974..0000000000
--- a/test/CodeGen/attr-speculative-load-hardening.m
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: %clang -emit-llvm %s -o - -S | FileCheck %s -check-prefix=SLH
-
-int main() __attribute__((speculative_load_hardening)) {
- return 0;
-}
-
-// SLH: @{{.*}}main{{.*}}[[SLH:#[0-9]+]]
-
-// SLH: attributes [[SLH]] = { {{.*}}speculative_load_hardening{{.*}} }
diff --git a/test/CodeGen/attr-target-x86-mmx.c b/test/CodeGen/attr-target-x86-mmx.c
index 412e8e93af..01663766d9 100644
--- a/test/CodeGen/attr-target-x86-mmx.c
+++ b/test/CodeGen/attr-target-x86-mmx.c
@@ -19,4 +19,4 @@ void __attribute__((target("sse"))) shift(__m64 a, __m64 b, int c) {
_mm_srai_pi32(a, c);
}
-// CHECK: "target-features"="+mmx,+sse,+x87"
+// CHECK: "target-features"="+cx8,+mmx,+sse,+x87"
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index 153cdb3e94..e3a2cb2e16 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -48,11 +48,11 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
// CHECK: qq{{.*}} #6
// CHECK: lake{{.*}} #7
// CHECK: use_before_def{{.*}} #7
-// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
-// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
-// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
-// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
+// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
+// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
+// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
+// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
diff --git a/test/CodeGen/attr-target-x87-softfp.c b/test/CodeGen/attr-target-x87-softfp.c
index 16b7cfe827..0d26dab74e 100644
--- a/test/CodeGen/attr-target-x87-softfp.c
+++ b/test/CodeGen/attr-target-x87-softfp.c
@@ -7,10 +7,10 @@ int __attribute__((target("no-x87"))) bar(int a) { return 4; }
// CHECK: foo{{.*}} #0
// CHECK: bar{{.*}} #1
-// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
+// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
// HARD: "use-soft-float"="false"
// SOFT: "use-soft-float"="true"
-// CHECK: #1 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,-x87"
+// CHECK: #1 = {{.*}}"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,-x87"
// HARD: "use-soft-float"="false"
// SOFT: "use-soft-float"="true"
diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c
index 3e7709b1b7..c737514d90 100644
--- a/test/CodeGen/avx-builtins.c
+++ b/test/CodeGen/avx-builtins.c
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
diff --git a/test/CodeGen/avx-cmp-builtins.c b/test/CodeGen/avx-cmp-builtins.c
index 38d3ae24cd..609f5964f3 100644
--- a/test/CodeGen/avx-cmp-builtins.c
+++ b/test/CodeGen/avx-cmp-builtins.c
@@ -22,25 +22,25 @@ __m128d test_cmp_ss(__m128 a, __m128 b) {
__m128 test_cmpgt_ss(__m128 a, __m128 b) {
// CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1)
- // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
return _mm_cmpgt_ss(a, b);
}
__m128 test_cmpge_ss(__m128 a, __m128 b) {
// CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2)
- // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
return _mm_cmpge_ss(a, b);
}
__m128 test_cmpngt_ss(__m128 a, __m128 b) {
// CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5)
- // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
return _mm_cmpngt_ss(a, b);
}
__m128 test_cmpnge_ss(__m128 a, __m128 b) {
// CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6)
- // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
return _mm_cmpnge_ss(a, b);
}
diff --git a/test/CodeGen/avx-shuffle-builtins.c b/test/CodeGen/avx-shuffle-builtins.c
index fef2879abd..061cad76a5 100644
--- a/test/CodeGen/avx-shuffle-builtins.c
+++ b/test/CodeGen/avx-shuffle-builtins.c
@@ -91,19 +91,19 @@ test_mm256_broadcast_ss(float const *__a) {
__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
// CHECK-LABEL: @test_mm256_insertf128_ps_0
- // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
return _mm256_insertf128_ps(a, b, 0);
}
__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
// CHECK-LABEL: @test_mm256_insertf128_pd_0
- // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 6, i32 7>
return _mm256_insertf128_pd(a, b, 0);
}
__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
// CHECK-LABEL: @test_mm256_insertf128_si256_0
- // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
return _mm256_insertf128_si256(a, b, 0);
}
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index 90c07c1d38..4ef1147885 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -107,25 +107,13 @@ __m256i test_mm256_andnot_si256(__m256i a, __m256i b) {
__m256i test_mm256_avg_epu8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_avg_epu8
- // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
- // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
- // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
- // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+ // CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_avg_epu8(a, b);
}
__m256i test_mm256_avg_epu16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_avg_epu16
- // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
- // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+ // CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_avg_epu16(a, b);
}
diff --git a/test/CodeGen/avx512-kconstraints-att_inline_asm.c b/test/CodeGen/avx512-kconstraints-att_inline_asm.c
index df93ddf7bb..125c5a508c 100644
--- a/test/CodeGen/avx512-kconstraints-att_inline_asm.c
+++ b/test/CodeGen/avx512-kconstraints-att_inline_asm.c
@@ -1,59 +1,77 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror |opt -instnamer -S |FileCheck %s
+// RUN: %clang_cc1 %s -O0 -ffreestanding -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror |opt -instnamer -S |FileCheck %s
// This test checks the validity of AT&T (GCC-style) inline assembly for the avx512 k and Yk constraints.
// It also checks that a mask-register operand accepts flexible operand types (size <= 64 bits).
-void mask_Yk_i8(char msk){
-//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09
- asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
- : //output
- : "Yk" (msk)); //inputs
+#include <x86intrin.h>
+
+__m512i mask_Yk_i8(char msk, __m512i x, __m512i y){
+// CHECK: <8 x i64> asm "vpaddq\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+ __m512i dst;
+ asm ("vpaddq\t%3, %2, %0 %{%1%}"
+ : "=x" (dst) //output
+ : "Yk" (msk), "x" (x), "x" (y)); //inputs
+ return dst;
}
-void mask_Yk_i16(short msk){
-//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09
- asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
- : //output
- : "Yk" (msk)); //inputs
+__m512i mask_Yk_i16(short msk, __m512i x, __m512i y){
+// CHECK: <8 x i64> asm "vpaddd\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i16 %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+ __m512i dst;
+ asm ("vpaddd\t%3, %2, %0 %{%1%}"
+ : "=x" (dst) //output
+ : "Yk" (msk), "x" (x), "x" (y)); //inputs
+ return dst;
}
-void mask_Yk_i32(int msk){
-//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09
- asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
- : //output
- : "Yk" (msk)); //inputs
+__m512i mask_Yk_i32(int msk, __m512i x, __m512i y){
+// CHECK: <8 x i64> asm "vpaddw\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i32 %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+ __m512i dst;
+ asm ("vpaddw\t%3, %2, %0 %{%1%}"
+ : "=x" (dst) //output
+ : "Yk" (msk), "x" (x), "x" (y)); //inputs
+ return dst;
}
-void mask_Yk_i64(long long msk){
-//CHECK: vpaddb\09 %xmm1, %xmm0, %xmm1 {$0}\09
- asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
- : //output
- : "Yk" (msk)); //inputs
+__m512i mask_Yk_i64(long long msk, __m512i x, __m512i y){
+// CHECK: <8 x i64> asm "vpaddb\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i64 %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+ __m512i dst;
+ asm ("vpaddb\t%3, %2, %0 %{%1%}"
+ : "=x" (dst) //output
+ : "Yk" (msk), "x" (x), "x" (y)); //inputs
+ return dst;
}
-void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){
-//CHECK: kandw\09$2, $1, $0
- asm ("kandw\t%2, %1, %0"
+char k_wise_op_i8(char msk_src1,char msk_src2){
+//CHECK: i8 asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i8 %{{.*}}, i8 %{{.*}})
+ char msk_dst;
+ asm ("kandb\t%2, %1, %0"
: "=k" (msk_dst)
: "k" (msk_src1), "k" (msk_src2));
+ return msk_dst;
}
-void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){
-//CHECK: kandw\09$2, $1, $0
+short k_wise_op_i16(short msk_src1, short msk_src2){
+//CHECK: i16 asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i16 %{{.*}}, i16 %{{.*}})
+ short msk_dst;
asm ("kandw\t%2, %1, %0"
: "=k" (msk_dst)
: "k" (msk_src1), "k" (msk_src2));
+ return msk_dst;
}
-void k_wise_op_i32(int msk_dst, int msk_src1, int msk_src2){
-//CHECK: kandw\09$2, $1, $0
- asm ("kandw\t%2, %1, %0"
+int k_wise_op_i32(int msk_src1, int msk_src2){
+//CHECK: i32 asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i32 %{{.*}}, i32 %{{.*}})
+ int msk_dst;
+ asm ("kandd\t%2, %1, %0"
: "=k" (msk_dst)
: "k" (msk_src1), "k" (msk_src2));
+ return msk_dst;
}
-void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){
-//CHECK: kandw\09$2, $1, $0
- asm ("kandw\t%2, %1, %0"
+long long k_wise_op_i64(long long msk_src1, long long msk_src2){
+//CHECK: i64 asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i64 %{{.*}}, i64 %{{.*}})
+ long long msk_dst;
+ asm ("kandq\t%2, %1, %0"
: "=k" (msk_dst)
: "k" (msk_src1), "k" (msk_src2));
+ return msk_dst;
}
diff --git a/test/CodeGen/avx512bf16-builtins.c b/test/CodeGen/avx512bf16-builtins.c
new file mode 100644
index 0000000000..f7b26e14bb
--- /dev/null
+++ b/test/CodeGen/avx512bf16-builtins.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin \
+// RUN: -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror \
+// RUN: | FileCheck %s
+
+#include <immintrin.h>
+
+__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
+ // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+ // CHECK: ret <32 x i16> %{{.*}}
+ return _mm512_cvtne2ps_pbh(A, B);
+}
+
+__m512bh test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) {
+ // CHECK-LABEL: @test_mm512_maskz_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ // CHECK: ret <32 x i16> %{{.*}}
+ return _mm512_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512 B) {
+ // CHECK-LABEL: @test_mm512_mask_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ // CHECK: ret <32 x i16> %{{.*}}
+ return _mm512_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m256bh test_mm512_cvtneps2bf16(__m512 A) {
+ // CHECK-LABEL: @test_mm512_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.512
+ // CHECK: ret <16 x i16> %{{.*}}
+ return _mm512_cvtneps_pbh(A);
+}
+
+__m256bh test_mm512_mask_cvtneps2bf16(__m256bh C, __mmask16 U, __m512 A) {
+ // CHECK-LABEL: @test_mm512_mask_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ // CHECK: ret <16 x i16> %{{.*}}
+ return _mm512_mask_cvtneps_pbh(C, U, A);
+}
+
+__m256bh test_mm512_maskz_cvtneps2bf16(__m512 A, __mmask16 U) {
+ // CHECK-LABEL: @test_mm512_maskz_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ // CHECK: ret <16 x i16> %{{.*}}
+ return _mm512_maskz_cvtneps_pbh(U, A);
+}
+
+__m512 test_mm512_dpbf16_ps(__m512 D, __m512bh A, __m512bh B) {
+ // CHECK-LABEL: @test_mm512_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.512
+ // CHECK: ret <16 x float> %{{.*}}
+ return _mm512_dpbf16_ps(D, A, B);
+}
+
+__m512 test_mm512_maskz_dpbf16_ps(__m512 D, __m512bh A, __m512bh B, __mmask16 U) {
+ // CHECK-LABEL: @test_mm512_maskz_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: ret <16 x float> %{{.*}}
+ return _mm512_maskz_dpbf16_ps(U, D, A, B);
+}
+
+__m512 test_mm512_mask_dpbf16_ps(__m512 D, __m512bh A, __m512bh B, __mmask16 U) {
+ // CHECK-LABEL: @test_mm512_mask_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: ret <16 x float> %{{.*}}
+ return _mm512_mask_dpbf16_ps(D, U, A, B);
+}
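
A hedged sketch of how these three intrinsics compose into a dot-product step (hypothetical helper; pairing happens within each 32-bit lane, so 32 float inputs reduce to 16 partial sums):

```cpp
#include <immintrin.h>

// acc[i] += a[2i]*b[2i] + a[2i+1]*b[2i+1], with inputs rounded to bf16 first.
// Note cvtne2 places the *second* argument in the low 16 bf16 lanes.
__m512 bf16_dot_step(__m512 acc, __m512 a_lo, __m512 a_hi,
                     __m512 b_lo, __m512 b_hi) {
  __m512bh a = _mm512_cvtne2ps_pbh(a_hi, a_lo);
  __m512bh b = _mm512_cvtne2ps_pbh(b_hi, b_lo);
  return _mm512_dpbf16_ps(acc, a, b);
}
```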
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 9e75baae77..562bc31288 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -1066,73 +1066,35 @@ __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
}
__m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_avg_epu8
- // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512
- // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
- // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
- // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
+ // CHECK: @llvm.x86.avx512.pavg.b.512
return _mm512_avg_epu8(__A,__B);
}
__m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_avg_epu8
- // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512
- // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
- // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
- // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
+ // CHECK: @llvm.x86.avx512.pavg.b.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_avg_epu8(__W,__U,__A,__B);
}
__m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_avg_epu8
- // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512
- // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
- // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
- // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
- // CHECK: store <8 x i64> zeroinitializer
+ // CHECK: @llvm.x86.avx512.pavg.b.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_avg_epu8(__U,__A,__B);
}
__m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_avg_epu16
- // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512
- // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
- // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
- // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
+ // CHECK: @llvm.x86.avx512.pavg.w.512
return _mm512_avg_epu16(__A,__B);
}
__m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_avg_epu16
- // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512
- // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
- // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
- // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
+ // CHECK: @llvm.x86.avx512.pavg.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_avg_epu16(__W,__U,__A,__B);
}
__m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_avg_epu16
- // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512
- // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
- // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
- // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
- // CHECK: store <8 x i64> zeroinitializer
+ // CHECK: @llvm.x86.avx512.pavg.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_avg_epu16(__U,__A,__B);
}
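
The deleted CHECK lines spelled out exactly what pavg computes; as a scalar model, this is the same arithmetic the zext/add/add-1/lshr/trunc sequence encoded:

```cpp
// Round-half-up unsigned average: (a + b + 1) >> 1, computed in a wider
// type so the intermediate sum cannot wrap.
static inline unsigned char pavg_b(unsigned char a, unsigned char b) {
  return (unsigned char)(((unsigned)a + (unsigned)b + 1u) >> 1);
}
```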
diff --git a/test/CodeGen/avx512cdintrin.c b/test/CodeGen/avx512cdintrin.c
index e01d277be9..6483d7e8dd 100644
--- a/test/CodeGen/avx512cdintrin.c
+++ b/test/CodeGen/avx512cdintrin.c
@@ -5,32 +5,36 @@
__m512i test_mm512_conflict_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+ // CHECK: @llvm.x86.avx512.conflict.q.512
return _mm512_conflict_epi64(__A);
}
__m512i test_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+ // CHECK: @llvm.x86.avx512.conflict.q.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_conflict_epi64(__W,__U,__A);
}
__m512i test_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+ // CHECK: @llvm.x86.avx512.conflict.q.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_conflict_epi64(__U,__A);
}
__m512i test_mm512_conflict_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+ // CHECK: @llvm.x86.avx512.conflict.d.512
return _mm512_conflict_epi32(__A);
}
__m512i test_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+ // CHECK: @llvm.x86.avx512.conflict.d.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_conflict_epi32(__W,__U,__A);
}
__m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+ // CHECK: @llvm.x86.avx512.conflict.d.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_conflict_epi32(__U,__A);
}
__m512i test_mm512_lzcnt_epi32(__m512i __A) {
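
Before the next file, a hedged sketch of the typical use of conflict detection (hypothetical helper, AVX512F+CD assumed): each output lane holds a bitmask of earlier lanes with the same value, so testing it against itself flags duplicate indices, e.g. ahead of a scatter.

```cpp
#include <immintrin.h>

// Lanes whose index duplicates an earlier lane produce a nonzero conflict
// word; the test-mask reduces that to a 16-bit lane mask.
__mmask16 has_duplicate_indices(__m512i idx) {
  __m512i c = _mm512_conflict_epi32(idx);
  return _mm512_test_epi32_mask(c, c);
}
```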
diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c
index 6227a83b55..a85e173432 100644
--- a/test/CodeGen/avx512dq-builtins.c
+++ b/test/CodeGen/avx512dq-builtins.c
@@ -613,55 +613,61 @@ __m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
// CHECK-LABEL: @test_mm512_cvt_roundepi64_pd
- // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_pd
- // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_pd
- // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_cvtepi64_ps(__m512i __A) {
// CHECK-LABEL: @test_mm512_cvtepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+ // CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
return _mm512_cvtepi64_ps(__A);
}
__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_cvtepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+ // CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvtepi64_ps(__W, __U, __A);
}
__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvtepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+ // CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvtepi64_ps(__U, __A);
}
__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
// CHECK-LABEL: @test_mm512_cvt_roundepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -831,55 +837,61 @@ __m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
// CHECK-LABEL: @test_mm512_cvt_roundepu64_pd
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_pd
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_pd
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_cvtepu64_ps(__m512i __A) {
// CHECK-LABEL: @test_mm512_cvtepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+ // CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
return _mm512_cvtepu64_ps(__A);
}
__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_cvtepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+ // CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvtepu64_ps(__W, __U, __A);
}
__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvtepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+ // CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvtepu64_ps(__U, __A);
}
__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) {
// CHECK-LABEL: @test_mm512_cvt_roundepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
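
The immediates being rewritten throughout these conversion tests come from the rounding-control/SAE macros. Their conventional immintrin.h values, quoted from memory (verify against the header):

```cpp
// _MM_FROUND_TO_NEAREST_INT 0x00   _MM_FROUND_TO_NEG_INF 0x01
// _MM_FROUND_TO_POS_INF     0x02   _MM_FROUND_TO_ZERO    0x03
// _MM_FROUND_CUR_DIRECTION  0x04   _MM_FROUND_NO_EXC     0x08
//
// So the old literal 4 meant "use the current MXCSR rounding mode", while
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (== 0x08) requests explicit
// round-to-nearest with exceptions suppressed, exercising the static-rounding
// intrinsic path.
```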
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 5154e820bd..eedd3fc9e2 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
@@ -5019,137 +5020,197 @@ __m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A)
return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION);
}
+__m512 test_mm512_cvt_roundepi32_ps( __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundepi32_ps
+ // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32
+ return _mm512_cvt_roundepi32_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps
- // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512
- return _mm512_mask_cvt_roundepi32_ps(__W,__U,__A,4);
+ // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_cvt_roundepi32_ps(__W,__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundepi32_ps
- // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512
- return _mm512_maskz_cvt_roundepi32_ps(__U,__A,4);
+ // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_maskz_cvt_roundepi32_ps(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_cvt_roundepu32_ps(__m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundepu32_ps
+ // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32
+ return _mm512_cvt_roundepu32_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U,__m512i __A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundepu32_ps
- // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512
- return _mm512_mask_cvt_roundepu32_ps(__W,__U,__A,4);
+ // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_cvt_roundepu32_ps(__W,__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U,__m512i __A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundepu32_ps
- // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512
- return _mm512_maskz_cvt_roundepu32_ps(__U,__A,4);
+ // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_maskz_cvt_roundepu32_ps(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_cvt_roundpd_ps(__m512d A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundpd_ps
+ // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512
+ return _mm512_cvt_roundpd_ps(A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_mask_cvt_roundpd_ps(__m256 W, __mmask8 U,__m512d A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundpd_ps
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512
- return _mm512_mask_cvt_roundpd_ps(W,U,A,4);
+ return _mm512_mask_cvt_roundpd_ps(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_maskz_cvt_roundpd_ps(__mmask8 U, __m512d A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_ps
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512
- return _mm512_maskz_cvt_roundpd_ps(U,A,4);
+ return _mm512_maskz_cvt_roundpd_ps(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_cvtt_roundpd_epi32(__m512d A)
{
// CHECK-LABEL: @test_mm512_cvtt_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512
- return _mm512_cvtt_roundpd_epi32(A,4);
+ return _mm512_cvtt_roundpd_epi32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_mask_cvtt_roundpd_epi32(__m256i W, __mmask8 U, __m512d A)
{
// CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512
- return _mm512_mask_cvtt_roundpd_epi32(W,U,A,4);
+ return _mm512_mask_cvtt_roundpd_epi32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_maskz_cvtt_roundpd_epi32(__mmask8 U, __m512d A)
{
// CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512
- return _mm512_maskz_cvtt_roundpd_epi32(U,A,4);
+ return _mm512_maskz_cvtt_roundpd_epi32(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtt_roundps_epi32(__m512 A)
+{
+ // CHECK-LABEL: @test_mm512_cvtt_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512
+ return _mm512_cvtt_roundps_epi32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtt_roundps_epi32(__m512i W,__mmask16 U, __m512 A)
{
// CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epi32
// CHECK: @llvm.x86.avx512.mask.cvttps2dq.512
- return _mm512_mask_cvtt_roundps_epi32(W,U,A,4);
+ return _mm512_mask_cvtt_roundps_epi32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtt_roundps_epi32(__mmask16 U, __m512 A)
{
// CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi32
// CHECK: @llvm.x86.avx512.mask.cvttps2dq.512
- return _mm512_maskz_cvtt_roundps_epi32(U,A,4);
+ return _mm512_maskz_cvtt_roundps_epi32(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvt_roundps_epi32(__m512 __A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512
+ return _mm512_cvt_roundps_epi32(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvt_roundps_epi32(__m512i __W,__mmask16 __U,__m512 __A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi32
// CHECK: @llvm.x86.avx512.mask.cvtps2dq.512
- return _mm512_mask_cvt_roundps_epi32(__W,__U,__A,4);
+ return _mm512_mask_cvt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvt_roundps_epi32(__mmask16 __U, __m512 __A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi32
// CHECK: @llvm.x86.avx512.mask.cvtps2dq.512
- return _mm512_maskz_cvt_roundps_epi32(__U,__A,4);
+ return _mm512_maskz_cvt_roundps_epi32(__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_cvt_roundpd_epi32(__m512d A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512
+ return _mm512_cvt_roundpd_epi32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_mask_cvt_roundpd_epi32(__m256i W,__mmask8 U,__m512d A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512
- return _mm512_mask_cvt_roundpd_epi32(W,U,A,4);
+ return _mm512_mask_cvt_roundpd_epi32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_maskz_cvt_roundpd_epi32(__mmask8 U, __m512d A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512
- return _mm512_maskz_cvt_roundpd_epi32(U,A,4);
+ return _mm512_maskz_cvt_roundpd_epi32(U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvt_roundps_epu32(__m512 __A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512
+ return _mm512_cvt_roundps_epu32(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvt_roundps_epu32(__m512i __W,__mmask16 __U,__m512 __A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundps_epu32
// CHECK: @llvm.x86.avx512.mask.cvtps2udq.512
- return _mm512_mask_cvt_roundps_epu32(__W,__U,__A,4);
+ return _mm512_mask_cvt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvt_roundps_epu32(__mmask16 __U,__m512 __A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu32
// CHECK: @llvm.x86.avx512.mask.cvtps2udq.512
- return _mm512_maskz_cvt_roundps_epu32(__U,__A, 4);
+ return _mm512_maskz_cvt_roundps_epu32(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_cvt_roundpd_epu32(__m512d A)
+{
+ // CHECK-LABEL: @test_mm512_cvt_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512
+ return _mm512_cvt_roundpd_epu32(A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_mask_cvt_roundpd_epu32(__m256i W, __mmask8 U, __m512d A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu32
// CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512
- return _mm512_mask_cvt_roundpd_epu32(W,U,A,4);
+ return _mm512_mask_cvt_roundpd_epu32(W,U,A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256i test_mm512_maskz_cvt_roundpd_epu32(__mmask8 U, __m512d A)
{
// CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu32
// CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512
- return _mm512_maskz_cvt_roundpd_epu32(U, A, 4);
+ return _mm512_maskz_cvt_roundpd_epu32(U, A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) {
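
Throughout the hunk above, the bare rounding argument 4 is replaced with named macros. For reference, the relevant values come from Clang's <smmintrin.h> (reproduced here for convenience; the header is the authority), and they show the tests also change mode, from "use the current rounding direction" to an explicit round-to-nearest with exceptions suppressed:

/* Rounding-control macros as defined in <smmintrin.h>: */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_NO_EXC         0x08
#define _MM_FROUND_CUR_DIRECTION  0x04
/* _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC evaluates to 0x08,
   replacing the former literal 4, i.e. _MM_FROUND_CUR_DIRECTION. */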
@@ -7002,193 +7063,193 @@ __m512 test_mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A) {
__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i64gather_ps
- // CHECK: @llvm.x86.avx512.gather.qps.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qps.512
return _mm512_i64gather_ps(__index, __addr, 2);
}
__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i64gather_ps
- // CHECK: @llvm.x86.avx512.gather.qps.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qps.512
return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}
__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i64gather_epi32
- // CHECK: @llvm.x86.avx512.gather.qpi.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
return _mm512_i64gather_epi32(__index, __addr, 2);
}
__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i64gather_epi32
- // CHECK: @llvm.x86.avx512.gather.qpi.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i64gather_pd
- // CHECK: @llvm.x86.avx512.gather.qpd.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
return _mm512_i64gather_pd(__index, __addr, 2);
}
__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i64gather_pd
- // CHECK: @llvm.x86.avx512.gather.qpd.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i64gather_epi64
- // CHECK: @llvm.x86.avx512.gather.qpq.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
return _mm512_i64gather_epi64(__index, __addr, 2);
}
__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i64gather_epi64
- // CHECK: @llvm.x86.avx512.gather.qpq.512
+ // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i32gather_ps
- // CHECK: @llvm.x86.avx512.gather.dps.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dps.512
return _mm512_i32gather_ps(__index, __addr, 2);
}
__m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i32gather_ps
- // CHECK: @llvm.x86.avx512.gather.dps.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dps.512
return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2);
}
__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i32gather_epi32
- // CHECK: @llvm.x86.avx512.gather.dpi.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
return _mm512_i32gather_epi32(__index, __addr, 2);
}
__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i32gather_epi32
- // CHECK: @llvm.x86.avx512.gather.dpi.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i32gather_pd
- // CHECK: @llvm.x86.avx512.gather.dpd.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
return _mm512_i32gather_pd(__index, __addr, 2);
}
__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i32gather_pd
- // CHECK: @llvm.x86.avx512.gather.dpd.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_i32gather_epi64
- // CHECK: @llvm.x86.avx512.gather.dpq.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
return _mm512_i32gather_epi64(__index, __addr, 2);
}
__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm512_mask_i32gather_epi64
- // CHECK: @llvm.x86.avx512.gather.dpq.512
+ // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
// CHECK-LABEL: @test_mm512_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatter.qps.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qps.512
return _mm512_i64scatter_ps(__addr, __index, __v1, 2);
}
void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
// CHECK-LABEL: @test_mm512_mask_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatter.qps.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qps.512
return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm512_i64scatter_epi32
- // CHECK: @llvm.x86.avx512.scatter.qpi.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512
return _mm512_i64scatter_epi32(__addr, __index, __v1, 2);
}
void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm512_mask_i64scatter_epi32
- // CHECK: @llvm.x86.avx512.scatter.qpi.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512
return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
// CHECK-LABEL: @test_mm512_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatter.qpd.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512
return _mm512_i64scatter_pd(__addr, __index, __v1, 2);
}
void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
// CHECK-LABEL: @test_mm512_mask_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatter.qpd.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512
return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
// CHECK-LABEL: @test_mm512_i64scatter_epi64
- // CHECK: @llvm.x86.avx512.scatter.qpq.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512
return _mm512_i64scatter_epi64(__addr, __index, __v1, 2);
}
void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
// CHECK-LABEL: @test_mm512_mask_i64scatter_epi64
- // CHECK: @llvm.x86.avx512.scatter.qpq.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512
return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
// CHECK-LABEL: @test_mm512_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scatter.dps.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dps.512
return _mm512_i32scatter_ps(__addr, __index, __v1, 2);
}
void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
// CHECK-LABEL: @test_mm512_mask_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scatter.dps.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dps.512
return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
// CHECK-LABEL: @test_mm512_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.scatter.dpi.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512
return _mm512_i32scatter_epi32(__addr, __index, __v1, 2);
}
void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
// CHECK-LABEL: @test_mm512_mask_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.scatter.dpi.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512
return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
// CHECK-LABEL: @test_mm512_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scatter.dpd.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
return _mm512_i32scatter_pd(__addr, __index, __v1, 2);
}
void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
// CHECK-LABEL: @test_mm512_mask_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scatter.dpd.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2);
}
void test_mm512_i32scatter_epi64(void *__addr, __m256i __index, __m512i __v1) {
// CHECK-LABEL: @test_mm512_i32scatter_epi64
- // CHECK: @llvm.x86.avx512.scatter.dpq.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
return _mm512_i32scatter_epi64(__addr, __index, __v1, 2);
}
void test_mm512_mask_i32scatter_epi64(void *__addr, __mmask8 __mask, __m256i __index, __m512i __v1) {
// CHECK-LABEL: @test_mm512_mask_i32scatter_epi64
- // CHECK: @llvm.x86.avx512.scatter.dpq.512
+ // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
return _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, 2);
}
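
The gather and scatter updates in the hunk above are purely a rename: the 512-bit intrinsics gain a "mask." infix (the unmasked builtins presumably lower to the masked form with an all-ones mask). A sketch in the same style, with an illustrative function name:

__m512 sketch_i32gather_ps(__m512i idx, void const *p) {
  // Same builtin call as before; only the intrinsic name being matched changed.
  // CHECK: @llvm.x86.avx512.mask.gather.dps.512
  return _mm512_i32gather_ps(idx, p, 2);
}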
@@ -8534,49 +8595,49 @@ void test_mm512_stream_ps(float *__P, __m512 __A) {
__m512d test_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_mask_compress_pd
- // CHECK: @llvm.x86.avx512.mask.compress.pd.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_mask_compress_pd(__W, __U, __A);
}
__m512d test_mm512_maskz_compress_pd(__mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_maskz_compress_pd
- // CHECK: @llvm.x86.avx512.mask.compress.pd.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_maskz_compress_pd(__U, __A);
}
__m512i test_mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_compress_epi64
- // CHECK: @llvm.x86.avx512.mask.compress.q.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_mask_compress_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_compress_epi64
- // CHECK: @llvm.x86.avx512.mask.compress.q.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_maskz_compress_epi64(__U, __A);
}
__m512 test_mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_mask_compress_ps
- // CHECK: @llvm.x86.avx512.mask.compress.ps.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_mask_compress_ps(__W, __U, __A);
}
__m512 test_mm512_maskz_compress_ps(__mmask16 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_maskz_compress_ps
- // CHECK: @llvm.x86.avx512.mask.compress.ps.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_maskz_compress_ps(__U, __A);
}
__m512i test_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_compress_epi32
- // CHECK: @llvm.x86.avx512.mask.compress.d.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_mask_compress_epi32(__W, __U, __A);
}
__m512i test_mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_compress_epi32
- // CHECK: @llvm.x86.avx512.mask.compress.d.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_maskz_compress_epi32(__U, __A);
}
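
The compress checks here (and the expand checks in the following hunks) are loosened to a common stem, consistent with the element-type suffix being respelled in the intrinsic name (for example as an overloaded type suffix); matching only the stem keeps the tests stable across that rename. Sketch, with an illustrative name:

__m512 sketch_maskz_compress_ps(__mmask16 U, __m512 A) {
  // The stem matches the old ".ps.512" spelling as well as any new suffix.
  // CHECK: @llvm.x86.avx512.mask.compress
  return _mm512_maskz_compress_ps(U, A);
}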
@@ -8690,25 +8751,25 @@ __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) {
__m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_mask_expand_pd
- // CHECK: @llvm.x86.avx512.mask.expand.pd.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_mask_expand_pd(__W, __U, __A);
}
__m512d test_mm512_maskz_expand_pd(__mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_maskz_expand_pd
- // CHECK: @llvm.x86.avx512.mask.expand.pd.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_maskz_expand_pd(__U, __A);
}
__m512i test_mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_expand_epi64
- // CHECK: @llvm.x86.avx512.mask.expand.q.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_mask_expand_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_expand_epi64
- // CHECK: @llvm.x86.avx512.mask.expand.q.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_maskz_expand_epi64(__U, __A);
}
__m512i test_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) {
@@ -8761,25 +8822,25 @@ __m512 test_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) {
__m512 test_mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_mask_expand_ps
- // CHECK: @llvm.x86.avx512.mask.expand.ps.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_mask_expand_ps(__W, __U, __A);
}
__m512 test_mm512_maskz_expand_ps(__mmask16 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_maskz_expand_ps
- // CHECK: @llvm.x86.avx512.mask.expand.ps.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_maskz_expand_ps(__U, __A);
}
__m512i test_mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_expand_epi32
- // CHECK: @llvm.x86.avx512.mask.expand.d.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_mask_expand_epi32(__W, __U, __A);
}
__m512i test_mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_expand_epi32
- // CHECK: @llvm.x86.avx512.mask.expand.d.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_maskz_expand_epi32(__U, __A);
}
__m512d test_mm512_cvt_roundps_pd(__m256 __A) {
@@ -10079,70 +10140,70 @@ __m512i test_mm512_set_epi8(char e63, char e62, char e61, char e60, char e59,
char e1, char e0) {
//CHECK-LABEL: @test_mm512_set_epi8
- //CHECK: load i8, i8* %e63.addr, align 1
- //CHECK: load i8, i8* %e62.addr, align 1
- //CHECK: load i8, i8* %e61.addr, align 1
- //CHECK: load i8, i8* %e60.addr, align 1
- //CHECK: load i8, i8* %e59.addr, align 1
- //CHECK: load i8, i8* %e58.addr, align 1
- //CHECK: load i8, i8* %e57.addr, align 1
- //CHECK: load i8, i8* %e56.addr, align 1
- //CHECK: load i8, i8* %e55.addr, align 1
- //CHECK: load i8, i8* %e54.addr, align 1
- //CHECK: load i8, i8* %e53.addr, align 1
- //CHECK: load i8, i8* %e52.addr, align 1
- //CHECK: load i8, i8* %e51.addr, align 1
- //CHECK: load i8, i8* %e50.addr, align 1
- //CHECK: load i8, i8* %e49.addr, align 1
- //CHECK: load i8, i8* %e48.addr, align 1
- //CHECK: load i8, i8* %e47.addr, align 1
- //CHECK: load i8, i8* %e46.addr, align 1
- //CHECK: load i8, i8* %e45.addr, align 1
- //CHECK: load i8, i8* %e44.addr, align 1
- //CHECK: load i8, i8* %e43.addr, align 1
- //CHECK: load i8, i8* %e42.addr, align 1
- //CHECK: load i8, i8* %e41.addr, align 1
- //CHECK: load i8, i8* %e40.addr, align 1
- //CHECK: load i8, i8* %e39.addr, align 1
- //CHECK: load i8, i8* %e38.addr, align 1
- //CHECK: load i8, i8* %e37.addr, align 1
- //CHECK: load i8, i8* %e36.addr, align 1
- //CHECK: load i8, i8* %e35.addr, align 1
- //CHECK: load i8, i8* %e34.addr, align 1
- //CHECK: load i8, i8* %e33.addr, align 1
- //CHECK: load i8, i8* %e32.addr, align 1
- //CHECK: load i8, i8* %e31.addr, align 1
- //CHECK: load i8, i8* %e30.addr, align 1
- //CHECK: load i8, i8* %e29.addr, align 1
- //CHECK: load i8, i8* %e28.addr, align 1
- //CHECK: load i8, i8* %e27.addr, align 1
- //CHECK: load i8, i8* %e26.addr, align 1
- //CHECK: load i8, i8* %e25.addr, align 1
- //CHECK: load i8, i8* %e24.addr, align 1
- //CHECK: load i8, i8* %e23.addr, align 1
- //CHECK: load i8, i8* %e22.addr, align 1
- //CHECK: load i8, i8* %e21.addr, align 1
- //CHECK: load i8, i8* %e20.addr, align 1
- //CHECK: load i8, i8* %e19.addr, align 1
- //CHECK: load i8, i8* %e18.addr, align 1
- //CHECK: load i8, i8* %e17.addr, align 1
- //CHECK: load i8, i8* %e16.addr, align 1
- //CHECK: load i8, i8* %e15.addr, align 1
- //CHECK: load i8, i8* %e14.addr, align 1
- //CHECK: load i8, i8* %e13.addr, align 1
- //CHECK: load i8, i8* %e12.addr, align 1
- //CHECK: load i8, i8* %e11.addr, align 1
- //CHECK: load i8, i8* %e10.addr, align 1
- //CHECK: load i8, i8* %e9.addr, align 1
- //CHECK: load i8, i8* %e8.addr, align 1
- //CHECK: load i8, i8* %e7.addr, align 1
- //CHECK: load i8, i8* %e6.addr, align 1
- //CHECK: load i8, i8* %e5.addr, align 1
- //CHECK: load i8, i8* %e4.addr, align 1
- //CHECK: load i8, i8* %e3.addr, align 1
- //CHECK: load i8, i8* %e2.addr, align 1
- //CHECK: load i8, i8* %e1.addr, align 1
- //CHECK: load i8, i8* %e0.addr, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
+ //CHECK: load i8, i8* %{{.*}}, align 1
return _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54,
e53, e52, e51, e50, e49, e48,e47, e46, e45, e44, e43, e42, e41, e40,
e39, e38, e37, e36, e35, e34, e33, e32,e31, e30, e29, e28, e27, e26,
@@ -10226,22 +10287,22 @@ __m512i test_mm512_setr_epi32 (int __A, int __B, int __C, int __D,
int __M, int __N, int __O, int __P)
{
//CHECK-LABEL: @test_mm512_setr_epi32
- //CHECK: load{{.*}}%__P.addr, align 4
- //CHECK: load{{.*}}%__O.addr, align 4
- //CHECK: load{{.*}}%__N.addr, align 4
- //CHECK: load{{.*}}%__M.addr, align 4
- //CHECK: load{{.*}}%__L.addr, align 4
- //CHECK: load{{.*}}%__K.addr, align 4
- //CHECK: load{{.*}}%__J.addr, align 4
- //CHECK: load{{.*}}%__I.addr, align 4
- //CHECK: load{{.*}}%__H.addr, align 4
- //CHECK: load{{.*}}%__G.addr, align 4
- //CHECK: load{{.*}}%__F.addr, align 4
- //CHECK: load{{.*}}%__E.addr, align 4
- //CHECK: load{{.*}}%__D.addr, align 4
- //CHECK: load{{.*}}%__C.addr, align 4
- //CHECK: load{{.*}}%__B.addr, align 4
- //CHECK: load{{.*}}%__A.addr, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
//CHECK: insertelement{{.*}}i32 0
//CHECK: insertelement{{.*}}i32 1
//CHECK: insertelement{{.*}}i32 2
@@ -10314,14 +10375,14 @@ __m512i test_mm512_setr_epi64 (long long __A, long long __B, long long __C,
long long __G, long long __H)
{
//CHECK-LABEL: @test_mm512_setr_epi64
- //CHECK: load{{.*}}%__H.addr, align 8
- //CHECK: load{{.*}}%__G.addr, align 8
- //CHECK: load{{.*}}%__F.addr, align 8
- //CHECK: load{{.*}}%__E.addr, align 8
- //CHECK: load{{.*}}%__D.addr, align 8
- //CHECK: load{{.*}}%__C.addr, align 8
- //CHECK: load{{.*}}%__B.addr, align 8
- //CHECK: load{{.*}}%__A.addr, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
//CHECK: insertelement{{.*}}i32 0
//CHECK: insertelement{{.*}}i32 1
//CHECK: insertelement{{.*}}i32 2
@@ -10352,14 +10413,14 @@ __m512d test_mm512_setr_pd (double __A, double __B, double __C, double __D,
double __E, double __F, double __G, double __H)
{
//CHECK-LABEL: @test_mm512_setr_pd
- //CHECK: load{{.*}}%__H.addr, align 8
- //CHECK: load{{.*}}%__G.addr, align 8
- //CHECK: load{{.*}}%__F.addr, align 8
- //CHECK: load{{.*}}%__E.addr, align 8
- //CHECK: load{{.*}}%__D.addr, align 8
- //CHECK: load{{.*}}%__C.addr, align 8
- //CHECK: load{{.*}}%__B.addr, align 8
- //CHECK: load{{.*}}%__A.addr, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
+ //CHECK: load{{.*}}%{{.*}}, align 8
//CHECK: insertelement{{.*}}i32 0
//CHECK: insertelement{{.*}}i32 1
//CHECK: insertelement{{.*}}i32 2
@@ -10443,22 +10504,22 @@ __m512 test_mm512_setr_ps (float __A, float __B, float __C, float __D,
float __M, float __N, float __O, float __P)
{
//CHECK-LABEL: @test_mm512_setr_ps
- //CHECK: load{{.*}}%__P.addr, align 4
- //CHECK: load{{.*}}%__O.addr, align 4
- //CHECK: load{{.*}}%__N.addr, align 4
- //CHECK: load{{.*}}%__M.addr, align 4
- //CHECK: load{{.*}}%__L.addr, align 4
- //CHECK: load{{.*}}%__K.addr, align 4
- //CHECK: load{{.*}}%__J.addr, align 4
- //CHECK: load{{.*}}%__I.addr, align 4
- //CHECK: load{{.*}}%__H.addr, align 4
- //CHECK: load{{.*}}%__G.addr, align 4
- //CHECK: load{{.*}}%__F.addr, align 4
- //CHECK: load{{.*}}%__E.addr, align 4
- //CHECK: load{{.*}}%__D.addr, align 4
- //CHECK: load{{.*}}%__C.addr, align 4
- //CHECK: load{{.*}}%__B.addr, align 4
- //CHECK: load{{.*}}%__A.addr, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
+ //CHECK: load{{.*}}%{{.*}}, align 4
//CHECK: insertelement{{.*}}i32 0
//CHECK: insertelement{{.*}}i32 1
//CHECK: insertelement{{.*}}i32 2
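
In the set/setr tests above, checks that pinned IR value names such as %e63.addr or %__P.addr are widened to %{{.*}} wildcards. A plausible motivation is the second RUN line added at the top of this file: the same checks must now pass under an MSVC-flavoured configuration as well, so they assert only the loads' type, order, and alignment, not local value names. A minimal illustration of the idiom (hypothetical function):

int sketch_wildcard(int x) {
  // CHECK-LABEL: @sketch_wildcard
  // Matches e.g. "load i32, i32* %x.addr, align 4" under any value name:
  // CHECK: load{{.*}}%{{.*}}, align 4
  return x;
}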
diff --git a/test/CodeGen/avx512vbmi2-builtins.c b/test/CodeGen/avx512vbmi2-builtins.c
index 304561d9fa..d4812695e5 100644
--- a/test/CodeGen/avx512vbmi2-builtins.c
+++ b/test/CodeGen/avx512vbmi2-builtins.c
@@ -4,25 +4,25 @@
__m512i test_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_compress_epi16
- // CHECK: @llvm.x86.avx512.mask.compress.w.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_mask_compress_epi16(__S, __U, __D);
}
__m512i test_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_maskz_compress_epi16
- // CHECK: @llvm.x86.avx512.mask.compress.w.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_maskz_compress_epi16(__U, __D);
}
__m512i test_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_compress_epi8
- // CHECK: @llvm.x86.avx512.mask.compress.b.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_mask_compress_epi8(__S, __U, __D);
}
__m512i test_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_maskz_compress_epi8
- // CHECK: @llvm.x86.avx512.mask.compress.b.512
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm512_maskz_compress_epi8(__U, __D);
}
@@ -40,25 +40,25 @@ void test_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
__m512i test_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_expand_epi16
- // CHECK: @llvm.x86.avx512.mask.expand.w.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_mask_expand_epi16(__S, __U, __D);
}
__m512i test_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_maskz_expand_epi16
- // CHECK: @llvm.x86.avx512.mask.expand.w.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_maskz_expand_epi16(__U, __D);
}
__m512i test_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_expand_epi8
- // CHECK: @llvm.x86.avx512.mask.expand.b.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_mask_expand_epi8(__S, __U, __D);
}
__m512i test_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) {
// CHECK-LABEL: @test_mm512_maskz_expand_epi8
- // CHECK: @llvm.x86.avx512.mask.expand.b.512
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm512_maskz_expand_epi8(__U, __D);
}
diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index 5547ac9e2f..8c9e15d410 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -3675,82 +3675,82 @@ __m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
}
__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: @test_mm_mask_compress_pd
- // CHECK: @llvm.x86.avx512.mask.compress.pd.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_mask_compress_pd(__W,__U,__A);
}
__m128d test_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) {
// CHECK-LABEL: @test_mm_maskz_compress_pd
- // CHECK: @llvm.x86.avx512.mask.compress.pd.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_maskz_compress_pd(__U,__A);
}
__m256d test_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_mask_compress_pd
- // CHECK: @llvm.x86.avx512.mask.compress.pd.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_mask_compress_pd(__W,__U,__A);
}
__m256d test_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_maskz_compress_pd
- // CHECK: @llvm.x86.avx512.mask.compress.pd.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_maskz_compress_pd(__U,__A);
}
__m128i test_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_compress_epi64
- // CHECK: @llvm.x86.avx512.mask.compress.q.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_mask_compress_epi64(__W,__U,__A);
}
__m128i test_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_compress_epi64
- // CHECK: @llvm.x86.avx512.mask.compress.q.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_maskz_compress_epi64(__U,__A);
}
__m256i test_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_compress_epi64
- // CHECK: @llvm.x86.avx512.mask.compress.q.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_mask_compress_epi64(__W,__U,__A);
}
__m256i test_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_compress_epi64
- // CHECK: @llvm.x86.avx512.mask.compress.q.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_maskz_compress_epi64(__U,__A);
}
__m128 test_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_mask_compress_ps
- // CHECK: @llvm.x86.avx512.mask.compress.ps.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_mask_compress_ps(__W,__U,__A);
}
__m128 test_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_maskz_compress_ps
- // CHECK: @llvm.x86.avx512.mask.compress.ps.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_maskz_compress_ps(__U,__A);
}
__m256 test_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_mask_compress_ps
- // CHECK: @llvm.x86.avx512.mask.compress.ps.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_mask_compress_ps(__W,__U,__A);
}
__m256 test_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_maskz_compress_ps
- // CHECK: @llvm.x86.avx512.mask.compress.ps.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_maskz_compress_ps(__U,__A);
}
__m128i test_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_compress_epi32
- // CHECK: @llvm.x86.avx512.mask.compress.d.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_mask_compress_epi32(__W,__U,__A);
}
__m128i test_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_compress_epi32
- // CHECK: @llvm.x86.avx512.mask.compress.d.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_maskz_compress_epi32(__U,__A);
}
__m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_compress_epi32
- // CHECK: @llvm.x86.avx512.mask.compress.d.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_mask_compress_epi32(__W,__U,__A);
}
__m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_compress_epi32
- // CHECK: @llvm.x86.avx512.mask.compress.d.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_maskz_compress_epi32(__U,__A);
}
void test_mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A) {
@@ -4222,42 +4222,42 @@ __m256 test_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
}
__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: @test_mm_mask_expand_pd
- // CHECK: @llvm.x86.avx512.mask.expand.pd.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_mask_expand_pd(__W,__U,__A);
}
__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
// CHECK-LABEL: @test_mm_maskz_expand_pd
- // CHECK: @llvm.x86.avx512.mask.expand.pd.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_maskz_expand_pd(__U,__A);
}
__m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_mask_expand_pd
- // CHECK: @llvm.x86.avx512.mask.expand.pd.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_mask_expand_pd(__W,__U,__A);
}
__m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_maskz_expand_pd
- // CHECK: @llvm.x86.avx512.mask.expand.pd.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_maskz_expand_pd(__U,__A);
}
__m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_expand_epi64
- // CHECK: @llvm.x86.avx512.mask.expand.q.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_mask_expand_epi64(__W,__U,__A);
}
__m128i test_mm_maskz_expand_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_expand_epi64
- // CHECK: @llvm.x86.avx512.mask.expand.q.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_maskz_expand_epi64(__U,__A);
}
__m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_expand_epi64
- // CHECK: @llvm.x86.avx512.mask.expand.q.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_mask_expand_epi64(__W,__U,__A);
}
__m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_expand_epi64
- // CHECK: @llvm.x86.avx512.mask.expand.q.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_maskz_expand_epi64(__U,__A);
}
__m128d test_mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P) {
@@ -4342,42 +4342,42 @@ __m256i test_mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) {
}
__m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_mask_expand_ps
- // CHECK: @llvm.x86.avx512.mask.expand.ps.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_mask_expand_ps(__W,__U,__A);
}
__m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_maskz_expand_ps
- // CHECK: @llvm.x86.avx512.mask.expand.ps.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_maskz_expand_ps(__U,__A);
}
__m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_mask_expand_ps
- // CHECK: @llvm.x86.avx512.mask.expand.ps.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_mask_expand_ps(__W,__U,__A);
}
__m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_maskz_expand_ps
- // CHECK: @llvm.x86.avx512.mask.expand.ps.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_maskz_expand_ps(__U,__A);
}
__m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_expand_epi32
- // CHECK: @llvm.x86.avx512.mask.expand.d.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_mask_expand_epi32(__W,__U,__A);
}
__m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_expand_epi32
- // CHECK: @llvm.x86.avx512.mask.expand.d.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_maskz_expand_epi32(__U,__A);
}
__m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_expand_epi32
- // CHECK: @llvm.x86.avx512.mask.expand.d.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_mask_expand_epi32(__W,__U,__A);
}
__m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_expand_epi32
- // CHECK: @llvm.x86.avx512.mask.expand.d.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_maskz_expand_epi32(__U,__A);
}
__m128d test_mm_getexp_pd(__m128d __A) {
@@ -5058,162 +5058,162 @@ __m256 test_mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B) {
}
void test_mm_i64scatter_pd(double *__addr, __m128i __index, __m128d __v1) {
// CHECK-LABEL: @test_mm_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatterdiv2.df
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv2.df
return _mm_i64scatter_pd(__addr,__index,__v1,2);
}
void test_mm_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
// CHECK-LABEL: @test_mm_mask_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatterdiv2.df
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv2.df
return _mm_mask_i64scatter_pd(__addr,__mask,__index,__v1,2);
}
void test_mm_i64scatter_epi64(long long *__addr, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_i64scatter_epi64
- // CHECK: @llvm.x86.avx512.scatterdiv2.di
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv2.di
return _mm_i64scatter_epi64(__addr,__index,__v1,2);
}
void test_mm_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_mask_i64scatter_epi64
- // CHECK: @llvm.x86.avx512.scatterdiv2.di
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv2.di
return _mm_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2);
}
void test_mm256_i64scatter_pd(double *__addr, __m256i __index, __m256d __v1) {
// CHECK-LABEL: @test_mm256_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatterdiv4.df
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.df
return _mm256_i64scatter_pd(__addr,__index,__v1,2);
}
void test_mm256_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m256i __index, __m256d __v1) {
// CHECK-LABEL: @test_mm256_mask_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatterdiv4.df
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.df
return _mm256_mask_i64scatter_pd(__addr,__mask,__index,__v1,2);
}
void test_mm256_i64scatter_epi64(long long *__addr, __m256i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm256_i64scatter_epi64
- // CHECK: @llvm.x86.avx512.scatterdiv4.di
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.di
return _mm256_i64scatter_epi64(__addr,__index,__v1,2);
}
void test_mm256_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m256i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm256_mask_i64scatter_epi64
- // CHECK: @llvm.x86.avx512.scatterdiv4.di
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.di
return _mm256_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2);
}
void test_mm_i64scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
// CHECK-LABEL: @test_mm_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatterdiv4.sf
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.sf
return _mm_i64scatter_ps(__addr,__index,__v1,2);
}
void test_mm_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
// CHECK-LABEL: @test_mm_mask_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatterdiv4.sf
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.sf
return _mm_mask_i64scatter_ps(__addr,__mask,__index,__v1,2);
}
void test_mm_i64scatter_epi32(int *__addr, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_i64scatter_epi32
- // CHECK: @llvm.x86.avx512.scatterdiv4.si
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.si
return _mm_i64scatter_epi32(__addr,__index,__v1,2);
}
void test_mm_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_mask_i64scatter_epi32
- // CHECK: @llvm.x86.avx512.scatterdiv4.si
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv4.si
return _mm_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2);
}
void test_mm256_i64scatter_ps(float *__addr, __m256i __index, __m128 __v1) {
// CHECK-LABEL: @test_mm256_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatterdiv8.sf
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv8.sf
return _mm256_i64scatter_ps(__addr,__index,__v1,2);
}
void test_mm256_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m128 __v1) {
// CHECK-LABEL: @test_mm256_mask_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatterdiv8.sf
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv8.sf
return _mm256_mask_i64scatter_ps(__addr,__mask,__index,__v1,2);
}
void test_mm256_i64scatter_epi32(int *__addr, __m256i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm256_i64scatter_epi32
- // CHECK: @llvm.x86.avx512.scatterdiv8.si
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv8.si
return _mm256_i64scatter_epi32(__addr,__index,__v1,2);
}
void test_mm256_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m256i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm256_mask_i64scatter_epi32
- // CHECK: @llvm.x86.avx512.scatterdiv8.si
+ // CHECK: @llvm.x86.avx512.mask.scatterdiv8.si
return _mm256_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2);
}
void test_mm_i32scatter_pd(double *__addr, __m128i __index, __m128d __v1) {
// CHECK-LABEL: @test_mm_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scattersiv2.df
+ // CHECK: @llvm.x86.avx512.mask.scattersiv2.df
return _mm_i32scatter_pd(__addr,__index,__v1,2);
}
void test_mm_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
// CHECK-LABEL: @test_mm_mask_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scattersiv2.df
+ // CHECK: @llvm.x86.avx512.mask.scattersiv2.df
return _mm_mask_i32scatter_pd(__addr,__mask,__index,__v1,2);
}
void test_mm_i32scatter_epi64(long long *__addr, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_i32scatter_epi64
- // CHECK: @llvm.x86.avx512.scattersiv2.di
+ // CHECK: @llvm.x86.avx512.mask.scattersiv2.di
return _mm_i32scatter_epi64(__addr,__index,__v1,2);
}
void test_mm_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_mask_i32scatter_epi64
- // CHECK: @llvm.x86.avx512.scattersiv2.di
+ // CHECK: @llvm.x86.avx512.mask.scattersiv2.di
return _mm_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2);
}
void test_mm256_i32scatter_pd(double *__addr, __m128i __index, __m256d __v1) {
// CHECK-LABEL: @test_mm256_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scattersiv4.df
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.df
return _mm256_i32scatter_pd(__addr,__index,__v1,2);
}
void test_mm256_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m256d __v1) {
// CHECK-LABEL: @test_mm256_mask_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scattersiv4.df
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.df
return _mm256_mask_i32scatter_pd(__addr,__mask,__index,__v1,2);
}
void test_mm256_i32scatter_epi64(long long *__addr, __m128i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm256_i32scatter_epi64
- // CHECK: @llvm.x86.avx512.scattersiv4.di
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.di
return _mm256_i32scatter_epi64(__addr,__index,__v1,2);
}
void test_mm256_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm256_mask_i32scatter_epi64
- // CHECK: @llvm.x86.avx512.scattersiv4.di
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.di
return _mm256_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2);
}
void test_mm_i32scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
// CHECK-LABEL: @test_mm_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scattersiv4.sf
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.sf
return _mm_i32scatter_ps(__addr,__index,__v1,2);
}
void test_mm_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
// CHECK-LABEL: @test_mm_mask_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scattersiv4.sf
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.sf
return _mm_mask_i32scatter_ps(__addr,__mask,__index,__v1,2);
}
void test_mm_i32scatter_epi32(int *__addr, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.scattersiv4.si
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.si
return _mm_i32scatter_epi32(__addr,__index,__v1,2);
}
void test_mm_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
// CHECK-LABEL: @test_mm_mask_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.scattersiv4.si
+ // CHECK: @llvm.x86.avx512.mask.scattersiv4.si
return _mm_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2);
}
void test_mm256_i32scatter_ps(float *__addr, __m256i __index, __m256 __v1) {
// CHECK-LABEL: @test_mm256_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scattersiv8.sf
+ // CHECK: @llvm.x86.avx512.mask.scattersiv8.sf
return _mm256_i32scatter_ps(__addr,__index,__v1,2);
}
void test_mm256_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m256 __v1) {
// CHECK-LABEL: @test_mm256_mask_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scattersiv8.sf
+ // CHECK: @llvm.x86.avx512.mask.scattersiv8.sf
return _mm256_mask_i32scatter_ps(__addr,__mask,__index,__v1,2);
}
void test_mm256_i32scatter_epi32(int *__addr, __m256i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm256_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.scattersiv8.si
+ // CHECK: @llvm.x86.avx512.mask.scattersiv8.si
return _mm256_i32scatter_epi32(__addr,__index,__v1,2);
}
void test_mm256_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m256i __index, __m256i __v1) {
// CHECK-LABEL: @test_mm256_mask_i32scatter_epi32
- // CHECK: @llvm.x86.avx512.scattersiv8.si
+ // CHECK: @llvm.x86.avx512.mask.scattersiv8.si
return _mm256_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2);
}
__m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
@@ -9280,97 +9280,97 @@ __m256 test_mm256_maskz_getmant_ps(__mmask8 __U, __m256 __A) {
__m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mmask_i64gather_pd
- // CHECK: @llvm.x86.avx512.gather3div2.df
+ // CHECK: @llvm.x86.avx512.mask.gather3div2.df
return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mmask_i64gather_epi64
- // CHECK: @llvm.x86.avx512.gather3div2.di
+ // CHECK: @llvm.x86.avx512.mask.gather3div2.di
return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
__m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mmask_i64gather_pd
- // CHECK: @llvm.x86.avx512.gather3div4.df
+ // CHECK: @llvm.x86.avx512.mask.gather3div4.df
return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mmask_i64gather_epi64
- // CHECK: @llvm.x86.avx512.gather3div4.di
+ // CHECK: @llvm.x86.avx512.mask.gather3div4.di
return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
__m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mmask_i64gather_ps
- // CHECK: @llvm.x86.avx512.gather3div4.sf
+ // CHECK: @llvm.x86.avx512.mask.gather3div4.sf
return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}
__m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mmask_i64gather_epi32
- // CHECK: @llvm.x86.avx512.gather3div4.si
+ // CHECK: @llvm.x86.avx512.mask.gather3div4.si
return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mmask_i64gather_ps
- // CHECK: @llvm.x86.avx512.gather3div8.sf
+ // CHECK: @llvm.x86.avx512.mask.gather3div8.sf
return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}
__m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mmask_i64gather_epi32
- // CHECK: @llvm.x86.avx512.gather3div8.si
+ // CHECK: @llvm.x86.avx512.mask.gather3div8.si
return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mask_i32gather_pd
- // CHECK: @llvm.x86.avx512.gather3siv2.df
+ // CHECK: @llvm.x86.avx512.mask.gather3siv2.df
return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mask_i32gather_epi64
- // CHECK: @llvm.x86.avx512.gather3siv2.di
+ // CHECK: @llvm.x86.avx512.mask.gather3siv2.di
return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
__m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mask_i32gather_pd
- // CHECK: @llvm.x86.avx512.gather3siv4.df
+ // CHECK: @llvm.x86.avx512.mask.gather3siv4.df
return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mask_i32gather_epi64
- // CHECK: @llvm.x86.avx512.gather3siv4.di
+ // CHECK: @llvm.x86.avx512.mask.gather3siv4.di
return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
__m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mask_i32gather_ps
- // CHECK: @llvm.x86.avx512.gather3siv4.sf
+ // CHECK: @llvm.x86.avx512.mask.gather3siv4.sf
return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
}
__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CHECK-LABEL: @test_mm_mask_i32gather_epi32
- // CHECK: @llvm.x86.avx512.gather3siv4.si
+ // CHECK: @llvm.x86.avx512.mask.gather3siv4.si
return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mask_i32gather_ps
- // CHECK: @llvm.x86.avx512.gather3siv8.sf
+ // CHECK: @llvm.x86.avx512.mask.gather3siv8.sf
return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
}
__m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CHECK-LABEL: @test_mm256_mask_i32gather_epi32
- // CHECK: @llvm.x86.avx512.gather3siv8.si
+ // CHECK: @llvm.x86.avx512.mask.gather3siv8.si
return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
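For reference, a minimal sketch of how the masked gathers exercised above are
driven from C. The scale of 2 matches the immediate used throughout these
tests; the mask value and helper name below are illustrative assumptions, not
test code.

    #include <immintrin.h>

    // Gather two floats from base_addr + 2*index[i] (two 64-bit indices);
    // lanes whose mask bit is clear keep their value from `old`.
    static __m128 gather_scaled(__m128 old, const void *base_addr,
                                __m128i index) {
      __mmask8 mask = 0x3; // assumed: gather both addressable lanes
      return _mm_mmask_i64gather_ps(old, mask, index, base_addr, 2);
    }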
diff --git a/test/CodeGen/avx512vlbf16-builtins.c b/test/CodeGen/avx512vlbf16-builtins.c
new file mode 100644
index 0000000000..db24d57c67
--- /dev/null
+++ b/test/CodeGen/avx512vlbf16-builtins.c
@@ -0,0 +1,163 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin \
+// RUN: -target-feature +avx512bf16 -target-feature \
+// RUN: +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
+ // CHECK-LABEL: @test_mm_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm_cvtne2ps_pbh(A, B);
+}
+
+__m128bh test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) {
+ // CHECK-LABEL: @test_mm_maskz_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m128bh test_mm_mask_cvtne2ps2bf16(__m128bh C, __mmask8 U, __m128 A, __m128 B) {
+ // CHECK-LABEL: @test_mm_mask_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m256bh test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) {
+ // CHECK-LABEL: @test_mm256_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+ // CHECK: ret <16 x i16> %{{.*}}
+ return _mm256_cvtne2ps_pbh(A, B);
+}
+
+__m256bh test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ // CHECK: ret <16 x i16> %{{.*}}
+ return _mm256_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m256bh test_mm256_mask_cvtne2ps2bf16(__m256bh C, __mmask16 U, __m256 A, __m256 B) {
+ // CHECK-LABEL: @test_mm256_mask_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ // CHECK: ret <16 x i16> %{{.*}}
+ return _mm256_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
+ // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+ // CHECK: ret <32 x i16> %{{.*}}
+ return _mm512_cvtne2ps_pbh(A, B);
+}
+
+__m512bh test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) {
+ // CHECK-LABEL: @test_mm512_maskz_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ // CHECK: ret <32 x i16> %{{.*}}
+ return _mm512_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512 B) {
+ // CHECK-LABEL: @test_mm512_mask_cvtne2ps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ // CHECK: ret <32 x i16> %{{.*}}
+ return _mm512_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m128bh test_mm_cvtneps2bf16(__m128 A) {
+ // CHECK-LABEL: @test_mm_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm_cvtneps_pbh(A);
+}
+
+__m128bh test_mm_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_mask_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm_mask_cvtneps_pbh(C, U, A);
+}
+
+__m128bh test_mm_maskz_cvtneps2bf16(__m128 A, __mmask8 U) {
+ // CHECK-LABEL: @test_mm_maskz_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm_maskz_cvtneps_pbh(U, A);
+}
+
+__m128bh test_mm256_cvtneps2bf16(__m256 A) {
+ // CHECK-LABEL: @test_mm256_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm256_cvtneps_pbh(A);
+}
+
+__m128bh test_mm256_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm256_mask_cvtneps_pbh(C, U, A);
+}
+
+__m128bh test_mm256_maskz_cvtneps2bf16(__m256 A, __mmask8 U) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtneps2bf16
+ // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ // CHECK: ret <8 x i16> %{{.*}}
+ return _mm256_maskz_cvtneps_pbh(U, A);
+}
+
+__m128 test_mm_dpbf16_ps(__m128 D, __m128bh A, __m128bh B) {
+ // CHECK-LABEL: @test_mm_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.128
+ // CHECK: ret <4 x float> %{{.*}}
+ return _mm_dpbf16_ps(D, A, B);
+}
+
+__m128 test_mm_maskz_dpbf16_ps(__m128 D, __m128bh A, __m128bh B, __mmask8 U) {
+ // CHECK-LABEL: @test_mm_maskz_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ // CHECK: ret <4 x float> %{{.*}}
+ return _mm_maskz_dpbf16_ps(U, D, A, B);
+}
+
+__m128 test_mm_mask_dpbf16_ps(__m128 D, __m128bh A, __m128bh B, __mmask8 U) {
+ // CHECK-LABEL: @test_mm_mask_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ // CHECK: ret <4 x float> %{{.*}}
+ return _mm_mask_dpbf16_ps(D, U, A, B);
+}
+__m256 test_mm256_dpbf16_ps(__m256 D, __m256bh A, __m256bh B) {
+ // CHECK-LABEL: @test_mm256_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.256
+ // CHECK: ret <8 x float> %{{.*}}
+ return _mm256_dpbf16_ps(D, A, B);
+}
+
+__m256 test_mm256_maskz_dpbf16_ps(__m256 D, __m256bh A, __m256bh B, __mmask8 U) {
+ // CHECK-LABEL: @test_mm256_maskz_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ // CHECK: ret <8 x float> %{{.*}}
+ return _mm256_maskz_dpbf16_ps(U, D, A, B);
+}
+
+__m256 test_mm256_mask_dpbf16_ps(__m256 D, __m256bh A, __m256bh B, __mmask8 U) {
+ // CHECK-LABEL: @test_mm256_mask_dpbf16_ps
+ // CHECK: @llvm.x86.avx512bf16.dpbf16ps.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ // CHECK: ret <8 x float> %{{.*}}
+ return _mm256_mask_dpbf16_ps(D, U, A, B);
+}
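As a usage note for the new file above, the cvtne2ps and dpbf16 intrinsics it
exercises compose naturally into a bfloat16 dot-product step. The sketch below
is illustrative; the helper and argument names are assumptions, not test code.

    #include <immintrin.h>

    // Each 32-bit lane of the result gains a[2i]*b[2i] + a[2i+1]*b[2i+1],
    // computed after narrowing the float inputs to bfloat16 pairs.
    static __m128 bf16_dot_step(__m128 acc, __m128 a_lo, __m128 a_hi,
                                __m128 b_lo, __m128 b_hi) {
      __m128bh a = _mm_cvtne2ps_pbh(a_hi, a_lo); // low half from a_lo
      __m128bh b = _mm_cvtne2ps_pbh(b_hi, b_lo);
      return _mm_dpbf16_ps(acc, a, b);
    }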
diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c
index 80efe72787..1d853844be 100644
--- a/test/CodeGen/avx512vlbw-builtins.c
+++ b/test/CodeGen/avx512vlbw-builtins.c
@@ -1179,101 +1179,49 @@ __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
}
__m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_mask_avg_epu8
- // CHECK-NOT: @llvm.x86.sse2.pavg.b
- // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
- // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
- // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+ // CHECK: @llvm.x86.sse2.pavg.b
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_avg_epu8(__W,__U,__A,__B);
}
__m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_maskz_avg_epu8
- // CHECK-NOT: @llvm.x86.sse2.pavg.b
- // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
- // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
- // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
- // CHECK: store <2 x i64> zeroinitializer
+ // CHECK: @llvm.x86.sse2.pavg.b
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_avg_epu8(__U,__A,__B);
}
__m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_avg_epu8
- // CHECK-NOT: @llvm.x86.avx2.pavg.b
- // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
- // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
- // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+ // CHECK: @llvm.x86.avx2.pavg.b
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_avg_epu8(__W,__U,__A,__B);
}
__m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_avg_epu8
- // CHECK-NOT: @llvm.x86.avx2.pavg.b
- // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
- // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
- // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
- // CHECK: store <4 x i64> zeroinitializer
+ // CHECK: @llvm.x86.avx2.pavg.b
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_avg_epu8(__U,__A,__B);
}
__m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_mask_avg_epu16
- // CHECK-NOT: @llvm.x86.sse2.pavg.w
- // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+ // CHECK: @llvm.x86.sse2.pavg.w
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_avg_epu16(__W,__U,__A,__B);
}
__m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_maskz_avg_epu16
- // CHECK-NOT: @llvm.x86.sse2.pavg.w
- // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
- // CHECK: store <2 x i64> zeroinitializer
+ // CHECK: @llvm.x86.sse2.pavg.w
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_avg_epu16(__U,__A,__B);
}
__m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_avg_epu16
- // CHECK-NOT: @llvm.x86.avx2.pavg.w
- // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+ // CHECK: @llvm.x86.avx2.pavg.w
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_avg_epu16(__W,__U,__A,__B);
}
__m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_avg_epu16
- // CHECK-NOT: @llvm.x86.avx2.pavg.w
- // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
- // CHECK: store <4 x i64> zeroinitializer
+ // CHECK: @llvm.x86.avx2.pavg.w
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_avg_epu16(__U,__A,__B);
}
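The CHECK lines removed above spelled out the widen/add-one/shift lowering of
the averaging intrinsics; the replacements match the pavg intrinsic call
instead. Both describe the same rounding average, shown here in scalar form
(the helper name is illustrative):

    #include <stdint.h>

    // Rounding average of two unsigned bytes, exactly what the deleted
    // CHECK lines encoded: zero-extend, add, add 1, logical shift right 1.
    static uint8_t avg_epu8_scalar(uint8_t a, uint8_t b) {
      return (uint8_t)(((uint16_t)a + (uint16_t)b + 1) >> 1);
    }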
diff --git a/test/CodeGen/avx512vlcd-builtins.c b/test/CodeGen/avx512vlcd-builtins.c
index 376a342f76..e9330a52da 100644
--- a/test/CodeGen/avx512vlcd-builtins.c
+++ b/test/CodeGen/avx512vlcd-builtins.c
@@ -57,73 +57,81 @@ __m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) {
__m128i test_mm_conflict_epi64(__m128i __A) {
// CHECK-LABEL: @test_mm_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.128
+ // CHECK: @llvm.x86.avx512.conflict.q.128
return _mm_conflict_epi64(__A);
}
__m128i test_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.128
+ // CHECK: @llvm.x86.avx512.conflict.q.128
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_conflict_epi64(__W, __U, __A);
}
__m128i test_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.128
+ // CHECK: @llvm.x86.avx512.conflict.q.128
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_conflict_epi64(__U, __A);
}
__m256i test_mm256_conflict_epi64(__m256i __A) {
// CHECK-LABEL: @test_mm256_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.256
+ // CHECK: @llvm.x86.avx512.conflict.q.256
return _mm256_conflict_epi64(__A);
}
__m256i test_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.256
+ // CHECK: @llvm.x86.avx512.conflict.q.256
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_conflict_epi64(__W, __U, __A);
}
__m256i test_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_conflict_epi64
- // CHECK: @llvm.x86.avx512.mask.conflict.q.256
+ // CHECK: @llvm.x86.avx512.conflict.q.256
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_conflict_epi64(__U, __A);
}
__m128i test_mm_conflict_epi32(__m128i __A) {
// CHECK-LABEL: @test_mm_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.128
+ // CHECK: @llvm.x86.avx512.conflict.d.128
return _mm_conflict_epi32(__A);
}
__m128i test_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.128
+ // CHECK: @llvm.x86.avx512.conflict.d.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_conflict_epi32(__W, __U, __A);
}
__m128i test_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.128
+ // CHECK: @llvm.x86.avx512.conflict.d.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_conflict_epi32(__U, __A);
}
__m256i test_mm256_conflict_epi32(__m256i __A) {
// CHECK-LABEL: @test_mm256_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.256
+ // CHECK: @llvm.x86.avx512.conflict.d.256
return _mm256_conflict_epi32(__A);
}
__m256i test_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.256
+ // CHECK: @llvm.x86.avx512.conflict.d.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_conflict_epi32(__W, __U, __A);
}
__m256i test_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_conflict_epi32
- // CHECK: @llvm.x86.avx512.mask.conflict.d.256
+ // CHECK: @llvm.x86.avx512.conflict.d.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_conflict_epi32(__U, __A);
}
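For readers unfamiliar with VPCONFLICT, a scalar sketch of what the conflict
intrinsics above compute (a paraphrase of the instruction's documented
semantics, not part of the test): each output lane is a bitmask of earlier
lanes that hold the same value.

    #include <stdint.h>

    // Bit j of out[i] is set when j < i and in[j] == in[i], mirroring
    // _mm_conflict_epi64 on a 2-lane vector.
    static void conflict_epi64_scalar(const uint64_t in[2], uint64_t out[2]) {
      for (int i = 0; i < 2; ++i) {
        out[i] = 0;
        for (int j = 0; j < i; ++j)
          if (in[j] == in[i])
            out[i] |= UINT64_C(1) << j;
      }
    }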
diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c
index b21b665eb9..8bb209862f 100644
--- a/test/CodeGen/avx512vldq-builtins.c
+++ b/test/CodeGen/avx512vldq-builtins.c
@@ -479,19 +479,21 @@ __m128 test_mm_maskz_cvtepi64_ps(__mmask8 __U, __m128i __A) {
__m128 test_mm256_cvtepi64_ps(__m256i __A) {
// CHECK-LABEL: @test_mm256_cvtepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.256
+ // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float>
return _mm256_cvtepi64_ps(__A);
}
__m128 test_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_cvtepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.256
+ // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_mask_cvtepi64_ps(__W, __U, __A);
}
__m128 test_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_cvtepi64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.256
+ // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_maskz_cvtepi64_ps(__U, __A);
}
@@ -699,19 +701,21 @@ __m128 test_mm_maskz_cvtepu64_ps(__mmask8 __U, __m128i __A) {
__m128 test_mm256_cvtepu64_ps(__m256i __A) {
// CHECK-LABEL: @test_mm256_cvtepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.256
+ // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float>
return _mm256_cvtepu64_ps(__A);
}
__m128 test_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_cvtepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.256
+ // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_mask_cvtepu64_ps(__W, __U, __A);
}
__m128 test_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_cvtepu64_ps
- // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.256
+ // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_maskz_cvtepu64_ps(__U, __A);
}
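The rewritten CHECK lines above pin down that these int64-to-float conversions
now lower to plain IR casts (plus a select for the masked forms); the
signed/unsigned split they test is the ordinary C one:

    #include <stdint.h>

    // The same 64-bit pattern converts differently by signedness:
    static float cvt_signed(int64_t x)    { return (float)x; } // sitofp
    static float cvt_unsigned(uint64_t x) { return (float)x; } // uitofp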
diff --git a/test/CodeGen/avx512vlvbmi2-builtins.c b/test/CodeGen/avx512vlvbmi2-builtins.c
index b512a728a5..de3b7ed834 100644
--- a/test/CodeGen/avx512vlvbmi2-builtins.c
+++ b/test/CodeGen/avx512vlvbmi2-builtins.c
@@ -4,25 +4,25 @@
__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_compress_epi16
- // CHECK: @llvm.x86.avx512.mask.compress.w.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_mask_compress_epi16(__S, __U, __D);
}
__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_maskz_compress_epi16
- // CHECK: @llvm.x86.avx512.mask.compress.w.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_maskz_compress_epi16(__U, __D);
}
__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_compress_epi8
- // CHECK: @llvm.x86.avx512.mask.compress.b.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_mask_compress_epi8(__S, __U, __D);
}
__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_maskz_compress_epi8
- // CHECK: @llvm.x86.avx512.mask.compress.b.128
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm_maskz_compress_epi8(__U, __D);
}
@@ -40,25 +40,25 @@ void test_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) {
__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_expand_epi16
- // CHECK: @llvm.x86.avx512.mask.expand.w.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_mask_expand_epi16(__S, __U, __D);
}
__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_maskz_expand_epi16
- // CHECK: @llvm.x86.avx512.mask.expand.w.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_maskz_expand_epi16(__U, __D);
}
__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_expand_epi8
- // CHECK: @llvm.x86.avx512.mask.expand.b.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_mask_expand_epi8(__S, __U, __D);
}
__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
// CHECK-LABEL: @test_mm_maskz_expand_epi8
- // CHECK: @llvm.x86.avx512.mask.expand.b.128
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm_maskz_expand_epi8(__U, __D);
}
@@ -88,25 +88,25 @@ __m128i test_mm_maskz_expandloadu_epi8(__mmask16 __U, void const* __P) {
__m256i test_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_compress_epi16
- // CHECK: @llvm.x86.avx512.mask.compress.w.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_mask_compress_epi16(__S, __U, __D);
}
__m256i test_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_maskz_compress_epi16
- // CHECK: @llvm.x86.avx512.mask.compress.w.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_maskz_compress_epi16(__U, __D);
}
__m256i test_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_compress_epi8
- // CHECK: @llvm.x86.avx512.mask.compress.b.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_mask_compress_epi8(__S, __U, __D);
}
__m256i test_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_maskz_compress_epi8
- // CHECK: @llvm.x86.avx512.mask.compress.b.256
+ // CHECK: @llvm.x86.avx512.mask.compress
return _mm256_maskz_compress_epi8(__U, __D);
}
@@ -124,25 +124,25 @@ void test_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
__m256i test_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_expand_epi16
- // CHECK: @llvm.x86.avx512.mask.expand.w.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_mask_expand_epi16(__S, __U, __D);
}
__m256i test_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_maskz_expand_epi16
- // CHECK: @llvm.x86.avx512.mask.expand.w.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_maskz_expand_epi16(__U, __D);
}
__m256i test_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_expand_epi8
- // CHECK: @llvm.x86.avx512.mask.expand.b.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_mask_expand_epi8(__S, __U, __D);
}
__m256i test_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) {
// CHECK-LABEL: @test_mm256_maskz_expand_epi8
- // CHECK: @llvm.x86.avx512.mask.expand.b.256
+ // CHECK: @llvm.x86.avx512.mask.expand
return _mm256_maskz_expand_epi8(__U, __D);
}
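These tests now match only the intrinsic family name. For orientation, a
scalar sketch of the compress semantics they exercise (assuming the usual
VPCOMPRESS behavior; names are illustrative): selected lanes pack toward
index 0 and the rest come from the pass-through operand.

    #include <stdint.h>

    // Scalar model of _mm_mask_compress_epi16(src, mask, in).
    static void compress_epi16_scalar(uint16_t out[8], const uint16_t src[8],
                                      uint8_t mask, const uint16_t in[8]) {
      int k = 0;
      for (int i = 0; i < 8; ++i)
        if (mask & (1u << i))
          out[k++] = in[i]; // packed contiguously from lane 0
      for (; k < 8; ++k)
        out[k] = src[k];    // remaining lanes from the source operand
    }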
diff --git a/test/CodeGen/bitscan-builtins.c b/test/CodeGen/bitscan-builtins.c
index 25dfa40462..176d829127 100644
--- a/test/CodeGen/bitscan-builtins.c
+++ b/test/CodeGen/bitscan-builtins.c
@@ -3,18 +3,45 @@
// PR33722
// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -fms-extensions -fms-compatibility-version=19.00 -emit-llvm -o - %s | FileCheck %s
-#include <immintrin.h>
+#include <x86intrin.h>
int test_bit_scan_forward(int a) {
return _bit_scan_forward(a);
// CHECK: @test_bit_scan_forward
-// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(
+// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
// CHECK: ret i32 %[[call]]
}
int test_bit_scan_reverse(int a) {
return _bit_scan_reverse(a);
-// CHECK: %[[call:.*]] = call i32 @llvm.ctlz.i32(
+// CHECK: %[[call:.*]] = call i32 @llvm.ctlz.i32(i32 %{{.*}}, i1 true)
// CHECK: %[[sub:.*]] = sub nsw i32 31, %[[call]]
// CHECK: ret i32 %[[sub]]
}
+
+int test__bsfd(int X) {
+// CHECK: @test__bsfd
+// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+ return __bsfd(X);
+}
+
+int test__bsfq(long long X) {
+// CHECK: @test__bsfq
+// CHECK: %[[call:.*]] = call i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+ return __bsfq(X);
+}
+
+int test__bsrd(int X) {
+// CHECK: @test__bsrd
+// CHECK: %[[call:.*]] = call i32 @llvm.ctlz.i32(i32 %{{.*}}, i1 true)
+// CHECK: %[[sub:.*]] = sub nsw i32 31, %[[call]]
+ return __bsrd(X);
+}
+
+int test__bsrq(long long X) {
+// CHECK: @test__bsrq
+// CHECK: %[[call:.*]] = call i64 @llvm.ctlz.i64(i64 %{{.*}}, i1 true)
+// CHECK: %[[cast:.*]] = trunc i64 %[[call]] to i32
+// CHECK: %[[sub:.*]] = sub nsw i32 63, %[[cast]]
+ return __bsrq(X);
+}
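The tightened CHECK lines above pin the cttz/ctlz calls to their
zero-is-undef (`i1 true`) forms, matching BSF/BSR being undefined for a zero
input; the bsr identity they encode is the usual one (helper name assumed):

    // __bsrd(x) is the index of the highest set bit: 31 - clz(x).
    // Undefined for x == 0, hence the `i1 true` the tests expect.
    static int bsrd_scalar(int x) {
      return 31 - __builtin_clz((unsigned)x);
    }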
diff --git a/test/CodeGen/blocks-1.c b/test/CodeGen/blocks-1.c
index 8589a7bedf..0df13790dd 100644
--- a/test/CodeGen/blocks-1.c
+++ b/test/CodeGen/blocks-1.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -o %t -fblocks
+// RUN: %clang_cc1 -triple thumbv7-apple-ios %s -emit-llvm -o %t -fblocks
// RUN: grep "_Block_object_dispose" %t | count 12
// RUN: grep "__copy_helper_block_" %t | count 9
// RUN: grep "__destroy_helper_block_" %t | count 9
@@ -7,6 +7,15 @@
// RUN: grep "i32 135)" %t | count 2
// RUN: grep "_Block_object_assign" %t | count 5
+// RUN: %clang_cc1 -triple thumbv7-unknown-windows %s -emit-llvm -o %t -fblocks
+// RUN: grep "_Block_object_dispose" %t | count 12
+// RUN: grep "__copy_helper_block_" %t | count 11
+// RUN: grep "__destroy_helper_block_" %t | count 11
+// RUN: grep "__Block_byref_object_copy_" %t | count 2
+// RUN: grep "__Block_byref_object_dispose_" %t | count 2
+// RUN: grep "i32 135)" %t | count 2
+// RUN: grep "_Block_object_assign" %t | count 5
+
int printf(const char *, ...);
void test1() {
diff --git a/test/CodeGen/builtin-constant-p.c b/test/CodeGen/builtin-constant-p.c
index f1cd06ad4a..36c45988d6 100644
--- a/test/CodeGen/builtin-constant-p.c
+++ b/test/CodeGen/builtin-constant-p.c
@@ -177,3 +177,11 @@ void test17() {
// CHECK: call void asm sideeffect "", {{.*}}(i32 -1)
__asm__ __volatile__("" :: "n"( (__builtin_constant_p(test17_v) || 0) ? 1 : -1));
}
+
+int test18_f();
+// CHECK: define void @test18
+// CHECK-NOT: call {{.*}}test18_f
+void test18() {
+ int a, b;
+ (void)__builtin_constant_p((a = b, test18_f()));
+}
diff --git a/test/CodeGen/builtin-expect.c b/test/CodeGen/builtin-expect.c
index 2d49700217..d0dce9b4b9 100644
--- a/test/CodeGen/builtin-expect.c
+++ b/test/CodeGen/builtin-expect.c
@@ -78,3 +78,20 @@ int switch_cond(int x) {
return 0;
}
+int variable_expected(int stuff) {
+// ALL-LABEL: define i32 @variable_expected(
+// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 {{%.*}})
+// O0-NOT: @llvm.expect
+
+ int res = 0;
+
+ switch (__builtin_expect(stuff, stuff)) {
+ case 0:
+ res = 1;
+ break;
+ default:
+ break;
+ }
+
+ return res;
+}
diff --git a/test/CodeGen/builtin-sponentry.c b/test/CodeGen/builtin-sponentry.c
new file mode 100644
index 0000000000..0a85089106
--- /dev/null
+++ b/test/CodeGen/builtin-sponentry.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple aarch64-windows-gnu -Oz -emit-llvm %s -o - | FileCheck %s
+
+void *test_sponentry() {
+ return __builtin_sponentry();
+}
+// CHECK-LABEL: define dso_local i8* @test_sponentry()
+// CHECK: = tail call i8* @llvm.sponentry()
+// CHECK: ret i8*
diff --git a/test/CodeGen/builtins-arm64.c b/test/CodeGen/builtins-arm64.c
index 7027a6e220..f164c2f6f3 100644
--- a/test/CodeGen/builtins-arm64.c
+++ b/test/CodeGen/builtins-arm64.c
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LINUX
+// RUN: %clang_cc1 -triple aarch64-windows -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-WIN
+#include <stdint.h>
void f0(void *a, void *b) {
__clear_cache(a,b);
@@ -15,8 +17,15 @@ unsigned rbit(unsigned a) {
return __builtin_arm_rbit(a);
}
+// CHECK-WIN: [[A64:%[^ ]+]] = zext i32 %a to i64
+// CHECK-WIN: call i64 @llvm.bitreverse.i64(i64 [[A64]])
+// CHECK-LINUX: call i64 @llvm.bitreverse.i64(i64 %a)
+unsigned long rbitl(unsigned long a) {
+ return __builtin_arm_rbit64(a);
+}
+
// CHECK: call {{.*}} @llvm.bitreverse.i64(i64 %a)
-unsigned long long rbit64(unsigned long long a) {
+uint64_t rbit64(uint64_t a) {
return __builtin_arm_rbit64(a);
}
@@ -49,13 +58,17 @@ void prefetch() {
// CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0)
}
-unsigned rsr() {
+__typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void);
+
+uint32_t rsr() {
// CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
// CHECK-NEXT: trunc i64 [[V0]] to i32
return __builtin_arm_rsr("1:2:3:4:5");
}
-unsigned long rsr64() {
+__typeof__(__builtin_arm_rsr64("1:2:3:4:5")) rsr64(void);
+
+uint64_t rsr64(void) {
// CHECK: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
return __builtin_arm_rsr64("1:2:3:4:5");
}
@@ -66,13 +79,17 @@ void *rsrp() {
return __builtin_arm_rsrp("1:2:3:4:5");
}
+__typeof__(__builtin_arm_wsr("1:2:3:4:5", 0)) wsr(unsigned);
+
void wsr(unsigned v) {
// CHECK: [[V0:[%A-Za-z0-9.]+]] = zext i32 %v to i64
// CHECK-NEXT: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 [[V0]])
__builtin_arm_wsr("1:2:3:4:5", v);
}
-void wsr64(unsigned long v) {
+__typeof__(__builtin_arm_wsr64("1:2:3:4:5", 0)) wsr64(uint64_t);
+
+void wsr64(uint64_t v) {
// CHECK: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 %v)
__builtin_arm_wsr64("1:2:3:4:5", v);
}
diff --git a/test/CodeGen/builtins-msp430.c b/test/CodeGen/builtins-msp430.c
new file mode 100644
index 0000000000..8fb409b666
--- /dev/null
+++ b/test/CodeGen/builtins-msp430.c
@@ -0,0 +1,10 @@
+// REQUIRES: msp430-registered-target
+// RUN: %clang_cc1 -triple msp430-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+int test_builtin_flt_rounds() {
+ // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i32 @llvm.flt.rounds()
+ // CHECK-DAG: [[V1:[%A-Za-z0-9.]+]] = trunc i32 [[V0]] to i16
+ // CHECK-DAG: ret i16 [[V1]]
+ return __builtin_flt_rounds();
+}
+
diff --git a/test/CodeGen/builtins-nvptx-mma.cu b/test/CodeGen/builtins-nvptx-mma.cu
new file mode 100644
index 0000000000..3d62196f7c
--- /dev/null
+++ b/test/CodeGen/builtins-nvptx-mma.cu
@@ -0,0 +1,755 @@
+
+//
+// *** DO NOT EDIT ***
+//
+// This test has been automatically generated by
+// builtins-nvptx-mma.py --ptx=63 --gpu-arch=75
+//
+// Make sure we can handle all builtins available on sm_75 with PTX63
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_75 \
+// RUN: -fcuda-is-device -target-feature +ptx63 \
+// RUN: -DPTX=63 -DSM=75 \
+// RUN: -S -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefixes=CHECK_PTX61_SM70,CHECK_PTX63_SM75,CHECK_PTX63_SM72,CHECK_PTX60_SM70 %s
+// Verify that all builtins have correct constraints.
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown \
+// RUN: -target-cpu sm_60 -target-feature +ptx42 \
+// RUN: -DPTX=63 -DSM=75 -fcuda-is-device -S -o /dev/null -x cuda \
+// RUN: -verify %s
+
+
+#if !defined(CUDA_VERSION)
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+typedef unsigned long long uint64_t;
+#endif
+
+// CHECK-LABEL: test_wmma_builtins
+__device__ void test_wmma_builtins(int *src, int *dst,
+ float *fsrc, float *fdst, int ldm) {
+
+
+#if (PTX >= 60) && (SM >= 70)
+
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_a(dst, src, ldm, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_a(dst, src, ldm, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_b(dst, src, ldm, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_b(dst, src, ldm, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
+ // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
+#endif // (PTX >= 60) && (SM >= 70)
+
+#if (PTX >= 61) && (SM >= 70)
+
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_a(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_a(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_b(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_b(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_a(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_a(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_b(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_b(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
+ // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite
+ // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
+ __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
+#endif // (PTX >= 61) && (SM >= 70)
+
+#if (PTX >= 63) && (SM >= 72)
+
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_a_s8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_a_s8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_a_u8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_a_u8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_b_s8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_b_s8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_b_u8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_b_u8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_c(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m16n16k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_ld_c(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m16n16k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_st_c_i32(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m16n16k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_st_c_i32(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_a_s8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_a_s8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_a_u8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_a_u8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_b_s8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_b_s8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_b_u8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_b_u8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_c(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m32n8k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_ld_c(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m32n8k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_st_c_i32(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m32n8k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_st_c_i32(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_a_s8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_a_s8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_a_u8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_a_u8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_b_s8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_b_s8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_b_u8(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_b_u8(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_c(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n32k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_ld_c(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n32k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_st_c_i32(dst, src, ldm, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n32k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_st_c_i32(dst, src, ldm, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 3, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 3, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 2, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 2, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.s8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 0, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_s8(dst, src, src, src, 0, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 3, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 3, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 2, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 2, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.u8
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 0, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m16n16k16_mma_u8(dst, src, src, src, 0, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 3, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 3, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 2, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 2, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.s8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 0, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_s8(dst, src, src, src, 0, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 3, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 3, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 2, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 2, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.u8
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 0, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m32n8k16_mma_u8(dst, src, src, src, 0, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 3, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 3, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 2, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 2, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.s8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 0, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.s8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_s8(dst, src, src, src, 0, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 3, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 3, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 2, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 2, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.u8
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 0, 0);
+ // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.u8.satfinite
+ // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}}
+ __imma_m8n32k16_mma_u8(dst, src, src, src, 0, 1);
+#endif // (PTX >= 63) && (SM >= 72)
+
+#if (PTX >= 63) && (SM >= 75)
+
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.a.row.stride.b1
+ // expected-error-re@+1 {{'__bmma_m8n8k128_ld_a_b1' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_ld_a_b1(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.b.col.stride.b1
+ // expected-error-re@+1 {{'__bmma_m8n8k128_ld_b_b1' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_ld_b_b1(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.c.col.stride.s32
+ // expected-error-re@+1 {{'__bmma_m8n8k128_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_ld_c(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.c.row.stride.s32
+ // expected-error-re@+1 {{'__bmma_m8n8k128_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_ld_c(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.store.d.col.stride.s32
+ // expected-error-re@+1 {{'__bmma_m8n8k128_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_st_c_i32(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.store.d.row.stride.s32
+ // expected-error-re@+1 {{'__bmma_m8n8k128_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_st_c_i32(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.a.row.stride.s4
+ // expected-error-re@+1 {{'__imma_m8n8k32_ld_a_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_ld_a_s4(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.a.row.stride.u4
+ // expected-error-re@+1 {{'__imma_m8n8k32_ld_a_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_ld_a_u4(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.b.col.stride.s4
+ // expected-error-re@+1 {{'__imma_m8n8k32_ld_b_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_ld_b_s4(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.b.col.stride.u4
+ // expected-error-re@+1 {{'__imma_m8n8k32_ld_b_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_ld_b_u4(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.c.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n8k32_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_ld_c(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.c.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n8k32_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_ld_c(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.store.d.col.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n8k32_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_st_c_i32(dst, src, ldm, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.store.d.row.stride.s32
+ // expected-error-re@+1 {{'__imma_m8n8k32_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_st_c_i32(dst, src, ldm, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.mma.row.col.b1
+ // expected-error-re@+1 {{'__bmma_m8n8k128_mma_xor_popc_b1' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __bmma_m8n8k128_mma_xor_popc_b1(dst, src, src, src, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.s4
+ // expected-error-re@+1 {{'__imma_m8n8k32_mma_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_mma_s4(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.s4.satfinite
+ // expected-error-re@+1 {{'__imma_m8n8k32_mma_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_mma_s4(dst, src, src, src, 1, 1);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.u4
+ // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 0);
+ // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.u4.satfinite
+ // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}}
+ __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 1);
+#endif // (PTX >= 63) && (SM >= 75)
+}
diff --git a/test/CodeGen/builtins-nvptx-mma.py b/test/CodeGen/builtins-nvptx-mma.py
new file mode 100644
index 0000000000..1b395fc4f3
--- /dev/null
+++ b/test/CodeGen/builtins-nvptx-mma.py
@@ -0,0 +1,343 @@
+# This script generates all variants of wmma builtins, verifies that clang calls
+# the correct LLVM intrinsics, and checks that the availability of specific
+# builtins is constrained by the correct PTX version and the target GPU variant.
+
+# Dummy test run to avoid lit warnings.
+# RUN: echo "This is not a real test. It's a generator for builtins-nvptx-mma.cu" >/dev/null
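+#
+# A typical invocation to regenerate the companion test file (assuming the
+# script is run from this directory) would be:
+#   python builtins-nvptx-mma.py --ptx=63 --gpu-arch=75 > builtins-nvptx-mma.cu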
+
+from __future__ import print_function
+
+import argparse
+from collections import defaultdict
+from itertools import product
+from string import Template
+
+class MMAFrag:
+ def __init__(self, geom, frag, ptx_elt_type):
+ self.geom = geom
+ self.frag = frag
+ self.ptx_type = ptx_elt_type
+
+ def __repr__(self):
+ return "%s:%s:%s" % (self.geom, self.frag, self.ptx_type)
+
+class MMAOp:
+ def __init__(self, a, b, c, d):
+ self.a = a
+ self.b = b
+ self.c = c
+ self.d = d
+
+ def __repr__(self):
+ return ("{A:%s, B:%s, C:%s, D:%s}" % (self.a, self.b, self.c, self.d ))
+
+def make_mma_ops(geoms, types_a, types_b, types_c, types_d):
+ ops = []
+ for geom, type_a, type_c in product( geoms, types_a, types_c):
+ for type_b, type_d in product(types_b if types_b else [type_a],
+ types_d if types_d else [type_c]):
+ ops.append(MMAOp(MMAFrag(geom, "a", type_a),
+ MMAFrag(geom, "b", type_b),
+ MMAFrag(geom, "c", type_c),
+ MMAFrag(geom, "d", type_d)))
+ return ops
+
+def make_ldst_ops(geoms, frags, types):
+ return [MMAFrag(geom, frag, ptx_type) for (geom, frag, ptx_type)
+ in product(geoms, frags, types)]
+
+def get_mma_ops():
+ return (make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["f16"], [], ["f16", "f32"], ["f16", "f32"]) +
+ make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["s8", "u8"], [], ["s32"], []) +
+ make_mma_ops(["m8n8k32"],
+ ["s4", "u4"], [], ["s32"], []) +
+ make_mma_ops(["m8n8k128"],
+ ["b1"], [], ["s32"], []))
+def get_ldst_ops():
+ return (make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["a", "b"], ["f16", "u8", "s8"]) +
+ make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["c", "d"], ["f16", "f32", "s32"]) +
+ make_ldst_ops(["m8n8k32"], ["a", "b"], ["s4","u4"]) +
+ make_ldst_ops(["m8n8k128"], ["a", "b"], ["b1"]) +
+ make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]))
+
+def is_geom_supported(geom):
+ # geometries for FP and ints.
+ if geom in ["m8n32k16", "m32n8k16"]:
+ return ptx_version >= 61
+ # geometries for sub-ints.
+ if geom in ["m8n8k32", "m8n8k128"]:
+ return ptx_version >= 63 and gpu_arch >= 75
+ if geom == "m16n16k16":
+ return ptx_version >= 60
+ assert(False) # Unexpected geometry.
+
+def is_type_supported(ptx_type):
+ if ptx_type in ["s8", "u8", "s32"]:
+ return ptx_version >= 63 and gpu_arch >= 72
+ if ptx_type in ["s4", "u4", "b1"]:
+ return ptx_version >= 63 and gpu_arch >= 75
+ return ptx_version >= 60 and gpu_arch >= 70
+
+def is_mma_variant_supported(op, layout_a, layout_b, satf):
+ if not (is_type_supported(op.a.ptx_type)
+ and is_geom_supported(op.a.geom)):
+ return False
+ # sub-integer ops require row/col layout; b1 additionally does not support satf.
+ if op.a.ptx_type in ["s4", "u4", "b1"]:
+ if op.a.ptx_type == "b1" and satf:
+ return False
+ return layout_a == "row" and layout_b == "col"
+ return True
+
+def is_ldst_variant_supported(frag, layout):
+ if not (is_type_supported(frag.ptx_type)
+ and is_geom_supported(frag.geom)):
+ return False
+ if frag.ptx_type in ["s4", "u4", "b1"]:
+ # sub-integer ops require sm_75 and ptx63 (checked above) and row/col layout for the a/b fragments.
+ return ((frag.frag == "a" and layout == "row")
+ or (frag.frag == "b" and layout == "col")
+ or frag.frag in ["c", "d"])
+ return True
+
+def get_builtin_prefix(frag):
+ prefix = None
+ if frag.geom in ["m16n16k16", "m32n8k16", "m8n32k16"]:
+ if frag.ptx_type in ["f16", "f32"]:
+ prefix = "__hmma"
+ else:
+ prefix = "__imma"
+ elif frag.geom == "m8n8k32":
+ prefix = "__imma" # sub-integers
+ elif frag.geom == "m8n8k128":
+ prefix = "__bmma"
+ assert prefix
+ return prefix
+
+def get_ldst_builtin_name(frag):
+ prefix = get_builtin_prefix(frag)
+
+ if prefix == "__hmma":
+ suffix = "" if frag.frag in ["a","b"] else frag.ptx_type
+ elif prefix in ["__imma", "__bmma"]:
+ suffix = "" if frag.frag in ["c"] else frag.ptx_type
+ if suffix == "s32":
+ suffix = "i32"
+ if frag.frag == "d":
+ ifrag = "c"
+ op = "st"
+ else:
+ ifrag = frag.frag
+ op = "ld"
+
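+ # For example (values as seen in the generated test above): frag
+ # m16n16k16:a:s8 produces "__imma_m16n16k16_ld_a_s8", and the "d" fragment
+ # maps onto the store builtin "__imma_m16n16k16_st_c_i32".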
+ name = "%s_%s_%s_%s%s" % (prefix, frag.geom, op, ifrag,
+ "_" + suffix if suffix else "")
+ return name
+
+def get_mma_builtin_name(op):
+ prefix = get_builtin_prefix(op.a)
+
+ if prefix == "__hmma":
+ suffix = op.d.ptx_type + op.c.ptx_type
+ else:
+ suffix = op.a.ptx_type
+
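+ # For example: "__hmma_m8n32k16_mma_f32f16" for an FP variant and
+ # "__bmma_m8n8k128_mma_xor_popc_b1" for the b1 variant, both of which
+ # appear in the generated test above.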
+ name = "%s_%s_mma%s_%s" % (prefix, op.a.geom,
+ "_xor_popc" if op.a.ptx_type == "b1" else "",
+ suffix)
+ return name
+
+
+def get_required_sm(frag):
+ if frag.ptx_type in ["u4", "s4", "b1"]:
+ return 75
+ if frag.ptx_type in ["s8", "u8"]:
+ return 72
+ if frag.ptx_type == "s32":
+ if frag.geom in ["m8n8k32", "m8n8k128"]: # s4/u4/b1
+ return 75
+ else: # s8/u8
+ return 72
+ if frag.ptx_type in ["f16", "f32"]:
+ return 70
+ assert(False)
+
+def get_required_ptx(frag):
+ if frag.ptx_type in ["f16", "f32"]:
+ return 60 if frag.geom == "m16n16k16" else 61
+ return 63
+
+def gen_wmma_ldst_tests(results):
+ load_template = """
+ // CHECK${check_suffix}: call {{.*}} @${intrinsic}
+ // expected-error-re@+1 {{'${builtin}' needs target feature sm_${min_sm}{{.*}},ptx${min_ptx}{{.*}}}}
+ ${builtin}(${dst}, ${src}, ldm, ${blayout});
+""".rstrip()
+ intrinsic_template = "llvm.nvvm.wmma.${geom}.${op}.${frag}.${ilayout}.stride.${itype}"
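+ # For instance, fragment m16n16k16:a:s8 loaded with row layout expands to
+ # llvm.nvvm.wmma.m16n16k16.load.a.row.stride.s8, matching the generated
+ # CHECK lines above.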
+
+ for frag, layout in sorted(product(get_ldst_ops(), ["row","col"]), key=str):
+
+ if not is_ldst_variant_supported(frag, layout):
+ continue
+
+ is_fp = frag.ptx_type == "f32"
+ min_sm = get_required_sm(frag)
+ min_ptx = get_required_ptx(frag)
+ params = {
+ "check_suffix" : "_PTX%d_SM%d" % (min_ptx, min_sm),
+ "builtin" : get_ldst_builtin_name(frag),
+ "min_ptx" : min_ptx,
+ "min_sm" : min_sm,
+ "dst": "fdst" if is_fp else "dst",
+ "src": "fsrc" if is_fp else "src",
+ "blayout" : 0 if layout == "row" else 1,
+ "intrinsic" : Template(intrinsic_template).substitute({
+ "frag" : frag.frag,
+ "geom" : frag.geom,
+ "ilayout" : layout,
+ "itype" : frag.ptx_type,
+ "op" : "store" if frag.frag == "d" else "load",
+ })
+ }
+ results[(min_ptx,min_sm)] += Template(load_template).substitute(params)
+
+ return results
+
+def mma_signature(op):
+ if op.a.ptx_type in ["s8", "u8", "s4", "u4", "b1"]:
+ # int and sub-int ops are identified by input type.
+ return op.a.ptx_type
+ else:
+ # the rest are FP ops identified by accumulator & result type.
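+ # e.g. D:f32 with C:f16 gives "f32.f16", as in
+ # llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16 above.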
+ return "%s.%s" % (op.d.ptx_type, op.c.ptx_type)
+
+# Get the numeric value for the rowcol parameter of the builtin.
+# As far as I can tell, it uses the encoding accepted by NVVM intrinsics:
+# https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-matrix-mma
+def get_ilayout(a, b):
+ return {
+ "row.row" : 0,
+ "row.col" : 1,
+ "col.row" : 2,
+ "col.col" : 3
+ }[a + "." + b]
+
+def gen_wmma_mma_tests(results):
+ mma_template = """
+ // CHECK${check_suffix}: call {{.*}} @${intrinsic}
+ // expected-error-re@+1 {{'${builtin}' needs target feature sm_${min_sm}{{.*}},ptx${min_ptx}{{.*}}}}
+ ${builtin}(${dst}, ${asrc}, ${asrc}, ${csrc}, ${ilayout}${maybe_isatf});
+""".rstrip()
+ intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${intrinsic_signature}${satf}"
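+ # For instance, geom m8n32k16 with col/col layouts and signature f32.f16
+ # plus ".satfinite" expands to
+ # llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite, as seen in the
+ # generated test above.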
+
+ for op, alayout, blayout, satf in sorted(product( get_mma_ops(),
+ ["row","col"],
+ ["row","col"],
+ [".satfinite", ""]),
+ key=str):
+
+ if not is_mma_variant_supported(op, alayout, blayout, satf):
+ continue
+
+ a_is_fp = op.a.ptx_type == "f32"
+ c_is_fp = op.c.ptx_type == "f32"
+ d_is_fp = op.d.ptx_type == "f32"
+ min_sm = get_required_sm(op.a)
+ min_ptx = get_required_ptx(op.a)
+ if op.a.ptx_type == "b1": # .b1 MMA has no satf argument.
+ isatf_arg = ""
+ else:
+ isatf_arg = ", 1" if satf else ", 0"
+ params = {
+ "check_suffix" : "_PTX%d_SM%d" % (min_ptx, min_sm),
+ "builtin" : get_mma_builtin_name(op),
+ "min_ptx" : min_ptx,
+ "min_sm" : min_sm,
+ "dst": "fdst" if d_is_fp else "dst",
+ "asrc": "fsrc" if a_is_fp else "src",
+ "csrc": "fsrc" if c_is_fp else "src",
+ "ilayout" : get_ilayout(alayout, blayout),
+ "maybe_isatf" : isatf_arg,
+ "intrinsic" : Template(intrinsic_template).substitute({
+ "geom" : op.a.geom,
+ "alayout" : alayout,
+ "blayout" : blayout,
+ "intrinsic_signature" : mma_signature(op),
+ "satf" : satf,
+ })
+ }
+ results[(min_ptx, min_sm)] += Template(mma_template).substitute(params)
+
+ return results
+
+def gen_tests():
+ results = gen_wmma_ldst_tests(defaultdict(str))
+ results = gen_wmma_mma_tests(results)
+
+ run_template = r"""
+//
+// *** DO NOT EDIT ***
+//
+// This test has been automatically generated by
+// builtins-nvptx-mma.py --ptx=${ptx} --gpu-arch=${sm}
+//
+// Make sure we can handle all builtins available on sm_${sm} with PTX${ptx}
+// ${run}: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_${sm} \
+// ${run}: -fcuda-is-device -target-feature +ptx${ptx} \
+// ${run}: -DPTX=${ptx} -DSM=${sm} \
+// ${run}: -S -emit-llvm -o - -x cuda %s \
+// ${run}: | FileCheck -check-prefixes=${check_labels} %s
+// Verify that all builtins have correct constraints.
+// ${run}: %clang_cc1 -triple nvptx-unknown-unknown \
+// ${run}: -target-cpu sm_60 -target-feature +ptx42 \
+// ${run}: -DPTX=${ptx} -DSM=${sm} -fcuda-is-device -S -o /dev/null -x cuda \
+// ${run}: -verify %s
+"""
+ def supported_variants(ptx, sm, results):
+ return [(ptx_, sm_) for ptx_, sm_ in results if ptx_ <= ptx and sm_ <= sm]
+
+ print(Template(run_template).substitute({
+ "run" : "RUN", # To avoid lit misinterpreting the template
+ "ptx" : ptx_version,
+ "sm" : gpu_arch,
+ "check_labels" : ",".join(["CHECK_PTX%d_SM%d" % (ptx_, sm_)
+ for ptx_, sm_
+ in supported_variants(ptx_version, gpu_arch,
+ results)])
+ }))
+
+ print("""
+#if !defined(CUDA_VERSION)
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+typedef unsigned long long uint64_t;
+#endif
+
+// CHECK-LABEL: test_wmma_builtins
+__device__ void test_wmma_builtins(int *src, int *dst,
+ float *fsrc, float *fdst, int ldm) {
+""")
+
+ for (ptx, sm), tests in sorted(results.items()):
+ print()
+ print("#if (PTX >= %d) && (SM >= %d)" % (ptx, sm))
+ print(tests)
+ print("#endif // (PTX >= %d) && (SM >= %d)" % (ptx, sm))
+
+ print("}")
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--ptx", type=int, default=60)
+parser.add_argument("--gpu-arch", type=int, default=70)
+args = parser.parse_args()
+ptx_version = args.ptx
+gpu_arch = args.gpu_arch
+
+gen_tests()
diff --git a/test/CodeGen/builtins-ppc-cache.c b/test/CodeGen/builtins-ppc-cache.c
new file mode 100644
index 0000000000..81c69e97bd
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-cache.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm \
+// RUN: -o - %s | FileCheck %s
+
+int A;
+int B[5];
+float C;
+float D[5];
+double E;
+double F[5];
+
+void func(int a, int b[], float c, float d[], double e, double f[]) {
+ __builtin_dcbf (&a);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&A);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&b[2]);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&B[2]);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&c);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&C);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&d[2]);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&D[2]);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&e);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&E);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&f[0]);
+ // CHECK: @llvm.ppc.dcbf(i8*
+
+ __builtin_dcbf (&F[0]);
+ // CHECK: @llvm.ppc.dcbf(i8*
+}
diff --git a/test/CodeGen/builtins-ppc.c b/test/CodeGen/builtins-ppc.c
index 1f17787ad9..1ff1b811c1 100644
--- a/test/CodeGen/builtins-ppc.c
+++ b/test/CodeGen/builtins-ppc.c
@@ -14,3 +14,16 @@ long long test_builtin_ppc_get_timebase() {
return __builtin_ppc_get_timebase();
}
+void test_builtin_ppc_setrnd() {
+ volatile double res;
+ volatile int x = 100;
+
+ // CHECK: call double @llvm.ppc.setrnd(i32 2)
+ res = __builtin_setrnd(2);
+
+ // CHECK: call double @llvm.ppc.setrnd(i32 100)
+ res = __builtin_setrnd(100);
+
+ // CHECK: call double @llvm.ppc.setrnd(i32 %2)
+ res = __builtin_setrnd(x);
+}
diff --git a/test/CodeGen/builtins-wasm.c b/test/CodeGen/builtins-wasm.c
index 4f14e901ed..4784d6ff86 100644
--- a/test/CodeGen/builtins-wasm.c
+++ b/test/CodeGen/builtins-wasm.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
-// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
-// RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -fno-lax-vector-conversions -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
+// RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -fno-lax-vector-conversions -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD
// SIMD convenience types
typedef char i8x16 __attribute((vector_size(16)));
@@ -26,16 +26,28 @@ __SIZE_TYPE__ memory_grow(__SIZE_TYPE__ delta) {
// WEBASSEMBLY64: call i64 @llvm.wasm.memory.grow.i64(i32 0, i64 %{{.*}})
}
-void throw(unsigned int tag, void *obj) {
- return __builtin_wasm_throw(tag, obj);
- // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 %{{.*}}, i8* %{{.*}})
- // WEBASSEMBLY64: call void @llvm.wasm.throw(i32 %{{.*}}, i8* %{{.*}})
+void memory_init(void *dest, int offset, int size) {
+ __builtin_wasm_memory_init(3, 0, dest, offset, size);
+ // WEBASSEMBLY32: call void @llvm.wasm.memory.init(i32 3, i32 0, i8* %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ // WEBASSEMBLY64: call void @llvm.wasm.memory.init(i32 3, i32 0, i8* %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
}
-void rethrow(void) {
- return __builtin_wasm_rethrow();
- // WEBASSEMBLY32: call void @llvm.wasm.rethrow()
- // WEBASSEMBLY64: call void @llvm.wasm.rethrow()
+void data_drop() {
+ __builtin_wasm_data_drop(3);
+ // WEBASSEMBLY32: call void @llvm.wasm.data.drop(i32 3)
+ // WEBASSEMBLY64: call void @llvm.wasm.data.drop(i32 3)
+}
+
+void throw(void *obj) {
+ return __builtin_wasm_throw(0, obj);
+ // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 0, i8* %{{.*}})
+ // WEBASSEMBLY64: call void @llvm.wasm.throw(i32 0, i8* %{{.*}})
+}
+
+void rethrow_in_catch(void) {
+ return __builtin_wasm_rethrow_in_catch();
+ // WEBASSEMBLY32: call void @llvm.wasm.rethrow.in.catch()
+ // WEBASSEMBLY64: call void @llvm.wasm.rethrow.in.catch()
}
int atomic_wait_i32(int *addr, int expected, long long timeout) {
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index fd99dd2be3..e237bc2d12 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -281,6 +281,8 @@ void f0() {
(void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi);
(void)__builtin_ia32_xsave64(tmp_vp, tmp_ULLi);
+ tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+ (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
(void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
(void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
(void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
diff --git a/test/CodeGen/builtins.c b/test/CodeGen/builtins.c
index 8e0542a1a8..25b909ad3a 100644
--- a/test/CodeGen/builtins.c
+++ b/test/CodeGen/builtins.c
@@ -246,6 +246,9 @@ void test_float_builtins(float F, double D, long double LD) {
// CHECK: fcmp uge float {{.*}}, 0x3810000000000000
// CHECK: and i1
// CHECK: and i1
+
+ res = __builtin_flt_rounds();
+ // CHECK: call i32 @llvm.flt.rounds(
}
// CHECK-LABEL: define void @test_float_builtin_ops
@@ -768,7 +771,7 @@ void test_builtin_os_log_merge_helper1(void *buf, unsigned u, long long ll) {
void test_builtin_os_log_errno() {
// CHECK-NOT: @stacksave
// CHECK: %[[BUF:.*]] = alloca [4 x i8], align 1
- // CHECK: %[[DECAY:.*]] = getelementptr inbounds [4 x i8], [4 x i8]* %[[BUF]], i32 0, i32 0
+ // CHECK: %[[DECAY:.*]] = getelementptr inbounds [4 x i8], [4 x i8]* %[[BUF]], i64 0, i64 0
// CHECK: call void @__os_log_helper_1_2_1_0_96(i8* %[[DECAY]])
// CHECK-NOT: @stackrestore
diff --git a/test/CodeGen/callback_annotated.c b/test/CodeGen/callback_annotated.c
new file mode 100644
index 0000000000..feacda2754
--- /dev/null
+++ b/test/CodeGen/callback_annotated.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN1
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN2
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+
+// RUN1-DAG: @broker0({{[^#]*#[0-9]+}} !callback ![[cid0:[0-9]+]]
+__attribute__((callback(1, 2))) void *broker0(void *(*callee)(void *), void *payload) {
+ return callee(payload);
+}
+
+// RUN1-DAG: @broker1({{[^#]*#[0-9]+}} !callback ![[cid1:[0-9]+]]
+__attribute__((callback(callee, payload))) void *broker1(void *payload, void *(*callee)(void *)) {
+ return broker0(callee, payload);
+}
+
+void *broker2(void (*callee)(void));
+
+// RUN1-DAG: declare !callback ![[cid2:[0-9]+]] i8* @broker2
+__attribute__((callback(callee))) void *broker2(void (*callee)(void));
+
+void *broker2(void (*callee)(void));
+
+// RUN1-DAG: declare !callback ![[cid3:[0-9]+]] i8* @broker3
+__attribute__((callback(4, 1, 2, c))) void *broker3(int, int, int c, int (*callee)(int, int, int), int);
+
+// RUN1-DAG: declare !callback ![[cid4:[0-9]+]] i8* @broker4
+__attribute__((callback(4, -1, a, __))) void *broker4(int a, int, int, int (*callee)(int, int, int), int);
+
+// RUN1-DAG: declare !callback ![[cid5:[0-9]+]] i8* @broker5
+__attribute__((callback(4, d, 5, 2))) void *broker5(int, int, int, int (*callee)(int, int, int), int d);
+
+static void *VoidPtr2VoidPtr(void *payload) {
+ // RUN2: ret i8* %payload
+ // IPCP: ret i8* null
+ return payload;
+}
+
+static int ThreeInt2Int(int a, int b, int c) {
+ // RUN2: define internal i32 @ThreeInt2Int(i32 %a, i32 %b, i32 %c)
+ // RUN2: %mul = mul nsw i32 %b, %a
+ // RUN2: %add = add nsw i32 %mul, %c
+ // RUN2: ret i32 %add
+
+ // IPCP: define internal i32 @ThreeInt2Int(i32 %a, i32 %b, i32 %c)
+ // IPCP: %mul = mul nsw i32 4, %a
+ // IPCP: %add = add nsw i32 %mul, %c
+ // IPCP: ret i32 %add
+
+ return a * b + c;
+}
+
+void foo() {
+ broker0(VoidPtr2VoidPtr, 0l);
+ broker1(0l, VoidPtr2VoidPtr);
+ broker2(foo);
+ broker3(1, 4, 5, ThreeInt2Int, 1);
+ broker4(4, 2, 7, ThreeInt2Int, 0);
+ broker5(8, 0, 3, ThreeInt2Int, 4);
+}
+
+// RUN1-DAG: ![[cid0]] = !{![[cid0b:[0-9]+]]}
+// RUN1-DAG: ![[cid0b]] = !{i64 0, i64 1, i1 false}
+// RUN1-DAG: ![[cid1]] = !{![[cid1b:[0-9]+]]}
+// RUN1-DAG: ![[cid1b]] = !{i64 1, i64 0, i1 false}
+// RUN1-DAG: ![[cid2]] = !{![[cid2b:[0-9]+]]}
+// RUN1-DAG: ![[cid2b]] = !{i64 0, i1 false}
+// RUN1-DAG: ![[cid3]] = !{![[cid3b:[0-9]+]]}
+// RUN1-DAG: ![[cid3b]] = !{i64 3, i64 0, i64 1, i64 2, i1 false}
+// RUN1-DAG: ![[cid4]] = !{![[cid4b:[0-9]+]]}
+// RUN1-DAG: ![[cid4b]] = !{i64 3, i64 -1, i64 0, i64 -1, i1 false}
+// RUN1-DAG: ![[cid5]] = !{![[cid5b:[0-9]+]]}
+// RUN1-DAG: ![[cid5b]] = !{i64 3, i64 4, i64 4, i64 1, i1 false}
diff --git a/test/CodeGen/callback_openmp.c b/test/CodeGen/callback_openmp.c
new file mode 100644
index 0000000000..2fc9dcd391
--- /dev/null
+++ b/test/CodeGen/callback_openmp.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+
+// CHECK: declare !callback ![[cid:[0-9]+]] void @__kmpc_fork_call
+// CHECK: declare !callback ![[cid]] void @__kmpc_fork_teams
+// CHECK: ![[cid]] = !{![[cidb:[0-9]+]]}
+// CHECK: ![[cidb]] = !{i64 2, i64 -1, i64 -1, i1 true}
+
+void work1(int, int);
+void work2(int, int);
+void work12(int, int);
+
+void foo(int q) {
+ int p = 2;
+
+ #pragma omp parallel firstprivate(q, p)
+ work1(p, q);
+// IPCP: call void @work1(i32 2, i32 %{{[._a-zA-Z0-9]*}})
+
+ #pragma omp parallel for firstprivate(p, q)
+ for (int i = 0; i < q; i++)
+ work2(i, p);
+// IPCP: call void @work2(i32 %{{[._a-zA-Z0-9]*}}, i32 2)
+
+ #pragma omp target teams firstprivate(p)
+ work12(p, p);
+// IPCP: call void @work12(i32 2, i32 2)
+}
diff --git a/test/CodeGen/callback_pthread_create.c b/test/CodeGen/callback_pthread_create.c
new file mode 100644
index 0000000000..785440030b
--- /dev/null
+++ b/test/CodeGen/callback_pthread_create.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -O1 %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -O1 %s -S -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+
+// CHECK: declare !callback ![[cid:[0-9]+]] {{.*}}i32 @pthread_create
+// CHECK: ![[cid]] = !{![[cidb:[0-9]+]]}
+// CHECK: ![[cidb]] = !{i64 2, i64 3, i1 false}
+
+// Taken from test/Analysis/retain-release.m
+//{
+struct _opaque_pthread_t {};
+struct _opaque_pthread_attr_t {};
+typedef struct _opaque_pthread_t *__darwin_pthread_t;
+typedef struct _opaque_pthread_attr_t __darwin_pthread_attr_t;
+typedef __darwin_pthread_t pthread_t;
+typedef __darwin_pthread_attr_t pthread_attr_t;
+
+int pthread_create(pthread_t *, const pthread_attr_t *,
+ void *(*)(void *), void *);
+//}
+
+const int GlobalVar = 0;
+
+static void *callee0(void *payload) {
+// IPCP: define internal i8* @callee0
+// IPCP: ret i8* null
+ return payload;
+}
+
+static void *callee1(void *payload) {
+// IPCP: define internal i8* @callee1
+// IPCP: ret i8* bitcast (i32* @GlobalVar to i8*)
+ return payload;
+}
+
+void foo() {
+ pthread_t MyFirstThread;
+ pthread_create(&MyFirstThread, 0, callee0, 0);
+
+ pthread_t MySecondThread;
+ pthread_create(&MySecondThread, 0, callee1, (void *)&GlobalVar);
+}
diff --git a/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp b/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp
index 1b5a392276..591eaa0e13 100644
--- a/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp
+++ b/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp
@@ -30,9 +30,7 @@ char **caller(char **x, unsigned long alignment) {
// CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8
// CHECK-NEXT: %[[ALIGNMENT_RELOADED:.*]] = load i64, i64* %[[ALIGNMENT_ADDR]], align 8
// CHECK-NEXT: %[[X_RETURNED:.*]] = call i8** @[[PASSTHROUGH]](i8** %[[X_RELOADED]], i64 %[[ALIGNMENT_RELOADED]])
- // CHECK-NEXT: %[[ISPOSITIVE:.*]] = icmp sgt i64 %[[ALIGNMENT_RELOADED]], 0
- // CHECK-NEXT: %[[POSITIVEMASK:.*]] = sub i64 %[[ALIGNMENT_RELOADED]], 1
- // CHECK-NEXT: %[[MASK:.*]] = select i1 %[[ISPOSITIVE]], i64 %[[POSITIVEMASK]], i64 0
+ // CHECK-NEXT: %[[MASK:.*]] = sub i64 %[[ALIGNMENT_RELOADED]], 1
// CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64
// CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], %[[MASK]]
// CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0
diff --git a/test/CodeGen/catch-undef-behavior.c b/test/CodeGen/catch-undef-behavior.c
index 7915ed9db1..e4861aef52 100644
--- a/test/CodeGen/catch-undef-behavior.c
+++ b/test/CodeGen/catch-undef-behavior.c
@@ -35,7 +35,7 @@ void foo() {
union { int i; } u;
// CHECK-COMMON: %[[I8PTR:.*]] = bitcast i32* %[[PTR:.*]] to i8*
- // CHECK-COMMON-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* %[[I8PTR]], i1 false, i1 false)
+ // CHECK-COMMON-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* %[[I8PTR]], i1 false, i1 false, i1 false)
// CHECK-COMMON-NEXT: %[[OK:.*]] = icmp uge i64 %[[SIZE]], 4
// CHECK-UBSAN: br i1 %[[OK]], {{.*}} !prof ![[WEIGHT_MD:.*]], !nosanitize
diff --git a/test/CodeGen/complex-math.c b/test/CodeGen/complex-math.c
index 5fd25d0092..e28941f838 100644
--- a/test/CodeGen/complex-math.c
+++ b/test/CodeGen/complex-math.c
@@ -2,7 +2,7 @@
// RUN: %clang_cc1 %s -O1 -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s --check-prefix=X86
// RUN: %clang_cc1 %s -O1 -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s --check-prefix=X86
// RUN: %clang_cc1 %s -O1 -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s --check-prefix=PPC
-// RUN %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
// RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
// RUN: %clang_cc1 %s -O1 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
// RUN: %clang_cc1 %s -O1 -emit-llvm -triple aarch64-unknown-unknown -ffast-math -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
@@ -621,7 +621,7 @@ _Complex double foo(_Complex double a, _Complex double b) {
// use the base AAPCS.
// ARM-LABEL: @foo(
- // ARM: call void { double, double } @__muldc3
+ // ARM: call void @__muldc3
// ARMHF-LABEL: @foo(
// ARMHF: call { double, double } @__muldc3
diff --git a/test/CodeGen/compound-literal.c b/test/CodeGen/compound-literal.c
index 38675a7dda..17a24d47f2 100644
--- a/test/CodeGen/compound-literal.c
+++ b/test/CodeGen/compound-literal.c
@@ -11,6 +11,11 @@ _Complex double * x = &(_Complex double){1.0f};
typedef int v4i32 __attribute((vector_size(16)));
v4i32 *y = &(v4i32){1,2,3,4};
+// Check generated code for GNU constant array init from compound literal,
+// for a global variable.
+// CHECK: @compound_array = global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8]
+int compound_array[] = __extension__(__builtin_choose_expr(0, 0, _Generic(1, int: (int[]){1, 2, 3, 4, 5, 6, 7, 8})));
+
void xxx() {
int* a = &(int){1};
struct s {int a, b, c;} * b = &(struct s) {1, 2, 3};
@@ -82,3 +87,13 @@ int compareMyCLH() {
const void *b = MyCLH;
return a == b;
}
+
+// Check generated code for GNU constant array init from compound literal,
+// for a local variable.
+// CHECK-LABEL: define i32 @compound_array_fn()
+// CHECK: [[COMPOUND_ARRAY:%.*]] = alloca [8 x i32]
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}, i64 32, i1 false)
+int compound_array_fn() {
+ int compound_array[] = (int[]){1,2,3,4,5,6,7,8};
+ return compound_array[0];
+}
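
Both new checks cover the same GNU extension: initializing a named array from a compound literal of array type. At file scope the initializer must fold to a constant; at block scope it lowers to a memcpy from a private constant, as the checks above require. A standalone sketch of the extension:

    int global_arr[] = (int[]){1, 2, 3};   /* file scope: constant initializer */

    int first(void) {
      int local_arr[] = (int[]){1, 2, 3};  /* block scope: memcpy from a constant */
      return local_arr[0];
    }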
diff --git a/test/CodeGen/const-init.c b/test/CodeGen/const-init.c
index 3fd231b630..41ac8f2eb0 100644
--- a/test/CodeGen/const-init.c
+++ b/test/CodeGen/const-init.c
@@ -121,8 +121,8 @@ struct g22 {int x;} __attribute((packed));
struct g23 {char a; short b; char c; struct g22 d;};
struct g23 g24 = {1,2,3,4};
-// CHECK: @g25.g26 = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__func__.g25, i32 0, i32 0)
-// CHECK: @__func__.g25 = private unnamed_addr constant [4 x i8] c"g25\00"
+// CHECK: @g25.g26 = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @[[FUNC:.*]], i32 0, i32 0)
+// CHECK: @[[FUNC]] = private unnamed_addr constant [4 x i8] c"g25\00"
int g25() {
static const char *g26 = __func__;
return *g26;
@@ -153,7 +153,7 @@ void g29() {
DCC_PASSWD passwd;
} DCC_SRVR_NM;
// CHECK: @g29.a = internal global %struct.DCC_SRVR_NM { [2 x i8] c"@\00" }, align 1
- // CHECK: @g29.b = internal global [1 x i32] [i32 ptrtoint ([5 x i8]* @.str to i32)], align 4
+ // CHECK: @g29.b = internal global [1 x i32] [i32 ptrtoint ([5 x i8]* @.str.1 to i32)], align 4
// CHECK: @g29.c = internal global [1 x i32] [i32 97], align 4
static DCC_SRVR_NM a = { {"@"} };
static int b[1] = { "asdf" }; // expected-warning {{incompatible pointer to integer conversion initializing 'int' with an expression of type 'char [5]'}}
diff --git a/test/CodeGen/construction-vtable-visibility.cpp b/test/CodeGen/construction-vtable-visibility.cpp
new file mode 100644
index 0000000000..127e1b1905
--- /dev/null
+++ b/test/CodeGen/construction-vtable-visibility.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-linux-unknown -fvisibility hidden -emit-llvm %s -o - | FileCheck %s
+
+struct Base {};
+
+class Parent1 : virtual public Base {};
+
+class Parent2 : virtual public Base {};
+
+class Child : public Parent1, public Parent2 {};
+
+void test() {
+ Child x;
+}
+
+// CHECK: @_ZTC5Child0_7Parent1 = linkonce_odr hidden unnamed_addr constant
+// CHECK: @_ZTC5Child8_7Parent2 = linkonce_odr hidden unnamed_addr constant
diff --git a/test/CodeGen/cspgo-instrumentation.c b/test/CodeGen/cspgo-instrumentation.c
new file mode 100644
index 0000000000..9ecdbe7483
--- /dev/null
+++ b/test/CodeGen/cspgo-instrumentation.c
@@ -0,0 +1,41 @@
+// Test whether the CSPGO instrumentation and use passes are invoked.
+//
+// Ensure Pass PGOInstrumentationGenPass is invoked.
+// RUN: %clang_cc1 -O2 -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN
+// RUN: %clang_cc1 -O2 -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-NEWPM
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-NEWPM: Running pass: PGOInstrumentationGenCreateVar on
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-NEWPM: Running pass: PGOInstrumentationGen on
+//
+// RUN: rm -rf %t && mkdir %t
+// RUN: llvm-profdata merge -o %t/noncs.profdata %S/Inputs/pgotestir.proftext
+//
+// Ensure Pass PGOInstrumentationUsePass and PGOInstrumentationGenPass are invoked.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2-NEWPM
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2: PGOInstrumentationUsePass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2: PGOInstrumentationGenPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2-NEWPM: Running pass: PGOInstrumentationGenCreateVar on
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2-NEWPM: Running pass: PGOInstrumentationGen on
+
+// Ensure Pass PGOInstrumentationUsePass is invoked only once.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE-NEWPM
+// CHECK-PGOUSEPASS-INVOKED-USE: PGOInstrumentationUsePass
+// CHECK-PGOUSEPASS-INVOKED-USE-NOT: PGOInstrumentationGenCreateVarPass
+// CHECK-PGOUSEPASS-INVOKED-USE-NOT: PGOInstrumentationUsePass
+// CHECK-PGOUSEPASS-INVOKED-USE-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-PGOUSEPASS-INVOKED-USE-NEWPM-NOT: Running pass: PGOInstrumentationGenCreateVar
+// CHECK-PGOUSEPASS-INVOKED-USE-NEWPM-NOT: Running pass: PGOInstrumentationUse
+//
+// Ensure Pass PGOInstrumentationUsePass is invoked twice.
+// RUN: llvm-profdata merge -o %t/cs.profdata %S/Inputs/pgotestir_cs.proftext
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE2
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE2-NEWPM
+// CHECK-PGOUSEPASS-INVOKED-USE2: PGOInstrumentationUsePass
+// CHECK-PGOUSEPASS-INVOKED-USE2: PGOInstrumentationUsePass
+// CHECK-PGOUSEPASS-INVOKED-USE2-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-PGOUSEPASS-INVOKED-USE2-NEWPM: Running pass: PGOInstrumentationUse
diff --git a/test/CodeGen/cspgo-instrumentation_lto.c b/test/CodeGen/cspgo-instrumentation_lto.c
new file mode 100644
index 0000000000..822fe07d1a
--- /dev/null
+++ b/test/CodeGen/cspgo-instrumentation_lto.c
@@ -0,0 +1,44 @@
+// Test whether the CSPGO instrumentation and use passes are invoked in LTO.
+//
+// RUN: rm -rf %t && mkdir %t
+// RUN: llvm-profdata merge -o %t/noncs.profdata %S/Inputs/pgotestir.proftext
+//
+// Ensure Pass PGOInstrumentationGenPass is not invoked in PreLink.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -flto -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -flto -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationUsePass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NOT: PGOInstrumentationGenPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM: Running pass: PGOInstrumentationGenCreateVar
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM-NOT: Running pass: PGOInstrumentationGen on
+//
+// Ensure Pass PGOInstrumentationGenPass is invoked in PostLink.
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fprofile-instrument=csllvm -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument=csllvm -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationUsePass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST: PGOInstrumentationGenPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM: Running pass: PGOInstrumentationGen on
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM-NOT: Running pass: PGOInstrumentationGenCreateVar
+//
+// RUN: llvm-profdata merge -o %t/cs.profdata %S/Inputs/pgotestir_cs.proftext
+//
+// Ensure Pass PGOInstrumentationUsePass is invoked only once in PreLink.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NOT: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NOT: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM-NOT: Running pass: PGOInstrumentationGenCreateVar
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM-NOT: Running pass: PGOInstrumentationUse
+//
+// Ensure Pass PGOInstrumentationUsePass is invoked in PostLink.
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fprofile-instrument-use-path=%t/cs.profdata -flto -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument-use-path=%t/cs.profdata -flto -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NOT: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse
diff --git a/test/CodeGen/cspgo-instrumentation_thinlto.c b/test/CodeGen/cspgo-instrumentation_thinlto.c
new file mode 100644
index 0000000000..afcc9e771c
--- /dev/null
+++ b/test/CodeGen/cspgo-instrumentation_thinlto.c
@@ -0,0 +1,52 @@
+// Test whether the CSPGO instrumentation and use passes are invoked in ThinLTO.
+//
+// RUN: rm -rf %t && mkdir %t
+// RUN: llvm-profdata merge -o %t/noncs.profdata %S/Inputs/pgotestir.proftext
+//
+// Ensure Pass PGOInstrumentationGenPass is not invoked in PreLink.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -fprofile-instrument-path=default.profraw -flto=thin -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -fprofile-instrument-path=default.profraw -flto=thin -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationUsePass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NOT: PGOInstrumentationGenPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM: Running pass: PGOInstrumentationGenCreateVar
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM-NOT: Running pass: PGOInstrumentationGen on
+//
+// RUN: llvm-lto -thinlto -o %t/foo %t/foo_fe.bc
+// RUN: llvm-lto -thinlto -o %t/foo_pm %t/foo_fe_pm.bc
+// Ensure Pass PGOInstrumentationGenPass is invoked in PostLink.
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fthinlto-index=%t/foo_pm.thinlto.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw -flto=thin -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationUsePass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationGenCreateVarPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST: PGOInstrumentationGenPass
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM-NOT: Running pass: PGOInstrumentationGenCreateVar
+// CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM: Running pass: PGOInstrumentationGen on
+//
+// RUN: llvm-profdata merge -o %t/cs.profdata %S/Inputs/pgotestir_cs.proftext
+//
+// Ensure Pass PGOInstrumentationUsePass is invoked only once in PreLink.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto=thin -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto=thin -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NOT: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM-NOT: Running pass: PGOInstrumentationUse
+//
+// RUN: llvm-lto -thinlto -o %t/foo %t/foo_fe.bc
+// RUN: llvm-lto -thinlto -o %t/foo_pm %t/foo_fe_pm.bc
+// Ensure Pass PGOInstrumentationUsePass is invoked in PostLink.
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fprofile-instrument-use-path=%t/cs.profdata -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST -dump-input=always
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fthinlto-index=%t/foo_pm.thinlto.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument-use-path=%t/cs.profdata -flto=thin -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM -dump-input=always
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NOT: PGOInstrumentationUsePass
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM: Running pass: PGOInstrumentationUse
+// CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse
+//
+// Finally, test that when a non-CS profile is passed to the PostLink passes, the PGO use pass is not invoked.
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fprofile-instrument-use-path=%t/noncs.profdata -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST
+// RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fthinlto-index=%t/foo_pm.thinlto.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument-use-path=%t/noncs.profdata -flto=thin -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM
+// CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST-NOT: PGOInstrumentationUsePass
+// CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse
diff --git a/test/CodeGen/debug-info-codeview-heapallocsite.c b/test/CodeGen/debug-info-codeview-heapallocsite.c
new file mode 100644
index 0000000000..dfc0d19b25
--- /dev/null
+++ b/test/CodeGen/debug-info-codeview-heapallocsite.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -debug-info-kind=limited -gcodeview -fdeclspec -S -emit-llvm < %s | FileCheck %s
+
+struct Foo;
+struct Bar;
+
+__declspec(allocator) void *alloc_void();
+
+void call_alloc() {
+ struct Foo *p = alloc_void();
+ struct Foo *q = (struct Foo*)alloc_void();
+ struct Foo *r = (struct Foo*)(struct Bar*)alloc_void();
+}
+
+// CHECK-LABEL: define {{.*}}void @call_alloc
+// CHECK: call i8* {{.*}}@alloc_void{{.*}} !heapallocsite [[DBG1:!.*]]
+// CHECK: call i8* {{.*}}@alloc_void{{.*}} !heapallocsite [[DBG2:!.*]]
+// CHECK: call i8* {{.*}}@alloc_void{{.*}} !heapallocsite [[DBG3:!.*]]
+
+// CHECK: [[DBG1]] = !{}
+// CHECK: [[DBG2]] = !DICompositeType(tag: DW_TAG_structure_type,
+// CHECK-SAME: name: "Foo"
+// CHECK: [[DBG3]] = !DICompositeType(tag: DW_TAG_structure_type,
+// CHECK-SAME: name: "Bar"
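
The three checks pin down where the !heapallocsite type comes from: an uncast call to a __declspec(allocator) function records an empty node, and when casts are chained, the recorded type is the one applied directly to the call result. A minimal sketch of the user-side pattern (the allocator below is illustrative):

    struct Node { int v; };

    __declspec(allocator) void *my_alloc(unsigned long n);

    struct Node *make_node(void) {
      /* the cast applied directly to the call determines the recorded type */
      return (struct Node *)my_alloc(sizeof(struct Node));
    }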
diff --git a/test/CodeGen/debug-label-inline.c b/test/CodeGen/debug-label-inline.c
new file mode 100644
index 0000000000..c0b089aad8
--- /dev/null
+++ b/test/CodeGen/debug-label-inline.c
@@ -0,0 +1,28 @@
+// This test checks the correctness of generating DILabel and
+// llvm.dbg.label when the label is in inlined functions.
+//
+// RUN: %clang_cc1 -O2 %s -o - -emit-llvm -debug-info-kind=limited | FileCheck %s
+inline int f1(int a, int b) {
+ int sum;
+
+top:
+ sum = a + b;
+ return sum;
+}
+
+extern int ga, gb;
+
+int f2(void) {
+ int result;
+
+ result = f1(ga, gb);
+ // CHECK: call void @llvm.dbg.label(metadata [[LABEL_METADATA:!.*]]), !dbg [[LABEL_LOCATION:!.*]]
+
+ return result;
+}
+
+// CHECK: distinct !DISubprogram(name: "f1", {{.*}}, retainedNodes: [[ELEMENTS:!.*]])
+// CHECK: [[ELEMENTS]] = !{{{.*}}, [[LABEL_METADATA]]}
+// CHECK: [[LABEL_METADATA]] = !DILabel({{.*}}, name: "top", {{.*}}, line: 8)
+// CHECK: [[INLINEDAT:!.*]] = distinct !DILocation(line: 18,
+// CHECK: [[LABEL_LOCATION]] = !DILocation(line: 8, {{.*}}, inlinedAt: [[INLINEDAT]])
diff --git a/test/CodeGen/debug-label.c b/test/CodeGen/debug-label.c
new file mode 100644
index 0000000000..20efa49b0a
--- /dev/null
+++ b/test/CodeGen/debug-label.c
@@ -0,0 +1,16 @@
+// This test checks the correctness of generating DILabel and
+// llvm.dbg.label for labels.
+//
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited | FileCheck %s
+
+int f1(int a, int b) {
+ int sum;
+
+top:
+ // CHECK: call void @llvm.dbg.label(metadata [[LABEL_METADATA:!.*]]), !dbg [[LABEL_LOCATION:!.*]]
+ sum = a + b;
+ return sum;
+}
+
+// CHECK: [[LABEL_METADATA]] = !DILabel({{.*}}, name: "top", {{.*}}, line: 9)
+// CHECK: [[LABEL_LOCATION]] = !DILocation(line: 9,
diff --git a/test/CodeGen/dllexport-1.c b/test/CodeGen/dllexport-1.c
new file mode 100644
index 0000000000..5860591c83
--- /dev/null
+++ b/test/CodeGen/dllexport-1.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -fms-extensions -Wno-ignored-attributes -Wno-extern-initializer -o - %s | FileCheck %s -check-prefix CHECK-LNX
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm -fms-extensions -o - -DMSVC %s | FileCheck %s -check-prefix CHECK-MSVC
+
+// Export const variable.
+
+// CHECK-MSVC: @x = dso_local dllexport constant i32 3, align 4
+// CHECK-LNX: @x = constant i32 3, align 4
+
+// CHECK-MSVC: @z = dso_local constant i32 4, align 4
+// CHECK-LNX: @z = constant i32 4, align 4
+
+// CHECK-MSVC: @y = common dso_local dllexport global i32 0, align 4
+// CHECK-LNX: @y = common global i32 0, align 4
+
+__declspec(dllexport) int const x = 3;
+__declspec(dllexport) const int y;
+
+// expected-warning@+1 {{'extern' variable has an initializer}}
+extern int const z = 4;
+
+int main() {
+ int a = x + y + z;
+ return a;
+}
diff --git a/test/CodeGen/exceptions-seh-finally.c b/test/CodeGen/exceptions-seh-finally.c
index 655f0a782f..3e10d15deb 100644
--- a/test/CodeGen/exceptions-seh-finally.c
+++ b/test/CodeGen/exceptions-seh-finally.c
@@ -279,7 +279,7 @@ void finally_with_func() {
}
// CHECK-LABEL: define internal void @"?fin$0@0@finally_with_func@@"({{[^)]*}})
-// CHECK: call void @cleanup_with_func(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@COAGBPGM@finally_with_func?$AA@", i32 0, i32 0))
+// CHECK: call void @cleanup_with_func(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@COAGBPGM@finally_with_func?$AA@", i{{32|64}} 0, i{{32|64}} 0))
// Look for the absence of noinline. Enum attributes come first, so check that
// a string attribute is the first to verify that no enum attributes are
diff --git a/test/CodeGen/inline-asm-x86-flag-output.c b/test/CodeGen/inline-asm-x86-flag-output.c
new file mode 100644
index 0000000000..74ad3a46e7
--- /dev/null
+++ b/test/CodeGen/inline-asm-x86-flag-output.c
@@ -0,0 +1,376 @@
+// RUN: %clang_cc1 -O2 -emit-llvm %s -o - -triple x86_64-unknown-linux-gnu | FileCheck %s
+
+int test_cca(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_cca
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@cca"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccae(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccae
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccae"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccb(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccb
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccb"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccbe(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccbe
+ //CHECK: tail call i32 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccbe"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccc(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccc
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccc"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_cce(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_cce
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@cce"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccz(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccz
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccz"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccg(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccg
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccg"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccge(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccge
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccge"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccl(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccl
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccl"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccle(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccle
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccle"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccna(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccna
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccna"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnae(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnae
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnae"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnb(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnb
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnb"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnbe(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnbe
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnbe"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnc(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnc
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnc"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccne(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccne
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccne"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnz(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnz
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnz"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccng(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccng
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccng"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnge(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnge
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnge"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnl(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnl
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnl"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnle(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnle
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnle"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccno(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccno
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccno"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccnp(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccnp
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccnp"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccns(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccns
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccns"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_cco(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_cco
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@cco"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccp(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccp
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccp"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+int test_ccs(long nr, volatile long *addr) {
+ //CHECK-LABEL: @test_ccs
+ //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr)
+ int x;
+ asm("cmp %2,%1"
+ : "=@ccs"(x), "=m"(*(volatile long *)(addr))
+ : "r"(nr)
+ : "cc");
+ if (x)
+ return 0;
+ return 1;
+}
+
+_Bool check_no_clobber_conflicts() {
+ //CHECK-LABEL: @check_no_clobber_conflicts
+ //CHECK: = tail call i8 asm "", "={@cce},~{cx},~{dirflag},~{fpsr},~{flags}"()
+ _Bool b;
+ asm(""
+ : "=@cce"(b)
+ :
+ : "cx");
+ return b;
+}
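
Every function in this new file follows one template: a flag-output constraint of the form "=@cc<cond>" exposes the named x86 condition flag as an integer output, so the asm body needs no setcc and later passes can fold the result straight into a branch. A condensed sketch of the idiom (illustrative, not part of the test):

    int is_zero(long v) {
      int zf;
      asm("test %1, %1"
          : "=@ccz"(zf)   /* zf receives the zero flag: 1 when v == 0 */
          : "r"(v)
          : "cc");
      return zf;
    }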
diff --git a/test/CodeGen/microsoft-no-common-align.c b/test/CodeGen/microsoft-no-common-align.c
index fc46946c00..a7a27a0627 100644
--- a/test/CodeGen/microsoft-no-common-align.c
+++ b/test/CodeGen/microsoft-no-common-align.c
@@ -6,3 +6,6 @@ TooLargeAlignment TooBig;
// CHECK: @TooBig = dso_local global <16 x float> zeroinitializer, align 64
NormalAlignment JustRight;
// CHECK: @JustRight = common dso_local global <1 x float> zeroinitializer, align 4
+
+TooLargeAlignment *IsAPointer;
+// CHECK: @IsAPointer = common dso_local global <16 x float>* null, align 8
diff --git a/test/CodeGen/ms-intrinsics-rotations.c b/test/CodeGen/ms-intrinsics-rotations.c
index 30428b12aa..b1bb2e6eb0 100644
--- a/test/CodeGen/ms-intrinsics-rotations.c
+++ b/test/CodeGen/ms-intrinsics-rotations.c
@@ -12,17 +12,10 @@
// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
// RUN: -triple x86_64--linux -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
// RUN: %clang_cc1 -ffreestanding -fms-extensions \
// RUN: -triple x86_64--darwin -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-
-// LP64 targets use 'long' as 'int' for MS intrinsics (-fms-extensions)
-#ifdef __LP64__
-#define LONG int
-#else
-#define LONG long
-#endif
+// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
// rotate left
@@ -47,12 +40,15 @@ unsigned int test_rotl(unsigned int value, int shift) {
// CHECK: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
// CHECK: ret i32 [[R]]
-unsigned LONG test_lrotl(unsigned LONG value, int shift) {
+unsigned long test_lrotl(unsigned long value, int shift) {
return _lrotl(value, shift);
}
// CHECK-32BIT-LONG: i32 @test_lrotl
// CHECK-32BIT-LONG: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
// CHECK-32BIT-LONG: ret i32 [[R]]
+// CHECK-64BIT-LONG: i64 @test_lrotl
+// CHECK-64BIT-LONG: [[R:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+// CHECK-64BIT-LONG: ret i64 [[R]]
unsigned __int64 test_rotl64(unsigned __int64 value, int shift) {
return _rotl64(value, shift);
@@ -84,12 +80,15 @@ unsigned int test_rotr(unsigned int value, int shift) {
// CHECK: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
// CHECK: ret i32 [[R]]
-unsigned LONG test_lrotr(unsigned LONG value, int shift) {
+unsigned long test_lrotr(unsigned long value, int shift) {
return _lrotr(value, shift);
}
// CHECK-32BIT-LONG: i32 @test_lrotr
// CHECK-32BIT-LONG: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
// CHECK-32BIT-LONG: ret i32 [[R]]
+// CHECK-64BIT-LONG: i64 @test_lrotr
+// CHECK-64BIT-LONG: [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+// CHECK-64BIT-LONG: ret i64 [[R]]
unsigned __int64 test_rotr64(unsigned __int64 value, int shift) {
return _rotr64(value, shift);
diff --git a/test/CodeGen/ms-intrinsics.c b/test/CodeGen/ms-intrinsics.c
index e59b1d36a8..cf41d23d35 100644
--- a/test/CodeGen/ms-intrinsics.c
+++ b/test/CodeGen/ms-intrinsics.c
@@ -9,7 +9,7 @@
// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-X64,CHECK-ARM-X64,CHECK-INTEL
// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
// RUN: -triple aarch64-windows -Oz -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes CHECK-ARM-ARM64,CHECK-ARM-X64
+// RUN: | FileCheck %s --check-prefixes CHECK-ARM-ARM64,CHECK-ARM-X64,CHECK-ARM64
// intrin.h needs size_t, but -ffreestanding prevents us from getting it from
// stddef.h. Work around it with this typedef.
@@ -494,6 +494,35 @@ long test_InterlockedDecrement(long volatile *Addend) {
// CHECK: ret i32 [[RESULT]]
// CHECK: }
+char test_iso_volatile_load8(char volatile *p) { return __iso_volatile_load8(p); }
+short test_iso_volatile_load16(short volatile *p) { return __iso_volatile_load16(p); }
+int test_iso_volatile_load32(int volatile *p) { return __iso_volatile_load32(p); }
+__int64 test_iso_volatile_load64(__int64 volatile *p) { return __iso_volatile_load64(p); }
+
+// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z_ ]*}}%p)
+// CHECK: = load volatile i8, i8* %p
+// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z_ ]*}}%p)
+// CHECK: = load volatile i16, i16* %p
+// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z_ ]*}}%p)
+// CHECK: = load volatile i32, i32* %p
+// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z_ ]*}}%p)
+// CHECK: = load volatile i64, i64* %p
+
+void test_iso_volatile_store8(char volatile *p, char v) { __iso_volatile_store8(p, v); }
+void test_iso_volatile_store16(short volatile *p, short v) { __iso_volatile_store16(p, v); }
+void test_iso_volatile_store32(int volatile *p, int v) { __iso_volatile_store32(p, v); }
+void test_iso_volatile_store64(__int64 volatile *p, __int64 v) { __iso_volatile_store64(p, v); }
+
+// CHECK: define{{.*}}void @test_iso_volatile_store8(i8*{{[a-z_ ]*}}%p, i8 {{[a-z_ ]*}}%v)
+// CHECK: store volatile i8 %v, i8* %p
+// CHECK: define{{.*}}void @test_iso_volatile_store16(i16*{{[a-z_ ]*}}%p, i16 {{[a-z_ ]*}}%v)
+// CHECK: store volatile i16 %v, i16* %p
+// CHECK: define{{.*}}void @test_iso_volatile_store32(i32*{{[a-z_ ]*}}%p, i32 {{[a-z_ ]*}}%v)
+// CHECK: store volatile i32 %v, i32* %p
+// CHECK: define{{.*}}void @test_iso_volatile_store64(i64*{{[a-z_ ]*}}%p, i64 {{[a-z_ ]*}}%v)
+// CHECK: store volatile i64 %v, i64* %p
+
+
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
__int64 test_InterlockedExchange64(__int64 volatile *value, __int64 mask) {
return _InterlockedExchange64(value, mask);
@@ -1342,15 +1371,14 @@ __int64 test_InterlockedDecrement64_nf(__int64 volatile *Addend) {
// CHECK-ARM-ARM64: }
#endif
-#if !defined(__aarch64__)
void test__fastfail() {
__fastfail(42);
}
// CHECK-LABEL: define{{.*}} void @test__fastfail()
// CHECK-ARM: call void asm sideeffect "udf #251", "{r0}"(i32 42) #[[NORETURN:[0-9]+]]
// CHECK-INTEL: call void asm sideeffect "int $$0x29", "{cx}"(i32 42) #[[NORETURN]]
+// CHECK-ARM64: call void asm sideeffect "brk #0xF003", "{w0}"(i32 42) #[[NORETURN:[0-9]+]]
// Attributes come last.
// CHECK: attributes #[[NORETURN]] = { noreturn{{.*}} }
-#endif
diff --git a/test/CodeGen/ms-setjmp.c b/test/CodeGen/ms-setjmp.c
index a6e30cb96a..5df9ce5ad2 100644
--- a/test/CodeGen/ms-setjmp.c
+++ b/test/CodeGen/ms-setjmp.c
@@ -21,12 +21,12 @@ int test_setjmp() {
// X64-LABEL: define dso_local i32 @test_setjmp
// X64: %[[addr:.*]] = call i8* @llvm.frameaddress(i32 0)
- // X64: %[[call:.*]] = call i32 @_setjmp(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
+ // X64: %[[call:.*]] = call i32 @_setjmp(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i64 0, i64 0), i8* %[[addr]])
// X64-NEXT: ret i32 %[[call]]
// AARCH64-LABEL: define dso_local i32 @test_setjmp
// AARCH64: %[[addr:.*]] = call i8* @llvm.sponentry()
- // AARCH64: %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
+ // AARCH64: %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i64 0, i64 0), i8* %[[addr]])
// AARCH64-NEXT: ret i32 %[[call]]
}
@@ -34,11 +34,11 @@ int test_setjmpex() {
return _setjmpex(jb);
// X64-LABEL: define dso_local i32 @test_setjmpex
// X64: %[[addr:.*]] = call i8* @llvm.frameaddress(i32 0)
- // X64: %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
+ // X64: %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i64 0, i64 0), i8* %[[addr]])
// X64-NEXT: ret i32 %[[call]]
// AARCH64-LABEL: define dso_local i32 @test_setjmpex
// AARCH64: %[[addr:.*]] = call i8* @llvm.sponentry()
- // AARCH64: %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
+ // AARCH64: %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i64 0, i64 0), i8* %[[addr]])
// AARCH64-NEXT: ret i32 %[[call]]
}
diff --git a/test/CodeGen/ms-volatile-aarch64.c b/test/CodeGen/ms-volatile-aarch64.c
deleted file mode 100644
index 2a139f5139..0000000000
--- a/test/CodeGen/ms-volatile-aarch64.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-win32 -emit-llvm -fms-extensions -fms-volatile -o - < %s | FileCheck %s
-
-void test1(int volatile *p, int v) {
- __iso_volatile_store32(p, v);
- // CHECK-LABEL: @test1
- // CHECK: store volatile {{.*}}, {{.*}}
-}
-int test2(const int volatile *p) {
- return __iso_volatile_load32(p);
- // CHECK-LABEL: @test2
- // CHECK: load volatile {{.*}}
-}
diff --git a/test/CodeGen/ms-volatile-arm.c b/test/CodeGen/ms-volatile-arm.c
deleted file mode 100644
index 065e624a4c..0000000000
--- a/test/CodeGen/ms-volatile-arm.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// REQUIRES: arm-registered-target
-// RUN: %clang_cc1 -triple thumbv7-win32 -emit-llvm -fms-extensions -fms-volatile -o - < %s | FileCheck %s
-
-void test1(int volatile *p, int v) {
- __iso_volatile_store32(p, v);
- // CHECK-LABEL: @test1
- // CHECK: store volatile {{.*}}, {{.*}}
-}
-int test2(const int volatile *p) {
- return __iso_volatile_load32(p);
- // CHECK-LABEL: @test2
- // CHECK: load volatile {{.*}}
-}
diff --git a/test/CodeGen/ms-x86-intrinsics.c b/test/CodeGen/ms-x86-intrinsics.c
index c231dbd56e..ffcf09052b 100644
--- a/test/CodeGen/ms-x86-intrinsics.c
+++ b/test/CodeGen/ms-x86-intrinsics.c
@@ -144,28 +144,28 @@ unsigned __int64 test__shiftleft128(unsigned __int64 l, unsigned __int64 h,
return __shiftleft128(l, h, d);
}
// CHECK-X64-LABEL: define dso_local i64 @test__shiftleft128(i64 %l, i64 %h, i8 %d)
-// CHECK-X64 = zext i64 %h to i128
-// CHECK-X64 = shl nuw i128 %0, 64
-// CHECK-X64 = zext i64 %l to i128
-// CHECK-X64 = or i128 %1, %2
-// CHECK-X64 = and i8 %d, 63
-// CHECK-X64 = shl i128 %
-// CHECK-X64 = lshr i128 %
-// CHECK-X64 = trunc i128 %
-// CHECK-X64 ret i64 %
+// CHECK-X64: = zext i64 %{{.*}} to i128
+// CHECK-X64: = shl nuw i128 %{{.*}}, 64
+// CHECK-X64: = zext i64 %{{.*}} to i128
+// CHECK-X64: = or i128 %
+// CHECK-X64: = and i8 %{{.*}}, 63
+// CHECK-X64: = shl i128 %
+// CHECK-X64: = lshr i128 %
+// CHECK-X64: = trunc i128 %
+// CHECK-X64: ret i64 %
unsigned __int64 test__shiftright128(unsigned __int64 l, unsigned __int64 h,
unsigned char d) {
return __shiftright128(l, h, d);
}
// CHECK-X64-LABEL: define dso_local i64 @test__shiftright128(i64 %l, i64 %h, i8 %d)
-// CHECK-X64 = zext i64 %h to i128
-// CHECK-X64 = shl nuw i128 %
-// CHECK-X64 = zext i64 %l to i128
-// CHECK-X64 = or i128 %
-// CHECK-X64 = and i8 %d, 63
-// CHECK-X64 = lshr i128 %
-// CHECK-X64 = trunc i128 %
-// CHECK-X64 ret i64 %
+// CHECK-X64: = zext i64 %{{.*}} to i128
+// CHECK-X64: = shl nuw i128 %{{.*}}, 64
+// CHECK-X64: = zext i64 %{{.*}} to i128
+// CHECK-X64: = or i128 %
+// CHECK-X64: = and i8 %{{.*}}, 63
+// CHECK-X64: = lshr i128 %
+// CHECK-X64: = trunc i128 %
+// CHECK-X64: ret i64 %
#endif // defined(__x86_64__)
diff --git a/test/CodeGen/msp430-align.c b/test/CodeGen/msp430-align.c
new file mode 100644
index 0000000000..72de87b3bb
--- /dev/null
+++ b/test/CodeGen/msp430-align.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple msp430-elf -emit-llvm %s -o - | FileCheck %s
+
+// The MSP430 target prefers chars to be aligned to 8 bits and all other types to 16 bits.
+
+// CHECK: @c ={{.*}}global i8 1, align 1
+// CHECK: @s ={{.*}}global i16 266, align 2
+// CHECK: @i ={{.*}}global i16 266, align 2
+// CHECK: @l ={{.*}}global i32 16909060, align 2
+// CHECK: @ll ={{.*}}global i64 283686952306183, align 2
+// CHECK: @f ={{.*}}global float 1.000000e+00, align 2
+// CHECK: @d ={{.*}}global double 1.000000e+00, align 2
+// CHECK: @ld ={{.*}}global double 1.000000e+00, align 2
+// CHECK: @p ={{.*}}global i8* @c, align 2
+
+char c = 1;
+short s = 266;
+int i = 266;
+long l = 16909060;
+long long ll = 283686952306183;
+float f = 1.0f;
+double d = 1.0;
+long double ld = 1.0;
+char *p = &c;
diff --git a/test/CodeGen/msp430-fp-elim.c b/test/CodeGen/msp430-fp-elim.c
new file mode 100644
index 0000000000..c022adecbc
--- /dev/null
+++ b/test/CodeGen/msp430-fp-elim.c
@@ -0,0 +1,19 @@
+// REQUIRES: msp430-registered-target
+// RUN: %clang_cc1 -mdisable-fp-elim -triple msp430 -S %s -o - | FileCheck %s --check-prefix=FP_ENFORCED
+// RUN: %clang_cc1 -triple msp430 -S %s -o - | FileCheck %s --check-prefix=FP_DEFAULT
+
+// Check that the frame pointer is not used on MSP430 by default, but can be forcibly enabled.
+
+// FP_ENFORCED: push r4
+// FP_ENFORCED: mov r4, r4
+// FP_ENFORCED: pop r4
+// FP_DEFAULT: .globl fp_elim_check
+// FP_DEFAULT-NOT: push r4
+// FP_DEFAULT: mov r4, r4
+// FP_DEFAULT-NOT: pop r4
+
+void fp_elim_check()
+{
+ asm volatile ("mov r4, r4");
+}
+
diff --git a/test/CodeGen/msp430-reloc.c b/test/CodeGen/msp430-reloc.c
new file mode 100644
index 0000000000..f3d858839e
--- /dev/null
+++ b/test/CodeGen/msp430-reloc.c
@@ -0,0 +1,30 @@
+// REQUIRES: msp430-registered-target
+// RUN: %clang -target msp430 -fPIC -S %s -o - | FileCheck %s
+
+// Check that compilation does not crash, as it previously did with "-fPIC" enabled.
+
+void *alloca(unsigned int size);
+
+// CHECK: .globl foo
+short foo(char** data, char encoding)
+{
+ char* encoding_addr = alloca(sizeof(char));
+ *encoding_addr = encoding;
+
+ char tmp3 = *encoding_addr;
+ short conv2 = tmp3;
+ short and = conv2 & 0xf;
+
+ switch (and)
+ {
+ case 0 :
+ case 4 :
+ case 10 :
+ return 1;
+ case 11 :
+ return 2;
+ }
+
+ return 0;
+}
+
diff --git a/test/CodeGen/mult-alt-generic.c b/test/CodeGen/mult-alt-generic.c
index 6e7b11efe1..f5546d45cc 100644
--- a/test/CodeGen/mult-alt-generic.c
+++ b/test/CodeGen/mult-alt-generic.c
@@ -130,7 +130,7 @@ void single_X()
asm("foo %1,%0" : "=r" (out0) : "X" (min1));
// CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](i32 1)
asm("foo %1,%0" : "=r" (out0) : "X" (1));
- // CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+ // CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i{{32|64}} 0, i{{32|64}} 0))
asm("foo %1,%0" : "=r" (out0) : "X" (marray));
// CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](double {{[0-9.eE+-]+}})
asm("foo %1,%0" : "=r" (out0) : "X" (1.0e+01));
@@ -143,7 +143,7 @@ void single_p()
{
register int out0 = 0;
// Constraint converted differently on different platforms moved to platform-specific.
- // : call i32 asm "foo $1,$0", "=r,im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+ // : call i32 asm "foo $1,$0", "=r,im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i{{32|64}} 0, i{{32|64}} 0))
asm("foo %1,%0" : "=r" (out0) : "p" (marray));
}
@@ -263,7 +263,7 @@ void multi_X()
asm("foo %1,%0" : "=r,r" (out0) : "r,X" (min1));
// CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](i32 1)
asm("foo %1,%0" : "=r,r" (out0) : "r,X" (1));
- // CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+ // CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i{{32|64}} 0, i{{32|64}} 0))
asm("foo %1,%0" : "=r,r" (out0) : "r,X" (marray));
// CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](double {{[0-9.eE+-]+}})
asm("foo %1,%0" : "=r,r" (out0) : "r,X" (1.0e+01));
@@ -276,6 +276,6 @@ void multi_p()
{
register int out0 = 0;
// Constraint converted differently on different platforms moved to platform-specific.
- // : call i32 asm "foo $1,$0", "=r|r,r|im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+ // : call i32 asm "foo $1,$0", "=r|r,r|im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, {{i[0-9]*}} 0, {{i[0-9]*}} 0))
asm("foo %1,%0" : "=r,r" (out0) : "r,p" (marray));
}
diff --git a/test/CodeGen/object-size.c b/test/CodeGen/object-size.c
index a1095798c1..0f0069d802 100644
--- a/test/CodeGen/object-size.c
+++ b/test/CodeGen/object-size.c
@@ -1,12 +1,19 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -DDYNAMIC -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+
+#ifndef DYNAMIC
+#define OBJECT_SIZE_BUILTIN __builtin_object_size
+#else
+#define OBJECT_SIZE_BUILTIN __builtin_dynamic_object_size
+#endif
#define strcpy(dest, src) \
- ((__builtin_object_size(dest, 0) != -1ULL) \
- ? __builtin___strcpy_chk (dest, src, __builtin_object_size(dest, 1)) \
+ ((OBJECT_SIZE_BUILTIN(dest, 0) != -1ULL) \
+ ? __builtin___strcpy_chk (dest, src, OBJECT_SIZE_BUILTIN(dest, 1)) \
: __inline_strcpy_chk(dest, src))
static char *__inline_strcpy_chk (char *dest, const char *src) {
- return __builtin___strcpy_chk(dest, src, __builtin_object_size(dest, 1));
+ return __builtin___strcpy_chk(dest, src, OBJECT_SIZE_BUILTIN(dest, 1));
}
char gbuf[63];
@@ -15,32 +22,32 @@ int gi, gj;
// CHECK-LABEL: define void @test1
void test1() {
- // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 4), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 59)
+ // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 4), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i64 59)
strcpy(&gbuf[4], "Hi there");
}
// CHECK-LABEL: define void @test2
void test2() {
- // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 63)
+ // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i64 63)
strcpy(gbuf, "Hi there");
}
// CHECK-LABEL: define void @test3
void test3() {
- // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 1, i64 37), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 0)
+ // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 1, i64 37), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i64 0)
strcpy(&gbuf[100], "Hi there");
}
// CHECK-LABEL: define void @test4
void test4() {
- // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 -1), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 0)
+ // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 -1), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i64 0)
strcpy((char*)(void*)&gbuf[-1], "Hi there");
}
// CHECK-LABEL: define void @test5
void test5() {
// CHECK: = load i8*, i8** @gp
- // CHECK-NEXT:= call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
+ // CHECK-NEXT:= call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
strcpy(gp, "Hi there");
}
@@ -48,7 +55,7 @@ void test5() {
void test6() {
char buf[57];
- // CHECK: = call i8* @__strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 53)
+ // CHECK: = call i8* @__strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i64 53)
strcpy(&buf[4], "Hi there");
}
@@ -58,7 +65,7 @@ void test7() {
// Ensure we only evaluate the side-effect once.
// CHECK: = add
// CHECK-NOT: = add
- // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 63)
+ // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i64 63)
strcpy((++i, gbuf), "Hi there");
}
@@ -66,14 +73,14 @@ void test7() {
void test8() {
char *buf[50];
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(buf[++gi], "Hi there");
}
// CHECK-LABEL: define void @test9
void test9() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy((char *)((++gi) + gj), "Hi there");
}
@@ -81,62 +88,62 @@ void test9() {
char **p;
void test10() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(*(++p), "Hi there");
}
// CHECK-LABEL: define void @test11
void test11() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(gp = gbuf, "Hi there");
}
// CHECK-LABEL: define void @test12
void test12() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(++gp, "Hi there");
}
// CHECK-LABEL: define void @test13
void test13() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(gp++, "Hi there");
}
// CHECK-LABEL: define void @test14
void test14() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(--gp, "Hi there");
}
// CHECK-LABEL: define void @test15
void test15() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{..*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+  // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(gp--, "Hi there");
}
// CHECK-LABEL: define void @test16
void test16() {
// CHECK-NOT: __strcpy_chk
- // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
+ // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0))
strcpy(gp += 1, "Hi there");
}
// CHECK-LABEL: @test17
void test17() {
// CHECK: store i32 -1
- gi = __builtin_object_size(gp++, 0);
+ gi = OBJECT_SIZE_BUILTIN(gp++, 0);
// CHECK: store i32 -1
- gi = __builtin_object_size(gp++, 1);
+ gi = OBJECT_SIZE_BUILTIN(gp++, 1);
// CHECK: store i32 0
- gi = __builtin_object_size(gp++, 2);
+ gi = OBJECT_SIZE_BUILTIN(gp++, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(gp++, 3);
+ gi = OBJECT_SIZE_BUILTIN(gp++, 3);
}
// CHECK-LABEL: @test18
@@ -144,7 +151,7 @@ unsigned test18(int cond) {
int a[4], b[4];
// CHECK: phi i32*
// CHECK: call i64 @llvm.objectsize.i64
- return __builtin_object_size(cond ? a : b, 0);
+ return OBJECT_SIZE_BUILTIN(cond ? a : b, 0);
}
// CHECK-LABEL: @test19
@@ -154,22 +161,22 @@ void test19() {
} foo;
// CHECK: store i32 8
- gi = __builtin_object_size(&foo.a, 0);
+ gi = OBJECT_SIZE_BUILTIN(&foo.a, 0);
// CHECK: store i32 4
- gi = __builtin_object_size(&foo.a, 1);
+ gi = OBJECT_SIZE_BUILTIN(&foo.a, 1);
// CHECK: store i32 8
- gi = __builtin_object_size(&foo.a, 2);
+ gi = OBJECT_SIZE_BUILTIN(&foo.a, 2);
// CHECK: store i32 4
- gi = __builtin_object_size(&foo.a, 3);
+ gi = OBJECT_SIZE_BUILTIN(&foo.a, 3);
// CHECK: store i32 4
- gi = __builtin_object_size(&foo.b, 0);
+ gi = OBJECT_SIZE_BUILTIN(&foo.b, 0);
// CHECK: store i32 4
- gi = __builtin_object_size(&foo.b, 1);
+ gi = OBJECT_SIZE_BUILTIN(&foo.b, 1);
// CHECK: store i32 4
- gi = __builtin_object_size(&foo.b, 2);
+ gi = OBJECT_SIZE_BUILTIN(&foo.b, 2);
// CHECK: store i32 4
- gi = __builtin_object_size(&foo.b, 3);
+ gi = OBJECT_SIZE_BUILTIN(&foo.b, 3);
}
// CHECK-LABEL: @test20
@@ -177,13 +184,13 @@ void test20() {
struct { int t[10]; } t[10];
// CHECK: store i32 380
- gi = __builtin_object_size(&t[0].t[5], 0);
+ gi = OBJECT_SIZE_BUILTIN(&t[0].t[5], 0);
// CHECK: store i32 20
- gi = __builtin_object_size(&t[0].t[5], 1);
+ gi = OBJECT_SIZE_BUILTIN(&t[0].t[5], 1);
// CHECK: store i32 380
- gi = __builtin_object_size(&t[0].t[5], 2);
+ gi = OBJECT_SIZE_BUILTIN(&t[0].t[5], 2);
// CHECK: store i32 20
- gi = __builtin_object_size(&t[0].t[5], 3);
+ gi = OBJECT_SIZE_BUILTIN(&t[0].t[5], 3);
}
// CHECK-LABEL: @test21
@@ -191,22 +198,22 @@ void test21() {
struct { int t; } t;
// CHECK: store i32 0
- gi = __builtin_object_size(&t + 1, 0);
+ gi = OBJECT_SIZE_BUILTIN(&t + 1, 0);
// CHECK: store i32 0
- gi = __builtin_object_size(&t + 1, 1);
+ gi = OBJECT_SIZE_BUILTIN(&t + 1, 1);
// CHECK: store i32 0
- gi = __builtin_object_size(&t + 1, 2);
+ gi = OBJECT_SIZE_BUILTIN(&t + 1, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(&t + 1, 3);
+ gi = OBJECT_SIZE_BUILTIN(&t + 1, 3);
// CHECK: store i32 0
- gi = __builtin_object_size(&t.t + 1, 0);
+ gi = OBJECT_SIZE_BUILTIN(&t.t + 1, 0);
// CHECK: store i32 0
- gi = __builtin_object_size(&t.t + 1, 1);
+ gi = OBJECT_SIZE_BUILTIN(&t.t + 1, 1);
// CHECK: store i32 0
- gi = __builtin_object_size(&t.t + 1, 2);
+ gi = OBJECT_SIZE_BUILTIN(&t.t + 1, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(&t.t + 1, 3);
+ gi = OBJECT_SIZE_BUILTIN(&t.t + 1, 3);
}
// CHECK-LABEL: @test22
@@ -214,114 +221,114 @@ void test22() {
struct { int t[10]; } t[10];
// CHECK: store i32 0
- gi = __builtin_object_size(&t[10], 0);
+ gi = OBJECT_SIZE_BUILTIN(&t[10], 0);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[10], 1);
+ gi = OBJECT_SIZE_BUILTIN(&t[10], 1);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[10], 2);
+ gi = OBJECT_SIZE_BUILTIN(&t[10], 2);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[10], 3);
+ gi = OBJECT_SIZE_BUILTIN(&t[10], 3);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[9].t[10], 0);
+ gi = OBJECT_SIZE_BUILTIN(&t[9].t[10], 0);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[9].t[10], 1);
+ gi = OBJECT_SIZE_BUILTIN(&t[9].t[10], 1);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[9].t[10], 2);
+ gi = OBJECT_SIZE_BUILTIN(&t[9].t[10], 2);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[9].t[10], 3);
+ gi = OBJECT_SIZE_BUILTIN(&t[9].t[10], 3);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[0] + sizeof(t), 0);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[0] + sizeof(t), 0);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[0] + sizeof(t), 1);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[0] + sizeof(t), 1);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[0] + sizeof(t), 2);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[0] + sizeof(t), 2);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[0] + sizeof(t), 3);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[0] + sizeof(t), 3);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 0);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[9].t[0] + 10*sizeof(t[0].t), 0);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 1);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[9].t[0] + 10*sizeof(t[0].t), 1);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 2);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[9].t[0] + 10*sizeof(t[0].t), 2);
// CHECK: store i32 0
- gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 3);
+ gi = OBJECT_SIZE_BUILTIN((char*)&t[9].t[0] + 10*sizeof(t[0].t), 3);
}
struct Test23Ty { int a; int t[10]; };
// CHECK-LABEL: @test23
void test23(struct Test23Ty *p) {
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(p, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(p, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(p, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(p, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(p, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(p, 2);
// Note: this is currently fixed at 0 because LLVM doesn't have sufficient
// data to correctly handle type=3
// CHECK: store i32 0
- gi = __builtin_object_size(p, 3);
+ gi = OBJECT_SIZE_BUILTIN(p, 3);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&p->a, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&p->a, 0);
// CHECK: store i32 4
- gi = __builtin_object_size(&p->a, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(&p->a, 2);
+ gi = OBJECT_SIZE_BUILTIN(&p->a, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&p->a, 2);
// CHECK: store i32 4
- gi = __builtin_object_size(&p->a, 3);
-
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&p->t[5], 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&p->t[5], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(&p->t[5], 2);
+ gi = OBJECT_SIZE_BUILTIN(&p->a, 3);
+
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&p->t[5], 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&p->t[5], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&p->t[5], 2);
// CHECK: store i32 20
- gi = __builtin_object_size(&p->t[5], 3);
+ gi = OBJECT_SIZE_BUILTIN(&p->t[5], 3);
}
-// PR24493 -- ICE if __builtin_object_size called with NULL and (Type & 1) != 0
+// PR24493 -- ICE if OBJECT_SIZE_BUILTIN called with NULL and (Type & 1) != 0
// CHECK-LABEL: @test24
void test24() {
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size((void*)0, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size((void*)0, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true)
- gi = __builtin_object_size((void*)0, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0, 2);
// Note: Currently fixed at zero because LLVM can't handle type=3 correctly.
// Hopefully will be lowered properly in the future.
// CHECK: store i32 0
- gi = __builtin_object_size((void*)0, 3);
+ gi = OBJECT_SIZE_BUILTIN((void*)0, 3);
}
// CHECK-LABEL: @test25
void test25() {
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size((void*)0x1000, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size((void*)0x1000, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true)
- gi = __builtin_object_size((void*)0x1000, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0x1000, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0x1000, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0x1000, 2);
// Note: Currently fixed at zero because LLVM can't handle type=3 correctly.
// Hopefully will be lowered properly in the future.
// CHECK: store i32 0
- gi = __builtin_object_size((void*)0x1000, 3);
+ gi = OBJECT_SIZE_BUILTIN((void*)0x1000, 3);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size((void*)0 + 0x1000, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size((void*)0 + 0x1000, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true)
- gi = __builtin_object_size((void*)0 + 0x1000, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0 + 0x1000, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0 + 0x1000, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN((void*)0 + 0x1000, 2);
// Note: Currently fixed at zero because LLVM can't handle type=3 correctly.
// Hopefully will be lowered properly in the future.
// CHECK: store i32 0
- gi = __builtin_object_size((void*)0 + 0x1000, 3);
+ gi = OBJECT_SIZE_BUILTIN((void*)0 + 0x1000, 3);
}
// CHECK-LABEL: @test26
@@ -329,43 +336,43 @@ void test26() {
struct { int v[10]; } t[10];
// CHECK: store i32 316
- gi = __builtin_object_size(&t[1].v[11], 0);
+ gi = OBJECT_SIZE_BUILTIN(&t[1].v[11], 0);
// CHECK: store i32 312
- gi = __builtin_object_size(&t[1].v[12], 1);
+ gi = OBJECT_SIZE_BUILTIN(&t[1].v[12], 1);
// CHECK: store i32 308
- gi = __builtin_object_size(&t[1].v[13], 2);
+ gi = OBJECT_SIZE_BUILTIN(&t[1].v[13], 2);
// CHECK: store i32 0
- gi = __builtin_object_size(&t[1].v[14], 3);
+ gi = OBJECT_SIZE_BUILTIN(&t[1].v[14], 3);
}
struct Test27IncompleteTy;
// CHECK-LABEL: @test27
void test27(struct Test27IncompleteTy *t) {
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(t, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(t, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(t, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(t, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(t, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(t, 2);
// Note: this is currently fixed at 0 because LLVM doesn't have sufficient
// data to correctly handle type=3
// CHECK: store i32 0
- gi = __builtin_object_size(t, 3);
+ gi = OBJECT_SIZE_BUILTIN(t, 3);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&test27, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&test27, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(&test27, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&test27, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&test27, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&test27, 2);
// Note: this is currently fixed at 0 because LLVM doesn't have sufficient
// data to correctly handle type=3
// CHECK: store i32 0
- gi = __builtin_object_size(&test27, 3);
+ gi = OBJECT_SIZE_BUILTIN(&test27, 3);
}
-// The intent of this test is to ensure that __builtin_object_size treats `&foo`
+// The intent of this test is to ensure that OBJECT_SIZE_BUILTIN treats `&foo`
// and `(T*)&foo` identically, when used as the pointer argument.
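// For example (illustrative), OBJECT_SIZE_BUILTIN(addCasts(&t[1]), 0) below is
// expected to return the same value as OBJECT_SIZE_BUILTIN(&t[1], 0): the
// intervening short*/char* casts do not change which object is pointed to.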
// CHECK-LABEL: @test28
void test28() {
@@ -373,22 +380,22 @@ void test28() {
#define addCasts(s) ((char*)((short*)(s)))
// CHECK: store i32 360
- gi = __builtin_object_size(addCasts(&t[1]), 0);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1]), 0);
// CHECK: store i32 360
- gi = __builtin_object_size(addCasts(&t[1]), 1);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1]), 1);
// CHECK: store i32 360
- gi = __builtin_object_size(addCasts(&t[1]), 2);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1]), 2);
// CHECK: store i32 360
- gi = __builtin_object_size(addCasts(&t[1]), 3);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1]), 3);
// CHECK: store i32 356
- gi = __builtin_object_size(addCasts(&t[1].v[1]), 0);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1].v[1]), 0);
// CHECK: store i32 36
- gi = __builtin_object_size(addCasts(&t[1].v[1]), 1);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1].v[1]), 1);
// CHECK: store i32 356
- gi = __builtin_object_size(addCasts(&t[1].v[1]), 2);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1].v[1]), 2);
// CHECK: store i32 36
- gi = __builtin_object_size(addCasts(&t[1].v[1]), 3);
+ gi = OBJECT_SIZE_BUILTIN(addCasts(&t[1].v[1]), 3);
#undef addCasts
}
@@ -415,83 +422,83 @@ struct StaticStruct {
// CHECK-LABEL: @test29
void test29(struct DynStructVar *dv, struct DynStruct0 *d0,
struct DynStruct1 *d1, struct StaticStruct *ss) {
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(dv->snd, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(dv->snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(dv->snd, 2);
- // CHECK: store i32 0
- gi = __builtin_object_size(dv->snd, 3);
-
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(d0->snd, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(d0->snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(d0->snd, 2);
- // CHECK: store i32 0
- gi = __builtin_object_size(d0->snd, 3);
-
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(d1->snd, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(d1->snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(d1->snd, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(dv->snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(dv->snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(dv->snd, 2);
+ // CHECK: store i32 0
+ gi = OBJECT_SIZE_BUILTIN(dv->snd, 3);
+
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(d0->snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(d0->snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(d0->snd, 2);
+ // CHECK: store i32 0
+ gi = OBJECT_SIZE_BUILTIN(d0->snd, 3);
+
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(d1->snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(d1->snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(d1->snd, 2);
// CHECK: store i32 1
- gi = __builtin_object_size(d1->snd, 3);
-
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(ss->snd, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(ss->snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(ss->snd, 2);
+ gi = OBJECT_SIZE_BUILTIN(d1->snd, 3);
+
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(ss->snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(ss->snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(ss->snd, 2);
// CHECK: store i32 2
- gi = __builtin_object_size(ss->snd, 3);
+ gi = OBJECT_SIZE_BUILTIN(ss->snd, 3);
}
// CHECK-LABEL: @test30
void test30() {
struct { struct DynStruct1 fst, snd; } *nested;
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(nested->fst.snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(nested->fst.snd, 0);
// CHECK: store i32 1
- gi = __builtin_object_size(nested->fst.snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(nested->fst.snd, 2);
+ gi = OBJECT_SIZE_BUILTIN(nested->fst.snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(nested->fst.snd, 2);
// CHECK: store i32 1
- gi = __builtin_object_size(nested->fst.snd, 3);
-
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(nested->snd.snd, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(nested->snd.snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(nested->snd.snd, 2);
+ gi = OBJECT_SIZE_BUILTIN(nested->fst.snd, 3);
+
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(nested->snd.snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(nested->snd.snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(nested->snd.snd, 2);
// CHECK: store i32 1
- gi = __builtin_object_size(nested->snd.snd, 3);
+ gi = OBJECT_SIZE_BUILTIN(nested->snd.snd, 3);
union { struct DynStruct1 d1; char c[1]; } *u;
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(u->c, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(u->c, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(u->c, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(u->c, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(u->c, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(u->c, 2);
// CHECK: store i32 1
- gi = __builtin_object_size(u->c, 3);
-
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(u->d1.snd, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(u->d1.snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(u->d1.snd, 2);
+ gi = OBJECT_SIZE_BUILTIN(u->c, 3);
+
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(u->d1.snd, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(u->d1.snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(u->d1.snd, 2);
// CHECK: store i32 1
- gi = __builtin_object_size(u->d1.snd, 3);
+ gi = OBJECT_SIZE_BUILTIN(u->d1.snd, 3);
}
// CHECK-LABEL: @test31
@@ -502,20 +509,20 @@ void test31() {
struct DynStruct1 *ds1;
struct StaticStruct *ss;
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(ds1[9].snd, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(ds1[9].snd, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&ss[9].snd[0], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&ss[9].snd[0], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&ds1[9].snd[0], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&ds1[9].snd[0], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&ds0[9].snd[0], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&ds0[9].snd[0], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(&dsv[9].snd[0], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(&dsv[9].snd[0], 1);
}
// CHECK-LABEL: @PR30346
@@ -527,27 +534,27 @@ void PR30346() {
};
struct sockaddr *sa;
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(sa->sa_data, 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
- gi = __builtin_object_size(sa->sa_data, 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
- gi = __builtin_object_size(sa->sa_data, 2);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(sa->sa_data, 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(sa->sa_data, 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1
+ gi = OBJECT_SIZE_BUILTIN(sa->sa_data, 2);
// CHECK: store i32 14
- gi = __builtin_object_size(sa->sa_data, 3);
+ gi = OBJECT_SIZE_BUILTIN(sa->sa_data, 3);
}
extern char incomplete_char_array[];
// CHECK-LABEL: @incomplete_and_function_types
int incomplete_and_function_types() {
// CHECK: call i64 @llvm.objectsize.i64.p0i8
- gi = __builtin_object_size(incomplete_char_array, 0);
+ gi = OBJECT_SIZE_BUILTIN(incomplete_char_array, 0);
// CHECK: call i64 @llvm.objectsize.i64.p0i8
- gi = __builtin_object_size(incomplete_char_array, 1);
+ gi = OBJECT_SIZE_BUILTIN(incomplete_char_array, 1);
// CHECK: call i64 @llvm.objectsize.i64.p0i8
- gi = __builtin_object_size(incomplete_char_array, 2);
+ gi = OBJECT_SIZE_BUILTIN(incomplete_char_array, 2);
// CHECK: store i32 0
- gi = __builtin_object_size(incomplete_char_array, 3);
+ gi = OBJECT_SIZE_BUILTIN(incomplete_char_array, 3);
}
// Flips between the pointer and lvalue evaluator a lot.
@@ -564,7 +571,7 @@ void deeply_nested() {
} *a;
// CHECK: store i32 4
- gi = __builtin_object_size(&a->b[1].c[1].d[1].e[1], 1);
+ gi = OBJECT_SIZE_BUILTIN(&a->b[1].c[1].d[1].e[1], 1);
// CHECK: store i32 4
- gi = __builtin_object_size(&a->b[1].c[1].d[1].e[1], 3);
+ gi = OBJECT_SIZE_BUILTIN(&a->b[1].c[1].d[1].e[1], 3);
}
diff --git a/test/CodeGen/object-size.cpp b/test/CodeGen/object-size.cpp
index 725c49214d..3c8390f5f0 100644
--- a/test/CodeGen/object-size.cpp
+++ b/test/CodeGen/object-size.cpp
@@ -35,29 +35,29 @@ void test2() {
struct B : A {};
struct C { int i; B bs[1]; } *c;
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1 false)
gi = __builtin_object_size(&c->bs[0], 0);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1 false)
gi = __builtin_object_size(&c->bs[0], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1 false)
gi = __builtin_object_size(&c->bs[0], 2);
// CHECK: store i32 16
gi = __builtin_object_size(&c->bs[0], 3);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1 false)
gi = __builtin_object_size((A*)&c->bs[0], 0);
// CHECK: store i32 16
gi = __builtin_object_size((A*)&c->bs[0], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1 false)
gi = __builtin_object_size((A*)&c->bs[0], 2);
// CHECK: store i32 16
gi = __builtin_object_size((A*)&c->bs[0], 3);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false, i1 true, i1 false)
gi = __builtin_object_size(&c->bs[0].buf[0], 0);
// CHECK: store i32 16
gi = __builtin_object_size(&c->bs[0].buf[0], 1);
- // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true)
+ // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true, i1 true, i1 false)
gi = __builtin_object_size(&c->bs[0].buf[0], 2);
// CHECK: store i32 16
gi = __builtin_object_size(&c->bs[0].buf[0], 3);
diff --git a/test/CodeGen/opt-record-MIR.c b/test/CodeGen/opt-record-MIR.c
index 37239281e9..f9b4e74580 100644
--- a/test/CodeGen/opt-record-MIR.c
+++ b/test/CodeGen/opt-record-MIR.c
@@ -3,6 +3,8 @@
// RUN: %clang_cc1 -triple arm64-apple-ios -S -o /dev/null %s -O2 -dwarf-column-info 2>&1 | FileCheck -allow-empty -check-prefix=NO_REMARK %s
// RUN: %clang_cc1 -triple arm64-apple-ios -S -o /dev/null %s -O2 -dwarf-column-info -opt-record-file %t.yaml
// RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+// RUN: %clang_cc1 -triple arm64-apple-ios -S -o /dev/null %s -O2 -dwarf-column-info -opt-record-file %t.yaml -opt-record-passes asm-printer
+// RUN: cat %t.yaml | FileCheck -check-prefix=PASSES %s
void bar(float);
@@ -15,15 +17,15 @@ void foo(float *p, int i) {
}
}
-// REMARK: opt-record-MIR.c:10:11: remark: {{.}} spills {{.}} reloads generated in loop
+// REMARK: opt-record-MIR.c:{{[1-9][0-9]*}}:{{[1-9][0-9]*}}: remark: {{.}} spills {{.}} reloads generated in loop
// NO_REMARK-NOT: remark:
// YAML: --- !Missed
// YAML: Pass: regalloc
// YAML: Name: LoopSpillReload
// YAML: DebugLoc: { File: {{[^,]+}},
-// YAML: Line: 10,
-// YAML: Column: 11 }
+// YAML: Line: {{[1-9][0-9]*}}
+// YAML: Column: {{[1-9][0-9]*}} }
// YAML: Function: foo
// YAML: Args:
// YAML: - NumSpills: '{{.}}'
@@ -32,3 +34,6 @@ void foo(float *p, int i) {
// YAML: - String: ' reloads '
// YAML: - String: generated
// YAML: ...
+
+// PASSES: Pass: asm-printer
+// PASSES-NOT: regalloc
diff --git a/test/CodeGen/opt-record.c b/test/CodeGen/opt-record.c
index 3bc3c41f78..3f134854fe 100644
--- a/test/CodeGen/opt-record.c
+++ b/test/CodeGen/opt-record.c
@@ -3,6 +3,8 @@
// RUN: llvm-profdata merge %S/Inputs/opt-record.proftext -o %t.profdata
// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -fprofile-instrument-use-path=%t.profdata %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj
// RUN: cat %t.yaml | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PGO %s
+// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o %t -dwarf-column-info -opt-record-file %t.yaml -opt-record-passes inline -emit-obj
+// RUN: cat %t.yaml | FileCheck -check-prefix=CHECK-PASSES %s
// REQUIRES: x86-registered-target
void bar();
@@ -23,6 +25,7 @@ void Test(int *res, int *c, int *d, int *p, int n) {
// CHECK: DebugLoc:
// CHECK: Function: foo
// CHECK-PGO: Hotness:
+// CHECK-PASSES: Pass: inline
// CHECK: --- !Passed
// CHECK: Pass: loop-vectorize
@@ -30,4 +33,4 @@ void Test(int *res, int *c, int *d, int *p, int n) {
// CHECK: DebugLoc:
// CHECK: Function: Test
// CHECK-PGO: Hotness:
-
+// CHECK-PASSES-NOT: loop-vectorize
diff --git a/test/CodeGen/padding-init.c b/test/CodeGen/padding-init.c
new file mode 100644
index 0000000000..0ff11efd64
--- /dev/null
+++ b/test/CodeGen/padding-init.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ftrivial-auto-var-init=pattern %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ftrivial-auto-var-init=zero %s -emit-llvm -o - | FileCheck %s
+
+// C guarantees that brace-init with fewer initializers than members in the
+// aggregate will initialize the rest of the aggregate as-if it were static
+// initialization. In turn static initialization guarantees that padding is
+// initialized to zero bits.
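+//
+// Illustrative example (not part of this test): given
+//   struct S { char c; long long l; };
+//   struct S s = { 42 };
+// the omitted initializer zeroes `l`, and the 7 bytes of padding between `c`
+// and `l` are zeroed as well, as-if `s` had static storage duration.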
+
+// CHECK: @__const.partial_init.s = private unnamed_addr constant { i8, [7 x i8], i64 } { i8 42, [7 x i8] zeroinitializer, i64 0 }, align 8
+
+// Technically, we could initialize this padding to non-zero because all of the
+// struct's members have initializers.
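+// (As the CHECK line below shows, the emitted constant still uses
+// [7 x i8] zeroinitializer for the padding; only `c` and `l` carry the
+// explicit initializer values.)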
+
+// CHECK: @__const.init_all.s = private unnamed_addr constant { i8, [7 x i8], i64 } { i8 42, [7 x i8] zeroinitializer, i64 -2401053089374216531 }, align 8
+
+struct S {
+ char c;
+ long long l;
+};
+
+void use(struct S*);
+
+// CHECK-LABEL: @empty_braces(
+// CHECK: %s = alloca
+// CHECK-NEXT: %[[B:[0-9]+]] = bitcast %struct.S* %s to i8*
+// CHECK-NEXT: call void @llvm.memset{{.*}}(i8* align 8 %[[B]], i8 0,
+// CHECK-NEXT: call void @use(%struct.S* %s)
+void empty_braces() {
+ struct S s = {};
+ return use(&s);
+}
+
+// CHECK-LABEL: @partial_init(
+// CHECK: %s = alloca
+// CHECK-NEXT: %[[B:[0-9]+]] = bitcast %struct.S* %s to i8*
+// CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 8 %[[B]], {{.*}}@__const.partial_init.s
+// CHECK-NEXT: call void @use(%struct.S* %s)
+void partial_init() {
+ struct S s = { .c = 42 };
+ return use(&s);
+}
+
+// CHECK-LABEL: @init_all(
+// CHECK: %s = alloca
+// CHECK-NEXT: %[[B:[0-9]+]] = bitcast %struct.S* %s to i8*
+// CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 8 %[[B]], {{.*}}@__const.init_all.s
+// CHECK-NEXT: call void @use(%struct.S* %s)
+void init_all() {
+ struct S s = { .c = 42, .l = 0xdeadbeefc0fedead };
+ return use(&s);
+}
diff --git a/test/CodeGen/pass-object-size.c b/test/CodeGen/pass-object-size.c
index f5c12317ec..80c0a75199 100644
--- a/test/CodeGen/pass-object-size.c
+++ b/test/CodeGen/pass-object-size.c
@@ -7,6 +7,7 @@ struct Foo {
};
#define PS(N) __attribute__((pass_object_size(N)))
+#define PDS(N) __attribute__((pass_dynamic_object_size(N)))
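+// Illustrative note (semantics assumed from the tests below): a parameter
+// declared `void *p PDS(0)` makes each caller evaluate
+// __builtin_dynamic_object_size(arg, 0) and pass the result as an implicit
+// i64 argument, mirroring how PS(N) forwards __builtin_object_size(arg, N).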
int gi = 0;
@@ -16,26 +17,52 @@ int ObjectSize0(void *const p PS(0)) {
return __builtin_object_size(p, 0);
}
+// CHECK-LABEL: define i32 @DynamicObjectSize0(i8* %{{.*}}, i64)
+int DynamicObjectSize0(void *const p PDS(0)) {
+ // CHECK-NOT: @llvm.objectsize
+ return __builtin_dynamic_object_size(p, 0);
+}
+
// CHECK-LABEL: define i32 @ObjectSize1(i8* %{{.*}}, i64)
int ObjectSize1(void *const p PS(1)) {
// CHECK-NOT: @llvm.objectsize
return __builtin_object_size(p, 1);
}
+// CHECK-LABEL: define i32 @DynamicObjectSize1(i8* %{{.*}}, i64)
+int DynamicObjectSize1(void *const p PDS(1)) {
+ // CHECK-NOT: @llvm.objectsize
+ return __builtin_dynamic_object_size(p, 1);
+}
+
// CHECK-LABEL: define i32 @ObjectSize2(i8* %{{.*}}, i64)
int ObjectSize2(void *const p PS(2)) {
// CHECK-NOT: @llvm.objectsize
return __builtin_object_size(p, 2);
}
+// CHECK-LABEL: define i32 @DynamicObjectSize2(i8* %{{.*}}, i64)
+int DynamicObjectSize2(void *const p PDS(2)) {
+ // CHECK-NOT: @llvm.objectsize
+ return __builtin_object_size(p, 2);
+}
+
// CHECK-LABEL: define i32 @ObjectSize3(i8* %{{.*}}, i64)
int ObjectSize3(void *const p PS(3)) {
// CHECK-NOT: @llvm.objectsize
return __builtin_object_size(p, 3);
}
+// CHECK-LABEL: define i32 @DynamicObjectSize3(i8* %{{.*}}, i64)
+int DynamicObjectSize3(void *const p PDS(3)) {
+ // CHECK-NOT: @llvm.objectsize
+ return __builtin_object_size(p, 3);
+}
+
+void *malloc(unsigned long) __attribute__((alloc_size(1)));
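+// Because of alloc_size(1), the size of the object returned by malloc(sz) is
+// known to be `sz`; the dynamic llvm.objectsize calls below (last i1 argument
+// true) are what recover it at run time.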
+
// CHECK-LABEL: define void @test1
-void test1() {
+void test1(unsigned long sz) {
struct Foo t[10];
// CHECK: call i32 @ObjectSize0(i8* %{{.*}}, i64 360)
@@ -55,6 +82,21 @@ void test1() {
gi = ObjectSize2(&t[1].t[1]);
// CHECK: call i32 @ObjectSize3(i8* %{{.*}}, i64 36)
gi = ObjectSize3(&t[1].t[1]);
+
+ char *ptr = (char *)malloc(sz);
+
+ // CHECK: [[REG:%.*]] = call i64 @llvm.objectsize.i64.p0i8({{.*}}, i1 false, i1 true, i1 true)
+ // CHECK: call i32 @DynamicObjectSize0(i8* %{{.*}}, i64 [[REG]])
+ gi = DynamicObjectSize0(ptr);
+
+ // CHECK: [[WITH_OFFSET:%.*]] = getelementptr
+ // CHECK: [[REG:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[WITH_OFFSET]], i1 false, i1 true, i1 true)
+ // CHECK: call i32 @DynamicObjectSize0(i8* {{.*}}, i64 [[REG]])
+ gi = DynamicObjectSize0(ptr+10);
+
+ // CHECK: [[REG:%.*]] = call i64 @llvm.objectsize.i64.p0i8({{.*}}, i1 true, i1 true, i1 true)
+ // CHECK: call i32 @DynamicObjectSize2(i8* {{.*}}, i64 [[REG]])
+ gi = DynamicObjectSize2(ptr);
}
// CHECK-LABEL: define void @test2
@@ -72,6 +114,13 @@ int NoViableOverloadObjectSize0(void *const p) __attribute__((overloadable)) {
return __builtin_object_size(p, 0);
}
+// CHECK-LABEL: define i32 @_Z34NoViableOverloadDynamicObjectSize0Pv
+int NoViableOverloadDynamicObjectSize0(void *const p)
+ __attribute__((overloadable)) {
+ // CHECK: @llvm.objectsize
+ return __builtin_object_size(p, 0);
+}
+
// CHECK-LABEL: define i32 @_Z27NoViableOverloadObjectSize1Pv
int NoViableOverloadObjectSize1(void *const p) __attribute__((overloadable)) {
// CHECK: @llvm.objectsize
@@ -97,6 +146,11 @@ int NoViableOverloadObjectSize0(void *const p PS(0))
return __builtin_object_size(p, 0);
}
+int NoViableOverloadDynamicObjectSize0(void *const p PDS(0))
+ __attribute__((overloadable)) {
+ return __builtin_dynamic_object_size(p, 0);
+}
+
int NoViableOverloadObjectSize1(void *const p PS(1))
__attribute__((overloadable)) {
return __builtin_object_size(p, 1);
@@ -154,6 +208,9 @@ void test3() {
gi = NoViableOverloadObjectSize2(&t[1].t[1]);
// CHECK: call i32 @_Z27NoViableOverloadObjectSize3PvU17pass_object_size3(i8* %{{.*}}, i64 36)
gi = NoViableOverloadObjectSize3(&t[1].t[1]);
+
+ // CHECK: call i32 @_Z34NoViableOverloadDynamicObjectSize0PvU25pass_dynamic_object_size0(i8* %{{.*}}, i64 360)
+ gi = NoViableOverloadDynamicObjectSize0(&t[1]);
}
// CHECK-LABEL: define void @test4
@@ -183,6 +240,9 @@ void test5() {
int (*f)(void *) = &NoViableOverloadObjectSize0;
gi = f(&t[1]);
+
+ int (*g)(void *) = &NoViableOverloadDynamicObjectSize0;
+ gi = g(&t[1]);
}
// CHECK-LABEL: define i32 @IndirectObjectSize0
@@ -213,6 +273,12 @@ int IndirectObjectSize3(void *const p PS(3)) {
return ObjectSize3(p);
}
+int IndirectDynamicObjectSize0(void *const p PDS(0)) {
+ // CHECK: call i32 @ObjectSize0(i8* %{{.*}}, i64 %{{.*}})
+ // CHECK-NOT: @llvm.objectsize
+ return ObjectSize0(p);
+}
+
int Overload0(void *, size_t, void *, size_t);
int OverloadNoSize(void *, void *);
@@ -418,3 +484,10 @@ void test17(char *C) {
// CHECK: call i32 @ObjectSize0(i8* [[PTR]]
ObjectSize0(C + ({ int a = 65535; a; }));
}
+
+// CHECK-LABEL: define void @test18
+void test18(char *const p PDS(0)) {
+ // CHECK-NOT: llvm.objectsize
+ gi = __builtin_dynamic_object_size(p, 0);
+ gi = __builtin_object_size(p, 0);
+}
diff --git a/test/CodeGen/pgo-instrumentation.c b/test/CodeGen/pgo-instrumentation.c
index 1dac36fa58..35b6f8b762 100644
--- a/test/CodeGen/pgo-instrumentation.c
+++ b/test/CodeGen/pgo-instrumentation.c
@@ -1,20 +1,36 @@
// Test that the PGO instrumentation and use passes are invoked.
//
// Ensure Pass PGOInstrumentationGenPass is invoked.
-// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN
+// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN --check-prefix=CHECK-INSTRPROF
+// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM --check-prefix=CHECK-INSTRPROF-NEWPM
// CHECK-PGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenPass
+// CHECK-INSTRPROF: Frontend instrumentation-based coverage lowering
+// CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM: Running pass: PGOInstrumentationGen on
+// CHECK-INSTRPROF-NEWPM: Running pass: InstrProfiling on
//
// Ensure Pass PGOInstrumentationGenPass is not invoked.
// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG
+// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NEWPM
// CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NOT: PGOInstrumentationGenPass
+// CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NEWPM-NOT: Running pass: PGOInstrumentationGen on
+
+// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF
+// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF-NEWPM
+// RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF
+// RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF-NEWPM
+// CHECK-CLANG-INSTRPROF: Frontend instrumentation-based coverage lowering
+// CHECK-CLANG-INSTRPROF-NEWPM: Running pass: InstrProfiling on
// Ensure Pass PGOInstrumentationUsePass is invoked.
// RUN: llvm-profdata merge -o %t.profdata %S/Inputs/pgotestir.profraw
// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-NEWPM
// CHECK-PGOUSEPASS-INVOKED-INSTR-USE: PGOInstrumentationUsePass
+// CHECK-PGOUSEPASS-INVOKED-INSTR-USE-NEWPM: Running pass: PGOInstrumentationUse on
//
// Ensure Pass PGOInstrumentationUsePass is not invoked.
// RUN: llvm-profdata merge -o %t.profdata %S/Inputs/pgotestclang.profraw
// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE-CLANG
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE-CLANG-NEWPM
// CHECK-PGOUSEPASS-INVOKED-USE-CLANG-NOT: PGOInstrumentationUsePass
-
+// CHECK-PGOUSEPASS-INVOKED-USE-CLANG-NEWPM-NOT: Running pass: PGOInstrumentationUse on
diff --git a/test/CodeGen/popcnt-builtins.c b/test/CodeGen/popcnt-builtins.c
index 1fdb43339a..800e759bba 100644
--- a/test/CodeGen/popcnt-builtins.c
+++ b/test/CodeGen/popcnt-builtins.c
@@ -1,24 +1,39 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-POPCNT
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
-#include <immintrin.h>
+#include <x86intrin.h>
-unsigned int test_mm_popcnt_u32(unsigned int __X) {
- //CHECK: call i32 @llvm.ctpop.i32
+#ifdef __POPCNT__
+int test_mm_popcnt_u32(unsigned int __X) {
+ //CHECK-POPCNT: call i32 @llvm.ctpop.i32
return _mm_popcnt_u32(__X);
}
+#endif
-unsigned int test_popcnt_32(int __X) {
+int test_popcnt32(unsigned int __X) {
//CHECK: call i32 @llvm.ctpop.i32
return _popcnt32(__X);
}
-unsigned long long test_mm_popcnt_u64(unsigned long long __X) {
- //CHECK: call i64 @llvm.ctpop.i64
+int test__popcntd(unsigned int __X) {
+ //CHECK: call i32 @llvm.ctpop.i32
+ return __popcntd(__X);
+}
+
+#ifdef __POPCNT__
+long long test_mm_popcnt_u64(unsigned long long __X) {
+ //CHECK-POPCNT: call i64 @llvm.ctpop.i64
return _mm_popcnt_u64(__X);
}
+#endif
-unsigned long long test_popcnt_64(long long __X) {
+long long test_popcnt64(unsigned long long __X) {
//CHECK: call i64 @llvm.ctpop.i64
return _popcnt64(__X);
}
+
+long long test__popcntq(unsigned long long __X) {
+ //CHECK: call i64 @llvm.ctpop.i64
+ return __popcntq(__X);
+}
diff --git a/test/CodeGen/powerpc_types.c b/test/CodeGen/powerpc_types.c
index b7d0f5de49..86eb7f8356 100644
--- a/test/CodeGen/powerpc_types.c
+++ b/test/CodeGen/powerpc_types.c
@@ -1,4 +1,6 @@
// RUN: %clang_cc1 -triple powerpc-unknown-freebsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s
+// RUN: %clang_cc1 -triple powerpc-unknown-netbsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s
+// RUN: %clang_cc1 -triple powerpc-unknown-openbsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s
#include <stdarg.h>
diff --git a/test/CodeGen/ppc-mmintrin.c b/test/CodeGen/ppc-mmintrin.c
new file mode 100644
index 0000000000..212a387ec3
--- /dev/null
+++ b/test/CodeGen/ppc-mmintrin.c
@@ -0,0 +1,1262 @@
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE
+// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE
+// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE
+// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE
+
+#include <mmintrin.h>
+
+unsigned long long int ull1, ull2;
+int i1, i2;
+short s[4];
+signed char c[8];
+long long int ll1;
+__m64 m1, m2, res;
+
+void __attribute__((noinline))
+test_add() {
+ res = _mm_add_pi32(m1, m2);
+ res = _mm_add_pi16(m1, m2);
+ res = _mm_add_pi8(m1, m2);
+ res = _mm_adds_pu16(m1, m2);
+ res = _mm_adds_pu8(m1, m2);
+ res = _mm_adds_pi16(m1, m2);
+ res = _mm_adds_pi8(m1, m2);
+}
+
+// CHECK-LABEL: @test_add
+
+// CHECK: define available_externally i64 @_mm_add_pi32
+
+// CHECK-P9: [[REG1:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9-NEXT: [[REG2:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG2]], <4 x i32>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-P9-NEXT: [[REG4:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-P9-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG5]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG6]], <4 x i32>* [[REG7:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-P9-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG3]], align 16
+// CHECK-P9-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG7]], align 16
+// CHECK-P9-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> [[REG8]], <4 x i32> [[REG9]])
+// CHECK-P9-NEXT: store <4 x i32> [[REG10]], <4 x i32>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-P9-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG11]], align 16
+// CHECK-P9-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG12]] to <2 x i64>
+// CHECK-P9-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG13]], i32 0
+// CHECK-P9-NEXT: ret i64 [[REG14]]
+
+// CHECK-P8: [[REG15:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-P8-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG15]]
+// CHECK-P8: [[REG17:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-P8-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG17]]
+// CHECK-P8-NEXT: add nsw i32 [[REG16]], [[REG18]]
+// CHECK-P8: [[REG19:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-P8-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG19]]
+// CHECK-P8: [[REG21:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-P8-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG21]]
+// CHECK-P8-NEXT: add nsw i32 [[REG20]], [[REG22]]
+
+// CHECK: define available_externally i64 @_mm_add_pi16
+// CHECK: [[REG23:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG23]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG24]], <8 x i16>* [[REG25:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* [[REG28:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG25]], align 16
+// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG28]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> [[REG29]], <8 x i16> [[REG30]])
+
+// CHECK: define available_externally i64 @_mm_add_pi8
+// CHECK: [[REG31:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG31]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG32]], <16 x i8>* [[REG33:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG34:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG34]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG35]], <16 x i8>* [[REG36:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG33]], align 16
+// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG36]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])(<16 x i8> [[REG37]], <16 x i8> [[REG38]])
+
+// CHECK: define available_externally i64 @_mm_adds_pu16
+// CHECK: [[REG39:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG39]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG40]], <8 x i16>* [[REG41:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG42:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG42]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG43]], <8 x i16>* [[REG44:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG41]], align 16
+// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG44]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG45]], <8 x i16> [[REG46]])
+
+// CHECK: define available_externally i64 @_mm_adds_pu8
+// CHECK: [[REG47:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG47]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG48]], <16 x i8>* [[REG49:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG50]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG51]], <16 x i8>* [[REG52:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG49]], align 16
+// CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG52]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG53]], <16 x i8> [[REG54]])
+
+// CHECK: define available_externally i64 @_mm_adds_pi16
+// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG55]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG56]], <8 x i16>* [[REG57:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG58:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG58]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG59]], <8 x i16>* [[REG60:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG57]], align 16
+// CHECK-NEXT: [[REG62:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG60]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> [[REG61]], <8 x i16> [[REG62]])
+
+// CHECK: define available_externally i64 @_mm_adds_pi8
+// CHECK: [[REG63:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG64:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG63]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG64]], <16 x i8>* [[REG65:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG66:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
+// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG66]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG67]], <16 x i8>* [[REG68:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG65]], align 16
+// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG68]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])(<16 x i8> [[REG69]], <16 x i8> [[REG70]])
+
+void __attribute__((noinline))
+test_alt_name_add() {
+ res = _m_paddb(m1, m2);
+ res = _m_paddd(m1, m2);
+ res = _m_paddsb(m1, m2);
+ res = _m_paddsw(m1, m2);
+ res = _m_paddusb(m1, m2);
+ res = _m_paddusw(m1, m2);
+ res = _m_paddw(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_add
+
+// CHECK: define available_externally i64 @_m_paddb
+// CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi8
+// CHECK-NEXT: ret i64 [[REG71]]
+
+// CHECK: define available_externally i64 @_m_paddd
+// CHECK: [[REG72:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi32
+// CHECK-NEXT: ret i64 [[REG72]]
+
+// CHECK: define available_externally i64 @_m_paddsb
+// CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi8
+// CHECK-NEXT: ret i64 [[REG73]]
+
+// CHECK: define available_externally i64 @_m_paddsw
+// CHECK: [[REG74:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi16
+// CHECK-NEXT: ret i64 [[REG74]]
+
+// CHECK: define available_externally i64 @_m_paddusb
+// CHECK: [[REG75:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu8
+// CHECK-NEXT: ret i64 [[REG75]]
+
+// CHECK: define available_externally i64 @_m_paddusw
+// CHECK: [[REG76:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu16
+// CHECK-NEXT: ret i64 [[REG76]]
+
+// CHECK: define available_externally i64 @_m_paddw
+// CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi16
+// CHECK-NEXT: ret i64 [[REG77]]
+
+void __attribute__((noinline))
+test_cmp() {
+ res = _mm_cmpeq_pi32(m1, m2);
+ res = _mm_cmpeq_pi16(m1, m2);
+ res = _mm_cmpeq_pi8(m1, m2);
+ res = _mm_cmpgt_pi32(m1, m2);
+ res = _mm_cmpgt_pi16(m1, m2);
+ res = _mm_cmpgt_pi8(m1, m2);
+}
+
+// CHECK-LABEL: @test_cmp
+
+// CHECK: define available_externally i64 @_mm_cmpeq_pi32
+
+// CHECK-P9: [[REG78:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG78]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG79]], <4 x i32>* [[REG80:[0-9a-zA-Z_%.]+]]
+// CHECK-P9: [[REG81:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG81]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG82]], <4 x i32>* [[REG83:[0-9a-zA-Z_%.]+]]
+// CHECK-P9-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG80]]
+// CHECK-P9-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG83]]
+// CHECK-P9-NEXT: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])(<4 x i32> [[REG84]], <4 x i32> [[REG85]])
+
+// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8: [[REG86:[0-9a-zA-Z_%.]+]] = icmp eq i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8: [[REG87:[0-9a-zA-Z_%.]+]] = select i1 [[REG86]], i32 -1, i32 0
+// CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8-NEXT: store i32 [[REG87]], i32* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}
+
+// CHECK: define available_externally i64 @_mm_cmpeq_pi16
+// CHECK: [[REG88:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG88]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG89]], <8 x i16>* [[REG90:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG92:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG91]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG92]], <8 x i16>* [[REG93:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG90]], align 16
+// CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG93]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])(<8 x i16> [[REG94]], <8 x i16> [[REG95]])
+
+// CHECK: define available_externally i64 @_mm_cmpeq_pi8
+// CHECK: call i64 asm "cmpb $0,$1,$2;\0A", "=r,r,r"
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG96:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG96]], align 8
+// CHECK-NEXT: ret i64 [[REG97]]
+
+// CHECK: define available_externally i64 @_mm_cmpgt_pi32
+
+// CHECK-P9: [[REG98:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG98]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG99]], <4 x i32>* [[REG100:[0-9a-zA-Z_%.]+]]
+// CHECK-P9: [[REG101:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG101]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG102]], <4 x i32>* [[REG103:[0-9a-zA-Z_%.]+]]
+// CHECK-P9-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG100]]
+// CHECK-P9-NEXT: [[REG105:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG103]]
+// CHECK-P9-NEXT: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> [[REG104]], <4 x i32> [[REG105]])
+
+// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8: [[REG106:[0-9a-zA-Z_%.]+]] = icmp sgt i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8: [[REG107:[0-9a-zA-Z_%.]+]] = select i1 [[REG106]], i32 -1, i32 0
+// CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8-NEXT: store i32 [[REG107]], i32* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
+// CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}
+
+// CHECK: define available_externally i64 @_mm_cmpgt_pi16
+// CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG109:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG108]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG109]], <8 x i16>* [[REG110:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG112:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG111]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG112]], <8 x i16>* [[REG113:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG114:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG110]]
+// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG113]]
+// CHECK-NEXT: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG114]], <8 x i16> [[REG115]])
+
+// CHECK: define available_externally i64 @_mm_cmpgt_pi8
+// CHECK: [[REG116:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG117:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG116]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG117]], <16 x i8>* [[REG118:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG119:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG119]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG120]], <16 x i8>* [[REG121:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG118]]
+// CHECK-NEXT: [[REG123:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG121]]
+// CHECK-NEXT: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> [[REG122]], <16 x i8> [[REG123]])
+
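+// Each _m_* alternate name is expected to be a thin wrapper that forwards to
+// its _mm_* equivalent, so the alt-name checks here (and in the later
+// test_alt_name_* functions) only match the forwarding call and its return.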
+void __attribute__((noinline))
+test_alt_name_cmp() {
+ res = _m_pcmpeqb(m1, m2);
+ res = _m_pcmpeqd(m1, m2);
+ res = _m_pcmpeqw(m1, m2);
+ res = _m_pcmpgtb(m1, m2);
+ res = _m_pcmpgtd(m1, m2);
+ res = _m_pcmpgtw(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_cmp
+
+// CHECK: define available_externally i64 @_m_pcmpeqb
+// CHECK: [[REG124:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8
+// CHECK-NEXT: ret i64 [[REG124]]
+
+// CHECK: define available_externally i64 @_m_pcmpeqd
+// CHECK: [[REG125:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi32
+// CHECK-NEXT: ret i64 [[REG125]]
+
+// CHECK: define available_externally i64 @_m_pcmpeqw
+// CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi16
+// CHECK-NEXT: ret i64 [[REG126]]
+
+// CHECK: define available_externally i64 @_m_pcmpgtb
+// CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi8
+// CHECK-NEXT: ret i64 [[REG127]]
+
+// CHECK: define available_externally i64 @_m_pcmpgtd
+// CHECK: [[REG128:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi32
+// CHECK-NEXT: ret i64 [[REG128]]
+
+// CHECK: define available_externally i64 @_m_pcmpgtw
+// CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi16
+// CHECK-NEXT: ret i64 [[REG129]]
+
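+// Conversions are plain integer moves: _mm_cvtsi32_si64 zero-extends the
+// 32-bit value and _mm_cvtsi64_si32 truncates; the m64/si64 forms are loads.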
+void __attribute__((noinline))
+test_convert() {
+ ll1 = _mm_cvtm64_si64(m1);
+ m1 = _mm_cvtsi32_si64(i1);
+ m1 = _mm_cvtsi64_m64(ll1);
+ i1 = _mm_cvtsi64_si32(m1);
+}
+
+// CHECK-LABEL: @test_convert
+
+// CHECK: define available_externally i64 @_mm_cvtm64_si64
+// CHECK: [[REG130:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: ret i64 [[REG130]]
+
+// CHECK: define available_externally i64 @_mm_cvtsi32_si64
+// CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: [[REG132:[0-9a-zA-Z_%.]+]] = zext i32 [[REG131]] to i64
+// CHECK-NEXT: ret i64 [[REG132]]
+
+// CHECK: define available_externally i64 @_mm_cvtsi64_m64
+// CHECK: [[REG133:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: ret i64 [[REG133]]
+
+// CHECK: define available_externally signext i32 @_mm_cvtsi64_si32
+// CHECK: [[REG134:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG134]] to i32
+// CHECK-NEXT: ret i32 [[REG135]]
+
+void __attribute__((noinline))
+test_alt_name_convert() {
+ m1 = _m_from_int(i1);
+ m1 = _m_from_int64(ll1);
+ i1 = _m_to_int(m1);
+ ll1 = _m_to_int64(m1);
+}
+
+// CHECK-LABEL: @test_alt_name_convert
+
+// CHECK: define available_externally i64 @_m_from_int
+// CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtsi32_si64
+// CHECK-NEXT: ret i64 [[REG136]]
+
+// CHECK: define available_externally i64 @_m_from_int64
+// CHECK: [[REG137:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG137]]
+
+// CHECK: define available_externally signext i32 @_m_to_int
+// CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtsi64_si32
+// CHECK-NEXT: ret i32 [[REG138]]
+
+// CHECK: define available_externally i64 @_m_to_int64
+// CHECK: [[REG139:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG139]]
+
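+// There is no MMX state to clear on PowerPC, so _mm_empty/_m_empty should
+// compile to empty functions.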
+void __attribute__((noinline))
+test_empty() {
+ _mm_empty();
+ _m_empty();
+}
+
+// CHECK-LABEL: @test_empty
+
+// CHECK: define available_externally void @_mm_empty
+// CHECK-NEXT: entry
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_m_empty
+// CHECK-NEXT: entry
+// CHECK-NEXT: ret void
+
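+// The 64-bit logical ops lower to plain scalar i64 operations; andnot is the
+// usual xor-with--1 followed by and.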
+void __attribute__((noinline))
+test_logic() {
+ res = _mm_and_si64(m1, m2);
+ res = _mm_andnot_si64(m1, m2);
+ res = _mm_or_si64(m1, m2);
+ res = _mm_xor_si64(m1, m2);
+}
+
+// CHECK-LABEL: @test_logic
+
+// CHECK: define available_externally i64 @_mm_and_si64
+// CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = and i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG140]]
+
+// CHECK: define available_externally i64 @_mm_andnot_si64
+// CHECK: [[REG141:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, -1
+// CHECK: [[REG142:[0-9a-zA-Z_%.]+]] = and i64 [[REG141]], {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG142]]
+
+// CHECK: define available_externally i64 @_mm_or_si64
+// CHECK: [[REG143:[0-9a-zA-Z_%.]+]] = or i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG143]]
+
+// CHECK: define available_externally i64 @_mm_xor_si64
+// CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG144]]
+
+void __attribute__((noinline))
+test_alt_name_logic() {
+ res = _m_pand(m1, m2);
+ res = _m_pandn(m1, m2);
+ res = _m_por(m1, m2);
+ res = _m_pxor(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_logic
+
+// CHECK: define available_externally i64 @_m_pand
+// CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call i64 @_mm_and_si64
+// CHECK-NEXT: ret i64 [[REG145]]
+
+// CHECK: define available_externally i64 @_m_pandn
+// CHECK: [[REG146:[0-9a-zA-Z_%.]+]] = call i64 @_mm_andnot_si64
+// CHECK-NEXT: ret i64 [[REG146]]
+
+// CHECK: define available_externally i64 @_m_por
+// CHECK: [[REG147:[0-9a-zA-Z_%.]+]] = call i64 @_mm_or_si64
+// CHECK-NEXT: ret i64 [[REG147]]
+
+// CHECK: define available_externally i64 @_m_pxor
+// CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call i64 @_mm_xor_si64
+// CHECK-NEXT: ret i64 [[REG148]]
+
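+// _mm_madd_pi16 multiplies corresponding 16-bit lanes and sums adjacent
+// 32-bit products; it should map to vec_vmsumshm with a zero accumulator.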
+void __attribute__((noinline))
+test_madd() {
+ res = _mm_madd_pi16(m1, m2);
+ res = _m_pmaddwd(m1, m2);
+}
+
+// CHECK-LABEL: @test_madd
+
+// CHECK: define available_externally i64 @_mm_madd_pi16
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* [[REG149:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG150:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG150]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG151]], <8 x i16>* [[REG152:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG153:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG153]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG154]], <8 x i16>* [[REG155:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG156:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG152]], align 16
+// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG155]], align 16
+// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG149]], align 16
+// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmsumshm(<8 x i16> [[REG156]], <8 x i16> [[REG157]], <4 x i32> [[REG158]])
+// CHECK-NEXT: store <4 x i32> [[REG159]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+
+// CHECK: define available_externally i64 @_m_pmaddwd
+// CHECK: [[REG160:[0-9a-zA-Z_%.]+]] = call i64 @_mm_madd_pi16
+// CHECK-NEXT: ret i64 [[REG160]]
+
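+// _mm_mulhi_pi16 should combine even/odd 16x16->32 multiplies
+// (vec_vmulesh/vec_vmulosh) and pick the high halves with an
+// endian-dependent vec_perm mask; _mm_mullo_pi16 is a plain <8 x i16> mul.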
+void __attribute__((noinline))
+test_mul() {
+ res = _mm_mulhi_pi16(m1, m2);
+ res = _mm_mullo_pi16(m1, m2);
+}
+
+// CHECK-LABEL: @test_mul
+
+// CHECK: define available_externally i64 @_mm_mulhi_pi16
+// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: store <8 x i16> {{[0-9a-zA-Z_%.]+}}, <8 x i16>* [[REG162:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG163:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG164:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG164]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG165]], <8 x i16>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16
+// CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16
+// CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulesh(<8 x i16> [[REG167]], <8 x i16> [[REG168]])
+// CHECK-NEXT: store <4 x i32> [[REG169]], <4 x i32>* [[REG170:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG171:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16
+// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16
+// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulosh(<8 x i16> [[REG171]], <8 x i16> [[REG172]])
+// CHECK-NEXT: store <4 x i32> [[REG173]], <4 x i32>* [[REG174:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG170]], align 16
+// CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG174]], align 16
+// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> [[REG175]], <4 x i32> [[REG176]], <16 x i8> [[REG177]])
+
+// CHECK: define available_externally i64 @_mm_mullo_pi16
+// CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG178]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG179]], <8 x i16>* [[REG180:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG182]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG183]], <8 x i16>* [[REG184:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG180]], align 16
+// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG184]], align 16
+// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = mul <8 x i16> [[REG185]], [[REG186]]
+// CHECK-NEXT: store <8 x i16> [[REG187]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+
+void __attribute__((noinline))
+test_alt_name_mul() {
+ res = _m_pmulhw(m1, m2);
+ res = _m_pmullw(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_mul
+
+// CHECK: define available_externally i64 @_m_pmulhw
+// CHECK: [[REG188:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pi16
+// CHECK-NEXT: ret i64 [[REG188]]
+
+// CHECK: define available_externally i64 @_m_pmullw
+// CHECK: [[REG189:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mullo_pi16
+// CHECK-NEXT: ret i64 [[REG189]]
+
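+// The pack intrinsics narrow with saturation; the unsigned _mm_packs_pu16
+// additionally needs a vec_cmplt/vec_sel sequence to clamp negative inputs
+// to zero before packing.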
+void __attribute__((noinline))
+test_packs() {
+ res = _mm_packs_pu16((__m64)ull1, (__m64)ull2);
+ res = _mm_packs_pi16((__m64)ull1, (__m64)ull2);
+ res = _mm_packs_pi32((__m64)ull1, (__m64)ull2);
+}
+
+// CHECK-LABEL: @test_packs
+
+// CHECK: define available_externally i64 @_mm_packs_pu16(i64 [[REG190:[0-9a-zA-Z_%.]+]], i64 [[REG191:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG190]], i64* [[REG192:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG191]], i64* [[REG193:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-LE: load i64, i64* [[REG192]], align 8
+// CHECK: load i64, i64* [[REG193]], align 8
+// CHECK-BE: load i64, i64* [[REG192]], align 8
+// CHECK: [[REG194:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt
+// CHECK-NEXT: store <8 x i16> [[REG194]], <8 x i16>* [[REG195:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197]], align 16
+// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG196]], <8 x i16> [[REG198]])
+// CHECK-NEXT: store <16 x i8> [[REG199]], <16 x i8>* [[REG200:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16
+// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16
+// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(bool vector[8], bool vector[8])(<8 x i16> [[REG201]], <8 x i16> [[REG202]])
+// CHECK-NEXT: store <16 x i8> [[REG203]], <16 x i8>* [[REG204:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG200]], align 16
+// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG204]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG205]], <16 x i8> zeroinitializer, <16 x i8> [[REG206]])
+
+// CHECK: define available_externally i64 @_mm_packs_pi16(i64 [[REG207:[0-9a-zA-Z_%.]+]], i64 [[REG208:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG207]], i64* [[REG209:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG208]], i64* [[REG210:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-LE: load i64, i64* [[REG209]], align 8
+// CHECK: load i64, i64* [[REG210]], align 8
+// CHECK-BE: load i64, i64* [[REG209]], align 8
+// CHECK: [[REG211:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_packs(short vector[8], short vector[8])(<8 x i16> [[REG211]], <8 x i16> [[REG213]])
+
+// CHECK: define available_externally i64 @_mm_packs_pi32(i64 [[REG214:[0-9a-zA-Z_%.]+]], i64 [[REG215:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG214]], i64* [[REG216:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG215]], i64* [[REG217:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-LE: load i64, i64* [[REG216]], align 8
+// CHECK: load i64, i64* [[REG217]], align 8
+// CHECK-BE: load i64, i64* [[REG216]], align 8
+// CHECK: [[REG218:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> [[REG218]], <4 x i32> [[REG220]])
+
+void __attribute__((noinline))
+test_alt_name_packs() {
+ res = _m_packssdw(m1, m2);
+ res = _m_packsswb(m1, m2);
+ res = _m_packuswb(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_packs
+
+// CHECK: define available_externally i64 @_m_packssdw
+// CHECK: [[REG221:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi32
+// CHECK-NEXT: ret i64 [[REG221]]
+
+// CHECK: define available_externally i64 @_m_packsswb
+// CHECK: [[REG222:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi16
+// CHECK-NEXT: ret i64 [[REG222]]
+
+// CHECK: define available_externally i64 @_m_packuswb
+// CHECK: [[REG223:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pu16
+// CHECK-NEXT: ret i64 [[REG223]]
+
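+// _mm_set_* build the result with element-by-element stores into a stack
+// array that aliases the returned 64-bit value.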
+void __attribute__((noinline))
+test_set() {
+ m1 = _mm_set_pi32(2134, -128);
+ m1 = _mm_set_pi16(2134, -128, 1234, 6354);
+ m1 = _mm_set_pi8(-128, 10, 0, 123, -1, -5, 127, 5);
+}
+
+// CHECK-LABEL: @test_set
+
+// CHECK: define available_externally i64 @_mm_set_pi32
+// CHECK: [[REG224:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG224]]
+// CHECK: [[REG225:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG225]]
+
+// CHECK: define available_externally i64 @_mm_set_pi16
+// CHECK: [[REG226:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG226]]
+// CHECK: [[REG227:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG227]]
+// CHECK: [[REG228:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG228]]
+// CHECK: [[REG229:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG229]]
+
+// CHECK: define available_externally i64 @_mm_set_pi8
+// CHECK: [[REG230:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG230]]
+// CHECK: [[REG231:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG231]]
+// CHECK: [[REG232:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG232]]
+// CHECK: [[REG233:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG233]]
+// CHECK: [[REG234:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 4
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG234]]
+// CHECK: [[REG235:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 5
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG235]]
+// CHECK: [[REG236:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 6
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG236]]
+// CHECK: [[REG237:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 7
+// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG237]]
+
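+// _mm_set1_pi16 splats one scalar: the P9 path uses vec_splats directly,
+// while P8 stores the scalar into each of the four array slots.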
+void __attribute__((noinline))
+test_set1() {
+ res = _mm_set1_pi32(i1);
+ res = _mm_set1_pi16(s[0]);
+ res = _mm_set1_pi8(c[0]);
+}
+
+// CHECK-LABEL: @test_set1
+
+// CHECK: define available_externally i64 @_mm_set1_pi32
+// CHECK: [[REG244:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG244]], align 8
+// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK: [[REG246:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG246]], align 4
+
+// CHECK: define available_externally i64 @_mm_set1_pi16
+
+// CHECK-P9: [[REG247:[0-9a-zA-Z_%.]+]] = load i16, i16* {{[0-9a-zA-Z_%.]+}}, align 2
+// CHECK-P9-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(short)(i16 signext [[REG247]])
+// CHECK-P9-NEXT: store <8 x i16> [[REG248]], <8 x i16>* [[REG249:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-P9-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG249]], align 16
+// CHECK-P9-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG250]] to <2 x i64>
+// CHECK-P9-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG251]], i32 0
+// CHECK-P9-NEXT: ret i64 [[REG252]]
+
+// CHECK-P8: [[REG253:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254:[0-9a-zA-Z_%.]+]], align 2
+// CHECK-P8: [[REG255:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-P8-NEXT: store i16 [[REG253]], i16* [[REG255]], align 8
+// CHECK-P8-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
+// CHECK-P8: [[REG256:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-P8-NEXT: store i16 [[REG250]], i16* [[REG256]], align 2
+// CHECK-P8-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
+// CHECK-P8: [[REG257:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-P8-NEXT: store i16 [[REG251]], i16* [[REG257]], align 4
+// CHECK-P8-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
+// CHECK-P8: [[REG259:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-P8-NEXT: store i16 [[REG258]], i16* [[REG259]], align 2
+
+// CHECK: define available_externally i64 @_mm_set1_pi8
+// CHECK: [[REG260:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(signed char)(i8 signext {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: store <16 x i8> [[REG260]], <16 x i8>* [[REG261:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG261]], align 16
+// CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG262]] to <2 x i64>
+// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG263]], i32 0
+// CHECK-NEXT: ret i64 [[REG264]]
+
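+// _mm_setr_pi16/_mm_setr_pi8 forward to the matching _mm_set_* (which takes
+// the arguments in the reverse order), so only the call is checked; the
+// pi32 variant is expanded inline.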
+void __attribute__((noinline))
+test_setr() {
+ res = _mm_setr_pi32(i1, i2);
+ res = _mm_setr_pi16(s[0], s[1], s[2], s[3]);
+ res = _mm_setr_pi8(c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]);
+}
+
+// CHECK-LABEL: @test_setr
+
+// CHECK: define available_externally i64 @_mm_setr_pi32
+// CHECK: [[REG265:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK: [[REG266:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 [[REG265]], i32* [[REG266]], align 8
+// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK: [[REG268:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 [[REG267]], i32* [[REG268]], align 4
+
+// CHECK: define available_externally i64 @_mm_setr_pi16
+// CHECK: [[REG269:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi16
+// CHECK-NEXT: ret i64 [[REG269]]
+
+// CHECK: define available_externally i64 @_mm_setr_pi8
+// CHECK: [[REG270:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi8
+// CHECK-NEXT: ret i64 [[REG270]]
+
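+// _mm_setzero_si64 should fold to a constant zero return.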
+void __attribute__((noinline))
+test_setzero() {
+ res = _mm_setzero_si64();
+}
+
+// CHECK-LABEL: @test_setzero
+
+// CHECK: define available_externally i64 @_mm_setzero_si64
+// CHECK: entry
+// CHECK-NEXT: ret i64 0
+
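+// Shift counts come from the low 64 bits of the second operand. For the
+// 16-bit element shift a count above 15 must give an all-zero result, hence
+// the compare-and-branch structure; pi32 is done as two scalar shl ops and
+// si64 as a single shl.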
+void __attribute__((noinline))
+test_sll() {
+ res = _mm_sll_pi16(m1, m2);
+ res = _mm_sll_pi32(m1, m2);
+ res = _mm_sll_si64(m1, m2);
+ res = _mm_slli_pi16(m1, i1);
+ res = _mm_slli_pi32(m1, i1);
+ res = _mm_slli_si64(m1, i1);
+}
+
+// CHECK-LABEL: @test_sll
+
+// CHECK: define available_externally i64 @_mm_sll_pi16
+// CHECK: [[REG271:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
+// CHECK-NEXT: br i1 [[REG271]], label %[[REG272:[0-9a-zA-Z_.]+]], label %[[REG273:[0-9a-zA-Z_.]+]]
+// CHECK: [[REG272]]
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i16
+// CHECK-NEXT: call <8 x i16> @vec_splats(unsigned short)
+// CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])
+// CHECK: store i64 [[REG274:[0-9a-zA-Z_%.]+]], i64* [[REG275:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: br label %[[REG276:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG273]]
+// CHECK-NEXT: store i64 0, i64* [[REG275]], align 8
+// CHECK-NEXT: br label %[[REG276]]
+// CHECK: [[REG276]]
+// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG275]], align 8
+// CHECK-NEXT: ret i64 [[REG277]]
+
+// CHECK: define available_externally i64 @_mm_sll_pi32
+// CHECK: [[REG278:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG278]]
+// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG280]] to i32
+// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = shl i32 [[REG279]], [[REG281]]
+// CHECK: [[REG283:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 [[REG282]], i32* [[REG283]]
+// CHECK: [[REG284:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG284]], align 4
+// CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i32
+// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = shl i32 [[REG285]], {{[0-9a-zA-Z_%.]+}}
+// CHECK: [[REG287:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 [[REG286]], i32* [[REG287]], align 4
+
+// CHECK: define available_externally i64 @_mm_sll_si64
+// CHECK: [[REG288:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG288]]
+
+// CHECK: define available_externally i64 @_mm_slli_pi16
+// CHECK: [[REG289:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG289]])
+// CHECK-NEXT: ret i64 [[REG290]]
+
+// CHECK: define available_externally i64 @_mm_slli_pi32
+// CHECK: [[REG291:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG291]])
+// CHECK-NEXT: ret i64 [[REG292]]
+
+// CHECK: define available_externally i64 @_mm_slli_si64
+// CHECK: [[REG293:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, [[REG293]]
+// CHECK-NEXT: ret i64 [[REG294]]
+
+void __attribute__((noinline))
+test_alt_name_sll() {
+ res = _m_pslld(m1, m2);
+ res = _m_pslldi(m1, i1);
+ res = _m_psllq(m1, m2);
+ res = _m_psllqi(m1, i1);
+ res = _m_psllw(m1, m2);
+ res = _m_psllwi(m1, i1);
+}
+
+// CHECK-LABEL: @test_alt_name_sll
+
+// CHECK: define available_externally i64 @_m_pslld
+// CHECK: [[REG295:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32
+// CHECK-NEXT: ret i64 [[REG295]]
+
+// CHECK: define available_externally i64 @_m_pslldi
+// CHECK: [[REG296:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi32
+// CHECK-NEXT: ret i64 [[REG296]]
+
+// CHECK: define available_externally i64 @_m_psllq
+// CHECK: [[REG297:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_si64
+// CHECK-NEXT: ret i64 [[REG297]]
+
+// CHECK: define available_externally i64 @_m_psllqi
+// CHECK: [[REG298:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_si64
+// CHECK-NEXT: ret i64 [[REG298]]
+
+// CHECK: define available_externally i64 @_m_psllw
+// CHECK: [[REG299:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16
+// CHECK-NEXT: ret i64 [[REG299]]
+
+// CHECK: define available_externally i64 @_m_psllwi
+// CHECK: [[REG300:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi16
+// CHECK-NEXT: ret i64 [[REG300]]
+
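+// Arithmetic right shifts: pi32 is scalarized into two ashr ops, while pi16
+// uses the same count<=15 guard as the left shifts around vec_sra.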
+void __attribute__((noinline))
+test_sra() {
+ res = _mm_sra_pi32(m1, m2);
+ res = _mm_sra_pi16(m1, m2);
+ res = _mm_srai_pi32(m1, i1);
+ res = _mm_srai_pi16(m1, i1);
+}
+
+// CHECK-LABEL: @test_sra
+
+// CHECK: define available_externally i64 @_mm_sra_pi32
+// CHECK: [[REG301:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG301]], align 8
+// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG303]] to i32
+// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG302]], [[REG304]]
+// CHECK: [[REG306:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 [[REG305]], i32* [[REG306]], align 8
+// CHECK: [[REG307:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG307]], align 4
+// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG309]] to i32
+// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG308]], [[REG310]]
+// CHECK: [[REG312:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 [[REG311]], i32* [[REG312]], align 4
+
+// CHECK: define available_externally i64 @_mm_sra_pi16
+// CHECK: [[REG313:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
+// CHECK-NEXT: br i1 [[REG313]], label %[[REG314:[0-9a-zA-Z_%.]+]], label %[[REG315:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG314]]
+// CHECK: [[REG316:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG316]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG317]], <8 x i16>* [[REG318:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG319]] to i16
+// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 zeroext [[REG320]])
+// CHECK-NEXT: store <8 x i16> [[REG321]], <8 x i16>* [[REG322:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG318]], align 16
+// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG322]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> [[REG323]], <8 x i16> [[REG324]])
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG325:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: br label %[[REG326:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG315]]
+// CHECK-NEXT: store i64 0, i64* [[REG325]], align 8
+// CHECK-NEXT: br label %[[REG326]]
+// CHECK: [[REG326]]
+// CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG325]], align 8
+// CHECK-NEXT: ret i64 [[REG327]]
+
+// CHECK: define available_externally i64 @_mm_srai_pi32
+// CHECK: [[REG328:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG328]])
+// CHECK-NEXT: ret i64 [[REG329]]
+
+// CHECK: define available_externally i64 @_mm_srai_pi16
+// CHECK: [[REG330:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG330]])
+// CHECK-NEXT: ret i64 [[REG331]]
+
+void __attribute__((noinline))
+test_alt_name_sra() {
+ res = _m_psrad(m1, m2);
+ res = _m_psraw(m1, m2);
+ res = _m_psradi(m1, i1);
+ res = _m_psrawi(m1, i1);
+}
+
+// CHECK-LABEL: @test_alt_name_sra
+
+// CHECK: define available_externally i64 @_m_psrad
+// CHECK: [[REG332:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32
+// CHECK-NEXT: ret i64 [[REG332]]
+
+// CHECK: define available_externally i64 @_m_psraw
+// CHECK: [[REG333:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16
+// CHECK-NEXT: ret i64 [[REG333]]
+
+// CHECK: define available_externally i64 @_m_psradi
+// CHECK: [[REG334:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi32
+// CHECK-NEXT: ret i64 [[REG334]]
+
+// CHECK: define available_externally i64 @_m_psrawi
+// CHECK: [[REG335:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi16
+// CHECK-NEXT: ret i64 [[REG335]]
+
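+// Logical right shifts mirror the left-shift lowering: scalar lshr for
+// si64/pi32 and a count-guarded vec_sr for pi16.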
+void __attribute__((noinline))
+test_srl() {
+ res = _mm_srl_si64(m1, m2);
+ res = _mm_srl_pi32(m1, m2);
+ res = _mm_srl_pi16(m1, m2);
+ res = _mm_srli_si64(m1, i1);
+ res = _mm_srli_pi32(m1, i1);
+ res = _mm_srli_pi16(m1, i1);
+}
+
+// CHECK-LABEL: @test_srl
+
+// CHECK: define available_externally i64 @_mm_srl_si64
+// CHECK: [[REG336:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
+// CHECK-NEXT: ret i64 [[REG336]]
+
+// CHECK: define available_externally i64 @_mm_srl_pi32
+// CHECK: [[REG337:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG337]], align 8
+// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG339]] to i32
+// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG338]], [[REG340]]
+// CHECK: [[REG342:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 [[REG341]], i32* [[REG342]], align 8
+// CHECK: [[REG343:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG343]], align 4
+// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG345]] to i32
+// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG344]], [[REG346]]
+// CHECK: [[REG348:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 [[REG347]], i32* [[REG348]], align 4
+
+// CHECK: define available_externally i64 @_mm_srl_pi16
+// CHECK: [[REG349:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
+// CHECK-NEXT: br i1 [[REG349]], label %[[REG350:[0-9a-zA-Z_%.]+]], label %[[REG351:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG350]]
+// CHECK: [[REG352:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG352]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG353]], <8 x i16>* [[REG354:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG355]] to i16
+// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 zeroext [[REG356]])
+// CHECK-NEXT: store <8 x i16> [[REG357]], <8 x i16>* [[REG358:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG354]], align 16
+// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG358]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG359]], <8 x i16> [[REG360]])
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG361:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: br label %[[REG362:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG351]]
+// CHECK-NEXT: store i64 0, i64* [[REG361]], align 8
+// CHECK-NEXT: br label %[[REG362]]
+// CHECK: [[REG362]]
+// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG361]], align 8
+// CHECK-NEXT: ret i64 [[REG363]]
+
+// CHECK: define available_externally i64 @_mm_srli_si64
+// CHECK: [[REG364:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, [[REG364]]
+// CHECK-NEXT: ret i64 [[REG365]]
+
+// CHECK: define available_externally i64 @_mm_srli_pi32
+// CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG366]])
+// CHECK-NEXT: ret i64 [[REG367]]
+
+// CHECK: define available_externally i64 @_mm_srli_pi16
+// CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
+// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG366]])
+// CHECK-NEXT: ret i64 [[REG368]]
+
+void __attribute__((noinline))
+test_alt_name_srl() {
+ res = _m_psrlq(m1, m2);
+ res = _m_psrld(m1, m2);
+ res = _m_psrlw(m1, m2);
+ res = _m_psrlqi(m1, i1);
+ res = _m_psrldi(m1, i1);
+ res = _m_psrlwi(m1, i1);
+}
+
+// CHECK-LABEL: @test_alt_name_srl
+
+// CHECK: define available_externally i64 @_m_psrlq
+// CHECK: [[REG369:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_si64
+// CHECK-NEXT: ret i64 [[REG369]]
+
+// CHECK: define available_externally i64 @_m_psrld
+// CHECK: [[REG370:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32
+// CHECK-NEXT: ret i64 [[REG370]]
+
+// CHECK: define available_externally i64 @_m_psrlw
+// CHECK: [[REG371:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16
+// CHECK-NEXT: ret i64 [[REG371]]
+
+// CHECK: define available_externally i64 @_m_psrlqi
+// CHECK: [[REG372:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_si64
+// CHECK-NEXT: ret i64 [[REG372]]
+
+// CHECK: define available_externally i64 @_m_psrldi
+// CHECK: [[REG373:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi32
+// CHECK-NEXT: ret i64 [[REG373]]
+
+// CHECK: define available_externally i64 @_m_psrlwi
+// CHECK: [[REG374:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi16
+// CHECK-NEXT: ret i64 [[REG374]]
+
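+// Subtraction mirrors the addition checks: the 32-bit case is scalarized
+// (sub nsw) on P8 but uses vec_sub on P9; the narrower and saturating forms
+// use vec_sub/vec_subs throughout.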
+void __attribute__((noinline))
+test_sub() {
+ res = _mm_sub_pi32(m1, m2);
+ res = _mm_sub_pi16(m1, m2);
+ res = _mm_sub_pi8(m1, m2);
+ res = _mm_subs_pi16(m1, m2);
+ res = _mm_subs_pi8(m1, m2);
+ res = _mm_subs_pu16(m1, m2);
+ res = _mm_subs_pu8(m1, m2);
+}
+
+// CHECK-LABEL: @test_sub
+
+// CHECK: define available_externally i64 @_mm_sub_pi32
+
+// CHECK-P8: [[REG375:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-P8-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG375]], align 8
+// CHECK-P8: [[REG377:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-P8-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG377]], align 8
+// CHECK-P8-NEXT: sub nsw i32 [[REG376]], [[REG378]]
+// CHECK-P8: [[REG379:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-P8-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG379]], align 4
+// CHECK-P8: [[REG381:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-P8-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG381]], align 4
+// CHECK-P8-NEXT: sub nsw i32 [[REG380]], [[REG382]]
+
+// CHECK-P9: [[REG383:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-P9-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG383]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG384]], <4 x i32>* [[REG385:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-P9: [[REG386:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-P9-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG386]] to <4 x i32>
+// CHECK-P9-NEXT: store <4 x i32> [[REG387]], <4 x i32>* [[REG388:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-P9-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG385]], align 16
+// CHECK-P9-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG388]], align 16
+// CHECK-P9-NEXT: call <4 x i32> @vec_sub(int vector[4], int vector[4])(<4 x i32> [[REG389]], <4 x i32> [[REG390]])
+
+// CHECK: define available_externally i64 @_mm_sub_pi16
+// CHECK: [[REG391:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG391]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG392]], <8 x i16>* [[REG393:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG394:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG394]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG395]], <8 x i16>* [[REG396:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG393]], align 16
+// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG396]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_sub(short vector[8], short vector[8])(<8 x i16> [[REG397]], <8 x i16> [[REG398]])
+
+// CHECK: define available_externally i64 @_mm_sub_pi8
+// CHECK: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG400]], <16 x i8>* [[REG401:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG402:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG402]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG403]], <16 x i8>* [[REG404:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG401]], align 16
+// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG404]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_sub(signed char vector[16], signed char vector[16])(<16 x i8> [[REG405]], <16 x i8> [[REG406]])
+
+// CHECK: define available_externally i64 @_mm_subs_pi16
+// CHECK: [[REG407:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG407]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG408]], <8 x i16>* [[REG409:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG410:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG410]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG411]], <8 x i16>* [[REG412:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG409]], align 16
+// CHECK-NEXT: [[REG414:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG412]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_subs(short vector[8], short vector[8])(<8 x i16> [[REG413]], <8 x i16> [[REG414]])
+
+// CHECK: define available_externally i64 @_mm_subs_pi8
+// CHECK: [[REG415:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG416:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG415]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG416]], <16 x i8>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG418:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG418]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG419]], <16 x i8>* [[REG420:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG421:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG417]], align 16
+// CHECK-NEXT: [[REG422:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG420]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])(<16 x i8> [[REG421]], <16 x i8> [[REG422]])
+
+// CHECK: define available_externally i64 @_mm_subs_pu16
+// CHECK: [[REG423:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG424:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG423]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG424]], <8 x i16>* [[REG425:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG426:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG427:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG426]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG427]], <8 x i16>* [[REG428:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG425]], align 16
+// CHECK-NEXT: [[REG430:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG428]], align 16
+// CHECK-NEXT: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG429]], <8 x i16> [[REG430]])
+
+// CHECK: define available_externally i64 @_mm_subs_pu8
+// CHECK: [[REG431:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG431]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG432]], <16 x i8>* [[REG433:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG434:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG434]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG435]], <16 x i8>* [[REG436:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG433]], align 16
+// CHECK-NEXT: [[REG438:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG436]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG437]], <16 x i8> [[REG438]])
+
+void __attribute__((noinline))
+test_alt_name_sub() {
+ res = _m_psubd(m1, m2);
+ res = _m_psubw(m1, m2);
+ res = _m_psubb(m1, m2);
+ res = _m_psubsw(m1, m2);
+ res = _m_psubsb(m1, m2);
+ res = _m_psubusw(m1, m2);
+ res = _m_psubusb(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_sub
+
+// CHECK: define available_externally i64 @_m_psubd
+// CHECK: [[REG439:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi32
+// CHECK-NEXT: ret i64 [[REG439]]
+
+// CHECK: define available_externally i64 @_m_psubw
+// CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi16
+// CHECK-NEXT: ret i64 [[REG440]]
+
+// CHECK: define available_externally i64 @_m_psubb
+// CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi8
+// CHECK-NEXT: ret i64 [[REG441]]
+
+// CHECK: define available_externally i64 @_m_psubsw
+// CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi16
+// CHECK-NEXT: ret i64 [[REG442]]
+
+// CHECK: define available_externally i64 @_m_psubsb
+// CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi8
+// CHECK-NEXT: ret i64 [[REG443]]
+
+// CHECK: define available_externally i64 @_m_psubusw
+// CHECK: [[REG444:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu16
+// CHECK-NEXT: ret i64 [[REG444]]
+
+// CHECK: define available_externally i64 @_m_psubusb
+// CHECK: [[REG445:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu8
+// CHECK-NEXT: ret i64 [[REG445]]
+
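+// The 32- and 16-bit unpacks are expanded as element copies through stack
+// arrays; the 8-bit variants are expected to lower to vec_mergel.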
+void __attribute__((noinline))
+test_unpack() {
+ res = _mm_unpackhi_pi32(m1, m2);
+ res = _mm_unpackhi_pi16(m1, m2);
+ res = _mm_unpackhi_pi8(m1, m2);
+ res = _mm_unpacklo_pi32(m1, m2);
+ res = _mm_unpacklo_pi16(m1, m2);
+ res = _mm_unpacklo_pi8(m1, m2);
+}
+
+// CHECK-LABEL: @test_unpack
+
+// CHECK: define available_externally i64 @_mm_unpackhi_pi32
+// CHECK: [[REG446:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG447:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG446]], align 4
+// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449:[0-9a-zA-Z_%.]+]] to [2 x i32]*
+// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG448]], i64 0, i64 0
+// CHECK-NEXT: store i32 [[REG447]], i32* [[REG450]], align 8
+// CHECK: [[REG451:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG451]], align 4
+// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449]] to [2 x i32]*
+// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG453]], i64 0, i64 1
+// CHECK-NEXT: store i32 [[REG452]], i32* [[REG454]], align 4
+
+// CHECK: define available_externally i64 @_mm_unpackhi_pi16
+// CHECK: [[REG455:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG455]], align 4
+// CHECK: [[REG457:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i16 [[REG456]], i16* [[REG457]], align 8
+// CHECK: [[REG458:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG458]], align 4
+// CHECK: [[REG460:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i16 [[REG459]], i16* [[REG460]], align 2
+// CHECK: [[REG461:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG461]], align 2
+// CHECK: [[REG463:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-NEXT: store i16 [[REG462]], i16* [[REG463]], align 4
+// CHECK: [[REG464:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG464]], align 2
+// CHECK: [[REG466:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-NEXT: store i16 [[REG465]], i16* [[REG466]], align 2
+
+// CHECK: define available_externally i64 @_mm_unpackhi_pi8
+// CHECK: [[REG467:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG468:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG467]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG468]], <16 x i8>* [[REG469:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG470:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG471:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG472:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG471]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG472]], <16 x i8>* [[REG473:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG474:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG469]], align 16
+// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG473]], align 16
+// CHECK-NEXT: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG474]], <16 x i8> [[REG475]])
+
+// CHECK: define available_externally i64 @_mm_unpacklo_pi32
+// CHECK: [[REG476:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG476]], align 8
+// CHECK: [[REG478:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i32 [[REG477]], i32* [[REG478]], align 8
+// CHECK: [[REG479:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG479]], align 8
+// CHECK: [[REG481:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i32 [[REG480]], i32* [[REG481]], align 4
+
+// CHECK: define available_externally i64 @_mm_unpacklo_pi16
+// CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG483:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG482]], align 8
+// CHECK: [[REG484:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: store i16 [[REG483]], i16* [[REG484]], align 8
+// CHECK: [[REG485:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
+// CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG485]], align 8
+// CHECK: [[REG487:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: store i16 [[REG486]], i16* [[REG487]], align 2
+// CHECK: [[REG488:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG488]], align 2
+// CHECK: [[REG490:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
+// CHECK-NEXT: store i16 [[REG489]], i16* [[REG490]], align 4
+// CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
+// CHECK-NEXT: [[REG492:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG491]], align 2
+// CHECK: [[REG493:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
+// CHECK-NEXT: store i16 [[REG492]], i16* [[REG493]], align 2
+
+// CHECK: define available_externally i64 @_mm_unpacklo_pi8
+// CHECK: [[REG494:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG494]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG495]], <16 x i8>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG498]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG499]], <16 x i8>* [[REG500:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG496]], align 16
+// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG500]], align 16
+// CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
+
+void __attribute__((noinline))
+test_alt_name_unpack() {
+ res = _m_punpckhdq(m1, m2);
+ res = _m_punpckhwd(m1, m2);
+ res = _m_punpckhbw(m1, m2);
+ res = _m_punpckldq(m1, m2);
+ res = _m_punpcklwd(m1, m2);
+ res = _m_punpcklbw(m1, m2);
+}
+
+// CHECK-LABEL: @test_alt_name_unpack
+
+// CHECK: define available_externally i64 @_m_punpckhdq
+// CHECK: [[REG238:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi32
+// CHECK-NEXT: ret i64 [[REG238]]
+
+// CHECK: define available_externally i64 @_m_punpckhwd
+// CHECK: [[REG239:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi16
+// CHECK-NEXT: ret i64 [[REG239]]
+
+// CHECK: define available_externally i64 @_m_punpckhbw
+// CHECK: [[REG240:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi8
+// CHECK-NEXT: ret i64 [[REG240]]
+
+// CHECK: define available_externally i64 @_m_punpckldq
+// CHECK: [[REG241:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi32
+// CHECK-NEXT: ret i64 [[REG241]]
+
+// CHECK: define available_externally i64 @_m_punpcklwd
+// CHECK: [[REG242:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi16
+// CHECK-NEXT: ret i64 [[REG242]]
+
+// CHECK: define available_externally i64 @_m_punpcklbw
+// CHECK: [[REG243:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi8
+// CHECK-NEXT: ret i64 [[REG243]]
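
For context, each `_m_punpck*` function checked above is an alternate-name wrapper that forwards to the corresponding `_mm_unpack*` function, which is why every body reduces to a single call plus a ret in the IR. A minimal sketch of how such a wrapper is typically written in the PowerPC MMX emulation header (illustrative; the name and exact attributes are assumptions, not the verbatim header source):

#include <mmintrin.h>  /* MMX emulation header on PowerPC */

/* Forwarding wrapper in the style of the PowerPC mmintrin.h emulation:
   the alternate MMX mnemonic simply returns the _mm_* intrinsic's result. */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq_sketch(__m64 __m1, __m64 __m2) {
  return _mm_unpackhi_pi32(__m1, __m2);
}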
diff --git a/test/CodeGen/ppc64-dwarf.c b/test/CodeGen/ppc64-dwarf.c
index 679bded453..fc815c6898 100644
--- a/test/CodeGen/ppc64-dwarf.c
+++ b/test/CodeGen/ppc64-dwarf.c
@@ -8,122 +8,121 @@ int test() {
}
// CHECK-LABEL: define signext i32 @test()
-// CHECK: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 0), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 1), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 2), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 3), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 4), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 5), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 6), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 7), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 8), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 9), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 10), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 11), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 12), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 13), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 14), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 15), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 16), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 17), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 18), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 19), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 20), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 21), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 22), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 23), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 24), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 25), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 26), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 27), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 28), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 29), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 30), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 31), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 32), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 33), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 34), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 35), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 36), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 37), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 38), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 39), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 40), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 41), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 42), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 43), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 44), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 45), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 46), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 47), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 48), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 49), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 50), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 51), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 52), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 53), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 54), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 55), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 56), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 57), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 58), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 59), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 60), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 61), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 62), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 63), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 64), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 65), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 66), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 67), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 68), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 69), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 70), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 71), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 72), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 73), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 74), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 75), align 1
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 76), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 77), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 78), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 79), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 80), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 81), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 82), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 83), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 84), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 85), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 86), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 87), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 88), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 89), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 90), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 91), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 92), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 93), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 94), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 95), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 96), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 97), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 98), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 99), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 100), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 101), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 102), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 103), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 104), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 105), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 106), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 107), align 1
-// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 108), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 109), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 110), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 111), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 112), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 113), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 114), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 115), align 1
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i32 0, i32 116), align 1
+// CHECK: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 0), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 1), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 2), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 3), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 4), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 5), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 6), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 7), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 8), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 9), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 10), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 11), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 12), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 13), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 14), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 15), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 16), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 17), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 18), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 19), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 20), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 21), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 22), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 23), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 24), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 25), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 26), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 27), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 28), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 29), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 30), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 31), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 32), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 33), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 34), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 35), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 36), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 37), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 38), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 39), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 40), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 41), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 42), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 43), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 44), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 45), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 46), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 47), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 48), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 49), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 50), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 51), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 52), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 53), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 54), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 55), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 56), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 57), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 58), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 59), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 60), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 61), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 62), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 63), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 64), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 65), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 66), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 67), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 68), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 69), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 70), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 71), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 72), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 73), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 74), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 75), align 1
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 76), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 77), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 78), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 79), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 80), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 81), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 82), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 83), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 84), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 85), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 86), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 87), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 88), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 89), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 90), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 91), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 92), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 93), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 94), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 95), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 96), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 97), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 98), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 99), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 100), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 101), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 102), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 103), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 104), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 105), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 106), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 107), align 1
+// CHECK-NEXT: store i8 16, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 108), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 109), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 110), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 111), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 112), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 113), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 114), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 115), align 1
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dwarf_reg_size_table, i64 0, i64 116), align 1
// CHECK-NEXT: ret i32 1
-
diff --git a/test/CodeGen/riscv32-ilp32-abi.c b/test/CodeGen/riscv32-ilp32-abi.c
new file mode 100644
index 0000000000..59f0bb5683
--- /dev/null
+++ b/test/CodeGen/riscv32-ilp32-abi.c
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
+
+// This file contains test cases that will have different output for ilp32 vs
+// the other 32-bit ABIs.
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct tiny {
+ uint8_t a, b, c, d;
+};
+
+struct small {
+ int32_t a, *b;
+};
+
+struct small_aligned {
+ int64_t a;
+};
+
+struct large {
+ int32_t a, b, c, d;
+};
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+// CHECK-LABEL: define i32 @f_scalar_stack_1(i32 %a, i64 %b, float %c, double %d, fp128 %e, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_1(int32_t a, int64_t b, float c, double d, long double e,
+ uint8_t f, int8_t g, uint8_t h) {
+ return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, float %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_2(float a, int64_t b, double c, long double d,
+ uint8_t e, int8_t f, uint8_t g) {
+ return (struct large){a, e, f, g};
+}
+
+// Aggregates and >=XLen scalars passed on the stack should be lowered just as
+// they would be if passed via registers.
+
+// CHECK-LABEL: define void @f_scalar_stack_3(double %a, i64 %b, double %c, i64 %d, i32 %e, i64 %f, float %g, double %h, fp128 %i)
+void f_scalar_stack_3(double a, int64_t b, double c, int64_t d, int e,
+ int64_t f, float g, double h, long double i) {}
+
+// CHECK-LABEL: define void @f_agg_stack(double %a, i64 %b, double %c, i64 %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, %struct.large* %h)
+void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e,
+ struct small f, struct small_aligned g, struct large h) {}
diff --git a/test/CodeGen/riscv32-ilp32-ilp32f-abi.c b/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
new file mode 100644
index 0000000000..0c2f0791e3
--- /dev/null
+++ b/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
+
+// This file contains test cases that will have the same output for the ilp32
+// and ilp32f ABIs.
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct tiny {
+ uint8_t a, b, c, d;
+};
+
+struct small {
+ int32_t a, *b;
+};
+
+struct small_aligned {
+ int64_t a;
+};
+
+struct large {
+ int32_t a, b, c, d;
+};
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+// CHECK-LABEL: define i32 @f_scalar_stack_1(i32 %a, i64 %b, i32 %c, double %d, fp128 %e, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_1(int32_t a, int64_t b, int32_t c, double d, long double e,
+ uint8_t f, int8_t g, uint8_t h) {
+ return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d,
+ uint8_t e, int8_t f, uint8_t g) {
+ return (struct large){a, e, f, g};
+}
+
+// Aggregates and >=XLen scalars passed on the stack should be lowered just as
+// they would be if passed via registers.
+
+// CHECK-LABEL: define void @f_scalar_stack_3(double %a, i64 %b, double %c, i64 %d, i32 %e, i64 %f, i32 %g, double %h, fp128 %i)
+void f_scalar_stack_3(double a, int64_t b, double c, int64_t d, int e,
+ int64_t f, int32_t g, double h, long double i) {}
+
+// CHECK-LABEL: define void @f_agg_stack(double %a, i64 %b, double %c, i64 %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, %struct.large* %h)
+void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e,
+ struct small f, struct small_aligned g, struct large h) {}
diff --git a/test/CodeGen/riscv32-abi.c b/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
index 04eceb3a81..12837fce94 100644
--- a/test/CodeGen/riscv32-abi.c
+++ b/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple riscv32 -emit-llvm -fforce-enable-int128 %s -o - \
-// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-FORCEINT128
+// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-FORCEINT128
+
+// This file contains test cases that will have the same output for the ilp32,
+// ilp32f, and ilp32d ABIs.
#include <stddef.h>
#include <stdint.h>
@@ -187,8 +190,8 @@ v16i8 f_vec_large_v16i8_ret() {
return (v16i8){1, 2, 3, 4, 5, 6, 7, 8};
}
-// Scalars passed on the stack should have signext/zeroext attributes (they
-// are anyext).
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
// CHECK-LABEL: define i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h)
int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
@@ -196,24 +199,18 @@ int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
return g + h;
}
-// CHECK-LABEL: define i32 @f_scalar_stack_2(i32 %a, i64 %b, float %c, double %d, fp128 %e, i8 zeroext %f, i8 %g, i8 %h)
-int f_scalar_stack_2(int32_t a, int64_t b, float c, double d, long double e,
- uint8_t f, int8_t g, uint8_t h) {
- return g + h;
-}
-
// Ensure that scalars passed on the stack are still determined correctly in
// the presence of large return values that consume a register due to the need
// to pass a pointer.
-// CHECK-LABEL: define void @f_scalar_stack_3(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
-struct large f_scalar_stack_3(int32_t a, int64_t b, double c, long double d,
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d,
uint8_t e, int8_t f, uint8_t g) {
return (struct large){a, e, f, g};
}
-// CHECK-LABEL: define fp128 @f_scalar_stack_4(i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
-long double f_scalar_stack_4(int32_t a, int64_t b, double c, long double d,
+// CHECK-LABEL: define fp128 @f_scalar_stack_4(i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+long double f_scalar_stack_4(int32_t a, int64_t b, int64_t c, long double d,
uint8_t e, int8_t f, uint8_t g) {
return d;
}
diff --git a/test/CodeGen/riscv64-lp64-abi.c b/test/CodeGen/riscv64-lp64-abi.c
new file mode 100644
index 0000000000..3720315208
--- /dev/null
+++ b/test/CodeGen/riscv64-lp64-abi.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s
+
+// This file contains test cases that will have different output for lp64 vs
+// the other 64-bit ABIs.
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct large {
+ int64_t a, b, c, d;
+};
+
+typedef unsigned char v32i8 __attribute__((vector_size(32)));
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+// CHECK-LABEL: define signext i32 @f_scalar_stack_1(i32 signext %a, i128 %b, float %c, fp128 %d, <32 x i8>*, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_1(int32_t a, __int128_t b, float c, long double d, v32i8 e,
+ uint8_t f, int8_t g, uint8_t h) {
+ return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, double %a, i128 %b, fp128 %c, <32 x i8>*, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_2(double a, __int128_t b, long double c, v32i8 d,
+ uint8_t e, int8_t f, uint8_t g) {
+ return (struct large){a, e, f, g};
+}
diff --git a/test/CodeGen/riscv64-lp64-lp64f-abi.c b/test/CodeGen/riscv64-lp64-lp64f-abi.c
new file mode 100644
index 0000000000..3b944e716a
--- /dev/null
+++ b/test/CodeGen/riscv64-lp64-lp64f-abi.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s
+
+// This file contains test cases that will have the same output for the lp64
+// and lp64f ABIs.
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct large {
+ int64_t a, b, c, d;
+};
+
+typedef unsigned char v32i8 __attribute__((vector_size(32)));
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+// CHECK-LABEL: define signext i32 @f_scalar_stack_1(i32 signext %a, i128 %b, double %c, fp128 %d, <32 x i8>*, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_1(int32_t a, __int128_t b, double c, long double d, v32i8 e,
+ uint8_t f, int8_t g, uint8_t h) {
+ return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, double %a, i128 %b, fp128 %c, <32 x i8>*, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_2(double a, __int128_t b, long double c, v32i8 d,
+ uint8_t e, int8_t f, uint8_t g) {
+ return (struct large){a, e, f, g};
+}
diff --git a/test/CodeGen/riscv64-abi.c b/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
index 7a0f065fb4..f51d8252b8 100644
--- a/test/CodeGen/riscv64-abi.c
+++ b/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
@@ -1,5 +1,8 @@
// RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s
+// This file contains test cases that will have the same output for the lp64,
+// lp64f, and lp64d ABIs.
+
#include <stddef.h>
#include <stdint.h>
@@ -176,8 +179,8 @@ v32i8 f_vec_large_v32i8_ret() {
return (v32i8){1, 2, 3, 4, 5, 6, 7, 8};
}
-// Scalars passed on the stack should have signext/zeroext attributes (they
-// are anyext).
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
// CHECK-LABEL: define signext i32 @f_scalar_stack_1(i64 %a.coerce, [2 x i64] %b.coerce, i128 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h)
int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
@@ -185,8 +188,8 @@ int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
return g + h;
}
-// CHECK-LABEL: define signext i32 @f_scalar_stack_2(i32 signext %a, i128 %b, float %c, fp128 %d, <32 x i8>*, i8 zeroext %f, i8 %g, i8 %h)
-int f_scalar_stack_2(int32_t a, __int128_t b, float c, long double d, v32i8 e,
+// CHECK-LABEL: define signext i32 @f_scalar_stack_2(i32 signext %a, i128 %b, i64 %c, fp128 %d, <32 x i8>*, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_2(int32_t a, __int128_t b, int64_t c, long double d, v32i8 e,
uint8_t f, int8_t g, uint8_t h) {
return g + h;
}
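
The attribute rule these updated comments describe can be reproduced with a small standalone function (a hypothetical example, not part of these tests): on riscv64 the first eight integer scalars travel in registers and keep their signext/zeroext attributes, while anything spilled to the stack is emitted with no extension attribute at all (anyext).

/* Hypothetical reproducer; with %clang_cc1 -triple riscv64 -emit-llvm the
   signature is expected to look roughly like
     define signext i32 @ninth(i8 signext %a, ..., i8 signext %h, i8 %i)
   i.e. the stack-passed %i carries no signext/zeroext. */
int ninth(signed char a, signed char b, signed char c, signed char d,
          signed char e, signed char f, signed char g, signed char h,
          signed char i) {
  return i;
}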
diff --git a/test/CodeGen/rot-intrinsics.c b/test/CodeGen/rot-intrinsics.c
new file mode 100644
index 0000000000..dcdc54c458
--- /dev/null
+++ b/test/CodeGen/rot-intrinsics.c
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+
+#include <x86intrin.h>
+
+unsigned char test__rolb(unsigned char value, int shift) {
+// CHECK-LABEL: i8 @test__rolb
+// CHECK: [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]])
+// CHECK: ret i8 [[R]]
+ return __rolb(value, shift);
+}
+
+unsigned short test__rolw(unsigned short value, int shift) {
+// CHECK-LABEL: i16 @test__rolw
+// CHECK: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+// CHECK: ret i16 [[R]]
+ return __rolw(value, shift);
+}
+
+unsigned int test__rold(unsigned int value, int shift) {
+// CHECK-LABEL: i32 @test__rold
+// CHECK: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+// CHECK: ret i32 [[R]]
+ return __rold(value, shift);
+}
+
+#if defined(__x86_64__)
+unsigned long test__rolq(unsigned long value, int shift) {
+// CHECK-64BIT-LONG-LABEL: i64 @test__rolq
+// CHECK-64BIT-LONG: [[R:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+// CHECK-64BIT-LONG: ret i64 [[R]]
+ return __rolq(value, shift);
+}
+#endif
+
+unsigned char test__rorb(unsigned char value, int shift) {
+// CHECK-LABEL: i8 @test__rorb
+// CHECK: [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]])
+// CHECK: ret i8 [[R]]
+ return __rorb(value, shift);
+}
+
+unsigned short test__rorw(unsigned short value, int shift) {
+// CHECK-LABEL: i16 @test__rorw
+// CHECK: [[R:%.*]] = call i16 @llvm.fshr.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+// CHECK: ret i16 [[R]]
+ return __rorw(value, shift);
+}
+
+unsigned int test__rord(unsigned int value, int shift) {
+// CHECK-LABEL: i32 @test__rord
+// CHECK: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+// CHECK: ret i32 [[R]]
+ return __rord(value, shift);
+}
+
+#if defined(__x86_64__)
+unsigned long test__rorq(unsigned long value, int shift) {
+// CHECK-64BIT-LONG-LABEL: i64 @test__rorq
+// CHECK-64BIT-LONG: [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+// CHECK-64BIT-LONG: ret i64 [[R]]
+ return __rorq(value, shift);
+}
+#endif
+
+unsigned short test_rotwl(unsigned short value, int shift) {
+// CHECK-LABEL: i16 @test_rotwl
+// CHECK: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+// CHECK: ret i16 [[R]]
+ return _rotwl(value, shift);
+}
+
+unsigned int test_rotl(unsigned int value, int shift) {
+// CHECK-LABEL: i32 @test_rotl
+// CHECK: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+// CHECK: ret i32 [[R]]
+ return _rotl(value, shift);
+}
+
+unsigned long test_lrotl(unsigned long value, int shift) {
+// CHECK-32BIT-LONG-LABEL: i32 @test_lrotl
+// CHECK-32BIT-LONG: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+// CHECK-32BIT-LONG: ret i32 [[R]]
+//
+// CHECK-64BIT-LONG-LABEL: i64 @test_lrotl
+// CHECK-64BIT-LONG: [[R:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+// CHECK-64BIT-LONG: ret i64 [[R]]
+ return _lrotl(value, shift);
+}
+
+
+unsigned short test_rotwr(unsigned short value, int shift) {
+// CHECK-LABEL: i16 @test_rotwr
+// CHECK: [[R:%.*]] = call i16 @llvm.fshr.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+// CHECK: ret i16 [[R]]
+ return _rotwr(value, shift);
+}
+
+unsigned int test_rotr(unsigned int value, int shift) {
+// CHECK-LABEL: i32 @test_rotr
+// CHECK: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+// CHECK: ret i32 [[R]]
+ return _rotr(value, shift);
+}
+
+unsigned long test_lrotr(unsigned long value, int shift) {
+// CHECK-32BIT-LONG-LABEL: i32 @test_lrotr
+// CHECK-32BIT-LONG: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+// CHECK-32BIT-LONG: ret i32 [[R]]
+//
+// CHECK-64BIT-LONG-LABEL: i64 @test_lrotr
+// CHECK-64BIT-LONG: [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+// CHECK-64BIT-LONG: ret i64 [[R]]
+ return _lrotr(value, shift);
+}
+
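The `llvm.fshl`/`llvm.fshr` calls checked throughout this file are funnel shifts with both data operands equal, which is how LLVM models rotates. The same intrinsic can be reached from portable C (a sketch independent of these headers):

/* Rotate-left in the masked shift/or idiom; LLVM's optimizer recognizes
   this pattern and canonicalizes it to @llvm.fshl.i32(x, x, n). Masking
   the shift amounts keeps both shifts in range, so n == 0 is well defined. */
static inline unsigned rot_left32(unsigned x, unsigned n) {
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}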
diff --git a/test/CodeGen/sanitize-atomic-int-overflow.c b/test/CodeGen/sanitize-atomic-int-overflow.c
new file mode 100644
index 0000000000..a1064f47c3
--- /dev/null
+++ b/test/CodeGen/sanitize-atomic-int-overflow.c
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 -fsanitize=unsigned-integer-overflow %s -emit-llvm -o - | FileCheck %s
+
+_Atomic(unsigned) atomic;
+
+// CHECK-LABEL: define void @cmpd_assign
+void cmpd_assign() {
+ // CHECK: br label %[[LOOP_START:.*]]
+
+ // CHECK: [[LOOP_START]]:
+ // CHECK-NEXT: phi i32 {{.*}}, [ {{.*}}, %[[INCOMING_BLOCK:.*]] ]
+
+ // CHECK: [[INCOMING_BLOCK]]:
+ // CHECK-NEXT: cmpxchg
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: br i1 %8, label %{{.*}}, label %[[LOOP_START]]
+ atomic += 1;
+}
+
+// CHECK-LABEL: define void @inc
+void inc() {
+ // CHECK: br label %[[LOOP_START:.*]]
+
+ // CHECK: [[LOOP_START]]:
+ // CHECK-NEXT: phi i32 {{.*}}, [ {{.*}}, %[[INCOMING_BLOCK:.*]] ]
+
+ // CHECK: [[INCOMING_BLOCK]]:
+ // CHECK-NEXT: cmpxchg
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: br i1 %8, label %{{.*}}, label %[[LOOP_START]]
+ atomic++;
+}
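
The loop structure these CHECK lines pin down exists because, under -fsanitize=unsigned-integer-overflow, clang lowers compound assignment on an _Atomic value to a compare-exchange loop: the addition happens as an ordinary instruction inside the loop body, where the sanitizer can instrument it. A rough C rendering of that lowering (hypothetical, for illustration only):

_Atomic(unsigned) counter;

void inc_like_the_lowering(void) {
  unsigned old = __c11_atomic_load(&counter, __ATOMIC_SEQ_CST);
  unsigned desired;
  do {
    desired = old + 1; /* plain add: instrumentable by the sanitizer */
  } while (!__c11_atomic_compare_exchange_strong(
      &counter, &old, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
}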
diff --git a/test/CodeGen/set-visibility-for-decls.c b/test/CodeGen/set-visibility-for-decls.c
new file mode 100644
index 0000000000..04232f8715
--- /dev/null
+++ b/test/CodeGen/set-visibility-for-decls.c
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 %s -std=c11 -triple=x86_64-pc-linux -fvisibility hidden -fapply-global-visibility-to-externs -emit-llvm -o - | FileCheck --check-prefix=CHECK-HIDDEN %s
+// RUN: %clang_cc1 %s -std=c11 -triple=x86_64-pc-linux -fvisibility protected -fapply-global-visibility-to-externs -emit-llvm -o - | FileCheck --check-prefix=CHECK-PROTECTED %s
+// RUN: %clang_cc1 %s -std=c11 -triple=x86_64-pc-linux -fvisibility default -fapply-global-visibility-to-externs -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEFAULT %s
+
+// CHECK-HIDDEN: @var_hidden = external hidden global
+// CHECK-PROTECTED: @var_hidden = external hidden global
+// CHECK-DEFAULT: @var_hidden = external hidden global
+__attribute__((visibility("hidden"))) extern int var_hidden;
+// CHECK-HIDDEN: @var_protected = external protected global
+// CHECK-PROTECTED: @var_protected = external protected global
+// CHECK-DEFAULT: @var_protected = external protected global
+__attribute__((visibility("protected"))) extern int var_protected;
+// CHECK-HIDDEN: @var_default = external global
+// CHECK-PROTECTED: @var_default = external global
+// CHECK-DEFAULT: @var_default = external global
+__attribute__((visibility("default"))) extern int var_default;
+// CHECK-HIDDEN: @var = external hidden global
+// CHECK-PROTECTED: @var = external protected global
+// CHECK-DEFAULT: @var = external global
+extern int var;
+
+// CHECK-HIDDEN: declare hidden i32 @func_hidden()
+// CHECK-PROTECTED: declare hidden i32 @func_hidden()
+// CHECK-DEFAULT: declare hidden i32 @func_hidden()
+__attribute__((visibility("hidden"))) int func_hidden(void);
+// CHECK-HIDDEN: declare protected i32 @func_protected()
+// CHECK-PROTECTED: declare protected i32 @func_protected()
+// CHECK-DEFAULT: declare protected i32 @func_protected()
+__attribute__((visibility("protected"))) int func_protected(void);
+// CHECK-HIDDEN: declare i32 @func_default()
+// CHECK-PROTECTED: declare i32 @func_default()
+// CHECK-DEFAULT: declare i32 @func_default()
+__attribute__((visibility("default"))) int func_default(void);
+// CHECK-HIDDEN: declare hidden i32 @func()
+// CHECK-PROTECTED: declare protected i32 @func()
+// CHECK-DEFAULT: declare i32 @func()
+int func(void);
+
+int use() {
+ return var_hidden + var_protected + var_default + var +
+ func_hidden() + func_protected() + func_default() + func();
+}
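The rule this test exercises: -fvisibility sets the default visibility, which applies to definitions; -fapply-global-visibility-to-externs additionally applies it to plain extern declarations, and an explicit visibility attribute always wins. A minimal sketch (hypothetical identifiers):

int defined_here;            /* takes the -fvisibility default             */
extern int plain_extern;     /* takes the default only when
                                -fapply-global-visibility-to-externs is on */
__attribute__((visibility("default")))
extern int forced_default;   /* explicit attribute overrides both flags    */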
diff --git a/test/CodeGen/sparcv9-dwarf.c b/test/CodeGen/sparcv9-dwarf.c
index c75b09fb7d..b893d9ff74 100644
--- a/test/CodeGen/sparcv9-dwarf.c
+++ b/test/CodeGen/sparcv9-dwarf.c
@@ -8,92 +8,92 @@ int test() {
}
// CHECK-LABEL: define signext i32 @test()
-// CHECK: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 0)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 1)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 2)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 3)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 4)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 5)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 6)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 7)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 8)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 9)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 10)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 11)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 12)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 13)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 14)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 15)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 16)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 17)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 18)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 19)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 20)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 21)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 22)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 23)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 24)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 25)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 26)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 27)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 28)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 29)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 30)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 31)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 32)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 33)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 34)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 35)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 36)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 37)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 38)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 39)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 40)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 41)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 42)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 43)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 44)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 45)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 46)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 47)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 48)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 49)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 50)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 51)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 52)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 53)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 54)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 55)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 56)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 57)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 58)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 59)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 60)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 61)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 62)
-// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 63)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 64)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 65)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 66)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 67)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 68)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 69)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 70)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 71)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 72)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 73)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 74)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 75)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 76)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 77)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 78)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 79)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 80)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 81)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 82)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 83)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 84)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 85)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 86)
-// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 87)
+// CHECK: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 0)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 1)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 2)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 3)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 4)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 5)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 6)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 7)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 8)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 9)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 10)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 11)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 12)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 13)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 14)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 15)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 16)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 17)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 18)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 19)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 20)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 21)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 22)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 23)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 24)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 25)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 26)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 27)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 28)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 29)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 30)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 31)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 32)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 33)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 34)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 35)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 36)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 37)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 38)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 39)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 40)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 41)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 42)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 43)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 44)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 45)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 46)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 47)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 48)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 49)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 50)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 51)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 52)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 53)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 54)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 55)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 56)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 57)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 58)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 59)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 60)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 61)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 62)
+// CHECK-NEXT: store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 63)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 64)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 65)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 66)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 67)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 68)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 69)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 70)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 71)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 72)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 73)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 74)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 75)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 76)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 77)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 78)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 79)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 80)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 81)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 82)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 83)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 84)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 85)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 86)
+// CHECK-NEXT: store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i64 0, i64 87)
// CHECK-NEXT: ret i32 14
diff --git a/test/CodeGen/spir-half-type.cpp b/test/CodeGen/spir-half-type.cpp
index b60931fea6..5cdc38e997 100644
--- a/test/CodeGen/spir-half-type.cpp
+++ b/test/CodeGen/spir-half-type.cpp
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -O0 -triple spir -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O0 -triple spir64 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O0 -triple spir -fexperimental-new-pass-manager -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O0 -triple spir64 -fexperimental-new-pass-manager -emit-llvm %s -o - | FileCheck %s
// This file tests that using the _Float16 type with the spir target will not
// use the llvm intrinsics but instead will use the half arithmetic
diff --git a/test/CodeGen/split-debug-filename.c b/test/CodeGen/split-debug-filename.c
index 99f89a7414..b8ce639f0f 100644
--- a/test/CodeGen/split-debug-filename.c
+++ b/test/CodeGen/split-debug-filename.c
@@ -1,8 +1,8 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -debug-info-kind=limited -split-dwarf-file foo.dwo -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -debug-info-kind=limited -enable-split-dwarf -split-dwarf-file foo.dwo -S -emit-llvm -o - %s | FileCheck --check-prefix=VANILLA %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux -debug-info-kind=limited -enable-split-dwarf -split-dwarf-file %t.dwo -emit-obj -o - %s | llvm-objdump -section-headers - | FileCheck --check-prefix=O %s
-// RUN: llvm-objdump -section-headers %t.dwo | FileCheck --check-prefix=DWO %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -debug-info-kind=limited -enable-split-dwarf -split-dwarf-file %t.dwo -emit-obj -o - %s | llvm-readobj -S - | FileCheck --check-prefix=O %s
+// RUN: llvm-readobj -S %t.dwo | FileCheck --check-prefix=DWO %s
int main (void) {
return 0;
diff --git a/test/CodeGen/split-debug-single-file.c b/test/CodeGen/split-debug-single-file.c
index ffbc127a46..5987dc07ab 100644
--- a/test/CodeGen/split-debug-single-file.c
+++ b/test/CodeGen/split-debug-single-file.c
@@ -3,13 +3,13 @@
// Testing to ensure -enable-split-dwarf=single allows placing .dwo sections into the regular output object.
// RUN: %clang_cc1 -debug-info-kind=limited -triple x86_64-unknown-linux \
// RUN: -enable-split-dwarf=single -split-dwarf-file %t.o -emit-obj -o %t.o %s
-// RUN: llvm-objdump -section-headers %t.o | FileCheck --check-prefix=MODE-SINGLE %s
+// RUN: llvm-readobj -S %t.o | FileCheck --check-prefix=MODE-SINGLE %s
// MODE-SINGLE: .dwo
// Testing to ensure -enable-split-dwarf=split does not place .dwo sections into the regular output object.
// RUN: %clang_cc1 -debug-info-kind=limited -triple x86_64-unknown-linux \
// RUN: -enable-split-dwarf=split -split-dwarf-file %t.o -emit-obj -o %t.o %s
-// RUN: llvm-objdump -section-headers %t.o | FileCheck --check-prefix=MODE-SPLIT %s
+// RUN: llvm-readobj -S %t.o | FileCheck --check-prefix=MODE-SPLIT %s
// MODE-SPLIT-NOT: .dwo
int main (void) {
diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c
index e9801487be..9151c93736 100644
--- a/test/CodeGen/sse-builtins.c
+++ b/test/CodeGen/sse-builtins.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
diff --git a/test/CodeGen/sse2-builtins.c b/test/CodeGen/sse2-builtins.c
index 28ee523ac8..acf4b20dd1 100644
--- a/test/CodeGen/sse2-builtins.c
+++ b/test/CodeGen/sse2-builtins.c
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
@@ -99,25 +100,13 @@ __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_avg_epu8
- // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
- // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
- // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
- // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
- // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- // CHECK:trunc <16 x i16> %{{.*}} to <16 x i8>
+ // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_avg_epu8(A, B);
}
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_avg_epu16
- // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
- // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
- // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+ // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_avg_epu16(A, B);
}
diff --git a/test/CodeGen/target-builtin-noerror.c b/test/CodeGen/target-builtin-noerror.c
index 400c5e696a..364eae7ca6 100644
--- a/test/CodeGen/target-builtin-noerror.c
+++ b/test/CodeGen/target-builtin-noerror.c
@@ -98,6 +98,7 @@ void verifycpustrings() {
(void)__builtin_cpu_is("btver1");
(void)__builtin_cpu_is("btver2");
(void)__builtin_cpu_is("cannonlake");
+ (void)__builtin_cpu_is("cascadelake");
(void)__builtin_cpu_is("core2");
(void)__builtin_cpu_is("corei7");
(void)__builtin_cpu_is("goldmont");
@@ -120,4 +121,5 @@ void verifycpustrings() {
(void)__builtin_cpu_is("tremont");
(void)__builtin_cpu_is("westmere");
(void)__builtin_cpu_is("znver1");
+ (void)__builtin_cpu_is("znver2");
}
diff --git a/test/CodeGen/target-data.c b/test/CodeGen/target-data.c
index 0c2b1e4cff..c7ce89df77 100644
--- a/test/CodeGen/target-data.c
+++ b/test/CodeGen/target-data.c
@@ -96,7 +96,7 @@
// RUN: %clang_cc1 -triple arm-nacl -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=ARM-NACL
-// ARM-NACL: target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S128"
+// ARM-NACL: target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S128"
// RUN: %clang_cc1 -triple mipsel-nacl -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=MIPS-NACL
@@ -152,12 +152,12 @@
// RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \
// RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
// Test default -target-cpu
// RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
// RUN: | FileCheck %s -check-prefix=R600SIDefault
-// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
// RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=AARCH64
@@ -165,19 +165,19 @@
// RUN: %clang_cc1 -triple thumb-unknown-gnueabi -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=THUMB
-// THUMB: target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+// THUMB: target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
// RUN: %clang_cc1 -triple arm-unknown-gnueabi -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=ARM
-// ARM: target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+// ARM: target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
// RUN: %clang_cc1 -triple thumb-unknown -o - -emit-llvm -target-abi apcs-gnu \
// RUN: %s | FileCheck %s -check-prefix=THUMB-GNU
-// THUMB-GNU: target datalayout = "e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+// THUMB-GNU: target datalayout = "e-m:e-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
// RUN: %clang_cc1 -triple arm-unknown -o - -emit-llvm -target-abi apcs-gnu \
// RUN: %s | FileCheck %s -check-prefix=ARM-GNU
-// ARM-GNU: target datalayout = "e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+// ARM-GNU: target datalayout = "e-m:e-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
// RUN: %clang_cc1 -triple arc-unknown-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=ARC
diff --git a/test/CodeGen/thinlto-debug-pm.c b/test/CodeGen/thinlto-debug-pm.c
index 2accde1f36..dc3bd33883 100644
--- a/test/CodeGen/thinlto-debug-pm.c
+++ b/test/CodeGen/thinlto-debug-pm.c
@@ -1,10 +1,17 @@
-// Test to ensure -fdebug-pass-manager works when invoking the
-// ThinLTO backend path with the new PM.
+// Test to ensure the opt level is passed down to the ThinLTO backend.
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
// RUN: llvm-lto -thinlto -o %t %t.o
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s
-// CHECK: Running pass:
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NEWPM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-NEWPM
+// O2-NEWPM: Running pass: LoopVectorizePass
+// O0-NEWPM-NOT: Running pass: LoopVectorizePass
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O2-OLDPM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O0-OLDPM
+// O2-OLDPM: Loop Vectorization
+// O0-OLDPM-NOT: Loop Vectorization
void foo() {
}
diff --git a/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/test/CodeGen/thinlto-distributed-cfi-devirt.ll
index 2ecf149b06..0bc23eb773 100644
--- a/test/CodeGen/thinlto-distributed-cfi-devirt.ll
+++ b/test/CodeGen/thinlto-distributed-cfi-devirt.ll
@@ -39,12 +39,12 @@
; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi"))))) ; guid = 7004155349499253778
; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
-; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc -O2 \
; RUN: -emit-llvm -o - -x ir %t.o | FileCheck %s --check-prefixes=CHECK-IR
; Check that the backend does not fail when generating native code.
; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
-; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc -O2 \
; RUN: -o %t.native.o -x ir %t.o
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/CodeGen/thinlto-split-dwarf.c b/test/CodeGen/thinlto-split-dwarf.c
index 2a0d82b34c..4391345ad4 100644
--- a/test/CodeGen/thinlto-split-dwarf.c
+++ b/test/CodeGen/thinlto-split-dwarf.c
@@ -12,8 +12,8 @@
// RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc \
// RUN: -o %t.native.o -split-dwarf-file %t.native.dwo -x ir %t.o
-// RUN: llvm-readobj -sections %t.native.o | FileCheck --check-prefix=O %s
-// RUN: llvm-readobj -sections %t.native.dwo | FileCheck --check-prefix=DWO %s
+// RUN: llvm-readobj -S %t.native.o | FileCheck --check-prefix=O %s
+// RUN: llvm-readobj -S %t.native.dwo | FileCheck --check-prefix=DWO %s
// O-NOT: .dwo
// DWO: .dwo
diff --git a/test/CodeGen/ubsan-asan-noreturn.c b/test/CodeGen/ubsan-asan-noreturn.c
new file mode 100644
index 0000000000..516c58469d
--- /dev/null
+++ b/test/CodeGen/ubsan-asan-noreturn.c
@@ -0,0 +1,24 @@
+// Ensure compatibility of UBSan unreachable with ASan in the presence of
+// noreturn functions.
+// RUN: %clang_cc1 -fsanitize=unreachable,address -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=unreachable,kernel-address -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
+
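+// A noreturn callee such as longjmp can transfer control without running
+// normal cleanups, so clang emits __asan_handle_no_return before the call,
+// letting ASan unpoison the stack and avoid false positives afterwards.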
+void my_longjmp(void) __attribute__((noreturn));
+
+// CHECK-LABEL: define void @calls_noreturn()
+void calls_noreturn() {
+ my_longjmp();
+ // CHECK: @__asan_handle_no_return{{.*}} !nosanitize
+ // CHECK-NEXT: @my_longjmp(){{[^#]*}}
+ // CHECK: @__ubsan_handle_builtin_unreachable{{.*}} !nosanitize
+ // CHECK-NEXT: unreachable
+}
+
+// CHECK: declare void @my_longjmp() [[FN_ATTR:#[0-9]+]]
+// CHECK: declare void @__asan_handle_no_return()
+
+// CHECK-LABEL: attributes
+// CHECK-NOT: [[FN_ATTR]] = { {{.*noreturn.*}} }
diff --git a/test/CodeGen/unreachable-ret.c b/test/CodeGen/unreachable-ret.c
new file mode 100644
index 0000000000..386b83e9ef
--- /dev/null
+++ b/test/CodeGen/unreachable-ret.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+
+extern void abort() __attribute__((noreturn));
+
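+// Since abort() cannot return, clang terminates each calling block with
+// unreachable instead of falling through to a ret.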
+void f1() {
+ abort();
+}
+// CHECK-LABEL: define {{.*}}void @f1()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @abort()
+// CHECK-NEXT: unreachable
+// CHECK-NEXT: }
+
+void *f2() {
+ abort();
+ return 0;
+}
+// CHECK-LABEL: define {{.*}}i8* @f2()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @abort()
+// CHECK-NEXT: unreachable
+// CHECK-NEXT: }
+
diff --git a/test/CodeGen/wasm-import-module.c b/test/CodeGen/wasm-import-module.c
new file mode 100644
index 0000000000..866a3a4599
--- /dev/null
+++ b/test/CodeGen/wasm-import-module.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-llvm -o - %s | FileCheck %s
+
+void __attribute__((import_module("bar"))) foo(void);
+
+void call(void) {
+ foo();
+}
+
+// CHECK: declare void @foo() [[A:#[0-9]+]]
+
+// CHECK: attributes [[A]] = {{{.*}} "wasm-import-module"="bar" {{.*}}}
diff --git a/test/CodeGen/wasm-import-name.c b/test/CodeGen/wasm-import-name.c
new file mode 100644
index 0000000000..7c3b094b9e
--- /dev/null
+++ b/test/CodeGen/wasm-import-name.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-llvm -o - %s | FileCheck %s
+
+void __attribute__((import_name("bar"))) foo(void);
+
+void call(void) {
+ foo();
+}
+
+// CHECK: declare void @foo() [[A:#[0-9]+]]
+
+// CHECK: attributes [[A]] = {{{.*}} "wasm-import-name"="bar" {{.*}}}
diff --git a/test/CodeGen/x86-64-inline-asm.c b/test/CodeGen/x86-64-inline-asm.c
index bb46eda633..80ae0a46a2 100644
--- a/test/CodeGen/x86-64-inline-asm.c
+++ b/test/CodeGen/x86-64-inline-asm.c
@@ -1,6 +1,7 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -triple x86_64 %s -S -o /dev/null -DWARN -verify
// RUN: %clang_cc1 -triple x86_64 %s -S -o /dev/null -Werror -verify
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -S -o - | FileCheck %s
void f() {
asm("movaps %xmm3, (%esi, 2)");
// expected-note@1 {{instantiated into assembly here}}
@@ -15,3 +16,21 @@ static unsigned var[1] = {};
void g(void) { asm volatile("movd %%xmm0, %0"
:
: "m"(var)); }
+
+void pr40890(void) {
+ struct s {
+ int a, b;
+ } s;
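+ // The "n" constraint only accepts integer constant expressions, so these
+ // member offsets computed from a null pointer must fold to immediates.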
+ __asm__ __volatile__("\n#define S_A abcd%0\n" : : "n"(&((struct s*)0)->a));
+ __asm__ __volatile__("\n#define S_B abcd%0\n" : : "n"(&((struct s*)0)->b));
+ __asm__ __volatile__("\n#define BEEF abcd%0\n" : : "n"((int*)0xdeadbeeeeeef));
+ __asm__ __volatile__("movabsq %0, %%rax" : : "n"(4624529908474429119));
+
+// CHECK-LABEL: pr40890
+// CHECK: #define S_A abcd$0
+// CHECK: #define S_B abcd$4
+// CHECK: #define BEEF abcd$244837814038255
+// CHECK: movabsq $4624529908474429119, %rax
+}
diff --git a/test/CodeGen/x86-bswap.c b/test/CodeGen/x86-bswap.c
new file mode 100644
index 0000000000..adf8b7846a
--- /dev/null
+++ b/test/CodeGen/x86-bswap.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
+
+#include <x86intrin.h>
+
+int test__bswapd(int X) {
+// CHECK-LABEL: @test__bswapd
+// CHECK: call i32 @llvm.bswap.i32
+ return __bswapd(X);
+}
+
+int test_bswap(int X) {
+// CHECK-LABEL: @test_bswap
+// CHECK: call i32 @llvm.bswap.i32
+ return _bswap(X);
+}
+
+long test__bswapq(long long X) {
+// CHECK-LABEL: @test__bswapq
+// CHECK: call i64 @llvm.bswap.i64
+ return __bswapq(X);
+}
+
+long test_bswap64(long long X) {
+// CHECK-LABEL: @test_bswap64
+// CHECK: call i64 @llvm.bswap.i64
+ return _bswap64(X);
+}
+
+
diff --git a/test/CodeGen/x86-crc-builtins.c b/test/CodeGen/x86-crc-builtins.c
new file mode 100644
index 0000000000..de6869a044
--- /dev/null
+++ b/test/CodeGen/x86-crc-builtins.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <x86intrin.h>
+
+unsigned int test__crc32b(unsigned int CRC, unsigned char V) {
+// CHECK-LABEL: test__crc32b
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+ return __crc32b(CRC, V);
+}
+
+unsigned int test__crc32w(unsigned int CRC, unsigned short V) {
+// CHECK-LABEL: test__crc32w
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.16(i32 %{{.*}}, i16 %{{.*}})
+ return __crc32w(CRC, V);
+}
+
+unsigned int test__crc32d(unsigned int CRC, unsigned int V) {
+// CHECK-LABEL: test__crc32d
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.32(i32 %{{.*}}, i32 %{{.*}})
+ return __crc32d(CRC, V);
+}
+
+#ifdef __x86_64__
+unsigned long long test__crc32q(unsigned long long CRC, unsigned long long V) {
+// CHECK64-LABEL: test__crc32q
+// CHECK64: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}})
+ return __crc32q(CRC, V);
+}
+#endif
diff --git a/test/CodeGen/x86-vec-struct-packing.c b/test/CodeGen/x86-vec-struct-packing.c
new file mode 100644
index 0000000000..01458d131e
--- /dev/null
+++ b/test/CodeGen/x86-vec-struct-packing.c
@@ -0,0 +1,231 @@
+// RUN: %clang_cc1 -ffreestanding -emit-llvm-only -triple x86_64-windows-coff -fdump-record-layouts %s | FileCheck %s --check-prefix=CHECK-MS
+// RUN: %clang_cc1 -ffreestanding -emit-llvm-only -triple x86_64-apple-darwin -fdump-record-layouts %s | FileCheck %s --check-prefix=CHECK-NOTMS
+#include <x86intrin.h>
+
+#pragma pack(1)
+
+struct s_m64 {
+ int a;
+ __m64 b;
+};
+typedef struct s_m64 m64;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m64) == 16) - 1];
+#else
+static int a1[(sizeof(m64) == 12) - 1];
+#endif
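+
+// Each a1 array is a compile-time size assertion: a failed sizeof check
+// yields a negative array bound and a hard error, while a passing check
+// yields a zero-length array, accepted as a GNU extension.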
+
+struct s_m128 {
+ int a;
+ __m128 b;
+};
+typedef struct s_m128 m128;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m128) == 32) - 1];
+#else
+static int a1[(sizeof(m128) == 20) - 1];
+#endif
+
+struct s_m128i {
+ int a;
+ __m128i b;
+};
+typedef struct s_m128i m128i;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m128i) == 32) - 1];
+#else
+static int a1[(sizeof(m128i) == 20) - 1];
+#endif
+
+struct s_m128d {
+ int a;
+ __m128d b;
+};
+typedef struct s_m128d m128d;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m128d) == 32) - 1];
+#else
+static int a1[(sizeof(m128d) == 20) - 1];
+#endif
+
+struct s_m256 {
+ int a;
+ __m256 b;
+};
+typedef struct s_m256 m256;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m256) == 64) - 1];
+#else
+static int a1[(sizeof(m256) == 36) - 1];
+#endif
+
+struct s_m256i {
+ int a;
+ __m256i b;
+};
+typedef struct s_m256i m256i;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m256i) == 64) - 1];
+#else
+static int a1[(sizeof(m256i) == 36) - 1];
+#endif
+
+struct s_m256d {
+ int a;
+ __m256d b;
+};
+typedef struct s_m256d m256d;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m256d) == 64) - 1];
+#else
+static int a1[(sizeof(m256d) == 36) - 1];
+#endif
+
+struct s_m512 {
+ int a;
+ __m512 b;
+};
+typedef struct s_m512 m512;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m512) == 128) - 1];
+#else
+static int a1[(sizeof(m512) == 68) - 1];
+#endif
+
+struct s_m512i {
+ int a;
+ __m512i b;
+};
+typedef struct s_m512i m512i;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m512i) == 128) - 1];
+#else
+static int a1[(sizeof(m512i) == 68) - 1];
+#endif
+
+struct s_m512d {
+ int a;
+ __m512d b;
+};
+typedef struct s_m512d m512d;
+
+#if defined(_WIN32)
+static int a1[(sizeof(m512d) == 128) - 1];
+#else
+static int a1[(sizeof(m512d) == 68) - 1];
+#endif
+
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m64
+// CHECK-MS: 0 | int a
+// CHECK-MS: 8 | __m64 b
+// CHECK-MS: | [sizeof=16, align=8]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m128
+// CHECK-MS: 0 | int a
+// CHECK-MS: 16 | __m128 b
+// CHECK-MS: | [sizeof=32, align=16]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m128i
+// CHECK-MS: 0 | int a
+// CHECK-MS: 16 | __m128i b
+// CHECK-MS: | [sizeof=32, align=16]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m128d
+// CHECK-MS: 0 | int a
+// CHECK-MS: 16 | __m128d b
+// CHECK-MS: | [sizeof=32, align=16]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m256
+// CHECK-MS: 0 | int a
+// CHECK-MS: 32 | __m256 b
+// CHECK-MS: | [sizeof=64, align=32]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m256i
+// CHECK-MS: 0 | int a
+// CHECK-MS: 32 | __m256i b
+// CHECK-MS: | [sizeof=64, align=32]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m256d
+// CHECK-MS: 0 | int a
+// CHECK-MS: 32 | __m256d b
+// CHECK-MS: | [sizeof=64, align=32]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m512
+// CHECK-MS: 0 | int a
+// CHECK-MS: 64 | __m512 b
+// CHECK-MS: | [sizeof=128, align=64]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m512i
+// CHECK-MS: 0 | int a
+// CHECK-MS: 64 | __m512i b
+// CHECK-MS: | [sizeof=128, align=64]
+// CHECK-MS: *** Dumping AST Record Layout
+// CHECK-MS: 0 | struct s_m512d
+// CHECK-MS: 0 | int a
+// CHECK-MS: 64 | __m512d b
+// CHECK-MS: | [sizeof=128, align=64]
+
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m64
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m64 b
+// CHECK-NOTMS: | [sizeof=12, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m128
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m128 b
+// CHECK-NOTMS: | [sizeof=20, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m128i
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m128i b
+// CHECK-NOTMS: | [sizeof=20, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m128d
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m128d b
+// CHECK-NOTMS: | [sizeof=20, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m256
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m256 b
+// CHECK-NOTMS: | [sizeof=36, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m256i
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m256i b
+// CHECK-NOTMS: | [sizeof=36, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m256d
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m256d b
+// CHECK-NOTMS: | [sizeof=36, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m512
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m512 b
+// CHECK-NOTMS: | [sizeof=68, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m512i
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m512i b
+// CHECK-NOTMS: | [sizeof=68, align=1]
+// CHECK-NOTMS: *** Dumping AST Record Layout
+// CHECK-NOTMS: 0 | struct s_m512d
+// CHECK-NOTMS: 0 | int a
+// CHECK-NOTMS: 4 | __m512d b
+// CHECK-NOTMS: | [sizeof=68, align=1]
diff --git a/test/CodeGen/x86_32-xsave.c b/test/CodeGen/x86_32-xsave.c
index f5d84e2d92..e1acdff124 100644
--- a/test/CodeGen/x86_32-xsave.c
+++ b/test/CodeGen/x86_32-xsave.c
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
+// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XGETBV
+// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSETBV
+
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
@@ -10,9 +13,15 @@
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
+// Don't include mm_malloc.h; it's system-specific.
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
void test() {
- unsigned long long tmp_ULLi = 0;
- void* tmp_vp = 0;
+ unsigned long long tmp_ULLi;
+ unsigned int tmp_Ui;
+ void* tmp_vp;
+ tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0;
#ifdef TEST_XSAVE
// XSAVE: [[tmp_vp_1:%[0-9a-zA-Z]+]] = load i8*, i8** %tmp_vp, align 4
@@ -30,6 +39,12 @@ void test() {
// XSAVE: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
// XSAVE: call void @llvm.x86.xrstor(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
(void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xsave
+ (void)_xsave(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xrstor
+ (void)_xrstor(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEOPT
@@ -40,6 +55,9 @@ void test() {
// XSAVEOPT: [[low32_1:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
// XSAVEOPT: call void @llvm.x86.xsaveopt(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
(void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: call void @llvm.x86.xsaveopt
+ (void)_xsaveopt(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEC
@@ -50,6 +68,9 @@ void test() {
// XSAVEC: [[low32_1:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
// XSAVEC: call void @llvm.x86.xsavec(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
(void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi);
+
+// XSAVEC: call void @llvm.x86.xsavec
+ (void)_xsavec(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVES
@@ -68,5 +89,36 @@ void test() {
// XSAVES: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
// XSAVES: call void @llvm.x86.xrstors(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
(void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xsaves
+ (void)_xsaves(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xrstors
+ (void)_xrstors(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XGETBV
+// XGETBV: [[tmp_Ui:%[0-9a-zA-Z]+]] = load i32, i32* %tmp_Ui, align 4
+// XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]])
+ tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+
+// XGETBV: call i64 @llvm.x86.xgetbv
+ tmp_ULLi = _xgetbv(tmp_Ui);
+#endif
+
+#ifdef TEST_XSETBV
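+// xsetbv takes the new XCR value in edx:eax, so the 64-bit argument is
+// split into 32-bit halves before the intrinsic call, as checked below.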
+// XSETBV: [[tmp_Ui:%[0-9a-zA-Z]+]] = load i32, i32* %tmp_Ui, align 4
+// XSETBV: [[tmp_ULLi_3:%[0-9a-zA-Z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSETBV: [[high64_3:%[0-9a-zA-Z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSETBV: [[high32_3:%[0-9a-zA-Z]+]] = trunc i64 [[high64_3]] to i32
+// XSETBV: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui]], i32 [[high32_3]], i32 [[low32_3]])
+ (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
+
+// XSETBV: call void @llvm.x86.xsetbv
+ (void)_xsetbv(tmp_Ui, tmp_ULLi);
#endif
+
}
diff --git a/test/CodeGen/x86_64-xsave.c b/test/CodeGen/x86_64-xsave.c
index beb775c0e4..cfc33cb067 100644
--- a/test/CodeGen/x86_64-xsave.c
+++ b/test/CodeGen/x86_64-xsave.c
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
+// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XGETBV
+// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSETBV
+
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
@@ -10,9 +13,16 @@
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
+// Don't include mm_malloc.h; it's system-specific.
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
+
void test() {
- unsigned long long tmp_ULLi = 0;
- void* tmp_vp = 0;
+ unsigned long long tmp_ULLi;
+ unsigned int tmp_Ui;
+ void* tmp_vp;
+ tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0;
#ifdef TEST_XSAVE
// XSAVE: [[tmp_vp_1:%[0-9a-zA-Z]+]] = load i8*, i8** %tmp_vp, align 8
@@ -46,6 +56,18 @@ void test() {
// XSAVE: [[low32_4:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_4]] to i32
// XSAVE: call void @llvm.x86.xrstor64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
(void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xsave
+ (void)_xsave(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xsave64
+ (void)_xsave64(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xrstor
+ (void)_xrstor(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xrstor64
+ (void)_xrstor64(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEOPT
@@ -64,6 +86,12 @@ void test() {
// XSAVEOPT: [[low32_2:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
// XSAVEOPT: call void @llvm.x86.xsaveopt64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
(void)__builtin_ia32_xsaveopt64(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: call void @llvm.x86.xsaveopt
+ (void)_xsaveopt(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: call void @llvm.x86.xsaveopt64
+ (void)_xsaveopt64(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEC
@@ -82,6 +110,12 @@ void test() {
// XSAVEC: [[low32_2:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
// XSAVEC: call void @llvm.x86.xsavec64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
(void)__builtin_ia32_xsavec64(tmp_vp, tmp_ULLi);
+
+// XSAVEC: call void @llvm.x86.xsavec
+ (void)_xsavec(tmp_vp, tmp_ULLi);
+
+// XSAVEC: call void @llvm.x86.xsavec64
+ (void)_xsavec64(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVES
@@ -116,5 +150,39 @@ void test() {
// XSAVES: [[low32_4:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_4]] to i32
// XSAVES: call void @llvm.x86.xrstors64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
(void)__builtin_ia32_xrstors64(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xsaves
+ (void)_xsaves(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xsaves64
+ (void)_xsaves64(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xrstors
+ (void)_xrstors(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xrstors64
+ (void)_xrstors64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XGETBV
+// XGETBV: [[tmp_Ui:%[0-9a-zA-Z]+]] = load i32, i32* %tmp_Ui, align 4
+// XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]])
+ tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+
+// XGETBV: call i64 @llvm.x86.xgetbv
+ tmp_ULLi = _xgetbv(tmp_Ui);
+#endif
+
+#ifdef TEST_XSETBV
+// XSETBV: [[tmp_Ui:%[0-9a-zA-Z]+]] = load i32, i32* %tmp_Ui, align 4
+// XSETBV: [[tmp_ULLi_3:%[0-9a-zA-Z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSETBV: [[high64_3:%[0-9a-zA-Z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSETBV: [[high32_3:%[0-9a-zA-Z]+]] = trunc i64 [[high64_3]] to i32
+// XSETBV: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui]], i32 [[high32_3]], i32 [[low32_3]])
+ (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
+
+// XSETBV: call void @llvm.x86.xsetbv
+ (void)_xsetbv(tmp_Ui, tmp_ULLi);
#endif
}
diff --git a/test/CodeGen/xop-builtins-cmp.c b/test/CodeGen/xop-builtins-cmp.c
index a805352ad3..4fbe6d0e70 100644
--- a/test/CodeGen/xop-builtins-cmp.c
+++ b/test/CodeGen/xop-builtins-cmp.c
@@ -8,49 +8,57 @@
__m128i test_mm_comlt_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
+ // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comlt_epu8(a, b);
}
__m128i test_mm_comlt_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
+ // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comlt_epu16(a, b);
}
__m128i test_mm_comlt_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
+ // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comlt_epu32(a, b);
}
__m128i test_mm_comlt_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
+ // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comlt_epu64(a, b);
}
__m128i test_mm_comlt_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
+ // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comlt_epi8(a, b);
}
__m128i test_mm_comlt_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
+ // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comlt_epi16(a, b);
}
__m128i test_mm_comlt_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
+ // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comlt_epi32(a, b);
}
__m128i test_mm_comlt_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comlt_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
+ // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comlt_epi64(a, b);
}
@@ -58,49 +66,57 @@ __m128i test_mm_comlt_epi64(__m128i a, __m128i b) {
__m128i test_mm_comle_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1)
+ // CHECK: icmp ule <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comle_epu8(a, b);
}
__m128i test_mm_comle_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 1)
+ // CHECK: icmp ule <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comle_epu16(a, b);
}
__m128i test_mm_comle_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 1)
+ // CHECK: icmp ule <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comle_epu32(a, b);
}
__m128i test_mm_comle_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 1)
+ // CHECK: icmp ule <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comle_epu64(a, b);
}
__m128i test_mm_comle_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1)
+ // CHECK: icmp sle <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comle_epi8(a, b);
}
__m128i test_mm_comle_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 1)
+ // CHECK: icmp sle <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comle_epi16(a, b);
}
__m128i test_mm_comle_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 1)
+ // CHECK: icmp sle <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comle_epi32(a, b);
}
__m128i test_mm_comle_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comle_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 1)
+ // CHECK: icmp sle <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comle_epi64(a, b);
}
@@ -108,49 +124,57 @@ __m128i test_mm_comle_epi64(__m128i a, __m128i b) {
__m128i test_mm_comgt_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 2)
+ // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comgt_epu8(a, b);
}
__m128i test_mm_comgt_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 2)
+ // CHECK: icmp ugt <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comgt_epu16(a, b);
}
__m128i test_mm_comgt_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 2)
+ // CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comgt_epu32(a, b);
}
__m128i test_mm_comgt_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+ // CHECK: icmp ugt <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comgt_epu64(a, b);
}
__m128i test_mm_comgt_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 2)
+ // CHECK: icmp sgt <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comgt_epi8(a, b);
}
__m128i test_mm_comgt_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 2)
+ // CHECK: icmp sgt <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comgt_epi16(a, b);
}
__m128i test_mm_comgt_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 2)
+ // CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comgt_epi32(a, b);
}
__m128i test_mm_comgt_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comgt_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+ // CHECK: icmp sgt <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comgt_epi64(a, b);
}
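Note how the epu variants check icmp ugt while the epi variants check icmp sgt; the distinction is observable whenever a lane's top bit is set. A hypothetical demonstration (assumes <x86intrin.h> and -mxop; values chosen purely for illustration):

/* 0x80 is 128 unsigned but -128 signed, so the two predicates diverge. */
__m128i hi  = _mm_set1_epi8((char)0x80);
__m128i one = _mm_set1_epi8(1);
__m128i u = _mm_comgt_epu8(hi, one);  /* all-ones: 128 > 1 (ugt) */
__m128i s = _mm_comgt_epi8(hi, one);  /* all-zero: -128 > 1 is false (sgt) */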
@@ -158,49 +182,57 @@ __m128i test_mm_comgt_epi64(__m128i a, __m128i b) {
__m128i test_mm_comge_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 3)
+ // CHECK: icmp uge <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comge_epu8(a, b);
}
__m128i test_mm_comge_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 3)
+ // CHECK: icmp uge <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comge_epu16(a, b);
}
__m128i test_mm_comge_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 3)
+ // CHECK: icmp uge <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comge_epu32(a, b);
}
__m128i test_mm_comge_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 3)
+ // CHECK: icmp uge <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comge_epu64(a, b);
}
__m128i test_mm_comge_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 3)
+ // CHECK: icmp sge <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comge_epi8(a, b);
}
__m128i test_mm_comge_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 3)
+ // CHECK: icmp sge <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comge_epi16(a, b);
}
__m128i test_mm_comge_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 3)
+ // CHECK: icmp sge <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comge_epi32(a, b);
}
__m128i test_mm_comge_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comge_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 3)
+ // CHECK: icmp sge <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comge_epi64(a, b);
}
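Taken together, the removed CHECK lines document the predicate encoding carried in the intrinsic's i8 immediate, which the new generic IR patterns replace. A summary of that mapping as it appears across these hunks (enum name and labels are illustrative only):

enum xop_com_pred {
  XOP_COM_LT    = 0, /* icmp ult / slt */
  XOP_COM_LE    = 1, /* icmp ule / sle */
  XOP_COM_GT    = 2, /* icmp ugt / sgt */
  XOP_COM_GE    = 3, /* icmp uge / sge */
  XOP_COM_EQ    = 4, /* icmp eq */
  XOP_COM_NE    = 5, /* icmp ne */
  XOP_COM_FALSE = 6, /* constant zero */
  XOP_COM_TRUE  = 7  /* constant all-ones */
};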
@@ -208,49 +240,57 @@ __m128i test_mm_comge_epi64(__m128i a, __m128i b) {
__m128i test_mm_comeq_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 4)
+ // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comeq_epu8(a, b);
}
__m128i test_mm_comeq_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 4)
+ // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comeq_epu16(a, b);
}
__m128i test_mm_comeq_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 4)
+ // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comeq_epu32(a, b);
}
__m128i test_mm_comeq_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 4)
+ // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comeq_epu64(a, b);
}
__m128i test_mm_comeq_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 4)
+ // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comeq_epi8(a, b);
}
__m128i test_mm_comeq_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 4)
+ // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comeq_epi16(a, b);
}
__m128i test_mm_comeq_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 4)
+ // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comeq_epi32(a, b);
}
__m128i test_mm_comeq_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comeq_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 4)
+ // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comeq_epi64(a, b);
}
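Equality carries no signedness, so the epu and epi variants above now check the identical icmp eq pattern. A hypothetical spot check of that symmetry:

__m128i x   = _mm_set1_epi8(-1);
__m128i e_u = _mm_comeq_epu8(x, x);  /* all-ones */
__m128i e_s = _mm_comeq_epi8(x, x);  /* same result: eq ignores sign */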
@@ -258,49 +298,57 @@ __m128i test_mm_comeq_epi64(__m128i a, __m128i b) {
__m128i test_mm_comneq_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 5)
+ // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comneq_epu8(a, b);
}
__m128i test_mm_comneq_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 5)
+ // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comneq_epu16(a, b);
}
__m128i test_mm_comneq_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 5)
+ // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comneq_epu32(a, b);
}
__m128i test_mm_comneq_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 5)
+ // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comneq_epu64(a, b);
}
__m128i test_mm_comneq_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 5)
+ // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_comneq_epi8(a, b);
}
__m128i test_mm_comneq_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 5)
+ // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_comneq_epi16(a, b);
}
__m128i test_mm_comneq_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 5)
+ // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_comneq_epi32(a, b);
}
__m128i test_mm_comneq_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comneq_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 5)
+ // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_comneq_epi64(a, b);
}
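Since every lane of these results is all-ones or all-zero, comneq is per-lane the bitwise complement of comeq. A hypothetical identity that should hold for any inputs a and b:

__m128i ne_mask = _mm_comneq_epu32(a, b);
__m128i eq_flip = _mm_xor_si128(_mm_comeq_epu32(a, b), _mm_set1_epi32(-1));
/* ne_mask and eq_flip are bit-identical */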
@@ -308,49 +356,49 @@ __m128i test_mm_comneq_epi64(__m128i a, __m128i b) {
__m128i test_mm_comfalse_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epu8(a, b);
}
__m128i test_mm_comfalse_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epu16(a, b);
}
__m128i test_mm_comfalse_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epu32(a, b);
}
__m128i test_mm_comfalse_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epu64(a, b);
}
__m128i test_mm_comfalse_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epi8(a, b);
}
__m128i test_mm_comfalse_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epi16(a, b);
}
__m128i test_mm_comfalse_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epi32(a, b);
}
__m128i test_mm_comfalse_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comfalse_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 6)
+ // CHECK: ret <2 x i64> zeroinitializer
return _mm_comfalse_epi64(a, b);
}
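Predicate 6 ("false") ignores both operands, so these tests now just match the folded return value; the constant is always typed <2 x i64> because __m128i lowers to that regardless of element width. Hypothetically, at the source level:

__m128i z = _mm_comfalse_epi32(a, b);                 /* always zero */
int is_zero = _mm_testz_si128(z, _mm_set1_epi32(-1)); /* 1 (SSE4.1 testz) */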
@@ -358,48 +406,48 @@ __m128i test_mm_comfalse_epi64(__m128i a, __m128i b) {
__m128i test_mm_comtrue_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epu8(a, b);
}
__m128i test_mm_comtrue_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epu16(a, b);
}
__m128i test_mm_comtrue_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epu32(a, b);
}
__m128i test_mm_comtrue_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epu64(a, b);
}
__m128i test_mm_comtrue_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epi8(a, b);
}
__m128i test_mm_comtrue_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epi16(a, b);
}
__m128i test_mm_comtrue_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epi32(a, b);
}
__m128i test_mm_comtrue_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_comtrue_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 7)
+ // CHECK: ret <2 x i64> <i64 -1, i64 -1>
return _mm_comtrue_epi64(a, b);
}
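Predicate 7 ("true") is the dual: the result folds to all-ones, hence the <i64 -1, i64 -1> return. A hypothetical source-level equivalent:

__m128i ones = _mm_comtrue_epu16(a, b);
/* same value as _mm_set1_epi64x(-1) for any a, b */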
diff --git a/test/CodeGen/xop-builtins.c b/test/CodeGen/xop-builtins.c
index e6a09007f7..01d77ce056 100644
--- a/test/CodeGen/xop-builtins.c
+++ b/test/CodeGen/xop-builtins.c
@@ -290,49 +290,57 @@ __m128i test_mm_sha_epi64(__m128i a, __m128i b) {
__m128i test_mm_com_epu8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epu8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
+ // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_com_epu8(a, b, 0);
}
__m128i test_mm_com_epu16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epu16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
+ // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_com_epu16(a, b, 0);
}
__m128i test_mm_com_epu32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epu32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
+ // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_com_epu32(a, b, 0);
}
__m128i test_mm_com_epu64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epu64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
+ // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_com_epu64(a, b, 0);
}
__m128i test_mm_com_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epi8
- // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
+ // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}}
+ // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
return _mm_com_epi8(a, b, 0);
}
__m128i test_mm_com_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epi16
- // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
+ // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}}
+ // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
return _mm_com_epi16(a, b, 0);
}
__m128i test_mm_com_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epi32
- // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
+ // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
return _mm_com_epi32(a, b, 0);
}
__m128i test_mm_com_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_com_epi64
- // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
+ // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_com_epi64(a, b, 0);
}
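The generic _mm_com_* form passes the predicate immediate straight through; with 0 these tests pin down the less-than lowering, which the named "lt" intrinsics should share. A hypothetical equivalence (assuming xopintrin.h also provides _mm_comlt_epu8):

__m128i g = _mm_com_epu8(a, b, 0);  /* icmp ult + sext */
__m128i n = _mm_comlt_epu8(a, b);   /* expected same lowering */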