Recommit r343498 "[X86] Improve test instruction shrinking when the sign flag is used and the output of the and is truncated."

This includes a fix to prevent i16 compares with i32/i64 ands from being shrunk if bit 15 of the and is set and the sign bit is used. Original commit message: Currently we skip looking through truncates if the sign flag is used. But that's overly restrictive. It's safe to look through the truncate as long as we ensure one of the 3 things when we shrink. Either the MSB of the mask at the shrunken size isn't set. If the mask bit is set then either the shrunk size needs to be equal to the compare size or the sign There are still missed opportunities to shrink a load and fold it in here. This will be fixed in a future patch.
author: Craig Topper <craig.topper@intel.com> 2018-10-01 21:35:26 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-10-01 21:35:26 +0000
commit: b003e46adc581eeaae6315e812221d2f2244bda2 (patch)
tree: e3c1f3edf668cc500f6c4b0bcf90239efdd7e044
parent: d0011fbe859e99b26e89402ba1bc1c719e150011 (diff)
3 files changed, 41 insertions, 41 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4909d18d967..d9c92ecf6c3 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3392,8 +3392,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
 
-    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
-        hasNoSignedComparisonUses(Node))
+    // Save the original VT of the compare.
+    MVT CmpVT = N0.getSimpleValueType();
+
+    // We can peek through truncates, but we need to be careful below.
+    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
       N0 = N0.getOperand(0);
 
     // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -3411,14 +3414,21 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       int SubRegOp;
       unsigned Op;
 
+      // For each of these checks we need to be careful if the sign flag is
+      // being used. It is only safe to use the sign flag in two conditions,
+      // either the sign bit in the shrunken mask is zero or the final test
+      // size is equal to the original compare size.
+
       if (isUInt<8>(Mask) &&
-          (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
+          (!(Mask & 0x80) || CmpVT == MVT::i8 ||
+           hasNoSignedComparisonUses(Node))) {
         // For example, convert "testl %eax, $8" to "testb %al, $8"
         VT = MVT::i8;
         SubRegOp = X86::sub_8bit;
         Op = X86::TEST8ri;
       } else if (OptForMinSize && isUInt<16>(Mask) &&
-                 (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+                 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
+                  hasNoSignedComparisonUses(Node))) {
         // For example, "testl %eax, $32776" to "testw %ax, $32776".
         // NOTE: We only want to form TESTW instructions if optimizing for
         // min size. Otherwise we only save one byte and possibly get a length
@@ -3427,7 +3437,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
         SubRegOp = X86::sub_16bit;
         Op = X86::TEST16ri;
       } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
-                 (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
+                 ((!(Mask & 0x80000000) &&
+                   // Without minsize 16-bit Cmps can get here so we need to
+                   // be sure we calculate the correct sign flag if needed.
+                   (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
+                  CmpVT == MVT::i32 || hasNoSignedComparisonUses(Node))) {
         // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
         // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
         // Otherwize, we find ourselves in a position where we have to do
@@ -3441,6 +3455,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
         break;
       }
 
+      // FIXME: We should be able to fold loads here.
+
       SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
       SDValue Reg = N0.getOperand(0);
 
diff --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 9bbd86ca646..4f6f0c9201b 100644
--- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -12,8 +12,7 @@ define i32 @main() nounwind {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $150, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb $-106, %al
 ; CHECK-NEXT:    jle .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %if.then
 ; CHECK-NEXT:    movl $1, {{.*}}(%rip)
diff --git a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll
index c226a996ca2..ea68d38dcf1 100644
--- a/llvm/test/CodeGen/X86/test-shrink.ll
+++ b/llvm/test/CodeGen/X86/test-shrink.ll
@@ -578,8 +578,7 @@ no:
 define void @and16_trunc_8_sign(i16 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and16_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB13_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -592,8 +591,7 @@ define void @and16_trunc_8_sign(i16 %x) nounwind {
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
 ; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB13_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -604,8 +602,7 @@ define void @and16_trunc_8_sign(i16 %x) nounwind {
 ; CHECK-X86-LABEL: and16_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    andl $128, %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB13_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -626,8 +623,7 @@ no:
 define void @and32_trunc_8_sign(i32 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and32_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB14_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -639,8 +635,7 @@ define void @and32_trunc_8_sign(i32 %x) nounwind {
 ; CHECK-WIN32-64-LABEL: and32_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB14_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -650,9 +645,8 @@ define void @and32_trunc_8_sign(i32 %x) nounwind {
 ;
 ; CHECK-X86-LABEL: and32_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $128, %eax
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB14_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -673,8 +667,7 @@ no:
 define void @and64_trunc_8_sign(i64 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and64_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB15_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -686,8 +679,7 @@ define void @and64_trunc_8_sign(i64 %x) nounwind {
 ; CHECK-WIN32-64-LABEL: and64_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB15_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -697,9 +689,8 @@ define void @and64_trunc_8_sign(i64 %x) nounwind {
 ;
 ; CHECK-X86-LABEL: and64_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $128, %eax
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB15_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -767,8 +758,7 @@ no:
 define void @and32_trunc_16_sign_minsize(i32 %x) minsize nounwind {
 ; CHECK-LINUX64-LABEL: and32_trunc_16_sign_minsize:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $32768, %edi # imm = 0x8000
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testw $-32768, %di # imm = 0x8000
 ; CHECK-LINUX64-NEXT:    jg .LBB17_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -780,8 +770,7 @@ define void @and32_trunc_16_sign_minsize(i32 %x) minsize nounwind {
 ; CHECK-WIN32-64-LABEL: and32_trunc_16_sign_minsize:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $32768, %ecx # imm = 0x8000
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testw $-32768, %cx # imm = 0x8000
 ; CHECK-WIN32-64-NEXT:    jg .LBB17_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -791,9 +780,8 @@ define void @and32_trunc_16_sign_minsize(i32 %x) minsize nounwind {
 ;
 ; CHECK-X86-LABEL: and32_trunc_16_sign_minsize:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testw $-32768, %ax # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB17_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -861,8 +849,7 @@ no:
 define void @and64_trunc_16_sign_minsize(i64 %x) minsize nounwind {
 ; CHECK-LINUX64-LABEL: and64_trunc_16_sign_minsize:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $32768, %edi # imm = 0x8000
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testw $-32768, %di # imm = 0x8000
 ; CHECK-LINUX64-NEXT:    jg .LBB19_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -874,8 +861,7 @@ define void @and64_trunc_16_sign_minsize(i64 %x) minsize nounwind {
 ; CHECK-WIN32-64-LABEL: and64_trunc_16_sign_minsize:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $32768, %ecx # imm = 0x8000
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testw $-32768, %cx # imm = 0x8000
 ; CHECK-WIN32-64-NEXT:    jg .LBB19_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -885,9 +871,8 @@ define void @and64_trunc_16_sign_minsize(i64 %x) minsize nounwind {
 ;
 ; CHECK-X86-LABEL: and64_trunc_16_sign_minsize:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testw $-32768, %ax # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB19_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
author	Craig Topper <craig.topper@intel.com>	2018-10-01 21:35:26 +0000
committer	Craig Topper <craig.topper@intel.com>	2018-10-01 21:35:26 +0000
commit	b003e46adc581eeaae6315e812221d2f2244bda2 (patch)
tree	e3c1f3edf668cc500f6c4b0bcf90239efdd7e044
parent	d0011fbe859e99b26e89402ba1bc1c719e150011 (diff)