-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                       48
-rw-r--r--  test/Transforms/Reassociate/fast-ReassociateVector.ll       10
-rw-r--r--  test/Transforms/Reassociate/fast-basictest.ll                2
-rw-r--r--  test/Transforms/Reassociate/fast-fp-commute.ll               4
-rw-r--r--  test/Transforms/Reassociate/fast-multistep.ll                6
-rw-r--r--  test/Transforms/Reassociate/multistep.ll                     6
-rw-r--r--  test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll   31
-rw-r--r--  test/Transforms/Reassociate/secondary.ll                     2
-rw-r--r--  test/Transforms/Reassociate/xor_reassoc.ll                   4
9 files changed, 87 insertions, 26 deletions
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index f6c18d626cf..82c79e7d919 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -881,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add to the redo list any intermediate instructions that are modified
+/// while pushing the negates through adds. These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+ SetVector<AssertingVH<Instruction>> &ToRedo) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->getType()->isFPOrFPVectorTy()) {
return ConstantExpr::getFNeg(C);
@@ -902,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (BinaryOperator *I =
isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(I->getOperand(0), BI));
- I->setOperand(1, NegateValue(I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
if (I->getOpcode() == Instruction::Add) {
I->setHasNoUnsignedWrap(false);
I->setHasNoSignedWrap(false);
@@ -916,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
//
I->moveBefore(BI);
I->setName(I->getName()+".neg");
+
+ // Add the intermediate negates to the redo list as processing them later
+ // could expose more reassociation opportunities.
+ ToRedo.insert(I);
return I;
}
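
For orientation, a minimal IR sketch of the case this hunk targets (hypothetical function and value names, not taken from the patch's tests): negating a reassociable add pushes the negate onto each operand, and those freshly created intermediate negates are what ToRedo now queues for reprocessing.

; Assumed example: NegateValue is asked for -(%a + %b) on behalf of the fsub.
define float @neg_through_add(float %x, float %a, float %b) {
  %t = fadd fast float %a, %b   ; reassociable add: the negate is pushed into it
  %r = fsub fast float %x, %t   ; both operand negates land on the redo list
  ret float %r
}
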
@@ -955,12 +963,15 @@ static Value *NegateValue(Value *V, Instruction *BI) {
} else {
TheNeg->andIRFlags(BI);
}
+ ToRedo.insert(TheNeg);
return TheNeg;
}
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- return CreateNeg(V, V->getName() + ".neg", BI, BI);
+ BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+ ToRedo.insert(NewNeg);
+ return NewNeg;
}
/// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -994,14 +1005,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
//
- Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
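
As a hedged illustration of what BreakUpSubtract does (sketch only; the names below are invented): a subtract that participates in a larger add expression is rewritten as an add of a negation, flattening everything into a single reassociable add tree.

; Assumed example: %s feeds an add, so ShouldBreakUpSubtract approves the split.
define i32 @break_sub(i32 %a, i32 %b, i32 %c) {
  %s = sub i32 %a, %b   ; becomes %a + (0 - %b) after the transform
  %r = add i32 %s, %c   ; the whole expression is then one add tree
  ret i32 %r
}
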
@@ -2068,7 +2080,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2079,6 +2091,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
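
The shape handled here, as a rough sketch (hypothetical names, not from the test suite): a negation whose operand is a reassociable multiply is lowered to a multiply by -1, and each binary-operator user of the lowered value is re-queued so the enlarged multiply tree gets another look.

; Assumed example for the integer path; the FSub path below is analogous.
define i32 @neg_of_mul(i32 %x, i32 %y, i32 %z) {
  %m = mul i32 %x, %y   ; reassociable multiply
  %n = sub i32 0, %m    ; negation: lowered to %m * -1 by LowerNegateToMultiply
  %r = add i32 %n, %z   ; user of the lowered negate; inserted into RedoInsts
  ret i32 %r
}
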
@@ -2086,7 +2104,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
} else if (I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2096,7 +2114,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
Instruction *NI = LowerNegateToMultiply(I);
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2111,8 +2135,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
- if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+ if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+ // During the initial run we will get to the root of the tree.
+ // But if we get here while we are redoing instructions, there is no
+ // guarantee that the root will be visited. So redo it later.
+ if (BO->user_back() != BO)
+ RedoInsts.insert(BO->user_back());
return;
+ }
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
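
To see why the root has to be re-queued, consider a sketch like this (hypothetical): the initial pass walks every instruction in order, so deferring interior nodes is safe because the walk still reaches %t2; but when %t0 alone is popped off RedoInsts, nothing else will visit the root unless it is inserted here.

; Assumed example of a reassociable add tree.
define i32 @add_tree(i32 %a, i32 %b, i32 %c, i32 %d) {
  %t0 = add i32 %a, %b    ; interior node: sole use is another add
  %t1 = add i32 %t0, %c   ; interior node
  %t2 = add i32 %t1, %d   ; root: optimization is driven from here
  ret i32 %t2
}
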
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 9fbb5ccfe9a..fb76b9d990b 100644
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret <2 x float>
%t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
; Check x*y+y*x -> x*y*2.
define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
; CHECK-NEXT: ret <2 x double> %tmp1
%1 = fmul fast <2 x double> %x, %y
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
index 64b74e3e8c1..c8a2bd9c193 100644
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
; CHECK-LABEL: @test7
; CHECK-NEXT: fadd fast float %C, %B
; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
; CHECK-NEXT: ret float
%aa = fmul fast float %A, %A
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
index ad89607a21e..6565bbb3d20 100644
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {
define float @test3(float %x, float %y) {
; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
; CHECK-NEXT: ret float %tmp1
%1 = fmul fast float %x, %y
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
index 45e15c7f353..aea997cdcbd 100644
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
define float @fmultistep1(float %a, float %b, float %c) {
; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret float
%t0 = fmul fast float %a, %b
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index c499646a8b6..5685bb94953 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
%t2 = mul i64 %a, %c
%t3 = mul i64 %a, %t2 ; a*(a*c)
%t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret
ret i64 %t4
}
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
new file mode 100644
index 00000000000..c2cdffce61e
--- /dev/null
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+ %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+ %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+ %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+ %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+ %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - ( 5 * a - 3 * b)
+ ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+ %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+ %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+ %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+ %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+ %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+ ret half %tmp5 ; = 5 * a + b
+}
+
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll
index a52000ada53..388cd6bcb6f 100644
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@
; CHECK: define
; CHECK-NOT: undef
-; CHECK: %factor = mul i32 %tmp3, -2
+; CHECK: %factor = mul i32 %tmp3.neg, 2
; CHECK-NOT: undef
; CHECK: }
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index a22689805fb..0bed6f35880 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
%xor1 = xor i32 %xor, %and
ret i32 %xor1
; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
; CHECK: ret i32 %xor1
}