diff options
author | Sam Parker <sam.parker@arm.com> | 2018-08-21 10:26:59 +0000 |
---|---|---|
committer | Sam Parker <sam.parker@arm.com> | 2018-08-21 10:26:59 +0000 |
commit | 48cb502069d672cfa560e2b0839bc290acd7053f (patch) | |
tree | f0ac7c7716924f48cfb525ea3828b49a788afdc7 | |
parent | d92e522f1288e3146e98bfba0d5772ec6140ea99 (diff) |
[DAGCombiner] Reduce load widths of shifted masks
(refs: linaro-local/ci/llvm-kernel-aarch64-tested)
During combining, ReduceLoadWidth is used to combine AND nodes that
mask loads into narrow loads. This patch allows the mask to be a
shifted constant. This results in a narrow load which is then left
shifted to compensate for the new offset.
Differential Revision: https://reviews.llvm.org/D50432
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340261 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 49 | ||||
-rw-r--r-- | test/CodeGen/ARM/and-load-combine.ll | 539 | ||||
-rw-r--r-- | test/CodeGen/X86/fp128-i128.ll | 28 |
3 files changed, 569 insertions, 47 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 62ee5c30086..6747b2dca42 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9063,6 +9063,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (VT.isVector()) return SDValue(); + unsigned ShAmt = 0; + bool HasShiftedOffset = false; // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { @@ -9090,15 +9092,25 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } else if (Opc == ISD::AND) { // An AND with a constant mask is the same as a truncate + zero-extend. auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!AndC || !AndC->getAPIntValue().isMask()) + if (!AndC) + return SDValue(); + + const APInt &Mask = AndC->getAPIntValue(); + unsigned ActiveBits = 0; + if (Mask.isMask()) { + ActiveBits = Mask.countTrailingOnes(); + } else if (Mask.isShiftedMask()) { + ShAmt = Mask.countTrailingZeros(); + APInt ShiftedMask = Mask.lshr(ShAmt); + ActiveBits = ShiftedMask.countTrailingOnes(); + HasShiftedOffset = true; + } else return SDValue(); - unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); ExtType = ISD::ZEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); } - unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { SDValue SRL = N0; if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { @@ -9167,13 +9179,16 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. 
- if (DAG.getDataLayout().isBigEndian()) { + auto AdjustBigEndianShift = [&](unsigned ShAmt) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); - ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; - } + return LVTStoreBits - EVTStoreBits - ShAmt; + }; + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (DAG.getDataLayout().isBigEndian()) + ShAmt = AdjustBigEndianShift(ShAmt); EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; @@ -9221,6 +9236,24 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } + if (HasShiftedOffset) { + // Recalculate the shift amount after it has been altered to calculate + // the offset. + if (DAG.getDataLayout().isBigEndian()) + ShAmt = AdjustBigEndianShift(ShAmt); + + // We're using a shifted mask, so the load now has an offset. This means we + // now need to shift right the mask to match the new load and then shift + // right the result of the AND. + const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + APInt ShiftedMask = Mask.lshr(ShAmt); + DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT)); + SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT); + SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0), + ShiftC); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted); + DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC); + } // Return the new loaded value. 
return Result; } diff --git a/test/CodeGen/ARM/and-load-combine.ll b/test/CodeGen/ARM/and-load-combine.ll index dac8e113d32..09acefad305 100644 --- a/test/CodeGen/ARM/and-load-combine.ll +++ b/test/CodeGen/ARM/and-load-combine.ll @@ -4,8 +4,7 @@ ; RUN: llc -mtriple=armv6m %s -o - | FileCheck %s --check-prefix=THUMB1 ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefix=THUMB2 -define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a, - i16* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a, i16* nocapture readonly %b) { ; ARM-LABEL: cmp_xor8_short_short: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -51,8 +50,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_xor8_short_int: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -99,8 +97,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_xor8_int_int: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -146,8 +143,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_xor16: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrh r0, [r0] @@ -193,8 +189,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a, - i16* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a, i16* nocapture readonly %b) { ; ARM-LABEL: 
cmp_or8_short_short: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -240,8 +235,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_or8_short_int: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -288,8 +282,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_or8_int_int: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -335,8 +328,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_or16: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrh r0, [r0] @@ -382,8 +374,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a, - i16* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a, i16* nocapture readonly %b) { ; ARM-LABEL: cmp_and8_short_short: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r1, [r1] @@ -429,8 +420,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_and8_short_int: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] @@ -477,8 +467,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a, i32* 
nocapture readonly %b) { ; ARM-LABEL: cmp_and8_int_int: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r1, [r1] @@ -524,8 +513,7 @@ entry: ret i1 %cmp } -define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a, - i32* nocapture readonly %b) { +define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_and16: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrh r1, [r1] @@ -1037,6 +1025,7 @@ entry: ret void } +define arm_aapcscc void @test10(i32* nocapture %p) { ; ARM-LABEL: test10: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r1, [r0] @@ -1065,7 +1054,6 @@ entry: ; THUMB2-NEXT: eor r1, r1, #255 ; THUMB2-NEXT: str r1, [r0] ; THUMB2-NEXT: bx lr -define arm_aapcscc void @test10(i32* nocapture %p) { entry: %0 = load i32, i32* %p, align 4 %neg = and i32 %0, 255 @@ -1074,3 +1062,504 @@ entry: ret void } +define arm_aapcscc i32 @test11(i32* nocapture %p) { +; ARM-LABEL: test11: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #1] +; ARM-NEXT: lsl r0, r0, #8 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test11: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #2] +; ARMEB-NEXT: lsl r0, r0, #8 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test11: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #1] +; THUMB1-NEXT: lsls r0, r0, #8 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test11: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #1] +; THUMB2-NEXT: lsls r0, r0, #8 +; THUMB2-NEXT: bx lr + %1 = load i32, i32* %p, align 4 + %and = and i32 %1, 65280 + ret i32 %and +} + +define arm_aapcscc i32 @test12(i32* nocapture %p) { +; ARM-LABEL: test12: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #2] +; ARM-NEXT: lsl r0, r0, #16 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test12: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #1] +; ARMEB-NEXT: lsl r0, r0, #16 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test12: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #2] +; THUMB1-NEXT: lsls r0, r0, #16 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test12: +; THUMB2: @ %bb.0: 
+; THUMB2-NEXT: ldrb r0, [r0, #2] +; THUMB2-NEXT: lsls r0, r0, #16 +; THUMB2-NEXT: bx lr + %1 = load i32, i32* %p, align 4 + %and = and i32 %1, 16711680 + ret i32 %and +} + +define arm_aapcscc i32 @test13(i32* nocapture %p) { +; ARM-LABEL: test13: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #3] +; ARM-NEXT: lsl r0, r0, #24 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test13: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0] +; ARMEB-NEXT: lsl r0, r0, #24 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test13: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #3] +; THUMB1-NEXT: lsls r0, r0, #24 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test13: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #3] +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: bx lr + %1 = load i32, i32* %p, align 4 + %and = and i32 %1, 4278190080 + ret i32 %and +} + +define arm_aapcscc i32 @test14(i32* nocapture %p) { +; ARM-LABEL: test14: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r0, [r0, #1] +; ARM-NEXT: lsl r0, r0, #8 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test14: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrh r0, [r0, #1] +; ARMEB-NEXT: lsl r0, r0, #8 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test14: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldr r1, [r0] +; THUMB1-NEXT: ldr r0, .LCPI26_0 +; THUMB1-NEXT: ands r0, r1 +; THUMB1-NEXT: bx lr +; THUMB1-NEXT: .p2align 2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: .LCPI26_0: +; THUMB1-NEXT: .long 16776960 @ 0xffff00 +; +; THUMB2-LABEL: test14: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrh.w r0, [r0, #1] +; THUMB2-NEXT: lsls r0, r0, #8 +; THUMB2-NEXT: bx lr + %1 = load i32, i32* %p, align 4 + %and = and i32 %1, 16776960 + ret i32 %and +} + +define arm_aapcscc i32 @test15(i32* nocapture %p) { +; ARM-LABEL: test15: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r0, [r0, #2] +; ARM-NEXT: lsl r0, r0, #16 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test15: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: lsl r0, r0, #16 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test15: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: 
ldrh r0, [r0, #2] +; THUMB1-NEXT: lsls r0, r0, #16 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test15: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrh r0, [r0, #2] +; THUMB2-NEXT: lsls r0, r0, #16 +; THUMB2-NEXT: bx lr + %1 = load i32, i32* %p, align 4 + %and = and i32 %1, 4294901760 + ret i32 %and +} + +define arm_aapcscc i32 @test16(i64* nocapture %p) { +; ARM-LABEL: test16: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #1] +; ARM-NEXT: lsl r0, r0, #8 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test16: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #6] +; ARMEB-NEXT: lsl r0, r0, #8 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test16: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #1] +; THUMB1-NEXT: lsls r0, r0, #8 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test16: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #1] +; THUMB2-NEXT: lsls r0, r0, #8 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 65280 + %trunc = trunc i64 %and to i32 + ret i32 %trunc +} + +define arm_aapcscc i32 @test17(i64* nocapture %p) { +; ARM-LABEL: test17: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #2] +; ARM-NEXT: lsl r0, r0, #16 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test17: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #5] +; ARMEB-NEXT: lsl r0, r0, #16 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test17: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #2] +; THUMB1-NEXT: lsls r0, r0, #16 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test17: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #2] +; THUMB2-NEXT: lsls r0, r0, #16 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 16711680 + %trunc = trunc i64 %and to i32 + ret i32 %trunc +} + +define arm_aapcscc i32 @test18(i64* nocapture %p) { +; ARM-LABEL: test18: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #3] +; ARM-NEXT: lsl r0, r0, #24 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test18: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #4] +; ARMEB-NEXT: lsl r0, r0, #24 +; ARMEB-NEXT: bx lr +; +; 
THUMB1-LABEL: test18: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #3] +; THUMB1-NEXT: lsls r0, r0, #24 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test18: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #3] +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 4278190080 + %trunc = trunc i64 %and to i32 + ret i32 %trunc +} + +define arm_aapcscc i64 @test19(i64* nocapture %p) { +; ARM-LABEL: test19: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r1, [r0, #4] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test19: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #3] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test19: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r1, [r0, #4] +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test19: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r1, [r0, #4] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 1095216660480 + ret i64 %and +} + +define arm_aapcscc i64 @test20(i64* nocapture %p) { +; ARM-LABEL: test20: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #5] +; ARM-NEXT: lsl r1, r0, #8 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test20: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #2] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: lsl r0, r0, #8 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test20: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #5] +; THUMB1-NEXT: lsls r1, r0, #8 +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test20: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #5] +; THUMB2-NEXT: lsls r1, r0, #8 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 280375465082880 + ret i64 %and +} + +define arm_aapcscc i64 @test21(i64* nocapture %p) { +; ARM-LABEL: test21: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #6] +; ARM-NEXT: lsl r1, r0, #16 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr 
+; +; ARMEB-LABEL: test21: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0, #1] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: lsl r0, r0, #16 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test21: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #6] +; THUMB1-NEXT: lsls r1, r0, #16 +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test21: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #6] +; THUMB2-NEXT: lsls r1, r0, #16 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 71776119061217280 + ret i64 %and +} + +define arm_aapcscc i64 @test22(i64* nocapture %p) { +; ARM-LABEL: test22: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r0, [r0, #7] +; ARM-NEXT: lsl r1, r0, #24 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test22: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r0, [r0] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: lsl r0, r0, #24 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test22: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r0, [r0, #7] +; THUMB1-NEXT: lsls r1, r0, #24 +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test22: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r0, [r0, #7] +; THUMB2-NEXT: lsls r1, r0, #24 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, -72057594037927936 + ret i64 %and +} + +define arm_aapcscc i64 @test23(i64* nocapture %p) { +; ARM-LABEL: test23: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r1, [r0, #3] +; ARM-NEXT: lsl r0, r1, #24 +; ARM-NEXT: lsr r1, r1, #8 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test23: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrh r1, [r0, #3] +; ARMEB-NEXT: lsr r0, r1, #8 +; ARMEB-NEXT: lsl r1, r1, #24 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test23: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r1, [r0, #3] +; THUMB1-NEXT: ldrb r0, [r0, #4] +; THUMB1-NEXT: lsls r0, r0, #8 +; THUMB1-NEXT: adds r1, r0, r1 +; THUMB1-NEXT: lsls r0, r1, #24 +; THUMB1-NEXT: lsrs r1, r1, #8 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: 
test23: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrh.w r1, [r0, #3] +; THUMB2-NEXT: lsls r0, r1, #24 +; THUMB2-NEXT: lsrs r1, r1, #8 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 1099494850560 + ret i64 %and +} + +define arm_aapcscc i64 @test24(i64* nocapture %p) { +; ARM-LABEL: test24: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r1, [r0, #4] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test24: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test24: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrh r1, [r0, #4] +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test24: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrh r1, [r0, #4] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 281470681743360 + ret i64 %and +} + +define arm_aapcscc i64 @test25(i64* nocapture %p) { +; ARM-LABEL: test25: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r0, [r0, #5] +; ARM-NEXT: lsl r1, r0, #8 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test25: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrh r0, [r0, #1] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: lsl r0, r0, #8 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test25: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r1, [r0, #5] +; THUMB1-NEXT: ldrb r0, [r0, #6] +; THUMB1-NEXT: lsls r0, r0, #8 +; THUMB1-NEXT: adds r0, r0, r1 +; THUMB1-NEXT: lsls r1, r0, #8 +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test25: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrh.w r0, [r0, #5] +; THUMB2-NEXT: lsls r1, r0, #8 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, 72056494526300160 + ret i64 %and +} + +define arm_aapcscc i64 @test26(i64* nocapture %p) { +; ARM-LABEL: test26: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r0, [r0, #6] +; ARM-NEXT: lsl r1, r0, #16 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test26: +; ARMEB: @ %bb.0: 
+; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: mov r1, #0 +; ARMEB-NEXT: lsl r0, r0, #16 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test26: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrh r0, [r0, #6] +; THUMB1-NEXT: lsls r1, r0, #16 +; THUMB1-NEXT: movs r0, #0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test26: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrh r0, [r0, #6] +; THUMB2-NEXT: lsls r1, r0, #16 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: bx lr + %1 = load i64, i64* %p, align 8 + %and = and i64 %1, -281474976710656 + ret i64 %and +} diff --git a/test/CodeGen/X86/fp128-i128.ll b/test/CodeGen/X86/fp128-i128.ll index 6bfc0e5eb51..f18b3e46e7a 100644 --- a/test/CodeGen/X86/fp128-i128.ll +++ b/test/CodeGen/X86/fp128-i128.ll @@ -49,13 +49,13 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; SSE-LABEL: TestUnionLD1: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; SSE-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF -; SSE-NEXT: andq %rdi, %rcx -; SSE-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 -; SSE-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; SSE-NEXT: orq %rcx, %rdx -; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: shlq $48, %rax +; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; SSE-NEXT: movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF +; SSE-NEXT: andq %rdi, %rdx +; SSE-NEXT: orq %rax, %rdx +; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; SSE-NEXT: jmp foo # TAILCALL @@ -63,13 +63,13 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; AVX-LABEL: TestUnionLD1: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; AVX-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF -; AVX-NEXT: andq %rdi, %rcx -; AVX-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 -; AVX-NEXT: andq 
-{{[0-9]+}}(%rsp), %rdx -; AVX-NEXT: orq %rcx, %rdx -; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; AVX-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; AVX-NEXT: shlq $48, %rax +; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; AVX-NEXT: movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF +; AVX-NEXT: andq %rdi, %rdx +; AVX-NEXT: orq %rax, %rdx +; AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) ; AVX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 ; AVX-NEXT: jmp foo # TAILCALL |