author     Sam Parker <sam.parker@arm.com>    2018-08-21 10:26:59 +0000
committer  Sam Parker <sam.parker@arm.com>    2018-08-21 10:26:59 +0000
commit     48cb502069d672cfa560e2b0839bc290acd7053f (patch)
tree       f0ac7c7716924f48cfb525ea3828b49a788afdc7
parent     d92e522f1288e3146e98bfba0d5772ec6140ea99 (diff)
[DAGCombiner] Reduce load widths of shifted masks
During combining, ReduceLoadWidth is used to combine AND nodes that mask loads into narrow loads. This patch allows the mask to be a shifted constant. This results in a narrow load which is then left shifted to compensate for the new offset.

Differential Revision: https://reviews.llvm.org/D50432

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340261 91177308-0d34-0410-b5e6-96231b3b80d8
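As an illustrative sketch (not part of the patch itself; distilled from the new test11 in and-load-combine.ll below, little endian, with the value names invented for exposition): a mask such as 0xff00 is a 0xff mask shifted left by 8, so the wide load and AND

    %0   = load i32, i32* %p, align 4
    %and = and i32 %0, 65280                  ; 0xff00 = 0xff << 8

can conceptually be reduced to a narrow zero-extending load of the one masked byte plus a compensating shift:

    %pc  = bitcast i32* %p to i8*
    %q   = getelementptr i8, i8* %pc, i32 1   ; byte 1 holds the masked bits (LE)
    %b   = load i8, i8* %q, align 1
    %z   = zext i8 %b to i32
    %and = shl i32 %z, 8                      ; shift left to restore the offset

The actual transform operates on SelectionDAG nodes rather than IR, and on big-endian targets the byte offset is computed from the other end of the word.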
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp   49
-rw-r--r--  test/CodeGen/ARM/and-load-combine.ll      539
-rw-r--r--  test/CodeGen/X86/fp128-i128.ll             28
3 files changed, 569 insertions, 47 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 62ee5c30086..6747b2dca42 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9063,6 +9063,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (VT.isVector())
return SDValue();
+ unsigned ShAmt = 0;
+ bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -9090,15 +9092,25 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!AndC || !AndC->getAPIntValue().isMask())
+ if (!AndC)
+ return SDValue();
+
+ const APInt &Mask = AndC->getAPIntValue();
+ unsigned ActiveBits = 0;
+ if (Mask.isMask()) {
+ ActiveBits = Mask.countTrailingOnes();
+ } else if (Mask.isShiftedMask()) {
+ ShAmt = Mask.countTrailingZeros();
+ APInt ShiftedMask = Mask.lshr(ShAmt);
+ ActiveBits = ShiftedMask.countTrailingOnes();
+ HasShiftedOffset = true;
+ } else
return SDValue();
- unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
- unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
@@ -9167,13 +9179,16 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (DAG.getDataLayout().isBigEndian()) {
+ auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
+ return LVTStoreBits - EVTStoreBits - ShAmt;
+ };
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
@@ -9221,6 +9236,24 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
+ if (HasShiftedOffset) {
+ // Recalculate the shift amount after it has been altered to calculate
+ // the offset.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
+
+ // We're using a shifted mask, so the load now has an offset. This means we
+ // now need to shift right the mask to match the new load, and then shift
+ // the result of the AND left to compensate.
+ const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt ShiftedMask = Mask.lshr(ShAmt);
+ DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
+ SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
+ ShiftC);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
+ DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
+ }
// Return the new loaded value.
return Result;
}
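A worked sketch of the endian arithmetic above (checked against the new test11 below): AdjustBigEndianShift mirrors the shift amount within the stored value and is its own inverse, which is why the HasShiftedOffset block can apply it a second time to recover the mask's shift after the pointer offset has been taken. For a 0xff00 mask on an i32 load (ExtVT = i8, so initially ShAmt = 8):

    little endian:  PtrOff = 8 / 8 = 1                ->  ldrb r0, [r0, #1]
    big endian:     ShAmt  = 32 - 8 - 8 = 16
                    PtrOff = 16 / 8 = 2               ->  ldrb r0, [r0, #2]
                    ShAmt  = 32 - 8 - 16 = 8 again    ->  lsl r0, r0, #8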
diff --git a/test/CodeGen/ARM/and-load-combine.ll b/test/CodeGen/ARM/and-load-combine.ll
index dac8e113d32..09acefad305 100644
--- a/test/CodeGen/ARM/and-load-combine.ll
+++ b/test/CodeGen/ARM/and-load-combine.ll
@@ -4,8 +4,7 @@
; RUN: llc -mtriple=armv6m %s -o - | FileCheck %s --check-prefix=THUMB1
; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefix=THUMB2
-define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a,
- i16* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a, i16* nocapture readonly %b) {
; ARM-LABEL: cmp_xor8_short_short:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -51,8 +50,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_xor8_short_int:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -99,8 +97,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_xor8_int_int:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -146,8 +143,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_xor16:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrh r0, [r0]
@@ -193,8 +189,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a,
- i16* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a, i16* nocapture readonly %b) {
; ARM-LABEL: cmp_or8_short_short:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -240,8 +235,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_or8_short_int:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -288,8 +282,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_or8_int_int:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -335,8 +328,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_or16:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrh r0, [r0]
@@ -382,8 +374,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a,
- i16* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a, i16* nocapture readonly %b) {
; ARM-LABEL: cmp_and8_short_short:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r1, [r1]
@@ -429,8 +420,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_and8_short_int:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
@@ -477,8 +467,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_and8_int_int:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r1, [r1]
@@ -524,8 +513,7 @@ entry:
ret i1 %cmp
}
-define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a,
- i32* nocapture readonly %b) {
+define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_and16:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrh r1, [r1]
@@ -1037,6 +1025,7 @@ entry:
ret void
}
+define arm_aapcscc void @test10(i32* nocapture %p) {
; ARM-LABEL: test10:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r1, [r0]
@@ -1065,7 +1054,6 @@ entry:
; THUMB2-NEXT: eor r1, r1, #255
; THUMB2-NEXT: str r1, [r0]
; THUMB2-NEXT: bx lr
-define arm_aapcscc void @test10(i32* nocapture %p) {
entry:
%0 = load i32, i32* %p, align 4
%neg = and i32 %0, 255
@@ -1074,3 +1062,504 @@ entry:
ret void
}
+define arm_aapcscc i32 @test11(i32* nocapture %p) {
+; ARM-LABEL: test11:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #1]
+; ARM-NEXT: lsl r0, r0, #8
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test11:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #2]
+; ARMEB-NEXT: lsl r0, r0, #8
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test11:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #1]
+; THUMB1-NEXT: lsls r0, r0, #8
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test11:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #1]
+; THUMB2-NEXT: lsls r0, r0, #8
+; THUMB2-NEXT: bx lr
+ %1 = load i32, i32* %p, align 4
+ %and = and i32 %1, 65280
+ ret i32 %and
+}
+
+define arm_aapcscc i32 @test12(i32* nocapture %p) {
+; ARM-LABEL: test12:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #2]
+; ARM-NEXT: lsl r0, r0, #16
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test12:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #1]
+; ARMEB-NEXT: lsl r0, r0, #16
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test12:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #2]
+; THUMB1-NEXT: lsls r0, r0, #16
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test12:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #2]
+; THUMB2-NEXT: lsls r0, r0, #16
+; THUMB2-NEXT: bx lr
+ %1 = load i32, i32* %p, align 4
+ %and = and i32 %1, 16711680
+ ret i32 %and
+}
+
+define arm_aapcscc i32 @test13(i32* nocapture %p) {
+; ARM-LABEL: test13:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #3]
+; ARM-NEXT: lsl r0, r0, #24
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test13:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0]
+; ARMEB-NEXT: lsl r0, r0, #24
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test13:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #3]
+; THUMB1-NEXT: lsls r0, r0, #24
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test13:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #3]
+; THUMB2-NEXT: lsls r0, r0, #24
+; THUMB2-NEXT: bx lr
+ %1 = load i32, i32* %p, align 4
+ %and = and i32 %1, 4278190080
+ ret i32 %and
+}
+
+define arm_aapcscc i32 @test14(i32* nocapture %p) {
+; ARM-LABEL: test14:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r0, [r0, #1]
+; ARM-NEXT: lsl r0, r0, #8
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test14:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrh r0, [r0, #1]
+; ARMEB-NEXT: lsl r0, r0, #8
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test14:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldr r1, [r0]
+; THUMB1-NEXT: ldr r0, .LCPI26_0
+; THUMB1-NEXT: ands r0, r1
+; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: .p2align 2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: .LCPI26_0:
+; THUMB1-NEXT: .long 16776960 @ 0xffff00
+;
+; THUMB2-LABEL: test14:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrh.w r0, [r0, #1]
+; THUMB2-NEXT: lsls r0, r0, #8
+; THUMB2-NEXT: bx lr
+ %1 = load i32, i32* %p, align 4
+ %and = and i32 %1, 16776960
+ ret i32 %and
+}
+
+define arm_aapcscc i32 @test15(i32* nocapture %p) {
+; ARM-LABEL: test15:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r0, [r0, #2]
+; ARM-NEXT: lsl r0, r0, #16
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test15:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrh r0, [r0]
+; ARMEB-NEXT: lsl r0, r0, #16
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test15:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrh r0, [r0, #2]
+; THUMB1-NEXT: lsls r0, r0, #16
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test15:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrh r0, [r0, #2]
+; THUMB2-NEXT: lsls r0, r0, #16
+; THUMB2-NEXT: bx lr
+ %1 = load i32, i32* %p, align 4
+ %and = and i32 %1, 4294901760
+ ret i32 %and
+}
+
+define arm_aapcscc i32 @test16(i64* nocapture %p) {
+; ARM-LABEL: test16:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #1]
+; ARM-NEXT: lsl r0, r0, #8
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test16:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #6]
+; ARMEB-NEXT: lsl r0, r0, #8
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test16:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #1]
+; THUMB1-NEXT: lsls r0, r0, #8
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test16:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #1]
+; THUMB2-NEXT: lsls r0, r0, #8
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 65280
+ %trunc = trunc i64 %and to i32
+ ret i32 %trunc
+}
+
+define arm_aapcscc i32 @test17(i64* nocapture %p) {
+; ARM-LABEL: test17:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #2]
+; ARM-NEXT: lsl r0, r0, #16
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test17:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #5]
+; ARMEB-NEXT: lsl r0, r0, #16
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test17:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #2]
+; THUMB1-NEXT: lsls r0, r0, #16
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test17:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #2]
+; THUMB2-NEXT: lsls r0, r0, #16
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 16711680
+ %trunc = trunc i64 %and to i32
+ ret i32 %trunc
+}
+
+define arm_aapcscc i32 @test18(i64* nocapture %p) {
+; ARM-LABEL: test18:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #3]
+; ARM-NEXT: lsl r0, r0, #24
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test18:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #4]
+; ARMEB-NEXT: lsl r0, r0, #24
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test18:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #3]
+; THUMB1-NEXT: lsls r0, r0, #24
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test18:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #3]
+; THUMB2-NEXT: lsls r0, r0, #24
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 4278190080
+ %trunc = trunc i64 %and to i32
+ ret i32 %trunc
+}
+
+define arm_aapcscc i64 @test19(i64* nocapture %p) {
+; ARM-LABEL: test19:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r1, [r0, #4]
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test19:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #3]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test19:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r1, [r0, #4]
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test19:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r1, [r0, #4]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 1095216660480
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test20(i64* nocapture %p) {
+; ARM-LABEL: test20:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #5]
+; ARM-NEXT: lsl r1, r0, #8
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test20:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #2]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: lsl r0, r0, #8
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test20:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #5]
+; THUMB1-NEXT: lsls r1, r0, #8
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test20:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #5]
+; THUMB2-NEXT: lsls r1, r0, #8
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 280375465082880
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test21(i64* nocapture %p) {
+; ARM-LABEL: test21:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #6]
+; ARM-NEXT: lsl r1, r0, #16
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test21:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0, #1]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: lsl r0, r0, #16
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test21:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #6]
+; THUMB1-NEXT: lsls r1, r0, #16
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test21:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #6]
+; THUMB2-NEXT: lsls r1, r0, #16
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 71776119061217280
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test22(i64* nocapture %p) {
+; ARM-LABEL: test22:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrb r0, [r0, #7]
+; ARM-NEXT: lsl r1, r0, #24
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test22:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrb r0, [r0]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: lsl r0, r0, #24
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test22:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r0, [r0, #7]
+; THUMB1-NEXT: lsls r1, r0, #24
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test22:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrb r0, [r0, #7]
+; THUMB2-NEXT: lsls r1, r0, #24
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, -72057594037927936
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test23(i64* nocapture %p) {
+; ARM-LABEL: test23:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r1, [r0, #3]
+; ARM-NEXT: lsl r0, r1, #24
+; ARM-NEXT: lsr r1, r1, #8
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test23:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrh r1, [r0, #3]
+; ARMEB-NEXT: lsr r0, r1, #8
+; ARMEB-NEXT: lsl r1, r1, #24
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test23:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r1, [r0, #3]
+; THUMB1-NEXT: ldrb r0, [r0, #4]
+; THUMB1-NEXT: lsls r0, r0, #8
+; THUMB1-NEXT: adds r1, r0, r1
+; THUMB1-NEXT: lsls r0, r1, #24
+; THUMB1-NEXT: lsrs r1, r1, #8
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test23:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrh.w r1, [r0, #3]
+; THUMB2-NEXT: lsls r0, r1, #24
+; THUMB2-NEXT: lsrs r1, r1, #8
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 1099494850560
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test24(i64* nocapture %p) {
+; ARM-LABEL: test24:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r1, [r0, #4]
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test24:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrh r0, [r0, #2]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test24:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrh r1, [r0, #4]
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test24:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrh r1, [r0, #4]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 281470681743360
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test25(i64* nocapture %p) {
+; ARM-LABEL: test25:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r0, [r0, #5]
+; ARM-NEXT: lsl r1, r0, #8
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test25:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrh r0, [r0, #1]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: lsl r0, r0, #8
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test25:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrb r1, [r0, #5]
+; THUMB1-NEXT: ldrb r0, [r0, #6]
+; THUMB1-NEXT: lsls r0, r0, #8
+; THUMB1-NEXT: adds r0, r0, r1
+; THUMB1-NEXT: lsls r1, r0, #8
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test25:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrh.w r0, [r0, #5]
+; THUMB2-NEXT: lsls r1, r0, #8
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, 72056494526300160
+ ret i64 %and
+}
+
+define arm_aapcscc i64 @test26(i64* nocapture %p) {
+; ARM-LABEL: test26:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldrh r0, [r0, #6]
+; ARM-NEXT: lsl r1, r0, #16
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: bx lr
+;
+; ARMEB-LABEL: test26:
+; ARMEB: @ %bb.0:
+; ARMEB-NEXT: ldrh r0, [r0]
+; ARMEB-NEXT: mov r1, #0
+; ARMEB-NEXT: lsl r0, r0, #16
+; ARMEB-NEXT: bx lr
+;
+; THUMB1-LABEL: test26:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: ldrh r0, [r0, #6]
+; THUMB1-NEXT: lsls r1, r0, #16
+; THUMB1-NEXT: movs r0, #0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: test26:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: ldrh r0, [r0, #6]
+; THUMB2-NEXT: lsls r1, r0, #16
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: bx lr
+ %1 = load i64, i64* %p, align 8
+ %and = and i64 %1, -281474976710656
+ ret i64 %and
+}
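The fp128-i128.ll change below is the same combine firing on X86, read straight off the diff: 0xFFFF000000000000 is 0xFFFF << 48, a shifted mask, so the movabsq constant plus 64-bit andq over the stack slot reduces to a 16-bit movzwl of the top two bytes followed by shlq $48.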
diff --git a/test/CodeGen/X86/fp128-i128.ll b/test/CodeGen/X86/fp128-i128.ll
index 6bfc0e5eb51..f18b3e46e7a 100644
--- a/test/CodeGen/X86/fp128-i128.ll
+++ b/test/CodeGen/X86/fp128-i128.ll
@@ -49,13 +49,13 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; SSE-LABEL: TestUnionLD1:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; SSE-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
-; SSE-NEXT: andq %rdi, %rcx
-; SSE-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
-; SSE-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
-; SSE-NEXT: orq %rcx, %rdx
-; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: shlq $48, %rax
+; SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT: movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF
+; SSE-NEXT: andq %rdi, %rdx
+; SSE-NEXT: orq %rax, %rdx
+; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: jmp foo # TAILCALL
@@ -63,13 +63,13 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; AVX-LABEL: TestUnionLD1:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF
-; AVX-NEXT: andq %rdi, %rcx
-; AVX-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
-; AVX-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
-; AVX-NEXT: orq %rcx, %rdx
-; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; AVX-NEXT: shlq $48, %rax
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT: movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF
+; AVX-NEXT: andq %rdi, %rdx
+; AVX-NEXT: orq %rax, %rdx
+; AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT: jmp foo # TAILCALL