Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64A53Fix835769.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64CallingConvention.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64CondBrTuning.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64ConditionalCompares.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64GenRegisterBankInfo.def | 106
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64LegalizerInfo.cpp | 169
-rw-r--r--  lib/Target/AArch64/AArch64MacroFusion.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 40
-rw-r--r--  lib/Target/AArch64/AArch64RegisterBankInfo.h | 27
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 131
-rw-r--r--  lib/Target/AArch64/AArch64SVEInstrInfo.td | 17
-rw-r--r--  lib/Target/AArch64/AArch64SchedA53.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedA57.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedCyclone.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkor.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedKryo.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedM1.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedThunderX.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedThunderX2T99.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64StorePairSuppress.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64VectorByElementOpt.cpp | 2
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 266
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 24
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 20
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 3
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp | 8
-rw-r--r--  lib/Target/AArch64/SVEInstrFormats.td | 41
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUFrameLowering.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 19
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 57
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h | 5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPULibCalls.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPULibFunc.cpp | 29
-rw-r--r--  lib/Target/AMDGPU/AMDGPULibFunc.h | 20
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 62
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 8
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td | 2
-rw-r--r--  lib/Target/ARC/ARCBranchFinalize.cpp | 2
-rw-r--r--  lib/Target/ARC/ARCFrameLowering.h | 2
-rw-r--r--  lib/Target/ARC/ARCInstrInfo.h | 2
-rw-r--r--  lib/Target/ARC/ARCRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMCallLowering.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 2
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.h | 2
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp | 71
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMMacroFusion.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 2
-rw-r--r--  lib/Target/ARM/ThumbRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/AVR/AVRFrameLowering.h | 2
-rw-r--r--  lib/Target/AVR/AVRInstrInfo.h | 2
-rw-r--r--  lib/Target/AVR/AVRRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/BPF/BPFFrameLowering.h | 2
-rw-r--r--  lib/Target/BPF/BPFInstrInfo.h | 2
-rw-r--r--  lib/Target/BPF/BPFRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 69
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 134
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp | 15
-rw-r--r--  lib/Target/Hexagon/RDFGraph.cpp | 2
-rw-r--r--  lib/Target/Lanai/LanaiDelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Lanai/LanaiFrameLowering.h | 2
-rw-r--r--  lib/Target/Lanai/LanaiInstrInfo.h | 2
-rw-r--r--  lib/Target/Lanai/LanaiMemAluCombiner.cpp | 2
-rw-r--r--  lib/Target/Lanai/LanaiRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/Lanai/LanaiSubtarget.h | 2
-rw-r--r--  lib/Target/Lanai/LanaiTargetMachine.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 2
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 23
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 3
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 8
-rw-r--r--  lib/Target/Mips/MicroMips32r6InstrFormats.td | 15
-rw-r--r--  lib/Target/Mips/MicroMips32r6InstrInfo.td | 11
-rw-r--r--  lib/Target/Mips/MicroMips64r6InstrInfo.td | 7
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 15
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 6
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 35
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 2
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 26
-rw-r--r--  lib/Target/Mips/MipsOptimizePICCall.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 1
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 10
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 58
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsScheduleGeneric.td | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXIntrinsics.td | 13
-rw-r--r--  lib/Target/NVPTX/NVPTXPeephole.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 2
-rw-r--r--  lib/Target/Nios2/Nios2FrameLowering.h | 2
-rw-r--r--  lib/Target/Nios2/Nios2InstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCBranchCoalescing.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 164
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 7
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 7
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/RISCV/RISCV.h | 12
-rw-r--r--  lib/Target/RISCV/RISCV.td | 4
-rw-r--r--  lib/Target/RISCV/RISCVAsmPrinter.cpp | 7
-rw-r--r--  lib/Target/RISCV/RISCVCallingConv.td | 3
-rw-r--r--  lib/Target/RISCV/RISCVFrameLowering.h | 8
-rw-r--r--  lib/Target/RISCV/RISCVISelLowering.cpp | 161
-rw-r--r--  lib/Target/RISCV/RISCVISelLowering.h | 6
-rw-r--r--  lib/Target/RISCV/RISCVInstrInfo.cpp | 48
-rw-r--r--  lib/Target/RISCV/RISCVInstrInfo.h | 18
-rw-r--r--  lib/Target/RISCV/RISCVInstrInfo.td | 120
-rw-r--r--  lib/Target/RISCV/RISCVMCInstLower.cpp | 83
-rw-r--r--  lib/Target/RISCV/RISCVRegisterInfo.cpp | 41
-rw-r--r--  lib/Target/RISCV/RISCVRegisterInfo.h | 5
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.h | 2
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZFrameLowering.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZLDCleanup.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.h | 1
-rw-r--r--  lib/Target/TargetMachine.cpp | 7
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 11
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp | 20
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 2
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyFrameLowering.h | 2
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 2
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 4
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 6
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 6
-rw-r--r--  lib/Target/X86/X86.td | 14
-rw-r--r--  lib/Target/X86/X86CallFrameOptimization.cpp | 2
-rw-r--r--  lib/Target/X86/X86CallLowering.cpp | 2
-rw-r--r--  lib/Target/X86/X86CmovConversion.cpp | 2
-rw-r--r--  lib/Target/X86/X86EvexToVex.cpp | 20
-rw-r--r--  lib/Target/X86/X86FixupBWInsts.cpp | 2
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 2
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 2
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 48
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 6
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 305
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 12
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 331
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 80
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 22
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 1
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 104
-rw-r--r--  lib/Target/X86/X86IntrinsicsInfo.h | 78
-rw-r--r--  lib/Target/X86/X86LegalizerInfo.cpp | 66
-rw-r--r--  lib/Target/X86/X86MacroFusion.cpp | 2
-rw-r--r--  lib/Target/X86/X86PadShortFunction.cpp | 2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 4
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 7
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 7
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 2
-rw-r--r--  lib/Target/X86/X86WinAllocaExpander.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.h | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 2
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 2
214 files changed, 2866 insertions, 897 deletions
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index e6afb42440a..7de5d0ef66b 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index bc44bc5f246..461c01318d4 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -19,8 +19,8 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetInstrInfo.h"
namespace {
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 51700f90597..2793481a0c1 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -34,9 +34,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 8bbef44a2e6..c3a354cb01d 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -73,11 +73,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdlib>
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 9eda56c825a..33e0f5de5fd 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -31,10 +31,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 5d8b4b69593..d182c812189 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -20,9 +20,9 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 8ebcaff1358..809e4a77fad 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -110,6 +110,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -121,7 +122,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index c351efb0c39..55a256867fa 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 7d2cfbeff38..39f50ade747 100644
--- a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -14,19 +14,21 @@
namespace llvm {
RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
/* StartIdx, Length, RegBank */
- // 0: FPR 32-bit value.
+ // 0: FPR 16-bit value.
+ {0, 16, AArch64::FPRRegBank},
+ // 1: FPR 32-bit value.
{0, 32, AArch64::FPRRegBank},
- // 1: FPR 64-bit value.
+ // 2: FPR 64-bit value.
{0, 64, AArch64::FPRRegBank},
- // 2: FPR 128-bit value.
+ // 3: FPR 128-bit value.
{0, 128, AArch64::FPRRegBank},
- // 3: FPR 256-bit value.
+ // 4: FPR 256-bit value.
{0, 256, AArch64::FPRRegBank},
- // 4: FPR 512-bit value.
+ // 5: FPR 512-bit value.
{0, 512, AArch64::FPRRegBank},
- // 5: GPR 32-bit value.
+ // 6: GPR 32-bit value.
{0, 32, AArch64::GPRRegBank},
- // 6: GPR 64-bit value.
+ // 7: GPR 64-bit value.
{0, 64, AArch64::GPRRegBank},
};
@@ -37,58 +39,77 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
{nullptr, 0},
// 3-operands instructions (all binary operations should end up with one of
// those mapping).
- // 1: FPR 32-bit value. <-- This must match First3OpsIdx.
+ // 1: FPR 16-bit value. <-- This must match First3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ // 4: FPR 32-bit value. <-- This must match First3OpsIdx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 4: FPR 64-bit value.
+ // 7: FPR 64-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 7: FPR 128-bit value.
+ // 10: FPR 128-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
- // 10: FPR 256-bit value.
+ // 13: FPR 256-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
- // 13: FPR 512-bit value.
+ // 16: FPR 512-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
- // 16: GPR 32-bit value.
+ // 19: GPR 32-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 19: GPR 64-bit value. <-- This must match Last3OpsIdx.
+ // 22: GPR 64-bit value. <-- This must match Last3OpsIdx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
// Cross register bank copies.
- // 22: FPR 32-bit value to GPR 32-bit value. <-- This must match
+ // 25: FPR 16-bit value to GPR 16-bit (invalid). <-- This must match
// FirstCrossRegCpyIdx.
+ {nullptr, 1},
+ {nullptr, 1},
+ // 27: FPR 32-bit value to GPR 32-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 24: FPR 64-bit value to GPR 64-bit value.
+ // 29: FPR 64-bit value to GPR 64-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
- // 26: FPR 128-bit value to GPR 128-bit value (invalid)
+ // 31: FPR 128-bit value to GPR 128-bit value (invalid)
{nullptr, 1},
{nullptr, 1},
- // 28: FPR 256-bit value to GPR 256-bit value (invalid)
+ // 33: FPR 256-bit value to GPR 256-bit value (invalid)
{nullptr, 1},
{nullptr, 1},
- // 30: FPR 512-bit value to GPR 512-bit value (invalid)
+ // 35: FPR 512-bit value to GPR 512-bit value (invalid)
{nullptr, 1},
{nullptr, 1},
- // 32: GPR 32-bit value to FPR 32-bit value.
+ // 37: GPR 32-bit value to FPR 32-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 34: GPR 64-bit value to FPR 64-bit value. <-- This must match
+ // 39: GPR 64-bit value to FPR 64-bit value. <-- This must match
// LastCrossRegCpyIdx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 41: FPExt: 16 to 32. <-- This must match FPExt16To32Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ // 43: FPExt: 16 to 32. <-- This must match FPExt16To64Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ // 45: FPExt: 32 to 64. <-- This must match FPExt32To64Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
};
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
@@ -145,16 +166,18 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
return -1;
}
if (RBIdx == PMI_FirstFPR) {
- if (Size <= 32)
+ if (Size <= 16)
return 0;
- if (Size <= 64)
+ if (Size <= 32)
return 1;
- if (Size <= 128)
+ if (Size <= 64)
return 2;
- if (Size <= 256)
+ if (Size <= 128)
return 3;
- if (Size <= 512)
+ if (Size <= 256)
return 4;
+ if (Size <= 512)
+ return 5;
return -1;
}
return -1;
@@ -206,4 +229,35 @@ AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
+
+const RegisterBankInfo::ValueMapping *
+AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize,
+ unsigned SrcSize) {
+ // We support:
+ // - For Scalar:
+ // - 16 to 32.
+ // - 16 to 64.
+ // - 32 to 64.
+ // => FPR 16 to FPR 32|64
+ // => FPR 32 to FPR 64
+ // - For vectors:
+ // - v4f16 to v4f32
+ // - v2f32 to v2f64
+ // => FPR 64 to FPR 128
+
+ // Check that we have been asked sensible sizes.
+ if (SrcSize == 16) {
+ assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension");
+ if (DstSize == 32)
+ return &ValMappings[FPExt16To32Idx];
+ return &ValMappings[FPExt16To64Idx];
+ }
+
+ if (SrcSize == 32) {
+ assert(DstSize == 64 && "Unexpected float extension");
+ return &ValMappings[FPExt32To64Idx];
+ }
+ assert((SrcSize == 64 || DstSize == 128) && "Unexpected vector extension");
+ return &ValMappings[FPExt64To128Idx];
+}
} // End llvm namespace.
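Note on the getFPExtMapping helper added above: it only accepts the (DstSize, SrcSize) pairs enumerated in its comment, and picks one of four statically allocated mapping entries. A minimal standalone C++ sketch of that dispatch follows; the index values mirror the ValueMappingIdx enumerators added in AArch64RegisterBankInfo.h and are illustrative, not the generated LLVM tables.

    #include <cassert>

    // Illustrative stand-ins for the ValueMappingIdx enumerators in the patch.
    enum FPExtMappingIdx {
      FPExt16To32Idx = 41,
      FPExt16To64Idx = 43,
      FPExt32To64Idx = 45,
      FPExt64To128Idx = 47
    };

    // Returns the index of the first of the two ValueMappings (destination,
    // then source) used for a G_FPEXT from SrcSize bits to DstSize bits.
    static unsigned getFPExtMappingIdx(unsigned DstSize, unsigned SrcSize) {
      if (SrcSize == 16) {
        assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension");
        return DstSize == 32 ? FPExt16To32Idx : FPExt16To64Idx;
      }
      if (SrcSize == 32) {
        assert(DstSize == 64 && "Unexpected float extension");
        return FPExt32To64Idx;
      }
      // Vector case: v4f16 -> v4f32 or v2f32 -> v2f64, i.e. FPR64 -> FPR128.
      assert(SrcSize == 64 && DstSize == 128 && "Unexpected vector extension");
      return FPExt64To128Idx;
    }
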
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index bec872ae8c0..81dac1be56c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -42,6 +42,7 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -71,7 +72,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetCallingConv.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
@@ -4981,7 +4981,7 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
- ExtraSteps = VT == MVT::f64 ? 3 : 2;
+ ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
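The ExtraSteps values mentioned in the comment above follow from Newton-Raphson refinement roughly doubling the number of correct bits per step. A small sketch of that arithmetic, assuming an initial estimate accurate to about 2^-8 (the helper name is illustrative, not LLVM API):

    #include <cmath>

    // Starting from ~8 correct bits, each refinement step roughly doubles the
    // accuracy: 8 -> 16 -> 32 bits covers the 23-bit float mantissa in 2 steps,
    // and 8 -> 16 -> 32 -> 64 bits covers the 52-bit double mantissa in 3 steps.
    static int extraSteps(int MantissaBits, int InitialBits = 8) {
      return static_cast<int>(
          std::ceil(std::log2(double(MantissaBits) / InitialBits)));
    }
    // extraSteps(23) == 2 for float, extraSteps(52) == 3 for double.
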
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 24758e97888..2f10bef1e47 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -17,7 +17,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index eabbc05a033..e014d5bd569 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -335,6 +335,7 @@ let RecomputePerFunction = 1 in {
}
include "AArch64InstrFormats.td"
+include "SVEInstrFormats.td"
//===----------------------------------------------------------------------===//
@@ -6275,3 +6276,4 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
include "AArch64InstrAtomics.td"
+include "AArch64SVEInstrInfo.td"
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 2d45be37ca7..3a456255224 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -23,6 +23,110 @@
using namespace llvm;
+/// FIXME: The following static functions are SizeChangeStrategy functions
+/// that are meant to temporarily mimic the behaviour of the old legalization
+/// based on doubling/halving non-legal types as closely as possible. This is
+/// not entirly possible as only legalizing the types that are exactly a power
+/// of 2 times the size of the legal types would require specifying all those
+/// sizes explicitly.
+/// In practice, not specifying those isn't a problem, and the below functions
+/// should disappear quickly as we add support for legalizing non-power-of-2
+/// sized types further.
+static void
+addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result,
+ const LegalizerInfo::SizeAndActionsVec &v) {
+ for (unsigned i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 < v[i].first && i + 1 < v.size() &&
+ v[i + 1].first != v[i].first + 1)
+ result.push_back({v[i].first + 1, LegalizerInfo::Unsupported});
+ }
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_narrow_128_ToLargest(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 2);
+ LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::WidenScalar},
+ {2, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ assert(Largest + 1 < 128);
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ result.push_back({128, LegalizerInfo::NarrowScalar});
+ result.push_back({129, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_16(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar},
+ {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 9);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16_narrowToLargest(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::NarrowScalar});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16_32(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 33);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported},
+ {32, LegalizerInfo::WidenScalar}, {33, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
AArch64LegalizerInfo::AArch64LegalizerInfo() {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 64);
@@ -42,8 +146,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s16, s32, s64, p0})
setAction({G_PHI, Ty}, Legal);
- for (auto Ty : {s1, s8})
- setAction({G_PHI, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1_8);
for (auto Ty : { s32, s64 })
setAction({G_BSWAP, Ty}, Legal);
@@ -54,15 +157,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s32, s64, v2s32, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({BinOp, Ty}, WidenScalar);
+ if (BinOp != G_ADD)
+ setLegalizeScalarToDifferentSizeStrategy(BinOp, 0,
+ widen_1_8_16_narrowToLargest);
}
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s64}, Legal);
- for (auto Ty : {s1, s8, s16, s32})
- setAction({G_GEP, 1, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_GEP, 1, widen_1_8_16_32);
setAction({G_PTR_MASK, p0}, Legal);
@@ -70,16 +173,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({BinOp, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1_8_16);
}
for (unsigned BinOp : {G_SREM, G_UREM})
for (auto Ty : { s1, s8, s16, s32, s64 })
setAction({BinOp, Ty}, Lower);
- for (unsigned Op : {G_SMULO, G_UMULO})
- setAction({Op, s64}, Lower);
+ for (unsigned Op : {G_SMULO, G_UMULO}) {
+ setAction({Op, 0, s64}, Lower);
+ setAction({Op, 1, s1}, Legal);
+ }
for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) {
for (auto Ty : { s32, s64 })
@@ -101,8 +205,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_INSERT, Ty}, Legal);
setAction({G_INSERT, 1, Ty}, Legal);
}
+ setLegalizeScalarToDifferentSizeStrategy(G_INSERT, 0,
+ widen_1_8_16_narrowToLargest);
for (auto Ty : {s1, s8, s16}) {
- setAction({G_INSERT, Ty}, WidenScalar);
setAction({G_INSERT, 1, Ty}, Legal);
// FIXME: Can't widen the sources because that violates the constraints on
// G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
@@ -118,7 +223,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
- setAction({MemOp, s1}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(MemOp, 0,
+ widen_1_narrow_128_ToLargest);
// And everything's fine in addrspace 0.
setAction({MemOp, 1, p0}, Legal);
@@ -132,20 +238,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_CONSTANT, p0}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({TargetOpcode::G_CONSTANT, Ty}, WidenScalar);
-
- setAction({TargetOpcode::G_FCONSTANT, s16}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_FCONSTANT, 0, widen_16);
setAction({G_ICMP, 1, s32}, Legal);
setAction({G_ICMP, 1, s64}, Legal);
setAction({G_ICMP, 1, p0}, Legal);
- for (auto Ty : {s1, s8, s16}) {
- setAction({G_ICMP, Ty}, WidenScalar);
- setAction({G_FCMP, Ty}, WidenScalar);
- setAction({G_ICMP, 1, Ty}, WidenScalar);
- }
+ setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_FCMP, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1, widen_1_8_16);
setAction({G_ICMP, s32}, Legal);
setAction({G_FCMP, s32}, Legal);
@@ -159,12 +261,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_ANYEXT, Ty}, Legal);
}
- for (auto Ty : { s1, s8, s16, s32 }) {
- setAction({G_ZEXT, 1, Ty}, Legal);
- setAction({G_SEXT, 1, Ty}, Legal);
- setAction({G_ANYEXT, 1, Ty}, Legal);
- }
-
// FP conversions
for (auto Ty : { s16, s32 }) {
setAction({G_FPTRUNC, Ty}, Legal);
@@ -176,12 +272,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_FPEXT, Ty}, Legal);
}
- for (auto Ty : { s1, s8, s16, s32 })
- setAction({G_TRUNC, Ty}, Legal);
-
- for (auto Ty : { s8, s16, s32, s64 })
- setAction({G_TRUNC, 1, Ty}, Legal);
-
// Conversions
for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 0, Ty}, Legal);
@@ -189,12 +279,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_SITOFP, 1, Ty}, Legal);
setAction({G_UITOFP, 1, Ty}, Legal);
}
- for (auto Ty : { s1, s8, s16 }) {
- setAction({G_FPTOSI, 0, Ty}, WidenScalar);
- setAction({G_FPTOUI, 0, Ty}, WidenScalar);
- setAction({G_SITOFP, 1, Ty}, WidenScalar);
- setAction({G_UITOFP, 1, Ty}, WidenScalar);
- }
+ setLegalizeScalarToDifferentSizeStrategy(G_FPTOSI, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_FPTOUI, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_SITOFP, 1, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_UITOFP, 1, widen_1_8_16);
for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 1, Ty}, Legal);
@@ -209,8 +297,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_BRINDIRECT, p0}, Legal);
// Select
- for (auto Ty : {s1, s8, s16})
- setAction({G_SELECT, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_SELECT, 0, widen_1_8_16);
for (auto Ty : {s32, s64, p0})
setAction({G_SELECT, Ty}, Legal);
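The widen_* SizeChangeStrategy helpers added in this file each build a SizeAndActionsVec: a sorted list of (bit width, action) pairs in which each action applies from that width up to the next entry. As a rough illustration of the intended result (using simplified stand-ins, not the LLVM types), widen_1_8 applied to legal scalar sizes {32, 64} would produce a table like the following:

    #include <utility>
    #include <vector>

    // Simplified stand-ins for LegalizerInfo::LegalizeAction and
    // LegalizerInfo::SizeAndActionsVec.
    enum Action { Legal, WidenScalar, NarrowScalar, Unsupported };
    using SizeAndActionsVec = std::vector<std::pair<unsigned, Action>>;

    // Expected output of widen_1_8({{32, Legal}, {64, Legal}}): widen 1-bit and
    // 8-bit scalars, keep 32/64 legal, and mark every other width range
    // unsupported (each entry covers widths up to the next entry).
    const SizeAndActionsVec Widen_1_8_Example = {
        {1, WidenScalar}, {2, Unsupported},
        {8, WidenScalar}, {9, Unsupported},
        {32, Legal},      {33, Unsupported},
        {64, Legal},      {65, Unsupported},
    };
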
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index bd5211d4ff5..bd4bdaa6d12 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -15,7 +15,7 @@
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 391e8ed633d..83bf493c9f0 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -87,9 +87,9 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
{PMI_GPR32, PMI_GPR64}) &&
"PartialMappingIdx's are incorrectly ordered");
- assert(checkPartialMappingIdx(
- PMI_FirstFPR, PMI_LastFPR,
- {PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512}) &&
+ assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
+ {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
+ PMI_FPR256, PMI_FPR512}) &&
"PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
@@ -102,6 +102,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
+ CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
@@ -121,6 +122,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP(GPR, 32);
CHECK_VALUEMAP(GPR, 64);
+ CHECK_VALUEMAP(FPR, 16);
CHECK_VALUEMAP(FPR, 32);
CHECK_VALUEMAP(FPR, 64);
CHECK_VALUEMAP(FPR, 128);
@@ -173,6 +175,30 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
+#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
+ do { \
+ unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \
+ unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \
+ (void)PartialMapDstIdx; \
+ (void)PartialMapSrcIdx; \
+ const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \
+ (void)Map; \
+ assert(Map[0].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
+ Map[0].NumBreakDowns == 1 && "FPR" #DstSize \
+ " Dst is incorrectly initialized"); \
+ assert(Map[1].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
+ Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \
+ " Src is incorrectly initialized"); \
+ \
+ } while (false)
+
+ CHECK_VALUEMAP_FPEXT(32, 16);
+ CHECK_VALUEMAP_FPEXT(64, 16);
+ CHECK_VALUEMAP_FPEXT(64, 32);
+ CHECK_VALUEMAP_FPEXT(128, 64);
+
assert(verify(TRI) && "Invalid register bank information");
}
@@ -453,6 +479,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
return getSameKindOfOperandsMapping(MI);
+ case TargetOpcode::G_FPEXT: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ return getInstructionMapping(
+ DefaultMappingID, /*Cost*/ 1,
+ getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
+ /*NumOperands*/ 2);
+ }
case TargetOpcode::COPY: {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 6d74a47095a..008221dbef5 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -25,10 +25,10 @@ class TargetRegisterInfo;
class AArch64GenRegisterBankInfo : public RegisterBankInfo {
protected:
-
enum PartialMappingIdx {
PMI_None = -1,
- PMI_FPR32 = 1,
+ PMI_FPR16 = 1,
+ PMI_FPR32,
PMI_FPR64,
PMI_FPR128,
PMI_FPR256,
@@ -37,7 +37,7 @@ protected:
PMI_GPR64,
PMI_FirstGPR = PMI_GPR32,
PMI_LastGPR = PMI_GPR64,
- PMI_FirstFPR = PMI_FPR32,
+ PMI_FirstFPR = PMI_FPR16,
PMI_LastFPR = PMI_FPR512,
PMI_Min = PMI_FirstFPR,
};
@@ -49,11 +49,15 @@ protected:
enum ValueMappingIdx {
InvalidIdx = 0,
First3OpsIdx = 1,
- Last3OpsIdx = 19,
+ Last3OpsIdx = 22,
DistanceBetweenRegBanks = 3,
- FirstCrossRegCpyIdx = 22,
- LastCrossRegCpyIdx = 34,
- DistanceBetweenCrossRegCpy = 2
+ FirstCrossRegCpyIdx = 25,
+ LastCrossRegCpyIdx = 39,
+ DistanceBetweenCrossRegCpy = 2,
+ FPExt16To32Idx = 41,
+ FPExt16To64Idx = 43,
+ FPExt32To64Idx = 45,
+ FPExt64To128Idx = 47,
};
static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
@@ -82,6 +86,15 @@ protected:
static const RegisterBankInfo::ValueMapping *
getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size);
+ /// Get the instruction mapping for G_FPEXT.
+ ///
+ /// \pre (DstSize, SrcSize) pair is one of the following:
+ /// (32, 16), (64, 16), (64, 32), (128, 64)
+ ///
+ /// \return An InstructionMapping with statically allocated OperandsMapping.
+ static const RegisterBankInfo::ValueMapping *
+ getFPExtMapping(unsigned DstSize, unsigned SrcSize);
+
#define GET_TARGET_REGBANK_CLASS
#include "AArch64GenRegisterBank.inc"
};
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 91b1481f5ef..1059bc37c8f 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index ee5d3547aaa..a9fb0200d80 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -32,6 +32,12 @@ let Namespace = "AArch64" in {
def qsub : SubRegIndex<64>;
def sube64 : SubRegIndex<64>;
def subo64 : SubRegIndex<64>;
+ // SVE
+ def zsub : SubRegIndex<128>;
+ // Note: zsub_hi should never be used directly because it represents
+ // the scalable part of the SVE vector and cannot be manipulated as a
+ // subvector in the same way the lower 128bits can.
+ def zsub_hi : SubRegIndex<128>;
// Note: Code depends on these having consecutive numbers
def dsub0 : SubRegIndex<64>;
def dsub1 : SubRegIndex<64>;
@@ -460,11 +466,11 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
// assmebler matching.
def VectorReg64AsmOperand : AsmOperandClass {
let Name = "VectorReg64";
- let PredicateMethod = "isVectorReg";
+ let PredicateMethod = "isNeonVectorReg";
}
def VectorReg128AsmOperand : AsmOperandClass {
let Name = "VectorReg128";
- let PredicateMethod = "isVectorReg";
+ let PredicateMethod = "isNeonVectorReg";
}
def V64 : RegisterOperand<FPR64, "printVRegOperand"> {
@@ -475,7 +481,10 @@ def V128 : RegisterOperand<FPR128, "printVRegOperand"> {
let ParserMatchClass = VectorReg128AsmOperand;
}
-def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; }
+def VectorRegLoAsmOperand : AsmOperandClass {
+ let Name = "VectorRegLo";
+ let PredicateMethod = "isNeonVectorRegLo";
+}
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
let ParserMatchClass = VectorRegLoAsmOperand;
}
@@ -642,3 +651,119 @@ def XSeqPairClassOperand :
//===----- END: v8.1a atomic CASP register operands -----------------------===//
+
+// The part of SVE registers that don't overlap Neon registers.
+// These are only used as part of clobber lists.
+def Z0_HI : AArch64Reg<0, "z0_hi">;
+def Z1_HI : AArch64Reg<1, "z1_hi">;
+def Z2_HI : AArch64Reg<2, "z2_hi">;
+def Z3_HI : AArch64Reg<3, "z3_hi">;
+def Z4_HI : AArch64Reg<4, "z4_hi">;
+def Z5_HI : AArch64Reg<5, "z5_hi">;
+def Z6_HI : AArch64Reg<6, "z6_hi">;
+def Z7_HI : AArch64Reg<7, "z7_hi">;
+def Z8_HI : AArch64Reg<8, "z8_hi">;
+def Z9_HI : AArch64Reg<9, "z9_hi">;
+def Z10_HI : AArch64Reg<10, "z10_hi">;
+def Z11_HI : AArch64Reg<11, "z11_hi">;
+def Z12_HI : AArch64Reg<12, "z12_hi">;
+def Z13_HI : AArch64Reg<13, "z13_hi">;
+def Z14_HI : AArch64Reg<14, "z14_hi">;
+def Z15_HI : AArch64Reg<15, "z15_hi">;
+def Z16_HI : AArch64Reg<16, "z16_hi">;
+def Z17_HI : AArch64Reg<17, "z17_hi">;
+def Z18_HI : AArch64Reg<18, "z18_hi">;
+def Z19_HI : AArch64Reg<19, "z19_hi">;
+def Z20_HI : AArch64Reg<20, "z20_hi">;
+def Z21_HI : AArch64Reg<21, "z21_hi">;
+def Z22_HI : AArch64Reg<22, "z22_hi">;
+def Z23_HI : AArch64Reg<23, "z23_hi">;
+def Z24_HI : AArch64Reg<24, "z24_hi">;
+def Z25_HI : AArch64Reg<25, "z25_hi">;
+def Z26_HI : AArch64Reg<26, "z26_hi">;
+def Z27_HI : AArch64Reg<27, "z27_hi">;
+def Z28_HI : AArch64Reg<28, "z28_hi">;
+def Z29_HI : AArch64Reg<29, "z29_hi">;
+def Z30_HI : AArch64Reg<30, "z30_hi">;
+def Z31_HI : AArch64Reg<31, "z31_hi">;
+
+// SVE variable-size vector registers
+let SubRegIndices = [zsub,zsub_hi] in {
+def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>;
+def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>;
+def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>;
+def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>;
+def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>;
+def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>;
+def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>;
+def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>;
+def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>;
+def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>;
+def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>;
+def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>;
+def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>;
+def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>;
+def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>;
+def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>;
+def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>;
+def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>;
+def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>;
+def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>;
+def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>;
+def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>;
+def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>;
+def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>;
+def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>;
+def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>;
+def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>;
+def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>;
+def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>;
+def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>;
+def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
+def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
+}
+
+class SVERegOp <string Suffix, AsmOperandClass C,
+ RegisterClass RC> : RegisterOperand<RC> {
+ let PrintMethod = !if(!eq(Suffix, ""),
+ "printSVERegOp<>",
+ "printSVERegOp<'" # Suffix # "'>");
+ let ParserMatchClass = C;
+}
+
+class ZPRRegOp <string Suffix, AsmOperandClass C,
+ RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
+
+//******************************************************************************
+
+// SVE vector register class
+def ZPR : RegisterClass<"AArch64",
+ [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+ nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32,
+ nxv1f64, nxv2f64],
+ 128, (sequence "Z%u", 0, 31)> {
+ let Size = 128;
+}
+
+class ZPRAsmOperand <string name, int Width>: AsmOperandClass {
+ let Name = "SVE" # name # "Reg";
+ let PredicateMethod = "isSVEDataVectorRegOfWidth<" # Width # ">";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseSVEDataVector<"
+ # !if(!eq(Width, -1), "false", "true") # ">";
+}
+
+def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", -1>;
+def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>;
+def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>;
+def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
+def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
+def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
+
+def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>;
+def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>;
+def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>;
+def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>;
+def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>;
+def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>;
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
new file mode 100644
index 00000000000..7da0b28d22d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -0,0 +1,17 @@
+//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSVE] in {
+ defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
+ defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
+}
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index 18d000ace94..90ebd78f4ab 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -26,6 +26,8 @@ def CortexA53Model : SchedMachineModel {
// Specification - Instruction Timings"
// v 1.0 Spreadsheet
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 5d1608ef04a..ade03f23f8c 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -31,6 +31,8 @@ def CortexA57Model : SchedMachineModel {
// experiments and benchmarking data.
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td
index 9fd3ae6818e..7a474ba8ef9 100644
--- a/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -18,6 +18,8 @@ def CycloneModel : SchedMachineModel {
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 16; // 14-19 cycles are typical.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index 44fd94fc3d4..7277198b585 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -23,6 +23,8 @@ def FalkorModel : SchedMachineModel {
let LoadLatency = 3; // Optimistic load latency.
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedKryo.td b/lib/Target/AArch64/AArch64SchedKryo.td
index 4e491a04c78..ce2afd499af 100644
--- a/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/lib/Target/AArch64/AArch64SchedKryo.td
@@ -27,6 +27,8 @@ def KryoModel : SchedMachineModel {
// experiments and benchmarking data.
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedM1.td b/lib/Target/AArch64/AArch64SchedM1.td
index 6133efed020..6c86fcdd29b 100644
--- a/lib/Target/AArch64/AArch64SchedM1.td
+++ b/lib/Target/AArch64/AArch64SchedM1.td
@@ -24,6 +24,8 @@ def ExynosM1Model : SchedMachineModel {
let LoadLatency = 4; // Optimistic load cases.
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td
index 3cdd2047fbb..585688aae27 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -25,6 +25,8 @@ def ThunderXT8XModel : SchedMachineModel {
let MispredictPenalty = 8; // Branch mispredict penalty.
let PostRAScheduler = 1; // Use PostRA scheduler.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 4ab7555594a..fd60459382a 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -25,6 +25,8 @@ def ThunderX2T99Model : SchedMachineModel {
let LoopMicroOpBufferSize = 32;
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
// Define the issue ports.
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index fe984ccbaf1..78fc322158b 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -16,10 +16,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
index f53af2315ec..2ff644d2bcd 100644
--- a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
+++ b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
@@ -33,11 +33,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1f06d4065b3..de048a24534 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -59,12 +59,14 @@ using namespace llvm;
namespace {
+enum class RegKind {Scalar, NeonVector, SVEDataVector};
+
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
// Map of register aliases registers via the .req directive.
- StringMap<std::pair<bool, unsigned>> RegisterReqs;
+ StringMap<std::pair<RegKind, unsigned>> RegisterReqs;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -77,9 +79,10 @@ private:
void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
- unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
+ unsigned matchRegisterNameAlias(StringRef Name, RegKind Kind);
int tryParseRegister();
int tryMatchVectorRegister(StringRef &Kind, bool expected);
+ int tryParseSVEDataVectorRegister(const AsmToken &Tok, StringRef &Kind);
bool parseRegister(OperandVector &Operands);
bool parseSymbolicImmVal(const MCExpr *&ImmVal);
bool parseVectorList(OperandVector &Operands);
@@ -126,8 +129,10 @@ private:
OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands);
OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands);
- bool tryParseVectorRegister(OperandVector &Operands);
+ bool tryParseNeonVectorRegister(OperandVector &Operands);
OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands);
+ template <bool ParseSuffix>
+ OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands);
public:
enum AArch64MatchResultTy {
@@ -194,7 +199,9 @@ private:
struct RegOp {
unsigned RegNum;
- bool isVector;
+ RegKind Kind;
+
+ int ElementWidth;
};
struct VectorListOp {
@@ -804,34 +811,50 @@ public:
return SysReg.PStateField != -1U;
}
- bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
- bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+ bool isReg() const override {
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar;
+ }
+
+ bool isNeonVectorReg() const {
+ return Kind == k_Register && Reg.Kind == RegKind::NeonVector;
+ }
- bool isVectorRegLo() const {
- return Kind == k_Register && Reg.isVector &&
+ bool isNeonVectorRegLo() const {
+ return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
Reg.RegNum);
}
+ template <unsigned Class = AArch64::ZPRRegClassID>
+ bool isSVEDataVectorReg() const {
+ return (Kind == k_Register && Reg.Kind == RegKind::SVEDataVector) &&
+ AArch64MCRegisterClasses[Class].contains(getReg());
+ }
+
+ template <int ElementWidth> bool isSVEDataVectorRegOfWidth() const {
+ return isSVEDataVectorReg() &&
+ (ElementWidth == -1 || Reg.ElementWidth == ElementWidth);
+ }
+
bool isGPR32as64() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
}
bool isWSeqPair() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
bool isXSeqPair() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
bool isGPR64sp0() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum);
}
@@ -1564,10 +1587,22 @@ public:
}
static std::unique_ptr<AArch64Operand>
- CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) {
+ CreateReg(unsigned RegNum, RegKind Kind, SMLoc S, SMLoc E, MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
- Op->Reg.isVector = isVector;
+ Op->Reg.Kind = Kind;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AArch64Operand>
+ CreateReg(unsigned RegNum, RegKind Kind, unsigned ElementWidth,
+ SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
+ Op->Reg.RegNum = RegNum;
+ Op->Reg.ElementWidth = ElementWidth;
+ Op->Reg.Kind = Kind;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -1791,7 +1826,7 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
-static unsigned matchVectorRegName(StringRef Name) {
+static unsigned MatchNeonVectorRegName(StringRef Name) {
return StringSwitch<unsigned>(Name.lower())
.Case("v0", AArch64::Q0)
.Case("v1", AArch64::Q1)
@@ -1853,6 +1888,57 @@ static bool isValidVectorKind(StringRef Name) {
.Default(false);
}
+static unsigned matchSVEDataVectorRegName(StringRef Name) {
+ return StringSwitch<unsigned>(Name.lower())
+ .Case("z0", AArch64::Z0)
+ .Case("z1", AArch64::Z1)
+ .Case("z2", AArch64::Z2)
+ .Case("z3", AArch64::Z3)
+ .Case("z4", AArch64::Z4)
+ .Case("z5", AArch64::Z5)
+ .Case("z6", AArch64::Z6)
+ .Case("z7", AArch64::Z7)
+ .Case("z8", AArch64::Z8)
+ .Case("z9", AArch64::Z9)
+ .Case("z10", AArch64::Z10)
+ .Case("z11", AArch64::Z11)
+ .Case("z12", AArch64::Z12)
+ .Case("z13", AArch64::Z13)
+ .Case("z14", AArch64::Z14)
+ .Case("z15", AArch64::Z15)
+ .Case("z16", AArch64::Z16)
+ .Case("z17", AArch64::Z17)
+ .Case("z18", AArch64::Z18)
+ .Case("z19", AArch64::Z19)
+ .Case("z20", AArch64::Z20)
+ .Case("z21", AArch64::Z21)
+ .Case("z22", AArch64::Z22)
+ .Case("z23", AArch64::Z23)
+ .Case("z24", AArch64::Z24)
+ .Case("z25", AArch64::Z25)
+ .Case("z26", AArch64::Z26)
+ .Case("z27", AArch64::Z27)
+ .Case("z28", AArch64::Z28)
+ .Case("z29", AArch64::Z29)
+ .Case("z30", AArch64::Z30)
+ .Case("z31", AArch64::Z31)
+ .Default(0);
+}
+
+static bool isValidSVEKind(StringRef Name) {
+ return StringSwitch<bool>(Name.lower())
+ .Case(".b", true)
+ .Case(".h", true)
+ .Case(".s", true)
+ .Case(".d", true)
+ .Case(".q", true)
+ .Default(false);
+}
+
+static bool isSVEDataVectorRegister(StringRef Name) {
+ return Name[0] == 'z';
+}
+
static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
char &ElementKind) {
assert(isValidVectorKind(Name));
@@ -1881,19 +1967,30 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
// Matches a register name or register alias previously defined by '.req'
unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
- bool isVector) {
- unsigned RegNum = isVector ? matchVectorRegName(Name)
- : MatchRegisterName(Name);
+ RegKind Kind) {
+ unsigned RegNum;
+ switch (Kind) {
+ case RegKind::Scalar:
+ RegNum = MatchRegisterName(Name);
+ break;
+ case RegKind::NeonVector:
+ RegNum = MatchNeonVectorRegName(Name);
+ break;
+ case RegKind::SVEDataVector:
+ RegNum = matchSVEDataVectorRegName(Name);
+ break;
+ }
- if (RegNum == 0) {
+ if (!RegNum) {
// Check for aliases registered via .req. Canonicalize to lower case.
// That's more consistent since register names are case insensitive, and
// it's how the original entry was passed in from MC/MCParser/AsmParser.
auto Entry = RegisterReqs.find(Name.lower());
if (Entry == RegisterReqs.end())
return 0;
+
// set RegNum if the match is the right kind of register
- if (isVector == Entry->getValue().first)
+ if (Kind == Entry->getValue().first)
RegNum = Entry->getValue().second;
}
return RegNum;
@@ -1909,7 +2006,10 @@ int AArch64AsmParser::tryParseRegister() {
return -1;
std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
+ if (isSVEDataVectorRegister(lowerCase))
+ return -1;
+
+ unsigned RegNum = matchRegisterNameAlias(lowerCase, RegKind::Scalar);
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1940,7 +2040,7 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
// a '.'.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchRegisterNameAlias(Head, true);
+ unsigned RegNum = matchRegisterNameAlias(Head, RegKind::NeonVector);
if (RegNum) {
if (Next != StringRef::npos) {
@@ -2559,8 +2659,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
return MatchOperand_Success;
}
-/// tryParseVectorRegister - Parse a vector register operand.
-bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
+/// tryParseNeonVectorRegister - Parse a vector register operand.
+bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier))
return true;
@@ -2572,7 +2672,9 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
if (Reg == -1)
return true;
Operands.push_back(
- AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext()));
+ AArch64Operand::CreateReg(Reg, RegKind::NeonVector, S, getLoc(),
+ getContext()));
+
// If there was an explicit qualifier, that goes on as a literal text
// operand.
if (!Kind.empty())
@@ -2603,19 +2705,48 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
return false;
}
+// tryParseSVEDataVectorRegister - Try to parse an SVE vector register name with
+// optional kind specifier. If it is a register specifier, eat the token
+// and return it.
+int AArch64AsmParser::tryParseSVEDataVectorRegister(const AsmToken &Tok,
+ StringRef &Kind) {
+ if (Tok.isNot(AsmToken::Identifier))
+ return -1;
+
+ StringRef Name = Tok.getString();
+ // If there is a kind specifier, it's separated from the register name by
+ // a '.'.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+ unsigned RegNum = matchRegisterNameAlias(Head, RegKind::SVEDataVector);
+
+ if (RegNum) {
+ if (Next != StringRef::npos) {
+ Kind = Name.slice(Next, StringRef::npos);
+ if (!isValidSVEKind(Kind)) {
+ TokError("invalid sve vector kind qualifier");
+ return -1;
+ }
+ }
+ return RegNum;
+ }
+
+ return -1;
+}
+
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
SMLoc S = getLoc();
- // Try for a vector register.
- if (!tryParseVectorRegister(Operands))
+ // Try for a vector (neon) register.
+ if (!tryParseNeonVectorRegister(Operands))
return false;
// Try for a scalar register.
int64_t Reg = tryParseRegister();
if (Reg == -1)
return true;
- Operands.push_back(
- AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
+ Operands.push_back(AArch64Operand::CreateReg(Reg, RegKind::Scalar, S,
+ getLoc(), getContext()));
return false;
}
@@ -2783,7 +2914,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false);
+ unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), RegKind::Scalar);
MCContext &Ctx = getContext();
const MCRegisterInfo *RI = Ctx.getRegisterInfo();
@@ -2795,7 +2926,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!parseOptionalToken(AsmToken::Comma)) {
Operands.push_back(
- AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+ AArch64Operand::CreateReg(RegNum, RegKind::Scalar, S, getLoc(), Ctx));
return MatchOperand_Success;
}
@@ -2814,7 +2945,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
}
Operands.push_back(
- AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+ AArch64Operand::CreateReg(RegNum, RegKind::Scalar, S, getLoc(), Ctx));
return MatchOperand_Success;
}
@@ -3529,8 +3660,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Operands[0] = AArch64Operand::CreateToken(
"bfm", false, Op.getStartLoc(), getContext());
Operands[2] = AArch64Operand::CreateReg(
- RegWidth == 32 ? AArch64::WZR : AArch64::XZR, false, SMLoc(),
- SMLoc(), getContext());
+ RegWidth == 32 ? AArch64::WZR : AArch64::XZR, RegKind::Scalar,
+ SMLoc(), SMLoc(), getContext());
Operands[3] = AArch64Operand::CreateImm(
ImmRExpr, LSBOp.getStartLoc(), LSBOp.getEndLoc(), getContext());
Operands.emplace_back(
@@ -3666,8 +3797,9 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
if (Op.isReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
- Op.getEndLoc(), getContext());
+ Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
+ Op.getStartLoc(), Op.getEndLoc(),
+ getContext());
}
}
// FIXME: Likewise for sxt[bh] with a Xd dst operand
@@ -3681,7 +3813,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
if (Op.isReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
+ Op.getStartLoc(),
Op.getEndLoc(), getContext());
}
}
@@ -3697,7 +3830,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
if (Op.isReg()) {
unsigned Reg = getWRegFromXReg(Op.getReg());
- Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
+ Op.getStartLoc(),
Op.getEndLoc(), getContext());
}
}
@@ -4158,14 +4292,25 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Eat the '.req' token.
SMLoc SRegLoc = getLoc();
unsigned RegNum = tryParseRegister();
- bool IsVector = false;
+ RegKind RegisterKind = RegKind::Scalar;
if (RegNum == static_cast<unsigned>(-1)) {
StringRef Kind;
+ RegisterKind = RegKind::NeonVector;
RegNum = tryMatchVectorRegister(Kind, false);
if (!Kind.empty())
return Error(SRegLoc, "vector register without type specifier expected");
- IsVector = true;
+ }
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ StringRef Kind;
+ RegisterKind = RegKind::SVEDataVector;
+ int RegNumTmp = tryParseSVEDataVectorRegister(Parser.getTok(), Kind);
+ if (RegNumTmp != -1)
+ Parser.Lex();
+ RegNum = RegNumTmp;
+ if (!Kind.empty())
+ return Error(SRegLoc, "sve vector register without type specifier expected");
}
if (RegNum == static_cast<unsigned>(-1))
@@ -4176,7 +4321,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
"unexpected input in .req directive"))
return true;
- auto pair = std::make_pair(IsVector, RegNum);
+ auto pair = std::make_pair(RegisterKind, RegNum);
if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
@@ -4388,8 +4533,43 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
&AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID]);
}
- Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(),
- getContext()));
+ Operands.push_back(AArch64Operand::CreateReg(Pair, RegKind::Scalar, S,
+ getLoc(), getContext()));
+
+ return MatchOperand_Success;
+}
+
+template <bool ParseSuffix>
+OperandMatchResultTy
+AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const SMLoc S = getLoc();
+  // Check for an SVE vector register specifier first.
+ StringRef Kind;
+ int RegNum = tryParseSVEDataVectorRegister(Parser.getTok(), Kind);
+ if (RegNum == -1)
+ return MatchOperand_NoMatch;
+
+ // Eat the SVE Register Token
+ Parser.Lex();
+
+ if (ParseSuffix && Kind.empty())
+ return MatchOperand_NoMatch;
+
+ unsigned ElementWidth = StringSwitch<unsigned>(Kind.lower())
+ .Case("", -1)
+ .Case(".b", 8)
+ .Case(".h", 16)
+ .Case(".s", 32)
+ .Case(".d", 64)
+ .Case(".q", 128)
+ .Default(0);
+ if (!ElementWidth)
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(
+ AArch64Operand::CreateReg(RegNum, RegKind::SVEDataVector, ElementWidth,
+ S, S, getContext()));
return MatchOperand_Success;
}
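
As an aside on the parsing logic above: the new tryParseSVEDataVectorRegister splits an operand such as "z7.d" at the first '.', matches the head against the Z-register names, and tryParseSVEDataVector then maps the suffix onto an element width. A minimal standalone sketch of that split and mapping (plain C++ with hypothetical names, not the LLVM API):

#include <cassert>
#include <string>
#include <utility>

// Hypothetical re-statement of the suffix handling added above: split an
// assembler operand like "z7.d" into the register head and the element-width
// suffix, then map the suffix the same way tryParseSVEDataVector does.
static std::pair<std::string, unsigned> splitSVEOperand(const std::string &Op) {
  std::string::size_type Dot = Op.find('.');
  std::string Head = Op.substr(0, Dot);                                 // "z7"
  std::string Suffix = Dot == std::string::npos ? "" : Op.substr(Dot);  // ".d"

  unsigned ElementWidth = 0;
  if (Suffix.empty())       ElementWidth = unsigned(-1); // width unconstrained
  else if (Suffix == ".b")  ElementWidth = 8;
  else if (Suffix == ".h")  ElementWidth = 16;
  else if (Suffix == ".s")  ElementWidth = 32;
  else if (Suffix == ".d")  ElementWidth = 64;
  else if (Suffix == ".q")  ElementWidth = 128;
  // anything else stays 0, which the parser treats as "no match"

  return {Head, ElementWidth};
}

int main() {
  assert(splitSVEOperand("z7.d").second == 64);
  assert(splitSVEOperand("z31.b").first == "z31");
  assert(splitSVEOperand("z0").second == unsigned(-1));
  return 0;
}
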
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 73db32c0487..aea1b4f2d2c 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -85,6 +85,9 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decode);
static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Address,
@@ -436,6 +439,27 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
+static const unsigned ZPRDecoderTable[] = {
+ AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3,
+ AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7,
+ AArch64::Z8, AArch64::Z9, AArch64::Z10, AArch64::Z11,
+ AArch64::Z12, AArch64::Z13, AArch64::Z14, AArch64::Z15,
+ AArch64::Z16, AArch64::Z17, AArch64::Z18, AArch64::Z19,
+ AArch64::Z20, AArch64::Z21, AArch64::Z22, AArch64::Z23,
+ AArch64::Z24, AArch64::Z25, AArch64::Z26, AArch64::Z27,
+ AArch64::Z28, AArch64::Z29, AArch64::Z30, AArch64::Z31
+};
+
+static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void* Decoder) {
+ if (RegNo > 31)
+ return Fail;
+
+ unsigned Register = ZPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
static const unsigned VectorDecoderTable[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 62e5d02f603..bdf71b095fd 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -1340,3 +1340,23 @@ void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
O << "#" << (Val * Angle) + Remainder;
}
+template <char suffix>
+void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ switch (suffix) {
+ case 0:
+ case 'b':
+ case 'h':
+ case 's':
+ case 'd':
+ case 'q':
+ break;
+ default: llvm_unreachable("Invalid kind specifier.");
+ }
+
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ O << getRegisterName(Reg);
+ if (suffix != 0)
+ O << '.' << suffix;
+} \ No newline at end of file
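
The templated printSVERegOp added here prints the register name and, when the template parameter is a non-zero kind character, appends '.' plus that character, so an instantiation with suffix 's' would print, e.g., "z3.s". A trivial sketch of the formatting (plain C++, hypothetical names, not the MC printer API):

#include <iostream>
#include <string>

// Sketch of the suffix handling in printSVERegOp: print the register name,
// then append ".<kind>" unless the template character is 0.
template <char Suffix>
void printSVEReg(std::ostream &OS, const std::string &RegName) {
  OS << RegName;
  if (Suffix != 0)
    OS << '.' << Suffix;
}

int main() {
  printSVEReg<'s'>(std::cout, "z3"); // prints "z3.s"
  std::cout << '\n';
  printSVEReg<0>(std::cout, "z3");   // prints "z3"
  std::cout << '\n';
  return 0;
}
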
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 8515ad24c71..76f20f042ce 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -165,6 +165,9 @@ protected:
void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O);
+ template <char = 0>
+ void printSVERegOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
class AArch64AppleInstPrinter : public AArch64InstPrinter {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 7fba4849438..c5da457c38f 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -106,13 +106,15 @@ AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
PrivateLabelPrefix = ".L";
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::WinEH;
+ CodePointerSize = 8;
}
AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
CommentString = ";";
+ ExceptionsType = ExceptionHandling::WinEH;
}
AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
CommentString = "//";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 9d0f39e5f6a..c88363d2c25 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -23,7 +23,15 @@ public:
std::unique_ptr<MCCodeEmitter> CE,
raw_pwrite_stream &OS)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {}
+
+ void FinishImpl() override;
};
+
+void AArch64WinCOFFStreamer::FinishImpl() {
+ EmitFrames(nullptr);
+
+ MCWinCOFFStreamer::FinishImpl();
+}
} // end anonymous namespace
namespace llvm {
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
new file mode 100644
index 00000000000..e74bab8b7fe
--- /dev/null
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -0,0 +1,41 @@
+//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Arithmetic - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> {
+ def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>;
+ def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>;
+ def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>;
+}
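
For readers following the bit layout in sve_int_bin_cons_arit_0, the 'let Inst{...}' lines compose one 32-bit word from the size, opcode and register fields. A small hand-written sketch of that same packing (an assumed helper for illustration, not generated TableGen output):

#include <cassert>
#include <cstdint>

// Sketch of the field packing described by the 'let Inst{...}' lines above:
//   [31:24]=0b00000100  [23:22]=sz8_64  [21]=1  [20:16]=Zm
//   [15:13]=0b000       [12:10]=opc     [9:5]=Zn  [4:0]=Zd
static uint32_t encodeSVEBinConsArit0(unsigned Sz, unsigned Opc,
                                      unsigned Zm, unsigned Zn, unsigned Zd) {
  assert(Sz < 4 && Opc < 8 && Zm < 32 && Zn < 32 && Zd < 32);
  return (0x04u << 24) | (Sz << 22) | (1u << 21) | (Zm << 16) |
         (0u << 13) | (Opc << 10) | (Zn << 5) | Zd;
}

int main() {
  // e.g. a .d-sized form (sz8_64 = 0b11) with opc 0b000, Zm=Z1, Zn=Z2, Zd=Z0.
  uint32_t Inst = encodeSVEBinConsArit0(0b11, 0b000, 1, 2, 0);
  assert((Inst >> 21) & 1); // bit 21 is always set by this format
  return 0;
}
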
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 156f7bc6512..b17b6716766 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -400,7 +400,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
return false;
FastMathFlags FMF = FPOp->getFastMathFlags();
- bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
+ bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() ||
FMF.allowReciprocal();
// With UnsafeDiv node will be optimized to just rcp and mul.
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 2329fffd521..91fe921bfee 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -15,7 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c313e4a04ef..f04efd71fa0 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -204,6 +204,7 @@ private:
void SelectADD_SUB_I64(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
+ void SelectMAD_64_32(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -594,6 +595,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectDIV_SCALE(N);
return;
}
+ case AMDGPUISD::MAD_I64_I32:
+ case AMDGPUISD::MAD_U64_U32: {
+ SelectMAD_64_32(N);
+ return;
+ }
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -814,6 +820,19 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
+ SDLoc SL(N);
+ bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
+ unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
+
+ SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ Clamp };
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+}
+
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fe2c9337721..d502b77447d 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -128,27 +128,20 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op)
-{
- assert(Op.getOpcode() == ISD::OR);
-
- SDValue N0 = Op->getOperand(0);
- SDValue N1 = Op->getOperand(1);
- EVT VT = N0.getValueType();
-
- if (VT.isInteger() && !VT.isVector()) {
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(N0, LHSKnown);
+unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
+ KnownBits Known;
+ EVT VT = Op.getValueType();
+ DAG.computeKnownBits(Op, Known);
- if (LHSKnown.Zero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSKnown);
+ return VT.getSizeInBits() - Known.countMinLeadingZeros();
+}
- if (!(~RHSKnown.Zero & ~LHSKnown.Zero))
- return true;
- }
- }
+unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
- return false;
+  // In order for this to be a signed 24-bit value, bit 23 must
+ // be a sign bit.
+ return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op);
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
@@ -2615,21 +2608,14 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
//===----------------------------------------------------------------------===//
static bool isU24(SDValue Op, SelectionDAG &DAG) {
- KnownBits Known;
- EVT VT = Op.getValueType();
- DAG.computeKnownBits(Op, Known);
-
- return (VT.getSizeInBits() - Known.countMinLeadingZeros()) <= 24;
+ return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
}
static bool isI24(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
-
- // In order for this to be a signed 24-bit value, bit 23, must
- // be a sign bit.
return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
// as unsigned 24-bit values.
- (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
+ AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
}
static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
@@ -2914,21 +2900,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
return DAG.getZExtOrTrunc(Shl, SL, VT);
}
- case ISD::OR:
- if (!isOrEquivalentToAdd(DAG, LHS))
- break;
- LLVM_FALLTHROUGH;
- case ISD::ADD: {
- // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
- SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0),
- SDValue(RHS, 0));
- SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal,
- SDLoc(C2), VT);
- return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V);
- }
- break;
- }
}
if (VT != MVT::i64)
@@ -3946,6 +3917,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MUL_LOHI_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
+ NODE_NAME_CASE(MAD_I64_I32)
+ NODE_NAME_CASE(MAD_U64_U32)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(EXPORT_DONE)
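
Stepping back to the isU24/isI24 refactoring earlier in this file's diff: the new numBitsUnsigned and numBitsSigned helpers report significant bits as the type width minus the known leading zero (or sign) bits, and an operand qualifies for the 24-bit multiply path when that count is <= 24 (unsigned) or < 24 (signed, to preserve a sign bit). A rough standalone analogue for concrete integers rather than DAG known-bits (assumed helper names, plain C++):

#include <cassert>
#include <cstdint>

// Count significant bits of a concrete value, mirroring
// "width - countMinLeadingZeros" from the DAG helpers above.
static unsigned numBitsUnsigned(uint32_t V) {
  unsigned Bits = 0;
  while (V) { ++Bits; V >>= 1; }
  return Bits;
}

static bool isU24(uint32_t V) { return numBitsUnsigned(V) <= 24; }

int main() {
  assert(isU24(0x00FFFFFF));  // exactly 24 significant bits
  assert(!isU24(0x01000000)); // 25 significant bits
  return 0;
}
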
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index cdb15186f86..ba35aeb90ed 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -35,7 +35,8 @@ private:
SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const;
public:
- static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op);
+ static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
+ static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
const AMDGPUSubtarget *Subtarget;
@@ -379,6 +380,8 @@ enum NodeType : unsigned {
MULHI_I24,
MAD_U24,
MAD_I24,
+ MAD_U64_U32,
+ MAD_I64_I32,
MUL_LOHI_I24,
MUL_LOHI_U24,
TEXTURE_FETCH,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 41cc7d7093e..f1a42b42f1f 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -18,7 +18,7 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index e7e54750fe6..714c60a7446 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -487,7 +487,7 @@ bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
if (auto Op = dyn_cast<FPMathOperator>(CI))
- if (Op->hasUnsafeAlgebra())
+ if (Op->isFast())
return true;
const Function *F = CI->getParent()->getParent();
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
@@ -1337,7 +1337,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- nf.getLeads()[0].PtrKind = AMDGPULibFunc::GENERIC;
+ const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
+ nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
if (!Fsincos) return false;
@@ -1350,7 +1351,6 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// The allocaInst allocates the memory in private address space. This need
// to be bitcasted to point to the address space of cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 919e8c1e13c..4671273d61f 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/SmallVector.h>
@@ -458,13 +459,16 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() {
P.ArgType = AMDGPULibFunc::I32;
break;
- case E_CONSTPTR_SWAPGL:
- switch (P.PtrKind & AMDGPULibFunc::ADDR_SPACE) {
- case AMDGPULibFunc::GLOBAL: P.PtrKind = AMDGPULibFunc::LOCAL; break;
- case AMDGPULibFunc::LOCAL: P.PtrKind = AMDGPULibFunc::GLOBAL; break;
+ case E_CONSTPTR_SWAPGL: {
+ unsigned AS = AMDGPULibFunc::getAddrSpaceFromEPtrKind(P.PtrKind);
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: AS = AMDGPUAS::LOCAL_ADDRESS; break;
+ case AMDGPUAS::LOCAL_ADDRESS: AS = AMDGPUAS::GLOBAL_ADDRESS; break;
}
+ P.PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS);
P.PtrKind |= AMDGPULibFunc::CONST;
break;
+ }
default: llvm_unreachable("Unhandeled param rule");
}
@@ -590,19 +594,14 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param,
if (eatTerm(param, 'P')) {
if (eatTerm(param, 'K')) res.PtrKind |= AMDGPULibFunc::CONST;
if (eatTerm(param, 'V')) res.PtrKind |= AMDGPULibFunc::VOLATILE;
+ unsigned AS;
if (!eatTerm(param, "U3AS")) {
- res.PtrKind |= AMDGPULibFunc::PRIVATE;
+ AS = 0;
} else {
- switch(param.front()) {
- case '1': res.PtrKind |= AMDGPULibFunc::GLOBAL; break;
- case '2': res.PtrKind |= AMDGPULibFunc::READONLY;break;
- case '3': res.PtrKind |= AMDGPULibFunc::LOCAL; break;
- case '4': res.PtrKind |= AMDGPULibFunc::GENERIC; break;
- case '5': res.PtrKind |= AMDGPULibFunc::OTHER; break;
- default: return false;
- }
+ AS = param.front() - '0';
drop_front(param, 1);
}
+ res.PtrKind |= AMDGPULibFuncBase::getEPtrKindFromAddrSpace(AS);
} else {
res.PtrKind = AMDGPULibFunc::BYVALUE;
}
@@ -837,7 +836,9 @@ public:
os << 'P';
if (p.PtrKind & AMDGPULibFunc::CONST) os << 'K';
if (p.PtrKind & AMDGPULibFunc::VOLATILE) os << 'V';
- int AS = UseAddrSpace ? (p.PtrKind & AMDGPULibFunc::ADDR_SPACE)-1 : 0;
+ unsigned AS = UseAddrSpace
+ ? AMDGPULibFuncBase::getAddrSpaceFromEPtrKind(p.PtrKind)
+ : 0;
if (AS != 0) os << "U3AS" << AS;
Ptr = p;
p.PtrKind = 0;
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.h b/lib/Target/AMDGPU/AMDGPULibFunc.h
index 8f4297b4a49..5405bc64571 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -283,14 +283,7 @@ public:
enum EPtrKind {
BYVALUE = 0,
- PRIVATE,
- GLOBAL,
- READONLY,
- LOCAL,
- GENERIC,
- OTHER,
-
- ADDR_SPACE = 0xF,
+ ADDR_SPACE = 0xF, // Address space takes value 0x1 ~ 0xF.
CONST = 0x10,
VOLATILE = 0x20
};
@@ -315,6 +308,17 @@ public:
static bool isMangled(EFuncId Id) {
return static_cast<unsigned>(Id) <= static_cast<unsigned>(EI_LAST_MANGLED);
}
+
+ static unsigned getEPtrKindFromAddrSpace(unsigned AS) {
+ assert(((AS + 1) & ~ADDR_SPACE) == 0);
+ return AS + 1;
+ }
+
+ static unsigned getAddrSpaceFromEPtrKind(unsigned Kind) {
+ Kind = Kind & ADDR_SPACE;
+ assert(Kind >= 1);
+ return Kind - 1;
+ }
};
class AMDGPULibFuncImpl : public AMDGPULibFuncBase {
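
The EPtrKind change above stops hard-coding named address spaces and instead stores the numeric address space, biased by one so that 0 still means BYVALUE; getEPtrKindFromAddrSpace and getAddrSpaceFromEPtrKind are inverses over the low ADDR_SPACE nibble, with CONST/VOLATILE kept as separate flag bits. A small sketch of that round trip (constants copied from the enum above, free functions standing in for the static members):

#include <cassert>

// Low nibble holds the biased address space; 0x10/0x20 remain flag bits.
constexpr unsigned ADDR_SPACE = 0xF;
constexpr unsigned CONST = 0x10;

unsigned getEPtrKindFromAddrSpace(unsigned AS) {
  assert(((AS + 1) & ~ADDR_SPACE) == 0);
  return AS + 1;
}

unsigned getAddrSpaceFromEPtrKind(unsigned Kind) {
  Kind &= ADDR_SPACE;
  assert(Kind >= 1);
  return Kind - 1;
}

int main() {
  // Round trip for an arbitrary address space number, with a CONST flag mixed in.
  unsigned PtrKind = getEPtrKindFromAddrSpace(1) | CONST;
  assert(getAddrSpaceFromEPtrKind(PtrKind) == 1);
  return 0;
}
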
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9fc9592bdc5..83122281d2b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -23,7 +23,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 56a5fa634b5..6ee529c8549 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -462,6 +462,10 @@ public:
return isAmdHsaOS() || isMesaKernel(MF);
}
+ bool hasMad64_32() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
+
bool hasFminFmaxLegacy() const {
return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f7ecdea7704..14f26f787ab 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -245,6 +245,10 @@ GCNMinRegSchedRegistry("gcn-minreg",
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
+ if (TT.getEnvironmentName() == "amdgiz" ||
+ TT.getEnvironmentName() == "amdgizcl")
+ return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d729dcc439e..d1120f5e330 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2134,12 +2134,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // FIXME: Remove this hack for function pointer types.
- const GlobalValue *GV = GA->getGlobal();
- assert(Callee.getValueType() == MVT::i32);
- Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(),
- false, GA->getTargetFlags());
+    // FIXME: Remove this hack for function pointer types after removing
+    // support for the old address space mapping. In the new address space
+    // mapping, the pointer in the default address space is 64 bit, so this
+    // hack is not needed.
+ if (Callee.getValueType() == MVT::i32) {
+ const GlobalValue *GV = GA->getGlobal();
+ Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false,
+ GA->getTargetFlags());
+ }
}
+ assert(Callee.getValueType() == MVT::i64);
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
@@ -5957,18 +5962,57 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
return 0;
}
+static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
+ EVT VT,
+ SDValue N0, SDValue N1, SDValue N2,
+ bool Signed) {
+ unsigned MadOpc = Signed ? AMDGPUISD::MAD_I64_I32 : AMDGPUISD::MAD_U64_U32;
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
+ SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2);
+ return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad);
+}
+
SDValue SITargetLowering::performAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
-
- if (VT != MVT::i32)
- return SDValue();
-
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL)
+ && Subtarget->hasMad64_32() &&
+ !VT.isVector() && VT.getScalarSizeInBits() > 32 &&
+ VT.getScalarSizeInBits() <= 64) {
+ if (LHS.getOpcode() != ISD::MUL)
+ std::swap(LHS, RHS);
+
+ SDValue MulLHS = LHS.getOperand(0);
+ SDValue MulRHS = LHS.getOperand(1);
+ SDValue AddRHS = RHS;
+
+ // TODO: Maybe restrict if SGPR inputs.
+ if (numBitsUnsigned(MulLHS, DAG) <= 32 &&
+ numBitsUnsigned(MulRHS, DAG) <= 32) {
+ MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32);
+ MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32);
+ AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64);
+ return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
+ }
+
+ if (numBitsSigned(MulLHS, DAG) < 32 && numBitsSigned(MulRHS, DAG) < 32) {
+ MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
+ MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
+ AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
+ return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true);
+ }
+
+ return SDValue();
+ }
+
+ if (VT != MVT::i32)
+ return SDValue();
+
// add x, zext (setcc) => addcarry x, 0, setcc
// add x, sext (setcc) => subcarry x, 0, setcc
unsigned Opc = LHS.getOpcode();
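
The new performAddCombine path above recognizes a wide add whose multiply operands are known to be narrow enough for 32-bit inputs and rewrites it to the MAD_I64_I32/MAD_U64_U32 nodes selected earlier in this patch (v_mad_i64_i32 / v_mad_u64_u32). The identity it relies on is simply that a 32x32 multiply widened to 64 bits plus a 64-bit addend equals the original wide expression; a scalar illustration (plain C++, not the DAG code):

#include <cassert>
#include <cstdint>

// If A and B each fit in 32 bits, the 64-bit "mul + add" can be formed as a
// single widening multiply-add, which is what the combine emits as MAD_U64_U32.
static uint64_t mad_u64_u32(uint32_t A, uint32_t B, uint64_t C) {
  return uint64_t(A) * uint64_t(B) + C;
}

int main() {
  uint64_t A = 0x00ABCDEFu, B = 0x00123456u, C = 0x1122334455667788u;
  assert(mad_u64_u32(uint32_t(A), uint32_t(B), C) == A * B + C);
  return 0;
}
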
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ade909cc84e..5dde72910ee 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -14,15 +14,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
-#include "AMDGPUMachineFunction.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
@@ -87,9 +87,6 @@ public:
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
- // FIXME: This should be removed and getPreloadedValue moved here.
- friend class SIRegisterInfo;
-
unsigned TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
@@ -143,7 +140,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
private:
unsigned LDSWaveSpillSize = 0;
- unsigned ScratchOffsetReg;
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index aa041aab51c..666b80107dc 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -399,8 +399,10 @@ def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_
} // End Constraints = "@earlyclobber $vdst"
let isCommutable = 1 in {
+let SchedRW = [WriteDouble, WriteSALU] in {
def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
+} // End SchedRW = [WriteDouble, WriteSALU]
} // End isCommutable = 1
} // End SubtargetPredicate = isCIVI
diff --git a/lib/Target/ARC/ARCBranchFinalize.cpp b/lib/Target/ARC/ARCBranchFinalize.cpp
index 0fb8a420d86..e5b0f8f3208 100644
--- a/lib/Target/ARC/ARCBranchFinalize.cpp
+++ b/lib/Target/ARC/ARCBranchFinalize.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <vector>
using namespace llvm;
diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h
index ac5378adbd8..c042bec016c 100644
--- a/lib/Target/ARC/ARCFrameLowering.h
+++ b/lib/Target/ARC/ARCFrameLowering.h
@@ -17,7 +17,7 @@
#include "ARC.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/ARC/ARCInstrInfo.h b/lib/Target/ARC/ARCInstrInfo.h
index 5285dce9f12..f965dd4ff7f 100644
--- a/lib/Target/ARC/ARCInstrInfo.h
+++ b/lib/Target/ARC/ARCInstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_ARC_ARCINSTRINFO_H
#include "ARCRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "ARCGenInstrInfo.inc"
diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp
index 66f95911d3e..bed47a0eab5 100644
--- a/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 3688db943d5..dac11626a6f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 9f168acd567..99bcf788ddf 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include <array>
#include <cstdint>
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 63b14ee98d7..eaa8d4c0f1a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -41,7 +42,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index e1323cd9427..9c10a1c79a4 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -417,6 +417,12 @@ struct FormalArgHandler : public IncomingValueHandler {
bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
+ auto &TLI = *getTLI<ARMTargetLowering>();
+ auto Subtarget = TLI.getSubtarget();
+
+ if (Subtarget->isThumb())
+ return false;
+
// Quick exit if there aren't any args
if (F.arg_empty())
return true;
@@ -427,12 +433,6 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
auto &MF = MIRBuilder.getMF();
auto &MBB = MIRBuilder.getMBB();
auto DL = MF.getDataLayout();
- auto &TLI = *getTLI<ARMTargetLowering>();
-
- auto Subtarget = TLI.getSubtarget();
-
- if (Subtarget->isThumb())
- return false;
for (auto &Arg : F.args())
if (!isSupportedType(DL, TLI, Arg.getType()))
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 71b81936240..284b67fd59b 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -19,8 +19,8 @@
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 5c967d9f906..f42d00ecf60 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -72,7 +73,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index ce4add974d6..ab8fbb47086 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 2c10031e3f8..1f18e2bf80c 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -11,7 +11,7 @@
#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include <vector>
namespace llvm {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9cfa0d3a7c3..deece84ecf2 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -57,6 +57,7 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
@@ -94,7 +95,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 309430b0e9c..34186dede0d 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -24,6 +24,54 @@
using namespace llvm;
+/// FIXME: The following static functions are SizeChangeStrategy functions
+/// that are meant to temporarily mimic the behaviour of the old legalization
+/// based on doubling/halving non-legal types as closely as possible. This is
+/// not entirely possible as only legalizing the types that are exactly a power
+/// of 2 times the size of the legal types would require specifying all those
+/// sizes explicitly.
+/// In practice, not specifying those isn't a problem, and the below functions
+/// should disappear quickly as we add support for legalizing non-power-of-2
+/// sized types further.
+static void
+addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result,
+ const LegalizerInfo::SizeAndActionsVec &v) {
+ for (unsigned i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 < v[i].first && i + 1 < v.size() &&
+ v[i + 1].first != v[i].first + 1)
+ result.push_back({v[i].first + 1, LegalizerInfo::Unsupported});
+ }
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_8_16(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
static bool AEABI(const ARMSubtarget &ST) {
return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI();
}
@@ -49,14 +97,15 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
}
for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) {
- for (auto Ty : {s1, s8, s16})
- setAction({Op, Ty}, WidenScalar);
+ if (Op != G_ADD)
+ setLegalizeScalarToDifferentSizeStrategy(
+ Op, 0, widenToLargerTypesUnsupportedOtherwise);
setAction({Op, s32}, Legal);
}
for (unsigned Op : {G_SDIV, G_UDIV}) {
- for (auto Ty : {s8, s16})
- setAction({Op, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(Op, 0,
+ widenToLargerTypesUnsupportedOtherwise);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
@@ -64,8 +113,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
}
for (unsigned Op : {G_SREM, G_UREM}) {
- for (auto Ty : {s8, s16})
- setAction({Op, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(Op, 0, widen_8_16);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Lower);
else if (AEABI(ST))
@@ -74,10 +122,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
- for (unsigned Op : {G_SEXT, G_ZEXT}) {
+ for (unsigned Op : {G_SEXT, G_ZEXT, G_ANYEXT}) {
setAction({Op, s32}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({Op, 1, Ty}, Legal);
}
for (unsigned Op : {G_ASHR, G_LSHR, G_SHL})
@@ -93,12 +139,11 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_BRCOND, s1}, Legal);
setAction({G_CONSTANT, s32}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({G_CONSTANT, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16);
setAction({G_ICMP, s1}, Legal);
- for (auto Ty : {s8, s16})
- setAction({G_ICMP, 1, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1,
+ widenToLargerTypesUnsupportedOtherwise);
for (auto Ty : {s32, p0})
setAction({G_ICMP, 1, Ty}, Legal);
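
To make the SizeChangeStrategy tables above concrete: given a legal-sizes vector such as {{32, Legal}}, widen_8_16 prepends the widen/unsupported entries for the small sizes, interleaves the legal entries, and caps the table with an Unsupported entry one past the largest size, yielding {1:Unsupported, 8:WidenScalar, 9:Unsupported, 16:WidenScalar, 17:Unsupported, 32:Legal, 33:Unsupported}. A simplified standalone re-statement (a local enum and vector type standing in for the LegalizerInfo ones):

#include <cassert>
#include <utility>
#include <vector>

enum Action { Unsupported, WidenScalar, Legal };
using SizeAndActionsVec = std::vector<std::pair<unsigned, Action>>;

// Mirrors addAndInterleaveWithUnsupported from the patch above.
static void addAndInterleave(SizeAndActionsVec &Result,
                             const SizeAndActionsVec &V) {
  for (unsigned I = 0; I < V.size(); ++I) {
    Result.push_back(V[I]);
    if (I + 1 < V[I].first && I + 1 < V.size() &&
        V[I + 1].first != V[I].first + 1)
      Result.push_back({V[I].first + 1, Unsupported});
  }
}

// Mirrors widen_8_16: widen 8- and 16-bit scalars, reject the other small
// sizes, and cap the table one past the largest legal size.
static SizeAndActionsVec widen_8_16(const SizeAndActionsVec &V) {
  SizeAndActionsVec Result = {{1, Unsupported},
                              {8, WidenScalar},  {9, Unsupported},
                              {16, WidenScalar}, {17, Unsupported}};
  addAndInterleave(Result, V);
  Result.push_back({Result.back().first + 1, Unsupported});
  return Result;
}

int main() {
  SizeAndActionsVec R = widen_8_16({{32, Legal}});
  assert(R.size() == 7);
  assert(R[5] == std::make_pair(32u, Legal) && R[6].first == 33);
  return 0;
}
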
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 4aa7e150342..7424af9d5a5 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -41,10 +41,12 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
@@ -53,8 +55,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp
index a34ed2cb5a2..5c9aad417ce 100644
--- a/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/lib/Target/ARM/ARMMacroFusion.cpp
@@ -15,7 +15,7 @@
#include "ARMMacroFusion.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 4f330e3a884..f6996f098c0 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <bitset>
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index d911dd97b1a..a0b98a43108 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -34,7 +35,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 15a56752333..d2bebb9eeec 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -29,7 +29,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/AVR/AVRFrameLowering.h b/lib/Target/AVR/AVRFrameLowering.h
index 30ef441183a..a0ba6c95127 100644
--- a/lib/Target/AVR/AVRFrameLowering.h
+++ b/lib/Target/AVR/AVRFrameLowering.h
@@ -10,7 +10,7 @@
#ifndef LLVM_AVR_FRAME_LOWERING_H
#define LLVM_AVR_FRAME_LOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h
index eee8a92c619..354edcec346 100644
--- a/lib/Target/AVR/AVRInstrInfo.h
+++ b/lib/Target/AVR/AVRInstrInfo.h
@@ -14,7 +14,7 @@
#ifndef LLVM_AVR_INSTR_INFO_H
#define LLVM_AVR_INSTR_INFO_H
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "AVRRegisterInfo.h"
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 7099b29a8bc..b6ac93452cb 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -18,7 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "AVR.h"
#include "AVRInstrInfo.h"
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index 5db963f518b..b4ffa0713fa 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_BPF_BPFFRAMELOWERING_H
#define LLVM_LIB_TARGET_BPF_BPFFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class BPFSubtarget;
diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h
index c7048ab979b..f591f48a89a 100644
--- a/lib/Target/BPF/BPFInstrInfo.h
+++ b/lib/Target/BPF/BPFInstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_BPF_BPFINSTRINFO_H
#include "BPFRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "BPFGenInstrInfo.inc"
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 273843e9270..00d609e8960 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -18,10 +18,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "BPFGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 22794eb50e2..e28af5a844f 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -14,9 +14,9 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <vector>
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 501ac2c44bb..6336075917e 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/PassSupport.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 296edbe1eff..988718860c5 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -15,7 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include <vector>
namespace llvm {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index a5381c1fb1a..9b8970258a2 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -47,7 +48,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2f172340c4e..1558c2e9850 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -19,8 +19,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <cstdint>
#include <vector>
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 29e2bc32dfb..2154a485dc6 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -161,9 +161,16 @@ namespace {
};
struct Simplifier {
- using Rule = std::function<Value * (Instruction *, LLVMContext &)>;
+ struct Rule {
+ using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
+ Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
+ StringRef Name; // For debugging.
+ FuncType Fn;
+ };
- void addRule(const Rule &R) { Rules.push_back(R); }
+ void addRule(StringRef N, const Rule::FuncType &F) {
+ Rules.push_back(Rule(N, F));
+ }
private:
struct WorkListType {
@@ -522,7 +529,7 @@ Value *Simplifier::simplify(Context &C) {
continue;
bool Changed = false;
for (Rule &R : Rules) {
- Value *W = R(U, C.Ctx);
+ Value *W = R.Fn(U, C.Ctx);
if (!W)
continue;
Changed = true;
@@ -1544,8 +1551,30 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
return R;
}
+static bool hasZeroSignBit(const Value *V) {
+ if (const auto *CI = dyn_cast<const ConstantInt>(V))
+ return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
+ const Instruction *I = dyn_cast<const Instruction>(V);
+ if (!I)
+ return false;
+ switch (I->getOpcode()) {
+ case Instruction::LShr:
+ if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
+ return SI->getZExtValue() > 0;
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return hasZeroSignBit(I->getOperand(0)) &&
+ hasZeroSignBit(I->getOperand(1));
+ case Instruction::And:
+ return hasZeroSignBit(I->getOperand(0)) ||
+ hasZeroSignBit(I->getOperand(1));
+ }
+ return false;
+}
+
void PolynomialMultiplyRecognize::setupSimplifier() {
- Simp.addRule(
+ Simp.addRule("sink-zext",
// Sink zext past bitwise operations.
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::ZExt)
@@ -1566,7 +1595,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
B.CreateZExt(T->getOperand(0), I->getType()),
B.CreateZExt(T->getOperand(1), I->getType()));
});
- Simp.addRule(
+ Simp.addRule("xor/and -> and/xor",
// (xor (and x a) (and y a)) -> (and (xor x y) a)
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::Xor)
@@ -1584,7 +1613,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
And0->getOperand(1));
});
- Simp.addRule(
+ Simp.addRule("sink binop into select",
// (Op (select c x y) z) -> (select c (Op x z) (Op y z))
// (Op x (select c y z)) -> (select c (Op x y) (Op x z))
[](Instruction *I, LLVMContext &Ctx) -> Value* {
@@ -1610,7 +1639,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
}
return nullptr;
});
- Simp.addRule(
+ Simp.addRule("fold select-select",
// (select c (select c x y) z) -> (select c x z)
// (select c x (select c y z)) -> (select c x z)
[](Instruction *I, LLVMContext &Ctx) -> Value* {
@@ -1629,23 +1658,19 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
}
return nullptr;
});
- Simp.addRule(
+ Simp.addRule("or-signbit -> xor-signbit",
// (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::Or)
return nullptr;
- Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
- if (!LShr || LShr->getOpcode() != Instruction::LShr)
- return nullptr;
- ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
- if (!One || One->getZExtValue() != 1)
- return nullptr;
ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
return nullptr;
- return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
+ if (!hasZeroSignBit(I->getOperand(0)))
+ return nullptr;
+ return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
});
- Simp.addRule(
+ Simp.addRule("sink lshr into binop",
// (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::LShr)
@@ -1667,7 +1692,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
B.CreateLShr(BitOp->getOperand(0), S),
B.CreateLShr(BitOp->getOperand(1), S));
});
- Simp.addRule(
+ Simp.addRule("expose bitop-const",
// (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
[](Instruction *I, LLVMContext &Ctx) -> Value* {
auto IsBitOp = [](unsigned Op) -> bool {
@@ -1737,9 +1762,17 @@ bool PolynomialMultiplyRecognize::recognize() {
// XXX: Currently this approach can modify the loop before being 100% sure
// that the transformation can be carried out.
bool FoundPreScan = false;
+ auto FeedsPHI = [LoopB](const Value *V) -> bool {
+ for (const Value *U : V->users()) {
+ if (const auto *P = dyn_cast<const PHINode>(U))
+ if (P->getParent() == LoopB)
+ return true;
+ }
+ return false;
+ };
for (Instruction &In : *LoopB) {
SelectInst *SI = dyn_cast<SelectInst>(&In);
- if (!SI)
+ if (!SI || !FeedsPHI(SI))
continue;
Simplifier::Context C(SI);
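A minimal standalone sketch of why each rule now carries a Name (all types below are simplified stand-ins for the LLVM ones used above; the trace format is hypothetical): with named rules, a debug run can report exactly which rewrite fired on each visited instruction.

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for Simplifier::Rule; int replaces Instruction*/Value*.
struct Rule {
  using FuncType = std::function<int(int)>;
  std::string Name; // for debugging, as in the patch
  FuncType Fn;
};

int main() {
  std::vector<Rule> Rules;
  Rules.push_back({"double-it", [](int V) { return V * 2; }});
  Rules.push_back({"add-one", [](int V) { return V + 1; }});
  int V = 20;
  for (const Rule &R : Rules) {
    int W = R.Fn(V);
    std::cout << "rule '" << R.Name << "' rewrote " << V << " -> " << W << "\n";
    V = W;
  }
  return 0;
}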
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 93f1fd4109a..3c88eeeb8a4 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -24,12 +24,12 @@
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 2525d272666..6cca5a849cc 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index d432bfef7ae..05865c43f2d 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -1706,28 +1706,27 @@ multiclass Loadxim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
defm: Loadxgim_pat<Load, VT, ValueMod, ImmPred, MI>;
}
-// Patterns to select load reg reg-indexed: Rs + Rt<<u2.
-multiclass Loadxr_pat<PatFrag Load, ValueType VT, InstHexagon MI> {
- let AddedComplexity = 40 in
- def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
-
- let AddedComplexity = 20 in
- def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
-}
-
-// Patterns to select load reg reg-indexed: Rs + Rt<<u2 with value modifier.
-multiclass Loadxrm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
- InstHexagon MI> {
- let AddedComplexity = 40 in
- def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
- (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>;
+// Pattern to select load reg reg-indexed: Rs + Rt<<u2.
+class Loadxr_shl_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
+// Pattern to select load reg reg-indexed: Rs + Rt<<0.
+class Loadxr_add_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
+
+// Pattern to select load reg reg-indexed: Rs + Rt<<u2 with value modifier.
+class Loadxrm_shl_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
+ (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>;
- let AddedComplexity = 20 in
- def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
- (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>;
-}
+// Pattern to select load reg reg-indexed: Rs + Rt<<0 with value modifier.
+class Loadxrm_add_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
+ (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>;
// Pattern to select load long-offset reg-indexed: Addr + Rt<<u2.
// Don't match for u2==0, instead use reg+imm for those cases.
@@ -1777,17 +1776,19 @@ let AddedComplexity = 20 in {
defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>;
}
-defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
-defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
-defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
-defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
-defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
-defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
-defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
-defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
-defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>;
-defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>;
-defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>;
+let AddedComplexity = 30 in {
+ defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
+ defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
+ defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
+ defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
+ defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>;
+ defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>;
+ defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>;
+}
let AddedComplexity = 60 in {
def: Loadxu_pat<extloadi8, i32, anyimm0, L4_loadrub_ur>;
@@ -1818,26 +1819,55 @@ let AddedComplexity = 60 in {
def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
}
-defm: Loadxr_pat<extloadi8, i32, L4_loadrub_rr>;
-defm: Loadxr_pat<zextloadi8, i32, L4_loadrub_rr>;
-defm: Loadxr_pat<sextloadi8, i32, L4_loadrb_rr>;
-defm: Loadxr_pat<extloadi16, i32, L4_loadruh_rr>;
-defm: Loadxr_pat<zextloadi16, i32, L4_loadruh_rr>;
-defm: Loadxr_pat<sextloadi16, i32, L4_loadrh_rr>;
-defm: Loadxr_pat<load, i32, L4_loadri_rr>;
-defm: Loadxr_pat<load, i64, L4_loadrd_rr>;
-defm: Loadxr_pat<load, f32, L4_loadri_rr>;
-defm: Loadxr_pat<load, f64, L4_loadrd_rr>;
-
-defm: Loadxrm_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
-defm: Loadxrm_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
-defm: Loadxrm_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
-defm: Loadxrm_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
-defm: Loadxrm_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
-defm: Loadxrm_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
-defm: Loadxrm_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
-defm: Loadxrm_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
-defm: Loadxrm_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
+let AddedComplexity = 40 in {
+ def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
+}
+
+let AddedComplexity = 20 in {
+ def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
+}
+
+let AddedComplexity = 40 in {
+ def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
+ def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
+ def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
+}
+
+let AddedComplexity = 20 in {
+ def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
+ def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
+ def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
+}
// Absolute address
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 7d961a238ae..da53a09a6fc 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -44,12 +44,12 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index e491c757670..f29f321214c 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -26,13 +26,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 68484344fde..0ff3afff5f5 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index a0fdc70e141..52e5dcd4638 100644
--- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -548,14 +548,13 @@ bool HexagonVectorLoopCarriedReuse::doVLCR() {
findValueToReuse();
if (ReuseCandidate.isDefined()) {
reuseValue();
- Changed = true;
- Continue = true;
- }
- std::for_each(Dependences.begin(), Dependences.end(),
- std::default_delete<DepChain>());
- } while (Continue);
- return Changed;
-}
+ Changed = true;
+ Continue = true;
+ }
+ llvm::for_each(Dependences, std::default_delete<DepChain>());
+ } while (Continue);
+ return Changed;
+}
void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
DepChain &D) {
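For reference, a small sketch of the range-based llvm::for_each used above (assuming llvm/ADT/STLExtras.h is on the include path; DepChain is reduced to an empty struct here): it takes the container directly instead of a begin/end iterator pair, and std::default_delete<DepChain>() deletes each pointer element.

#include "llvm/ADT/STLExtras.h"
#include <memory>
#include <vector>

struct DepChain {};

// Delete every owned DepChain, mirroring the cleanup step in doVLCR().
static void deleteDependences(std::vector<DepChain *> &Dependences) {
  llvm::for_each(Dependences, std::default_delete<DepChain>());
  Dependences.clear();
}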
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index de58ddff339..22bb8841f5f 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -28,7 +29,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index 802232b0582..6b4fa777178 100644
--- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h
index 2f9b6c3c158..ca690d513fc 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.h
+++ b/lib/Target/Lanai/LanaiFrameLowering.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
#include "Lanai.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h
index 4387fe1af3c..f07fede67a4 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/lib/Target/Lanai/LanaiInstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_LANAI_LANAIINSTRINFO_H
#include "LanaiRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "LanaiGenInstrInfo.inc"
diff --git a/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 7259c02194c..c29c933db74 100644
--- a/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -30,8 +30,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
#define GET_INSTRMAP_INFO
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 6ea477dce3e..56a5e0ea2de 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -20,11 +20,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "LanaiGenRegisterInfo.inc"
diff --git a/lib/Target/Lanai/LanaiSubtarget.h b/lib/Target/Lanai/LanaiSubtarget.h
index 2732ef3097e..313d950e8aa 100644
--- a/lib/Target/Lanai/LanaiSubtarget.h
+++ b/lib/Target/Lanai/LanaiSubtarget.h
@@ -19,7 +19,7 @@
#include "LanaiInstrInfo.h"
#include "LanaiSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h
index ce1271d9dea..2fb1a053610 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/lib/Target/Lanai/LanaiTargetMachine.h
@@ -19,7 +19,7 @@
#include "LanaiInstrInfo.h"
#include "LanaiSelectionDAGInfo.h"
#include "LanaiSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index fdc4aa52a19..8807101f37c 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_MSP430_MSP430FRAMELOWERING_H
#include "MSP430.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class MSP430FrameLowering : public TargetFrameLowering {
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index d81f17e753c..45357f54c9c 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_MSP430_MSP430INSTRINFO_H
#include "MSP430RegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 97b5e810a1d..4935b80cfdd 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -16,7 +16,7 @@
#define LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H
#include "MSP430Subtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 002fa512b21..d8e2eef6a9f 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -535,7 +535,7 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address,
const void *Decoder);
@@ -2481,10 +2481,8 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address, const void *Decoder) {
- unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
-
switch (RegPair) {
default:
return MCDisassembler::Fail;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 12f7638594d..eae0f975080 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -1115,6 +1115,29 @@ MipsMCCodeEmitter::getMovePRegPairOpValue(const MCInst &MI, unsigned OpNo,
}
unsigned
+MipsMCCodeEmitter::getMovePRegSingleOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(((OpNo == 2) || (OpNo == 3)) &&
+ "Unexpected OpNo for movep operand encoding!");
+
+ MCOperand Op = MI.getOperand(OpNo);
+ assert(Op.isReg() && "Operand of movep is not a register!");
+ switch (Op.getReg()) {
+ default:
+ llvm_unreachable("Unknown register for movep!");
+ case Mips::ZERO: return 0;
+ case Mips::S1: return 1;
+ case Mips::V0: return 2;
+ case Mips::V1: return 3;
+ case Mips::S0: return 4;
+ case Mips::S2: return 5;
+ case Mips::S3: return 6;
+ case Mips::S4: return 7;
+ }
+}
+
+unsigned
MipsMCCodeEmitter::getSimm23Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index d12d3195521..1e840114b2b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -252,6 +252,9 @@ public:
unsigned getMovePRegPairOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMovePRegSingleOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getSimm23Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index aad6bf378ea..0bddba78145 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -246,8 +246,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
break;
case MEK_CALL_HI16:
case MEK_CALL_LO16:
- case MEK_DTPREL_HI:
- case MEK_DTPREL_LO:
case MEK_GOT:
case MEK_GOT_CALL:
case MEK_GOT_DISP:
@@ -263,14 +261,16 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
case MEK_NEG:
case MEK_PCREL_HI16:
case MEK_PCREL_LO16:
- case MEK_TLSLDM:
// If we do have nested target-specific expressions, they will be in
// a consecutive chain.
if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr()))
E->fixELFSymbolsInTLSFixups(Asm);
break;
- case MEK_GOTTPREL:
+ case MEK_DTPREL_HI:
+ case MEK_DTPREL_LO:
+ case MEK_TLSLDM:
case MEK_TLSGD:
+ case MEK_GOTTPREL:
case MEK_TPREL_HI:
case MEK_TPREL_LO:
fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 2f0933277e8..e1f1f9262b9 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -829,6 +829,21 @@ class POOL16C_NOT16_FM_MMR6 : MicroMipsR6Inst16 {
let Inst{3-0} = 0b0000;
}
+class POOL16C_MOVEP16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> dst_regs;
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-7} = dst_regs;
+ let Inst{6-4} = rt;
+ let Inst{3} = rs{2};
+ let Inst{2} = 0b1;
+ let Inst{1-0} = rs{1-0};
+}
+
class POOL16C_OR16_XOR16_FM_MMR6<bits<4> op> : MicroMipsR6Inst16 {
bits<3> rt;
bits<3> rs;
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 425e75e14c8..49d6ae3f98a 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -229,6 +229,7 @@ class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>, MicroMipsR6Inst16;
class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>;
class LI16_MMR6_ENC : LI_FM_MM16;
class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>;
+class MOVEP_MMR6_ENC : POOL16C_MOVEP16_FM_MMR6;
class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>;
class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6;
class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>;
@@ -1204,6 +1205,7 @@ class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>,
MMR6Arch<"li16">, MicroMipsR6Inst16, IsAsCheapAsAMove;
class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">,
MicroMipsR6Inst16;
+class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMoveP>, MMR6Arch<"movep">;
class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">,
MicroMipsR6Inst16;
class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
@@ -1679,6 +1681,8 @@ def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC,
ISA_MICROMIPS32R6;
def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC,
ISA_MICROMIPS32R6;
+def MOVEP_MMR6 : StdMMR6Rel, MOVEP_MMR6_DESC, MOVEP_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC,
ISA_MICROMIPS32R6;
def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC,
@@ -1879,3 +1883,10 @@ let AddedComplexity = 41 in {
}
def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
+
+def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+ (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
+
+def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+ (TAILCALL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6;
+
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
index e0f4d833392..4f705feed0a 100644
--- a/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -162,12 +162,11 @@ class DCLZ_MM64R6_DESC {
class DINSU_MM64R6_DESC : InsBase<"dinsu", GPR64Opnd, uimm5_plus32,
uimm5_inssize_plus1, immZExt5Plus32,
- immZExt5Plus1, MipsIns>;
+ immZExt5Plus1>;
class DINSM_MM64R6_DESC : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64,
- immZExt5, immZExtRange2To64, MipsIns>;
+ immZExt5, immZExtRange2To64>;
class DINS_MM64R6_DESC : InsBase<"dins", GPR64Opnd, uimm5_report_uimm6,
- uimm5_inssize_plus1, immZExt5, immZExt5Plus1,
- MipsIns>;
+ uimm5_inssize_plus1, immZExt5, immZExt5Plus1>;
class DMTC0_MM64R6_DESC : MTC0_MMR6_DESC_BASE<"dmtc0", COP0Opnd, GPR64Opnd,
II_DMTC0>;
class DMTC1_MM64R6_DESC : MTC1_MMR6_DESC_BASE<"dmtc1", FGR64Opnd, GPR64Opnd,
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 1f869db4efe..48c1d94d03c 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -631,7 +631,8 @@ def ADDIUSP_MM : AddImmUSP<"addiusp">, ADDIUSP_FM_MM16;
def MFHI16_MM : MoveFromHILOMM<"mfhi", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x10>;
def MFLO16_MM : MoveFromHILOMM<"mflo", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x12>;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>;
-def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16;
+def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16,
+ ISA_MICROMIPS_NOT_32R6_64R6;
def LI16_MM : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>,
@@ -884,7 +885,7 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, immZExt5,
immZExt5Plus1, MipsExt>, EXT_FM_MM<0x2c>;
def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, uimm5_inssize_plus1,
- immZExt5, immZExt5Plus1, MipsIns>,
+ immZExt5, immZExt5Plus1>,
EXT_FM_MM<0x0c>;
/// Jump Instructions
@@ -1061,13 +1062,13 @@ let Predicates = [InMicroMips] in {
(LW_MM addr:$addr)>;
def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs),
(SUBu_MM GPR32:$lhs, GPR32:$rhs)>;
-
- def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
- (TAILCALL_MM tglobaladdr:$dst)>, ISA_MIPS1_NOT_32R6_64R6;
- def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
- (TAILCALL_MM texternalsym:$dst)>, ISA_MIPS1_NOT_32R6_64R6;
}
+def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+ (TAILCALL_MM tglobaladdr:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
+def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+ (TAILCALL_MM texternalsym:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
+
let AddedComplexity = 40 in {
def : MipsPat<(i32 (sextloadi16 addrRegImm:$a)),
(LH_MM addrRegImm:$a)>;
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 76bca3df2bc..cb59e2ddb1c 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -30,7 +30,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include <cassert>
#include <cstdint>
#include <vector>
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index bdb9eec4cc5..8ce47e3f669 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -17,8 +17,8 @@
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 44771cbe8be..ff95f3c7228 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -22,6 +22,8 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -29,8 +31,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 04a050c2ff4..dbd47de4dad 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -341,13 +341,13 @@ let AdditionalPredicates = [NotInMicroMips] in {
// for dinsm and dinsu like binutils.
let DecoderMethod = "DecodeDINS" in {
def DINS : InsBase<"dins", GPR64Opnd, uimm6, uimm5_inssize_plus1,
- immZExt5, immZExt5Plus1, MipsIns>, EXT_FM<7>,
+ immZExt5, immZExt5Plus1>, EXT_FM<7>,
ISA_MIPS64R2;
def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32, uimm5_inssize_plus1,
- immZExt5Plus32, immZExt5Plus1, MipsIns>,
+ immZExt5Plus32, immZExt5Plus1>,
EXT_FM<6>, ISA_MIPS64R2;
def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64,
- immZExt5, immZExtRange2To64, MipsIns>,
+ immZExt5, immZExtRange2To64>,
EXT_FM<5>, ISA_MIPS64R2;
}
}
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index bec0ae6ba4c..5edd12c0232 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
@@ -64,7 +65,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index ef05166503b..27a85970da6 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
}
+// Estimate the size of the stack, including the incoming arguments. We need to
+// account for register spills, local objects, reserved call frame and incoming
+// arguments. This is needed to compute the largest possible positive offset
+// from $sp so that we can decide whether an emergency spill slot for stack
+// addresses is required.
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- int64_t Offset = 0;
+ int64_t Size = 0;
- // Iterate over fixed sized objects.
+ // Iterate over fixed sized objects which are incoming arguments.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- Offset = std::max(Offset, -MFI.getObjectOffset(I));
+ if (MFI.getObjectOffset(I) > 0)
+ Size += MFI.getObjectSize(I);
// Conservatively assume all callee-saved registers will be saved.
for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
- unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
- Offset = alignTo(Offset + Size, Size);
+ unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
+ Size = alignTo(Size + RegSize, RegSize);
}
- unsigned MaxAlign = MFI.getMaxAlignment();
-
- // Check that MaxAlign is not zero if there is a stack object that is not a
- // callee-saved spill.
- assert(!MFI.getObjectIndexEnd() || MaxAlign);
-
- // Iterate over other objects.
- for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
- Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign);
-
- // Call frame.
- if (MFI.adjustsStack() && hasReservedCallFrame(MF))
- Offset = alignTo(Offset + MFI.getMaxCallFrameSize(),
- std::max(MaxAlign, getStackAlignment()));
-
- return alignTo(Offset, getStackAlignment());
+ // Get the size of the rest of the frame objects and any possible reserved
+ // call frame, accounting for alignment.
+ return Size + MFI.estimateStackSize(MF);
}
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 8c4214c4c21..883c3267d51 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_MIPS_MIPSFRAMELOWERING_H
#include "Mips.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class MipsSubtarget;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 38b3c3fb160..d31385f42d6 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -27,8 +27,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -45,6 +45,8 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -62,8 +64,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index a5ed1be3bee..c18e395f901 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include <cstdint>
#define GET_INSTRINFO_HEADER
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index c4c3eb760c5..3502dbcdae9 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -212,6 +212,8 @@ def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">,
AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
+def NotInMips16Mode : Predicate<"!Subtarget->inMips16Mode()">,
+ AssemblerPredicate<"!FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
def NotCnMips : Predicate<"!Subtarget->hasCnMips()">,
@@ -1544,7 +1546,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
PseudoInstExpansion<(JumpInst Opnd:$target)>;
class TailCallReg<RegisterOperand RO> :
- MipsPseudo<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>;
+ PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>;
}
class BAL_BR_Pseudo<Instruction RealInst> :
@@ -1726,12 +1728,13 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
[(set RO:$rt, (Op RO:$rs, PosImm:$pos, SizeImm:$size))], II_EXT,
FrmR, opstr>, ISA_MIPS32R2;
+// 'ins' and its 64-bit variants are matched by C++ code.
class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
- Operand SizeOpnd, PatFrag PosImm, PatFrag SizeImm,
- SDPatternOperator Op = null_frag>:
+ Operand SizeOpnd, PatFrag PosImm, PatFrag SizeImm>:
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (Op RO:$rs, PosImm:$pos, SizeImm:$size, RO:$src))],
+ [(set RO:$rt, (null_frag RO:$rs, PosImm:$pos, SizeImm:$size,
+ RO:$src))],
II_INS, FrmR, opstr>, ISA_MIPS32R2 {
let Constraints = "$src = $rt";
}
@@ -2086,7 +2089,7 @@ def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd>,
BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
-let Predicates = [NotInMicroMips] in {
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips] in {
def TAILCALL : TailCall<J, jmptarget>;
}
@@ -2103,6 +2106,7 @@ class PseudoIndirectBranchBase<RegisterOperand RO> :
let isBranch = 1;
let isIndirectBranch = 1;
bit isCTI = 1;
+ let Predicates = [NotInMips16Mode];
}
def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
@@ -2236,7 +2240,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
EXT_FM<0>;
def INS : MMRel, StdMMR6Rel, InsBase<"ins", GPR32Opnd, uimm5,
uimm5_inssize_plus1, immZExt5,
- immZExt5Plus1, MipsIns>,
+ immZExt5Plus1>,
EXT_FM<4>;
}
/// Move Control Registers From/To CPU Registers
@@ -2776,10 +2780,12 @@ def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
// (JALR GPR32:$dst)>;
// Tail call
-def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
- (TAILCALL tglobaladdr:$dst)>;
-def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
- (TAILCALL texternalsym:$dst)>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+ (TAILCALL tglobaladdr:$dst)>;
+ def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+ (TAILCALL texternalsym:$dst)>;
+}
// hi/lo relocs
multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu,
Register ZeroReg, RegisterOperand GPROpnd> {
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index 01c0cbf8262..3910adb7316 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -28,11 +28,11 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 9c64a0ecbb1..ec966afee0e 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -28,7 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdint>
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 08fb3d7d435..f64d91aad85 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -616,6 +616,7 @@ def GPRMM16OpndZero : RegisterOperand<GPRMM16Zero> {
def GPRMM16OpndMoveP : RegisterOperand<GPRMM16MoveP> {
let ParserMatchClass = GPRMM16AsmOperandMoveP;
+ let EncoderMethod = "getMovePRegSingleOpValue";
}
def GPR64Opnd : RegisterOperand<GPR64> {
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 0b19b18449e..2d9cbabbc59 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
@@ -37,7 +38,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
@@ -893,10 +893,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// Set scavenging frame index if necessary.
- uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
- estimateStackSize(MF);
+ uint64_t MaxSPOffset = estimateStackSize(MF);
- if (isInt<16>(MaxSPOffset))
+ // MSA load/store offsets are signed 10-bit values. If there is a variable
+ // sized object on the stack, the estimation cannot account for it.
+ if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) &&
+ !MF.getFrameInfo().hasVarSizedObjects())
return;
const TargetRegisterClass &RC =
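A hedged sketch of how the estimate is consumed here (hypothetical helper, not code from the patch): a scavenging frame index is needed when the largest possible positive offset from $sp may not fit the load/store immediate (16-bit signed in general, 10-bit signed for MSA), or when variable-sized objects make the estimate unreliable.

#include <cstdint>

// Returns true when an emergency spill slot should be reserved for the
// register scavenger.
static bool needsEmergencySpillSlot(uint64_t EstimatedStackSize, bool HasMSA,
                                    bool HasVarSizedObjects) {
  const unsigned OffsetBits = HasMSA ? 10 : 16;
  const uint64_t Limit = 1ULL << (OffsetBits - 1); // first value that no longer fits
  return EstimatedStackSize >= Limit || HasVarSizedObjects;
}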
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 283fcaa73a7..3c6a7d7a665 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -905,6 +905,64 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
break;
}
+ // Manually match MipsISD::Ins nodes to get the correct instruction. It has
+ // to be done in this fashion so that we respect the differences between
+ // dins and dinsm: the size operand has the range 0 < size <= 32 for dins,
+ // while dinsm has the range 2 <= size <= 64, which means SelectionDAGISel
+ // would have to test all the operands at once to
+ // match the instruction.
+ case MipsISD::Ins: {
+
+ // Sanity checking for the node operands.
+ if (Node->getValueType(0) != MVT::i32 && Node->getValueType(0) != MVT::i64)
+ return false;
+
+ if (Node->getNumOperands() != 4)
+ return false;
+
+ if (Node->getOperand(1)->getOpcode() != ISD::Constant ||
+ Node->getOperand(2)->getOpcode() != ISD::Constant)
+ return false;
+
+ MVT ResTy = Node->getSimpleValueType(0);
+ uint64_t Pos = Node->getConstantOperandVal(1);
+ uint64_t Size = Node->getConstantOperandVal(2);
+
+ // Size has to be >0 for 'ins', 'dins' and 'dinsu'.
+ if (!Size)
+ return false;
+
+ if (Pos + Size > 64)
+ return false;
+
+ if (ResTy != MVT::i32 && ResTy != MVT::i64)
+ return false;
+
+ unsigned Opcode = 0;
+ if (ResTy == MVT::i32) {
+ if (Pos + Size <= 32)
+ Opcode = Mips::INS;
+ } else {
+ if (Pos + Size <= 32)
+ Opcode = Mips::DINS;
+ else if (Pos < 32 && 1 < Size)
+ Opcode = Mips::DINSM;
+ else
+ Opcode = Mips::DINSU;
+ }
+
+ if (Opcode) {
+ SDValue Ops[4] = {
+ Node->getOperand(0), CurDAG->getTargetConstant(Pos, DL, MVT::i32),
+ CurDAG->getTargetConstant(Size, DL, MVT::i32), Node->getOperand(3)};
+
+ ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, ResTy, Ops));
+ return true;
+ }
+
+ return false;
+ }
+
case MipsISD::ThreadPointer: {
EVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
unsigned RdhwrOpc, DestReg;
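A standalone restatement of the opcode choice in the MipsISD::Ins case above (illustrative only; the helper and its name are hypothetical): ins and dins cover 0 < pos + size <= 32, dinsm covers pos < 32 with size > 1, and dinsu handles the remaining pos >= 32 cases, all under pos + size <= 64.

#include <cstdint>
#include <string>

// Returns the mnemonic the selector would pick, or "" when no single
// ins-family instruction applies.
static std::string selectInsOpcode(bool Is64Bit, uint64_t Pos, uint64_t Size) {
  if (Size == 0 || Pos + Size > 64)
    return "";
  if (!Is64Bit)
    return Pos + Size <= 32 ? "INS" : "";
  if (Pos + Size <= 32)
    return "DINS";
  if (Pos < 32 && Size > 1)
    return "DINSM";
  return "DINSU";
}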
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 45d7f94f1d1..4dd9f7f219a 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
@@ -40,7 +41,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 86bd24166bb..2ff6b99e78f 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -23,6 +23,8 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -30,8 +32,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td
index 89cda676441..9621009ed1c 100644
--- a/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/lib/Target/Mips/MipsScheduleGeneric.td
@@ -736,6 +736,7 @@ def : InstRW<[GenericDSPShort], (instregex "^MFHI_DSP_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MFLO_DSP_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MODSUB_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MMR6$")>;
def : InstRW<[GenericDSPShort], (instregex "^MOVN_I_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MOVZ_I_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MSUBU_DSP_MM$")>;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 6ced2f6967c..729f3ed7b79 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 320ca9a2f09..a802cf85d2e 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class NVPTXSubtarget;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7b9acb20b75..ac4f2544fc3 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3449,6 +3449,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
}
case Intrinsic::nvvm_atomic_load_add_f32:
+ case Intrinsic::nvvm_atomic_load_add_f64:
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index d284282e28c..18ba7684ae5 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -16,7 +16,7 @@
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "NVPTXGenInstrInfo.inc"
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index f745b6f6635..478f3e9d057 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1095,6 +1095,12 @@ def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1121,6 +1127,13 @@ defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
+ atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>;
+defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
+ atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>;
+defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
+ atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>;
+
// atom_sub
def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 4e902c0fb50..38437b20433 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -36,7 +36,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 88288abe64f..3957d426653 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -20,7 +20,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 8d46694fbe5..75573832988 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -18,8 +18,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 7674135f0a7..54a72a688ee 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -17,7 +17,7 @@
#include "ManagedStringPool.h"
#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/Nios2/Nios2FrameLowering.h b/lib/Target/Nios2/Nios2FrameLowering.h
index 2aaea678d9e..2d9e84b2c72 100644
--- a/lib/Target/Nios2/Nios2FrameLowering.h
+++ b/lib/Target/Nios2/Nios2FrameLowering.h
@@ -14,7 +14,7 @@
#define LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
#include "Nios2.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class Nios2Subtarget;
diff --git a/lib/Target/Nios2/Nios2InstrInfo.h b/lib/Target/Nios2/Nios2InstrInfo.h
index 47e5e83a39d..6a0a050c839 100644
--- a/lib/Target/Nios2/Nios2InstrInfo.h
+++ b/lib/Target/Nios2/Nios2InstrInfo.h
@@ -18,7 +18,7 @@
#include "Nios2RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "Nios2GenInstrInfo.inc"
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 33085a42361..ac28d7ff497 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -21,9 +21,9 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index fa813db5fef..f845d5a9ac6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,7 +15,7 @@
#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 8ea3689b08e..2092748ca1a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f3e7b4af45d..62ade966145 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -51,6 +51,7 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
@@ -82,7 +83,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -291,14 +291,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
- // PowerPC does not have BSWAP
+  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
+  // xxbrd to speed up scalar BSWAP64.
  // CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -781,6 +783,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
}
+
+ if (Subtarget.hasP9Altivec()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ }
}
if (Subtarget.hasQPX()) {
@@ -7888,6 +7895,107 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
+/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
+/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
+/// SDValue.
+SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
+ SelectionDAG &DAG) const {
+ const unsigned BytesInVector = 16;
+ bool IsLE = Subtarget.isLittleEndian();
+ SDLoc dl(N);
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned ShiftElts = 0, InsertAtByte = 0;
+ bool Swap = false;
+
+ // Shifts required to get the byte we want at element 7.
+ unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 15, 14, 13, 12, 11, 10, 9};
+ unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
+ 1, 2, 3, 4, 5, 6, 7, 8};
+
+ ArrayRef<int> Mask = N->getMask();
+ int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+ // For each mask element, find out if we're just inserting something
+ // from V2 into V1 or vice versa.
+ // Possible permutations inserting an element from V2 into V1:
+ // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ // ...
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
+ // Inserting from V1 into V2 will be similar, except mask range will be
+ // [16,31].
+
+ bool FoundCandidate = false;
+ // If both vector operands for the shuffle are the same vector, the mask
+ // will contain only elements from the first one and the second one will be
+ // undef.
+ unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
+  // Go through the mask of bytes to find an element that's being moved from
+  // one vector to the other.
+ for (unsigned i = 0; i < BytesInVector; ++i) {
+ unsigned CurrentElement = Mask[i];
+ // If 2nd operand is undefined, we should only look for element 7 in the
+ // Mask.
+ if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
+ continue;
+
+ bool OtherElementsInOrder = true;
+ // Examine the other elements in the Mask to see if they're in original
+ // order.
+ for (unsigned j = 0; j < BytesInVector; ++j) {
+ if (j == i)
+ continue;
+      // If CurrentElement is from V1 [0,15], then we expect the rest of the
+      // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
+      // undefined, in which case we assume we're picking from the 1st operand.
+ int MaskOffset =
+ (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
+ if (Mask[j] != OriginalOrder[j] + MaskOffset) {
+ OtherElementsInOrder = false;
+ break;
+ }
+ }
+ // If other elements are in original order, we record the number of shifts
+ // we need to get the element we want into element 7. Also record which byte
+ // in the vector we should insert into.
+ if (OtherElementsInOrder) {
+ // If 2nd operand is undefined, we assume no shifts and no swapping.
+ if (V2.isUndef()) {
+ ShiftElts = 0;
+ Swap = false;
+ } else {
+        // Only need the low 4 bits for the shift lookup, because the operands
+        // will be swapped if CurrentElement is >= 2^4.
+ ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
+ : BigEndianShifts[CurrentElement & 0xF];
+ Swap = CurrentElement < BytesInVector;
+ }
+ InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
+ FoundCandidate = true;
+ break;
+ }
+ }
+
+ if (!FoundCandidate)
+ return SDValue();
+
+ // Candidate found, construct the proper SDAG sequence with VINSERTB,
+ // optionally with VECSHL if shift is required.
+ if (Swap)
+ std::swap(V1, V2);
+ if (V2.isUndef())
+ V2 = V1;
+ if (ShiftElts) {
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ }
+ return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+}
+
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
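
(Reading aid, not part of the patch: a minimal standalone C++ sketch of the
candidate search in lowerToVINSERTB above, restricted to little-endian and a
defined second operand. Mask values 0-15 pick bytes of V1, 16-31 pick bytes of
V2; the shift table is the LittleEndianShifts array from the code above.)

#include <array>
#include <cassert>

struct Candidate {
  bool Found = false, Swap = false;
  unsigned ShiftElts = 0, InsertAtByte = 0;
};

// Recognize a v16i8 shuffle mask that inserts exactly one byte from one
// operand into the identity permutation of the other operand.
Candidate findVInsertBCandidateLE(const std::array<int, 16> &Mask) {
  static const unsigned LittleEndianShifts[16] = {8, 7, 6, 5, 4, 3, 2, 1,
                                                  0, 15, 14, 13, 12, 11, 10, 9};
  Candidate C;
  for (unsigned i = 0; i < 16 && !C.Found; ++i) {
    int Elt = Mask[i];
    // Every other lane must be the identity permutation of the other operand.
    int MaskOffset = Elt < 16 ? 16 : 0;
    bool OthersInOrder = true;
    for (unsigned j = 0; j < 16; ++j)
      if (j != i && Mask[j] != (int)j + MaskOffset) {
        OthersInOrder = false;
        break;
      }
    if (!OthersInOrder)
      continue;
    C.Found = true;
    C.ShiftElts = LittleEndianShifts[Elt & 0xF]; // rotation from the table above
    C.Swap = Elt < 16;                           // inserted byte comes from V1
    C.InsertAtByte = 16 - (i + 1);               // LE byte numbering is reversed
  }
  return C;
}

int main() {
  // Insert byte 5 of V2 (mask value 21) into lane 3 of V1.
  std::array<int, 16> Mask = {0, 1, 2, 21, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  Candidate C = findVInsertBCandidateLE(Mask);
  assert(C.Found && !C.Swap && C.ShiftElts == 3 && C.InsertAtByte == 12);
}
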
@@ -8035,8 +8143,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}
if (Subtarget.hasP9Altivec()) {
- SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
- if (NewISDNode)
+ SDValue NewISDNode;
+ if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
+ return NewISDNode;
+
+ if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
return NewISDNode;
}
@@ -8675,6 +8786,23 @@ SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+// Lower scalar BSWAP64 to xxbrd.
+SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // MTVSRDD
+ Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
+ Op.getOperand(0));
+ // XXBRD
+ Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+ // MFVSRD
+ int VectorIndex = 0;
+ if (Subtarget.isLittleEndian())
+ VectorIndex = 1;
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+ DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
+ return Op;
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8719,11 +8847,29 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
"Should only be called for ISD::INSERT_VECTOR_ELT");
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
// We have legal lowering for constant indices but not for variable ones.
- if (C)
- return Op;
- return SDValue();
+ if (!C)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
+ unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
+ unsigned InsertAtElement = C->getZExtValue();
+ unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
+ if (Subtarget.isLittleEndian()) {
+ InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
+ }
+ return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ }
+ return Op;
}
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
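
(Reading aid, not part of the patch: a standalone C++ sketch of the
InsertAtByte computation in LowerINSERT_VECTOR_ELT above. The element index is
scaled to a byte offset, and on little-endian targets the offset is mirrored to
account for the reversed element numbering.)

#include <cassert>

unsigned insertAtByte(unsigned ElemIdx, unsigned ElemBytes, bool IsLittleEndian) {
  unsigned Byte = ElemIdx * ElemBytes; // byte offset in element order
  if (IsLittleEndian)                  // mirror within the 16-byte register
    Byte = (16 - ElemBytes) - Byte;
  return Byte;
}

int main() {
  // v8i16, element 3: byte 6 on big-endian, byte 8 on little-endian.
  assert(insertAtByte(3, 2, false) == 6 && insertAtByte(3, 2, true) == 8);
  // v16i8, element 0: byte 0 on big-endian, byte 15 on little-endian.
  assert(insertAtByte(0, 1, false) == 0 && insertAtByte(0, 1, true) == 15);
}
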
@@ -9146,6 +9292,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SREM:
case ISD::UREM:
return LowerREM(Op, DAG);
+ case ISD::BSWAP:
+ return LowerBSWAP(Op, DAG);
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 1a5efeba4cf..bf9c4b8e63b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -953,6 +953,7 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
@@ -1079,6 +1080,11 @@ namespace llvm {
/// from one vector into the other.
SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+ /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
+ /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
+    /// essentially the v16i8 vector version of VINSERTH.
+ SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
}; // end class PPCTargetLowering
namespace PPC {
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 506fac7dfc1..e751c149b0b 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1312,7 +1312,12 @@ def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>;
def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
// Vector Insert Element Instructions
-def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>;
+def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
+ (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
+ "vinsertb $vD, $vB, $UIM", IIC_VecGeneral,
+ [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB,
+ imm32SExt16:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
(ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
"vinserth $vD, $vB, $UIM", IIC_VecGeneral,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index ab86a54f6fe..565392f76e4 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -16,7 +16,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PPCGenInstrInfo.inc"
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 1fc50d2c860..3261bc9bc53 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2595,6 +2595,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
}
+ // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
+ // of f64
+ def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+ def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+
// Patterns for which instructions from ISA 3.0 are a better match
let Predicates = [IsLittleEndian, HasP9Vector] in {
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index d46c1383297..78467e81795 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -28,6 +28,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -37,8 +39,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h
index 1b6140203c8..884cb2e5014 100644
--- a/lib/Target/RISCV/RISCV.h
+++ b/lib/Target/RISCV/RISCV.h
@@ -15,15 +15,21 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
#define LLVM_LIB_TARGET_RISCV_RISCV_H
-#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
namespace llvm {
class RISCVTargetMachine;
+class AsmPrinter;
+class FunctionPass;
class MCInst;
+class MCOperand;
class MachineInstr;
+class MachineOperand;
-void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI);
+void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ const AsmPrinter &AP);
+bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
+ MCOperand &MCOp, const AsmPrinter &AP);
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
}
diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td
index 54aa570e13b..da919acad36 100644
--- a/lib/Target/RISCV/RISCV.td
+++ b/lib/Target/RISCV/RISCV.td
@@ -40,9 +40,7 @@ def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
//===----------------------------------------------------------------------===//
def RISCVInstrInfo : InstrInfo {
- // TODO: disable guessInstructionProperties when
- // https://reviews.llvm.org/D37065 lands.
- let guessInstructionProperties = 1;
+ let guessInstructionProperties = 0;
}
def RISCVAsmParser : AsmParser {
diff --git a/lib/Target/RISCV/RISCVAsmPrinter.cpp b/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 1c213b6c7e9..4808e6c73c5 100644
--- a/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -43,6 +43,11 @@ public:
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
+
+ // Wrapper needed for tblgenned pseudo lowering.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
+ return LowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
+ }
};
}
@@ -56,7 +61,7 @@ void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
MCInst TmpInst;
- LowerRISCVMachineInstrToMCInst(MI, TmpInst);
+ LowerRISCVMachineInstrToMCInst(MI, TmpInst, *this);
EmitToStreamer(*OutStreamer, TmpInst);
}
diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td
index e0c25e32e01..0b7a523424c 100644
--- a/lib/Target/RISCV/RISCVCallingConv.td
+++ b/lib/Target/RISCV/RISCVCallingConv.td
@@ -27,3 +27,6 @@ def CC_RISCV32 : CallingConv<[
]>;
def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
+
+// Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h
index 14772ddac4a..0b2c7a40298 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/lib/Target/RISCV/RISCVFrameLowering.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class RISCVSubtarget;
@@ -30,6 +30,12 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
+
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override {
+ return MBB.erase(MI);
+ }
};
}
#endif
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index d76170b7b78..98f7aa16e2e 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -49,8 +49,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setStackPointerRegisterToSaveRestore(RISCV::X2);
+ for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
+ setLoadExtAction(N, XLenVT, MVT::i1, Promote);
+
// TODO: add all necessary setOperationAction calls.
+ setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
+ setOperationAction(ISD::BR_CC, XLenVT, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
// Function alignments (log2).
@@ -63,6 +68,30 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
switch (Op.getOpcode()) {
default:
report_fatal_error("unimplemented operand");
+ case ISD::GlobalAddress:
+ return lowerGlobalAddress(Op, DAG);
+ }
+}
+
+SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = N->getGlobal();
+ int64_t Offset = N->getOffset();
+
+ if (!isPositionIndependent() && !Subtarget.is64Bit()) {
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_HI);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_LO);
+ SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
+ SDValue MNLo =
+ SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
+ return MNLo;
+ } else {
+ report_fatal_error("Unable to lowerGlobalAddress");
}
}
@@ -79,6 +108,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
+ case CallingConv::Fast:
break;
}
@@ -115,6 +145,135 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
return Chain;
}
+// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
+// and output parameter nodes.
+SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ CLI.IsTailCall = false;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ if (IsVarArg) {
+ report_fatal_error("LowerCall with varargs not implemented");
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Analyze the operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV32);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+ for (auto &Arg : Outs) {
+ if (!Arg.Flags.isByVal())
+ continue;
+ report_fatal_error("Passing arguments byval not yet implemented");
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
+
+ // Copy argument values to their designated locations.
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SDValue StackPtr;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ SDValue ArgValue = OutVals[I];
+
+ // Promote the value if needed.
+ // For now, only handle fully promoted arguments.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full:
+ break;
+ default:
+ llvm_unreachable("Unknown loc info!");
+ }
+
+ if (VA.isRegLoc()) {
+ // Queue up the argument copies and emit them at the end.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+ } else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+ report_fatal_error("Passing arguments via the stack not yet implemented");
+ }
+ }
+
+ SDValue Glue;
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (auto &Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
+ Glue = Chain.getValue(1);
+ }
+
+ if (isa<GlobalAddressSDNode>(Callee)) {
+ Callee = lowerGlobalAddress(Callee, DAG);
+ } else if (isa<ExternalSymbolSDNode>(Callee)) {
+ report_fatal_error(
+ "lowerExternalSymbol, needed for lowerCall, not yet handled");
+ }
+
+ // The first call operand is the chain and the second is the target address.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (auto &Reg : RegsToPass)
+ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies, if any.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Mark the end of the call, which is glued to the call itself.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true),
+ Glue, DL);
+ Glue = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, RetCC_RISCV32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (auto &VA : RVLocs) {
+ // Copy the value out, gluing the copy to the end of the call sequence.
+ SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
+ VA.getLocVT(), Glue);
+ Chain = RetValue.getValue(1);
+ Glue = RetValue.getValue(2);
+
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -165,6 +324,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
break;
case RISCVISD::RET_FLAG:
return "RISCVISD::RET_FLAG";
+ case RISCVISD::CALL:
+ return "RISCVISD::CALL";
}
return nullptr;
}
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 9fed48fc04e..dfb4824cc18 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -24,7 +24,8 @@ class RISCVSubtarget;
namespace RISCVISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_FLAG
+ RET_FLAG,
+ CALL
};
}
@@ -52,10 +53,13 @@ private:
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override {
return true;
}
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp
index 92db5358ce4..5b4f4fcbb88 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -28,4 +28,50 @@
using namespace llvm;
-RISCVInstrInfo::RISCVInstrInfo() : RISCVGenInstrInfo() {}
+RISCVInstrInfo::RISCVInstrInfo()
+ : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP) {}
+
+void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DstReg,
+ unsigned SrcReg, bool KillSrc) const {
+ assert(RISCV::GPRRegClass.contains(DstReg, SrcReg) &&
+ "Impossible reg-to-reg copy");
+
+ BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+}
+
+void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool IsKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ if (RC == &RISCV::GPRRegClass)
+ BuildMI(MBB, I, DL, get(RISCV::SW))
+ .addReg(SrcReg, getKillRegState(IsKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+}
+
+void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ if (RC == &RISCV::GPRRegClass)
+ BuildMI(MBB, I, DL, get(RISCV::LW), DstReg).addFrameIndex(FI).addImm(0);
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+}
diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h
index 50404d5554d..05c8378445c 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/lib/Target/RISCV/RISCVInstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H
#include "RISCVRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "RISCVGenInstrInfo.inc"
@@ -26,7 +26,21 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
public:
RISCVInstrInfo();
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool IsKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned DstReg,
+ int FrameIndex, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
};
}
-
#endif
diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td
index 23adf1eda9d..23f218fda8f 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/lib/Target/RISCV/RISCVInstrInfo.td
@@ -17,8 +17,22 @@ include "RISCVInstrFormats.td"
// RISC-V specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
+def SDT_RISCVCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+def SDT_RISCVCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+
+def Call : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def CallSeqStart : SDNode<"ISD::CALLSEQ_START", SDT_RISCVCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def CallSeqEnd : SDNode<"ISD::CALLSEQ_END", SDT_RISCVCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -67,7 +81,7 @@ def uimm12 : Operand<XLenVT> {
}
// A 13-bit signed immediate where the least significant bit is zero.
-def simm13_lsb0 : Operand<XLenVT> {
+def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<13>";
@@ -80,12 +94,30 @@ def uimm20 : Operand<XLenVT> {
}
// A 21-bit signed immediate where the least significant bit is zero.
-def simm21_lsb0 : Operand<XLenVT> {
+def simm21_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<21, "Lsb0">;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<21>";
}
+// Standalone (codegen-only) immleaf patterns.
+def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
+
+// Extract least significant 12 bits from an immediate value and sign extend
+// them.
+def LO12Sext : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(SignExtend64<12>(N->getZExtValue()),
+ SDLoc(N), N->getValueType(0));
+}]>;
+
+// Extract the most significant 20 bits from an immediate value. Add 1 if bit
+// 11 is 1, to compensate for the low 12 bits in the matching immediate addi
+// or ld/st being negative.
+def HI20 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(((N->getZExtValue()+0x800) >> 12) & 0xfffff,
+ SDLoc(N), N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
@@ -257,6 +289,12 @@ class PatGprUimm5<SDPatternOperator OpNode, RVInstIShift Inst>
: Pat<(OpNode GPR:$rs1, uimm5:$shamt),
(Inst GPR:$rs1, uimm5:$shamt)>;
+/// Immediates
+
+def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>;
+// TODO: Add a pattern for immediates with all zeroes in the lower 12 bits.
+def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -284,6 +322,80 @@ def : PatGprSimm12<setult, SLTIU>;
/// Branches and jumps
+// Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch
+// instruction.
+class BccPat<PatFrag CondOp, RVInstB Inst>
+ : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ (Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
+
+def : BccPat<seteq, BEQ>;
+def : BccPat<setne, BNE>;
+def : BccPat<setlt, BLT>;
+def : BccPat<setge, BGE>;
+def : BccPat<setult, BLTU>;
+def : BccPat<setuge, BGEU>;
+
+class BccSwapPat<PatFrag CondOp, RVInst InstBcc>
+ : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ (InstBcc GPR:$rs2, GPR:$rs1, bb:$imm12)>;
+
+// Condition codes that don't have matching RISC-V branch instructions, but
+// are trivially supported by swapping the two input operands
+def : BccSwapPat<setgt, BLT>;
+def : BccSwapPat<setle, BGE>;
+def : BccSwapPat<setugt, BLTU>;
+def : BccSwapPat<setule, BGEU>;
+
+// An extra pattern is needed for a brcond without a setcc (i.e. where the
+// condition was calculated elsewhere).
+def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>;
+
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in
+def PseudoBR : Pseudo<(outs), (ins simm21_lsb0:$imm20), [(br bb:$imm20)]>,
+ PseudoInstExpansion<(JAL X0, simm21_lsb0:$imm20)>;
+
+let isCall = 1, Defs=[X1] in
+def PseudoCALL : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>,
+ PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
+
let isBarrier = 1, isReturn = 1, isTerminator = 1 in
def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>,
PseudoInstExpansion<(JALR X0, X1, 0)>;
+
+/// Loads
+
+multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
+ def : Pat<(LoadOp GPR:$rs1), (Inst GPR:$rs1, 0)>;
+ def : Pat<(LoadOp (add GPR:$rs1, simm12:$imm12)),
+ (Inst GPR:$rs1, simm12:$imm12)>;
+}
+
+defm : LdPat<sextloadi8, LB>;
+defm : LdPat<extloadi8, LB>;
+defm : LdPat<sextloadi16, LH>;
+defm : LdPat<extloadi16, LH>;
+defm : LdPat<load, LW>;
+defm : LdPat<zextloadi8, LBU>;
+defm : LdPat<zextloadi16, LHU>;
+
+/// Stores
+
+multiclass StPat<PatFrag StoreOp, RVInst Inst> {
+ def : Pat<(StoreOp GPR:$rs2, GPR:$rs1), (Inst GPR:$rs2, GPR:$rs1, 0)>;
+ def : Pat<(StoreOp GPR:$rs2, (add GPR:$rs1, simm12:$imm12)),
+ (Inst GPR:$rs2, GPR:$rs1, simm12:$imm12)>;
+}
+
+defm : StPat<truncstorei8, SB>;
+defm : StPat<truncstorei16, SH>;
+defm : StPat<store, SW>;
+
+/// Other pseudo-instructions
+
+// Pessimistically assume the stack pointer will be clobbered
+let Defs = [X2], Uses = [X2] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(CallSeqStart timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(CallSeqEnd timm:$amt1, timm:$amt2)]>;
+} // Defs = [X2], Uses = [X2]
diff --git a/lib/Target/RISCV/RISCVMCInstLower.cpp b/lib/Target/RISCV/RISCVMCInstLower.cpp
index 1ac8d982ff9..ef0051ed56e 100644
--- a/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "RISCV.h"
+#include "MCTargetDesc/RISCVMCExpr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -24,27 +26,72 @@
using namespace llvm;
-void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI,
- MCInst &OutMI) {
+static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
+ const AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+ RISCVMCExpr::VariantKind Kind;
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case RISCVII::MO_None:
+ Kind = RISCVMCExpr::VK_RISCV_None;
+ break;
+ case RISCVII::MO_LO:
+ Kind = RISCVMCExpr::VK_RISCV_LO;
+ break;
+ case RISCVII::MO_HI:
+ Kind = RISCVMCExpr::VK_RISCV_HI;
+ break;
+ }
+
+ const MCExpr *ME =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ ME = MCBinaryExpr::createAdd(
+ ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+
+ ME = RISCVMCExpr::create(ME, Kind, Ctx);
+ return MCOperand::createExpr(ME);
+}
+
+bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
+ MCOperand &MCOp,
+ const AsmPrinter &AP) {
+ switch (MO.getType()) {
+ default:
+ report_fatal_error("LowerRISCVMachineInstrToMCInst: unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return false;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Regmasks are like implicit defs.
+ return false;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, AP.getSymbol(MO.getGlobal()), AP);
+ break;
+ }
+ return true;
+}
+
+void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ const AsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
- switch (MO.getType()) {
- default:
- report_fatal_error(
- "LowerRISCVMachineInstrToMCInst: unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- continue;
- MCOp = MCOperand::createReg(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- }
-
- OutMI.addOperand(MCOp);
+ if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
+ OutMI.addOperand(MCOp);
}
}
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 4f6c528061c..cd658d7e2d9 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -18,9 +18,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "RISCVGenRegisterInfo.inc"
@@ -50,12 +50,47 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
- report_fatal_error("Subroutines not supported yet");
+ // TODO: this implementation is a temporary placeholder which does just
+ // enough to allow other aspects of code generation to be tested
+
+ assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
+
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ int Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ assert(TFI->hasFP(MF) && "eliminateFrameIndex currently requires hasFP");
+
+ // Offsets must be directly encoded in a 12-bit immediate field
+ if (!isInt<12>(Offset)) {
+ report_fatal_error(
+ "Frame offsets outside of the signed 12-bit range not supported");
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
}
unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return RISCV::X8;
}
+
+const uint32_t *
+RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & /*MF*/,
+ CallingConv::ID /*CC*/) const {
+ return CSR_RegMask;
+}
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h
index 94af9f44ecd..d9de9bf8c76 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -25,10 +25,15 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
RISCVRegisterInfo(unsigned HwMode);
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
+
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index df819ccd15d..6948b72747c 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index ac0e69ccde1..6098afa6898 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
#include "Sparc.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index c053cc4c475..524b5d05416 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_SPARC_SPARCINSTRINFO_H
#include "SparcRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 37a1fdf4d77..b9647eaa3d5 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -20,10 +20,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index bfbdb8d0b44..ad6b55a9fc9 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -19,7 +19,7 @@
#include "SparcInstrInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 91c5a5d53a1..a75d111b029 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -11,7 +11,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class SystemZTargetMachine;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 4533f4fdf21..19ce7776fed 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -27,12 +27,12 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index b8be1f5f392..216139eb7c7 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include <cstdint>
#define GET_INSTRINFO_HEADER
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index d4cd89ce590..27c72b41e4f 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 05f93ce5162..a44fae523fe 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -13,7 +13,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
using namespace llvm;
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index a4d9421e08a..6d50369e587 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -323,6 +323,11 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
return 0;
}
+bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
+ EVT VT = TLI->getValueType(DL, DataType);
+ return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
+}
+
int SystemZTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 28821a2ca11..4b11a6f0a83 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -62,6 +62,7 @@ public:
unsigned getPrefetchDistance() { return 2000; }
unsigned getMinPrefetchStride() { return 2048; }
+ bool hasDivRemOp(Type *DataType, bool IsSigned);
bool prefersVectorizedAddressing() { return false; }
bool LSRWithInstrQueries() { return true; }
bool supportsEfficientVectorElementLoadStore() { return true; }
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 5dcb89477a3..b24888fa9cb 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -167,6 +167,13 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
if (GV && !GV->isDeclarationForLinker())
return true;
+ // A symbol marked nonlazybind should not be accessed with a plt. If the
+ // symbol turns out to be external, the linker will convert a direct
+ // access to an access via the plt, so don't assume it is local.
+ const Function *F = dyn_cast_or_null<Function>(GV);
+ if (F && F->hasFnAttribute(Attribute::NonLazyBind))
+ return false;
+
bool IsTLS = GV && GV->isThreadLocal();
bool IsAccessViaCopyRelocs =
Options.MCOptions.MCPIECopyRelocations && GV && isa<GlobalVariable>(GV);
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 211358ad66c..ee60c8f3a7a 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -267,12 +267,11 @@ bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
if (AsmVariant != 0)
report_fatal_error("There are no defined alternate asm variants");
- if (!ExtraCode) {
- // TODO: For now, we just hard-code 0 as the constant offset; teach
- // SelectInlineAsmMemoryOperand how to do address mode matching.
- OS << "0(" + regToString(MI->getOperand(OpNo)) + ')';
- return false;
- }
+ // The current approach to inline asm is that "r" constraints are expressed
+ // as local indices, rather than values on the operand stack. This simplifies
+ // using "r" as it eliminates the need to push and pop the values in a
+  // particular order; however, it also makes it impossible to have an "m"
+ // constraint. So we don't support it.
return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 41249117ae0..e2edb924d4d 100644
--- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -294,6 +294,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
unsigned OldReg = MO.getReg();
+ // Inline asm may have a def in the middle of the operands. Our contract
+ // with inline asm register operands is to provide local indices as
+ // immediates.
+ if (MO.isDef()) {
+ assert(MI.getOpcode() == TargetOpcode::INLINEASM);
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ MRI.removeRegOperandFromUseList(&MO);
+ MO = MachineOperand::CreateImm(LocalId);
+ continue;
+ }
+
// If we see a stackified register, prepare to insert subsequent
// get_locals before the start of its tree.
if (MFI.isVRegStackified(OldReg)) {
@@ -301,6 +312,15 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Our contract with inline asm register operands is to provide local
+ // indices as immediates.
+ if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ MRI.removeRegOperandFromUseList(&MO);
+ MO = MachineOperand::CreateImm(LocalId);
+ continue;
+ }
+
// Insert a get_local.
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index f516a6b260d..e67b1c88b58 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -541,7 +541,7 @@ unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
- return zeroExtend(getRegForValue(V), V, From, To);
+ return signExtend(getRegForValue(V), V, From, To);
}
unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
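
(Reading aid, not part of the patch: the one-line fix above, zeroExtend to
signExtend, matters because zero and sign extension disagree for negative
narrow values. A minimal standalone C++ sketch:)

#include <cassert>
#include <cstdint>

uint32_t zeroExtend8(uint8_t V) { return V; }        // what the old code did
int32_t signExtend8(uint8_t V) { return (int8_t)V; } // what signed uses need

int main() {
  uint8_t MinusOne = 0xFF;               // the i8 value -1
  assert(zeroExtend8(MinusOne) == 255);  // wrong when consumed as signed
  assert(signExtend8(MinusOne) == -1);   // what getRegForSignedValue must produce
}
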
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index bf326fce88f..4cc7f5ae058 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -16,7 +16,7 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
class MachineFrameInfo;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index df6c937a364..eb74106336e 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -17,7 +17,7 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H
#include "WebAssemblyRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "WebAssemblyGenInstrInfo.inc"
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 9367464c806..5e7ebd19fac 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b8ea2f01133..2a784c9822a 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2791,7 +2791,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
isParsingIntelSyntax())) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
- if (validateInstruction(Inst, Operands))
+ if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
return true;
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
@@ -3082,7 +3082,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
if (NumSuccessfulMatches == 1) {
- if (validateInstruction(Inst, Operands))
+ if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
return true;
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 6ff1136cd85..0c99dbbe328 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -54,12 +54,12 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (TSFlags & X86II::LOCK)
OS << "\tlock\t";
if (!(TSFlags & X86II::LOCK) && Flags & X86::IP_HAS_LOCK)
- OS << "\tlock\n";
+ OS << "\tlock\t";
if (Flags & X86::IP_HAS_REPEAT_NE)
- OS << "\trepne\n";
+ OS << "\trepne\t";
else if (Flags & X86::IP_HAS_REPEAT)
- OS << "\trep\n";
+ OS << "\trep\t";
// Output CALLpcrel32 as "callq" in 64-bit mode.
// In Intel annotation it's always emitted as "call".
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 464941a1bab..1f02600a798 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -41,13 +41,13 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
uint64_t TSFlags = Desc.TSFlags;
if (TSFlags & X86II::LOCK)
- OS << "\tlock\n";
+ OS << "\tlock\t";
unsigned Flags = MI->getFlags();
if (Flags & X86::IP_HAS_REPEAT_NE)
- OS << "\trepne\n";
+ OS << "\trepne\t";
else if (Flags & X86::IP_HAS_REPEAT)
- OS << "\trep\n";
+ OS << "\trep\t";
printInstruction(MI, OS);
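Both printer hunks above replace the trailing '\n' after a lock/rep/repne prefix with '\t', so the prefix shares an output line with the instruction emitted next. A rough sketch of the textual difference (the mnemonic and operands are only illustrative):

    #include <cstdio>

    int main() {
      // Old behaviour: the prefix ends the line, so the instruction lands on the next one.
      std::printf("\tlock\n\txchgq\t%%rax, (%%rdi)\n");
      // New behaviour: the prefix is followed by a tab and stays on the same line.
      std::printf("\tlock\t\txchgq\t%%rax, (%%rdi)\n");
    }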
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index f4021d7639b..34f2956e0c0 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -116,9 +116,15 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
+def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
+ "Enable three-operand fused multiple-add",
+ [FeatureAVX]>;
+def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
+ "Support 16-bit floating point conversion instructions",
+ [FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
- [FeatureAVX2]>;
+ [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
@@ -154,9 +160,6 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
-def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
- "Enable three-operand fused multiple-add",
- [FeatureAVX]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
@@ -177,9 +180,6 @@ def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
-def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
- "Support 16-bit floating point conversion instructions",
- [FeatureAVX]>;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 34e384ba311..d8b5b7d3fc0 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -34,13 +34,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstddef>
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 7beb9c6e357..54f937bcda3 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
@@ -41,7 +42,6 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
index b2cd622b1e8..e75276960cc 100644
--- a/lib/Target/X86/X86CmovConversion.cpp
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -57,6 +57,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
@@ -64,7 +65,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp
index 744510a3a3b..6dd4631a484 100644
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -171,7 +171,7 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
case X86::VALIGNDZ128rri:
case X86::VALIGNDZ128rmi:
case X86::VALIGNQZ128rri:
- case X86::VALIGNQZ128rmi:
+ case X86::VALIGNQZ128rmi: {
assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
"Unexpected new opcode!");
unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
@@ -180,6 +180,24 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
Imm.setImm(Imm.getImm() * Scale);
break;
}
+ case X86::VSHUFF32X4Z256rmi:
+ case X86::VSHUFF32X4Z256rri:
+ case X86::VSHUFF64X2Z256rmi:
+ case X86::VSHUFF64X2Z256rri:
+ case X86::VSHUFI32X4Z256rmi:
+ case X86::VSHUFI32X4Z256rri:
+ case X86::VSHUFI64X2Z256rmi:
+ case X86::VSHUFI64X2Z256rri: {
+ assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
+ NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+ "Unexpected new opcode!");
+ MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+ int64_t ImmVal = Imm.getImm();
+ // Set bit 5, move bit 1 to bit 4, copy bit 0.
+ Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
+ break;
+ }
+ }
}
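The new 256-bit VSHUF cases above rescale the immediate for VPERM2F128/VPERM2I128, whose selector for the low 128-bit lane lives in bits 1:0 and for the high lane in bits 5:4, with a value of 2 or 3 meaning the second source. A small self-contained check of that rewrite, under those assumptions:

    #include <cassert>
    #include <cstdint>

    // Mirror of the immediate rewrite above: bit 0 of the VSHUF immediate picks
    // the lane of src1 for the low half, bit 1 picks the lane of src2 for the
    // high half (hence the forced 0x20, "lane x of the second source").
    static int64_t toVperm2Imm(int64_t ImmVal) {
      return 0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1);
    }

    int main() {
      assert(toVperm2Imm(0) == 0x20);  // lane 0 of src1, lane 0 of src2
      assert(toVperm2Imm(1) == 0x21);  // lane 1 of src1, lane 0 of src2
      assert(toVperm2Imm(2) == 0x30);  // lane 0 of src1, lane 1 of src2
      assert(toVperm2Imm(3) == 0x31);  // lane 1 of src1, lane 1 of src2
    }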
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index b2b5a78fcdb..9664c931c35 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -55,9 +55,9 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
#define FIXUPBW_DESC "X86 Byte/Word Instruction Fixup"
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 9f649dad8bc..bbc2bffdb70 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
namespace llvm {
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 5582526541b..7bcbe199124 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -37,11 +37,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 988f2967401..86e65b83ffa 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1562,6 +1562,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
bool HasFP = hasFP(MF);
uint64_t NumBytes = 0;
+ bool NeedsDwarfCFI =
+ (!MF.getTarget().getTargetTriple().isOSDarwin() &&
+ !MF.getTarget().getTargetTriple().isOSWindows()) &&
+ (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
+
if (IsFunclet) {
assert(HasFP && "EH funclets without FP not yet implemented");
NumBytes = getWinEHFuncletFrameSize(MF);
@@ -1584,6 +1589,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
MachineFramePtr)
.setMIFlag(MachineInstr::FrameDestroy);
+ if (NeedsDwarfCFI) {
+ unsigned DwarfStackPtr =
+ TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
+ nullptr, DwarfStackPtr, -SlotSize));
+ --MBBI;
+ }
}
MachineBasicBlock::iterator FirstCSPop = MBBI;
@@ -1647,6 +1659,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
+ if (!hasFP(MF) && NeedsDwarfCFI) {
+ // Define the current CFA rule to use the provided offset.
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
+ nullptr, -CSSize - SlotSize));
+ }
--MBBI;
}
@@ -1659,6 +1676,23 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NeedsWin64CFI && MF.hasWinCFI())
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
+ if (!hasFP(MF) && NeedsDwarfCFI) {
+ MBBI = FirstCSPop;
+ int64_t Offset = -CSSize - SlotSize;
+ // Mark callee-saved pop instruction.
+ // Define the current CFA rule to use the provided offset.
+ while (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator PI = MBBI;
+ unsigned Opc = PI->getOpcode();
+ ++MBBI;
+ if (Opc == X86::POP32r || Opc == X86::POP64r) {
+ Offset += SlotSize;
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
+ }
+ }
+ }
+
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
// Add the return addr area delta back since we are not tail calling.
int Offset = -1 * X86FI->getTCReturnAddrDelta();
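The epilogue changes above re-emit the CFA offset as the stack shrinks: after the frame-pointer pop, after the SP adjustment, and once per callee-saved pop in the loop over FirstCSPop. A standalone sketch of the bookkeeping for that pop loop (positive CFA offsets and illustrative sizes; the patch itself tracks the negated value):

    #include <cstdio>

    int main() {
      const int SlotSize = 8;   // x86-64 slot size
      const int CSSize = 16;    // assume two callee-saved registers were pushed
      // Right before the callee-saved pops the CFA sits CSSize + SlotSize bytes
      // above SP: the saved registers plus the return address.
      int CfaOffset = CSSize + SlotSize;                      // 24
      for (int Pop = 0; Pop < 2; ++Pop) {
        CfaOffset -= SlotSize;                                // each POP moves SP up one slot
        std::printf(".cfi_def_cfa_offset %d\n", CfaOffset);   // 16, then 8
      }
    }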
@@ -2577,6 +2611,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
unsigned Regs[2];
unsigned FoundRegs = 0;
+ auto &MRI = MBB.getParent()->getRegInfo();
auto RegMask = Prev->getOperand(1);
auto &RegClass =
@@ -2590,6 +2625,10 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
if (!RegMask.clobbersPhysReg(Candidate))
continue;
+ // Don't clobber reserved registers
+ if (MRI.isReserved(Candidate))
+ continue;
+
bool IsDef = false;
for (const MachineOperand &MO : Prev->implicit_operands()) {
if (MO.isReg() && MO.isDef() &&
@@ -2835,6 +2874,15 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
return MBBI;
}
+int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+ return TRI->getSlotSize();
+}
+
+unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
+ const {
+ return TRI->getDwarfRegNum(StackPtr, true);
+}
+
namespace {
// Struct used by orderFrameObjects to help sort the stack objects.
struct X86FrameSortingObject {
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 38ac96e16d4..2bce79262d0 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
#define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
@@ -168,6 +168,10 @@ public:
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool RestoreSP = false) const;
+ int getInitialCFAOffset(const MachineFunction &MF) const override;
+
+ unsigned getInitialCFARegister(const MachineFunction &MF) const override;
+
private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index e43fd508de3..0cbf7601790 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -449,15 +449,15 @@ namespace {
// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
- if (N->getOpcode() == X86ISD::PCMPEQM ||
- N->getOpcode() == X86ISD::PCMPGTM ||
- N->getOpcode() == X86ISD::CMPM ||
- N->getOpcode() == X86ISD::CMPMU) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
+ Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
+ Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be
// zero extended.
- if (N->getOperand(0).getValueType() == MVT::v8i32 ||
- N->getOperand(0).getValueType() == MVT::v8f32)
+ EVT OpVT = N->getOperand(0).getValueType();
+ if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32)
return Subtarget->hasVLX();
return true;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b178ad6c13e..22b4d7997fa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -380,8 +380,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
- if (Subtarget.useSoftFloat() ||
- (!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
+ if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
@@ -4998,6 +4997,8 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
switch (Opcode) {
default:
return false;
+ case X86ISD::TESTM:
+ case X86ISD::TESTNM:
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
@@ -6746,6 +6747,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
+ BitVector UndefElements;
+ SDValue Ld = BVOp->getSplatValue(&UndefElements);
+
// Attempt to use VBROADCASTM
// From this pattern:
// a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
@@ -6753,17 +6757,23 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
//
// Create (VBROADCASTM v2i1 X)
if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
- MVT EltType;
- unsigned NumElts;
+ MVT EltType = VT.getScalarType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue BOperand;
SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
- if (ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) {
- SDValue BOperand = ZeroExtended.getOperand(0);
+ if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) ||
+ (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND &&
+ Ld.getOperand(0).getOpcode() == ISD::BITCAST)) {
+ if (ZeroExtended)
+ BOperand = ZeroExtended.getOperand(0);
+ else
+ BOperand = Ld.getOperand(0).getOperand(0);
if (BOperand.getValueType().isVector() &&
BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
- if ((EltType == MVT::i64 &&
- VT.getVectorElementType() == MVT::i8) || // for broadcastmb2q
- (EltType == MVT::i32 &&
- VT.getVectorElementType() == MVT::i16)) { // for broadcastmw2d
+ if ((EltType == MVT::i64 && (VT.getVectorElementType() == MVT::i8 ||
+ NumElts == 8)) || // for broadcastmb2q
+ (EltType == MVT::i32 && (VT.getVectorElementType() == MVT::i16 ||
+ NumElts == 16))) { // for broadcastmw2d
SDValue Brdcst =
DAG.getNode(X86ISD::VBROADCASTM, dl,
MVT::getVectorVT(EltType, NumElts), BOperand);
@@ -6773,9 +6783,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
- BitVector UndefElements;
- SDValue Ld = BVOp->getSplatValue(&UndefElements);
-
// We need a splat of a single value to use broadcast, and it doesn't
// make any sense if the value is only in one element of the vector.
if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
@@ -7707,6 +7714,111 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+// Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be
+// shown to be a permutation of a vector by indices in a non-constant vector.
+// (build_vector (extract_elt V, (extract_elt I, 0)),
+// (extract_elt V, (extract_elt I, 1)),
+// ...
+// ->
+// (vpermv I, V)
+//
+// TODO: Handle undefs
+// TODO: Utilize pshufb and zero mask blending to support more efficient
+// construction of vectors with constant-0 elements.
+// TODO: Use smaller-element vectors of the same width, and "interpolate" the
+// indices, when no native operation is available.
+// when no native operation available.
+static SDValue
+LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Look for VPERMV and PSHUFB opportunities.
+ MVT VT = V.getSimpleValueType();
+ switch (VT.SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v16i8:
+ if (!Subtarget.hasSSE3())
+ return SDValue();
+ break;
+ case MVT::v8f32:
+ case MVT::v8i32:
+ if (!Subtarget.hasAVX2())
+ return SDValue();
+ break;
+ case MVT::v4i64:
+ case MVT::v4f64:
+ if (!Subtarget.hasVLX())
+ return SDValue();
+ break;
+ case MVT::v16f32:
+ case MVT::v8f64:
+ case MVT::v16i32:
+ case MVT::v8i64:
+ if (!Subtarget.hasAVX512())
+ return SDValue();
+ break;
+ case MVT::v32i16:
+ if (!Subtarget.hasBWI())
+ return SDValue();
+ break;
+ case MVT::v8i16:
+ case MVT::v16i16:
+ if (!Subtarget.hasVLX() || !Subtarget.hasBWI())
+ return SDValue();
+ break;
+ case MVT::v64i8:
+ if (!Subtarget.hasVBMI())
+ return SDValue();
+ break;
+ case MVT::v32i8:
+ if (!Subtarget.hasVLX() || !Subtarget.hasVBMI())
+ return SDValue();
+ break;
+ }
+ SDValue SrcVec, IndicesVec;
+ // Check for a match of the permute source vector and permute index elements.
+ // This is done by checking that the i-th build_vector operand is of the form:
+ // (extract_elt SrcVec, (extract_elt IndicesVec, i)).
+ for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
+ SDValue Op = V.getOperand(Idx);
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // If this is the first extract encountered in V, set the source vector,
+ // otherwise verify the extract is from the previously defined source
+ // vector.
+ if (!SrcVec)
+ SrcVec = Op.getOperand(0);
+ else if (SrcVec != Op.getOperand(0))
+ return SDValue();
+ SDValue ExtractedIndex = Op->getOperand(1);
+ // Peek through extends.
+ if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND ||
+ ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND)
+ ExtractedIndex = ExtractedIndex.getOperand(0);
+ if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // If this is the first extract from the index vector candidate, set the
+ // indices vector, otherwise verify the extract is from the previously
+ // defined indices vector.
+ if (!IndicesVec)
+ IndicesVec = ExtractedIndex.getOperand(0);
+ else if (IndicesVec != ExtractedIndex.getOperand(0))
+ return SDValue();
+
+ auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1));
+ if (!PermIdx || PermIdx->getZExtValue() != Idx)
+ return SDValue();
+ }
+ MVT IndicesVT = VT;
+ if (VT.isFloatingPoint())
+ IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
+ VT.getVectorNumElements());
+ IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
+ return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
+ SDLoc(V), VT, IndicesVec, SrcVec);
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
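LowerBUILD_VECTORAsVariablePermute, added above, matches a build_vector whose element i is extract(SrcVec, extract(IndicesVec, i)) and emits a single VPERMV or PSHUFB. A plain-C++ model of the pattern it is looking for (arrays stand in for the DAG nodes):

    #include <array>
    #include <cstdio>

    int main() {
      std::array<int, 8> Src     = {10, 11, 12, 13, 14, 15, 16, 17};
      std::array<int, 8> Indices = {7, 6, 5, 4, 3, 2, 1, 0};   // non-constant in the DAG
      std::array<int, 8> Out;
      // Element i of the build_vector is Src[Indices[i]], i.e. a permutation of
      // Src driven by a variable index vector: exactly what one VPERMV computes.
      for (std::size_t i = 0; i != Out.size(); ++i)
        Out[i] = Src[Indices[i]];
      for (int V : Out)
        std::printf("%d ", V);
      std::printf("\n");   // 17 16 15 14 13 12 11 10
    }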
@@ -7922,6 +8034,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (IsAllConstants)
return SDValue();
+ if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget))
+ return V;
+
// See if we can use a vector load to get all of the elements.
if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
@@ -10716,10 +10831,16 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
- if (Subtarget.hasSSSE3())
+ if (Subtarget.hasSSSE3()) {
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ }
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
@@ -11016,10 +11137,16 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
- if (Subtarget.hasSSSE3())
+ if (Subtarget.hasSSSE3()) {
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ }
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
@@ -12372,6 +12499,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
}
+
+ // Try to use SHUF128 if possible.
+ if (Subtarget.hasVLX()) {
+ if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
+ unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
+ ((WidenedMask[1] % 2) << 1);
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, DL, MVT::i8));
+ }
+ }
}
// Otherwise form a 128-bit permutation. After accounting for undefs,
@@ -13697,10 +13834,6 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- if (SDValue Shuf128 =
- lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
- return Shuf128;
-
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower latency instructions that will operate on all four
@@ -13722,6 +13855,10 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
}
+ if (SDValue Shuf128 =
+ lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ return Shuf128;
+
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -17333,6 +17470,20 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
if (Swap)
std::swap(Op0, Op1);
+
+ // See if this is CMP(EQ|NE, AND(A,B), ZERO) and change it to TESTM|TESTNM.
+ if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) {
+ SDValue A = peekThroughBitcasts(Op0);
+ if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
+ ISD::isBuildVectorAllZeros(Op1.getNode())) {
+ MVT VT0 = Op0.getSimpleValueType();
+ SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
+ SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
+ return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM,
+ dl, VT, RHS, LHS);
+ }
+ }
+
if (Opc)
return DAG.getNode(Opc, dl, VT, Op0, Op1);
Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
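The new block above turns an AVX-512 compare of AND(A,B) against zero into a TESTM or TESTNM node; per element, TESTM sets the mask bit when (A & B) != 0 and TESTNM when it is 0. A scalar model of that equivalence (a sketch, not the DAG combine itself):

    #include <cassert>
    #include <cstdint>

    // CMPEQ(AND(A,B), 0) matches TESTNM; CMPNE(AND(A,B), 0) matches TESTM.
    static bool testm(uint32_t A, uint32_t B)  { return (A & B) != 0; }
    static bool testnm(uint32_t A, uint32_t B) { return (A & B) == 0; }

    int main() {
      for (uint32_t A : {0u, 1u, 0xF0u})
        for (uint32_t B : {0u, 1u, 0x0Fu}) {
          assert(((A & B) == 0) == testnm(A, B));
          assert(((A & B) != 0) == testm(A, B));
        }
    }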
@@ -19838,10 +19989,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
PassThru = Src1;
- SDValue Rnd = Op.getOperand(5);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (!isRoundModeCurDirection(Rnd))
+ return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl,
+ Op.getValueType(), Src1, Src2,
+ Src3, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl,
Op.getValueType(), Src1, Src2,
- Src3, Rnd),
+ Src3),
Mask, PassThru, Subtarget, DAG);
}
case IFMA_OP_MASKZ:
@@ -24786,9 +24946,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
- case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
case X86ISD::FRCP: return "X86ISD::FRCP";
- case X86ISD::FRCPS: return "X86ISD::FRCPS";
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
@@ -24942,10 +25100,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
+ case X86ISD::FMADDS1: return "X86ISD::FMADDS1";
+ case X86ISD::FNMADDS1: return "X86ISD::FNMADDS1";
+ case X86ISD::FMSUBS1: return "X86ISD::FMSUBS1";
+ case X86ISD::FNMSUBS1: return "X86ISD::FNMSUBS1";
case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND";
case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND";
case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND";
case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND";
+ case X86ISD::FMADDS3: return "X86ISD::FMADDS3";
+ case X86ISD::FNMADDS3: return "X86ISD::FNMADDS3";
+ case X86ISD::FMSUBS3: return "X86ISD::FMSUBS3";
+ case X86ISD::FNMSUBS3: return "X86ISD::FNMSUBS3";
case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND";
case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
@@ -24966,9 +25132,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SELECT: return "X86ISD::SELECT";
case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
+ case X86ISD::RCP14: return "X86ISD::RCP14";
+ case X86ISD::RCP14S: return "X86ISD::RCP14S";
case X86ISD::RCP28: return "X86ISD::RCP28";
case X86ISD::RCP28S: return "X86ISD::RCP28S";
case X86ISD::EXP2: return "X86ISD::EXP2";
+ case X86ISD::RSQRT14: return "X86ISD::RSQRT14";
+ case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S";
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
@@ -25006,6 +25176,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
+ case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
@@ -30314,10 +30485,17 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
+ if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
return NewOp;
- if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
+ // TODO - Remove this once we can handle the implicit zero-extension of
+ // X86ISD::PEXTRW/X86ISD::PEXTRB in:
+ // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
+ // combineBasicSADPattern.
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
SDValue InputVector = N->getOperand(0);
@@ -30464,16 +30642,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// TODO - merge with combineExtractVectorElt once it can handle the implicit
-// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in:
-// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
-// combineBasicSADPattern.
-static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- return combineExtractWithShuffle(N, DAG, DCI, Subtarget);
-}
-
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
/// TODO: Move to DAGCombiner, possibly using TargetLowering::hasAndNot()?
@@ -30674,26 +30842,6 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case X86ISD::PALIGNR:
- // PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
- if (!VT.is128BitVector())
- return false;
- Opcode = X86ISD::VALIGN;
- LLVM_FALLTHROUGH;
- case X86ISD::VALIGN: {
- if (EltVT != MVT::i32 && EltVT != MVT::i64)
- return false;
- uint64_t Imm = Op.getConstantOperandVal(2);
- MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
- unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
- unsigned EltSize = EltVT.getSizeInBits();
- // Make sure we can represent the same shift with the new VT.
- if ((ShiftAmt % EltSize) != 0)
- return false;
- Imm = ShiftAmt / EltSize;
- return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
- DAG.getConstant(Imm, DL, MVT::i8));
- }
case X86ISD::SHUF128: {
if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64)
return false;
@@ -34441,8 +34589,9 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// This function transforms vector truncation of 'extended sign-bits' values.
-/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS operations.
+/// This function transforms vector truncation of 'extended sign-bits' or
+/// 'extended zero-bits' values.
+/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -34475,10 +34624,19 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL,
// packed/truncated value. e.g. Comparison result, sext_in_reg, etc.
unsigned NumSignBits = DAG.ComputeNumSignBits(In);
unsigned NumPackedBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
- if (NumSignBits <= (InSVT.getSizeInBits() - NumPackedBits))
- return SDValue();
+ if (NumSignBits > (InSVT.getSizeInBits() - NumPackedBits))
+ return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
+
+ // Use PACKUS if the input has zero-bits that extend all the way to the
+ // packed/truncated value. e.g. masks, zext_in_reg, etc.
+ KnownBits Known;
+ DAG.computeKnownBits(In, Known);
+ unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
+ NumPackedBits = Subtarget.hasSSE41() ? NumPackedBits : 8;
+ if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedBits))
+ return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);
- return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
+ return SDValue();
}
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
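The reworked legality checks above choose PACKSS when the input has enough known sign bits and PACKUS when its high bits are known zero. A standalone scalar model of those thresholds for an i32 -> i16 truncate (the helpers stand in for ComputeNumSignBits and computeKnownBits):

    #include <cassert>
    #include <cstdint>

    // Number of high bits equal to the sign bit.
    static int numSignBits(int32_t V) {
      uint32_t U = static_cast<uint32_t>(V), Sign = U >> 31;
      int N = 0;
      for (int Bit = 31; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit) ++N;
      return N;
    }

    // Number of known leading zero bits.
    static int numLeadingZeros(int32_t V) {
      uint32_t U = static_cast<uint32_t>(V);
      int N = 0;
      for (int Bit = 31; Bit >= 0 && ((U >> Bit) & 1) == 0; --Bit) ++N;
      return N;
    }

    int main() {
      // i32 -> i16, so 32 - 16 = 16 is the threshold used above.
      assert(numSignBits(100) > 16);        // PACKSS keeps the value intact
      assert(!(numSignBits(40000) > 16));   // PACKSS would saturate 40000
      assert(numLeadingZeros(40000) >= 16); // but PACKUS is safe: the top half is zero
    }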
@@ -34507,7 +34665,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}
- // Try to truncate extended sign bits with PACKSS.
+ // Try to truncate extended sign/zero bits with PACKSS/PACKUS.
if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
return V;
@@ -35341,9 +35499,11 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// Do not convert the passthru input of scalar intrinsics.
// FIXME: We could allow negations of the lower element only.
- bool NegA = N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
+ bool NegA = N->getOpcode() != X86ISD::FMADDS1 &&
+ N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
bool NegB = invertIfNegative(B);
- bool NegC = N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
+ bool NegC = N->getOpcode() != X86ISD::FMADDS3 &&
+ N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
// Negative multiplication when NegA xor NegB
bool NegMul = (NegA != NegB);
@@ -35371,6 +35531,20 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break;
}
+ } else if (N->getOpcode() == X86ISD::FMADDS1) {
+ switch (NewOpcode) {
+ case ISD::FMA: NewOpcode = X86ISD::FMADDS1; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1; break;
+ }
+ } else if (N->getOpcode() == X86ISD::FMADDS3) {
+ switch (NewOpcode) {
+ case ISD::FMA: NewOpcode = X86ISD::FMADDS3; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3; break;
+ }
} else if (N->getOpcode() == X86ISD::FMADDS1_RND) {
switch (NewOpcode) {
case ISD::FMA: NewOpcode = X86ISD::FMADDS1_RND; break;
@@ -36590,10 +36764,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case X86ISD::PEXTRW:
case X86ISD::PEXTRB:
- return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget);
+ return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case ISD::INSERT_SUBVECTOR:
return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_SUBVECTOR:
@@ -36689,6 +36862,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMADD_RND:
case X86ISD::FMADDS1_RND:
case X86ISD::FMADDS3_RND:
+ case X86ISD::FMADDS1:
+ case X86ISD::FMADDS3:
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 17cb976a4c7..d1438e59f9b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -254,7 +254,9 @@ namespace llvm {
/// Note that these typically require refinement
/// in order to obtain suitable precision.
FRSQRT, FRCP,
- FRSQRTS, FRCPS,
+
+ // AVX-512 reciprocal approximations with a little more precision.
+ RSQRT14, RSQRT14S, RCP14, RCP14S,
// Thread Local Storage.
TLSADDR,
@@ -487,6 +489,12 @@ namespace llvm {
FMADDSUB_RND,
FMSUBADD_RND,
+ // Scalar intrinsic FMA.
+ FMADDS1, FMADDS3,
+ FNMADDS1, FNMADDS3,
+ FMSUBS1, FMSUBS3,
+ FNMSUBS1, FNMSUBS3,
+
// Scalar intrinsic FMA with rounding mode.
// Two versions, passthru bits on op1 or op3.
FMADDS1_RND, FMADDS3_RND,
@@ -555,7 +563,7 @@ namespace llvm {
RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
// Conversions between float and half-float.
- CVTPS2PH, CVTPH2PS,
+ CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
// LWP insert record.
LWPINS,
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index a73ee19423d..84b44ac677b 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -6017,16 +6017,17 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
}
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
- string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
- SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
+ string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
+ SDNode OpNodeRnds1, SDNode OpNodes3,
+ SDNode OpNodeRnds3, X86VectorVTInfo _,
+ string SUFF> {
let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
// Operands for intrinsic are in 123 order to preserve passthru
// semantics.
- (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
- (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
- _.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
+ (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
+ _.ScalarIntMemCPat:$src3)),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
@@ -6035,10 +6036,9 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
(_.ScalarLdFrag addr:$src3)))), 0>;
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
- (_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
- (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
- _.RC:$src1, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
+ (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
+ _.RC:$src1)),
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
@@ -6050,8 +6050,8 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
(null_frag),
- (_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
- _.RC:$src2, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
+ _.RC:$src2)),
(null_frag),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
@@ -6061,26 +6061,29 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
- string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
+ SDNode OpNodeRnds1, SDNode OpNodes3,
SDNode OpNodeRnds3> {
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
- OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">,
+ OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
+ f32x_info, "SS">,
EVEX_CD8<32, CD8VT1>, VEX_LIG;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
- OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">,
+ OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
+ f64x_info, "SD">,
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1,
- X86FmaddRnds3>;
-defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1,
- X86FmsubRnds3>;
-defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd,
- X86FnmaddRnds1, X86FnmaddRnds3>;
-defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
- X86FnmsubRnds1, X86FnmsubRnds3>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
+ X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
+ X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
+ X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
+ X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
@@ -6554,7 +6557,7 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
NotMemoryFoldable;
def : Pat<(f64 (fpextend FR32X:$src)),
- (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
+ (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
@@ -6569,7 +6572,7 @@ def : Pat<(f64 (extloadf32 addr:$src)),
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
- (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
+ (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
@@ -7174,34 +7177,44 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
//===----------------------------------------------------------------------===//
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, PatFrag ld_frag> {
- defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
- "vcvtph2ps", "$src", "$src",
- (X86cvtph2ps (_src.VT _src.RC:$src),
- (i32 FROUND_CURRENT))>, T8PD;
- defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
- "vcvtph2ps", "$src", "$src",
- (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
- (i32 FROUND_CURRENT))>, T8PD;
+ defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD;
+ defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
+ (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT
+ (bitconvert
+ (ld_frag addr:$src))))>, T8PD;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
- defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
- "vcvtph2ps", "{sae}, $src", "$src, {sae}",
- (X86cvtph2ps (_src.VT _src.RC:$src),
- (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
+ defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
+ (ins _src.RC:$src), "vcvtph2ps",
+ "{sae}, $src", "$src, {sae}",
+ (X86cvtph2psRnd (_src.VT _src.RC:$src),
+ (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
}
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
- let Predicates = [HasVLX] in {
- defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
- }
+
+let Predicates = [HasVLX] in {
+ defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
+ loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
+ loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+
+ // Pattern match vcvtph2ps of a scalar i64 load.
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (VCVTPH2PSZ128rm addr:$src)>;
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+ (VCVTPH2PSZ128rm addr:$src)>;
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ (VCVTPH2PSZ128rm addr:$src)>;
}
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
@@ -7212,17 +7225,16 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2)),
NoItinerary, 0, 0>, AVX512AIi8Base;
- def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2))),
- addr:$dst)]>;
- let hasSideEffects = 0, mayStore = 1 in
- def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- []>, EVEX_K;
+ let hasSideEffects = 0, mayStore = 1 in {
+ def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>;
+ def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ []>, EVEX_K;
+ }
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
let hasSideEffects = 0 in
@@ -7242,6 +7254,19 @@ let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
+
+ def : Pat<(store (f64 (extractelt
+ (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
+ (iPTR 0))), addr:$dst),
+ (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
+ def : Pat<(store (i64 (extractelt
+ (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
+ (iPTR 0))), addr:$dst),
+ (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
+ def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
+ (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
+ def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
+ (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
// Patterns for matching conversions from float to half-float and vice versa.
@@ -7264,35 +7289,6 @@ let Predicates = [HasVLX] in {
(VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
}
-// Patterns for matching float to half-float conversion when AVX512 is supported
-// but F16C isn't. In that case we have to use 512-bit vectors.
-let Predicates = [HasAVX512, NoVLX, NoF16C] in {
- def : Pat<(fp_to_f16 FR32X:$src),
- (i16 (EXTRACT_SUBREG
- (VMOVPDI2DIZrr
- (v8i16 (EXTRACT_SUBREG
- (VCVTPS2PHZrr
- (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
- sub_xmm), 4), sub_xmm))), sub_16bit))>;
-
- def : Pat<(f16_to_fp GR16:$src),
- (f32 (COPY_TO_REGCLASS
- (v4f32 (EXTRACT_SUBREG
- (VCVTPH2PSZrr
- (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
- (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
- sub_xmm)), sub_xmm)), FR32X))>;
-
- def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
- (f32 (COPY_TO_REGCLASS
- (v4f32 (EXTRACT_SUBREG
- (VCVTPH2PSZrr
- (VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
- sub_xmm), 4)), sub_xmm)), FR32X))>;
-}
-
// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
@@ -7362,13 +7358,13 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
-defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
+defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
-defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
+defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
-defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
+defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
-defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
+defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
@@ -7414,8 +7410,8 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
}
}
-defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
-defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
+defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>;
+defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
@@ -7582,7 +7578,8 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
+ string SUFF, SDNode OpNode, SDNode OpNodeRnd,
+ Intrinsic Intr> {
let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -7618,21 +7615,35 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
}
+let Predicates = [HasAVX512] in {
def : Pat<(_.EltVT (OpNode _.FRC:$src)),
(!cast<Instruction>(NAME#SUFF#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
+ def : Pat<(Intr VR128X:$src),
+ (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
+ VR128X:$src)>;
+}
+
+let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(_.EltVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#SUFF#Zm)
- (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
+ (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))),
+ (!cast<Instruction>(NAME#SUFF#Zm_Int)
+ (_.VT (IMPLICIT_DEF)), addr:$src2)>;
+}
+
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
- X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS,
- NotMemoryFoldable;
+ X86fsqrtRnds, int_x86_sse_sqrt_ss>,
+ EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
- X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
+ X86fsqrtRnds, int_x86_sse2_sqrt_sd>,
+ EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
NotMemoryFoldable;
}
@@ -7641,19 +7652,6 @@ defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
-let Predicates = [HasAVX512] in {
- def : Pat<(f32 (X86frsqrt FR32X:$src)),
- (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
- def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
- Requires<[OptForSize]>;
- def : Pat<(f32 (X86frcp FR32X:$src)),
- (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
- def : Pat<(f32 (X86frcp (load addr:$src))),
- (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
- Requires<[OptForSize]>;
-}
-
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
@@ -8911,6 +8909,123 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
avx512vl_i8_info, avx512vl_i8_info>,
EVEX_CD8<8, CD8VF>;
+// Fragments to help convert valignq into masked valignd, or valignq/valignd
+// into vpalignr.
+def ValignqImm32XForm : SDNodeXForm<imm, [{
+ return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
+}]>;
+def ValignqImm8XForm : SDNodeXForm<imm, [{
+ return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
+}]>;
+def ValigndImm8XForm : SDNodeXForm<imm, [{
+ return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
+}]>;
+
+multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ SDNodeXForm ImmXForm> {
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ imm:$src3))),
+ To.RC:$src0)),
+ (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
+ To.RC:$src1, To.RC:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ imm:$src3))),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
+ To.RC:$src1, To.RC:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert (To.LdFrag addr:$src2)),
+ imm:$src3))),
+ To.RC:$src0)),
+ (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert (To.LdFrag addr:$src2)),
+ imm:$src3))),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+}
+
+multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo From,
+ X86VectorVTInfo To,
+ SDNodeXForm ImmXForm> :
+ avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
+ def : Pat<(From.VT (OpNode From.RC:$src1,
+ (bitconvert (To.VT (X86VBroadcast
+ (To.ScalarLdFrag addr:$src2)))),
+ imm:$src3)),
+ (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (X86VBroadcast
+ (To.ScalarLdFrag addr:$src2)))),
+ imm:$src3))),
+ To.RC:$src0)),
+ (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (X86VBroadcast
+ (To.ScalarLdFrag addr:$src2)))),
+ imm:$src3))),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
+ To.RC:$src1, addr:$src2,
+ (ImmXForm imm:$src3))>;
+}
+
+let Predicates = [HasAVX512] in {
+ // For 512-bit we lower to the widest element type we can. So we only need
+ // to handle converting valignq to valignd.
+ defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
+ v16i32_info, ValignqImm32XForm>;
+}
+
+let Predicates = [HasVLX] in {
+ // For 128-bit we lower to the widest element type we can. So we only need
+ // to handle converting valignq to valignd.
+ defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
+ v4i32x_info, ValignqImm32XForm>;
+ // For 256-bit we lower to the widest element type we can. So we only need
+ // to handle converting valignq to valignd.
+ defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
+ v8i32x_info, ValignqImm32XForm>;
+}
+
+let Predicates = [HasVLX, HasBWI] in {
+ // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
+ defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
+ v16i8x_info, ValignqImm8XForm>;
+ defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
+ v16i8x_info, ValigndImm8XForm>;
+}
+
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 453dcd83df1..15466c2978f 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -290,8 +290,7 @@ multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, Intrinsic IntF32, Intrinsic IntF64,
- SDNode OpNode> {
+ string OpStr, SDNode OpNodeIntrin, SDNode OpNode> {
let ExeDomain = SSEPackedSingle in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
FR32, f32mem>,
@@ -309,43 +308,44 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// This is because src1 is tied to dest, and the scalar intrinsics
// require the pass-through values to come from the first source
// operand, not the second.
- // TODO: Use AVX512 instructions when possible.
- let Predicates = [HasFMA] in {
- def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
+ let Predicates = [HasFMA, NoAVX512] in {
+ def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)),
(!cast<Instruction>(NAME#"213SSr_Int")
VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
+ def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)),
(!cast<Instruction>(NAME#"213SDr_Int")
VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3),
+ def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2,
+ sse_load_f32:$src3)),
(!cast<Instruction>(NAME#"213SSm_Int")
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
- def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3),
+ def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2,
+ sse_load_f64:$src3)),
(!cast<Instruction>(NAME#"213SDm_Int")
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
- def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2),
+ def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, sse_load_f32:$src3,
+ VR128:$src2)),
(!cast<Instruction>(NAME#"132SSm_Int")
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
- def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2),
+ def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, sse_load_f64:$src3,
+ VR128:$src2)),
(!cast<Instruction>(NAME#"132SDm_Int")
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
}
}
-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
- int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
-defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
- int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG;
+defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG;
-defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
- int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
-defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
- int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
+defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>,
+ VEX_LIG;
+defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>,
+ VEX_LIG;
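The reason these patterns can use the 213/132 forms is spelled out in the comment above: the intrinsic's result keeps the upper vector elements of the first source, and only element 0 is the fused operation. A hedged sketch of that scalar semantics in plain C++ (the helper name is illustrative; this is not the DAG node itself):

    #include <array>
    #include <cmath>
    #include <cstdio>

    // Scalar FMA with pass-through from operand 1: element 0 is fma(a0,b0,c0),
    // elements 1..3 are copied from the first source (the "s1" in FMADDS1).
    static std::array<float, 4> fmadds1(std::array<float, 4> A,
                                        std::array<float, 4> B,
                                        std::array<float, 4> C) {
      std::array<float, 4> R = A;          // pass-through upper elements
      R[0] = std::fma(A[0], B[0], C[0]);   // fused low element
      return R;
    }

    int main() {
      auto R = fmadds1({1, 10, 20, 30}, {2, 0, 0, 0}, {3, 0, 0, 0});
      std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // 5 10 20 30
      return 0;
    }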
//===----------------------------------------------------------------------===//
@@ -385,26 +385,28 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic Int> {
+ ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> {
let isCodeGenOnly = 1 in {
def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG;
+ (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W,
+ VEX_LIG;
def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
- mem_cpat:$src3))]>, VEX_W, VEX_LIG;
+ [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2,
+ mem_cpat:$src3)))]>, VEX_W, VEX_LIG;
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG;
+ (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>,
+ VEX_LIG;
let hasSideEffects = 0 in
def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
@@ -475,19 +477,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
- fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
+ fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32,
+ X86Fmadds1>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
- fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
+ fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32,
+ X86Fmsubs1>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
+ fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32,
+ X86Fnmadds1>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
+ fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32,
+ X86Fnmsubs1>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32>;
@@ -506,19 +508,19 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
+ fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64,
+ X86Fmadds1>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
+ fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64,
+ X86Fmsubs1>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64,
+ X86Fnmadds1>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64,
+ X86Fnmsubs1>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index c4f34bdd37e..d30400836bb 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -56,8 +56,6 @@ def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>;
-def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
@@ -482,11 +480,22 @@ def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutat
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
// Scalar FMA intrinsics with passthru bits in operand 1.
+def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>;
+def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>;
+def X86Fmsubs1 : SDNode<"X86ISD::FMSUBS1", SDTFPTernaryOp>;
+def X86Fnmsubs1 : SDNode<"X86ISD::FNMSUBS1", SDTFPTernaryOp>;
+
+// Scalar FMA intrinsics with passthru bits in operand 1.
def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>;
def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>;
def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>;
def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>;
+def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp>;
+def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp>;
+def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp>;
+def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp>;
+
// Scalar FMA intrinsics with passthru bits in operand 3.
def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
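The S1 and S3 node families differ only in which operand supplies the untouched upper elements: operand 1 for the forms where src1 is tied to the destination, operand 3 for the mask3-style intrinsics. Continuing the illustrative sketch from the FMA3 hunk, under the same hypothetical helper naming:

    #include <array>
    #include <cmath>

    // Same low-element FMA, but the upper elements come from the third operand
    // (the "s3" in FMADDS3), as the mask3 intrinsics require.
    static std::array<float, 4> fmadds3(std::array<float, 4> A,
                                        std::array<float, 4> B,
                                        std::array<float, 4> C) {
      std::array<float, 4> R = C;          // pass-through from operand 3
      R[0] = std::fma(A[0], B[0], C[0]);
      return R;
    }

    int main() {
      auto R = fmadds3({1, 10, 20, 30}, {2, 0, 0, 0}, {3, 7, 8, 9});
      return (R[0] == 5 && R[1] == 7 && R[2] == 8 && R[3] == 9) ? 0 : 1;
    }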
@@ -498,10 +507,14 @@ def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>;
+def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>;
+def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>;
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>;
+def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
+def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>;
def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>;
def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImmRound>;
@@ -578,7 +591,12 @@ def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>]> >;
+
+def X86cvtph2psRnd : SDNode<"X86ISD::CVTPH2PS_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>,
SDTCisVT<2, i32>]> >;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index e665ec1f14d..02a09c340ce 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -18,7 +18,7 @@
#include "X86InstrFMA3Info.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 559a8fcf107..f00caa130d0 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -850,7 +850,6 @@ def HasLWP : Predicate<"Subtarget->hasLWP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
-def NoF16C : Predicate<"!Subtarget->hasF16C()">;
def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 451303054f5..955a40ee171 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1514,13 +1514,12 @@ let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [], IIC_SSE_CVT_Scalar_RM>,
- XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ [], IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
}
def : Pat<(f32 (fpround FR64:$src)),
- (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
+ (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1574,20 +1573,18 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [], IIC_SSE_CVT_Scalar_RR>,
- XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ [], IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [], IIC_SSE_CVT_Scalar_RM>,
- XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
}
def : Pat<(f64 (fpextend FR32:$src)),
- (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
@@ -1899,7 +1896,7 @@ let Predicates = [HasAVX, NoVLX] in {
(v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
(VCVTTPD2DQrm addr:$src)>;
}
-} // Predicates = [HasAVX]
+} // Predicates = [HasAVX, NoVLX]
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
@@ -3095,7 +3092,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop,
Intrinsic Intr, SDNode OpNode, Domain d,
- OpndItins itins, string Suffix> {
+ OpndItins itins, Predicate target, string Suffix> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3126,21 +3123,17 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// vrcpss mem, %xmm0, %xmm0
// TODO: In theory, we could fold the load, and avoid the stall caused by
// the partial register store, either in ExecutionDepsFix or with smarter RA.
- let Predicates = [UseAVX] in {
+ let Predicates = [target] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
- }
- let Predicates = [HasAVX] in {
def : Pat<(Intr VR128:$src),
(!cast<Instruction>("V"#NAME#Suffix##r_Int) VR128:$src,
VR128:$src)>;
}
- let Predicates = [HasAVX, OptForSize] in {
+ let Predicates = [target, OptForSize] in {
def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
- }
- let Predicates = [UseAVX, OptForSize] in {
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
@@ -3186,7 +3179,7 @@ let Predicates = prds in {
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
@@ -3220,41 +3213,41 @@ let Predicates = [HasAVX] in {
}
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
+ OpndItins itins, Predicate AVXTarget> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG,
- NotMemoryFoldable;
+ SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V,
+ VEX_LIG, VEX_WIG, NotMemoryFoldable;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
+ OpndItins itins, Predicate AVXTarget> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, SSEPackedDouble, itins, "SD">,
+ OpNode, SSEPackedDouble, itins, AVXTarget, "SD">,
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS, UseAVX>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX, NoVLX]>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD, UseAVX>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >;
-defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>;
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS, HasAVX>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX]>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS, HasAVX>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX]>;
// There is no f64 version of the reciprocal approximation instructions.
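The refinement the comment refers to is the usual Newton-Raphson step layered on top of the low-precision RCPSS/RSQRTSS estimates. A hedged sketch of one such iteration (this mirrors the textbook recurrence, not the exact DAG combine the backend emits):

    #include <cmath>
    #include <cstdio>

    // One Newton-Raphson step on a reciprocal estimate e ~ 1/x:
    //   e' = e * (2 - x*e)
    static float refineRcp(float x, float e) { return e * (2.0f - x * e); }

    // One Newton-Raphson step on a reciprocal-sqrt estimate e ~ 1/sqrt(x):
    //   e' = e * (1.5 - 0.5*x*e*e)
    static float refineRsqrt(float x, float e) {
      return e * (1.5f - 0.5f * x * e * e);
    }

    int main() {
      float x = 3.0f;
      float e = 0.33f;   // pretend this came from RCPSS (~12 accurate bits)
      std::printf("%.7f vs %.7f\n", refineRcp(x, e), 1.0f / x);
      float r = 0.577f;  // pretend this came from RSQRTSS
      std::printf("%.7f vs %.7f\n", refineRsqrt(x, r), 1.0f / std::sqrt(x));
      return 0;
    }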
@@ -7692,22 +7685,24 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
-multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (Int VR128:$src))]>,
+ [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
T8PD, VEX, Sched<[WriteCvtF2F]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX,
- Sched<[WriteCvtF2FLd]>;
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set RC:$dst, (X86cvtph2ps (bc_v8i16
+ (loadv2i64 addr:$src))))]>,
+ T8PD, VEX, Sched<[WriteCvtF2FLd]>;
}
-multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+ [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
TAPD, VEX, Sched<[WriteCvtF2F]>;
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteCvtF2FLd, WriteRMW] in
@@ -7717,32 +7712,31 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
TAPD, VEX;
}
-let Predicates = [HasF16C] in {
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
- defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
+let Predicates = [HasF16C, NoVLX] in {
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(int_x86_vcvtph2ps_128 (bitconvert
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
- (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
- addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
- (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
- addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)),
- addr:$dst),
- (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(store (f64 (extractelt
+ (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+ (iPTR 0))), addr:$dst),
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ def : Pat<(store (i64 (extractelt
+ (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+ (iPTR 0))), addr:$dst),
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
+ (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
}
// Patterns for matching conversions from float to half-float and vice versa.
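After this change the patterns match the X86cvtph2ps/X86cvtps2ph nodes rather than the 128/256-bit intrinsics, but user code still reaches the same VCVTPH2PS/VCVTPS2PH instructions through the F16C intrinsics. A hedged usage sketch (assumes building with a flag such as -mf16c):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      float in[4] = {1.0f, 0.5f, -2.0f, 3.25f};
      __m128 v = _mm_loadu_ps(in);

      // VCVTPS2PH: four floats -> four half-precision values.
      __m128i h = _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

      // VCVTPH2PS: back to single precision.
      __m128 back = _mm_cvtph_ps(h);

      float out[4];
      _mm_storeu_ps(out, back);
      for (float f : out)
        std::printf("%g ", f);   // 1 0.5 -2 3.25 (all exactly representable)
      std::printf("\n");
      return 0;
    }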
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 6f5e41bcdc6..9edac22d5ba 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -422,12 +422,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
- X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
- X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
- X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
- X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
- X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
@@ -1077,12 +1071,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSUBS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
- X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
X86ISD::CVTPS2PH, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
@@ -1098,8 +1092,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, ISD::FMA,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
@@ -1220,8 +1214,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, ISD::FMA,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
@@ -1239,8 +1233,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
@@ -1259,8 +1253,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
@@ -1298,8 +1292,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, ISD::FMA,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
@@ -1428,26 +1422,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_ptestnm_w_128, CMP_MASK, X86ISD::TESTNM, 0),
X86_INTRINSIC_DATA(avx512_ptestnm_w_256, CMP_MASK, X86ISD::TESTNM, 0),
X86_INTRINSIC_DATA(avx512_ptestnm_w_512, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
- X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
@@ -1468,6 +1462,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0),
+ X86_INTRINSIC_DATA(fma_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADDS1, 0),
+ X86_INTRINSIC_DATA(fma_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADDS1, 0),
X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
@@ -1476,6 +1472,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
+ X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
@@ -1484,10 +1482,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
+ X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
+ X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
@@ -1584,6 +1586,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
+ X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
+ X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
+ X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
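Each touched entry now pairs the rounding-free node (Opc0) with its _RND counterpart (Opc1); lowering is expected to pick the _RND node only when the intrinsic carries an explicit rounding mode other than CUR_DIRECTION. A hedged sketch of that dispatch shape; the struct fields and constants below are illustrative, not the actual X86ISelLowering code:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the opcode pair stored per intrinsic.
    struct IntrinsicData { unsigned Opc0; unsigned Opc1; };

    constexpr unsigned FMADDS1 = 1, FMADDS1_RND = 2;
    constexpr uint32_t CUR_DIRECTION = 4; // "use MXCSR", no embedded rounding

    // Pick the node to build: the plain node normally, the _RND node only
    // when the caller asked for a static (embedded) rounding mode.
    static unsigned selectNode(const IntrinsicData &D, uint32_t Rounding) {
      if (D.Opc1 != 0 && Rounding != CUR_DIRECTION)
        return D.Opc1;
      return D.Opc0;
    }

    int main() {
      IntrinsicData VFMaddSS = {FMADDS1, FMADDS1_RND};
      std::printf("%u %u\n",
                  selectNode(VFMaddSS, CUR_DIRECTION),   // 1 -> FMADDS1
                  selectNode(VFMaddSS, /*rn-sae*/ 8));   // 2 -> FMADDS1_RND
      return 0;
    }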
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 98b4863134e..b1438bf7bc0 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -22,6 +22,38 @@
using namespace llvm;
using namespace TargetOpcode;
+/// FIXME: The following static functions are SizeChangeStrategy functions
+/// that are meant to temporarily mimic the behaviour of the old legalization
+/// based on doubling/halving non-legal types as closely as possible. This is
+/// not entirely possible as only legalizing the types that are exactly a power
+/// of 2 times the size of the legal types would require specifying all those
+/// sizes explicitly.
+/// In practice, not specifying those isn't a problem, and the below functions
+/// should disappear quickly as we add support for legalizing non-power-of-2
+/// sized types further.
+static void
+addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result,
+ const LegalizerInfo::SizeAndActionsVec &v) {
+ for (unsigned i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 < v[i].first && i + 1 < v.size() &&
+ v[i + 1].first != v[i].first + 1)
+ result.push_back({v[i].first + 1, LegalizerInfo::Unsupported});
+ }
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 1);
+ LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::WidenScalar},
+ {2, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
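widen_1 builds a table that widens bit width 1 and marks every size it does not mention as Unsupported. A standalone sketch of the idea for a hypothetical {8, 16, 32} legal set; the enum and pair type merely mimic LegalizerInfo::SizeAndActionsVec, and the in-tree helper additionally skips the Unsupported entry when two legal sizes are contiguous:

    #include <cstdio>
    #include <utility>
    #include <vector>

    enum Action { Legal, WidenScalar, Unsupported };
    using SizeAndActionsVec = std::vector<std::pair<unsigned, Action>>;

    // Sketch of the widen_1 shape: bit width 1 is widened, and sizes between
    // (or past) the listed legal sizes are unsupported.
    static SizeAndActionsVec widen_1(const SizeAndActionsVec &Legals) {
      SizeAndActionsVec R = {{1, WidenScalar}, {2, Unsupported}};
      for (const auto &E : Legals) {
        R.push_back(E);
        R.push_back({E.first + 1, Unsupported});
      }
      return R;
    }

    int main() {
      for (const auto &E : widen_1({{8, Legal}, {16, Legal}, {32, Legal}}))
        std::printf("{%u,%d} ", E.first, static_cast<int>(E.second));
      // {1,1} {2,2} {8,0} {9,2} {16,0} {17,2} {32,0} {33,2}
      std::printf("\n");
      return 0;
    }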
X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
const X86TargetMachine &TM)
: Subtarget(STI), TM(TM) {
@@ -37,6 +69,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizerInfoAVX512DQ();
setLegalizerInfoAVX512BW();
+ setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1);
+ for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+ setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1);
+ for (unsigned MemOp : {G_LOAD, G_STORE})
+ setLegalizeScalarToDifferentSizeStrategy(MemOp, 0,
+ narrowToSmallerAndWidenToSmallest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ G_GEP, 1, widenToLargerTypesUnsupportedOtherwise);
+ setLegalizeScalarToDifferentSizeStrategy(
+ G_CONSTANT, 0, widenToLargerTypesAndNarrowToLargest);
+
computeTables();
}
@@ -47,7 +90,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
- const LLT s64 = LLT::scalar(64);
for (auto Ty : {p0, s1, s8, s16, s32})
setAction({G_IMPLICIT_DEF, Ty}, Legal);
@@ -55,15 +97,10 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
for (auto Ty : {s8, s16, s32, p0})
setAction({G_PHI, Ty}, Legal);
- setAction({G_PHI, s1}, WidenScalar);
-
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) {
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
for (auto Ty : {s8, s16, s32})
setAction({BinOp, Ty}, Legal);
- setAction({BinOp, s1}, WidenScalar);
- }
-
for (unsigned Op : {G_UADDE}) {
setAction({Op, s32}, Legal);
setAction({Op, 1, s1}, Legal);
@@ -73,7 +110,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
for (auto Ty : {s8, s16, s32, p0})
setAction({MemOp, Ty}, Legal);
- setAction({MemOp, s1}, WidenScalar);
// And everything's fine in addrspace 0.
setAction({MemOp, 1, p0}, Legal);
}
@@ -85,9 +121,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s32}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({G_GEP, 1, Ty}, WidenScalar);
-
// Control-flow
setAction({G_BRCOND, s1}, Legal);
@@ -95,9 +128,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
for (auto Ty : {s8, s16, s32, p0})
setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
- setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
- setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar);
-
// Extensions
for (auto Ty : {s8, s16, s32}) {
setAction({G_ZEXT, Ty}, Legal);
@@ -105,12 +135,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({G_ANYEXT, Ty}, Legal);
}
- for (auto Ty : {s1, s8, s16}) {
- setAction({G_ZEXT, 1, Ty}, Legal);
- setAction({G_SEXT, 1, Ty}, Legal);
- setAction({G_ANYEXT, 1, Ty}, Legal);
- }
-
// Comparison
setAction({G_ICMP, s1}, Legal);
@@ -123,7 +147,6 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
if (!Subtarget.is64Bit())
return;
- const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
setAction({G_IMPLICIT_DEF, s64}, Legal);
@@ -145,7 +168,6 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
// Extensions
for (unsigned extOp : {G_ZEXT, G_SEXT, G_ANYEXT}) {
setAction({extOp, s64}, Legal);
- setAction({extOp, 1, s32}, Legal);
}
// Comparison
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp
index 0dd13077c37..67d95c2233d 100644
--- a/lib/Target/X86/X86MacroFusion.cpp
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -14,8 +14,8 @@
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 3069d1fd349..9b7732c1db8 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -23,10 +23,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1f49650340e..efa0cd2c6bc 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -27,14 +27,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index b0ce1335bd3..66fea1e688f 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -161,6 +161,8 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
// In Regcall calling convention those registers are used for passing
// parameters. Thus we need to prevent lazy binding in Regcall.
return X86II::MO_GOTPCREL;
+ if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())
+ return X86II::MO_GOTPCREL;
return X86II::MO_PLT;
}
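The added check classifies 64-bit calls to nonlazybind functions as MO_GOTPCREL, i.e. calls through the GOT instead of the PLT, the same treatment the Regcall convention already needed. A hedged source-level illustration; the flag and the expected codegen below are assumptions to verify against the toolchain:

    // Hypothetical illustration: built as position-independent code with
    //   clang++ -O2 -fPIC -fno-plt call.cpp
    // the front end is expected to tag external declarations nonlazybind, so
    // this 64-bit call should be classified MO_GOTPCREL (indirect through the
    // GOT) rather than MO_PLT.
    extern "C" void external_fn();

    void caller() { external_fn(); }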
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index a8d7f290688..a21d068c7f4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -463,7 +463,7 @@ public:
bool hasPCLMUL() const { return HasPCLMUL; }
// Prefer FMA4 to FMA - its better for commutation/memory folding and
// has equal or better performance on all supported targets.
- bool hasFMA() const { return (HasFMA || hasAVX512()) && !HasFMA4; }
+ bool hasFMA() const { return HasFMA && !HasFMA4; }
bool hasFMA4() const { return HasFMA4; }
bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasXOP() const { return HasXOP; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 6e6c724eb0a..11fe84f162d 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -436,4 +436,11 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86FixupLEAs());
addPass(createX86EvexToVexInsts());
}
+
+  // Verify each basic block's incoming and outgoing CFA offset and register
+  // values, and correct the CFA calculation rule where needed by inserting
+  // the appropriate CFI instructions.
+ const Triple &TT = TM->getTargetTriple();
+ if (!TT.isOSDarwin() && !TT.isOSWindows())
+ addPass(createCFIInstrInserter());
}
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index effbd07fa31..6772d96c799 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1437,7 +1437,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Entry->Cost;
}
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
}
int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
@@ -2644,12 +2644,15 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
{ 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8
{ 3, MVT::v16i8, 11}, //(load 48i8 and) deinterleave into 3 x 16i8
{ 3, MVT::v32i8, 13}, //(load 96i8 and) deinterleave into 3 x 32i8
+    { 3, MVT::v8f32, 17 }, //(load 24f32 and) deinterleave into 3 x 8f32
{ 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8
{ 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8
{ 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8
{ 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8
- { 4, MVT::v32i8, 80 } //(load 128i8 and) deinterleave into 4 x 32i8
+ { 4, MVT::v32i8, 80 }, //(load 128i8 and) deinterleave into 4 x 32i8
+
+    { 8, MVT::v8f32, 40 } //(load 64f32 and) deinterleave into 8 x 8f32
};
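The new v8f32 rows price the AVX2 lowering of stride-3 and stride-8 interleaved float load groups, the usual AoS-to-SoA pattern. A hedged sketch of the kind of scalar loop the vectorizer turns into one such stride-3 group:

    #include <cstddef>

    // Stride-3 interleaved load: every 8 iterations read 24 consecutive
    // floats, which the vectorizer deinterleaves into three 8-wide vectors
    // holding x, y and z.
    struct Point { float x, y, z; };

    void sumComponents(const Point *p, std::size_t n,
                       float &sx, float &sy, float &sz) {
      sx = sy = sz = 0.0f;
      for (std::size_t i = 0; i < n; ++i) { // 3 x v8f32 per 8 iterations
        sx += p[i].x;
        sy += p[i].y;
        sz += p[i].z;
      }
    }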
static const CostTblEntry AVX2InterleavedStoreTbl[] = {
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index fb8c2a71c9a..ba01f1e25ba 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -26,13 +26,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp
index fc08f1582ad..8a186e94d9c 100644
--- a/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -25,9 +25,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 27584f4e2b6..e98e9cda11d 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -15,7 +15,7 @@
#ifndef LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H
#define LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index a377784caf4..9d9ee33ce22 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_XCORE_XCOREINSTRINFO_H
#include "XCoreRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "XCoreGenInstrInfo.inc"
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index d34e928b14f..a6cf6837009 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -30,7 +30,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"