diff options
Diffstat (limited to 'lib')
409 files changed, 7415 insertions, 2792 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 4a6abae8e97..fb9ece2bd20 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1672,9 +1672,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, // If both pointers are pointing into the same object and one of them // accesses the entire object, then the accesses must overlap in some way. if (O1 == O2) - if ((V1Size != MemoryLocation::UnknownSize && - isObjectSize(O1, V1Size, DL, TLI)) || - (V2Size != MemoryLocation::UnknownSize && + if (V1Size != MemoryLocation::UnknownSize && + V2Size != MemoryLocation::UnknownSize && + (isObjectSize(O1, V1Size, DL, TLI) || isObjectSize(O2, V2Size, DL, TLI))) return AliasCache[Locs] = PartialAlias; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 5d2170dcf15..41c29589521 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -218,6 +218,11 @@ BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { return BFI->getProfileCountFromFreq(*getFunction(), Freq); } +bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) { + assert(BFI && "Expected analysis to be available"); + return BFI->isIrrLoopHeader(BB); +} + void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) { assert(BFI && "Expected analysis to be available"); BFI->setBlockFreq(BB, Freq); diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 1030407b766..7e323022d9c 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -271,6 +271,7 @@ void BlockFrequencyInfoImplBase::clear() { // Swap with a default-constructed std::vector, since std::vector<>::clear() // does not actually clear heap storage. std::vector<FrequencyData>().swap(Freqs); + IsIrrLoopHeader.clear(); std::vector<WorkingData>().swap(Working); Loops.clear(); } @@ -280,8 +281,10 @@ void BlockFrequencyInfoImplBase::clear() { /// Releases all memory not used downstream. In particular, saves Freqs. static void cleanup(BlockFrequencyInfoImplBase &BFI) { std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs)); + SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader)); BFI.clear(); BFI.Freqs = std::move(SavedFreqs); + BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader); } bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, @@ -572,6 +575,13 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, return BlockCount.getLimitedValue(); } +bool +BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) { + if (!Node.isValid()) + return false; + return IsIrrLoopHeader.test(Node.Index); +} + Scaled64 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { if (!Node.isValid()) @@ -819,3 +829,14 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); } } + +void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) { + BlockMass LoopMass = BlockMass::getFull(); + DitheringDistributer D(Dist, LoopMass); + for (const Weight &W : Dist.Weights) { + BlockMass Taken = D.takeMass(W.Amount); + assert(W.Type == Weight::Local && "all weights should be local"); + Working[W.TargetNode.Index].getMass() = Taken; + DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); + } +} diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 19889658b13..e141d6c58b6 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -2136,8 +2136,51 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { if (!Stride) return; - DEBUG(dbgs() << "LAA: Found a strided access that we can version"); + DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " + "versioning:"); DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + + // Avoid adding the "Stride == 1" predicate when we know that + // Stride >= Trip-Count. Such a predicate will effectively optimize a single + // or zero iteration loop, as Trip-Count <= Stride == 1. + // + // TODO: We are currently not making a very informed decision on when it is + // beneficial to apply stride versioning. It might make more sense that the + // users of this analysis (such as the vectorizer) will trigger it, based on + // their specific cost considerations; For example, in cases where stride + // versioning does not help resolving memory accesses/dependences, the + // vectorizer should evaluate the cost of the runtime test, and the benefit + // of various possible stride specializations, considering the alternatives + // of using gather/scatters (if available). + + const SCEV *StrideExpr = PSE->getSCEV(Stride); + const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); + + // Match the types so we can compare the stride and the BETakenCount. + // The Stride can be positive/negative, so we sign extend Stride; + // The backdgeTakenCount is non-negative, so we zero extend BETakenCount. + const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); + uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); + uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); + const SCEV *CastedStride = StrideExpr; + const SCEV *CastedBECount = BETakenCount; + ScalarEvolution *SE = PSE->getSE(); + if (BETypeSize >= StrideTypeSize) + CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); + else + CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); + const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); + // Since TripCount == BackEdgeTakenCount + 1, checking: + // "Stride >= TripCount" is equivalent to checking: + // Stride - BETakenCount > 0 + if (SE->isKnownPositive(StrideMinusBETaken)) { + DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " + "Stride==1 predicate will imply that the loop executes " + "at most once.\n"); + return; + } + DEBUG(dbgs() << "LAA: Found a strided access that we can version."); + SymbolicStrides[Ptr] = Stride; StrideSet.insert(Stride); } diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index afd575e7273..82db09ca97b 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -303,7 +303,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. F.isVarArg(); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, - /* Live = */ false); + /* Live = */ false, F.isDSOLocal()); FunctionSummary::FFlags FunFlags{ F.hasFnAttribute(Attribute::ReadNone), F.hasFnAttribute(Attribute::ReadOnly), @@ -329,7 +329,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, findRefEdges(Index, &V, RefEdges, Visited); bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, - /* Live = */ false); + /* Live = */ false, V.isDSOLocal()); auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -342,7 +342,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet<GlobalValue::GUID> &CantBePromoted) { bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, - /* Live = */ false); + /* Live = */ false, A.isDSOLocal()); auto AS = llvm::make_unique<AliasSummary>(Flags); auto *Aliasee = A.getBaseObject(); auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); @@ -410,7 +410,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( assert(GV->isDeclaration() && "Def in module asm already has definition"); GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, /* NotEligibleToImport = */ true, - /* Live = */ true); + /* Live = */ true, + /* Local */ GV->isDSOLocal()); CantBePromoted.insert(GlobalValue::getGUID(Name)); // Create the appropriate summary type. if (Function *F = dyn_cast<Function>(GV)) { diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 3a3a7ad3955..1e36e314b86 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -314,17 +314,8 @@ AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, if (!EnableTBAA) return AAResultBase::alias(LocA, LocB); - // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must - // be conservative. - const MDNode *AM = LocA.AATags.TBAA; - if (!AM) - return AAResultBase::alias(LocA, LocB); - const MDNode *BM = LocB.AATags.TBAA; - if (!BM) - return AAResultBase::alias(LocA, LocB); - - // If they may alias, chain to the next AliasAnalysis. - if (Aliases(AM, BM)) + // If accesses may alias, chain to the next AliasAnalysis. + if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA)) return AAResultBase::alias(LocA, LocB); // Otherwise return a definitive result. @@ -424,25 +415,24 @@ bool MDNode::isTBAAVtableAccess() const { return false; } +static bool matchAccessTags(const MDNode *A, const MDNode *B, + const MDNode **GenericTag = nullptr); + MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + const MDNode *GenericTag; + matchAccessTags(A, B, &GenericTag); + return const_cast<MDNode*>(GenericTag); +} + +static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { if (!A || !B) return nullptr; if (A == B) return A; - // For struct-path aware TBAA, we use the access type of the tag. - assert(isStructPathTBAA(A) && isStructPathTBAA(B) && - "Auto upgrade should have taken care of this!"); - A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType()); - if (!A) - return nullptr; - B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType()); - if (!B) - return nullptr; - - SmallSetVector<MDNode *, 4> PathA; - MutableTBAANode TA(A); + SmallSetVector<const MDNode *, 4> PathA; + TBAANode TA(A); while (TA.getNode()) { if (PathA.count(TA.getNode())) report_fatal_error("Cycle found in TBAA metadata."); @@ -450,8 +440,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { TA = TA.getParent(); } - SmallSetVector<MDNode *, 4> PathB; - MutableTBAANode TB(B); + SmallSetVector<const MDNode *, 4> PathB; + TBAANode TB(B); while (TB.getNode()) { if (PathB.count(TB.getNode())) report_fatal_error("Cycle found in TBAA metadata."); @@ -462,7 +452,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { int IA = PathA.size() - 1; int IB = PathB.size() - 1; - MDNode *Ret = nullptr; + const MDNode *Ret = nullptr; while (IA >= 0 && IB >= 0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; @@ -472,17 +462,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { --IB; } - // We either did not find a match, or the only common base "type" is - // the root node. In either case, we don't have any useful TBAA - // metadata to attach. - if (!Ret || Ret->getNumOperands() < 2) - return nullptr; - - // We need to convert from a type node to a tag node. - Type *Int64 = IntegerType::get(A->getContext(), 64); - Metadata *Ops[3] = {Ret, Ret, - ConstantAsMetadata::get(ConstantInt::get(Int64, 0))}; - return MDNode::get(A->getContext(), Ops); + return Ret; } void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { @@ -505,70 +485,96 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.NoAlias = getMetadata(LLVMContext::MD_noalias); } -/// Aliases - Test whether the type represented by A may alias the -/// type represented by B. -bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { - // Verify that both input nodes are struct-path aware. Auto-upgrade should - // have taken care of this. - assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); - assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); +static bool findAccessType(TBAAStructTagNode BaseTag, + const MDNode *AccessTypeNode, + uint64_t &OffsetInBase) { + // Start from the base type, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the access type or + // until we reach a root node. + TBAAStructTypeNode BaseType(BaseTag.getBaseType()); + OffsetInBase = BaseTag.getOffset(); + + while (const MDNode *BaseTypeNode = BaseType.getNode()) { + if (BaseTypeNode == AccessTypeNode) + return true; - // Keep track of the root node for A and B. - TBAAStructTypeNode RootA, RootB; - TBAAStructTagNode TagA(A), TagB(B); + // Follow the edge with the correct offset, Offset will be adjusted to + // be relative to the field type. + BaseType = BaseType.getParent(OffsetInBase); + } + return false; +} - // TODO: We need to check if AccessType of TagA encloses AccessType of - // TagB to support aggregate AccessType. If yes, return true. +static const MDNode *createAccessTag(const MDNode *AccessType) { + // If there is no access type or the access type is the root node, then + // we don't have any useful access tag to return. + if (!AccessType || AccessType->getNumOperands() < 2) + return nullptr; - // Start from the base type of A, follow the edge with the correct offset in - // the type DAG and adjust the offset until we reach the base type of B or - // until we reach the Root node. - // Compare the adjusted offset once we have the same base. + Type *Int64 = IntegerType::get(AccessType->getContext(), 64); + auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0)); + Metadata *Ops[] = {const_cast<MDNode*>(AccessType), + const_cast<MDNode*>(AccessType), ImmutabilityFlag}; + return MDNode::get(AccessType->getContext(), Ops); +} - // Climb the type DAG from base type of A to see if we reach base type of B. - const MDNode *BaseA = TagA.getBaseType(); - const MDNode *BaseB = TagB.getBaseType(); - uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); - for (TBAAStructTypeNode T(BaseA);;) { - if (T.getNode() == BaseB) - // Base type of A encloses base type of B, check if the offsets match. - return OffsetA == OffsetB; - - RootA = T; - // Follow the edge with the correct offset, OffsetA will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetA); - if (!T.getNode()) - break; +/// matchTags - Return true if the given couple of accesses are allowed to +/// overlap. If \arg GenericTag is not null, then on return it points to the +/// most generic access descriptor for the given two. +static bool matchAccessTags(const MDNode *A, const MDNode *B, + const MDNode **GenericTag) { + if (A == B) { + if (GenericTag) + *GenericTag = A; + return true; } - // Reset OffsetA and climb the type DAG from base type of B to see if we reach - // base type of A. - OffsetA = TagA.getOffset(); - for (TBAAStructTypeNode T(BaseB);;) { - if (T.getNode() == BaseA) - // Base type of B encloses base type of A, check if the offsets match. - return OffsetA == OffsetB; - - RootB = T; - // Follow the edge with the correct offset, OffsetB will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetB); - if (!T.getNode()) - break; + // Accesses with no TBAA information may alias with any other accesses. + if (!A || !B) { + if (GenericTag) + *GenericTag = nullptr; + return true; } - // Neither node is an ancestor of the other. + // Verify that both input nodes are struct-path aware. Auto-upgrade should + // have taken care of this. + assert(isStructPathTBAA(A) && "Access A is not struct-path aware!"); + assert(isStructPathTBAA(B) && "Access B is not struct-path aware!"); + + TBAAStructTagNode TagA(A), TagB(B); + const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(), + TagB.getAccessType()); + if (GenericTag) + *GenericTag = createAccessTag(CommonType); - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Climb the type DAG from base type of A to see if we reach base type of B. + uint64_t OffsetA; + if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) + return OffsetA == TagB.getOffset(); + + // Climb the type DAG from base type of B to see if we reach base type of A. + uint64_t OffsetB; + if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) + return OffsetB == TagA.getOffset(); + + // If the final access types have different roots, they're part of different + // potentially unrelated type systems, so we must be conservative. + if (!CommonType) return true; // If they have the same root, then we've proved there's no alias. return false; } +/// Aliases - Test whether the access represented by tag A may alias the +/// access represented by tag B. +bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { + return matchAccessTags(A, B); +} + AnalysisKey TypeBasedAA::Key; TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 3f53fc517e3..2010858139a 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2579,9 +2579,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: - if (ICS->hasNoNaNs()) - return Intrinsic::sqrt; - return Intrinsic::not_intrinsic; + return Intrinsic::sqrt; } return Intrinsic::not_intrinsic; @@ -4140,7 +4138,8 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, // Is the sign bit set? // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN - if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) + if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() && + C2->isMaxSignedValue()) return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; // Is the sign bit clear? @@ -4272,13 +4271,15 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X - if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) { + if (Pred == ICmpInst::ICMP_SGT && + (C1->isNullValue() || C1->isAllOnesValue())) { return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X - if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) { + if (Pred == ICmpInst::ICMP_SLT && + (C1->isNullValue() || C1->isOneValue())) { return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 50b391fdf73..b8b56d79c82 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -552,6 +552,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(nsz); KEYWORD(arcp); KEYWORD(contract); + KEYWORD(reassoc); + KEYWORD(afn); KEYWORD(fast); KEYWORD(nuw); KEYWORD(nsw); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index dcc3f22e03b..94e4c1ae96d 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -193,7 +193,7 @@ namespace llvm { FastMathFlags FMF; while (true) switch (Lex.getKind()) { - case lltok::kw_fast: FMF.setUnsafeAlgebra(); Lex.Lex(); continue; + case lltok::kw_fast: FMF.setFast(); Lex.Lex(); continue; case lltok::kw_nnan: FMF.setNoNaNs(); Lex.Lex(); continue; case lltok::kw_ninf: FMF.setNoInfs(); Lex.Lex(); continue; case lltok::kw_nsz: FMF.setNoSignedZeros(); Lex.Lex(); continue; @@ -202,6 +202,8 @@ namespace llvm { FMF.setAllowContract(true); Lex.Lex(); continue; + case lltok::kw_reassoc: FMF.setAllowReassoc(); Lex.Lex(); continue; + case lltok::kw_afn: FMF.setApproxFunc(); Lex.Lex(); continue; default: return FMF; } return FMF; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index db0de6c0d5a..0c5cf6b5d45 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -102,6 +102,8 @@ enum Kind { kw_nsz, kw_arcp, kw_contract, + kw_reassoc, + kw_afn, kw_fast, kw_nuw, kw_nsw, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index c2272260f44..3e0a39c099b 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -889,7 +889,9 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, // to work correctly on earlier versions, we must conservatively treat all // values as live. bool Live = (RawFlags & 0x2) || Version < 3; - return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live); + bool Local = (RawFlags & 0x4); + + return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local); } static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { @@ -1044,8 +1046,8 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { static FastMathFlags getDecodedFastMathFlags(unsigned Val) { FastMathFlags FMF; - if (0 != (Val & FastMathFlags::UnsafeAlgebra)) - FMF.setUnsafeAlgebra(); + if (0 != (Val & FastMathFlags::AllowReassoc)) + FMF.setAllowReassoc(); if (0 != (Val & FastMathFlags::NoNaNs)) FMF.setNoNaNs(); if (0 != (Val & FastMathFlags::NoInfs)) @@ -1056,6 +1058,8 @@ static FastMathFlags getDecodedFastMathFlags(unsigned Val) { FMF.setAllowReciprocal(); if (0 != (Val & FastMathFlags::AllowContract)) FMF.setAllowContract(true); + if (0 != (Val & FastMathFlags::ApproxFunc)) + FMF.setApproxFunc(); return FMF; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1e491aa066e..03a77c9734e 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -955,6 +955,8 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { RawFlags |= Flags.NotEligibleToImport; // bool RawFlags |= (Flags.Live << 1); + RawFlags |= (Flags.DSOLocal << 2); + // Linkage don't need to be remapped at that time for the summary. Any future // change to the getEncodedLinkage() function will need to be taken into // account here as well. @@ -1319,8 +1321,8 @@ static uint64_t getOptimizationFlags(const Value *V) { if (PEO->isExact()) Flags |= 1 << bitc::PEO_EXACT; } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) { - if (FPMO->hasUnsafeAlgebra()) - Flags |= FastMathFlags::UnsafeAlgebra; + if (FPMO->hasAllowReassoc()) + Flags |= FastMathFlags::AllowReassoc; if (FPMO->hasNoNaNs()) Flags |= FastMathFlags::NoNaNs; if (FPMO->hasNoInfs()) @@ -1331,6 +1333,8 @@ static uint64_t getOptimizationFlags(const Value *V) { Flags |= FastMathFlags::AllowReciprocal; if (FPMO->hasAllowContract()) Flags |= FastMathFlags::AllowContract; + if (FPMO->hasApproxFunc()) + Flags |= FastMathFlags::ApproxFunc; } return Flags; diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 730187087dc..011356c3260 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -18,6 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" #include "llvm/IR/UseListOrder.h" #include <cassert> #include <cstdint> diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index d7f91fc1ce3..1dea746a6ac 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -28,12 +28,12 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 876cca4bc7a..9642368a047 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 8b1376ab363..973816d5635 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -29,7 +29,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a35fcdaaf9a..4ebc7176943 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -51,6 +51,8 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" @@ -100,8 +102,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index eae79ad101d..5250f1b1787 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -32,7 +33,6 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 67bab8c7684..5aa3f4ae103 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -68,7 +68,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index dd7f7931b06..1a6cb967992 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -31,7 +31,7 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 06b5b06c41b..603d0f7f470 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -36,7 +36,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 6e4625ba411..167ca13c19c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <memory> #include <utility> @@ -27,7 +28,6 @@ class DwarfCompileUnit; class DwarfUnit; class LexicalScope; class MCSection; -class MDNode; class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 5d485f21357..35ce1fec385 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -33,7 +33,7 @@ #include "llvm/MC/MCWin64EH.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 40cb0c0cdf1..cd6056b674c 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -52,7 +53,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -296,6 +296,11 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { return HashMachineInstr(*I); } +/// Whether MI should be counted as an instruction when calculating common tail. +static bool countsAsInstruction(const MachineInstr &MI) { + return !(MI.isDebugValue() || MI.isCFIInstruction()); +} + /// ComputeCommonTailLength - Given two machine basic blocks, compute the number /// of instructions they actually have in common together at their end. Return /// iterators for the first shared instruction in each block. @@ -310,26 +315,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; // Skip debugging pseudos; necessary to avoid changing the code. - while (I1->isDebugValue()) { + while (!countsAsInstruction(*I1)) { if (I1==MBB1->begin()) { - while (I2->isDebugValue()) { - if (I2==MBB2->begin()) + while (!countsAsInstruction(*I2)) { + if (I2==MBB2->begin()) { // I1==DBG at begin; I2==DBG at begin - return TailLen; + goto SkipTopCFIAndReturn; + } --I2; } ++I2; // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - return TailLen; + goto SkipTopCFIAndReturn; } --I1; } // I1==first (untested) non-DBG preceding known match - while (I2->isDebugValue()) { + while (!countsAsInstruction(*I2)) { if (I2==MBB2->begin()) { ++I1; // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - return TailLen; + goto SkipTopCFIAndReturn; } --I2; } @@ -368,6 +374,37 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } ++I1; } + +SkipTopCFIAndReturn: + // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if + // I1 and I2 are non-identical when compared and then one or both of them ends + // up pointing to a CFI instruction after being incremented. For example: + /* + BB1: + ... + INSTRUCTION_A + ADD32ri8 <- last common instruction + ... + BB2: + ... + INSTRUCTION_B + CFI_INSTRUCTION + ADD32ri8 <- last common instruction + ... + */ + // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after + // incrementing the iterators, I1 will point to ADD, however I2 will point to + // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the + // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI + // instruction. + while (I1 != MBB1->end() && I1->isCFIInstruction()) { + ++I1; + } + + while (I2 != MBB2->end() && I2->isCFIInstruction()) { + ++I2; + } + return TailLen; } @@ -454,7 +491,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { - if (I->isDebugValue()) + if (!countsAsInstruction(*I)) continue; if (I->isCall()) Time += 10; @@ -814,12 +851,12 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBI != MBBIE && "Reached BB end within common tail length!"); (void)MBBIE; - if (MBBI->isDebugValue()) { + if (!countsAsInstruction(*MBBI)) { ++MBBI; continue; } - while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon)) ++MBBICommon; assert(MBBICommon != MBBIECommon && @@ -859,7 +896,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { } for (auto &MI : *MBB) { - if (MI.isDebugValue()) + if (!countsAsInstruction(MI)) continue; DebugLoc DL = MI.getDebugLoc(); for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { @@ -869,7 +906,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { auto &Pos = NextCommonInsts[i]; assert(Pos != SameTails[i].getBlock()->end() && "Reached BB end within common tail"); - while (Pos->isDebugValue()) { + while (!countsAsInstruction(*Pos)) { ++Pos; assert(Pos != SameTails[i].getBlock()->end() && "Reached BB end within common tail"); diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 2d21fbeea39..73b399e4444 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" @@ -22,7 +23,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp new file mode 100644 index 00000000000..5464ee443e0 --- /dev/null +++ b/lib/CodeGen/CFIInstrInserter.cpp @@ -0,0 +1,319 @@ +//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This pass verifies incoming and outgoing CFA information of basic +/// blocks. CFA information is information about offset and register set by CFI +/// directives, valid at the start and end of a basic block. This pass checks +/// that outgoing information of predecessors matches incoming information of +/// their successors. Then it checks if blocks have correct CFA calculation rule +/// set and inserts additional CFI instruction at their beginnings if they +/// don't. CFI instructions are inserted if basic blocks have incorrect offset +/// or register set by previous blocks, as a result of a non-linear layout of +/// blocks in a function. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +namespace { +class CFIInstrInserter : public MachineFunctionPass { + public: + static char ID; + + CFIInstrInserter() : MachineFunctionPass(ID) { + initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + + if (!MF.getMMI().hasDebugInfo() && + !MF.getFunction()->needsUnwindTableEntry()) + return false; + + MBBVector.resize(MF.getNumBlockIDs()); + calculateCFAInfo(MF); +#ifndef NDEBUG + unsigned ErrorNum = verify(MF); + if (ErrorNum) + report_fatal_error("Found " + Twine(ErrorNum) + + " in/out CFI information errors."); +#endif + bool insertedCFI = insertCFIInstrs(MF); + MBBVector.clear(); + return insertedCFI; + } + + private: + struct MBBCFAInfo { + MachineBasicBlock *MBB; + /// Value of cfa offset valid at basic block entry. + int IncomingCFAOffset = -1; + /// Value of cfa offset valid at basic block exit. + int OutgoingCFAOffset = -1; + /// Value of cfa register valid at basic block entry. + unsigned IncomingCFARegister = 0; + /// Value of cfa register valid at basic block exit. + unsigned OutgoingCFARegister = 0; + /// If in/out cfa offset and register values for this block have already + /// been set or not. + bool Processed = false; + }; + + /// Contains cfa offset and register values valid at entry and exit of basic + /// blocks. + SmallVector<struct MBBCFAInfo, 4> MBBVector; + + /// Calculate cfa offset and register values valid at entry and exit for all + /// basic blocks in a function. + void calculateCFAInfo(MachineFunction &MF); + /// Calculate cfa offset and register values valid at basic block exit by + /// checking the block for CFI instructions. Block's incoming CFA info remains + /// the same. + void calculateOutgoingCFAInfo(struct MBBCFAInfo &MBBInfo); + /// Update in/out cfa offset and register values for successors of the basic + /// block. + void updateSuccCFAInfo(struct MBBCFAInfo &MBBInfo); + + /// Check if incoming CFA information of a basic block matches outgoing CFA + /// information of the previous block. If it doesn't, insert CFI instruction + /// at the beginning of the block that corrects the CFA calculation rule for + /// that block. + bool insertCFIInstrs(MachineFunction &MF); + /// Return the cfa offset value that should be set at the beginning of a MBB + /// if needed. The negated value is needed when creating CFI instructions that + /// set absolute offset. + int getCorrectCFAOffset(MachineBasicBlock *MBB) { + return -MBBVector[MBB->getNumber()].IncomingCFAOffset; + } + + void report(const char *msg, MachineBasicBlock &MBB); + /// Go through each MBB in a function and check that outgoing offset and + /// register of its predecessors match incoming offset and register of that + /// MBB, as well as that incoming offset and register of its successors match + /// outgoing offset and register of the MBB. + unsigned verify(MachineFunction &MF); +}; +} + +char CFIInstrInserter::ID = 0; +INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter", + "Check CFA info and insert CFI instructions if needed", false, + false) +FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); } + +void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { + // Initial CFA offset value i.e. the one valid at the beginning of the + // function. + int InitialOffset = + MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF); + // Initial CFA register value i.e. the one valid at the beginning of the + // function. + unsigned InitialRegister = + MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF); + + // Initialize MBBMap. + for (MachineBasicBlock &MBB : MF) { + struct MBBCFAInfo MBBInfo; + MBBInfo.MBB = &MBB; + MBBInfo.IncomingCFAOffset = InitialOffset; + MBBInfo.OutgoingCFAOffset = InitialOffset; + MBBInfo.IncomingCFARegister = InitialRegister; + MBBInfo.OutgoingCFARegister = InitialRegister; + MBBVector[MBB.getNumber()] = MBBInfo; + } + + // Set in/out cfa info for all blocks in the function. This traversal is based + // on the assumption that the first block in the function is the entry block + // i.e. that it has initial cfa offset and register values as incoming CFA + // information. + for (MachineBasicBlock &MBB : MF) { + if (MBBVector[MBB.getNumber()].Processed) continue; + calculateOutgoingCFAInfo(MBBVector[MBB.getNumber()]); + updateSuccCFAInfo(MBBVector[MBB.getNumber()]); + } +} + +void CFIInstrInserter::calculateOutgoingCFAInfo(struct MBBCFAInfo &MBBInfo) { + // Outgoing cfa offset set by the block. + int SetOffset = MBBInfo.IncomingCFAOffset; + // Outgoing cfa register set by the block. + unsigned SetRegister = MBBInfo.IncomingCFARegister; + const std::vector<MCCFIInstruction> &Instrs = + MBBInfo.MBB->getParent()->getFrameInstructions(); + + // Determine cfa offset and register set by the block. + for (MachineInstr &MI : + make_range(MBBInfo.MBB->instr_begin(), MBBInfo.MBB->instr_end())) { + if (MI.isCFIInstruction()) { + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + if (CFI.getOperation() == MCCFIInstruction::OpDefCfaRegister) { + SetRegister = CFI.getRegister(); + } else if (CFI.getOperation() == MCCFIInstruction::OpDefCfaOffset) { + SetOffset = CFI.getOffset(); + } else if (CFI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) { + SetOffset += CFI.getOffset(); + } else if (CFI.getOperation() == MCCFIInstruction::OpDefCfa) { + SetRegister = CFI.getRegister(); + SetOffset = CFI.getOffset(); + } + } + } + + MBBInfo.Processed = true; + + // Update outgoing CFA info. + MBBInfo.OutgoingCFAOffset = SetOffset; + MBBInfo.OutgoingCFARegister = SetRegister; +} + +void CFIInstrInserter::updateSuccCFAInfo(struct MBBCFAInfo &MBBInfo) { + + for (MachineBasicBlock *Succ : MBBInfo.MBB->successors()) { + struct MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()]; + if (SuccInfo.Processed) continue; + SuccInfo.IncomingCFAOffset = MBBInfo.OutgoingCFAOffset; + SuccInfo.IncomingCFARegister = MBBInfo.OutgoingCFARegister; + calculateOutgoingCFAInfo(SuccInfo); + updateSuccCFAInfo(SuccInfo); + } +} + +bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { + + const struct MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()]; + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + bool InsertedCFIInstr = false; + + for (MachineBasicBlock &MBB : MF) { + // Skip the first MBB in a function + if (MBB.getNumber() == MF.front().getNumber()) continue; + + const struct MBBCFAInfo& MBBInfo = MBBVector[MBB.getNumber()]; + auto MBBI = MBBInfo.MBB->begin(); + DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI); + + if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If both outgoing offset and register of a previous block don't match + // incoming offset and register of this block, add a def_cfa instruction + // with the correct offset and register for this block. + if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + // If outgoing offset of a previous block doesn't match incoming offset + // of this block, add a def_cfa_offset instruction with the correct + // offset for this block. + } else { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset( + nullptr, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + InsertedCFIInstr = true; + // If outgoing register of a previous block doesn't match incoming + // register of this block, add a def_cfa_register instruction with the + // correct register for this block. + } else if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MBBInfo.IncomingCFARegister)); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + PrevMBBInfo = &MBBInfo; + } + return InsertedCFIInstr; +} + +void CFIInstrInserter::report(const char *msg, MachineBasicBlock &MBB) { + errs() << '\n'; + errs() << "*** " << msg << " ***\n" + << "- function: " << MBB.getParent()->getName() << "\n"; + errs() << "- basic block: BB#" << MBB.getNumber() << ' ' << MBB.getName() + << " (" << (const void *)&MBB << ')'; + errs() << '\n'; +} + +unsigned CFIInstrInserter::verify(MachineFunction &MF) { + unsigned ErrorNum = 0; + for (MachineBasicBlock &CurrMBB : MF) { + const struct MBBCFAInfo& CurrMBBInfo = MBBVector[CurrMBB.getNumber()]; + for (MachineBasicBlock *Pred : CurrMBB.predecessors()) { + const struct MBBCFAInfo& PredMBBInfo = MBBVector[Pred->getNumber()]; + // Check that outgoing offset values of predecessors match the incoming + // offset value of CurrMBB + if (PredMBBInfo.OutgoingCFAOffset != CurrMBBInfo.IncomingCFAOffset) { + report("The outgoing offset of a predecessor is inconsistent.", + CurrMBB); + errs() << "Predecessor BB#" << Pred->getNumber() + << " has outgoing offset (" << PredMBBInfo.OutgoingCFAOffset + << "), while BB#" << CurrMBB.getNumber() + << " has incoming offset (" << CurrMBBInfo.IncomingCFAOffset + << ").\n"; + ErrorNum++; + } + // Check that outgoing register values of predecessors match the incoming + // register value of CurrMBB + if (PredMBBInfo.OutgoingCFARegister != CurrMBBInfo.IncomingCFARegister) { + report("The outgoing register of a predecessor is inconsistent.", + CurrMBB); + errs() << "Predecessor BB#" << Pred->getNumber() + << " has outgoing register (" << PredMBBInfo.OutgoingCFARegister + << "), while BB#" << CurrMBB.getNumber() + << " has incoming register (" << CurrMBBInfo.IncomingCFARegister + << ").\n"; + ErrorNum++; + } + } + + for (MachineBasicBlock *Succ : CurrMBB.successors()) { + const struct MBBCFAInfo& SuccMBBInfo = MBBVector[Succ->getNumber()]; + // Check that incoming offset values of successors match the outgoing + // offset value of CurrMBB + if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset) { + report("The incoming offset of a successor is inconsistent.", CurrMBB); + errs() << "Successor BB#" << Succ->getNumber() + << " has incoming offset (" << SuccMBBInfo.IncomingCFAOffset + << "), while BB#" << CurrMBB.getNumber() + << " has outgoing offset (" << CurrMBBInfo.OutgoingCFAOffset + << ").\n"; + ErrorNum++; + } + // Check that incoming register values of successors match the outgoing + // register value of CurrMBB + if (SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) { + report("The incoming register of a successor is inconsistent.", + CurrMBB); + errs() << "Successor BB#" << Succ->getNumber() + << " has incoming register (" << SuccMBBInfo.IncomingCFARegister + << "), while BB#" << CurrMBB.getNumber() + << " has outgoing register (" << CurrMBBInfo.OutgoingCFARegister + << ").\n"; + ErrorNum++; + } + } + } + return ErrorNum; +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7ec7fda4e44..1ae5aa80bf9 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMCodeGen BuiltinGCs.cpp CalcSpillWeights.cpp CallingConvLower.cpp + CFIInstrInserter.cpp CodeGen.cpp CodeGenPrepare.cpp CountingFunctionInserter.cpp @@ -21,6 +22,7 @@ add_llvm_library(LLVMCodeGen EdgeBundles.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp + ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp FaultMaps.cpp @@ -113,6 +115,7 @@ add_llvm_library(LLVMCodeGen RegisterPressure.cpp RegisterScavenging.cpp RenameIndependentSubregs.cpp + MIRCanonicalizerPass.cpp RegisterUsageInfo.cpp RegUsageInfoCollector.cpp RegUsageInfoPropagate.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 588f1791ce3..d4ac5fd040c 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index f4ccb4889d3..9d10d1b75f5 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); + initializeCFIInstrInserterPass(Registry); initializeCodeGenPreparePass(Registry); initializeCountingFunctionInserterPass(Registry); initializeDeadMachineInstructionElimPass(Registry); @@ -30,6 +31,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); initializeFinalizeMachineBundlesPass(Registry); @@ -99,6 +101,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeWinEHPreparePass(Registry); initializeXRayInstrumentationPass(Registry); + initializeMIRCanonicalizerPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 51f2a320b29..d6633a508f5 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -113,6 +113,12 @@ STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); +STATISTIC(NumMemoryInstsPhiCreated, + "Number of phis created when address " + "computations were sunk to memory instructions"); +STATISTIC(NumMemoryInstsSelectCreated, + "Number of select created when address " + "computations were sunk to memory instructions"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumAndsAdded, @@ -123,12 +129,6 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); -STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); -STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); -STATISTIC(NumMemCmpGreaterThanMax, - "Number of memcmp calls with size greater than max size"); -STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); - static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -189,10 +189,18 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); -static cl::opt<unsigned> MemCmpNumLoadsPerBlock( - "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), - cl::desc("The number of loads per basic block for inline expansion of " - "memcmp that is only being compared against zero.")); +static cl::opt<bool> DisableComplexAddrModes( + "disable-complex-addr-modes", cl::Hidden, cl::init(true), + cl::desc("Disables combining addressing modes with different parts " + "in optimizeMemoryInst.")); + +static cl::opt<bool> +AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), + cl::desc("Allow creation of Phis in Address sinking.")); + +static cl::opt<bool> +AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false), + cl::desc("Allow creation of selects in Address sinking.")); namespace { @@ -1182,6 +1190,7 @@ static bool SinkCast(CastInst *CI) { // If we removed all uses, nuke the cast. if (CI->use_empty()) { + salvageDebugInfo(*CI); CI->eraseFromParent(); MadeChange = true; } @@ -1697,699 +1706,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return true; } -namespace { - -// This class provides helper functions to expand a memcmp library call into an -// inline expansion. -class MemCmpExpansion { - struct ResultBlock { - BasicBlock *BB = nullptr; - PHINode *PhiSrc1 = nullptr; - PHINode *PhiSrc2 = nullptr; - - ResultBlock() = default; - }; - - CallInst *const CI; - ResultBlock ResBlock; - const uint64_t Size; - unsigned MaxLoadSize; - uint64_t NumLoadsNonOneByte; - const uint64_t NumLoadsPerBlock; - std::vector<BasicBlock *> LoadCmpBlocks; - BasicBlock *EndBlock; - PHINode *PhiRes; - const bool IsUsedForZeroCmp; - const DataLayout &DL; - IRBuilder<> Builder; - // Represents the decomposition in blocks of the expansion. For example, - // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and - // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. - // TODO(courbet): Involve the target more in this computation. On X86, 7 - // bytes can be done more efficiently with two overlaping 4-byte loads than - // covering the interval with [{4, 0},{2, 4},{1, 6}}. - struct LoadEntry { - LoadEntry(unsigned LoadSize, uint64_t Offset) - : LoadSize(LoadSize), Offset(Offset) { - assert(Offset % LoadSize == 0 && "invalid load entry"); - } - - uint64_t getGEPIndex() const { return Offset / LoadSize; } - - // The size of the load for this block, in bytes. - const unsigned LoadSize; - // The offset of this load WRT the base pointer, in bytes. - const uint64_t Offset; - }; - SmallVector<LoadEntry, 8> LoadSequence; - - void createLoadCmpBlocks(); - void createResultBlock(); - void setupResultBlockPHINodes(); - void setupEndBlockPHINodes(); - Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); - void emitLoadCompareBlock(unsigned BlockIndex); - void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, - unsigned &LoadIndex); - void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); - void emitMemCmpResultBlock(); - Value *getMemCmpExpansionZeroCase(); - Value *getMemCmpEqZeroOneBlock(); - Value *getMemCmpOneBlock(); - - public: - MemCmpExpansion(CallInst *CI, uint64_t Size, - const TargetTransformInfo::MemCmpExpansionOptions &Options, - unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - unsigned NumLoadsPerBlock, const DataLayout &DL); - - unsigned getNumBlocks(); - uint64_t getNumLoads() const { return LoadSequence.size(); } - - Value *getMemCmpExpansion(); -}; - -} // end anonymous namespace - -// Initialize the basic block structure required for expansion of memcmp call -// with given maximum load size and memcmp size parameter. -// This structure includes: -// 1. A list of load compare blocks - LoadCmpBlocks. -// 2. An EndBlock, split from original instruction point, which is the block to -// return from. -// 3. ResultBlock, block to branch to for early exit when a -// LoadCmpBlock finds a difference. -MemCmpExpansion::MemCmpExpansion( - CallInst *const CI, uint64_t Size, - const TargetTransformInfo::MemCmpExpansionOptions &Options, - const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) - : CI(CI), - Size(Size), - MaxLoadSize(0), - NumLoadsNonOneByte(0), - NumLoadsPerBlock(NumLoadsPerBlock), - IsUsedForZeroCmp(IsUsedForZeroCmp), - DL(TheDataLayout), - Builder(CI) { - assert(Size > 0 && "zero blocks"); - // Scale the max size down if the target can load more bytes than we need. - size_t LoadSizeIndex = 0; - while (LoadSizeIndex < Options.LoadSizes.size() && - Options.LoadSizes[LoadSizeIndex] > Size) { - ++LoadSizeIndex; - } - this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; - // Compute the decomposition. - uint64_t CurSize = Size; - uint64_t Offset = 0; - while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { - const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; - assert(LoadSize > 0 && "zero load size"); - const uint64_t NumLoadsForThisSize = CurSize / LoadSize; - if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { - // Do not expand if the total number of loads is larger than what the - // target allows. Note that it's important that we exit before completing - // the expansion to avoid using a ton of memory to store the expansion for - // large sizes. - LoadSequence.clear(); - return; - } - if (NumLoadsForThisSize > 0) { - for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { - LoadSequence.push_back({LoadSize, Offset}); - Offset += LoadSize; - } - if (LoadSize > 1) { - ++NumLoadsNonOneByte; - } - CurSize = CurSize % LoadSize; - } - ++LoadSizeIndex; - } - assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); -} - -unsigned MemCmpExpansion::getNumBlocks() { - if (IsUsedForZeroCmp) - return getNumLoads() / NumLoadsPerBlock + - (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); - return getNumLoads(); -} - -void MemCmpExpansion::createLoadCmpBlocks() { - for (unsigned i = 0; i < getNumBlocks(); i++) { - BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", - EndBlock->getParent(), EndBlock); - LoadCmpBlocks.push_back(BB); - } -} - -void MemCmpExpansion::createResultBlock() { - ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", - EndBlock->getParent(), EndBlock); -} - -// This function creates the IR instructions for loading and comparing 1 byte. -// It loads 1 byte from each source of the memcmp parameters with the given -// GEPIndex. It then subtracts the two loaded values and adds this result to the -// final phi node for selecting the memcmp result. -void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, - unsigned GEPIndex) { - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } - - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); - - PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); - - if (BlockIndex < (LoadCmpBlocks.size() - 1)) { - // Early exit branch if difference found to EndBlock. Otherwise, continue to - // next LoadCmpBlock, - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, - ConstantInt::get(Diff->getType(), 0)); - BranchInst *CmpBr = - BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); - Builder.Insert(CmpBr); - } else { - // The last block has an unconditional branch to EndBlock. - BranchInst *CmpBr = BranchInst::Create(EndBlock); - Builder.Insert(CmpBr); - } -} - -/// Generate an equality comparison for one or more pairs of loaded values. -/// This is used in the case where the memcmp() call is compared equal or not -/// equal to zero. -Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, - unsigned &LoadIndex) { - assert(LoadIndex < getNumLoads() && - "getCompareLoadPairs() called with no remaining loads"); - std::vector<Value *> XorList, OrList; - Value *Diff; - - const unsigned NumLoads = - std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); - - // For a single-block expansion, start inserting before the memcmp call. - if (LoadCmpBlocks.empty()) - Builder.SetInsertPoint(CI); - else - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - - Value *Cmp = nullptr; - // If we have multiple loads per block, we need to generate a composite - // comparison using xor+or. The type for the combinations is the largest load - // type. - IntegerType *const MaxLoadType = - NumLoads == 1 ? nullptr - : IntegerType::get(CI->getContext(), MaxLoadSize * 8); - for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { - const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; - - IntegerType *LoadSizeType = - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } - - // Get a constant or load a value for each source address. - Value *LoadSrc1 = nullptr; - if (auto *Source1C = dyn_cast<Constant>(Source1)) - LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); - if (!LoadSrc1) - LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - - Value *LoadSrc2 = nullptr; - if (auto *Source2C = dyn_cast<Constant>(Source2)) - LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); - if (!LoadSrc2) - LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (NumLoads != 1) { - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } - // If we have multiple loads per block, we need to generate a composite - // comparison using xor+or. - Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExt(Diff, MaxLoadType); - XorList.push_back(Diff); - } else { - // If there's only one load per block, we just compare the loaded values. - Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); - } - } - - auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { - std::vector<Value *> OutList; - for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { - Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); - OutList.push_back(Or); - } - if (InList.size() % 2 != 0) - OutList.push_back(InList.back()); - return OutList; - }; - - if (!Cmp) { - // Pairwise OR the XOR results. - OrList = pairWiseOr(XorList); - - // Pairwise OR the OR results until one result left. - while (OrList.size() != 1) { - OrList = pairWiseOr(OrList); - } - Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); - } - - return Cmp; -} - -void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, - unsigned &LoadIndex) { - Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); - - BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) - ? EndBlock - : LoadCmpBlocks[BlockIndex + 1]; - // Early exit branch if difference found to ResultBlock. Otherwise, - // continue to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); - Builder.Insert(CmpBr); - - // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 - // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes). - if (BlockIndex == LoadCmpBlocks.size() - 1) { - Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); - } -} - -// This function creates the IR intructions for loading and comparing using the -// given LoadSize. It loads the number of bytes specified by LoadSize from each -// source of the memcmp parameters. It then does a subtract to see if there was -// a difference in the loaded values. If a difference is found, it branches -// with an early exit to the ResultBlock for calculating which source was -// larger. Otherwise, it falls through to the either the next LoadCmpBlock or -// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with -// a special case through emitLoadCompareByteBlock. The special handling can -// simply subtract the loaded values and add it to the result phi node. -void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { - // There is one load per block in this case, BlockIndex == LoadIndex. - const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; - - if (CurLoadEntry.LoadSize == 1) { - MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, - CurLoadEntry.getGEPIndex()); - return; - } - - Type *LoadSizeType = - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); - - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian()) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } - - // Add the loaded values to the phi nodes for calculating memcmp result only - // if result is not used in a zero equality. - if (!IsUsedForZeroCmp) { - ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); - ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); - } - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); - BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) - ? EndBlock - : LoadCmpBlocks[BlockIndex + 1]; - // Early exit branch if difference found to ResultBlock. Otherwise, continue - // to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); - Builder.Insert(CmpBr); - - // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 - // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes). - if (BlockIndex == LoadCmpBlocks.size() - 1) { - Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); - } -} - -// This function populates the ResultBlock with a sequence to calculate the -// memcmp result. It compares the two loaded source values and returns -1 if -// src1 < src2 and 1 if src1 > src2. -void MemCmpExpansion::emitMemCmpResultBlock() { - // Special case: if memcmp result is used in a zero equality, result does not - // need to be calculated and can simply return 1. - if (IsUsedForZeroCmp) { - BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); - Builder.SetInsertPoint(ResBlock.BB, InsertPt); - Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); - PhiRes->addIncoming(Res, ResBlock.BB); - BranchInst *NewBr = BranchInst::Create(EndBlock); - Builder.Insert(NewBr); - return; - } - BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); - Builder.SetInsertPoint(ResBlock.BB, InsertPt); - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, - ResBlock.PhiSrc2); - - Value *Res = - Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), - ConstantInt::get(Builder.getInt32Ty(), 1)); - - BranchInst *NewBr = BranchInst::Create(EndBlock); - Builder.Insert(NewBr); - PhiRes->addIncoming(Res, ResBlock.BB); -} - -void MemCmpExpansion::setupResultBlockPHINodes() { - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - Builder.SetInsertPoint(ResBlock.BB); - // Note: this assumes one load per block. - ResBlock.PhiSrc1 = - Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); - ResBlock.PhiSrc2 = - Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); -} - -void MemCmpExpansion::setupEndBlockPHINodes() { - Builder.SetInsertPoint(&EndBlock->front()); - PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); -} - -Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { - unsigned LoadIndex = 0; - // This loop populates each of the LoadCmpBlocks with the IR sequence to - // handle multiple loads per block. - for (unsigned I = 0; I < getNumBlocks(); ++I) { - emitLoadCompareBlockMultipleLoads(I, LoadIndex); - } - - emitMemCmpResultBlock(); - return PhiRes; -} - -/// A memcmp expansion that compares equality with 0 and only has one block of -/// load and compare can bypass the compare, branch, and phi IR that is required -/// in the general case. -Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { - unsigned LoadIndex = 0; - Value *Cmp = getCompareLoadPairs(0, LoadIndex); - assert(LoadIndex == getNumLoads() && "some entries were not consumed"); - return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); -} - -/// A memcmp expansion that only has one block of load and compare can bypass -/// the compare, branch, and phi IR that is required in the general case. -Value *MemCmpExpansion::getMemCmpOneBlock() { - assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); - - Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian() && Size != 1) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (Size < 4) { - // The i8 and i16 cases don't need compares. We zext the loaded values and - // subtract them to get the suitable negative, zero, or positive i32 result. - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); - return Builder.CreateSub(LoadSrc1, LoadSrc2); - } - - // The result of memcmp is negative, zero, or positive, so produce that by - // subtracting 2 extended compare bits: sub (ugt, ult). - // If a target prefers to use selects to get -1/0/1, they should be able - // to transform this later. The inverse transform (going from selects to math) - // may not be possible in the DAG because the selects got converted into - // branches before we got there. - Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); - Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); - Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); - Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); - return Builder.CreateSub(ZextUGT, ZextULT); -} - -// This function expands the memcmp call into an inline expansion and returns -// the memcmp result. -Value *MemCmpExpansion::getMemCmpExpansion() { - // A memcmp with zero-comparison with only one block of load and compare does - // not need to set up any extra blocks. This case could be handled in the DAG, - // but since we have all of the machinery to flexibly expand any memcpy here, - // we choose to handle this case too to avoid fragmented lowering. - if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { - BasicBlock *StartBlock = CI->getParent(); - EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); - setupEndBlockPHINodes(); - createResultBlock(); - - // If return value of memcmp is not used in a zero equality, we need to - // calculate which source was larger. The calculation requires the - // two loaded source values of each load compare block. - // These will be saved in the phi nodes created by setupResultBlockPHINodes. - if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); - - // Create the number of required load compare basic blocks. - createLoadCmpBlocks(); - - // Update the terminator added by splitBasicBlock to branch to the first - // LoadCmpBlock. - StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); - } - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - if (IsUsedForZeroCmp) - return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() - : getMemCmpExpansionZeroCase(); - - // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). - if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); - - for (unsigned I = 0; I < getNumBlocks(); ++I) { - emitLoadCompareBlock(I); - } - - emitMemCmpResultBlock(); - return PhiRes; -} - -// This function checks to see if an expansion of memcmp can be generated. -// It checks for constant compare size that is less than the max inline size. -// If an expansion cannot occur, returns false to leave as a library call. -// Otherwise, the library call is replaced with a new IR instruction sequence. -/// We want to transform: -/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) -/// To: -/// loadbb: -/// %0 = bitcast i32* %buffer2 to i8* -/// %1 = bitcast i32* %buffer1 to i8* -/// %2 = bitcast i8* %1 to i64* -/// %3 = bitcast i8* %0 to i64* -/// %4 = load i64, i64* %2 -/// %5 = load i64, i64* %3 -/// %6 = call i64 @llvm.bswap.i64(i64 %4) -/// %7 = call i64 @llvm.bswap.i64(i64 %5) -/// %8 = sub i64 %6, %7 -/// %9 = icmp ne i64 %8, 0 -/// br i1 %9, label %res_block, label %loadbb1 -/// res_block: ; preds = %loadbb2, -/// %loadbb1, %loadbb -/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] -/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] -/// %10 = icmp ult i64 %phi.src1, %phi.src2 -/// %11 = select i1 %10, i32 -1, i32 1 -/// br label %endblock -/// loadbb1: ; preds = %loadbb -/// %12 = bitcast i32* %buffer2 to i8* -/// %13 = bitcast i32* %buffer1 to i8* -/// %14 = bitcast i8* %13 to i32* -/// %15 = bitcast i8* %12 to i32* -/// %16 = getelementptr i32, i32* %14, i32 2 -/// %17 = getelementptr i32, i32* %15, i32 2 -/// %18 = load i32, i32* %16 -/// %19 = load i32, i32* %17 -/// %20 = call i32 @llvm.bswap.i32(i32 %18) -/// %21 = call i32 @llvm.bswap.i32(i32 %19) -/// %22 = zext i32 %20 to i64 -/// %23 = zext i32 %21 to i64 -/// %24 = sub i64 %22, %23 -/// %25 = icmp ne i64 %24, 0 -/// br i1 %25, label %res_block, label %loadbb2 -/// loadbb2: ; preds = %loadbb1 -/// %26 = bitcast i32* %buffer2 to i8* -/// %27 = bitcast i32* %buffer1 to i8* -/// %28 = bitcast i8* %27 to i16* -/// %29 = bitcast i8* %26 to i16* -/// %30 = getelementptr i16, i16* %28, i16 6 -/// %31 = getelementptr i16, i16* %29, i16 6 -/// %32 = load i16, i16* %30 -/// %33 = load i16, i16* %31 -/// %34 = call i16 @llvm.bswap.i16(i16 %32) -/// %35 = call i16 @llvm.bswap.i16(i16 %33) -/// %36 = zext i16 %34 to i64 -/// %37 = zext i16 %35 to i64 -/// %38 = sub i64 %36, %37 -/// %39 = icmp ne i64 %38, 0 -/// br i1 %39, label %res_block, label %loadbb3 -/// loadbb3: ; preds = %loadbb2 -/// %40 = bitcast i32* %buffer2 to i8* -/// %41 = bitcast i32* %buffer1 to i8* -/// %42 = getelementptr i8, i8* %41, i8 14 -/// %43 = getelementptr i8, i8* %40, i8 14 -/// %44 = load i8, i8* %42 -/// %45 = load i8, i8* %43 -/// %46 = zext i8 %44 to i32 -/// %47 = zext i8 %45 to i32 -/// %48 = sub i32 %46, %47 -/// br label %endblock -/// endblock: ; preds = %res_block, -/// %loadbb3 -/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] -/// ret i32 %phi.res -static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL) { - NumMemCmpCalls++; - - // Early exit from expansion if -Oz. - if (CI->getFunction()->optForMinSize()) - return false; - - // Early exit from expansion if size is not a constant. - ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeCast) { - NumMemCmpNotConstant++; - return false; - } - const uint64_t SizeVal = SizeCast->getZExtValue(); - - if (SizeVal == 0) { - return false; - } - - // TTI call to check if target would like to expand memcmp. Also, get the - // available load sizes. - const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); - if (!Options) return false; - - const unsigned MaxNumLoads = - TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); - - MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, - IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); - - // Don't expand if this will require more loads than desired by the target. - if (Expansion.getNumLoads() == 0) { - NumMemCmpGreaterThanMax++; - return false; - } - - NumMemCmpInlined++; - - Value *Res = Expansion.getMemCmpExpansion(); - - // Replace call with result of expansion and erase call. - CI->replaceAllUsesWith(Res); - CI->eraseFromParent(); - - return true; -} - bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { BasicBlock *BB = CI->getParent(); @@ -2542,12 +1858,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { return true; } - LibFunc Func; - if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && - Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) { - ModifiedDT = true; - return true; - } return false; } @@ -3377,8 +2687,65 @@ private: Value *PromotedOperand) const; }; +/// \brief Keep track of simplification of Phi nodes. +/// Accept the set of all phi nodes and erase phi node from this set +/// if it is simplified. +class SimplificationTracker { + DenseMap<Value *, Value *> Storage; + const SimplifyQuery &SQ; + SmallPtrSetImpl<PHINode *> &AllPhiNodes; + SmallPtrSetImpl<SelectInst *> &AllSelectNodes; + +public: + SimplificationTracker(const SimplifyQuery &sq, + SmallPtrSetImpl<PHINode *> &APN, + SmallPtrSetImpl<SelectInst *> &ASN) + : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {} + + Value *Get(Value *V) { + do { + auto SV = Storage.find(V); + if (SV == Storage.end()) + return V; + V = SV->second; + } while (true); + } + + Value *Simplify(Value *Val) { + SmallVector<Value *, 32> WorkList; + SmallPtrSet<Value *, 32> Visited; + WorkList.push_back(Val); + while (!WorkList.empty()) { + auto P = WorkList.pop_back_val(); + if (!Visited.insert(P).second) + continue; + if (auto *PI = dyn_cast<Instruction>(P)) + if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) { + for (auto *U : PI->users()) + WorkList.push_back(cast<Value>(U)); + Put(PI, V); + PI->replaceAllUsesWith(V); + if (auto *PHI = dyn_cast<PHINode>(PI)) + AllPhiNodes.erase(PHI); + if (auto *Select = dyn_cast<SelectInst>(PI)) + AllSelectNodes.erase(Select); + PI->eraseFromParent(); + } + } + return Get(Val); + } + + void Put(Value *From, Value *To) { + Storage.insert({ From, To }); + } +}; + /// \brief A helper class for combining addressing modes. class AddressingModeCombiner { + typedef std::pair<Value *, BasicBlock *> ValueInBB; + typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping; + typedef std::pair<PHINode *, PHINode *> PHIPair; + private: /// The addressing modes we've collected. SmallVector<ExtAddrMode, 16> AddrModes; @@ -3389,7 +2756,19 @@ private: /// Are the AddrModes that we have all just equal to their original values? bool AllAddrModesTrivial = true; + /// Common Type for all different fields in addressing modes. + Type *CommonType; + + /// SimplifyQuery for simplifyInstruction utility. + const SimplifyQuery &SQ; + + /// Original Address. + ValueInBB Original; + public: + AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue) + : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} + /// \brief Get the combined AddrMode const ExtAddrMode &getAddrMode() const { return AddrModes[0]; @@ -3457,12 +2836,356 @@ public: if (AllAddrModesTrivial) return false; - // TODO: Combine multiple AddrModes by inserting a select or phi for the - // field in which the AddrModes differ. - return false; + if (DisableComplexAddrModes) + return false; + + // For now we support only different base registers. + // TODO: enable others. + if (DifferentField != ExtAddrMode::BaseRegField) + return false; + + // Build a map between <original value, basic block where we saw it> to + // value of base register. + FoldAddrToValueMapping Map; + initializeMap(Map); + + Value *CommonValue = findCommon(Map); + if (CommonValue) + AddrModes[0].BaseReg = CommonValue; + return CommonValue != nullptr; + } + +private: + /// \brief Initialize Map with anchor values. For address seen in some BB + /// we set the value of different field saw in this address. + /// If address is not an instruction than basic block is set to null. + /// At the same time we find a common type for different field we will + /// use to create new Phi/Select nodes. Keep it in CommonType field. + void initializeMap(FoldAddrToValueMapping &Map) { + // Keep track of keys where the value is null. We will need to replace it + // with constant null when we know the common type. + SmallVector<ValueInBB, 2> NullValue; + for (auto &AM : AddrModes) { + BasicBlock *BB = nullptr; + if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue)) + BB = I->getParent(); + + // For now we support only base register as different field. + // TODO: Enable others. + Value *DV = AM.BaseReg; + if (DV) { + if (CommonType) + assert(CommonType == DV->getType() && "Different types detected!"); + else + CommonType = DV->getType(); + Map[{ AM.OriginalValue, BB }] = DV; + } else { + NullValue.push_back({ AM.OriginalValue, BB }); + } + } + assert(CommonType && "At least one non-null value must be!"); + for (auto VIBB : NullValue) + Map[VIBB] = Constant::getNullValue(CommonType); + } + + /// \brief We have mapping between value A and basic block where value A + /// seen to other value B where B was a field in addressing mode represented + /// by A. Also we have an original value C representin an address in some + /// basic block. Traversing from C through phi and selects we ended up with + /// A's in a map. This utility function tries to find a value V which is a + /// field in addressing mode C and traversing through phi nodes and selects + /// we will end up in corresponded values B in a map. + /// The utility will create a new Phi/Selects if needed. + // The simple example looks as follows: + // BB1: + // p1 = b1 + 40 + // br cond BB2, BB3 + // BB2: + // p2 = b2 + 40 + // br BB3 + // BB3: + // p = phi [p1, BB1], [p2, BB2] + // v = load p + // Map is + // <p1, BB1> -> b1 + // <p2, BB2> -> b2 + // Request is + // <p, BB3> -> ? + // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 + Value *findCommon(FoldAddrToValueMapping &Map) { + // Tracks of new created Phi nodes. + SmallPtrSet<PHINode *, 32> NewPhiNodes; + // Tracks of new created Select nodes. + SmallPtrSet<SelectInst *, 32> NewSelectNodes; + // Tracks the simplification of new created phi nodes. The reason we use + // this mapping is because we will add new created Phi nodes in AddrToBase. + // Simplification of Phi nodes is recursive, so some Phi node may + // be simplified after we added it to AddrToBase. + // Using this mapping we can find the current value in AddrToBase. + SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes); + + // First step, DFS to create PHI nodes for all intermediate blocks. + // Also fill traverse order for the second step. + SmallVector<ValueInBB, 32> TraverseOrder; + InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes); + + // Second Step, fill new nodes by merged values and simplify if possible. + FillPlaceholders(Map, TraverseOrder, ST); + + if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + // Now we'd like to match New Phi nodes to existed ones. + unsigned PhiNotMatchedCount = 0; + if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + auto *Result = ST.Get(Map.find(Original)->second); + if (Result) { + NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount; + NumMemoryInstsSelectCreated += NewSelectNodes.size(); + } + return Result; + } + + /// \brief Destroy nodes from a set. + template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) { + // For safe erasing, replace the Phi with dummy value first. + auto Dummy = UndefValue::get(CommonType); + for (auto I : Instructions) { + I->replaceAllUsesWith(Dummy); + I->eraseFromParent(); + } + } + + /// \brief Try to match PHI node to Candidate. + /// Matcher tracks the matched Phi nodes. + bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, + DenseSet<PHIPair> &Matcher, + SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) { + SmallVector<PHIPair, 8> WorkList; + Matcher.insert({ PHI, Candidate }); + WorkList.push_back({ PHI, Candidate }); + SmallSet<PHIPair, 8> Visited; + while (!WorkList.empty()) { + auto Item = WorkList.pop_back_val(); + if (!Visited.insert(Item).second) + continue; + // We iterate over all incoming values to Phi to compare them. + // If values are different and both of them Phi and the first one is a + // Phi we added (subject to match) and both of them is in the same basic + // block then we can match our pair if values match. So we state that + // these values match and add it to work list to verify that. + for (auto B : Item.first->blocks()) { + Value *FirstValue = Item.first->getIncomingValueForBlock(B); + Value *SecondValue = Item.second->getIncomingValueForBlock(B); + if (FirstValue == SecondValue) + continue; + + PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue); + PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue); + + // One of them is not Phi or + // The first one is not Phi node from the set we'd like to match or + // Phi nodes from different basic blocks then + // we will not be able to match. + if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || + FirstPhi->getParent() != SecondPhi->getParent()) + return false; + + // If we already matched them then continue. + if (Matcher.count({ FirstPhi, SecondPhi })) + continue; + // So the values are different and does not match. So we need them to + // match. + Matcher.insert({ FirstPhi, SecondPhi }); + // But me must check it. + WorkList.push_back({ FirstPhi, SecondPhi }); + } + } + return true; } -}; + /// \brief For the given set of PHI nodes try to find their equivalents. + /// Returns false if this matching fails and creation of new Phi is disabled. + bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch, + SimplificationTracker &ST, bool AllowNewPhiNodes, + unsigned &PhiNotMatchedCount) { + DenseSet<PHIPair> Matched; + SmallPtrSet<PHINode *, 8> WillNotMatch; + while (PhiNodesToMatch.size()) { + PHINode *PHI = *PhiNodesToMatch.begin(); + + // Add us, if no Phi nodes in the basic block we do not match. + WillNotMatch.clear(); + WillNotMatch.insert(PHI); + + // Traverse all Phis until we found equivalent or fail to do that. + bool IsMatched = false; + for (auto &P : PHI->getParent()->phis()) { + if (&P == PHI) + continue; + if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) + break; + // If it does not match, collect all Phi nodes from matcher. + // if we end up with no match, them all these Phi nodes will not match + // later. + for (auto M : Matched) + WillNotMatch.insert(M.first); + Matched.clear(); + } + if (IsMatched) { + // Replace all matched values and erase them. + for (auto MV : Matched) { + MV.first->replaceAllUsesWith(MV.second); + PhiNodesToMatch.erase(MV.first); + ST.Put(MV.first, MV.second); + MV.first->eraseFromParent(); + } + Matched.clear(); + continue; + } + // If we are not allowed to create new nodes then bail out. + if (!AllowNewPhiNodes) + return false; + // Just remove all seen values in matcher. They will not match anything. + PhiNotMatchedCount += WillNotMatch.size(); + for (auto *P : WillNotMatch) + PhiNodesToMatch.erase(P); + } + return true; + } + /// \brief Fill the placeholder with values from predecessors and simplify it. + void FillPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl<ValueInBB> &TraverseOrder, + SimplificationTracker &ST) { + while (!TraverseOrder.empty()) { + auto Current = TraverseOrder.pop_back_val(); + assert(Map.find(Current) != Map.end() && "No node to fill!!!"); + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + Value *V = Map[Current]; + + if (SelectInst *Select = dyn_cast<SelectInst>(V)) { + // CurrentValue also must be Select. + auto *CurrentSelect = cast<SelectInst>(CurrentValue); + auto *TrueValue = CurrentSelect->getTrueValue(); + ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(TrueItem) != Map.end() && "No True Value!"); + Select->setTrueValue(Map[TrueItem]); + auto *FalseValue = CurrentSelect->getFalseValue(); + ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(FalseItem) != Map.end() && "No False Value!"); + Select->setFalseValue(Map[FalseItem]); + } else { + // Must be a Phi node then. + PHINode *PHI = cast<PHINode>(V); + // Fill the Phi node with values from predecessors. + bool IsDefinedInThisBB = + cast<Instruction>(CurrentValue)->getParent() == CurrentBlock; + auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue); + for (auto B : predecessors(CurrentBlock)) { + Value *PV = IsDefinedInThisBB + ? CurrentPhi->getIncomingValueForBlock(B) + : CurrentValue; + ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr }; + assert(Map.find(item) != Map.end() && "No predecessor Value!"); + PHI->addIncoming(ST.Get(Map[item]), B); + } + } + // Simplify if possible. + Map[Current] = ST.Simplify(V); + } + } + + /// Starting from value recursively iterates over predecessors up to known + /// ending values represented in a map. For each traversed block inserts + /// a placeholder Phi or Select. + /// Reports all new created Phi/Select nodes by adding them to set. + /// Also reports and order in what basic blocks have been traversed. + void InsertPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl<ValueInBB> &TraverseOrder, + SmallPtrSetImpl<PHINode *> &NewPhiNodes, + SmallPtrSetImpl<SelectInst *> &NewSelectNodes) { + SmallVector<ValueInBB, 32> Worklist; + assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) && + "Address must be a Phi or Select node"); + auto *Dummy = UndefValue::get(CommonType); + Worklist.push_back(Original); + while (!Worklist.empty()) { + auto Current = Worklist.pop_back_val(); + // If value is not an instruction it is something global, constant, + // parameter and we can say that this value is observable in any block. + // Set block to null to denote it. + // Also please take into account that it is how we build anchors. + if (!isa<Instruction>(Current.first)) + Current.second = nullptr; + // if it is already visited or it is an ending value then skip it. + if (Map.find(Current) != Map.end()) + continue; + TraverseOrder.push_back(Current); + + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + // CurrentValue must be a Phi node or select. All others must be covered + // by anchors. + Instruction *CurrentI = cast<Instruction>(CurrentValue); + bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock; + + unsigned PredCount = + std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock)); + // if Current Value is not defined in this basic block we are interested + // in values in predecessors. + if (!IsDefinedInThisBB) { + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentValue, B }); + continue; + } + // Value is defined in this basic block. + if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) { + // Is it OK to get metadata from OrigSelect?! + // Create a Select placeholder with dummy value. + SelectInst *Select = + SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy, + OrigSelect->getName(), OrigSelect, OrigSelect); + Map[Current] = Select; + NewSelectNodes.insert(Select); + // We are interested in True and False value in this basic block. + Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock }); + Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock }); + } else { + // It must be a Phi node then. + auto *CurrentPhi = cast<PHINode>(CurrentI); + // Create new Phi node for merge of bases. + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B }); + } + } + } +}; } // end anonymous namespace /// Try adding ScaleReg*Scale to the current addressing mode. @@ -4555,7 +4278,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the graph are compatible. bool PhiOrSelectSeen = false; SmallVector<Instruction*, 16> AddrModeInsts; - AddressingModeCombiner AddrModes; + const SimplifyQuery SQ(*DL, TLInfo); + AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() }); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); @@ -6715,7 +6439,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { Instruction *Insn = &*BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); // Leave dbg.values that refer to an alloca alone. These - // instrinsics describe the address of a variable (= the alloca) + // intrinsics describe the address of a variable (= the alloca) // being taken. They should not be moved next to the alloca // (and to the beginning of the scope), but rather stay close to // where said address is used. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index a791c01c48b..9ef172274c1 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -26,11 +26,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index cf21316ec22..0123afb4cd8 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -29,12 +29,12 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 91d18e2bcaa..ef1452087f2 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index ab9a0592e01..2613471714e 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -34,12 +34,12 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 402afe75b14..cb6c3ae04c7 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -30,10 +30,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index e272d25047e..28f716ee6c2 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp new file mode 100644 index 00000000000..c5910c18d89 --- /dev/null +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -0,0 +1,828 @@ +//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to partially inline the fast path of well-known library +// functions, such as using square-root instructions for cases where sqrt() +// does not need to set errno. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "expandmemcmp" + +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, + "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); + +static cl::opt<unsigned> MemCmpNumLoadsPerBlock( + "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), + cl::desc("The number of loads per basic block for inline expansion of " + "memcmp that is only being compared against zero.")); + +namespace { + + +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. +class MemCmpExpansion { + struct ResultBlock { + BasicBlock *BB = nullptr; + PHINode *PhiSrc1 = nullptr; + PHINode *PhiSrc2 = nullptr; + + ResultBlock() = default; + }; + + CallInst *const CI; + ResultBlock ResBlock; + const uint64_t Size; + unsigned MaxLoadSize; + uint64_t NumLoadsNonOneByte; + const uint64_t NumLoadsPerBlock; + std::vector<BasicBlock *> LoadCmpBlocks; + BasicBlock *EndBlock; + PHINode *PhiRes; + const bool IsUsedForZeroCmp; + const DataLayout &DL; + IRBuilder<> Builder; + // Represents the decomposition in blocks of the expansion. For example, + // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and + // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. + // TODO(courbet): Involve the target more in this computation. On X86, 7 + // bytes can be done more efficiently with two overlaping 4-byte loads than + // covering the interval with [{4, 0},{2, 4},{1, 6}}. + struct LoadEntry { + LoadEntry(unsigned LoadSize, uint64_t Offset) + : LoadSize(LoadSize), Offset(Offset) { + assert(Offset % LoadSize == 0 && "invalid load entry"); + } + + uint64_t getGEPIndex() const { return Offset / LoadSize; } + + // The size of the load for this block, in bytes. + const unsigned LoadSize; + // The offset of this load WRT the base pointer, in bytes. + const uint64_t Offset; + }; + SmallVector<LoadEntry, 8> LoadSequence; + + void createLoadCmpBlocks(); + void createResultBlock(); + void setupResultBlockPHINodes(); + void setupEndBlockPHINodes(); + Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); + void emitLoadCompareBlock(unsigned BlockIndex); + void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex); + void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); + void emitMemCmpResultBlock(); + Value *getMemCmpExpansionZeroCase(); + Value *getMemCmpEqZeroOneBlock(); + Value *getMemCmpOneBlock(); + + public: + MemCmpExpansion(CallInst *CI, uint64_t Size, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + unsigned MaxNumLoads, const bool IsUsedForZeroCmp, + unsigned NumLoadsPerBlock, const DataLayout &DL); + + unsigned getNumBlocks(); + uint64_t getNumLoads() const { return LoadSequence.size(); } + + Value *getMemCmpExpansion(); +}; + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion( + CallInst *const CI, uint64_t Size, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, + const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) + : CI(CI), + Size(Size), + MaxLoadSize(0), + NumLoadsNonOneByte(0), + NumLoadsPerBlock(NumLoadsPerBlock), + IsUsedForZeroCmp(IsUsedForZeroCmp), + DL(TheDataLayout), + Builder(CI) { + assert(Size > 0 && "zero blocks"); + // Scale the max size down if the target can load more bytes than we need. + size_t LoadSizeIndex = 0; + while (LoadSizeIndex < Options.LoadSizes.size() && + Options.LoadSizes[LoadSizeIndex] > Size) { + ++LoadSizeIndex; + } + this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; + // Compute the decomposition. + uint64_t CurSize = Size; + uint64_t Offset = 0; + while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { + const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; + assert(LoadSize > 0 && "zero load size"); + const uint64_t NumLoadsForThisSize = CurSize / LoadSize; + if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { + // Do not expand if the total number of loads is larger than what the + // target allows. Note that it's important that we exit before completing + // the expansion to avoid using a ton of memory to store the expansion for + // large sizes. + LoadSequence.clear(); + return; + } + if (NumLoadsForThisSize > 0) { + for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) { + ++NumLoadsNonOneByte; + } + CurSize = CurSize % LoadSize; + } + ++LoadSizeIndex; + } + assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); +} + +unsigned MemCmpExpansion::getNumBlocks() { + if (IsUsedForZeroCmp) + return getNumLoads() / NumLoadsPerBlock + + (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); + return getNumLoads(); +} + +void MemCmpExpansion::createLoadCmpBlocks() { + for (unsigned i = 0; i < getNumBlocks(); i++) { + BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", + EndBlock->getParent(), EndBlock); + LoadCmpBlocks.push_back(BB); + } +} + +void MemCmpExpansion::createResultBlock() { + ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", + EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp parameters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, + unsigned GEPIndex) { + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); + + if (BlockIndex < (LoadCmpBlocks.size() - 1)) { + // Early exit branch if difference found to EndBlock. Otherwise, continue to + // next LoadCmpBlock, + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BranchInst *CmpBr = + BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); + Builder.Insert(CmpBr); + } else { + // The last block has an unconditional branch to EndBlock. + BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); + } +} + +/// Generate an equality comparison for one or more pairs of loaded values. +/// This is used in the case where the memcmp() call is compared equal or not +/// equal to zero. +Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, + unsigned &LoadIndex) { + assert(LoadIndex < getNumLoads() && + "getCompareLoadPairs() called with no remaining loads"); + std::vector<Value *> XorList, OrList; + Value *Diff; + + const unsigned NumLoads = + std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); + + // For a single-block expansion, start inserting before the memcmp call. + if (LoadCmpBlocks.empty()) + Builder.SetInsertPoint(CI); + else + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + + Value *Cmp = nullptr; + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. The type for the combinations is the largest load + // type. + IntegerType *const MaxLoadType = + NumLoads == 1 ? nullptr + : IntegerType::get(CI->getContext(), MaxLoadSize * 8); + for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { + const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; + + IntegerType *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + } + + // Get a constant or load a value for each source address. + Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (NumLoads != 1) { + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExt(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + } + } + + auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { + std::vector<Value *> OutList; + for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { + Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); + OutList.push_back(Or); + } + if (InList.size() % 2 != 0) + OutList.push_back(InList.back()); + return OutList; + }; + + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); + + // Pairwise OR the OR results until one result left. + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); + } + + return Cmp; +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex) { + Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); + + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[BlockIndex + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, + // continue to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (BlockIndex == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); + } +} + +// This function creates the IR intructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to the either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add it to the result phi node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { + // There is one load per block in this case, BlockIndex == LoadIndex. + const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; + + if (CurLoadEntry.LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, + CurLoadEntry.getGEPIndex()); + return; + } + + Type *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + } + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian()) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + + // Add the loaded values to the phi nodes for calculating memcmp result only + // if result is not used in a zero equality. + if (!IsUsedForZeroCmp) { + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); + } + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[BlockIndex + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, continue + // to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (BlockIndex == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); + } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock() { + // Special case: if memcmp result is used in a zero equality, result does not + // need to be calculated and can simply return 1. + if (IsUsedForZeroCmp) { + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); + PhiRes->addIncoming(Res, ResBlock.BB); + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + return; + } + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, + ResBlock.PhiSrc2); + + Value *Res = + Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), + ConstantInt::get(Builder.getInt32Ty(), 1)); + + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + PhiRes->addIncoming(Res, ResBlock.BB); +} + +void MemCmpExpansion::setupResultBlockPHINodes() { + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Builder.SetInsertPoint(ResBlock.BB); + // Note: this assumes one load per block. + ResBlock.PhiSrc1 = + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); + ResBlock.PhiSrc2 = + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { + Builder.SetInsertPoint(&EndBlock->front()); + PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { + unsigned LoadIndex = 0; + // This loop populates each of the LoadCmpBlocks with the IR sequence to + // handle multiple loads per block. + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlockMultipleLoads(I, LoadIndex); + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +/// A memcmp expansion that compares equality with 0 and only has one block of +/// load and compare can bypass the compare, branch, and phi IR that is required +/// in the general case. +Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { + unsigned LoadIndex = 0; + Value *Cmp = getCompareLoadPairs(0, LoadIndex); + assert(LoadIndex == getNumLoads() && "some entries were not consumed"); + return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); +} + +/// A memcmp expansion that only has one block of load and compare can bypass +/// the compare, branch, and phi IR that is required in the general case. +Value *MemCmpExpansion::getMemCmpOneBlock() { + assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian() && Size != 1) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (Size < 4) { + // The i8 and i16 cases don't need compares. We zext the loaded values and + // subtract them to get the suitable negative, zero, or positive i32 result. + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); + return Builder.CreateSub(LoadSrc1, LoadSrc2); + } + + // The result of memcmp is negative, zero, or positive, so produce that by + // subtracting 2 extended compare bits: sub (ugt, ult). + // If a target prefers to use selects to get -1/0/1, they should be able + // to transform this later. The inverse transform (going from selects to math) + // may not be possible in the DAG because the selects got converted into + // branches before we got there. + Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); + Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); + Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); + return Builder.CreateSub(ZextUGT, ZextULT); +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion() { + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcpy here, + // we choose to handle this case too to avoid fragmented lowering. + if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. + StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + if (IsUsedForZeroCmp) + return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() + : getMemCmpExpansionZeroCase(); + + // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). + if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); + + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlock(I); + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced with a new IR instruction sequence. +/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +/// %0 = bitcast i32* %buffer2 to i8* +/// %1 = bitcast i32* %buffer1 to i8* +/// %2 = bitcast i8* %1 to i64* +/// %3 = bitcast i8* %0 to i64* +/// %4 = load i64, i64* %2 +/// %5 = load i64, i64* %3 +/// %6 = call i64 @llvm.bswap.i64(i64 %4) +/// %7 = call i64 @llvm.bswap.i64(i64 %5) +/// %8 = sub i64 %6, %7 +/// %9 = icmp ne i64 %8, 0 +/// br i1 %9, label %res_block, label %loadbb1 +/// res_block: ; preds = %loadbb2, +/// %loadbb1, %loadbb +/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +/// %10 = icmp ult i64 %phi.src1, %phi.src2 +/// %11 = select i1 %10, i32 -1, i32 1 +/// br label %endblock +/// loadbb1: ; preds = %loadbb +/// %12 = bitcast i32* %buffer2 to i8* +/// %13 = bitcast i32* %buffer1 to i8* +/// %14 = bitcast i8* %13 to i32* +/// %15 = bitcast i8* %12 to i32* +/// %16 = getelementptr i32, i32* %14, i32 2 +/// %17 = getelementptr i32, i32* %15, i32 2 +/// %18 = load i32, i32* %16 +/// %19 = load i32, i32* %17 +/// %20 = call i32 @llvm.bswap.i32(i32 %18) +/// %21 = call i32 @llvm.bswap.i32(i32 %19) +/// %22 = zext i32 %20 to i64 +/// %23 = zext i32 %21 to i64 +/// %24 = sub i64 %22, %23 +/// %25 = icmp ne i64 %24, 0 +/// br i1 %25, label %res_block, label %loadbb2 +/// loadbb2: ; preds = %loadbb1 +/// %26 = bitcast i32* %buffer2 to i8* +/// %27 = bitcast i32* %buffer1 to i8* +/// %28 = bitcast i8* %27 to i16* +/// %29 = bitcast i8* %26 to i16* +/// %30 = getelementptr i16, i16* %28, i16 6 +/// %31 = getelementptr i16, i16* %29, i16 6 +/// %32 = load i16, i16* %30 +/// %33 = load i16, i16* %31 +/// %34 = call i16 @llvm.bswap.i16(i16 %32) +/// %35 = call i16 @llvm.bswap.i16(i16 %33) +/// %36 = zext i16 %34 to i64 +/// %37 = zext i16 %35 to i64 +/// %38 = sub i64 %36, %37 +/// %39 = icmp ne i64 %38, 0 +/// br i1 %39, label %res_block, label %loadbb3 +/// loadbb3: ; preds = %loadbb2 +/// %40 = bitcast i32* %buffer2 to i8* +/// %41 = bitcast i32* %buffer1 to i8* +/// %42 = getelementptr i8, i8* %41, i8 14 +/// %43 = getelementptr i8, i8* %40, i8 14 +/// %44 = load i8, i8* %42 +/// %45 = load i8, i8* %43 +/// %46 = zext i8 %44 to i32 +/// %47 = zext i8 %45 to i32 +/// %48 = sub i32 %46, %47 +/// br label %endblock +/// endblock: ; preds = %res_block, +/// %loadbb3 +/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +/// ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, + const TargetLowering *TLI, const DataLayout *DL) { + NumMemCmpCalls++; + + // Early exit from expansion if -Oz. + if (CI->getFunction()->optForMinSize()) + return false; + + // Early exit from expansion if size is not a constant. + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeCast) { + NumMemCmpNotConstant++; + return false; + } + const uint64_t SizeVal = SizeCast->getZExtValue(); + + if (SizeVal == 0) { + return false; + } + + // TTI call to check if target would like to expand memcmp. Also, get the + // available load sizes. + const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); + if (!Options) return false; + + const unsigned MaxNumLoads = + TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + + MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, + IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); + + // Don't expand if this will require more loads than desired by the target. + if (Expansion.getNumLoads() == 0) { + NumMemCmpGreaterThanMax++; + return false; + } + + NumMemCmpInlined++; + + Value *Res = Expansion.getMemCmpExpansion(); + + // Replace call with result of expansion and erase call. + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + + return true; +} + + + +class ExpandMemCmpPass : public FunctionPass { +public: + static char ID; + + ExpandMemCmpPass() : FunctionPass(ID) { + initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) { + return false; + } + const TargetLowering* TL = + TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); + + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const TargetTransformInfo *TTI = + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto PA = runImpl(F, TLI, TTI, TL); + return !PA.areAllPreserved(); + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + const TargetLowering* TL); + // Returns true if a change was made. + bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, const TargetLowering* TL, + const DataLayout& DL); +}; + +bool ExpandMemCmpPass::runOnBlock( + BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, const TargetLowering* TL, + const DataLayout& DL) { + for (Instruction& I : BB) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) { + continue; + } + LibFunc Func; + if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && + Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) { + return true; + } + } + return false; +} + + +PreservedAnalyses ExpandMemCmpPass::runImpl( + Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, + const TargetLowering* TL) { + const DataLayout& DL = F.getParent()->getDataLayout(); + bool MadeChanges = false; + for (auto BBIt = F.begin(); BBIt != F.end();) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { + MadeChanges = true; + // If changes were made, restart the function from the beginning, since + // the structure of the function was changed. + BBIt = F.begin(); + } else { + ++BBIt; + } + } + return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +} // namespace + +char ExpandMemCmpPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", + "Expand memcmp() to load/stores", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", + "Expand memcmp() to load/stores", false, false) + +FunctionPass *llvm::createExpandMemCmpPass() { + return new ExpandMemCmpPass(); +} diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 4ce86f27a7d..b73aeb18382 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp index 70dca3b74b2..abf487a4f19 100644 --- a/lib/CodeGen/ExpandReductions.cpp +++ b/lib/CodeGen/ExpandReductions.cpp @@ -95,7 +95,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { // and it can't be handled by generating this shuffle sequence. // TODO: Implement scalarization of ordered reductions here for targets // without native support. - if (!II->getFastMathFlags().unsafeAlgebra()) + if (!II->getFastMathFlags().isFast()) continue; Vec = II->getArgOperand(1); break; diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp index 9781338f952..07cb7016139 100644 --- a/lib/CodeGen/FEntryInserter.cpp +++ b/lib/CodeGen/FEntryInserter.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 35246545ca9..2064ce7ac7b 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -18,14 +18,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8e31ed0a015..45eb605c3c2 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -54,7 +54,7 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index fb954f3c3f1..59f34d730d0 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -20,9 +20,9 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <iterator> diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 99f605abe06..a8cfe0b89a0 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -173,12 +173,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIRBuilder.setInstr(MI); + int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + int64_t NarrowSize = NarrowTy.getSizeInBits(); + switch (MI.getOpcode()) { default: return UnableToLegalize; case TargetOpcode::G_IMPLICIT_DEF: { - int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / - NarrowTy.getSizeInBits(); + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { @@ -191,9 +197,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_ADD: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; // Expand in terms of carry-setting/consuming G_ADDE instructions. - int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / - NarrowTy.getSizeInBits(); + int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); @@ -221,9 +230,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (TypeIdx != 1) return UnableToLegalize; - int64_t NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize; + int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + // FIXME: add support for when SizeOp1 isn't an exact multiple of + // NarrowSize. + if (SizeOp1 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp1 / NarrowSize; SmallVector<unsigned, 2> SrcRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; @@ -270,12 +282,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_INSERT: { - if (TypeIdx != 0) + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; - int64_t NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> SrcRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; @@ -330,9 +342,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_LOAD: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -357,9 +371,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_STORE: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -381,9 +397,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_CONSTANT: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext(); @@ -410,9 +428,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // ... // AN = BinOp<Ty/N> BN, CN // A = G_MERGE_VALUES A1, ..., AN - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; // List the registers where the destination will be scattered. SmallVector<unsigned, 2> DstRegs; @@ -854,7 +875,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_ADD: { unsigned NarrowSize = NarrowTy.getSizeInBits(); unsigned DstReg = MI.getOperand(0).getReg(); - int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize; + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + int NumParts = Size / NarrowSize; + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (Size % NarrowSize != 0) + return UnableToLegalize; MIRBuilder.setInstr(MI); diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index e7a46eadb44..074cfa61a29 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -28,46 +28,130 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOpcodes.h" #include <algorithm> -#include <cassert> -#include <tuple> -#include <utility> - +#include <map> using namespace llvm; -LegalizerInfo::LegalizerInfo() { - DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar; - - // FIXME: these two can be legalized to the fundamental load/store Jakob - // proposed. Once loads & stores are supported. - DefaultActions[TargetOpcode::G_ANYEXT] = Legal; - DefaultActions[TargetOpcode::G_TRUNC] = Legal; +LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { + // Set defaults. + // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the + // fundamental load/store Jakob proposed. Once loads & stores are supported. + setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}}); - DefaultActions[TargetOpcode::G_INTRINSIC] = Legal; - DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal; + setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}}); - DefaultActions[TargetOpcode::G_ADD] = NarrowScalar; - DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar; - DefaultActions[TargetOpcode::G_STORE] = NarrowScalar; - DefaultActions[TargetOpcode::G_OR] = NarrowScalar; + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall); - DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; - DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; - DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar; - DefaultActions[TargetOpcode::G_FNEG] = Lower; + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall); + setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}}); } void LegalizerInfo::computeTables() { - for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) { - for (unsigned Idx = 0, End = Actions[Opcode].size(); Idx != End; ++Idx) { - for (auto &Action : Actions[Opcode][Idx]) { - LLT Ty = Action.first; - if (!Ty.isVector()) - continue; - - auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp, - Ty.getElementType())]; - Entry = std::max(Entry, Ty.getNumElements()); + assert(TablesInitialized == false); + + for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) { + const unsigned Opcode = FirstOp + OpcodeIdx; + for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size(); + ++TypeIdx) { + // 0. Collect information specified through the setAction API, i.e. + // for specific bit sizes. + // For scalar types: + SizeAndActionsVec ScalarSpecifiedActions; + // For pointer types: + std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions; + // For vector types: + std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions; + for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) { + const LLT Type = LLT2Action.first; + const LegalizeAction Action = LLT2Action.second; + + auto SizeAction = std::make_pair(Type.getSizeInBits(), Action); + if (Type.isPointer()) + AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back( + SizeAction); + else if (Type.isVector()) + ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()] + .push_back(SizeAction); + else + ScalarSpecifiedActions.push_back(SizeAction); + } + + // 1. Handle scalar types + { + // Decide how to handle bit sizes for which no explicit specification + // was given. + SizeChangeStrategy S = &unsupportedForDifferentSizes; + if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() && + ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) + S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx]; + std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end()); + checkPartialSizeAndActionsVector(ScalarSpecifiedActions); + setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions)); } + + // 2. Handle pointer types + for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) { + std::sort(PointerSpecifiedActions.second.begin(), + PointerSpecifiedActions.second.end()); + checkPartialSizeAndActionsVector(PointerSpecifiedActions.second); + // For pointer types, we assume that there isn't a meaningfull way + // to change the number of bits used in the pointer. + setPointerAction( + Opcode, TypeIdx, PointerSpecifiedActions.first, + unsupportedForDifferentSizes(PointerSpecifiedActions.second)); + } + + // 3. Handle vector types + SizeAndActionsVec ElementSizesSeen; + for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) { + std::sort(VectorSpecifiedActions.second.begin(), + VectorSpecifiedActions.second.end()); + const uint16_t ElementSize = VectorSpecifiedActions.first; + ElementSizesSeen.push_back({ElementSize, Legal}); + checkPartialSizeAndActionsVector(VectorSpecifiedActions.second); + // For vector types, we assume that the best way to adapt the number + // of elements is to the next larger number of elements type for which + // the vector type is legal, unless there is no such type. In that case, + // legalize towards a vector type with a smaller number of elements. + SizeAndActionsVec NumElementsActions; + for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) { + assert(BitsizeAndAction.first % ElementSize == 0); + const uint16_t NumElements = BitsizeAndAction.first / ElementSize; + NumElementsActions.push_back({NumElements, BitsizeAndAction.second}); + } + setVectorNumElementAction( + Opcode, TypeIdx, ElementSize, + moreToWiderTypesAndLessToWidest(NumElementsActions)); + } + std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); + SizeChangeStrategy VectorElementSizeChangeStrategy = + &unsupportedForDifferentSizes; + if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() && + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) + VectorElementSizeChangeStrategy = + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx]; + setScalarInVectorAction( + Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen)); } } @@ -90,69 +174,24 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); - LLT Ty = Aspect.Type; - LegalizeAction Action = findInActions(Aspect); - // LegalizerHelper is not able to handle non-power-of-2 types right now, so do - // not try to legalize them unless they are marked as Legal or Custom. - // FIXME: This is a temporary hack until the general non-power-of-2 - // legalization works. - if (!isPowerOf2_64(Ty.getSizeInBits()) && - !(Action == Legal || Action == Custom)) - return std::make_pair(Unsupported, LLT()); - - if (Action != NotFound) - return findLegalAction(Aspect, Action); - - unsigned Opcode = Aspect.Opcode; - if (!Ty.isVector()) { - auto DefaultAction = DefaultActions.find(Aspect.Opcode); - if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal) - return std::make_pair(Legal, Ty); - - if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower) - return std::make_pair(Lower, Ty); - - if (DefaultAction == DefaultActions.end() || - DefaultAction->second != NarrowScalar) - return std::make_pair(Unsupported, LLT()); - return findLegalAction(Aspect, NarrowScalar); - } - - LLT EltTy = Ty.getElementType(); - int NumElts = Ty.getNumElements(); - - auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy)); - if (ScalarAction != ScalarInVectorActions.end() && - ScalarAction->second != Legal) - return findLegalAction(Aspect, ScalarAction->second); - - // The element type is legal in principle, but the number of elements is - // wrong. - auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy)); - if (MaxLegalElts > NumElts) - return findLegalAction(Aspect, MoreElements); - - if (MaxLegalElts == 0) { - // Scalarize if there's no legal vector type, which is just a special case - // of FewerElements. - return std::make_pair(FewerElements, EltTy); - } - - return findLegalAction(Aspect, FewerElements); + if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) + return findScalarLegalAction(Aspect); + assert(Aspect.Type.isVector()); + return findVectorLegalAction(Aspect); } std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { SmallBitVector SeenTypes(8); - const MCInstrDesc &MCID = MI.getDesc(); - const MCOperandInfo *OpInfo = MCID.OpInfo; - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { + const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + // FIXME: probably we'll need to cache the results here somehow? + for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { if (!OpInfo[i].isGenericType()) continue; - // We don't want to repeatedly check the same operand index, that - // could get expensive. + // We must only record actions once for each TypeIdx; otherwise we'd + // try to legalize operands multiple times down the line. unsigned TypeIdx = OpInfo[i].getGenericTypeIndex(); if (SeenTypes[TypeIdx]) continue; @@ -172,38 +211,166 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, return std::get<0>(getAction(MI, MRI)) == Legal; } -Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect, - LegalizeAction Action) const { - switch(Action) { - default: - llvm_unreachable("Cannot find legal type"); +bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + return false; +} + +LegalizerInfo::SizeAndActionsVec +LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest( + const SizeAndActionsVec &v, LegalizeAction IncreaseAction, + LegalizeAction DecreaseAction) { + SizeAndActionsVec result; + unsigned LargestSizeSoFar = 0; + if (v.size() >= 1 && v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + LargestSizeSoFar = v[i].first; + if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) { + result.push_back({LargestSizeSoFar + 1, IncreaseAction}); + LargestSizeSoFar = v[i].first + 1; + } + } + result.push_back({LargestSizeSoFar + 1, DecreaseAction}); + return result; +} + +LegalizerInfo::SizeAndActionsVec +LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest( + const SizeAndActionsVec &v, LegalizeAction DecreaseAction, + LegalizeAction IncreaseAction) { + SizeAndActionsVec result; + if (v.size() == 0 || v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) { + result.push_back({v[i].first + 1, DecreaseAction}); + } + } + return result; +} + +LegalizerInfo::SizeAndAction +LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { + assert(Size >= 1); + // Find the last element in Vec that has a bitsize equal to or smaller than + // the requested bit size. + // That is the element just before the first element that is bigger than Size. + auto VecIt = std::upper_bound( + Vec.begin(), Vec.end(), Size, + [](const uint32_t Size, const SizeAndAction lhs) -> bool { + return Size < lhs.first; + }); + assert(VecIt != Vec.begin() && "Does Vec not start with size 1?"); + --VecIt; + int VecIdx = VecIt - Vec.begin(); + + LegalizeAction Action = Vec[VecIdx].second; + switch (Action) { case Legal: case Lower: case Libcall: case Custom: - return Aspect.Type; + return {Size, Action}; + case FewerElements: + // FIXME: is this special case still needed and correct? + // Special case for scalarization: + if (Vec == SizeAndActionsVec({{1, FewerElements}})) + return {1, FewerElements}; + LLVM_FALLTHROUGH; case NarrowScalar: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); - } - case WidenScalar: { - return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT { - return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize(); - }); - } - case FewerElements: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + // The following needs to be a loop, as for now, we do allow needing to + // go over "Unsupported" bit sizes before finding a legalizable bit size. + // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8, + // we need to iterate over s9, and then to s32 to return (s32, Legal). + // If we want to get rid of the below loop, we should have stronger asserts + // when building the SizeAndActionsVecs, probably not allowing + // "Unsupported" unless at the ends of the vector. + for (int i = VecIdx - 1; i >= 0; --i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); } + case WidenScalar: case MoreElements: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + // See above, the following needs to be a loop, at least for now. + for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); } + case Unsupported: + return {Size, Unsupported}; + case NotFound: + llvm_unreachable("NotFound"); } + llvm_unreachable("Action has an unknown enum value"); } -bool LegalizerInfo::legalizeCustom(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - return false; +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isScalar() || Aspect.Type.isPointer()); + if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, LLT()}; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + if (Aspect.Type.isPointer() && + AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) == + AddrSpace2PointerActions[OpcodeIdx].end()) { + return {NotFound, LLT()}; + } + const SmallVector<SizeAndActionsVec, 1> &Actions = + Aspect.Type.isPointer() + ? AddrSpace2PointerActions[OpcodeIdx] + .find(Aspect.Type.getAddressSpace()) + ->second + : ScalarActions[OpcodeIdx]; + if (Aspect.Idx >= Actions.size()) + return {NotFound, LLT()}; + const SizeAndActionsVec &Vec = Actions[Aspect.Idx]; + // FIXME: speed up this search, e.g. by using a results cache for repeated + // queries? + auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits()); + return {SizeAndAction.second, + Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first) + : LLT::pointer(Aspect.Type.getAddressSpace(), + SizeAndAction.first)}; +} + +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isVector()); + // First legalize the vector element size, then legalize the number of + // lanes in the vector. + if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, Aspect.Type}; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + const unsigned TypeIdx = Aspect.Idx; + if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size()) + return {NotFound, Aspect.Type}; + const SizeAndActionsVec &ElemSizeVec = + ScalarInVectorActions[OpcodeIdx][TypeIdx]; + + LLT IntermediateType; + auto ElementSizeAndAction = + findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits()); + IntermediateType = + LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first); + if (ElementSizeAndAction.second != Legal) + return {ElementSizeAndAction.second, IntermediateType}; + + auto i = NumElements2Actions[OpcodeIdx].find( + IntermediateType.getScalarSizeInBits()); + if (i == NumElements2Actions[OpcodeIdx].end()) { + return {NotFound, IntermediateType}; + } + const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx]; + auto NumElementsAndAction = + findAction(NumElementsVec, IntermediateType.getNumElements()); + return {NumElementsAndAction.second, + LLT::vector(NumElementsAndAction.first, + IntermediateType.getScalarSizeInBits())}; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b4fe5f15fdd..230c9ef04c5 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -15,8 +15,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index f117c609453..3854e9b263d 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -19,10 +19,10 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index a9f3d73a294..9ae0f970f42 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #define DEBUG_TYPE "globalisel-utils" diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 08720d1271f..6d2a55c65f4 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -40,7 +41,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index bf0f88d49a8..b1cac2a107d 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -44,6 +44,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/LLVMContext.h" @@ -51,7 +52,6 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 2e991de6221..a383b72dce6 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -49,7 +50,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index a45b1e39fee..9328e2d900a 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -36,6 +36,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -46,8 +48,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 0c81306a9a5..1923a8c2529 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -51,7 +52,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 92cca1a5495..61fbfdd64a2 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index a9aec926115..f9c5652e8a1 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -34,10 +34,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 2eab0376da2..33ae476bf4a 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp new file mode 100644 index 00000000000..62596440c73 --- /dev/null +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -0,0 +1,626 @@ +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to employ a canonical code transformation so +// that code compiled with slightly different IR passes can be diffed more +// effectively than otherwise. This is done by renaming vregs in a given +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to +// move defs closer to their use inorder to reduce diffs caused by slightly +// different schedules. +// +// Basic Usage: +// +// llc -o - -run-pass mir-canonicalizer example.mir +// +// Reorders instructions canonically. +// Renames virtual register operands canonically. +// Strips certain MIR artifacts (optionally). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +using namespace llvm; + +namespace llvm { +extern char &MIRCanonicalizerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-canonicalizer" + +static cl::opt<unsigned> +CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("Function number to canonicalize.")); + +static cl::opt<unsigned> +CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("BasicBlock number to canonicalize.")); + +namespace { + +class MIRCanonicalizer : public MachineFunctionPass { +public: + static char ID; + MIRCanonicalizer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename register operands in a canonical ordering."; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { + VRType type; + unsigned reg; + +public: + TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} + TypedVReg(VRType type) : type(type), reg(~0U) { + assert(type != RSE_Reg && "Expected a non-register type."); + } + + bool isReg() const { return type == RSE_Reg; } + bool isFrameIndex() const { return type == RSE_FrameIndex; } + bool isCandidate() const { return type == RSE_NewCandidate; } + + VRType getType() const { return type; } + unsigned getReg() const { + assert(this->isReg() && "Expected a virtual or physical register."); + return reg; + } +}; + +char MIRCanonicalizer::ID; + +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; + +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + std::vector<MachineBasicBlock *> RPOList; + for (auto MBB : RPOT) { + RPOList.push_back(MBB); + } + + return RPOList; +} + +// Set a dummy vreg. We use this vregs register class to generate throw-away +// vregs that are used to skip vreg numbers so that vreg numbers line up. +static unsigned GetDummyVReg(const MachineFunction &MF) { + for (auto &MBB : MF) { + for (auto &MI : MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + return MO.getReg(); + } + } + } + + return ~0U; +} + +static bool rescheduleCanonically(MachineBasicBlock *MBB) { + + bool Changed = false; + + // Calculates the distance of MI from the begining of its parent BB. + auto getInstrIdx = [](const MachineInstr &MI) { + unsigned i = 0; + for (auto &CurMI : *MI.getParent()) { + if (&CurMI == &MI) + return i; + i++; + } + return ~0U; + }; + + // Pre-Populate vector of instructions to reschedule so that we don't + // clobber the iterator. + std::vector<MachineInstr *> Instructions; + for (auto &MI : *MBB) { + Instructions.push_back(&MI); + } + + for (auto *II : Instructions) { + if (II->getNumOperands() == 0) + continue; + + MachineOperand &MO = II->getOperand(0); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + + MachineInstr *Def = II; + unsigned Distance = ~0U; + MachineInstr *UseToBringDefCloserTo = nullptr; + MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); + for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { + MachineInstr *UseInst = UO.getParent(); + + const unsigned DefLoc = getInstrIdx(*Def); + const unsigned UseLoc = getInstrIdx(*UseInst); + const unsigned Delta = (UseLoc - DefLoc); + + if (UseInst->getParent() != Def->getParent()) + continue; + if (DefLoc >= UseLoc) + continue; + + if (Delta < Distance) { + Distance = Delta; + UseToBringDefCloserTo = UseInst; + } + } + + const auto BBE = MBB->instr_end(); + MachineBasicBlock::iterator DefI = BBE; + MachineBasicBlock::iterator UseI = BBE; + + for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { + + if (DefI != BBE && UseI != BBE) + break; + + if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) + continue; + + if (&*BBI == Def) { + DefI = BBI; + continue; + } + + if (&*BBI == UseToBringDefCloserTo) { + UseI = BBI; + continue; + } + } + + if (DefI == BBE || UseI == BBE) + continue; + + DEBUG({ + dbgs() << "Splicing "; + DefI->dump(); + dbgs() << " right before: "; + UseI->dump(); + }); + + Changed = true; + MBB->splice(UseI, MBB, DefI); + } + + return Changed; +} + +/// Here we find our candidates. What makes an interesting candidate? +/// An candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. We use these as an expression +/// tree root that we walk inorder to build a canonical walk which should result +/// in canoncal vreg renaming. +static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { + std::vector<MachineInstr *> Candidates; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + MachineInstr *MI = &*II; + + bool DoesMISideEffect = false; + + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { + const unsigned Dst = MI->getOperand(0).getReg(); + DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + if (DoesMISideEffect) break; + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); + } + } + + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) + continue; + + DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + Candidates.push_back(MI); + } + + return Candidates; +} + +void doCandidateWalk(std::vector<TypedVReg> &VRegs, + std::queue <TypedVReg> &RegQueue, + std::vector<MachineInstr *> &VisitedMIs, + const MachineBasicBlock *MBB) { + + const MachineFunction &MF = *MBB->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + while (!RegQueue.empty()) { + + auto TReg = RegQueue.front(); + RegQueue.pop(); + + if (TReg.isFrameIndex()) { + DEBUG(dbgs() << "Popping frame index.\n";); + VRegs.push_back(TypedVReg(RSE_FrameIndex)); + continue; + } + + assert(TReg.isReg() && "Expected vreg or physreg."); + unsigned Reg = TReg.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DEBUG({ + dbgs() << "Popping vreg "; + MRI.def_begin(Reg)->dump(); + dbgs() << "\n"; + }); + + if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { + return TR.isReg() && TR.getReg() == Reg; + })) { + VRegs.push_back(TypedVReg(Reg)); + } + } else { + DEBUG(dbgs() << "Popping physreg.\n";); + VRegs.push_back(TypedVReg(Reg)); + continue; + } + + for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { + MachineInstr *Def = RI->getParent(); + + if (Def->getParent() != MBB) + continue; + + if (llvm::any_of(VisitedMIs, + [&](const MachineInstr *VMI) { return Def == VMI; })) { + break; + } + + DEBUG({ + dbgs() << "\n========================\n"; + dbgs() << "Visited MI: "; + Def->dump(); + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; + dbgs() << "\n========================\n"; + }); + VisitedMIs.push_back(Def); + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + + MachineOperand &MO = Def->getOperand(I); + if (MO.isFI()) { + DEBUG(dbgs() << "Pushing frame index.\n";); + RegQueue.push(TypedVReg(RSE_FrameIndex)); + } + + if (!MO.isReg()) + continue; + RegQueue.push(TypedVReg(MO.getReg())); + } + } + } +} + +// TODO: Work to remove this in the future. One day when we have named vregs +// we should be able to form the canonical name based on some characteristic +// we see in that point of the expression tree (like if we were to name based +// on some sort of value numbering scheme). +static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) { + const unsigned VR_GAP = (++VRegGapIndex * 1000); + + DEBUG({ + dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " + << VR_GAP << "\n"; + }); + + unsigned I = MRI.createVirtualRegister(RC); + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + while (I != E) { + I = MRI.createVirtualRegister(RC); + } +} + +static std::map<unsigned, unsigned> +GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, + const std::vector<unsigned> &renamedInOtherBB, + MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) { + std::map<unsigned, unsigned> VRegRenameMap; + unsigned LastRenameReg = MRI.createVirtualRegister(RC); + bool FirstCandidate = true; + + for (auto &vreg : VRegs) { + if (vreg.isFrameIndex()) { + // We skip one vreg for any frame index because there is a good chance + // (especially when comparing SelectionDAG to GlobalISel generated MIR) + // that in the other file we are just getting an incoming vreg that comes + // from a copy from a frame index. So it's safe to skip by one. + LastRenameReg = MRI.createVirtualRegister(RC); + DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + continue; + } else if (vreg.isCandidate()) { + + // After the first candidate, for every subsequent candidate, we skip mod + // 10 registers so that the candidates are more likely to start at the + // same vreg number making it more likely that the canonical walk from the + // candidate insruction. We don't need to skip from the first candidate of + // the BasicBlock because we already skip ahead several vregs for each BB. + while (LastRenameReg % 10) { + if (!FirstCandidate) break; + LastRenameReg = MRI.createVirtualRegister(RC); + + DEBUG({ + dbgs() << "Skipping rename for new candidate " << LastRenameReg + << "\n"; + }); + } + FirstCandidate = false; + continue; + } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { + LastRenameReg = MRI.createVirtualRegister(RC); + DEBUG({ + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; + }); + continue; + } + + auto Reg = vreg.getReg(); + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { + DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); + continue; + } + + auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + LastRenameReg = Rename; + + if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { + DEBUG(dbgs() << "Mapping vreg ";); + if (MRI.reg_begin(Reg) != MRI.reg_end()) { + DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + } else { + DEBUG(dbgs() << Reg;); + } + DEBUG(dbgs() << " to ";); + if (MRI.reg_begin(Rename) != MRI.reg_end()) { + DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + } else { + DEBUG(dbgs() << Rename;); + } + DEBUG(dbgs() << "\n";); + + VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); + } + } + + return VRegRenameMap; +} + +static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, + const std::map<unsigned, unsigned> &VRegRenameMap, + MachineRegisterInfo &MRI) { + bool Changed = false; + for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + + auto VReg = I->first; + auto Rename = I->second; + + RenamedInOtherBB.push_back(Rename); + + std::vector<MachineOperand *> RenameMOs; + for (auto &MO : MRI.reg_operands(VReg)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + Changed = true; + MO->setReg(Rename); + + if (!MO->isDef()) + MO->setIsKill(false); + } + } + + return Changed; +} + +static bool doDefKillClear(MachineBasicBlock *MBB) { + bool Changed = false; + + for (auto &MI : *MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (!MO.isDef() && MO.isKill()) { + Changed = true; + MO.setIsKill(false); + } + + if (MO.isDef() && MO.isDead()) { + Changed = true; + MO.setIsDead(false); + } + } + } + + return Changed; +} + +static bool runOnBasicBlock(MachineBasicBlock *MBB, + std::vector<StringRef> &bbNames, + std::vector<unsigned> &renamedInOtherBB, + unsigned &basicBlockNum, unsigned &VRegGapIndex) { + + if (CanonicalizeBasicBlockNumber != ~0U) { + if (CanonicalizeBasicBlockNumber != basicBlockNum++) + return false; + DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); + } + + if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { + DEBUG({ + dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() + << "\n"; + }); + return false; + } + + DEBUG({ + dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + }); + + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + const unsigned DummyVReg = GetDummyVReg(MF); + const TargetRegisterClass *DummyRC = + (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg); + if (!DummyRC) return false; + + bbNames.push_back(MBB->getName()); + DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + + DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); + Changed |= rescheduleCanonically(MBB); + DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + + std::vector<MachineInstr *> Candidates = populateCandidates(MBB); + std::vector<MachineInstr *> VisitedMIs; + std::copy(Candidates.begin(), Candidates.end(), + std::back_inserter(VisitedMIs)); + + std::vector<TypedVReg> VRegs; + for (auto candidate : Candidates) { + VRegs.push_back(TypedVReg(RSE_NewCandidate)); + + std::queue<TypedVReg> RegQueue; + + // Here we walk the vreg operands of a non-root node along our walk. + // The root nodes are the original candidates (stores normally). + // These are normally not the root nodes (except for the case of copies to + // physical registers). + for (unsigned i = 1; i < candidate->getNumOperands(); i++) { + if (candidate->mayStore() || candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + + DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + RegQueue.push(TypedVReg(MO.getReg())); + } + + // Here we walk the root candidates. We start from the 0th operand because + // the root is normally a store to a vreg. + for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + + if (!candidate->mayStore() && !candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + + // TODO: Do we want to only add vregs here? + if (!MO.isReg() && !MO.isFI()) + continue; + + DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + + RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : + TypedVReg(RSE_FrameIndex)); + } + + doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); + } + + // If we have populated no vregs to rename then bail. + // The rest of this function does the vreg remaping. + if (VRegs.size() == 0) + return Changed; + + // Skip some vregs, so we can recon where we'll land next. + SkipVRegs(VRegGapIndex, MRI, DummyRC); + + auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + Changed |= doDefKillClear(MBB); + + DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); + DEBUG(dbgs() << "\n\n================================================\n\n"); + return Changed; +} + +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { + + static unsigned functionNum = 0; + if (CanonicalizeFunctionNumber != ~0U) { + if (CanonicalizeFunctionNumber != functionNum++) + return false; + DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); + } + + // we need a valid vreg to create a vreg type for skipping all those + // stray vreg numbers so reach alignment/canonical vreg values. + std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); + + DEBUG( + dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; + for (auto MBB : RPOList) { + dbgs() << MBB->getName() << "\n"; + } + dbgs() << "\n\n================================================\n\n"; + ); + + std::vector<StringRef> BBNames; + std::vector<unsigned> RenamedInOtherBB; + + unsigned GapIdx = 0; + unsigned BBNum = 0; + + bool Changed = false; + + for (auto MBB : RPOList) + Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); + + return Changed; +} + diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index 9c8743a7164..fbba60c4312 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MILexer.h" #include "MIParser.h" +#include "MILexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" @@ -21,8 +21,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -64,7 +65,6 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index aae48587c5b..a817c62c985 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -12,16 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -31,19 +33,18 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" @@ -57,9 +58,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -162,8 +162,8 @@ public: void printStackObjectReference(int FrameIndex); void printOffset(int64_t Offset); void printTargetFlags(const MachineOperand &Op); - void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, + void print(const MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef = false); void print(const LLVMContext &Context, const TargetInstrInfo &TII, const MachineMemOperand &Op); @@ -734,7 +734,7 @@ void MIPrinter::print(const MachineInstr &MI) { ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + print(MI, I, TRI, ShouldPrintRegisterTies, getTypeToPrint(MI, I, PrintedTypes, MRI), /*IsDef=*/true); } @@ -751,7 +751,7 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + print(MI, I, TRI, ShouldPrintRegisterTies, getTypeToPrint(MI, I, PrintedTypes, MRI)); NeedComma = true; } @@ -923,9 +923,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { return nullptr; } -void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, +void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo *TRI, + bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef) { + const MachineOperand &Op = MI.getOperand(OpIdx); printTargetFlags(Op); switch (Op.getType()) { case MachineOperand::MO_Register: { @@ -959,13 +961,16 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, } } if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) - OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; + OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(OpIdx) << ")"; if (TypeToPrint.isValid()) OS << '(' << TypeToPrint << ')'; break; } case MachineOperand::MO_Immediate: - OS << Op.getImm(); + if (MI.isOperandSubregIdx(OpIdx)) + OS << "%subreg." << TRI->getSubRegIndexName(Op.getImm()); + else + OS << Op.getImm(); break; case MachineOperand::MO_CImmediate: Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index d5758da0464..40ffbc46556 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -30,7 +31,6 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -42,6 +42,8 @@ using namespace llvm; MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; + if (B) + IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight(); } MachineBasicBlock::~MachineBasicBlock() { @@ -338,6 +340,12 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, } OS << '\n'; } + if (IrrLoopHeaderWeight) { + if (Indexes) OS << '\t'; + OS << " Irreducible loop header weight: " + << IrrLoopHeaderWeight.getValue(); + OS << '\n'; + } } void MachineBasicBlock::printAsOperand(raw_ostream &OS, diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 14cd91206d8..2c336e45056 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -234,6 +234,12 @@ MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None; } +bool +MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) { + assert(MBFI && "Expected analysis to be available"); + return MBFI->isIrrLoopHeader(MBB); +} + const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { return MBFI ? MBFI->getFunction() : nullptr; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index c5991332f08..9fa4c0d5e4f 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -55,7 +56,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index f0f63715d2f..be197a48d80 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -35,7 +36,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 9fc990f5c24..5b576b68fcc 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -21,11 +21,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 61f56fffc88..1a39afe655e 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -23,11 +23,11 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index be8adf75fb7..75c05e2f002 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -16,10 +16,10 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 250a10c7d07..570c410e1fe 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -58,7 +58,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index bb2dda980e4..2c81218f8f6 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -58,7 +59,6 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -320,8 +320,45 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { } case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); - case MachineOperand::MO_CFIIndex: - return getCFIIndex() == Other.getCFIIndex(); + case MachineOperand::MO_CFIIndex: { + const MachineFunction *MF = getParent()->getParent()->getParent(); + const MachineFunction *OtherMF = + Other.getParent()->getParent()->getParent(); + MCCFIInstruction Inst = MF->getFrameInstructions()[getCFIIndex()]; + MCCFIInstruction OtherInst = + OtherMF->getFrameInstructions()[Other.getCFIIndex()]; + MCCFIInstruction::OpType op = Inst.getOperation(); + if (op != OtherInst.getOperation()) return false; + switch (op) { + case MCCFIInstruction::OpDefCfa: + case MCCFIInstruction::OpOffset: + case MCCFIInstruction::OpRelOffset: + if (Inst.getRegister() != OtherInst.getRegister()) return false; + if (Inst.getOffset() != OtherInst.getOffset()) return false; + break; + case MCCFIInstruction::OpRestore: + case MCCFIInstruction::OpUndefined: + case MCCFIInstruction::OpSameValue: + case MCCFIInstruction::OpDefCfaRegister: + if (Inst.getRegister() != OtherInst.getRegister()) return false; + break; + case MCCFIInstruction::OpRegister: + if (Inst.getRegister() != OtherInst.getRegister()) return false; + if (Inst.getRegister2() != OtherInst.getRegister2()) return false; + break; + case MCCFIInstruction::OpDefCfaOffset: + case MCCFIInstruction::OpAdjustCfaOffset: + case MCCFIInstruction::OpGnuArgsSize: + if (Inst.getOffset() != OtherInst.getOffset()) return false; + break; + case MCCFIInstruction::OpRememberState: + case MCCFIInstruction::OpRestoreState: + case MCCFIInstruction::OpEscape: + case MCCFIInstruction::OpWindowSave: + break; + } + return true; + } case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); case MachineOperand::MO_IntrinsicID: @@ -370,8 +407,13 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); case MachineOperand::MO_MCSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); - case MachineOperand::MO_CFIIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + case MachineOperand::MO_CFIIndex: { + const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + MCCFIInstruction Inst = MF->getFrameInstructions()[MO.getCFIIndex()]; + return hash_combine(MO.getType(), MO.getTargetFlags(), Inst.getOperation(), + Inst.getRegister(), Inst.getRegister2(), + Inst.getOffset()); + } case MachineOperand::MO_IntrinsicID: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index b5621a09c6f..eceae9a968b 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index efb5c3371de..d1147fea08e 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" @@ -43,7 +44,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 1bc869e02e6..48b68e3b718 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -65,11 +65,11 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index c852c2e1564..d270b8e5d8f 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -89,6 +89,7 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -102,7 +103,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index be06053f004..1674aba0c82 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 65d82366767..a3e93de67bb 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -21,11 +21,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 3e12bdcd689..900a0a63e96 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/LaneBitmask.h" @@ -52,7 +53,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index dd6e26d8f8b..f52e3942664 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Pass.h" @@ -40,7 +41,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 0bd5c56871c..f894f470445 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -51,6 +51,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" @@ -66,7 +67,6 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 633a853b2c7..13ddad59382 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -19,10 +19,10 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #define DEBUG_TYPE "machine-scheduler" diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 6430e54a59c..530040a3332 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c7f0329b3c5..af26c170cb8 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -31,11 +31,11 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index 513e8271656..84dbebfd2b6 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -16,8 +16,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 7cff85a3ab0..c267018c4d1 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -81,6 +81,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" @@ -88,7 +89,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 4a50d895340..9770b336da6 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -31,10 +31,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f2249f9e37e..fd92609613b 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -34,12 +34,12 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 0118580a626..feab36d3995 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -13,9 +13,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index d9e9b3360a0..e41ffc244d9 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -39,6 +39,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -55,8 +57,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetOptions.h" @@ -76,12 +76,6 @@ using namespace llvm; using MBBVector = SmallVector<MachineBasicBlock *, 4>; -static void spillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCXFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks); - namespace { class PEI : public MachineFunctionPass { @@ -125,6 +119,7 @@ private: void calculateCallFrameInfo(MachineFunction &Fn); void calculateSaveRestoreBlocks(MachineFunction &Fn); + void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); @@ -197,8 +192,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Handle CSR spilling and restoring, for targets that need it. if (Fn.getTarget().usesPhysRegsForPEI()) - spillCalleeSavedRegs(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex, SaveBlocks, - RestoreBlocks); + spillCalleeSavedRegs(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -505,11 +499,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } -static void spillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks) { +void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass // pipeline is set up without giving the passes a chance to look at the diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 5fa5587457d..86fd8745052 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 7061c3ff652..19467ae3b72 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" @@ -41,7 +42,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index e74ac79f001..23e5f907c88 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -54,6 +54,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -66,7 +67,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index 214c6d2c820..3aaa5a4738d 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -27,7 +27,7 @@ #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" using namespace llvm; diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 956dec39fc3..8e463ff272d 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1ef7e41b8ae..84c2e2548ec 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -35,17 +35,17 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/Pass.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 844ddb9ed3f..18fe2d85954 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -28,13 +28,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index bd5ecbd28f2..19e0f30ecfc 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -35,7 +35,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 5e95f760aaa..627bc1946f3 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,11 +19,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e2cb8cad6e1..b789e2d9c52 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -15,12 +15,12 @@ #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <cassert> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 491c56a7314..6e245feb735 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -63,6 +63,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -98,7 +99,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b736037d71d..696855f8018 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -19,6 +19,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -32,8 +34,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 7a123e3e92e..2760c6cbf86 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,12 +21,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ff49134f7b9..356f2585046 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -40,7 +40,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index b42edf8e751..0d85bccdeac 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -849,13 +849,14 @@ static void transferDbgValues(SelectionDAG &DAG, SDValue From, SDValue To, break; DIVariable *Var = Dbg->getVariable(); - auto *Fragment = DIExpression::createFragmentExpression( - Dbg->getExpression(), OffsetInBits, To.getValueSizeInBits()); - SDDbgValue *Clone = - DAG.getDbgValue(Var, Fragment, ToNode, To.getResNo(), Dbg->isIndirect(), - Dbg->getDebugLoc(), Dbg->getOrder()); + if (auto Fragment = DIExpression::createFragmentExpression( + Dbg->getExpression(), OffsetInBits, To.getValueSizeInBits())) { + SDDbgValue *Clone = DAG.getDbgValue(Var, *Fragment, ToNode, To.getResNo(), + Dbg->isIndirect(), Dbg->getDebugLoc(), + Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } Dbg->setIsInvalidated(); - ClonedDVs.push_back(Clone); } for (SDDbgValue *Dbg : ClonedDVs) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5d6c4998ecd..b55414b51b8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3844,7 +3844,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } LdOps.push_back(L); - + LdOp = L; LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 13799409327..87dcf6e6950 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -18,12 +18,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 98202925629..24d9d376de2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -42,7 +43,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7ddb0dc07fd..2e1abbe8bb2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -23,11 +23,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 631cb34717c..c25315f2983 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -25,11 +25,11 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <climits> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e5de280508b..2fb2615b072 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2893,11 +2893,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::FrameIndex: case ISD::TargetFrameIndex: - if (unsigned Align = InferPtrAlignment(Op)) { - // The low bits are known zero if the pointer is aligned. - Known.Zero.setLowBits(Log2_32(Align)); - break; - } + TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); break; default: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ccc06fa3ee1..f4f8879b5d8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -55,6 +55,8 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Argument.h" @@ -98,8 +100,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -2585,7 +2585,7 @@ static bool isVectorReductionOp(const User *I) { case Instruction::FAdd: case Instruction::FMul: if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().unsafeAlgebra()) + if (FPOp->getFastMathFlags().isFast()) break; LLVM_FALLTHROUGH; default: @@ -2631,7 +2631,7 @@ static bool isVectorReductionOp(const User *I) { if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) return false; UsersToVisit.push_back(U); } else if (const ShuffleVectorInst *ShufInst = @@ -2725,7 +2725,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + Flags.setUnsafeAlgebra(FMF.isFast()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); @@ -3862,7 +3862,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we -// extract the spalt value and use it as a uniform base. +// extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { @@ -4828,12 +4828,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Ignore inlined function arguments here. - // - // FIXME: Should we be checking DL->inlinedAt() to determine this? - if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) - return false; - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. @@ -4873,11 +4867,13 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( for (unsigned E = I + RegCount; I != E; ++I) { // The vregs are guaranteed to be allocated in sequence. Op = MachineOperand::CreateReg(VMI->second + I, false); - auto *FragmentExpr = DIExpression::createFragmentExpression( + auto FragmentExpr = DIExpression::createFragmentExpression( Expr, Offset, RegisterSize); + if (!FragmentExpr) + continue; FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, FragmentExpr)); + Op->getReg(), Variable, *FragmentExpr)); Offset += RegisterSize; } } @@ -7959,13 +7955,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); break; case Intrinsic::experimental_vector_reduce_fmul: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 1550347f006..1097fd92ede 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -38,7 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4c4d196427e..ae5eebd9545 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/APInt.h" @@ -45,9 +46,9 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -80,7 +81,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fe553bc986a..1f6fafb039e 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1288,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); + + if (unsigned Align = DAG.InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2_32(Align)); + } +} + /// This method can be implemented by targets that want to expose additional /// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 5fb6afee88a..3230aff5ed8 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -56,15 +56,17 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" @@ -73,8 +75,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 3656832a7f1..25a1c37b145 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -10,9 +10,9 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 1467179b7a3..7a9a65faaca 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/LaneBitmask.h" @@ -43,7 +44,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 8a47f3d2d6d..69614a55e14 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -28,12 +28,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index bd3a20f936d..5b76e36382c 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,14 +28,13 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" -#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -603,8 +603,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (PreRegAlloc && MI.isCall()) return false; - if (!MI.isPHI() && !MI.isDebugValue()) - InstrCount += 1; + if (!MI.isPHI() && !MI.isMetaInstruction()) + InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9dd98b4020d..4d2130f5ebe 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -20,7 +20,7 @@ #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -104,3 +104,12 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } + +int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { + llvm_unreachable("getInitialCFAOffset() not implemented!"); +} + +unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) + const { + llvm_unreachable("getInitialCFARegister() not implemented!"); +}
\ No newline at end of file diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index bac12efd639..702b91b7f77 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -19,6 +19,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" @@ -26,7 +27,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index ed845e1706f..99ff4931e2f 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -15,7 +15,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index c5101b1ecfc..59e88ba3bda 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -600,8 +600,14 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } - if (getOptLevel() != CodeGenOpt::None && EnableMergeICmps) { - addPass(createMergeICmpsPass()); + if (getOptLevel() != CodeGenOpt::None) { + // The MergeICmpsPass tries to create memcmp calls by grouping sequences of + // loads and compares. ExpandMemCmpPass then tries to expand those calls + // into optimally-sized loads and compares. The transforms are enabled by a + // target lowering hook. + if (EnableMergeICmps) + addPass(createMergeICmpsPass()); + addPass(createExpandMemCmpPass()); } // Run GC lowering passes for builtin collectors diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 55318237e95..758fdabf5dd 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index e1db9157f90..659d6f522af 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -16,13 +16,13 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 29cfd9fb178..d66272aed9b 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -14,11 +14,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <string> using namespace llvm; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index efd40b209e9..18f1485baca 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -46,6 +46,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Pass.h" @@ -54,7 +55,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index bdd25f29aea..5288ca67277 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" @@ -37,7 +38,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; static bool eliminateUnreachableBlock(Function &F) { diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 65c62b16719..d499c6c1e73 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -31,12 +31,12 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 2063ab11a74..fb621cab28b 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -23,10 +23,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index a88dcfcf542..f593953c62f 100644 --- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -184,7 +184,7 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue( FormValue.setSValue(Spec.getImplicitConstValue()); return FormValue; } - if (FormValue.extractValue(DebugInfoData, &Offset, &U)) + if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U)) return FormValue; } // March Offset along until we get to the attribute we want. diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index dbe6fe52407..f04ec7706cd 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -90,10 +90,11 @@ std::pair<uint32_t, dwarf::Tag> DWARFAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { uint32_t DieOffset = dwarf::DW_INVALID_OFFSET; dwarf::Tag DieTag = dwarf::DW_TAG_null; + DWARFFormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; for (auto Atom : getAtomsDesc()) { DWARFFormValue FormValue(Atom.second); - FormValue.extractValue(AccelSection, &HashDataOffset, NULL); + FormValue.extractValue(AccelSection, &HashDataOffset, FormParams); switch (Atom.first) { case dwarf::DW_ATOM_die_offset: DieOffset = *FormValue.getAsUnsignedConstant(); @@ -145,6 +146,7 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { uint32_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength; unsigned HashesBase = Offset + Hdr.NumBuckets * 4; unsigned OffsetsBase = HashesBase + Hdr.NumHashes * 4; + DWARFFormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; for (unsigned Bucket = 0; Bucket < Hdr.NumBuckets; ++Bucket) { unsigned Index = AccelSection.getU32(&Offset); @@ -181,7 +183,7 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { unsigned i = 0; for (auto &Atom : AtomForms) { OS << format("{Atom[%d]: ", i++); - if (Atom.extractValue(AccelSection, &DataOffset, nullptr)) + if (Atom.extractValue(AccelSection, &DataOffset, FormParams)) Atom.dump(OS); else OS << "Error extracting the value"; @@ -216,8 +218,10 @@ void DWARFAcceleratorTable::ValueIterator::Next() { NumData = 0; return; } + DWARFFormParams FormParams = {AccelTable->Hdr.Version, 0, + dwarf::DwarfFormat::DWARF32}; for (auto &Atom : AtomForms) - Atom.extractValue(AccelSection, &DataOffset, nullptr); + Atom.extractValue(AccelSection, &DataOffset, FormParams); ++Data; } diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 24aa666fb81..881cd1dfd11 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -329,9 +329,9 @@ void DWARFContext::dump( // representation. OS << "debug_line[" << format("0x%8.8x", Offset) << "]\n"; if (DumpOpts.Verbose) { - LineTable.parse(lineData, &Offset, &OS); + LineTable.parse(lineData, &Offset, &*CU, &OS); } else { - LineTable.parse(lineData, &Offset); + LineTable.parse(lineData, &Offset, &*CU); LineTable.dump(OS); } } @@ -349,7 +349,7 @@ void DWARFContext::dump( DWARFDataExtractor lineData(*DObj, DObj->getLineDWOSection(), isLittleEndian(), savedAddressByteSize); DWARFDebugLine::LineTable LineTable; - while (LineTable.Prologue.parse(lineData, &stmtOffset)) { + while (LineTable.Prologue.parse(lineData, &stmtOffset, nullptr)) { LineTable.dump(OS); LineTable.clear(); } @@ -681,7 +681,7 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) { // We have to parse it first. DWARFDataExtractor lineData(*DObj, U->getLineSection(), isLittleEndian(), U->getAddressByteSize()); - return Line->getOrParseLineTable(lineData, stmtOffset); + return Line->getOrParseLineTable(lineData, stmtOffset, U); } void DWARFContext::parseCompileUnits() { diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index bd8dd0d0ede..c99c7a9277e 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -144,7 +144,7 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, static bool parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, uint64_t EndPrologueOffset, - const DWARFFormParams &FormParams, + const DWARFFormParams &FormParams, const DWARFUnit *U, std::vector<StringRef> &IncludeDirectories, std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { // Get the directory entry description. @@ -162,7 +162,7 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, DWARFFormValue Value(Descriptor.Form); switch (Descriptor.Type) { case DW_LNCT_path: - if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, U)) return false; IncludeDirectories.push_back(Value.getAsCString().getValue()); break; @@ -187,7 +187,7 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, DWARFDebugLine::FileNameEntry FileEntry; for (auto Descriptor : FileDescriptors) { DWARFFormValue Value(Descriptor.Form); - if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, U)) return false; switch (Descriptor.Type) { case DW_LNCT_path: @@ -213,7 +213,7 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, } bool DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr) { + uint32_t *OffsetPtr, const DWARFUnit *U) { const uint64_t PrologueOffset = *OffsetPtr; clear(); @@ -253,7 +253,8 @@ bool DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, if (getVersion() >= 5) { if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - getFormParams(), IncludeDirectories, FileNames)) { + getFormParams(), U, IncludeDirectories, + FileNames)) { fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 " found an invalid directory or file table description at" @@ -382,24 +383,25 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const { const DWARFDebugLine::LineTable * DWARFDebugLine::getOrParseLineTable(const DWARFDataExtractor &DebugLineData, - uint32_t Offset) { + uint32_t Offset, const DWARFUnit *U) { std::pair<LineTableIter, bool> Pos = LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable())); LineTable *LT = &Pos.first->second; if (Pos.second) { - if (!LT->parse(DebugLineData, &Offset)) + if (!LT->parse(DebugLineData, &Offset, U)) return nullptr; } return LT; } bool DWARFDebugLine::LineTable::parse(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, raw_ostream *OS) { + uint32_t *OffsetPtr, const DWARFUnit *U, + raw_ostream *OS) { const uint32_t DebugLineOffset = *OffsetPtr; clear(); - if (!Prologue.parse(DebugLineData, OffsetPtr)) { + if (!Prologue.parse(DebugLineData, OffsetPtr, U)) { // Restore our offset and return false to indicate failure! *OffsetPtr = DebugLineOffset; return false; diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index d20eabff7f0..a579c06d02e 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -208,7 +208,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, DWARFUnit *U = Die.getDwarfUnit(); DWARFFormValue formValue(Form); - if (!formValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr, U)) + if (!formValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr, + U->getFormParams(), U)) return; OS << "\t("; @@ -550,7 +551,7 @@ void DWARFDie::attribute_iterator::updateForIndex( auto U = Die.getDwarfUnit(); assert(U && "Die must have valid DWARF unit"); bool b = AttrValue.Value.extractValue(U->getDebugInfoExtractor(), - &ParseOffset, U); + &ParseOffset, U->getFormParams(), U); (void)b; assert(b && "extractValue cannot fail on fully parsed DWARF"); AttrValue.ByteSize = ParseOffset - AttrValue.Offset; diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index d63e84ef4e7..c4abd49797b 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -276,7 +276,8 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { } bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, - uint32_t *OffsetPtr, const DWARFUnit *CU) { + uint32_t *OffsetPtr, DWARFFormParams FP, + const DWARFUnit *CU) { U = CU; bool Indirect = false; bool IsBlock = false; @@ -288,10 +289,8 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, switch (Form) { case DW_FORM_addr: case DW_FORM_ref_addr: { - if (!U) - return false; - uint16_t Size = (Form == DW_FORM_addr) ? U->getAddressByteSize() - : U->getRefAddrByteSize(); + uint16_t Size = + (Form == DW_FORM_addr) ? FP.AddrSize : FP.getRefAddrByteSize(); Value.uval = Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex); break; } @@ -360,10 +359,8 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, case DW_FORM_GNU_strp_alt: case DW_FORM_line_strp: case DW_FORM_strp_sup: { - if (!U) - return false; Value.uval = - Data.getRelocatedValue(U->getDwarfOffsetByteSize(), OffsetPtr); + Data.getRelocatedValue(FP.getDwarfOffsetByteSize(), OffsetPtr); break; } case DW_FORM_flag_present: diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index dd8e2ac8b53..dee27c621fa 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -176,9 +176,8 @@ Error PDBFileBuilder::commit(StringRef Filename) { uint64_t Filesize = Layout.SB->BlockSize * Layout.SB->NumBlocks; auto OutFileOrError = FileOutputBuffer::create(Filename, Filesize); - if (OutFileOrError.getError()) - return llvm::make_error<pdb::GenericError>(generic_error_code::invalid_path, - Filename); + if (auto E = OutFileOrError.takeError()) + return E; FileBufferByteStream Buffer(std::move(*OutFileOrError), llvm::support::little); BinaryStreamWriter Writer(Buffer); diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 3f41a1dc066..0fafe82404e 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1108,10 +1108,12 @@ static void writeAtomicRMWOperation(raw_ostream &Out, static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) { - // Unsafe algebra implies all the others, no need to write them all out - if (FPO->hasUnsafeAlgebra()) + // 'Fast' is an abbreviation for all fast-math-flags. + if (FPO->isFast()) Out << " fast"; else { + if (FPO->hasAllowReassoc()) + Out << " reassoc"; if (FPO->hasNoNaNs()) Out << " nnan"; if (FPO->hasNoInfs()) @@ -1122,6 +1124,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { Out << " arcp"; if (FPO->hasAllowContract()) Out << " contract"; + if (FPO->hasApproxFunc()) + Out << " afn"; } } diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 07d499bc193..2c9e9be3da5 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -78,6 +78,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name=="ssse3.pabs.d.128" || // Added in 6.0 Name.startswith("avx2.pabs.") || // Added in 6.0 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 + Name.startswith("avx512.broadcastm") || // Added in 6.0 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 Name.startswith("sse2.pcmpeq.") || // Added in 3.1 Name.startswith("sse2.pcmpgt.") || // Added in 3.1 @@ -1027,7 +1028,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, CI->getArgOperand(0), CI->getArgOperand(1)); Rep = Builder.CreateSExt(Rep, CI->getType(), ""); - } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ + } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) { + Type *ExtTy = Type::getInt32Ty(C); + if (CI->getOperand(0)->getType()->isIntegerTy(8)) + ExtTy = Type::getInt64Ty(C); + unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / + ExtTy->getPrimitiveSizeInBits(); + Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); + Rep = Builder.CreateVectorSplat(NumElts, Rep); + } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) { unsigned NumElts = CI->getArgOperand(1)->getType()->getVectorNumElements(); Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 2b780adf6c6..22513924a96 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -447,3 +447,16 @@ bool BasicBlock::isLandingPad() const { const LandingPadInst *BasicBlock::getLandingPadInst() const { return dyn_cast<LandingPadInst>(getFirstNonPHI()); } + +Optional<uint64_t> BasicBlock::getIrrLoopHeaderWeight() const { + const TerminatorInst *TI = getTerminator(); + if (MDNode *MDIrrLoopHeader = + TI->getMetadata(LLVMContext::MD_irr_loop)) { + MDString *MDName = cast<MDString>(MDIrrLoopHeader->getOperand(0)); + if (MDName->getString().equals("loop_header_weight")) { + auto *CI = mdconst::extract<ConstantInt>(MDIrrLoopHeader->getOperand(1)); + return Optional<uint64_t>(CI->getValue().getZExtValue()); + } + } + return Optional<uint64_t>(); +} diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index eb4b9143090..17822bbbb5c 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_library(LLVMCore DiagnosticPrinter.cpp Dominators.cpp Function.cpp - GCOV.cpp GVMaterializer.cpp Globals.cpp IRBuilder.cpp diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index b34383f5ada..df0c52d4463 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -676,25 +676,7 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) { void Instruction::applyMergedLocation(const DILocation *LocA, const DILocation *LocB) { - if (LocA && LocB && (LocA == LocB || !LocA->canDiscriminate(*LocB))) { - setDebugLoc(LocA); - return; - } - if (!LocA || !LocB || !isa<CallInst>(this)) { - setDebugLoc(nullptr); - return; - } - SmallPtrSet<DILocation *, 5> InlinedLocationsA; - for (DILocation *L = LocA->getInlinedAt(); L; L = L->getInlinedAt()) - InlinedLocationsA.insert(L); - const DILocation *Result = LocB; - for (DILocation *L = LocB->getInlinedAt(); L; L = L->getInlinedAt()) { - Result = L; - if (InlinedLocationsA.count(L)) - break; - } - setDebugLoc(DILocation::get( - Result->getContext(), 0, 0, Result->getScope(), Result->getInlinedAt())); + setDebugLoc(DILocation::getMergedLocation(LocA, LocB, this)); } //===----------------------------------------------------------------------===// diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 645bba1652d..ae02392ea14 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -14,9 +14,11 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" using namespace llvm; @@ -66,6 +68,31 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line, Storage, Context.pImpl->DILocations); } +const DILocation * +DILocation::getMergedLocation(const DILocation *LocA, const DILocation *LocB, + const Instruction *ForInst) { + if (!LocA || !LocB) + return nullptr; + + if (LocA == LocB || !LocA->canDiscriminate(*LocB)) + return LocA; + + if (!dyn_cast_or_null<CallInst>(ForInst)) + return nullptr; + + SmallPtrSet<DILocation *, 5> InlinedLocationsA; + for (DILocation *L = LocA->getInlinedAt(); L; L = L->getInlinedAt()) + InlinedLocationsA.insert(L); + const DILocation *Result = LocB; + for (DILocation *L = LocB->getInlinedAt(); L; L = L->getInlinedAt()) { + Result = L; + if (InlinedLocationsA.count(L)) + break; + } + return DILocation::get(Result->getContext(), 0, 0, Result->getScope(), + Result->getInlinedAt()); +} + DINode::DIFlags DINode::getFlag(StringRef Flag) { return StringSwitch<DIFlags>(Flag) #define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME) @@ -726,14 +753,23 @@ DIExpression *DIExpression::prepend(const DIExpression *Expr, bool Deref, return DIExpression::get(Expr->getContext(), Ops); } -DIExpression *DIExpression::createFragmentExpression(const DIExpression *Expr, - unsigned OffsetInBits, - unsigned SizeInBits) { +Optional<DIExpression *> DIExpression::createFragmentExpression( + const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits) { SmallVector<uint64_t, 8> Ops; // Copy over the expression, but leave off any trailing DW_OP_LLVM_fragment. if (Expr) { for (auto Op : Expr->expr_ops()) { - if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { + switch (Op.getOp()) { + default: break; + case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: + // We can't safely split arithmetic into multiple fragments because we + // can't express carry-over between fragments. + // + // FIXME: We *could* preserve the lowest fragment of a constant offset + // operation if the offset fits into SizeInBits. + return None; + case dwarf::DW_OP_LLVM_fragment: { // Make the new offset point into the existing fragment. uint64_t FragmentOffsetInBits = Op.getArg(0); // Op.getArg(0) is FragmentOffsetInBits. @@ -741,7 +777,8 @@ DIExpression *DIExpression::createFragmentExpression(const DIExpression *Expr, assert((OffsetInBits + SizeInBits <= Op.getArg(0) + Op.getArg(1)) && "new fragment outside of original fragment"); OffsetInBits += FragmentOffsetInBits; - break; + continue; + } } Ops.push_back(Op.getOp()); for (unsigned I = 0; I < Op.getNumArgs(); ++I) diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index ceb521c4c48..ffc3a30e6a1 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -146,9 +146,14 @@ bool Instruction::isExact() const { return cast<PossiblyExactOperator>(this)->isExact(); } -void Instruction::setHasUnsafeAlgebra(bool B) { +void Instruction::setFast(bool B) { assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); - cast<FPMathOperator>(this)->setHasUnsafeAlgebra(B); + cast<FPMathOperator>(this)->setFast(B); +} + +void Instruction::setHasAllowReassoc(bool B) { + assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + cast<FPMathOperator>(this)->setHasAllowReassoc(B); } void Instruction::setHasNoNaNs(bool B) { @@ -171,6 +176,11 @@ void Instruction::setHasAllowReciprocal(bool B) { cast<FPMathOperator>(this)->setHasAllowReciprocal(B); } +void Instruction::setHasApproxFunc(bool B) { + assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + cast<FPMathOperator>(this)->setHasApproxFunc(B); +} + void Instruction::setFastMathFlags(FastMathFlags FMF) { assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); cast<FPMathOperator>(this)->setFastMathFlags(FMF); @@ -181,9 +191,14 @@ void Instruction::copyFastMathFlags(FastMathFlags FMF) { cast<FPMathOperator>(this)->copyFastMathFlags(FMF); } -bool Instruction::hasUnsafeAlgebra() const { +bool Instruction::isFast() const { assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); - return cast<FPMathOperator>(this)->hasUnsafeAlgebra(); + return cast<FPMathOperator>(this)->isFast(); +} + +bool Instruction::hasAllowReassoc() const { + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); + return cast<FPMathOperator>(this)->hasAllowReassoc(); } bool Instruction::hasNoNaNs() const { @@ -211,6 +226,11 @@ bool Instruction::hasAllowContract() const { return cast<FPMathOperator>(this)->hasAllowContract(); } +bool Instruction::hasApproxFunc() const { + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); + return cast<FPMathOperator>(this)->hasApproxFunc(); +} + FastMathFlags Instruction::getFastMathFlags() const { assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->getFastMathFlags(); @@ -579,7 +599,7 @@ bool Instruction::isAssociative() const { switch (Opcode) { case FMul: case FAdd: - return cast<FPMathOperator>(this)->hasUnsafeAlgebra(); + return cast<FPMathOperator>(this)->isFast(); default: return false; } diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index a94da5452b8..c8b7c10a9a4 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -60,6 +60,7 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { {MD_absolute_symbol, "absolute_symbol"}, {MD_associated, "associated"}, {MD_callees, "callees"}, + {MD_irr_loop, "irr_loop"}, }; for (auto &MDKind : MDKinds) { diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index 54783e884e9..d8e64db7c5d 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -197,3 +197,10 @@ MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, } return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)}); } + +MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) { + SmallVector<Metadata *, 2> Vals(2); + Vals[0] = createString("loop_header_weight"); + Vals[1] = createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight)); + return MDNode::get(Context, Vals); +} diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 51a7d424c1f..5df0c6d81cf 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -454,6 +454,28 @@ void Value::replaceUsesOutsideBlock(Value *New, BasicBlock *BB) { } } +void Value::replaceUsesExceptBlockAddr(Value *New) { + use_iterator UI = use_begin(), E = use_end(); + for (; UI != E;) { + Use &U = *UI; + ++UI; + + if (isa<BlockAddress>(U.getUser())) + continue; + + // Must handle Constants specially, we cannot call replaceUsesOfWith on a + // constant because they are uniqued. + if (auto *C = dyn_cast<Constant>(U.getUser())) { + if (!isa<GlobalValue>(C)) { + C->handleOperandChange(this, New); + continue; + } + } + + U.set(New); + } +} + namespace { // Various metrics for how much to strip off of pointers. enum PointerStripKind { diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index c528f7167e7..5bb1f84d2e5 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -115,8 +115,6 @@ using namespace llvm; -static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(true)); - namespace llvm { struct VerifierSupport { diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 017dd201f9c..9c737795b5a 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -630,6 +630,9 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, NonPrevailingComdats.insert(GV->getComdat()); cast<GlobalObject>(GV)->setComdat(nullptr); } + + // Set the 'local' flag based on the linker resolution for this symbol. + GV->setDSOLocal(Res.FinalDefinitionInLinkageUnit); } // Common resolution: collect the maximum size/alignment over all commons. // We also record if we see an instance of a common as prevailing, so that @@ -643,7 +646,6 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, CommonRes.Prevailing |= Res.Prevailing; } - // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } if (!M.getComdatSymbolTable().empty()) for (GlobalValue &GV : M.global_values()) @@ -698,10 +700,10 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, assert(ResI != ResE); SymbolResolution Res = *ResI++; - if (Res.Prevailing) { - if (!Sym.getIRName().empty()) { - auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( - Sym.getIRName(), GlobalValue::ExternalLinkage, "")); + if (!Sym.getIRName().empty()) { + auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( + Sym.getIRName(), GlobalValue::ExternalLinkage, "")); + if (Res.Prevailing) { ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); // For linker redefined symbols (via --wrap or --defsym) we want to @@ -713,6 +715,15 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, GUID, BM.getModuleIdentifier())) S->setLinkage(GlobalValue::WeakAnyLinkage); } + + // If the linker resolved the symbol to a local definition then mark it + // as local in the summary for the module we are adding. + if (Res.FinalDefinitionInLinkageUnit) { + if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( + GUID, BM.getModuleIdentifier())) { + S->setDSOLocal(true); + } + } } } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 9759c0c6c1d..87867c54fad 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -469,17 +469,15 @@ void LTOCodeGenerator::restoreLinkageForExternals() { if (I == ExternalSymbols.end()) return; - GV.setLinkage(I->second); - }; - - std::for_each(MergedModule->begin(), MergedModule->end(), externalize); - std::for_each(MergedModule->global_begin(), MergedModule->global_end(), - externalize); - std::for_each(MergedModule->alias_begin(), MergedModule->alias_end(), - externalize); -} - -void LTOCodeGenerator::verifyMergedModuleOnce() { + GV.setLinkage(I->second);
+ };
+
+ llvm::for_each(MergedModule->functions(), externalize);
+ llvm::for_each(MergedModule->globals(), externalize);
+ llvm::for_each(MergedModule->aliases(), externalize);
+}
+
+void LTOCodeGenerator::verifyMergedModuleOnce() {
// Only run on the first call. if (HasVerifiedInput) return; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 29b14414ea2..9a23e614f3a 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -290,7 +290,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Padding: return cast<MCPaddingFragment>(F).getSize(); - case MCFragment::FT_SafeSEH: + case MCFragment::FT_SymbolId: return 4; case MCFragment::FT_Align: { @@ -563,8 +563,8 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, break; } - case MCFragment::FT_SafeSEH: { - const MCSafeSEHFragment &SF = cast<MCSafeSEHFragment>(F); + case MCFragment::FT_SymbolId: { + const MCSymbolIdFragment &SF = cast<MCSymbolIdFragment>(F); OW->write32(SF.getSymbol()->getIndex()); break; } diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp index 94839de14f8..1aed50aaeb7 100644 --- a/lib/MC/MCFragment.cpp +++ b/lib/MC/MCFragment.cpp @@ -281,8 +281,8 @@ void MCFragment::destroy() { case FT_Padding: delete cast<MCPaddingFragment>(this); return; - case FT_SafeSEH: - delete cast<MCSafeSEHFragment>(this); + case FT_SymbolId: + delete cast<MCSymbolIdFragment>(this); return; case FT_CVInlineLines: delete cast<MCCVInlineLineTableFragment>(this); @@ -326,7 +326,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; case MCFragment::FT_Padding: OS << "MCPaddingFragment"; break; - case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break; + case MCFragment::FT_SymbolId: OS << "MCSymbolIdFragment"; break; case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break; case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break; case MCFragment::FT_Dummy: OS << "MCDummyFragment"; break; @@ -436,8 +436,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { OS << "\n "; break; } - case MCFragment::FT_SafeSEH: { - const MCSafeSEHFragment *F = cast<MCSafeSEHFragment>(this); + case MCFragment::FT_SymbolId: { + const MCSymbolIdFragment *F = cast<MCSymbolIdFragment>(this); OS << "\n "; OS << " Sym:" << F->getSymbol(); break; diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index 7e0533b8e00..c2583d95c5e 100644 --- a/lib/MC/MCWinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -182,7 +182,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { if (SXData->getAlignment() < 4) SXData->setAlignment(4); - new MCSafeSEHFragment(Symbol, SXData); + new MCSymbolIdFragment(Symbol, SXData); getAssembler().registerSymbol(*Symbol); CSymbol->setIsSafeSEH(); diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 919e2676802..63f5082c29d 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -122,11 +122,11 @@ static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { static bool isBSDLike(object::Archive::Kind Kind) { switch (Kind) { case object::Archive::K_GNU: + case object::Archive::K_GNU64: return false; case object::Archive::K_BSD: case object::Archive::K_DARWIN: return true; - case object::Archive::K_GNU64: case object::Archive::K_DARWIN64: case object::Archive::K_COFF: break; @@ -134,8 +134,8 @@ static bool isBSDLike(object::Archive::Kind Kind) { llvm_unreachable("not supported for writting"); } -static void print32(raw_ostream &Out, object::Archive::Kind Kind, - uint32_t Val) { +template <class T> +static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { if (isBSDLike(Kind)) support::endian::Writer<support::little>(Out).write(Val); else @@ -216,6 +216,20 @@ static std::string computeRelativePath(StringRef From, StringRef To) { return Relative.str(); } +static bool is64BitKind(object::Archive::Kind Kind) { + switch (Kind) { + case object::Archive::K_GNU: + case object::Archive::K_BSD: + case object::Archive::K_DARWIN: + case object::Archive::K_COFF: + return false; + case object::Archive::K_DARWIN64: + case object::Archive::K_GNU64: + return true; + } + llvm_unreachable("not supported for writting"); +} + static void addToStringTable(raw_ostream &Out, StringRef ArcName, const NewArchiveMember &M, bool Thin) { StringRef ID = M.Buf->getBufferIdentifier(); @@ -288,6 +302,14 @@ static bool isArchiveSymbol(const object::BasicSymbolRef &S) { return true; } +static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, + uint64_t Val) { + if (is64BitKind(Kind)) + print<uint64_t>(Out, Kind, Val); + else + print<uint32_t>(Out, Kind, Val); +} + static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, bool Deterministic, ArrayRef<MemberData> Members, StringRef StringTable) { @@ -299,9 +321,11 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, NumSyms += M.Symbols.size(); unsigned Size = 0; - Size += 4; // Number of entries + Size += is64BitKind(Kind) ? 8 : 4; // Number of entries if (isBSDLike(Kind)) Size += NumSyms * 8; // Table + else if (is64BitKind(Kind)) + Size += NumSyms * 8; // Table else Size += NumSyms * 4; // Table if (isBSDLike(Kind)) @@ -318,27 +342,30 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, if (isBSDLike(Kind)) printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0, 0, Size); + else if (is64BitKind(Kind)) + printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size); else printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size); uint64_t Pos = Out.tell() + Size; if (isBSDLike(Kind)) - print32(Out, Kind, NumSyms * 8); + print<uint32_t>(Out, Kind, NumSyms * 8); else - print32(Out, Kind, NumSyms); + printNBits(Out, Kind, NumSyms); for (const MemberData &M : Members) { for (unsigned StringOffset : M.Symbols) { if (isBSDLike(Kind)) - print32(Out, Kind, StringOffset); - print32(Out, Kind, Pos); // member offset + print<uint32_t>(Out, Kind, StringOffset); + printNBits(Out, Kind, Pos); // member offset } Pos += M.Header.size() + M.Data.size() + M.Padding.size(); } if (isBSDLike(Kind)) - print32(Out, Kind, StringTable.size()); // byte count of the string table + // byte count of the string table + print<uint32_t>(Out, Kind, StringTable.size()); Out << StringTable; while (Pad--) @@ -442,6 +469,25 @@ Error llvm::writeArchive(StringRef ArcName, if (!StringTableBuf.empty()) Data.insert(Data.begin(), computeStringTable(StringTableBuf)); + // We would like to detect if we need to switch to a 64-bit symbol table. + if (WriteSymtab) { + uint64_t MaxOffset = 0; + uint64_t LastOffset = MaxOffset; + for (const auto& M : Data) { + // Record the start of the member's offset + LastOffset = MaxOffset; + // Account for the size of each part associated with the member. + MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); + // We assume 32-bit symbols to see if 32-bit symbols are possible or not. + MaxOffset += M.Symbols.size() * 4; + } + // If LastOffset isn't going to fit in a 32-bit varible we need to switch + // to 64-bit. Note that the file can be larger than 4GB as long as the last + // member starts before the 4GB offset. + if (LastOffset >> 32 != 0) + Kind = object::Archive::K_GNU64; + } + SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index ef8c844a66f..c72a1258c1e 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -215,269 +215,6 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { } template <class ELFT> -Expected<uint32_t> -ELFFile<ELFT>::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, - ArrayRef<Elf_Word> ShndxTable) const { - uint32_t Index = Sym->st_shndx; - if (Index == ELF::SHN_XINDEX) { - auto ErrorOrIndex = getExtendedSymbolTableIndex<ELFT>( - Sym, Syms.begin(), ShndxTable); - if (!ErrorOrIndex) - return ErrorOrIndex.takeError(); - return *ErrorOrIndex; - } - if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE) - return 0; - return Index; -} - -template <class ELFT> -Expected<const typename ELFT::Shdr *> -ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, - ArrayRef<Elf_Word> ShndxTable) const { - auto SymsOrErr = symbols(SymTab); - if (!SymsOrErr) - return SymsOrErr.takeError(); - return getSection(Sym, *SymsOrErr, ShndxTable); -} - -template <class ELFT> -Expected<const typename ELFT::Shdr *> -ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols, - ArrayRef<Elf_Word> ShndxTable) const { - auto IndexOrErr = getSectionIndex(Sym, Symbols, ShndxTable); - if (!IndexOrErr) - return IndexOrErr.takeError(); - uint32_t Index = *IndexOrErr; - if (Index == 0) - return nullptr; - return getSection(Index); -} - -template <class ELFT> -Expected<const typename ELFT::Sym *> -ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { - auto SymtabOrErr = symbols(Sec); - if (!SymtabOrErr) - return SymtabOrErr.takeError(); - return object::getSymbol<ELFT>(*SymtabOrErr, Index); -} - -template <class ELFT> -Expected<ArrayRef<uint8_t>> -ELFFile<ELFT>::getSectionContents(const Elf_Shdr *Sec) const { - return getSectionContentsAsArray<uint8_t>(Sec); -} - -template <class ELFT> -StringRef ELFFile<ELFT>::getRelocationTypeName(uint32_t Type) const { - return getELFRelocationTypeName(getHeader()->e_machine, Type); -} - -template <class ELFT> -void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type, - SmallVectorImpl<char> &Result) const { - if (!isMipsELF64()) { - StringRef Name = getRelocationTypeName(Type); - Result.append(Name.begin(), Name.end()); - } else { - // The Mips N64 ABI allows up to three operations to be specified per - // relocation record. Unfortunately there's no easy way to test for the - // presence of N64 ELFs as they have no special flag that identifies them - // as being N64. We can safely assume at the moment that all Mips - // ELFCLASS64 ELFs are N64. New Mips64 ABIs should provide enough - // information to disambiguate between old vs new ABIs. - uint8_t Type1 = (Type >> 0) & 0xFF; - uint8_t Type2 = (Type >> 8) & 0xFF; - uint8_t Type3 = (Type >> 16) & 0xFF; - - // Concat all three relocation type names. - StringRef Name = getRelocationTypeName(Type1); - Result.append(Name.begin(), Name.end()); - - Name = getRelocationTypeName(Type2); - Result.append(1, '/'); - Result.append(Name.begin(), Name.end()); - - Name = getRelocationTypeName(Type3); - Result.append(1, '/'); - Result.append(Name.begin(), Name.end()); - } -} - -template <class ELFT> -Expected<const typename ELFT::Sym *> -ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel, - const Elf_Shdr *SymTab) const { - uint32_t Index = Rel->getSymbol(isMips64EL()); - if (Index == 0) - return nullptr; - return getEntry<Elf_Sym>(SymTab, Index); -} - -template <class ELFT> -Expected<StringRef> -ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const { - uint32_t Index = getHeader()->e_shstrndx; - if (Index == ELF::SHN_XINDEX) - Index = Sections[0].sh_link; - - if (!Index) // no section string table. - return ""; - if (Index >= Sections.size()) - return createError("invalid section index"); - return getStringTable(&Sections[Index]); -} - -template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {} - -template <class ELFT> -Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) { - if (sizeof(Elf_Ehdr) > Object.size()) - return createError("Invalid buffer"); - return ELFFile(Object); -} - -template <class ELFT> -Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { - const uintX_t SectionTableOffset = getHeader()->e_shoff; - if (SectionTableOffset == 0) - return ArrayRef<Elf_Shdr>(); - - if (getHeader()->e_shentsize != sizeof(Elf_Shdr)) - return createError( - "invalid section header entry size (e_shentsize) in ELF header"); - - const uint64_t FileSize = Buf.size(); - - if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize) - return createError("section header table goes past the end of the file"); - - // Invalid address alignment of section headers - if (SectionTableOffset & (alignof(Elf_Shdr) - 1)) - return createError("invalid alignment of section headers"); - - const Elf_Shdr *First = - reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset); - - uintX_t NumSections = getHeader()->e_shnum; - if (NumSections == 0) - NumSections = First->sh_size; - - if (NumSections > UINT64_MAX / sizeof(Elf_Shdr)) - return createError("section table goes past the end of file"); - - const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr); - - // Section table goes past end of file! - if (SectionTableOffset + SectionTableSize > FileSize) - return createError("section table goes past the end of file"); - - return makeArrayRef(First, NumSections); -} - -template <class ELFT> -Expected<const typename ELFT::Shdr *> -ELFFile<ELFT>::getSection(uint32_t Index) const { - auto TableOrErr = sections(); - if (!TableOrErr) - return TableOrErr.takeError(); - return object::getSection<ELFT>(*TableOrErr, Index); -} - -template <class ELFT> -Expected<StringRef> -ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const { - if (Section->sh_type != ELF::SHT_STRTAB) - return createError("invalid sh_type for string table, expected SHT_STRTAB"); - auto V = getSectionContentsAsArray<char>(Section); - if (!V) - return V.takeError(); - ArrayRef<char> Data = *V; - if (Data.empty()) - return createError("empty string table"); - if (Data.back() != '\0') - return createError("string table non-null terminated"); - return StringRef(Data.begin(), Data.size()); -} - -template <class ELFT> -Expected<ArrayRef<typename ELFT::Word>> -ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section) const { - auto SectionsOrErr = sections(); - if (!SectionsOrErr) - return SectionsOrErr.takeError(); - return getSHNDXTable(Section, *SectionsOrErr); -} - -template <class ELFT> -Expected<ArrayRef<typename ELFT::Word>> -ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section, - Elf_Shdr_Range Sections) const { - assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX); - auto VOrErr = getSectionContentsAsArray<Elf_Word>(&Section); - if (!VOrErr) - return VOrErr.takeError(); - ArrayRef<Elf_Word> V = *VOrErr; - auto SymTableOrErr = object::getSection<ELFT>(Sections, Section.sh_link); - if (!SymTableOrErr) - return SymTableOrErr.takeError(); - const Elf_Shdr &SymTable = **SymTableOrErr; - if (SymTable.sh_type != ELF::SHT_SYMTAB && - SymTable.sh_type != ELF::SHT_DYNSYM) - return createError("invalid sh_type"); - if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym))) - return createError("invalid section contents size"); - return V; -} - -template <class ELFT> -Expected<StringRef> -ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec) const { - auto SectionsOrErr = sections(); - if (!SectionsOrErr) - return SectionsOrErr.takeError(); - return getStringTableForSymtab(Sec, *SectionsOrErr); -} - -template <class ELFT> -Expected<StringRef> -ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec, - Elf_Shdr_Range Sections) const { - - if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM) - return createError( - "invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM"); - auto SectionOrErr = object::getSection<ELFT>(Sections, Sec.sh_link); - if (!SectionOrErr) - return SectionOrErr.takeError(); - return getStringTable(*SectionOrErr); -} - -template <class ELFT> -Expected<StringRef> -ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const { - auto SectionsOrErr = sections(); - if (!SectionsOrErr) - return SectionsOrErr.takeError(); - auto Table = getSectionStringTable(*SectionsOrErr); - if (!Table) - return Table.takeError(); - return getSectionName(Section, *Table); -} - -template <class ELFT> -Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section, - StringRef DotShstrtab) const { - uint32_t Offset = Section->sh_name; - if (Offset == 0) - return StringRef(); - if (Offset >= DotShstrtab.size()) - return createError("invalid string offset"); - return StringRef(DotShstrtab.data() + Offset); -} - -template <class ELFT> Expected<std::vector<typename ELFT::Rela>> ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const { // This function reads relocations in Android's packed relocation format, diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp index 1103159fc98..056a1aa3ca1 100644 --- a/lib/ObjectYAML/COFFYAML.cpp +++ b/lib/ObjectYAML/COFFYAML.cpp @@ -178,6 +178,46 @@ void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration( ECase(IMAGE_REL_AMD64_SSPAN32); } +void ScalarEnumerationTraits<COFF::RelocationTypesARM>::enumeration( + IO &IO, COFF::RelocationTypesARM &Value) { + ECase(IMAGE_REL_ARM_ABSOLUTE); + ECase(IMAGE_REL_ARM_ADDR32); + ECase(IMAGE_REL_ARM_ADDR32NB); + ECase(IMAGE_REL_ARM_BRANCH24); + ECase(IMAGE_REL_ARM_BRANCH11); + ECase(IMAGE_REL_ARM_TOKEN); + ECase(IMAGE_REL_ARM_BLX24); + ECase(IMAGE_REL_ARM_BLX11); + ECase(IMAGE_REL_ARM_SECTION); + ECase(IMAGE_REL_ARM_SECREL); + ECase(IMAGE_REL_ARM_MOV32A); + ECase(IMAGE_REL_ARM_MOV32T); + ECase(IMAGE_REL_ARM_BRANCH20T); + ECase(IMAGE_REL_ARM_BRANCH24T); + ECase(IMAGE_REL_ARM_BLX23T); +} + +void ScalarEnumerationTraits<COFF::RelocationTypesARM64>::enumeration( + IO &IO, COFF::RelocationTypesARM64 &Value) { + ECase(IMAGE_REL_ARM64_ABSOLUTE); + ECase(IMAGE_REL_ARM64_ADDR32); + ECase(IMAGE_REL_ARM64_ADDR32NB); + ECase(IMAGE_REL_ARM64_BRANCH26); + ECase(IMAGE_REL_ARM64_PAGEBASE_REL21); + ECase(IMAGE_REL_ARM64_REL21); + ECase(IMAGE_REL_ARM64_PAGEOFFSET_12A); + ECase(IMAGE_REL_ARM64_PAGEOFFSET_12L); + ECase(IMAGE_REL_ARM64_SECREL); + ECase(IMAGE_REL_ARM64_SECREL_LOW12A); + ECase(IMAGE_REL_ARM64_SECREL_HIGH12A); + ECase(IMAGE_REL_ARM64_SECREL_LOW12L); + ECase(IMAGE_REL_ARM64_TOKEN); + ECase(IMAGE_REL_ARM64_SECTION); + ECase(IMAGE_REL_ARM64_ADDR64); + ECase(IMAGE_REL_ARM64_BRANCH19); + ECase(IMAGE_REL_ARM64_BRANCH14); +} + void ScalarEnumerationTraits<COFF::WindowsSubsystem>::enumeration( IO &IO, COFF::WindowsSubsystem &Value) { ECase(IMAGE_SUBSYSTEM_UNKNOWN); @@ -378,6 +418,14 @@ void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO, MappingNormalization<NType<COFF::RelocationTypeAMD64>, uint16_t> NT( IO, Rel.Type); IO.mapRequired("Type", NT->Type); + } else if (H.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) { + MappingNormalization<NType<COFF::RelocationTypesARM>, uint16_t> NT( + IO, Rel.Type); + IO.mapRequired("Type", NT->Type); + } else if (H.Machine == COFF::IMAGE_FILE_MACHINE_ARM64) { + MappingNormalization<NType<COFF::RelocationTypesARM64>, uint16_t> NT( + IO, Rel.Type); + IO.mapRequired("Type", NT->Type); } else { IO.mapRequired("Type", Rel.Type); } diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 8796ff56e5e..9abbdba26cb 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -89,6 +89,7 @@ #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" @@ -548,6 +549,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); + if (Level == O3) + EarlyFPM.addPass(CallSiteSplittingPass()); + // In SamplePGO ThinLTO backend, we need instcombine before profile annotation // to convert bitcast to direct calls so that they can be inlined during the // profile annotation prepration step. @@ -915,13 +919,16 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(InferFunctionAttrsPass()); if (Level > 1) { + FunctionPassManager EarlyFPM(DebugLogging); + EarlyFPM.addPass(CallSiteSplittingPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); - // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 20d1220ac33..40b884351fd 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -140,6 +140,7 @@ FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass()) FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) +FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) diff --git a/lib/ProfileData/CMakeLists.txt b/lib/ProfileData/CMakeLists.txt index cd65762ae6a..3a981d8acf4 100644 --- a/lib/ProfileData/CMakeLists.txt +++ b/lib/ProfileData/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMProfileData + GCOV.cpp InstrProf.cpp InstrProfReader.cpp InstrProfWriter.cpp diff --git a/lib/IR/GCOV.cpp b/lib/ProfileData/GCOV.cpp index d4b45522822..d6e44389f2b 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/ProfileData/GCOV.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/GCOV.h" +#include "llvm/ProfileData/GCOV.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" diff --git a/lib/Support/Chrono.cpp b/lib/Support/Chrono.cpp index a39b485bd13..84f5aab6fc4 100644 --- a/lib/Support/Chrono.cpp +++ b/lib/Support/Chrono.cpp @@ -65,17 +65,17 @@ void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, if (Style[I] == '%' && Style.size() > I + 1) switch (Style[I + 1]) { case 'L': // Milliseconds, from Ruby. FStream << llvm::format( - "%.3lu", duration_cast<milliseconds>(Fractional).count()); + "%.3lu", (long)duration_cast<milliseconds>(Fractional).count()); ++I; continue; case 'f': // Microseconds, from Python. FStream << llvm::format( - "%.6lu", duration_cast<microseconds>(Fractional).count()); + "%.6lu", (long)duration_cast<microseconds>(Fractional).count()); ++I; continue; case 'N': // Nanoseconds, from date(1). FStream << llvm::format( - "%.6lu", duration_cast<nanoseconds>(Fractional).count()); + "%.6lu", (long)duration_cast<nanoseconds>(Fractional).count()); ++I; continue; case '%': // Consume %%, so %%f parses as (%%)f not %(%f) diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 1e20b01fc4a..8906be3aaa2 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -38,7 +38,7 @@ public: std::unique_ptr<fs::mapped_file_region> Buf) : FileOutputBuffer(Path), Buffer(std::move(Buf)), TempPath(TempPath) {} - static ErrorOr<std::unique_ptr<OnDiskBuffer>> + static Expected<std::unique_ptr<OnDiskBuffer>> create(StringRef Path, size_t Size, unsigned Mode); uint8_t *getBufferStart() const override { return (uint8_t *)Buffer->data(); } @@ -49,14 +49,14 @@ public: size_t getBufferSize() const override { return Buffer->size(); } - std::error_code commit() override { + Error commit() override { // Unmap buffer, letting OS flush dirty pages to file on disk. Buffer.reset(); // Atomically replace the existing file with the new one. auto EC = fs::rename(TempPath, FinalPath); sys::DontRemoveFileOnSignal(TempPath); - return EC; + return errorCodeToError(EC); } ~OnDiskBuffer() override { @@ -78,13 +78,13 @@ public: InMemoryBuffer(StringRef Path, MemoryBlock Buf, unsigned Mode) : FileOutputBuffer(Path), Buffer(Buf), Mode(Mode) {} - static ErrorOr<std::unique_ptr<InMemoryBuffer>> + static Expected<std::unique_ptr<InMemoryBuffer>> create(StringRef Path, size_t Size, unsigned Mode) { std::error_code EC; MemoryBlock MB = Memory::allocateMappedMemory( Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); if (EC) - return EC; + return errorCodeToError(EC); return llvm::make_unique<InMemoryBuffer>(Path, MB, Mode); } @@ -96,14 +96,14 @@ public: size_t getBufferSize() const override { return Buffer.size(); } - std::error_code commit() override { + Error commit() override { int FD; std::error_code EC; if (auto EC = openFileForWrite(FinalPath, FD, fs::F_None, Mode)) - return EC; + return errorCodeToError(EC); raw_fd_ostream OS(FD, /*shouldClose=*/true, /*unbuffered=*/true); OS << StringRef((const char *)Buffer.base(), Buffer.size()); - return std::error_code(); + return Error::success(); } private: @@ -111,13 +111,13 @@ private: unsigned Mode; }; -ErrorOr<std::unique_ptr<OnDiskBuffer>> +Expected<std::unique_ptr<OnDiskBuffer>> OnDiskBuffer::create(StringRef Path, size_t Size, unsigned Mode) { // Create new file in same directory but with random name. SmallString<128> TempPath; int FD; if (auto EC = fs::createUniqueFile(Path + ".tmp%%%%%%%", FD, TempPath, Mode)) - return EC; + return errorCodeToError(EC); sys::RemoveFileOnSignal(TempPath); @@ -128,7 +128,7 @@ OnDiskBuffer::create(StringRef Path, size_t Size, unsigned Mode) { // pretty slow just like it writes specified amount of bytes, // so we should avoid calling that function. if (auto EC = fs::resize_file(FD, Size)) - return EC; + return errorCodeToError(EC); #endif // Mmap it. @@ -137,12 +137,12 @@ OnDiskBuffer::create(StringRef Path, size_t Size, unsigned Mode) { FD, fs::mapped_file_region::readwrite, Size, 0, EC); close(FD); if (EC) - return EC; + return errorCodeToError(EC); return llvm::make_unique<OnDiskBuffer>(Path, TempPath, std::move(MappedFile)); } // Create an instance of FileOutputBuffer. -ErrorOr<std::unique_ptr<FileOutputBuffer>> +Expected<std::unique_ptr<FileOutputBuffer>> FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) { unsigned Mode = fs::all_read | fs::all_write; if (Flags & F_executable) @@ -161,7 +161,7 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) { // destination file and write to it on commit(). switch (Stat.type()) { case fs::file_type::directory_file: - return errc::is_a_directory; + return errorCodeToError(errc::is_a_directory); case fs::file_type::regular_file: case fs::file_type::file_not_found: case fs::file_type::status_error: diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index d8fb3e1dc1d..5b2a0f1d0c2 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -351,12 +351,14 @@ enum ProcessorTypes { INTEL_PENTIUM_IV, INTEL_PENTIUM_M, INTEL_CORE_DUO, - INTEL_X86_64, INTEL_NOCONA, INTEL_PRESCOTT, AMD_i486, AMDPENTIUM, - AMDATHLON, + AMD_ATHLON, + AMD_ATHLON_XP, + AMD_K8, + AMD_K8SSE3, INTEL_GOLDMONT, CPU_TYPE_MAX }; @@ -385,10 +387,6 @@ enum ProcessorSubtypes { AMDPENTIUM_K62, AMDPENTIUM_K63, AMDPENTIUM_GEODE, - AMDATHLON_CLASSIC, - AMDATHLON_XP, - AMDATHLON_K8, - AMDATHLON_K8SSE3, CPU_SUBTYPE_MAX }; @@ -794,8 +792,13 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; } if (Features2 & (1 << (FEATURE_EM64T - 32))) { - *Type = INTEL_X86_64; - break; // x86-64 + *Type = INTEL_CORE2; // "core2" + *Subtype = INTEL_CORE2_65; + break; + } + if (Features & (1 << FEATURE_SSE3)) { + *Type = INTEL_CORE_DUO; + break; } if (Features & (1 << FEATURE_SSE2)) { *Type = INTEL_PENTIUM_M; @@ -814,40 +817,15 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, } break; case 15: { - switch (Model) { - case 0: // Pentium 4 processor, Intel Xeon processor. All processors are - // model 00h and manufactured using the 0.18 micron process. - case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon - // processor MP, and Intel Celeron processor. All processors are - // model 01h and manufactured using the 0.18 micron process. - case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M, - // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron - // processor, and Mobile Intel Celeron processor. All processors - // are model 02h and manufactured using the 0.13 micron process. - *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 - : INTEL_PENTIUM_IV); + if (Features2 & (1 << (FEATURE_EM64T - 32))) { + *Type = INTEL_NOCONA; break; - - case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D - // processor. All processors are model 03h and manufactured using - // the 90 nm process. - case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition, - // Pentium D processor, Intel Xeon processor, Intel Xeon - // processor MP, Intel Celeron D processor. All processors are - // model 04h and manufactured using the 90 nm process. - case 6: // Pentium 4 processor, Pentium D processor, Pentium processor - // Extreme Edition, Intel Xeon processor, Intel Xeon processor - // MP, Intel Celeron D processor. All processors are model 06h - // and manufactured using the 65 nm process. - *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_NOCONA - : INTEL_PRESCOTT); - break; - - default: - *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 - : INTEL_PENTIUM_IV); + } + if (Features & (1 << FEATURE_SSE3)) { + *Type = INTEL_PRESCOTT; break; } + *Type = INTEL_PENTIUM_IV; break; } default: @@ -885,20 +863,18 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, } break; case 6: - *Type = AMDATHLON; if (Features & (1 << FEATURE_SSE)) { - *Subtype = AMDATHLON_XP; + *Type = AMD_ATHLON_XP; break; // "athlon-xp" } - *Subtype = AMDATHLON_CLASSIC; + *Type = AMD_ATHLON; break; // "athlon" case 15: - *Type = AMDATHLON; if (Features & (1 << FEATURE_SSE3)) { - *Subtype = AMDATHLON_K8SSE3; + *Type = AMD_K8SSE3; break; // "k8-sse3" } - *Subtype = AMDATHLON_K8; + *Type = AMD_K8; break; // "k8" case 16: *Type = AMDFAM10H; // "amdfam10" @@ -1078,8 +1054,8 @@ StringRef sys::getHostCPUName() { detectX86FamilyModel(EAX, &Family, &Model); getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); - unsigned Type; - unsigned Subtype; + unsigned Type = 0; + unsigned Subtype = 0; if (Vendor == SIG_INTEL) { getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, @@ -1145,8 +1121,6 @@ StringRef sys::getHostCPUName() { return "knl"; case INTEL_KNM: return "knm"; - case INTEL_X86_64: - return "x86-64"; case INTEL_NOCONA: return "nocona"; case INTEL_PRESCOTT: @@ -1172,19 +1146,14 @@ StringRef sys::getHostCPUName() { default: return "pentium"; } - case AMDATHLON: - switch (Subtype) { - case AMDATHLON_CLASSIC: - return "athlon"; - case AMDATHLON_XP: - return "athlon-xp"; - case AMDATHLON_K8: - return "k8"; - case AMDATHLON_K8SSE3: - return "k8-sse3"; - default: - llvm_unreachable("Unexpected subtype!"); - } + case AMD_ATHLON: + return "athlon"; + case AMD_ATHLON_XP: + return "athlon-xp"; + case AMD_K8: + return "k8"; + case AMD_K8SSE3: + return "k8-sse3"; case AMDFAM10H: return "amdfam10"; case AMD_BTVER1: diff --git a/lib/Support/LowLevelType.cpp b/lib/Support/LowLevelType.cpp index 0ee3f1d0119..cb2187405d6 100644 --- a/lib/Support/LowLevelType.cpp +++ b/lib/Support/LowLevelType.cpp @@ -43,7 +43,7 @@ void LLT::print(raw_ostream &OS) const { assert(isScalar() && "unexpected type"); OS << "s" << getScalarSizeInBits(); } else - llvm_unreachable("trying to print an invalid type"); + OS << "LLT_invalid"; } const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index a659a2afee6..bf807e66e02 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -27,6 +27,7 @@ namespace llvm { bool SpecialCaseList::Matcher::insert(std::string Regexp, + unsigned LineNumber, std::string &REError) { if (Regexp.empty()) { REError = "Supplied regexp was blank"; @@ -34,7 +35,7 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, } if (Regex::isLiteralERE(Regexp)) { - Strings.insert(Regexp); + Strings[Regexp] = LineNumber; return true; } Trigrams.insert(Regexp); @@ -45,34 +46,30 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, Regexp.replace(pos, strlen("*"), ".*"); } + Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str(); + // Check that the regexp is valid. Regex CheckRE(Regexp); if (!CheckRE.isValid(REError)) return false; - if (!UncompiledRegEx.empty()) - UncompiledRegEx += "|"; - UncompiledRegEx += "^(" + Regexp + ")$"; + RegExes.emplace_back( + std::make_pair(make_unique<Regex>(std::move(CheckRE)), LineNumber)); return true; } -void SpecialCaseList::Matcher::compile() { - if (!UncompiledRegEx.empty()) { - RegEx.reset(new Regex(UncompiledRegEx)); - UncompiledRegEx.clear(); - } -} - -bool SpecialCaseList::Matcher::match(StringRef Query) const { - if (Strings.count(Query)) - return true; +unsigned SpecialCaseList::Matcher::match(StringRef Query) const { + auto It = Strings.find(Query); + if (It != Strings.end()) + return It->second; if (Trigrams.isDefinitelyOut(Query)) return false; - return RegEx && RegEx->match(Query); + for (auto& RegExKV : RegExes) + if (RegExKV.first->match(Query)) + return RegExKV.second; + return 0; } -SpecialCaseList::SpecialCaseList() : Sections(), IsCompiled(false) {} - std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const std::vector<std::string> &Paths, std::string &Error) { @@ -114,7 +111,6 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, return false; } } - compile(); return true; } @@ -123,7 +119,6 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB, StringMap<size_t> Sections; if (!parse(MB, Sections, Error)) return false; - compile(); return true; } @@ -132,11 +127,13 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { // Iterate through each line in the blacklist file. SmallVector<StringRef, 16> Lines; - SplitString(MB->getBuffer(), Lines, "\n\r"); + MB->getBuffer().split(Lines, '\n'); - int LineNo = 1; + unsigned LineNo = 1; StringRef Section = "*"; + for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) { + *I = I->trim(); // Ignore empty lines and lines starting with "#" if (I->empty() || I->startswith("#")) continue; @@ -181,11 +178,10 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, if (SectionsMap.find(Section) == SectionsMap.end()) { std::unique_ptr<Matcher> M = make_unique<Matcher>(); std::string REError; - if (!M->insert(Section, REError)) { + if (!M->insert(Section, LineNo, REError)) { Error = (Twine("malformed section ") + Section + ": '" + REError).str(); return false; } - M->compile(); SectionsMap[Section] = Sections.size(); Sections.emplace_back(std::move(M)); @@ -193,7 +189,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category]; std::string REError; - if (!Entry.insert(std::move(Regexp), REError)) { + if (!Entry.insert(std::move(Regexp), LineNo, REError)) { Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" + SplitLine.second + "': " + REError).str(); return false; @@ -202,38 +198,33 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, return true; } -void SpecialCaseList::compile() { - assert(!IsCompiled && "compile() should only be called once"); - // Iterate through every section compiling regular expressions for every query - // and creating Section entries. - for (auto &Section : Sections) - for (auto &Prefix : Section.Entries) - for (auto &Category : Prefix.getValue()) - Category.getValue().compile(); - - IsCompiled = true; -} - SpecialCaseList::~SpecialCaseList() {} bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { - assert(IsCompiled && "SpecialCaseList::compile() was not called!"); + return inSectionBlame(Section, Prefix, Query, Category); +} +unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, + StringRef Query, + StringRef Category) const { for (auto &SectionIter : Sections) - if (SectionIter.SectionMatcher->match(Section) && - inSection(SectionIter.Entries, Prefix, Query, Category)) - return true; - - return false; + if (SectionIter.SectionMatcher->match(Section)) { + unsigned Blame = + inSectionBlame(SectionIter.Entries, Prefix, Query, Category); + if (Blame) + return Blame; + } + return 0; } -bool SpecialCaseList::inSection(const SectionEntries &Entries, StringRef Prefix, - StringRef Query, StringRef Category) const { +unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, + StringRef Prefix, StringRef Query, + StringRef Category) const { SectionEntries::const_iterator I = Entries.find(Prefix); - if (I == Entries.end()) return false; + if (I == Entries.end()) return 0; StringMap<Matcher>::const_iterator II = I->second.find(Category); - if (II == I->second.end()) return false; + if (II == I->second.end()) return 0; return II->getValue().match(Query); } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 781a911ed57..2ecb97316c8 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -426,7 +426,7 @@ std::error_code resize_file(int FD, uint64_t Size) { // If we have posix_fallocate use it. Unlike ftruncate it always allocates // space, so we get an error if the disk is full. if (int Err = ::posix_fallocate(FD, 0, Size)) { - if (Err != EOPNOTSUPP) + if (Err != EINVAL && Err != EOPNOTSUPP) return std::error_code(Err, std::generic_category()); } #endif diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp index e6afb42440a..7de5d0ef66b 100644 --- a/lib/Target/AArch64/AArch64A53Fix835769.cpp +++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index bc44bc5f246..461c01318d4 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -19,8 +19,8 @@ #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CallingConv.h" -#include "llvm/Target/TargetInstrInfo.h" namespace { using namespace llvm; diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index 51700f90597..2793481a0c1 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -34,9 +34,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp index 8bbef44a2e6..c3a354cb01d 100644 --- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp +++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp @@ -73,11 +73,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstdlib> diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 9eda56c825a..33e0f5de5fd 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -31,10 +31,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 5d8b4b69593..d182c812189 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -20,9 +20,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 8ebcaff1358..809e4a77fad 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -110,6 +110,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -121,7 +122,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index c351efb0c39..55a256867fa 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def index 7d2cfbeff38..39f50ade747 100644 --- a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -14,19 +14,21 @@ namespace llvm { RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{ /* StartIdx, Length, RegBank */ - // 0: FPR 32-bit value. + // 0: FPR 16-bit value. + {0, 16, AArch64::FPRRegBank}, + // 1: FPR 32-bit value. {0, 32, AArch64::FPRRegBank}, - // 1: FPR 64-bit value. + // 2: FPR 64-bit value. {0, 64, AArch64::FPRRegBank}, - // 2: FPR 128-bit value. + // 3: FPR 128-bit value. {0, 128, AArch64::FPRRegBank}, - // 3: FPR 256-bit value. + // 4: FPR 256-bit value. {0, 256, AArch64::FPRRegBank}, - // 4: FPR 512-bit value. + // 5: FPR 512-bit value. {0, 512, AArch64::FPRRegBank}, - // 5: GPR 32-bit value. + // 6: GPR 32-bit value. {0, 32, AArch64::GPRRegBank}, - // 6: GPR 64-bit value. + // 7: GPR 64-bit value. {0, 64, AArch64::GPRRegBank}, }; @@ -37,58 +39,77 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{ {nullptr, 0}, // 3-operands instructions (all binary operations should end up with one of // those mapping). - // 1: FPR 32-bit value. <-- This must match First3OpsIdx. + // 1: FPR 16-bit value. <-- This must match First3OpsIdx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, + // 4: FPR 32-bit value. <-- This must match First3OpsIdx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 4: FPR 64-bit value. + // 7: FPR 64-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 7: FPR 128-bit value. + // 10: FPR 128-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, - // 10: FPR 256-bit value. + // 13: FPR 256-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1}, - // 13: FPR 512-bit value. + // 16: FPR 512-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1}, - // 16: GPR 32-bit value. + // 19: GPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 19: GPR 64-bit value. <-- This must match Last3OpsIdx. + // 22: GPR 64-bit value. <-- This must match Last3OpsIdx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, // Cross register bank copies. - // 22: FPR 32-bit value to GPR 32-bit value. <-- This must match + // 25: FPR 16-bit value to GPR 16-bit (invalid). <-- This must match // FirstCrossRegCpyIdx. + {nullptr, 1}, + {nullptr, 1}, + // 27: FPR 32-bit value to GPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 24: FPR 64-bit value to GPR 64-bit value. + // 29: FPR 64-bit value to GPR 64-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - // 26: FPR 128-bit value to GPR 128-bit value (invalid) + // 31: FPR 128-bit value to GPR 128-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 28: FPR 256-bit value to GPR 256-bit value (invalid) + // 33: FPR 256-bit value to GPR 256-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 30: FPR 512-bit value to GPR 512-bit value (invalid) + // 35: FPR 512-bit value to GPR 512-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 32: GPR 32-bit value to FPR 32-bit value. + // 37: GPR 32-bit value to FPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 34: GPR 64-bit value to FPR 64-bit value. <-- This must match + // 39: GPR 64-bit value to FPR 64-bit value. <-- This must match // LastCrossRegCpyIdx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, + // 41: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, + // 43: FPExt: 16 to 32. <-- This must match FPExt16To64Idx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, + // 45: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, + // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, }; bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx, @@ -145,16 +166,18 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, return -1; } if (RBIdx == PMI_FirstFPR) { - if (Size <= 32) + if (Size <= 16) return 0; - if (Size <= 64) + if (Size <= 32) return 1; - if (Size <= 128) + if (Size <= 64) return 2; - if (Size <= 256) + if (Size <= 128) return 3; - if (Size <= 512) + if (Size <= 256) return 4; + if (Size <= 512) + return 5; return -1; } return -1; @@ -206,4 +229,35 @@ AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID, ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound"); return &ValMappings[ValMappingIdx]; } + +const RegisterBankInfo::ValueMapping * +AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize, + unsigned SrcSize) { + // We support: + // - For Scalar: + // - 16 to 32. + // - 16 to 64. + // - 32 to 64. + // => FPR 16 to FPR 32|64 + // => FPR 32 to FPR 64 + // - For vectors: + // - v4f16 to v4f32 + // - v2f32 to v2f64 + // => FPR 64 to FPR 128 + + // Check that we have been asked sensible sizes. + if (SrcSize == 16) { + assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension"); + if (DstSize == 32) + return &ValMappings[FPExt16To32Idx]; + return &ValMappings[FPExt16To64Idx]; + } + + if (SrcSize == 32) { + assert(DstSize == 64 && "Unexpected float extension"); + return &ValMappings[FPExt32To64Idx]; + } + assert((SrcSize == 64 || DstSize == 128) && "Unexpected vector extension"); + return &ValMappings[FPExt64To128Idx]; +} } // End llvm namespace. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index bec872ae8c0..81dac1be56c 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -71,7 +72,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetCallingConv.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> @@ -4981,7 +4981,7 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, // the initial estimate is 2^-8. Thus the number of extra steps to refine // the result for float (23 mantissa bits) is 2 and for double (52 // mantissa bits) is 3. - ExtraSteps = VT == MVT::f64 ? 3 : 2; + ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2; return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 24758e97888..2f10bef1e47 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -17,7 +17,7 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/MachineCombinerPattern.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AArch64GenInstrInfo.inc" diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index eabbc05a033..e014d5bd569 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -335,6 +335,7 @@ let RecomputePerFunction = 1 in { } include "AArch64InstrFormats.td" +include "SVEInstrFormats.td" //===----------------------------------------------------------------------===// @@ -6275,3 +6276,4 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; include "AArch64InstrAtomics.td" +include "AArch64SVEInstrInfo.td" diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 2d45be37ca7..3a456255224 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -23,6 +23,110 @@ using namespace llvm; +/// FIXME: The following static functions are SizeChangeStrategy functions +/// that are meant to temporarily mimic the behaviour of the old legalization +/// based on doubling/halving non-legal types as closely as possible. This is +/// not entirly possible as only legalizing the types that are exactly a power +/// of 2 times the size of the legal types would require specifying all those +/// sizes explicitly. +/// In practice, not specifying those isn't a problem, and the below functions +/// should disappear quickly as we add support for legalizing non-power-of-2 +/// sized types further. +static void +addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result, + const LegalizerInfo::SizeAndActionsVec &v) { + for (unsigned i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 < v[i].first && i + 1 < v.size() && + v[i + 1].first != v[i].first + 1) + result.push_back({v[i].first + 1, LegalizerInfo::Unsupported}); + } +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_narrow_128_ToLargest(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 2); + LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::WidenScalar}, + {2, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + assert(Largest + 1 < 128); + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + result.push_back({128, LegalizerInfo::NarrowScalar}); + result.push_back({129, LegalizerInfo::Unsupported}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_16(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, + {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_8(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 9); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_8_16_narrowToLargest(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::NarrowScalar}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_8_16_32(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 33); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}, + {32, LegalizerInfo::WidenScalar}, {33, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + AArch64LegalizerInfo::AArch64LegalizerInfo() { using namespace TargetOpcode; const LLT p0 = LLT::pointer(0, 64); @@ -42,8 +146,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { for (auto Ty : {s16, s32, s64, p0}) setAction({G_PHI, Ty}, Legal); - for (auto Ty : {s1, s8}) - setAction({G_PHI, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1_8); for (auto Ty : { s32, s64 }) setAction({G_BSWAP, Ty}, Legal); @@ -54,15 +157,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { for (auto Ty : {s32, s64, v2s32, v4s32, v2s64}) setAction({BinOp, Ty}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({BinOp, Ty}, WidenScalar); + if (BinOp != G_ADD) + setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, + widen_1_8_16_narrowToLargest); } setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s64}, Legal); - for (auto Ty : {s1, s8, s16, s32}) - setAction({G_GEP, 1, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_GEP, 1, widen_1_8_16_32); setAction({G_PTR_MASK, p0}, Legal); @@ -70,16 +173,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { for (auto Ty : {s32, s64}) setAction({BinOp, Ty}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({BinOp, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1_8_16); } for (unsigned BinOp : {G_SREM, G_UREM}) for (auto Ty : { s1, s8, s16, s32, s64 }) setAction({BinOp, Ty}, Lower); - for (unsigned Op : {G_SMULO, G_UMULO}) - setAction({Op, s64}, Lower); + for (unsigned Op : {G_SMULO, G_UMULO}) { + setAction({Op, 0, s64}, Lower); + setAction({Op, 1, s1}, Legal); + } for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) { for (auto Ty : { s32, s64 }) @@ -101,8 +205,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_INSERT, Ty}, Legal); setAction({G_INSERT, 1, Ty}, Legal); } + setLegalizeScalarToDifferentSizeStrategy(G_INSERT, 0, + widen_1_8_16_narrowToLargest); for (auto Ty : {s1, s8, s16}) { - setAction({G_INSERT, Ty}, WidenScalar); setAction({G_INSERT, 1, Ty}, Legal); // FIXME: Can't widen the sources because that violates the constraints on // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap). @@ -118,7 +223,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { for (auto Ty : {s8, s16, s32, s64, p0, v2s32}) setAction({MemOp, Ty}, Legal); - setAction({MemOp, s1}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(MemOp, 0, + widen_1_narrow_128_ToLargest); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); @@ -132,20 +238,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_CONSTANT, p0}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({TargetOpcode::G_CONSTANT, Ty}, WidenScalar); - - setAction({TargetOpcode::G_FCONSTANT, s16}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16); + setLegalizeScalarToDifferentSizeStrategy(G_FCONSTANT, 0, widen_16); setAction({G_ICMP, 1, s32}, Legal); setAction({G_ICMP, 1, s64}, Legal); setAction({G_ICMP, 1, p0}, Legal); - for (auto Ty : {s1, s8, s16}) { - setAction({G_ICMP, Ty}, WidenScalar); - setAction({G_FCMP, Ty}, WidenScalar); - setAction({G_ICMP, 1, Ty}, WidenScalar); - } + setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 0, widen_1_8_16); + setLegalizeScalarToDifferentSizeStrategy(G_FCMP, 0, widen_1_8_16); + setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1, widen_1_8_16); setAction({G_ICMP, s32}, Legal); setAction({G_FCMP, s32}, Legal); @@ -159,12 +261,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_ANYEXT, Ty}, Legal); } - for (auto Ty : { s1, s8, s16, s32 }) { - setAction({G_ZEXT, 1, Ty}, Legal); - setAction({G_SEXT, 1, Ty}, Legal); - setAction({G_ANYEXT, 1, Ty}, Legal); - } - // FP conversions for (auto Ty : { s16, s32 }) { setAction({G_FPTRUNC, Ty}, Legal); @@ -176,12 +272,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_FPEXT, Ty}, Legal); } - for (auto Ty : { s1, s8, s16, s32 }) - setAction({G_TRUNC, Ty}, Legal); - - for (auto Ty : { s8, s16, s32, s64 }) - setAction({G_TRUNC, 1, Ty}, Legal); - // Conversions for (auto Ty : { s32, s64 }) { setAction({G_FPTOSI, 0, Ty}, Legal); @@ -189,12 +279,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_SITOFP, 1, Ty}, Legal); setAction({G_UITOFP, 1, Ty}, Legal); } - for (auto Ty : { s1, s8, s16 }) { - setAction({G_FPTOSI, 0, Ty}, WidenScalar); - setAction({G_FPTOUI, 0, Ty}, WidenScalar); - setAction({G_SITOFP, 1, Ty}, WidenScalar); - setAction({G_UITOFP, 1, Ty}, WidenScalar); - } + setLegalizeScalarToDifferentSizeStrategy(G_FPTOSI, 0, widen_1_8_16); + setLegalizeScalarToDifferentSizeStrategy(G_FPTOUI, 0, widen_1_8_16); + setLegalizeScalarToDifferentSizeStrategy(G_SITOFP, 1, widen_1_8_16); + setLegalizeScalarToDifferentSizeStrategy(G_UITOFP, 1, widen_1_8_16); for (auto Ty : { s32, s64 }) { setAction({G_FPTOSI, 1, Ty}, Legal); @@ -209,8 +297,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_BRINDIRECT, p0}, Legal); // Select - for (auto Ty : {s1, s8, s16}) - setAction({G_SELECT, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_SELECT, 0, widen_1_8_16); for (auto Ty : {s32, s64, p0}) setAction({G_SELECT, Ty}, Legal); diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp index bd5211d4ff5..bd4bdaa6d12 100644 --- a/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -15,7 +15,7 @@ #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/MacroFusion.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 391e8ed633d..83bf493c9f0 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -87,9 +87,9 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, {PMI_GPR32, PMI_GPR64}) && "PartialMappingIdx's are incorrectly ordered"); - assert(checkPartialMappingIdx( - PMI_FirstFPR, PMI_LastFPR, - {PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512}) && + assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, + {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, + PMI_FPR256, PMI_FPR512}) && "PartialMappingIdx's are incorrectly ordered"); // Now, the content. // Check partial mapping. @@ -102,6 +102,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); + CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR); @@ -121,6 +122,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) CHECK_VALUEMAP(GPR, 32); CHECK_VALUEMAP(GPR, 64); + CHECK_VALUEMAP(FPR, 16); CHECK_VALUEMAP(FPR, 32); CHECK_VALUEMAP(FPR, 64); CHECK_VALUEMAP(FPR, 128); @@ -173,6 +175,30 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64); CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64); +#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \ + do { \ + unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \ + unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \ + (void)PartialMapDstIdx; \ + (void)PartialMapSrcIdx; \ + const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \ + (void)Map; \ + assert(Map[0].BreakDown == \ + &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ + Map[0].NumBreakDowns == 1 && "FPR" #DstSize \ + " Dst is incorrectly initialized"); \ + assert(Map[1].BreakDown == \ + &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ + Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \ + " Src is incorrectly initialized"); \ + \ + } while (false) + + CHECK_VALUEMAP_FPEXT(32, 16); + CHECK_VALUEMAP_FPEXT(64, 16); + CHECK_VALUEMAP_FPEXT(64, 32); + CHECK_VALUEMAP_FPEXT(128, 64); + assert(verify(TRI) && "Invalid register bank information"); } @@ -453,6 +479,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: return getSameKindOfOperandsMapping(MI); + case TargetOpcode::G_FPEXT: { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + return getInstructionMapping( + DefaultMappingID, /*Cost*/ 1, + getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()), + /*NumOperands*/ 2); + } case TargetOpcode::COPY: { unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h index 6d74a47095a..008221dbef5 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.h +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h @@ -25,10 +25,10 @@ class TargetRegisterInfo; class AArch64GenRegisterBankInfo : public RegisterBankInfo { protected: - enum PartialMappingIdx { PMI_None = -1, - PMI_FPR32 = 1, + PMI_FPR16 = 1, + PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, @@ -37,7 +37,7 @@ protected: PMI_GPR64, PMI_FirstGPR = PMI_GPR32, PMI_LastGPR = PMI_GPR64, - PMI_FirstFPR = PMI_FPR32, + PMI_FirstFPR = PMI_FPR16, PMI_LastFPR = PMI_FPR512, PMI_Min = PMI_FirstFPR, }; @@ -49,11 +49,15 @@ protected: enum ValueMappingIdx { InvalidIdx = 0, First3OpsIdx = 1, - Last3OpsIdx = 19, + Last3OpsIdx = 22, DistanceBetweenRegBanks = 3, - FirstCrossRegCpyIdx = 22, - LastCrossRegCpyIdx = 34, - DistanceBetweenCrossRegCpy = 2 + FirstCrossRegCpyIdx = 25, + LastCrossRegCpyIdx = 39, + DistanceBetweenCrossRegCpy = 2, + FPExt16To32Idx = 41, + FPExt16To64Idx = 43, + FPExt32To64Idx = 45, + FPExt64To128Idx = 47, }; static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx, @@ -82,6 +86,15 @@ protected: static const RegisterBankInfo::ValueMapping * getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size); + /// Get the instruction mapping for G_FPEXT. + /// + /// \pre (DstSize, SrcSize) pair is one of the following: + /// (32, 16), (64, 16), (64, 32), (128, 64) + /// + /// \return An InstructionMapping with statically allocated OperandsMapping. + static const RegisterBankInfo::ValueMapping * + getFPExtMapping(unsigned DstSize, unsigned SrcSize); + #define GET_TARGET_REGBANK_CLASS #include "AArch64GenRegisterBank.inc" }; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 91b1481f5ef..1059bc37c8f 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index ee5d3547aaa..a9fb0200d80 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -32,6 +32,12 @@ let Namespace = "AArch64" in { def qsub : SubRegIndex<64>; def sube64 : SubRegIndex<64>; def subo64 : SubRegIndex<64>; + // SVE + def zsub : SubRegIndex<128>; + // Note: zsub_hi should never be used directly because it represents + // the scalable part of the SVE vector and cannot be manipulated as a + // subvector in the same way the lower 128bits can. + def zsub_hi : SubRegIndex<128>; // Note: Code depends on these having consecutive numbers def dsub0 : SubRegIndex<64>; def dsub1 : SubRegIndex<64>; @@ -460,11 +466,11 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> { // assmebler matching. def VectorReg64AsmOperand : AsmOperandClass { let Name = "VectorReg64"; - let PredicateMethod = "isVectorReg"; + let PredicateMethod = "isNeonVectorReg"; } def VectorReg128AsmOperand : AsmOperandClass { let Name = "VectorReg128"; - let PredicateMethod = "isVectorReg"; + let PredicateMethod = "isNeonVectorReg"; } def V64 : RegisterOperand<FPR64, "printVRegOperand"> { @@ -475,7 +481,10 @@ def V128 : RegisterOperand<FPR128, "printVRegOperand"> { let ParserMatchClass = VectorReg128AsmOperand; } -def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; } +def VectorRegLoAsmOperand : AsmOperandClass { + let Name = "VectorRegLo"; + let PredicateMethod = "isNeonVectorRegLo"; +} def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> { let ParserMatchClass = VectorRegLoAsmOperand; } @@ -642,3 +651,119 @@ def XSeqPairClassOperand : //===----- END: v8.1a atomic CASP register operands -----------------------===// + +// The part of SVE registers that don't overlap Neon registers. +// These are only used as part of clobber lists. +def Z0_HI : AArch64Reg<0, "z0_hi">; +def Z1_HI : AArch64Reg<1, "z1_hi">; +def Z2_HI : AArch64Reg<2, "z2_hi">; +def Z3_HI : AArch64Reg<3, "z3_hi">; +def Z4_HI : AArch64Reg<4, "z4_hi">; +def Z5_HI : AArch64Reg<5, "z5_hi">; +def Z6_HI : AArch64Reg<6, "z6_hi">; +def Z7_HI : AArch64Reg<7, "z7_hi">; +def Z8_HI : AArch64Reg<8, "z8_hi">; +def Z9_HI : AArch64Reg<9, "z9_hi">; +def Z10_HI : AArch64Reg<10, "z10_hi">; +def Z11_HI : AArch64Reg<11, "z11_hi">; +def Z12_HI : AArch64Reg<12, "z12_hi">; +def Z13_HI : AArch64Reg<13, "z13_hi">; +def Z14_HI : AArch64Reg<14, "z14_hi">; +def Z15_HI : AArch64Reg<15, "z15_hi">; +def Z16_HI : AArch64Reg<16, "z16_hi">; +def Z17_HI : AArch64Reg<17, "z17_hi">; +def Z18_HI : AArch64Reg<18, "z18_hi">; +def Z19_HI : AArch64Reg<19, "z19_hi">; +def Z20_HI : AArch64Reg<20, "z20_hi">; +def Z21_HI : AArch64Reg<21, "z21_hi">; +def Z22_HI : AArch64Reg<22, "z22_hi">; +def Z23_HI : AArch64Reg<23, "z23_hi">; +def Z24_HI : AArch64Reg<24, "z24_hi">; +def Z25_HI : AArch64Reg<25, "z25_hi">; +def Z26_HI : AArch64Reg<26, "z26_hi">; +def Z27_HI : AArch64Reg<27, "z27_hi">; +def Z28_HI : AArch64Reg<28, "z28_hi">; +def Z29_HI : AArch64Reg<29, "z29_hi">; +def Z30_HI : AArch64Reg<30, "z30_hi">; +def Z31_HI : AArch64Reg<31, "z31_hi">; + +// SVE variable-size vector registers +let SubRegIndices = [zsub,zsub_hi] in { +def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>; +def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>; +def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>; +def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>; +def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>; +def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>; +def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>; +def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>; +def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>; +def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>; +def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>; +def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>; +def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>; +def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>; +def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>; +def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>; +def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>; +def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>; +def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>; +def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>; +def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>; +def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>; +def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>; +def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>; +def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>; +def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>; +def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>; +def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>; +def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>; +def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>; +def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>; +def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>; +} + +class SVERegOp <string Suffix, AsmOperandClass C, + RegisterClass RC> : RegisterOperand<RC> { + let PrintMethod = !if(!eq(Suffix, ""), + "printSVERegOp<>", + "printSVERegOp<'" # Suffix # "'>"); + let ParserMatchClass = C; +} + +class ZPRRegOp <string Suffix, AsmOperandClass C, + RegisterClass RC> : SVERegOp<Suffix, C, RC> {} + +//****************************************************************************** + +// SVE vector register class +def ZPR : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 31)> { + let Size = 128; +} + +class ZPRAsmOperand <string name, int Width>: AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = "isSVEDataVectorRegOfWidth<" # Width # ">"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseSVEDataVector<" + # !if(!eq(Width, -1), "false", "true") # ">"; +} + +def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", -1>; +def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>; +def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>; +def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; +def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>; +def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>; + +def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>; +def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>; +def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>; +def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>; +def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>; +def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>; diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td new file mode 100644 index 00000000000..7da0b28d22d --- /dev/null +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -0,0 +1,17 @@ +//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Scalable Vector Extension (SVE) Instruction definitions. +// +//===----------------------------------------------------------------------===// + +let Predicates = [HasSVE] in { + defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">; + defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">; +} diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td index 18d000ace94..90ebd78f4ab 100644 --- a/lib/Target/AArch64/AArch64SchedA53.td +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -26,6 +26,8 @@ def CortexA53Model : SchedMachineModel { // Specification - Instruction Timings" // v 1.0 Spreadsheet let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index 5d1608ef04a..ade03f23f8c 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -31,6 +31,8 @@ def CortexA57Model : SchedMachineModel { // experiments and benchmarking data. let LoopMicroOpBufferSize = 16; let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td index 9fd3ae6818e..7a474ba8ef9 100644 --- a/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/lib/Target/AArch64/AArch64SchedCyclone.td @@ -18,6 +18,8 @@ def CycloneModel : SchedMachineModel { let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 16; // 14-19 cycles are typical. let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td index 44fd94fc3d4..7277198b585 100644 --- a/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/lib/Target/AArch64/AArch64SchedFalkor.td @@ -23,6 +23,8 @@ def FalkorModel : SchedMachineModel { let LoadLatency = 3; // Optimistic load latency. let MispredictPenalty = 11; // Minimum branch misprediction penalty. let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedKryo.td b/lib/Target/AArch64/AArch64SchedKryo.td index 4e491a04c78..ce2afd499af 100644 --- a/lib/Target/AArch64/AArch64SchedKryo.td +++ b/lib/Target/AArch64/AArch64SchedKryo.td @@ -27,6 +27,8 @@ def KryoModel : SchedMachineModel { // experiments and benchmarking data. let LoopMicroOpBufferSize = 16; let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedM1.td b/lib/Target/AArch64/AArch64SchedM1.td index 6133efed020..6c86fcdd29b 100644 --- a/lib/Target/AArch64/AArch64SchedM1.td +++ b/lib/Target/AArch64/AArch64SchedM1.td @@ -24,6 +24,8 @@ def ExynosM1Model : SchedMachineModel { let LoadLatency = 4; // Optimistic load cases. let MispredictPenalty = 14; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. + + list<Predicate> UnsupportedFeatures = [HasSVE]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td index 3cdd2047fbb..585688aae27 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/lib/Target/AArch64/AArch64SchedThunderX.td @@ -25,6 +25,8 @@ def ThunderXT8XModel : SchedMachineModel { let MispredictPenalty = 8; // Branch mispredict penalty. let PostRAScheduler = 1; // Use PostRA scheduler. let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } // Modeling each pipeline with BufferSize == 0 since T8X is in-order. diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 4ab7555594a..fd60459382a 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -25,6 +25,8 @@ def ThunderX2T99Model : SchedMachineModel { let LoopMicroOpBufferSize = 32; let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; } // Define the issue ports. diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp index fe984ccbaf1..78fc322158b 100644 --- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp index f53af2315ec..2ff644d2bcd 100644 --- a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp +++ b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp @@ -33,11 +33,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <map> diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1f06d4065b3..de048a24534 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -59,12 +59,14 @@ using namespace llvm; namespace { +enum class RegKind {Scalar, NeonVector, SVEDataVector}; + class AArch64AsmParser : public MCTargetAsmParser { private: StringRef Mnemonic; ///< Instruction mnemonic. // Map of register aliases registers via the .req directive. - StringMap<std::pair<bool, unsigned>> RegisterReqs; + StringMap<std::pair<RegKind, unsigned>> RegisterReqs; AArch64TargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); @@ -77,9 +79,10 @@ private: void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S); AArch64CC::CondCode parseCondCodeString(StringRef Cond); bool parseCondCode(OperandVector &Operands, bool invertCondCode); - unsigned matchRegisterNameAlias(StringRef Name, bool isVector); + unsigned matchRegisterNameAlias(StringRef Name, RegKind Kind); int tryParseRegister(); int tryMatchVectorRegister(StringRef &Kind, bool expected); + int tryParseSVEDataVectorRegister(const AsmToken &Tok, StringRef &Kind); bool parseRegister(OperandVector &Operands); bool parseSymbolicImmVal(const MCExpr *&ImmVal); bool parseVectorList(OperandVector &Operands); @@ -126,8 +129,10 @@ private: OperandMatchResultTy tryParseFPImm(OperandVector &Operands); OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands); OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands); - bool tryParseVectorRegister(OperandVector &Operands); + bool tryParseNeonVectorRegister(OperandVector &Operands); OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands); + template <bool ParseSuffix> + OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands); public: enum AArch64MatchResultTy { @@ -194,7 +199,9 @@ private: struct RegOp { unsigned RegNum; - bool isVector; + RegKind Kind; + + int ElementWidth; }; struct VectorListOp { @@ -804,34 +811,50 @@ public: return SysReg.PStateField != -1U; } - bool isReg() const override { return Kind == k_Register && !Reg.isVector; } - bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } + bool isReg() const override { + return Kind == k_Register && Reg.Kind == RegKind::Scalar; + } + + bool isNeonVectorReg() const { + return Kind == k_Register && Reg.Kind == RegKind::NeonVector; + } - bool isVectorRegLo() const { - return Kind == k_Register && Reg.isVector && + bool isNeonVectorRegLo() const { + return Kind == k_Register && Reg.Kind == RegKind::NeonVector && AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains( Reg.RegNum); } + template <unsigned Class = AArch64::ZPRRegClassID> + bool isSVEDataVectorReg() const { + return (Kind == k_Register && Reg.Kind == RegKind::SVEDataVector) && + AArch64MCRegisterClasses[Class].contains(getReg()); + } + + template <int ElementWidth> bool isSVEDataVectorRegOfWidth() const { + return isSVEDataVectorReg() && + (ElementWidth == -1 || Reg.ElementWidth == ElementWidth); + } + bool isGPR32as64() const { - return Kind == k_Register && !Reg.isVector && + return Kind == k_Register && Reg.Kind == RegKind::Scalar && AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } bool isWSeqPair() const { - return Kind == k_Register && !Reg.isVector && + return Kind == k_Register && Reg.Kind == RegKind::Scalar && AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains( Reg.RegNum); } bool isXSeqPair() const { - return Kind == k_Register && !Reg.isVector && + return Kind == k_Register && Reg.Kind == RegKind::Scalar && AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains( Reg.RegNum); } bool isGPR64sp0() const { - return Kind == k_Register && !Reg.isVector && + return Kind == k_Register && Reg.Kind == RegKind::Scalar && AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum); } @@ -1564,10 +1587,22 @@ public: } static std::unique_ptr<AArch64Operand> - CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) { + CreateReg(unsigned RegNum, RegKind Kind, SMLoc S, SMLoc E, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Register, Ctx); Op->Reg.RegNum = RegNum; - Op->Reg.isVector = isVector; + Op->Reg.Kind = Kind; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<AArch64Operand> + CreateReg(unsigned RegNum, RegKind Kind, unsigned ElementWidth, + SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_Register, Ctx); + Op->Reg.RegNum = RegNum; + Op->Reg.ElementWidth = ElementWidth; + Op->Reg.Kind = Kind; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -1791,7 +1826,7 @@ static unsigned MatchRegisterName(StringRef Name); /// } -static unsigned matchVectorRegName(StringRef Name) { +static unsigned MatchNeonVectorRegName(StringRef Name) { return StringSwitch<unsigned>(Name.lower()) .Case("v0", AArch64::Q0) .Case("v1", AArch64::Q1) @@ -1853,6 +1888,57 @@ static bool isValidVectorKind(StringRef Name) { .Default(false); } +static unsigned matchSVEDataVectorRegName(StringRef Name) { + return StringSwitch<unsigned>(Name.lower()) + .Case("z0", AArch64::Z0) + .Case("z1", AArch64::Z1) + .Case("z2", AArch64::Z2) + .Case("z3", AArch64::Z3) + .Case("z4", AArch64::Z4) + .Case("z5", AArch64::Z5) + .Case("z6", AArch64::Z6) + .Case("z7", AArch64::Z7) + .Case("z8", AArch64::Z8) + .Case("z9", AArch64::Z9) + .Case("z10", AArch64::Z10) + .Case("z11", AArch64::Z11) + .Case("z12", AArch64::Z12) + .Case("z13", AArch64::Z13) + .Case("z14", AArch64::Z14) + .Case("z15", AArch64::Z15) + .Case("z16", AArch64::Z16) + .Case("z17", AArch64::Z17) + .Case("z18", AArch64::Z18) + .Case("z19", AArch64::Z19) + .Case("z20", AArch64::Z20) + .Case("z21", AArch64::Z21) + .Case("z22", AArch64::Z22) + .Case("z23", AArch64::Z23) + .Case("z24", AArch64::Z24) + .Case("z25", AArch64::Z25) + .Case("z26", AArch64::Z26) + .Case("z27", AArch64::Z27) + .Case("z28", AArch64::Z28) + .Case("z29", AArch64::Z29) + .Case("z30", AArch64::Z30) + .Case("z31", AArch64::Z31) + .Default(0); +} + +static bool isValidSVEKind(StringRef Name) { + return StringSwitch<bool>(Name.lower()) + .Case(".b", true) + .Case(".h", true) + .Case(".s", true) + .Case(".d", true) + .Case(".q", true) + .Default(false); +} + +static bool isSVEDataVectorRegister(StringRef Name) { + return Name[0] == 'z'; +} + static void parseValidVectorKind(StringRef Name, unsigned &NumElements, char &ElementKind) { assert(isValidVectorKind(Name)); @@ -1881,19 +1967,30 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, // Matches a register name or register alias previously defined by '.req' unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name, - bool isVector) { - unsigned RegNum = isVector ? matchVectorRegName(Name) - : MatchRegisterName(Name); + RegKind Kind) { + unsigned RegNum; + switch (Kind) { + case RegKind::Scalar: + RegNum = MatchRegisterName(Name); + break; + case RegKind::NeonVector: + RegNum = MatchNeonVectorRegName(Name); + break; + case RegKind::SVEDataVector: + RegNum = matchSVEDataVectorRegName(Name); + break; + } - if (RegNum == 0) { + if (!RegNum) { // Check for aliases registered via .req. Canonicalize to lower case. // That's more consistent since register names are case insensitive, and // it's how the original entry was passed in from MC/MCParser/AsmParser. auto Entry = RegisterReqs.find(Name.lower()); if (Entry == RegisterReqs.end()) return 0; + // set RegNum if the match is the right kind of register - if (isVector == Entry->getValue().first) + if (Kind == Entry->getValue().first) RegNum = Entry->getValue().second; } return RegNum; @@ -1909,7 +2006,10 @@ int AArch64AsmParser::tryParseRegister() { return -1; std::string lowerCase = Tok.getString().lower(); - unsigned RegNum = matchRegisterNameAlias(lowerCase, false); + if (isSVEDataVectorRegister(lowerCase)) + return -1; + + unsigned RegNum = matchRegisterNameAlias(lowerCase, RegKind::Scalar); // Also handle a few aliases of registers. if (RegNum == 0) RegNum = StringSwitch<unsigned>(lowerCase) @@ -1940,7 +2040,7 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { // a '.'. size_t Start = 0, Next = Name.find('.'); StringRef Head = Name.slice(Start, Next); - unsigned RegNum = matchRegisterNameAlias(Head, true); + unsigned RegNum = matchRegisterNameAlias(Head, RegKind::NeonVector); if (RegNum) { if (Next != StringRef::npos) { @@ -2559,8 +2659,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { return MatchOperand_Success; } -/// tryParseVectorRegister - Parse a vector register operand. -bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) { +/// tryParseNeonVectorRegister - Parse a vector register operand. +bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) { MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::Identifier)) return true; @@ -2572,7 +2672,9 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) { if (Reg == -1) return true; Operands.push_back( - AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); + AArch64Operand::CreateReg(Reg, RegKind::NeonVector, S, getLoc(), + getContext())); + // If there was an explicit qualifier, that goes on as a literal text // operand. if (!Kind.empty()) @@ -2603,19 +2705,48 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) { return false; } +// tryParseSVEDataVectorRegister - Try to parse a SVE vector register name with +// optional kind specifier. If it is a register specifier, eat the token +// and return it. +int AArch64AsmParser::tryParseSVEDataVectorRegister(const AsmToken &Tok, + StringRef &Kind) { + if (Tok.isNot(AsmToken::Identifier)) + return -1; + + StringRef Name = Tok.getString(); + // If there is a kind specifier, it's separated from the register name by + // a '.'. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + unsigned RegNum = matchRegisterNameAlias(Head, RegKind::SVEDataVector); + + if (RegNum) { + if (Next != StringRef::npos) { + Kind = Name.slice(Next, StringRef::npos); + if (!isValidSVEKind(Kind)) { + TokError("invalid sve vector kind qualifier"); + return -1; + } + } + return RegNum; + } + + return -1; +} + /// parseRegister - Parse a non-vector register operand. bool AArch64AsmParser::parseRegister(OperandVector &Operands) { SMLoc S = getLoc(); - // Try for a vector register. - if (!tryParseVectorRegister(Operands)) + // Try for a vector (neon) register. + if (!tryParseNeonVectorRegister(Operands)) return false; // Try for a scalar register. int64_t Reg = tryParseRegister(); if (Reg == -1) return true; - Operands.push_back( - AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); + Operands.push_back(AArch64Operand::CreateReg(Reg, RegKind::Scalar, S, + getLoc(), getContext())); return false; } @@ -2783,7 +2914,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; - unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false); + unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), RegKind::Scalar); MCContext &Ctx = getContext(); const MCRegisterInfo *RI = Ctx.getRegisterInfo(); @@ -2795,7 +2926,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { if (!parseOptionalToken(AsmToken::Comma)) { Operands.push_back( - AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + AArch64Operand::CreateReg(RegNum, RegKind::Scalar, S, getLoc(), Ctx)); return MatchOperand_Success; } @@ -2814,7 +2945,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { } Operands.push_back( - AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + AArch64Operand::CreateReg(RegNum, RegKind::Scalar, S, getLoc(), Ctx)); return MatchOperand_Success; } @@ -3529,8 +3660,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op.getStartLoc(), getContext()); Operands[2] = AArch64Operand::CreateReg( - RegWidth == 32 ? AArch64::WZR : AArch64::XZR, false, SMLoc(), - SMLoc(), getContext()); + RegWidth == 32 ? AArch64::WZR : AArch64::XZR, RegKind::Scalar, + SMLoc(), SMLoc(), getContext()); Operands[3] = AArch64Operand::CreateImm( ImmRExpr, LSBOp.getStartLoc(), LSBOp.getEndLoc(), getContext()); Operands.emplace_back( @@ -3666,8 +3797,9 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]); if (Op.isReg()) { unsigned Reg = getXRegFromWReg(Op.getReg()); - Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(), - Op.getEndLoc(), getContext()); + Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar, + Op.getStartLoc(), Op.getEndLoc(), + getContext()); } } // FIXME: Likewise for sxt[bh] with a Xd dst operand @@ -3681,7 +3813,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]); if (Op.isReg()) { unsigned Reg = getXRegFromWReg(Op.getReg()); - Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(), + Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar, + Op.getStartLoc(), Op.getEndLoc(), getContext()); } } @@ -3697,7 +3830,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]); if (Op.isReg()) { unsigned Reg = getWRegFromXReg(Op.getReg()); - Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(), + Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar, + Op.getStartLoc(), Op.getEndLoc(), getContext()); } } @@ -4158,14 +4292,25 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Eat the '.req' token. SMLoc SRegLoc = getLoc(); unsigned RegNum = tryParseRegister(); - bool IsVector = false; + RegKind RegisterKind = RegKind::Scalar; if (RegNum == static_cast<unsigned>(-1)) { StringRef Kind; + RegisterKind = RegKind::NeonVector; RegNum = tryMatchVectorRegister(Kind, false); if (!Kind.empty()) return Error(SRegLoc, "vector register without type specifier expected"); - IsVector = true; + } + + if (RegNum == static_cast<unsigned>(-1)) { + StringRef Kind; + RegisterKind = RegKind::SVEDataVector; + int RegNumTmp = tryParseSVEDataVectorRegister(Parser.getTok(), Kind); + if (RegNumTmp != -1) + Parser.Lex(); + RegNum = RegNumTmp; + if (!Kind.empty()) + return Error(SRegLoc, "sve vector register without type specifier expected"); } if (RegNum == static_cast<unsigned>(-1)) @@ -4176,7 +4321,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { "unexpected input in .req directive")) return true; - auto pair = std::make_pair(IsVector, RegNum); + auto pair = std::make_pair(RegisterKind, RegNum); if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair) Warning(L, "ignoring redefinition of register alias '" + Name + "'"); @@ -4388,8 +4533,43 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) { &AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID]); } - Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(), - getContext())); + Operands.push_back(AArch64Operand::CreateReg(Pair, RegKind::Scalar, S, + getLoc(), getContext())); + + return MatchOperand_Success; +} + +template <bool ParseSuffix> +OperandMatchResultTy +AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + const SMLoc S = getLoc(); + // Check for a SVE vector register specifier first. + StringRef Kind; + int RegNum = tryParseSVEDataVectorRegister(Parser.getTok(), Kind); + if (RegNum == -1) + return MatchOperand_NoMatch; + + // Eat the SVE Register Token + Parser.Lex(); + + if (ParseSuffix && Kind.empty()) + return MatchOperand_NoMatch; + + unsigned ElementWidth = StringSwitch<unsigned>(Kind.lower()) + .Case("", -1) + .Case(".b", 8) + .Case(".h", 16) + .Case(".s", 32) + .Case(".d", 64) + .Case(".q", 128) + .Default(0); + if (!ElementWidth) + return MatchOperand_NoMatch; + + Operands.push_back( + AArch64Operand::CreateReg(RegNum, RegKind::SVEDataVector, ElementWidth, + S, S, getContext())); return MatchOperand_Success; } diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 73db32c0487..aea1b4f2d2c 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -85,6 +85,9 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decode); static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, uint64_t Address, @@ -436,6 +439,27 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, Inst.addOperand(MCOperand::createReg(Register)); return Success; } +static const unsigned ZPRDecoderTable[] = { + AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3, + AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7, + AArch64::Z8, AArch64::Z9, AArch64::Z10, AArch64::Z11, + AArch64::Z12, AArch64::Z13, AArch64::Z14, AArch64::Z15, + AArch64::Z16, AArch64::Z17, AArch64::Z18, AArch64::Z19, + AArch64::Z20, AArch64::Z21, AArch64::Z22, AArch64::Z23, + AArch64::Z24, AArch64::Z25, AArch64::Z26, AArch64::Z27, + AArch64::Z28, AArch64::Z29, AArch64::Z30, AArch64::Z31 +}; + +static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void* Decoder) { + if (RegNo > 31) + return Fail; + + unsigned Register = ZPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} static const unsigned VectorDecoderTable[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 62e5d02f603..bdf71b095fd 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -1340,3 +1340,23 @@ void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo, O << "#" << (Val * Angle) + Remainder; } +template <char suffix> +void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + switch (suffix) { + case 0: + case 'b': + case 'h': + case 's': + case 'd': + case 'q': + break; + default: llvm_unreachable("Invalid kind specifier."); + } + + unsigned Reg = MI->getOperand(OpNum).getReg(); + O << getRegisterName(Reg); + if (suffix != 0) + O << '.' << suffix; +}
\ No newline at end of file diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 8515ad24c71..76f20f042ce 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -165,6 +165,9 @@ protected: void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template <char = 0> + void printSVERegOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 7fba4849438..c5da457c38f 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -106,13 +106,15 @@ AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() { PrivateLabelPrefix = ".L"; AlignmentIsInBytes = false; SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::WinEH; + CodePointerSize = 8; } AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() { CommentString = ";"; + ExceptionsType = ExceptionHandling::WinEH; } AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() { CommentString = "//"; + ExceptionsType = ExceptionHandling::DwarfCFI; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 9d0f39e5f6a..c88363d2c25 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -23,7 +23,15 @@ public: std::unique_ptr<MCCodeEmitter> CE, raw_pwrite_stream &OS) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {} + + void FinishImpl() override; }; + +void AArch64WinCOFFStreamer::FinishImpl() { + EmitFrames(nullptr); + + MCWinCOFFStreamer::FinishImpl(); +} } // end anonymous namespace namespace llvm { diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td new file mode 100644 index 00000000000..e74bab8b7fe --- /dev/null +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -0,0 +1,41 @@ +//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> { + def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>; +} diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 156f7bc6512..b17b6716766 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -400,7 +400,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { return false; FastMathFlags FMF = FPOp->getFastMathFlags(); - bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() || + bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() || FMF.allowReciprocal(); // With UnsafeDiv node will be optimized to just rcp and mul. diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 2329fffd521..91fe921bfee 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -15,7 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c313e4a04ef..f04efd71fa0 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -204,6 +204,7 @@ private: void SelectADD_SUB_I64(SDNode *N); void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); + void SelectMAD_64_32(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); @@ -594,6 +595,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectDIV_SCALE(N); return; } + case AMDGPUISD::MAD_I64_I32: + case AMDGPUISD::MAD_U64_U32: { + SelectMAD_64_32(N); + return; + } case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); @@ -814,6 +820,19 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } +// We need to handle this here because tablegen doesn't support matching +// instructions with multiple outputs. +void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { + SDLoc SL(N); + bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; + unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32; + + SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + Clamp }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); +} + bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, unsigned OffsetBits) const { if ((OffsetBits == 16 && !isUInt<16>(Offset)) || diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index fe2c9337721..d502b77447d 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -128,27 +128,20 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } -bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op) -{ - assert(Op.getOpcode() == ISD::OR); - - SDValue N0 = Op->getOperand(0); - SDValue N1 = Op->getOperand(1); - EVT VT = N0.getValueType(); - - if (VT.isInteger() && !VT.isVector()) { - KnownBits LHSKnown, RHSKnown; - DAG.computeKnownBits(N0, LHSKnown); +unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { + KnownBits Known; + EVT VT = Op.getValueType(); + DAG.computeKnownBits(Op, Known); - if (LHSKnown.Zero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSKnown); + return VT.getSizeInBits() - Known.countMinLeadingZeros(); +} - if (!(~RHSKnown.Zero & ~LHSKnown.Zero)) - return true; - } - } +unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); - return false; + // In order for this to be a signed 24-bit value, bit 23, must + // be a sign bit. + return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op); } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, @@ -2615,21 +2608,14 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, //===----------------------------------------------------------------------===// static bool isU24(SDValue Op, SelectionDAG &DAG) { - KnownBits Known; - EVT VT = Op.getValueType(); - DAG.computeKnownBits(Op, Known); - - return (VT.getSizeInBits() - Known.countMinLeadingZeros()) <= 24; + return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24; } static bool isI24(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - - // In order for this to be a signed 24-bit value, bit 23, must - // be a sign bit. return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated // as unsigned 24-bit values. - (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24; + AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24; } static bool simplifyI24(SDNode *Node24, unsigned OpIdx, @@ -2914,21 +2900,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); return DAG.getZExtOrTrunc(Shl, SL, VT); } - case ISD::OR: - if (!isOrEquivalentToAdd(DAG, LHS)) - break; - LLVM_FALLTHROUGH; - case ISD::ADD: { - // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) - if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) { - SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), - SDValue(RHS, 0)); - SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal, - SDLoc(C2), VT); - return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V); - } - break; - } } if (VT != MVT::i64) @@ -3946,6 +3917,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MUL_LOHI_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) + NODE_NAME_CASE(MAD_I64_I32) + NODE_NAME_CASE(MAD_U64_U32) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(EXPORT_DONE) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index cdb15186f86..ba35aeb90ed 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -35,7 +35,8 @@ private: SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const; public: - static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op); + static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG); + static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); protected: const AMDGPUSubtarget *Subtarget; @@ -379,6 +380,8 @@ enum NodeType : unsigned { MULHI_I24, MAD_U24, MAD_I24, + MAD_U64_U32, + MAD_I64_I32, MUL_LOHI_I24, MUL_LOHI_U24, TEXTURE_FETCH, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 41cc7d7093e..f1a42b42f1f 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -18,7 +18,7 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp index e7e54750fe6..714c60a7446 100644 --- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -487,7 +487,7 @@ bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName, bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const { if (auto Op = dyn_cast<FPMathOperator>(CI)) - if (Op->hasUnsafeAlgebra()) + if (Op->isFast()) return true; const Function *F = CI->getParent()->getParent(); Attribute Attr = F->getFnAttribute("unsafe-fp-math"); @@ -1337,7 +1337,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, // for OpenCL 2.0 we have only generic implementation of sincos // function. AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); - nf.getLeads()[0].PtrKind = AMDGPULibFunc::GENERIC; + const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M); + nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS); Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf)); if (!Fsincos) return false; @@ -1350,7 +1351,6 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, // The allocaInst allocates the memory in private address space. This need // to be bitcasted to point to the address space of cos pointer type. // In OpenCL 2.0 this is generic, while in 1.2 that is private. - const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M); if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS) P = B.CreateAddrSpaceCast(Alloc, PTy); CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp index 919e8c1e13c..4671273d61f 100644 --- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPU.h" #include "AMDGPULibFunc.h" #include <llvm/ADT/SmallString.h> #include <llvm/ADT/SmallVector.h> @@ -458,13 +459,16 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() { P.ArgType = AMDGPULibFunc::I32; break; - case E_CONSTPTR_SWAPGL: - switch (P.PtrKind & AMDGPULibFunc::ADDR_SPACE) { - case AMDGPULibFunc::GLOBAL: P.PtrKind = AMDGPULibFunc::LOCAL; break; - case AMDGPULibFunc::LOCAL: P.PtrKind = AMDGPULibFunc::GLOBAL; break; + case E_CONSTPTR_SWAPGL: { + unsigned AS = AMDGPULibFunc::getAddrSpaceFromEPtrKind(P.PtrKind); + switch (AS) { + case AMDGPUAS::GLOBAL_ADDRESS: AS = AMDGPUAS::LOCAL_ADDRESS; break; + case AMDGPUAS::LOCAL_ADDRESS: AS = AMDGPUAS::GLOBAL_ADDRESS; break; } + P.PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS); P.PtrKind |= AMDGPULibFunc::CONST; break; + } default: llvm_unreachable("Unhandeled param rule"); } @@ -590,19 +594,14 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param, if (eatTerm(param, 'P')) { if (eatTerm(param, 'K')) res.PtrKind |= AMDGPULibFunc::CONST; if (eatTerm(param, 'V')) res.PtrKind |= AMDGPULibFunc::VOLATILE; + unsigned AS; if (!eatTerm(param, "U3AS")) { - res.PtrKind |= AMDGPULibFunc::PRIVATE; + AS = 0; } else { - switch(param.front()) { - case '1': res.PtrKind |= AMDGPULibFunc::GLOBAL; break; - case '2': res.PtrKind |= AMDGPULibFunc::READONLY;break; - case '3': res.PtrKind |= AMDGPULibFunc::LOCAL; break; - case '4': res.PtrKind |= AMDGPULibFunc::GENERIC; break; - case '5': res.PtrKind |= AMDGPULibFunc::OTHER; break; - default: return false; - } + AS = param.front() - '0'; drop_front(param, 1); } + res.PtrKind |= AMDGPULibFuncBase::getEPtrKindFromAddrSpace(AS); } else { res.PtrKind = AMDGPULibFunc::BYVALUE; } @@ -837,7 +836,9 @@ public: os << 'P'; if (p.PtrKind & AMDGPULibFunc::CONST) os << 'K'; if (p.PtrKind & AMDGPULibFunc::VOLATILE) os << 'V'; - int AS = UseAddrSpace ? (p.PtrKind & AMDGPULibFunc::ADDR_SPACE)-1 : 0; + unsigned AS = UseAddrSpace + ? AMDGPULibFuncBase::getAddrSpaceFromEPtrKind(p.PtrKind) + : 0; if (AS != 0) os << "U3AS" << AS; Ptr = p; p.PtrKind = 0; diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.h b/lib/Target/AMDGPU/AMDGPULibFunc.h index 8f4297b4a49..5405bc64571 100644 --- a/lib/Target/AMDGPU/AMDGPULibFunc.h +++ b/lib/Target/AMDGPU/AMDGPULibFunc.h @@ -283,14 +283,7 @@ public: enum EPtrKind { BYVALUE = 0, - PRIVATE, - GLOBAL, - READONLY, - LOCAL, - GENERIC, - OTHER, - - ADDR_SPACE = 0xF, + ADDR_SPACE = 0xF, // Address space takes value 0x1 ~ 0xF. CONST = 0x10, VOLATILE = 0x20 }; @@ -315,6 +308,17 @@ public: static bool isMangled(EFuncId Id) { return static_cast<unsigned>(Id) <= static_cast<unsigned>(EI_LAST_MANGLED); } + + static unsigned getEPtrKindFromAddrSpace(unsigned AS) { + assert(((AS + 1) & ~ADDR_SPACE) == 0); + return AS + 1; + } + + static unsigned getAddrSpaceFromEPtrKind(unsigned Kind) { + Kind = Kind & ADDR_SPACE; + assert(Kind >= 1); + return Kind - 1; + } }; class AMDGPULibFuncImpl : public AMDGPULibFuncBase { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 9fc9592bdc5..83122281d2b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -23,7 +23,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include <algorithm> using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 56a5fa634b5..6ee529c8549 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -462,6 +462,10 @@ public: return isAmdHsaOS() || isMesaKernel(MF); } + bool hasMad64_32() const { + return getGeneration() >= SEA_ISLANDS; + } + bool hasFminFmaxLegacy() const { return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f7ecdea7704..14f26f787ab 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -245,6 +245,10 @@ GCNMinRegSchedRegistry("gcn-minreg", static StringRef computeDataLayout(const Triple &TT) { if (TT.getArch() == Triple::r600) { // 32-bit pointers. + if (TT.getEnvironmentName() == "amdgiz" || + TT.getEnvironmentName() == "amdgizcl") + return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d729dcc439e..d1120f5e330 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2134,12 +2134,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) { - // FIXME: Remove this hack for function pointer types. - const GlobalValue *GV = GA->getGlobal(); - assert(Callee.getValueType() == MVT::i32); - Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), - false, GA->getTargetFlags()); + // FIXME: Remove this hack for function pointer types after removing + // support of old address space mapping. In the new address space + // mapping the pointer in default address space is 64 bit, therefore + // does not need this hack. + if (Callee.getValueType() == MVT::i32) { + const GlobalValue *GV = GA->getGlobal(); + Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false, + GA->getTargetFlags()); + } } + assert(Callee.getValueType() == MVT::i64); const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); @@ -5957,18 +5962,57 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, return 0; } +static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, + EVT VT, + SDValue N0, SDValue N1, SDValue N2, + bool Signed) { + unsigned MadOpc = Signed ? AMDGPUISD::MAD_I64_I32 : AMDGPUISD::MAD_U64_U32; + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1); + SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2); + return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad); +} + SDValue SITargetLowering::performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); - - if (VT != MVT::i32) - return SDValue(); - SDLoc SL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) + && Subtarget->hasMad64_32() && + !VT.isVector() && VT.getScalarSizeInBits() > 32 && + VT.getScalarSizeInBits() <= 64) { + if (LHS.getOpcode() != ISD::MUL) + std::swap(LHS, RHS); + + SDValue MulLHS = LHS.getOperand(0); + SDValue MulRHS = LHS.getOperand(1); + SDValue AddRHS = RHS; + + // TODO: Maybe restrict if SGPR inputs. + if (numBitsUnsigned(MulLHS, DAG) <= 32 && + numBitsUnsigned(MulRHS, DAG) <= 32) { + MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32); + MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32); + AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64); + return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false); + } + + if (numBitsSigned(MulLHS, DAG) < 32 && numBitsSigned(MulRHS, DAG) < 32) { + MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32); + MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32); + AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64); + return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true); + } + + return SDValue(); + } + + if (VT != MVT::i32) + return SDValue(); + // add x, zext (setcc) => addcarry x, 0, setcc // add x, sext (setcc) => subcarry x, 0, setcc unsigned Opc = LHS.getOpcode(); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ade909cc84e..5dde72910ee 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -14,15 +14,15 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H -#include "AMDGPUMachineFunction.h" #include "AMDGPUArgumentUsageInfo.h" +#include "AMDGPUMachineFunction.h" #include "SIRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include <array> @@ -87,9 +87,6 @@ public: /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo final : public AMDGPUMachineFunction { - // FIXME: This should be removed and getPreloadedValue moved here. - friend class SIRegisterInfo; - unsigned TIDReg = AMDGPU::NoRegister; // Registers that may be reserved for spilling purposes. These may be the same @@ -143,7 +140,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { private: unsigned LDSWaveSpillSize = 0; - unsigned ScratchOffsetReg; unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index aa041aab51c..666b80107dc 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -399,8 +399,10 @@ def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_ } // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { +let SchedRW = [WriteDouble, WriteSALU] in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; +} // End SchedRW = [WriteDouble, WriteSALU] } // End isCommutable = 1 } // End SubtargetPredicate = isCIVI diff --git a/lib/Target/ARC/ARCBranchFinalize.cpp b/lib/Target/ARC/ARCBranchFinalize.cpp index 0fb8a420d86..e5b0f8f3208 100644 --- a/lib/Target/ARC/ARCBranchFinalize.cpp +++ b/lib/Target/ARC/ARCBranchFinalize.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" #include <vector> using namespace llvm; diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h index ac5378adbd8..c042bec016c 100644 --- a/lib/Target/ARC/ARCFrameLowering.h +++ b/lib/Target/ARC/ARCFrameLowering.h @@ -17,7 +17,7 @@ #include "ARC.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/ARC/ARCInstrInfo.h b/lib/Target/ARC/ARCInstrInfo.h index 5285dce9f12..f965dd4ff7f 100644 --- a/lib/Target/ARC/ARCInstrInfo.h +++ b/lib/Target/ARC/ARCInstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_ARC_ARCINSTRINFO_H #include "ARCRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "ARCGenInstrInfo.inc" diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp index 66f95911d3e..bed47a0eab5 100644 --- a/lib/Target/ARC/ARCRegisterInfo.cpp +++ b/lib/Target/ARC/ARCRegisterInfo.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3688db943d5..dac11626a6f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -53,7 +54,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 9f168acd567..99bcf788ddf 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <array> #include <cstdint> diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 63b14ee98d7..eaa8d4c0f1a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -41,7 +42,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index e1323cd9427..9c10a1c79a4 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -417,6 +417,12 @@ struct FormalArgHandler : public IncomingValueHandler { bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<unsigned> VRegs) const { + auto &TLI = *getTLI<ARMTargetLowering>(); + auto Subtarget = TLI.getSubtarget(); + + if (Subtarget->isThumb()) + return false; + // Quick exit if there aren't any args if (F.arg_empty()) return true; @@ -427,12 +433,6 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, auto &MF = MIRBuilder.getMF(); auto &MBB = MIRBuilder.getMBB(); auto DL = MF.getDataLayout(); - auto &TLI = *getTLI<ARMTargetLowering>(); - - auto Subtarget = TLI.getSubtarget(); - - if (Subtarget->isThumb()) - return false; for (auto &Arg : F.args()) if (!isSupportedType(DL, TLI, Arg.getType())) diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 71b81936240..284b67fd59b 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -19,8 +19,8 @@ #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CallingConv.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5c967d9f906..f42d00ecf60 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -72,7 +73,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index ce4add974d6..ab8fbb47086 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" @@ -49,7 +50,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 2c10031e3f8..1f18e2bf80c 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include <vector> namespace llvm { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9cfa0d3a7c3..deece84ecf2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -57,6 +57,7 @@ #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -94,7 +95,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 309430b0e9c..34186dede0d 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -24,6 +24,54 @@ using namespace llvm; +/// FIXME: The following static functions are SizeChangeStrategy functions +/// that are meant to temporarily mimic the behaviour of the old legalization +/// based on doubling/halving non-legal types as closely as possible. This is +/// not entirly possible as only legalizing the types that are exactly a power +/// of 2 times the size of the legal types would require specifying all those +/// sizes explicitly. +/// In practice, not specifying those isn't a problem, and the below functions +/// should disappear quickly as we add support for legalizing non-power-of-2 +/// sized types further. +static void +addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result, + const LegalizerInfo::SizeAndActionsVec &v) { + for (unsigned i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 < v[i].first && i + 1 < v.size() && + v[i + 1].first != v[i].first + 1) + result.push_back({v[i].first + 1, LegalizerInfo::Unsupported}); + } +} + +static LegalizerInfo::SizeAndActionsVec +widen_8_16(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + static bool AEABI(const ARMSubtarget &ST) { return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); } @@ -49,14 +97,15 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { } for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { - for (auto Ty : {s1, s8, s16}) - setAction({Op, Ty}, WidenScalar); + if (Op != G_ADD) + setLegalizeScalarToDifferentSizeStrategy( + Op, 0, widenToLargerTypesUnsupportedOtherwise); setAction({Op, s32}, Legal); } for (unsigned Op : {G_SDIV, G_UDIV}) { - for (auto Ty : {s8, s16}) - setAction({Op, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(Op, 0, + widenToLargerTypesUnsupportedOtherwise); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Legal); else @@ -64,8 +113,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { } for (unsigned Op : {G_SREM, G_UREM}) { - for (auto Ty : {s8, s16}) - setAction({Op, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(Op, 0, widen_8_16); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Lower); else if (AEABI(ST)) @@ -74,10 +122,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, s32}, Libcall); } - for (unsigned Op : {G_SEXT, G_ZEXT}) { + for (unsigned Op : {G_SEXT, G_ZEXT, G_ANYEXT}) { setAction({Op, s32}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({Op, 1, Ty}, Legal); } for (unsigned Op : {G_ASHR, G_LSHR, G_SHL}) @@ -93,12 +139,11 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_BRCOND, s1}, Legal); setAction({G_CONSTANT, s32}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({G_CONSTANT, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16); setAction({G_ICMP, s1}, Legal); - for (auto Ty : {s8, s16}) - setAction({G_ICMP, 1, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1, + widenToLargerTypesUnsupportedOtherwise); for (auto Ty : {s32, p0}) setAction({G_ICMP, 1, Ty}, Legal); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 4aa7e150342..7424af9d5a5 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -41,10 +41,12 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" @@ -53,8 +55,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp index a34ed2cb5a2..5c9aad417ce 100644 --- a/lib/Target/ARM/ARMMacroFusion.cpp +++ b/lib/Target/ARM/ARMMacroFusion.cpp @@ -15,7 +15,7 @@ #include "ARMMacroFusion.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/MacroFusion.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" namespace llvm { diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 4f330e3a884..f6996f098c0 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -38,7 +39,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <bitset> diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index d911dd97b1a..a0b98a43108 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCInstrDesc.h" @@ -34,7 +35,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <algorithm> #include <cassert> #include <cstdint> diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp index 15a56752333..d2bebb9eeec 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -29,7 +29,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/AVR/AVRFrameLowering.h b/lib/Target/AVR/AVRFrameLowering.h index 30ef441183a..a0ba6c95127 100644 --- a/lib/Target/AVR/AVRFrameLowering.h +++ b/lib/Target/AVR/AVRFrameLowering.h @@ -10,7 +10,7 @@ #ifndef LLVM_AVR_FRAME_LOWERING_H #define LLVM_AVR_FRAME_LOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h index eee8a92c619..354edcec346 100644 --- a/lib/Target/AVR/AVRInstrInfo.h +++ b/lib/Target/AVR/AVRInstrInfo.h @@ -14,7 +14,7 @@ #ifndef LLVM_AVR_INSTR_INFO_H #define LLVM_AVR_INSTR_INFO_H -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "AVRRegisterInfo.h" diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 7099b29a8bc..b6ac93452cb 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -18,7 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Function.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "AVR.h" #include "AVRInstrInfo.h" diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h index 5db963f518b..b4ffa0713fa 100644 --- a/lib/Target/BPF/BPFFrameLowering.h +++ b/lib/Target/BPF/BPFFrameLowering.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_BPF_BPFFRAMELOWERING_H #define LLVM_LIB_TARGET_BPF_BPFFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class BPFSubtarget; diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h index c7048ab979b..f591f48a89a 100644 --- a/lib/Target/BPF/BPFInstrInfo.h +++ b/lib/Target/BPF/BPFInstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_BPF_BPFINSTRINFO_H #include "BPFRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "BPFGenInstrInfo.inc" diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 273843e9270..00d609e8960 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -18,10 +18,10 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #define GET_REGINFO_TARGET_DESC #include "BPFGenRegisterInfo.inc" diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 22794eb50e2..e28af5a844f 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -14,9 +14,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <vector> diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 501ac2c44bb..6336075917e 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -19,8 +19,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/PassSupport.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 296edbe1eff..988718860c5 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -15,7 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include <vector> namespace llvm { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index a5381c1fb1a..9b8970258a2 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -47,7 +48,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 2f172340c4e..1558c2e9850 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -19,8 +19,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetInstrInfo.h" #include <cstdint> #include <vector> diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 29e2bc32dfb..2154a485dc6 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -161,9 +161,16 @@ namespace { }; struct Simplifier { - using Rule = std::function<Value * (Instruction *, LLVMContext &)>; + struct Rule { + using FuncType = std::function<Value* (Instruction*, LLVMContext&)>; + Rule(StringRef N, FuncType F) : Name(N), Fn(F) {} + StringRef Name; // For debugging. + FuncType Fn; + }; - void addRule(const Rule &R) { Rules.push_back(R); } + void addRule(StringRef N, const Rule::FuncType &F) { + Rules.push_back(Rule(N, F)); + } private: struct WorkListType { @@ -522,7 +529,7 @@ Value *Simplifier::simplify(Context &C) { continue; bool Changed = false; for (Rule &R : Rules) { - Value *W = R(U, C.Ctx); + Value *W = R.Fn(U, C.Ctx); if (!W) continue; Changed = true; @@ -1544,8 +1551,30 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, return R; } +static bool hasZeroSignBit(const Value *V) { + if (const auto *CI = dyn_cast<const ConstantInt>(V)) + return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; + const Instruction *I = dyn_cast<const Instruction>(V); + if (!I) + return false; + switch (I->getOpcode()) { + case Instruction::LShr: + if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1))) + return SI->getZExtValue() > 0; + return false; + case Instruction::Or: + case Instruction::Xor: + return hasZeroSignBit(I->getOperand(0)) && + hasZeroSignBit(I->getOperand(1)); + case Instruction::And: + return hasZeroSignBit(I->getOperand(0)) || + hasZeroSignBit(I->getOperand(1)); + } + return false; +} + void PolynomialMultiplyRecognize::setupSimplifier() { - Simp.addRule( + Simp.addRule("sink-zext", // Sink zext past bitwise operations. [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::ZExt) @@ -1566,7 +1595,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { B.CreateZExt(T->getOperand(0), I->getType()), B.CreateZExt(T->getOperand(1), I->getType())); }); - Simp.addRule( + Simp.addRule("xor/and -> and/xor", // (xor (and x a) (and y a)) -> (and (xor x y) a) [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::Xor) @@ -1584,7 +1613,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), And0->getOperand(1)); }); - Simp.addRule( + Simp.addRule("sink binop into select", // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) [](Instruction *I, LLVMContext &Ctx) -> Value* { @@ -1610,7 +1639,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { } return nullptr; }); - Simp.addRule( + Simp.addRule("fold select-select", // (select c (select c x y) z) -> (select c x z) // (select c x (select c y z)) -> (select c x z) [](Instruction *I, LLVMContext &Ctx) -> Value* { @@ -1629,23 +1658,19 @@ void PolynomialMultiplyRecognize::setupSimplifier() { } return nullptr; }); - Simp.addRule( + Simp.addRule("or-signbit -> xor-signbit", // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::Or) return nullptr; - Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0)); - if (!LShr || LShr->getOpcode() != Instruction::LShr) - return nullptr; - ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1)); - if (!One || One->getZExtValue() != 1) - return nullptr; ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) return nullptr; - return IRBuilder<>(Ctx).CreateXor(LShr, Msb); + if (!hasZeroSignBit(I->getOperand(0))) + return nullptr; + return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); }); - Simp.addRule( + Simp.addRule("sink lshr into binop", // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::LShr) @@ -1667,7 +1692,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { B.CreateLShr(BitOp->getOperand(0), S), B.CreateLShr(BitOp->getOperand(1), S)); }); - Simp.addRule( + Simp.addRule("expose bitop-const", // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) [](Instruction *I, LLVMContext &Ctx) -> Value* { auto IsBitOp = [](unsigned Op) -> bool { @@ -1737,9 +1762,17 @@ bool PolynomialMultiplyRecognize::recognize() { // XXX: Currently this approach can modify the loop before being 100% sure // that the transformation can be carried out. bool FoundPreScan = false; + auto FeedsPHI = [LoopB](const Value *V) -> bool { + for (const Value *U : V->users()) { + if (const auto *P = dyn_cast<const PHINode>(U)) + if (P->getParent() == LoopB) + return true; + } + return false; + }; for (Instruction &In : *LoopB) { SelectInst *SI = dyn_cast<SelectInst>(&In); - if (!SI) + if (!SI || !FeedsPHI(SI)) continue; Simplifier::Context C(SI); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 93f1fd4109a..3c88eeeb8a4 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -24,12 +24,12 @@ #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 2525d272666..6cca5a849cc 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index d432bfef7ae..05865c43f2d 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -1706,28 +1706,27 @@ multiclass Loadxim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, defm: Loadxgim_pat<Load, VT, ValueMod, ImmPred, MI>; } -// Patterns to select load reg reg-indexed: Rs + Rt<<u2. -multiclass Loadxr_pat<PatFrag Load, ValueType VT, InstHexagon MI> { - let AddedComplexity = 40 in - def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; - - let AddedComplexity = 20 in - def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; -} - -// Patterns to select load reg reg-indexed: Rs + Rt<<u2 with value modifier. -multiclass Loadxrm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, - InstHexagon MI> { - let AddedComplexity = 40 in - def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), - (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>; +// Pattern to select load reg reg-indexed: Rs + Rt<<u2. +class Loadxr_shl_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; + +// Pattern to select load reg reg-indexed: Rs + Rt<<0. +class Loadxr_add_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +// Pattern to select load reg reg-indexed: Rs + Rt<<u2 with value modifier. +class Loadxrm_shl_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>; - let AddedComplexity = 20 in - def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))), - (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>; -} +// Pattern to select load reg reg-indexed: Rs + Rt<<0 with value modifier. +class Loadxrm_add_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>; // Pattern to select load long-offset reg-indexed: Addr + Rt<<u2. // Don't match for u2==0, instead use reg+imm for those cases. @@ -1777,17 +1776,19 @@ let AddedComplexity = 20 in { defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>; } -defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; -defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; -defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; -defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; -defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; -defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; -defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; -defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; -defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>; -defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>; -defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>; +let AddedComplexity = 30 in { + defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; + defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; + defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; + defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; + defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; + defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; + defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; + defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; + defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>; + defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>; + defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>; +} let AddedComplexity = 60 in { def: Loadxu_pat<extloadi8, i32, anyimm0, L4_loadrub_ur>; @@ -1818,26 +1819,55 @@ let AddedComplexity = 60 in { def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; } -defm: Loadxr_pat<extloadi8, i32, L4_loadrub_rr>; -defm: Loadxr_pat<zextloadi8, i32, L4_loadrub_rr>; -defm: Loadxr_pat<sextloadi8, i32, L4_loadrb_rr>; -defm: Loadxr_pat<extloadi16, i32, L4_loadruh_rr>; -defm: Loadxr_pat<zextloadi16, i32, L4_loadruh_rr>; -defm: Loadxr_pat<sextloadi16, i32, L4_loadrh_rr>; -defm: Loadxr_pat<load, i32, L4_loadri_rr>; -defm: Loadxr_pat<load, i64, L4_loadrd_rr>; -defm: Loadxr_pat<load, f32, L4_loadri_rr>; -defm: Loadxr_pat<load, f64, L4_loadrd_rr>; - -defm: Loadxrm_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; -defm: Loadxrm_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; -defm: Loadxrm_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; -defm: Loadxrm_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; -defm: Loadxrm_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; -defm: Loadxrm_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; -defm: Loadxrm_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; -defm: Loadxrm_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; -defm: Loadxrm_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; +let AddedComplexity = 40 in { + def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; + def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; + def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; +} + +let AddedComplexity = 20 in { + def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxr_add_pat<load, i32, L4_loadri_rr>; + def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; + def: Loadxr_add_pat<load, f32, L4_loadri_rr>; + def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; +} + +let AddedComplexity = 40 in { + def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; + def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; + def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; + def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; + def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; + def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; + def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; + def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; + def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; +} + +let AddedComplexity = 20 in { + def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; + def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; + def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; + def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; + def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; + def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; + def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; + def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; + def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; +} // Absolute address diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 7d961a238ae..da53a09a6fc 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -44,12 +44,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index e491c757670..f29f321214c 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -26,13 +26,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 68484344fde..0ff3afff5f5 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index a0fdc70e141..52e5dcd4638 100644 --- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -548,14 +548,13 @@ bool HexagonVectorLoopCarriedReuse::doVLCR() { findValueToReuse(); if (ReuseCandidate.isDefined()) { reuseValue(); - Changed = true; - Continue = true; - } - std::for_each(Dependences.begin(), Dependences.end(), - std::default_delete<DepChain>()); - } while (Continue); - return Changed; -} + Changed = true;
+ Continue = true;
+ }
+ llvm::for_each(Dependences, std::default_delete<DepChain>());
+ } while (Continue);
+ return Changed;
+}
void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, DepChain &D) { diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index de58ddff339..22bb8841f5f 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp index 802232b0582..6b4fa777178 100644 --- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp +++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h index 2f9b6c3c158..ca690d513fc 100644 --- a/lib/Target/Lanai/LanaiFrameLowering.h +++ b/lib/Target/Lanai/LanaiFrameLowering.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H #include "Lanai.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h index 4387fe1af3c..f07fede67a4 100644 --- a/lib/Target/Lanai/LanaiInstrInfo.h +++ b/lib/Target/Lanai/LanaiInstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_LANAI_LANAIINSTRINFO_H #include "LanaiRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "LanaiGenInstrInfo.inc" diff --git a/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/lib/Target/Lanai/LanaiMemAluCombiner.cpp index 7259c02194c..c29c933db74 100644 --- a/lib/Target/Lanai/LanaiMemAluCombiner.cpp +++ b/lib/Target/Lanai/LanaiMemAluCombiner.cpp @@ -30,8 +30,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; #define GET_INSTRMAP_INFO diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp index 6ea477dce3e..56a5e0ea2de 100644 --- a/lib/Target/Lanai/LanaiRegisterInfo.cpp +++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp @@ -20,11 +20,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #define GET_REGINFO_TARGET_DESC #include "LanaiGenRegisterInfo.inc" diff --git a/lib/Target/Lanai/LanaiSubtarget.h b/lib/Target/Lanai/LanaiSubtarget.h index 2732ef3097e..313d950e8aa 100644 --- a/lib/Target/Lanai/LanaiSubtarget.h +++ b/lib/Target/Lanai/LanaiSubtarget.h @@ -19,7 +19,7 @@ #include "LanaiInstrInfo.h" #include "LanaiSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h index ce1271d9dea..2fb1a053610 100644 --- a/lib/Target/Lanai/LanaiTargetMachine.h +++ b/lib/Target/Lanai/LanaiTargetMachine.h @@ -19,7 +19,7 @@ #include "LanaiInstrInfo.h" #include "LanaiSelectionDAGInfo.h" #include "LanaiSubtarget.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index fdc4aa52a19..8807101f37c 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_MSP430_MSP430FRAMELOWERING_H #include "MSP430.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class MSP430FrameLowering : public TargetFrameLowering { diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index d81f17e753c..45357f54c9c 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_MSP430_MSP430INSTRINFO_H #include "MSP430RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "MSP430GenInstrInfo.inc" diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 97b5e810a1d..4935b80cfdd 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -16,7 +16,7 @@ #define LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H #include "MSP430Subtarget.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 002fa512b21..d8e2eef6a9f 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -535,7 +535,7 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair, uint64_t Address, const void *Decoder); @@ -2481,10 +2481,8 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair, uint64_t Address, const void *Decoder) { - unsigned RegPair = fieldFromInstruction(Insn, 7, 3); - switch (RegPair) { default: return MCDisassembler::Fail; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 12f7638594d..eae0f975080 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -1115,6 +1115,29 @@ MipsMCCodeEmitter::getMovePRegPairOpValue(const MCInst &MI, unsigned OpNo, } unsigned +MipsMCCodeEmitter::getMovePRegSingleOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(((OpNo == 2) || (OpNo == 3)) && + "Unexpected OpNo for movep operand encoding!"); + + MCOperand Op = MI.getOperand(OpNo); + assert(Op.isReg() && "Operand of movep is not a register!"); + switch (Op.getReg()) { + default: + llvm_unreachable("Unknown register for movep!"); + case Mips::ZERO: return 0; + case Mips::S1: return 1; + case Mips::V0: return 2; + case Mips::V1: return 3; + case Mips::S0: return 4; + case Mips::S2: return 5; + case Mips::S3: return 6; + case Mips::S4: return 7; + } +} + +unsigned MipsMCCodeEmitter::getSimm23Lsl2Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index d12d3195521..1e840114b2b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -252,6 +252,9 @@ public: unsigned getMovePRegPairOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + unsigned getMovePRegSingleOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getSimm23Lsl2Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index aad6bf378ea..0bddba78145 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -246,8 +246,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { break; case MEK_CALL_HI16: case MEK_CALL_LO16: - case MEK_DTPREL_HI: - case MEK_DTPREL_LO: case MEK_GOT: case MEK_GOT_CALL: case MEK_GOT_DISP: @@ -263,14 +261,16 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { case MEK_NEG: case MEK_PCREL_HI16: case MEK_PCREL_LO16: - case MEK_TLSLDM: // If we do have nested target-specific expressions, they will be in // a consecutive chain. if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr())) E->fixELFSymbolsInTLSFixups(Asm); break; - case MEK_GOTTPREL: + case MEK_DTPREL_HI: + case MEK_DTPREL_LO: + case MEK_TLSLDM: case MEK_TLSGD: + case MEK_GOTTPREL: case MEK_TPREL_HI: case MEK_TPREL_LO: fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td index 2f0933277e8..e1f1f9262b9 100644 --- a/lib/Target/Mips/MicroMips32r6InstrFormats.td +++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td @@ -829,6 +829,21 @@ class POOL16C_NOT16_FM_MMR6 : MicroMipsR6Inst16 { let Inst{3-0} = 0b0000; } +class POOL16C_MOVEP16_FM_MMR6 : MicroMipsR6Inst16 { + bits<3> dst_regs; + bits<3> rt; + bits<3> rs; + + bits<16> Inst; + + let Inst{15-10} = 0b010001; + let Inst{9-7} = dst_regs; + let Inst{6-4} = rt; + let Inst{3} = rs{2}; + let Inst{2} = 0b1; + let Inst{1-0} = rs{1-0}; +} + class POOL16C_OR16_XOR16_FM_MMR6<bits<4> op> : MicroMipsR6Inst16 { bits<3> rt; bits<3> rs; diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index 425e75e14c8..49d6ae3f98a 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -229,6 +229,7 @@ class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>, MicroMipsR6Inst16; class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>; class LI16_MMR6_ENC : LI_FM_MM16; class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>; +class MOVEP_MMR6_ENC : POOL16C_MOVEP16_FM_MMR6; class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>; class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6; class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>; @@ -1204,6 +1205,7 @@ class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, MMR6Arch<"li16">, MicroMipsR6Inst16, IsAsCheapAsAMove; class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">, MicroMipsR6Inst16; +class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMoveP>, MMR6Arch<"movep">; class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">, MicroMipsR6Inst16; class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, @@ -1679,6 +1681,8 @@ def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC, ISA_MICROMIPS32R6; def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC, ISA_MICROMIPS32R6; +def MOVEP_MMR6 : StdMMR6Rel, MOVEP_MMR6_DESC, MOVEP_MMR6_ENC, + ISA_MICROMIPS32R6; def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC, @@ -1879,3 +1883,10 @@ let AddedComplexity = 41 in { } def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6; + +def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), + (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6; + +def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), + (TAILCALL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6; + diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td index e0f4d833392..4f705feed0a 100644 --- a/lib/Target/Mips/MicroMips64r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td @@ -162,12 +162,11 @@ class DCLZ_MM64R6_DESC { class DINSU_MM64R6_DESC : InsBase<"dinsu", GPR64Opnd, uimm5_plus32, uimm5_inssize_plus1, immZExt5Plus32, - immZExt5Plus1, MipsIns>; + immZExt5Plus1>; class DINSM_MM64R6_DESC : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64, - immZExt5, immZExtRange2To64, MipsIns>; + immZExt5, immZExtRange2To64>; class DINS_MM64R6_DESC : InsBase<"dins", GPR64Opnd, uimm5_report_uimm6, - uimm5_inssize_plus1, immZExt5, immZExt5Plus1, - MipsIns>; + uimm5_inssize_plus1, immZExt5, immZExt5Plus1>; class DMTC0_MM64R6_DESC : MTC0_MMR6_DESC_BASE<"dmtc0", COP0Opnd, GPR64Opnd, II_DMTC0>; class DMTC1_MM64R6_DESC : MTC1_MMR6_DESC_BASE<"dmtc1", FGR64Opnd, GPR64Opnd, diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 1f869db4efe..48c1d94d03c 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -631,7 +631,8 @@ def ADDIUSP_MM : AddImmUSP<"addiusp">, ADDIUSP_FM_MM16; def MFHI16_MM : MoveFromHILOMM<"mfhi", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x10>; def MFLO16_MM : MoveFromHILOMM<"mflo", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x12>; def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>; -def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16; +def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16, + ISA_MICROMIPS_NOT_32R6_64R6; def LI16_MM : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, LI_FM_MM16, IsAsCheapAsAMove; def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>, @@ -884,7 +885,7 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, immZExt5, immZExt5Plus1, MipsExt>, EXT_FM_MM<0x2c>; def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, uimm5_inssize_plus1, - immZExt5, immZExt5Plus1, MipsIns>, + immZExt5, immZExt5Plus1>, EXT_FM_MM<0x0c>; /// Jump Instructions @@ -1061,13 +1062,13 @@ let Predicates = [InMicroMips] in { (LW_MM addr:$addr)>; def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs), (SUBu_MM GPR32:$lhs, GPR32:$rhs)>; - - def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), - (TAILCALL_MM tglobaladdr:$dst)>, ISA_MIPS1_NOT_32R6_64R6; - def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), - (TAILCALL_MM texternalsym:$dst)>, ISA_MIPS1_NOT_32R6_64R6; } +def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), + (TAILCALL_MM tglobaladdr:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6; +def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), + (TAILCALL_MM texternalsym:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6; + let AddedComplexity = 40 in { def : MipsPat<(i32 (sextloadi16 addrRegImm:$a)), (LH_MM addrRegImm:$a)>; diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 76bca3df2bc..cb59e2ddb1c 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -30,7 +30,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include <cassert> #include <cstdint> #include <vector> diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index bdb9eec4cc5..8ce47e3f669 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -17,8 +17,8 @@ #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 44771cbe8be..ff95f3c7228 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -22,6 +22,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -29,8 +31,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 04a050c2ff4..dbd47de4dad 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -341,13 +341,13 @@ let AdditionalPredicates = [NotInMicroMips] in { // for dinsm and dinsu like binutils. let DecoderMethod = "DecodeDINS" in { def DINS : InsBase<"dins", GPR64Opnd, uimm6, uimm5_inssize_plus1, - immZExt5, immZExt5Plus1, MipsIns>, EXT_FM<7>, + immZExt5, immZExt5Plus1>, EXT_FM<7>, ISA_MIPS64R2; def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32, uimm5_inssize_plus1, - immZExt5Plus32, immZExt5Plus1, MipsIns>, + immZExt5Plus32, immZExt5Plus1>, EXT_FM<6>, ISA_MIPS64R2; def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64, - immZExt5, immZExtRange2To64, MipsIns>, + immZExt5, immZExtRange2To64>, EXT_FM<5>, ISA_MIPS64R2; } } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index bec0ae6ba4c..5edd12c0232 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -64,7 +65,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index ef05166503b..27a85970da6 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); } +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - int64_t Offset = 0; + int64_t Size = 0; - // Iterate over fixed sized objects. + // Iterate over fixed sized objects which are incoming arguments. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) - Offset = std::max(Offset, -MFI.getObjectOffset(I)); + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); // Conservatively assume all callee-saved registers will be saved. for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { - unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); - Offset = alignTo(Offset + Size, Size); + unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); + Size = alignTo(Size + RegSize, RegSize); } - unsigned MaxAlign = MFI.getMaxAlignment(); - - // Check that MaxAlign is not zero if there is a stack object that is not a - // callee-saved spill. - assert(!MFI.getObjectIndexEnd() || MaxAlign); - - // Iterate over other objects. - for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) - Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign); - - // Call frame. - if (MFI.adjustsStack() && hasReservedCallFrame(MF)) - Offset = alignTo(Offset + MFI.getMaxCallFrameSize(), - std::max(MaxAlign, getStackAlignment())); - - return alignTo(Offset, getStackAlignment()); + // Get the size of the rest of the frame objects and any possible reserved + // call frame, accounting for alignment. + return Size + MFI.estimateStackSize(MF); } // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 8c4214c4c21..883c3267d51 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_MIPS_MIPSFRAMELOWERING_H #include "Mips.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class MipsSubtarget; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 38b3c3fb160..d31385f42d6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -27,8 +27,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -45,6 +45,8 @@ #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -62,8 +64,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index a5ed1be3bee..c18e395f901 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <cstdint> #define GET_INSTRINFO_HEADER diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index c4c3eb760c5..3502dbcdae9 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -212,6 +212,8 @@ def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">, AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">; def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; +def NotInMips16Mode : Predicate<"!Subtarget->inMips16Mode()">, + AssemblerPredicate<"!FeatureMips16">; def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; def NotCnMips : Predicate<"!Subtarget->hasCnMips()">, @@ -1544,7 +1546,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, PseudoInstExpansion<(JumpInst Opnd:$target)>; class TailCallReg<RegisterOperand RO> : - MipsPseudo<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>; + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>; } class BAL_BR_Pseudo<Instruction RealInst> : @@ -1726,12 +1728,13 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd, [(set RO:$rt, (Op RO:$rs, PosImm:$pos, SizeImm:$size))], II_EXT, FrmR, opstr>, ISA_MIPS32R2; +// 'ins' and its' 64 bit variants are matched by C++ code. class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd, - Operand SizeOpnd, PatFrag PosImm, PatFrag SizeImm, - SDPatternOperator Op = null_frag>: + Operand SizeOpnd, PatFrag PosImm, PatFrag SizeImm>: InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size, RO:$src), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RO:$rt, (Op RO:$rs, PosImm:$pos, SizeImm:$size, RO:$src))], + [(set RO:$rt, (null_frag RO:$rs, PosImm:$pos, SizeImm:$size, + RO:$src))], II_INS, FrmR, opstr>, ISA_MIPS32R2 { let Constraints = "$src = $rt"; } @@ -2086,7 +2089,7 @@ def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd>, BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6; def BAL_BR : BAL_BR_Pseudo<BGEZAL>; -let Predicates = [NotInMicroMips] in { +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips] in { def TAILCALL : TailCall<J, jmptarget>; } @@ -2103,6 +2106,7 @@ class PseudoIndirectBranchBase<RegisterOperand RO> : let isBranch = 1; let isIndirectBranch = 1; bit isCTI = 1; + let Predicates = [NotInMips16Mode]; } def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>; @@ -2236,7 +2240,7 @@ let AdditionalPredicates = [NotInMicroMips] in { EXT_FM<0>; def INS : MMRel, StdMMR6Rel, InsBase<"ins", GPR32Opnd, uimm5, uimm5_inssize_plus1, immZExt5, - immZExt5Plus1, MipsIns>, + immZExt5Plus1>, EXT_FM<4>; } /// Move Control Registers From/To CPU Registers @@ -2776,10 +2780,12 @@ def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), // (JALR GPR32:$dst)>; // Tail call -def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), - (TAILCALL tglobaladdr:$dst)>; -def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), - (TAILCALL texternalsym:$dst)>; +let AdditionalPredicates = [NotInMicroMips] in { + def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), + (TAILCALL tglobaladdr:$dst)>; + def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), + (TAILCALL texternalsym:$dst)>; +} // hi/lo relocs multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu, Register ZeroReg, RegisterOperand GPROpnd> { diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp index 01c0cbf8262..3910adb7316 100644 --- a/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -28,11 +28,11 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 9c64a0ecbb1..ec966afee0e 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -28,7 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cstdint> diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 08fb3d7d435..f64d91aad85 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -616,6 +616,7 @@ def GPRMM16OpndZero : RegisterOperand<GPRMM16Zero> { def GPRMM16OpndMoveP : RegisterOperand<GPRMM16MoveP> { let ParserMatchClass = GPRMM16AsmOperandMoveP; + let EncoderMethod = "getMovePRegSingleOpValue"; } def GPR64Opnd : RegisterOperand<GPR64> { diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 0b19b18449e..2d9cbabbc59 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCDwarf.h" @@ -37,7 +38,6 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> @@ -893,10 +893,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, } // Set scavenging frame index if necessary. - uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + - estimateStackSize(MF); + uint64_t MaxSPOffset = estimateStackSize(MF); - if (isInt<16>(MaxSPOffset)) + // MSA has a minimum offset of 10 bits signed. If there is a variable + // sized object on the stack, the estimation cannot account for it. + if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) && + !MF.getFrameInfo().hasVarSizedObjects()) return; const TargetRegisterClass &RC = diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 283fcaa73a7..3c6a7d7a665 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -905,6 +905,64 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { break; } + // Manually match MipsISD::Ins nodes to get the correct instruction. It has + // to be done in this fashion so that we respect the differences between + // dins and dinsm, as the difference is that the size operand has the range + // 0 < size <= 32 for dins while dinsm has the range 2 <= size <= 64 which + // means SelectionDAGISel would have to test all the operands at once to + // match the instruction. + case MipsISD::Ins: { + + // Sanity checking for the node operands. + if (Node->getValueType(0) != MVT::i32 && Node->getValueType(0) != MVT::i64) + return false; + + if (Node->getNumOperands() != 4) + return false; + + if (Node->getOperand(1)->getOpcode() != ISD::Constant || + Node->getOperand(2)->getOpcode() != ISD::Constant) + return false; + + MVT ResTy = Node->getSimpleValueType(0); + uint64_t Pos = Node->getConstantOperandVal(1); + uint64_t Size = Node->getConstantOperandVal(2); + + // Size has to be >0 for 'ins', 'dins' and 'dinsu'. + if (!Size) + return false; + + if (Pos + Size > 64) + return false; + + if (ResTy != MVT::i32 && ResTy != MVT::i64) + return false; + + unsigned Opcode = 0; + if (ResTy == MVT::i32) { + if (Pos + Size <= 32) + Opcode = Mips::INS; + } else { + if (Pos + Size <= 32) + Opcode = Mips::DINS; + else if (Pos < 32 && 1 < Size) + Opcode = Mips::DINSM; + else + Opcode = Mips::DINSU; + } + + if (Opcode) { + SDValue Ops[4] = { + Node->getOperand(0), CurDAG->getTargetConstant(Pos, DL, MVT::i32), + CurDAG->getTargetConstant(Size, DL, MVT::i32), Node->getOperand(3)}; + + ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, ResTy, Ops)); + return true; + } + + return false; + } + case MipsISD::ThreadPointer: { EVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); unsigned RdhwrOpc, DestReg; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 45d7f94f1d1..4dd9f7f219a 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Intrinsics.h" @@ -40,7 +41,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 86bd24166bb..2ff6b99e78f 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -23,6 +23,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -30,8 +32,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td index 89cda676441..9621009ed1c 100644 --- a/lib/Target/Mips/MipsScheduleGeneric.td +++ b/lib/Target/Mips/MipsScheduleGeneric.td @@ -736,6 +736,7 @@ def : InstRW<[GenericDSPShort], (instregex "^MFHI_DSP_MM$")>; def : InstRW<[GenericDSPShort], (instregex "^MFLO_DSP_MM$")>; def : InstRW<[GenericDSPShort], (instregex "^MODSUB_MM$")>; def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MM$")>; +def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MMR6$")>; def : InstRW<[GenericDSPShort], (instregex "^MOVN_I_MM$")>; def : InstRW<[GenericDSPShort], (instregex "^MOVZ_I_MM$")>; def : InstRW<[GenericDSPShort], (instregex "^MSUBU_DSP_MM$")>; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 6ced2f6967c..729f3ed7b79 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 320ca9a2f09..a802cf85d2e 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H #define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class NVPTXSubtarget; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7b9acb20b75..ac4f2544fc3 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3449,6 +3449,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( } case Intrinsic::nvvm_atomic_load_add_f32: + case Intrinsic::nvvm_atomic_load_add_f64: case Intrinsic::nvvm_atomic_load_inc_32: case Intrinsic::nvvm_atomic_load_dec_32: diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index d284282e28c..18ba7684ae5 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -16,7 +16,7 @@ #include "NVPTX.h" #include "NVPTXRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "NVPTXGenInstrInfo.inc" diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index f745b6f6635..478f3e9d057 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1095,6 +1095,12 @@ def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; +def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>; +def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>; +def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>; defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>; @@ -1121,6 +1127,13 @@ defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add", defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add", atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>; +defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add", + atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>; +defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add", + atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>; +defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add", + atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>; + // atom_sub def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp index 4e902c0fb50..38437b20433 100644 --- a/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -36,7 +36,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 88288abe64f..3957d426653 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -20,7 +20,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 8d46694fbe5..75573832988 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -18,8 +18,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 7674135f0a7..54a72a688ee 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -17,7 +17,7 @@ #include "ManagedStringPool.h" #include "NVPTXSubtarget.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/Nios2/Nios2FrameLowering.h b/lib/Target/Nios2/Nios2FrameLowering.h index 2aaea678d9e..2d9e84b2c72 100644 --- a/lib/Target/Nios2/Nios2FrameLowering.h +++ b/lib/Target/Nios2/Nios2FrameLowering.h @@ -14,7 +14,7 @@ #define LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H #include "Nios2.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class Nios2Subtarget; diff --git a/lib/Target/Nios2/Nios2InstrInfo.h b/lib/Target/Nios2/Nios2InstrInfo.h index 47e5e83a39d..6a0a050c839 100644 --- a/lib/Target/Nios2/Nios2InstrInfo.h +++ b/lib/Target/Nios2/Nios2InstrInfo.h @@ -18,7 +18,7 @@ #include "Nios2RegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "Nios2GenInstrInfo.inc" diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp index 33085a42361..ac28d7ff497 100644 --- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -21,9 +21,9 @@ #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index fa813db5fef..f845d5a9ac6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -15,7 +15,7 @@ #include "PPC.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 8ea3689b08e..2092748ca1a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" @@ -53,7 +54,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f3e7b4af45d..62ade966145 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -51,6 +51,7 @@ #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -82,7 +83,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -291,14 +291,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FROUND, MVT::f32, Legal); } - // PowerPC does not have BSWAP + // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd + // to speed up scalar BSWAP64. // CTPOP or CTTZ were introduced in P8/P9 respectivelly setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - setOperationAction(ISD::BSWAP, MVT::i64 , Expand); if (Subtarget.isISA3_0()) { + setOperationAction(ISD::BSWAP, MVT::i64 , Custom); setOperationAction(ISD::CTTZ , MVT::i32 , Legal); setOperationAction(ISD::CTTZ , MVT::i64 , Legal); } else { + setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); } @@ -781,6 +783,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL, MVT::v1i128, Legal); setOperationAction(ISD::SRA, MVT::v1i128, Expand); } + + if (Subtarget.hasP9Altivec()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + } } if (Subtarget.hasQPX()) { @@ -7888,6 +7895,107 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, return DAG.getNode(ISD::BITCAST, dl, VT, T); } +/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled +/// by the VINSERTB instruction introduced in ISA 3.0, else just return default +/// SDValue. +SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N, + SelectionDAG &DAG) const { + const unsigned BytesInVector = 16; + bool IsLE = Subtarget.isLittleEndian(); + SDLoc dl(N); + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + unsigned ShiftElts = 0, InsertAtByte = 0; + bool Swap = false; + + // Shifts required to get the byte we want at element 7. + unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1, + 0, 15, 14, 13, 12, 11, 10, 9}; + unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0, + 1, 2, 3, 4, 5, 6, 7, 8}; + + ArrayRef<int> Mask = N->getMask(); + int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + + // For each mask element, find out if we're just inserting something + // from V2 into V1 or vice versa. + // Possible permutations inserting an element from V2 into V1: + // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + // ... + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X + // Inserting from V1 into V2 will be similar, except mask range will be + // [16,31]. + + bool FoundCandidate = false; + // If both vector operands for the shuffle are the same vector, the mask + // will contain only elements from the first one and the second one will be + // undef. + unsigned VINSERTBSrcElem = IsLE ? 8 : 7; + // Go through the mask of half-words to find an element that's being moved + // from one vector to the other. + for (unsigned i = 0; i < BytesInVector; ++i) { + unsigned CurrentElement = Mask[i]; + // If 2nd operand is undefined, we should only look for element 7 in the + // Mask. + if (V2.isUndef() && CurrentElement != VINSERTBSrcElem) + continue; + + bool OtherElementsInOrder = true; + // Examine the other elements in the Mask to see if they're in original + // order. + for (unsigned j = 0; j < BytesInVector; ++j) { + if (j == i) + continue; + // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be + // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined, + // in which we always assume we're always picking from the 1st operand. + int MaskOffset = + (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0; + if (Mask[j] != OriginalOrder[j] + MaskOffset) { + OtherElementsInOrder = false; + break; + } + } + // If other elements are in original order, we record the number of shifts + // we need to get the element we want into element 7. Also record which byte + // in the vector we should insert into. + if (OtherElementsInOrder) { + // If 2nd operand is undefined, we assume no shifts and no swapping. + if (V2.isUndef()) { + ShiftElts = 0; + Swap = false; + } else { + // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4. + ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF] + : BigEndianShifts[CurrentElement & 0xF]; + Swap = CurrentElement < BytesInVector; + } + InsertAtByte = IsLE ? BytesInVector - (i + 1) : i; + FoundCandidate = true; + break; + } + } + + if (!FoundCandidate) + return SDValue(); + + // Candidate found, construct the proper SDAG sequence with VINSERTB, + // optionally with VECSHL if shift is required. + if (Swap) + std::swap(V1, V2); + if (V2.isUndef()) + V2 = V1; + if (ShiftElts) { + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + } + return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); +} + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled /// by the VINSERTH instruction introduced in ISA 3.0, else just return default /// SDValue. @@ -8035,8 +8143,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } if (Subtarget.hasP9Altivec()) { - SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG); - if (NewISDNode) + SDValue NewISDNode; + if ((NewISDNode = lowerToVINSERTH(SVOp, DAG))) + return NewISDNode; + + if ((NewISDNode = lowerToVINSERTB(SVOp, DAG))) return NewISDNode; } @@ -8675,6 +8786,23 @@ SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { return Op; } +// Lower scalar BSWAP64 to xxbrd. +SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + // MTVSRDD + Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), + Op.getOperand(0)); + // XXBRD + Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op); + // MFVSRD + int VectorIndex = 0; + if (Subtarget.isLittleEndian()) + VectorIndex = 1; + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op, + DAG.getTargetConstant(VectorIndex, dl, MVT::i32)); + return Op; +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8719,11 +8847,29 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Should only be called for ISD::INSERT_VECTOR_ELT"); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); // We have legal lowering for constant indices but not for variable ones. - if (C) - return Op; - return SDValue(); + if (!C) + return SDValue(); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types. + if (VT == MVT::v8i16 || VT == MVT::v16i8) { + SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2); + unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; + unsigned InsertAtElement = C->getZExtValue(); + unsigned InsertAtByte = InsertAtElement * BytesInEachElement; + if (Subtarget.isLittleEndian()) { + InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; + } + return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + } + return Op; } SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, @@ -9146,6 +9292,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SREM: case ISD::UREM: return LowerREM(Op, DAG); + case ISD::BSWAP: + return LowerBSWAP(Op, DAG); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 1a5efeba4cf..bf9c4b8e63b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -953,6 +953,7 @@ namespace llvm { SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; @@ -1079,6 +1080,11 @@ namespace llvm { /// from one vector into the other. SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; + /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be + /// handled by the VINSERTB instruction introduced in ISA 3.0. This is + /// essentially v16i8 vector version of VINSERTH. + SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; + }; // end class PPCTargetLowering namespace PPC { diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 506fac7dfc1..e751c149b0b 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1312,7 +1312,12 @@ def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>; def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>; // Vector Insert Element Instructions -def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>; +def VINSERTB : VXForm_1<781, (outs vrrc:$vD), + (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB), + "vinsertb $vD, $vB, $UIM", IIC_VecGeneral, + [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB, + imm32SExt16:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSERTH : VXForm_1<845, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB), "vinserth $vD, $vB, $UIM", IIC_VecGeneral, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index ab86a54f6fe..565392f76e4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -16,7 +16,7 @@ #include "PPC.h" #include "PPCRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "PPCGenInstrInfo.inc" diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 1fc50d2c860..3261bc9bc53 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -2595,6 +2595,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; } + // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead + // of f64 + def : Pat<(v8i16 (PPCmtvsrz i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + def : Pat<(v16i8 (PPCmtvsrz i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + // Patterns for which instructions from ISA 3.0 are a better match let Predicates = [IsLittleEndian, HasP9Vector] in { def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))), diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index d46c1383297..78467e81795 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -28,6 +28,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -37,8 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <cstdlib> diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h index 1b6140203c8..884cb2e5014 100644 --- a/lib/Target/RISCV/RISCV.h +++ b/lib/Target/RISCV/RISCV.h @@ -15,15 +15,21 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCV_H #define LLVM_LIB_TARGET_RISCV_RISCV_H -#include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" +#include "MCTargetDesc/RISCVBaseInfo.h" namespace llvm { class RISCVTargetMachine; +class AsmPrinter; +class FunctionPass; class MCInst; +class MCOperand; class MachineInstr; +class MachineOperand; -void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI); +void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + const AsmPrinter &AP); +bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, const AsmPrinter &AP); FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); } diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td index 54aa570e13b..da919acad36 100644 --- a/lib/Target/RISCV/RISCV.td +++ b/lib/Target/RISCV/RISCV.td @@ -40,9 +40,7 @@ def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; //===----------------------------------------------------------------------===// def RISCVInstrInfo : InstrInfo { - // TODO: disable guessInstructionProperties when - // https://reviews.llvm.org/D37065 lands. - let guessInstructionProperties = 1; + let guessInstructionProperties = 0; } def RISCVAsmParser : AsmParser { diff --git a/lib/Target/RISCV/RISCVAsmPrinter.cpp b/lib/Target/RISCV/RISCVAsmPrinter.cpp index 1c213b6c7e9..4808e6c73c5 100644 --- a/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -43,6 +43,11 @@ public: bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); + + // Wrapper needed for tblgenned pseudo lowering. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { + return LowerRISCVMachineOperandToMCOperand(MO, MCOp, *this); + } }; } @@ -56,7 +61,7 @@ void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; MCInst TmpInst; - LowerRISCVMachineInstrToMCInst(MI, TmpInst); + LowerRISCVMachineInstrToMCInst(MI, TmpInst, *this); EmitToStreamer(*OutStreamer, TmpInst); } diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td index e0c25e32e01..0b7a523424c 100644 --- a/lib/Target/RISCV/RISCVCallingConv.td +++ b/lib/Target/RISCV/RISCVCallingConv.td @@ -27,3 +27,6 @@ def CC_RISCV32 : CallingConv<[ ]>; def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + +// Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() +def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h index 14772ddac4a..0b2c7a40298 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.h +++ b/lib/Target/RISCV/RISCVFrameLowering.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H #define LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class RISCVSubtarget; @@ -30,6 +30,12 @@ public: void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; + + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override { + return MBB.erase(MI); + } }; } #endif diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index d76170b7b78..98f7aa16e2e 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -49,8 +49,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setStackPointerRegisterToSaveRestore(RISCV::X2); + for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) + setLoadExtAction(N, XLenVT, MVT::i1, Promote); + // TODO: add all necessary setOperationAction calls. + setOperationAction(ISD::GlobalAddress, XLenVT, Custom); + setOperationAction(ISD::BR_CC, XLenVT, Expand); setBooleanContents(ZeroOrOneBooleanContent); // Function alignments (log2). @@ -63,6 +68,30 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, switch (Op.getOpcode()) { default: report_fatal_error("unimplemented operand"); + case ISD::GlobalAddress: + return lowerGlobalAddress(Op, DAG); + } +} + +SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT Ty = Op.getValueType(); + GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = N->getGlobal(); + int64_t Offset = N->getOffset(); + + if (!isPositionIndependent() && !Subtarget.is64Bit()) { + SDValue GAHi = + DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_HI); + SDValue GALo = + DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_LO); + SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); + SDValue MNLo = + SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); + return MNLo; + } else { + report_fatal_error("Unable to lowerGlobalAddress"); } } @@ -79,6 +108,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( default: report_fatal_error("Unsupported calling convention"); case CallingConv::C: + case CallingConv::Fast: break; } @@ -115,6 +145,135 @@ SDValue RISCVTargetLowering::LowerFormalArguments( return Chain; } +// Lower a call to a callseq_start + CALL + callseq_end chain, and add input +// and output parameter nodes. +SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &DL = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + CLI.IsTailCall = false; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + if (IsVarArg) { + report_fatal_error("LowerCall with varargs not implemented"); + } + + MachineFunction &MF = DAG.getMachineFunction(); + + // Analyze the operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV32); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + + for (auto &Arg : Outs) { + if (!Arg.Flags.isByVal()) + continue; + report_fatal_error("Passing arguments byval not yet implemented"); + } + + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + + // Copy argument values to their designated locations. + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SDValue StackPtr; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + SDValue ArgValue = OutVals[I]; + + // Promote the value if needed. + // For now, only handle fully promoted arguments. + switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + default: + llvm_unreachable("Unknown loc info!"); + } + + if (VA.isRegLoc()) { + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + } else { + assert(VA.isMemLoc() && "Argument not register or memory"); + report_fatal_error("Passing arguments via the stack not yet implemented"); + } + } + + SDValue Glue; + + // Build a sequence of copy-to-reg nodes, chained and glued together. + for (auto &Reg : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); + Glue = Chain.getValue(1); + } + + if (isa<GlobalAddressSDNode>(Callee)) { + Callee = lowerGlobalAddress(Callee, DAG); + } else if (isa<ExternalSymbolSDNode>(Callee)) { + report_fatal_error( + "lowerExternalSymbol, needed for lowerCall, not yet handled"); + } + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (auto &Reg : RegsToPass) + Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), + Glue, DL); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_RISCV32); + + // Copy all of the result registers out of their specified physreg. + for (auto &VA : RVLocs) { + // Copy the value out, gluing the copy to the end of the call sequence. + SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), + VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + SDValue RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -165,6 +324,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { break; case RISCVISD::RET_FLAG: return "RISCVISD::RET_FLAG"; + case RISCVISD::CALL: + return "RISCVISD::CALL"; } return nullptr; } diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h index 9fed48fc04e..dfb4824cc18 100644 --- a/lib/Target/RISCV/RISCVISelLowering.h +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -24,7 +24,8 @@ class RISCVSubtarget; namespace RISCVISD { enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, - RET_FLAG + RET_FLAG, + CALL }; } @@ -52,10 +53,13 @@ private: const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override { return true; } + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp index 92db5358ce4..5b4f4fcbb88 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -28,4 +28,50 @@ using namespace llvm; -RISCVInstrInfo::RISCVInstrInfo() : RISCVGenInstrInfo() {} +RISCVInstrInfo::RISCVInstrInfo() + : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP) {} + +void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DstReg, + unsigned SrcReg, bool KillSrc) const { + assert(RISCV::GPRRegClass.contains(DstReg, SrcReg) && + "Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); +} + +void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, bool IsKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + + if (RC == &RISCV::GPRRegClass) + BuildMI(MBB, I, DL, get(RISCV::SW)) + .addReg(SrcReg, getKillRegState(IsKill)) + .addFrameIndex(FI) + .addImm(0); + else + llvm_unreachable("Can't store this register to stack slot"); +} + +void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + + if (RC == &RISCV::GPRRegClass) + BuildMI(MBB, I, DL, get(RISCV::LW), DstReg).addFrameIndex(FI).addImm(0); + else + llvm_unreachable("Can't load this register from stack slot"); +} diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h index 50404d5554d..05c8378445c 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.h +++ b/lib/Target/RISCV/RISCVInstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H #include "RISCVRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "RISCVGenInstrInfo.inc" @@ -26,7 +26,21 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { public: RISCVInstrInfo(); + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DstReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool IsKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned DstReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; }; } - #endif diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td index 23adf1eda9d..23f218fda8f 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.td +++ b/lib/Target/RISCV/RISCVInstrInfo.td @@ -17,8 +17,22 @@ include "RISCVInstrFormats.td" // RISC-V specific DAG Nodes. //===----------------------------------------------------------------------===// -def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>; +def SDT_RISCVCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_RISCVCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; + + +def Call : SDNode<"RISCVISD::CALL", SDT_RISCVCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def CallSeqStart : SDNode<"ISD::CALLSEQ_START", SDT_RISCVCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def CallSeqEnd : SDNode<"ISD::CALLSEQ_END", SDT_RISCVCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -67,7 +81,7 @@ def uimm12 : Operand<XLenVT> { } // A 13-bit signed immediate where the least significant bit is zero. -def simm13_lsb0 : Operand<XLenVT> { +def simm13_lsb0 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<13, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<13>"; @@ -80,12 +94,30 @@ def uimm20 : Operand<XLenVT> { } // A 21-bit signed immediate where the least significant bit is zero. -def simm21_lsb0 : Operand<XLenVT> { +def simm21_lsb0 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<21, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<21>"; } +// Standalone (codegen-only) immleaf patterns. +def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>; + +// Extract least significant 12 bits from an immediate value and sign extend +// them. +def LO12Sext : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(SignExtend64<12>(N->getZExtValue()), + SDLoc(N), N->getValueType(0)); +}]>; + +// Extract the most significant 20 bits from an immediate value. Add 1 if bit +// 11 is 1, to compensate for the low 12 bits in the matching immediate addi +// or ld/st being negative. +def HI20 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(((N->getZExtValue()+0x800) >> 12) & 0xfffff, + SDLoc(N), N->getValueType(0)); +}]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// @@ -257,6 +289,12 @@ class PatGprUimm5<SDPatternOperator OpNode, RVInstIShift Inst> : Pat<(OpNode GPR:$rs1, uimm5:$shamt), (Inst GPR:$rs1, uimm5:$shamt)>; +/// Immediates + +def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>; +// TODO: Add a pattern for immediates with all zeroes in the lower 12 bits. +def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>; + /// Simple arithmetic operations def : PatGprGpr<add, ADD>; @@ -284,6 +322,80 @@ def : PatGprSimm12<setult, SLTIU>; /// Branches and jumps +// Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch +// instruction. +class BccPat<PatFrag CondOp, RVInstB Inst> + : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12), + (Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>; + +def : BccPat<seteq, BEQ>; +def : BccPat<setne, BNE>; +def : BccPat<setlt, BLT>; +def : BccPat<setge, BGE>; +def : BccPat<setult, BLTU>; +def : BccPat<setuge, BGEU>; + +class BccSwapPat<PatFrag CondOp, RVInst InstBcc> + : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12), + (InstBcc GPR:$rs2, GPR:$rs1, bb:$imm12)>; + +// Condition codes that don't have matching RISC-V branch instructions, but +// are trivially supported by swapping the two input operands +def : BccSwapPat<setgt, BLT>; +def : BccSwapPat<setle, BGE>; +def : BccSwapPat<setugt, BLTU>; +def : BccSwapPat<setule, BGEU>; + +// An extra pattern is needed for a brcond without a setcc (i.e. where the +// condition was calculated elsewhere). +def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>; + +let isBarrier = 1, isBranch = 1, isTerminator = 1 in +def PseudoBR : Pseudo<(outs), (ins simm21_lsb0:$imm20), [(br bb:$imm20)]>, + PseudoInstExpansion<(JAL X0, simm21_lsb0:$imm20)>; + +let isCall = 1, Defs=[X1] in +def PseudoCALL : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>, + PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; + let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>, PseudoInstExpansion<(JALR X0, X1, 0)>; + +/// Loads + +multiclass LdPat<PatFrag LoadOp, RVInst Inst> { + def : Pat<(LoadOp GPR:$rs1), (Inst GPR:$rs1, 0)>; + def : Pat<(LoadOp (add GPR:$rs1, simm12:$imm12)), + (Inst GPR:$rs1, simm12:$imm12)>; +} + +defm : LdPat<sextloadi8, LB>; +defm : LdPat<extloadi8, LB>; +defm : LdPat<sextloadi16, LH>; +defm : LdPat<extloadi16, LH>; +defm : LdPat<load, LW>; +defm : LdPat<zextloadi8, LBU>; +defm : LdPat<zextloadi16, LHU>; + +/// Stores + +multiclass StPat<PatFrag StoreOp, RVInst Inst> { + def : Pat<(StoreOp GPR:$rs2, GPR:$rs1), (Inst GPR:$rs2, GPR:$rs1, 0)>; + def : Pat<(StoreOp GPR:$rs2, (add GPR:$rs1, simm12:$imm12)), + (Inst GPR:$rs2, GPR:$rs1, simm12:$imm12)>; +} + +defm : StPat<truncstorei8, SB>; +defm : StPat<truncstorei16, SH>; +defm : StPat<store, SW>; + +/// Other pseudo-instructions + +// Pessimistically assume the stack pointer will be clobbered +let Defs = [X2], Uses = [X2] in { +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(CallSeqStart timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(CallSeqEnd timm:$amt1, timm:$amt2)]>; +} // Defs = [X2], Uses = [X2] diff --git a/lib/Target/RISCV/RISCVMCInstLower.cpp b/lib/Target/RISCV/RISCVMCInstLower.cpp index 1ac8d982ff9..ef0051ed56e 100644 --- a/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "RISCV.h" +#include "MCTargetDesc/RISCVMCExpr.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCAsmInfo.h" @@ -24,27 +26,72 @@ using namespace llvm; -void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, - MCInst &OutMI) { +static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, + const AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + RISCVMCExpr::VariantKind Kind; + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case RISCVII::MO_None: + Kind = RISCVMCExpr::VK_RISCV_None; + break; + case RISCVII::MO_LO: + Kind = RISCVMCExpr::VK_RISCV_LO; + break; + case RISCVII::MO_HI: + Kind = RISCVMCExpr::VK_RISCV_HI; + break; + } + + const MCExpr *ME = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); + + if (!MO.isJTI() && MO.getOffset()) + ME = MCBinaryExpr::createAdd( + ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + + ME = RISCVMCExpr::create(ME, Kind, Ctx); + return MCOperand::createExpr(ME); +} + +bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, + const AsmPrinter &AP) { + switch (MO.getType()) { + default: + report_fatal_error("LowerRISCVMachineInstrToMCInst: unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + return false; + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_RegisterMask: + // Regmasks are like implicit defs. + return false; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = lowerSymbolOperand(MO, AP.getSymbol(MO.getGlobal()), AP); + break; + } + return true; +} + +void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + const AsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; - switch (MO.getType()) { - default: - report_fatal_error( - "LowerRISCVMachineInstrToMCInst: unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) - continue; - MCOp = MCOperand::createReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::createImm(MO.getImm()); - break; - } - - OutMI.addOperand(MCOp); + if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP)) + OutMI.addOperand(MCOp); } } diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp index 4f6c528061c..cd658d7e2d9 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -18,9 +18,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #define GET_REGINFO_TARGET_DESC #include "RISCVGenRegisterInfo.inc" @@ -50,12 +50,47 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; +} + void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { - report_fatal_error("Subroutines not supported yet"); + // TODO: this implementation is a temporary placeholder which does just + // enough to allow other aspects of code generation to be tested + + assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); + + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned FrameReg = getFrameRegister(MF); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + int Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + + assert(TFI->hasFP(MF) && "eliminateFrameIndex currently requires hasFP"); + + // Offsets must be directly encoded in a 12-bit immediate field + if (!isInt<12>(Offset)) { + report_fatal_error( + "Frame offsets outside of the signed 12-bit range not supported"); + } + + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; } unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return RISCV::X8; } + +const uint32_t * +RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & /*MF*/, + CallingConv::ID /*CC*/) const { + return CSR_RegMask; +} diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h index 94af9f44ecd..d9de9bf8c76 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/lib/Target/RISCV/RISCVRegisterInfo.h @@ -25,10 +25,15 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { RISCVRegisterInfo(unsigned HwMode); + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; + const uint32_t *getNoPreservedMask() const override; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index df819ccd15d..6948b72747c 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -19,8 +19,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index ac0e69ccde1..6098afa6898 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H #include "Sparc.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index c053cc4c475..524b5d05416 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_SPARC_SPARCINSTRINFO_H #include "SparcRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "SparcGenInstrInfo.inc" diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 37a1fdf4d77..b9647eaa3d5 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -20,10 +20,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index bfbdb8d0b44..ad6b55a9fc9 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -19,7 +19,7 @@ #include "SparcInstrInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 91c5a5d53a1..a75d111b029 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H #include "llvm/ADT/IndexedMap.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class SystemZTargetMachine; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 4533f4fdf21..19ce7776fed 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -27,12 +27,12 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index b8be1f5f392..216139eb7c7 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <cstdint> #define GET_INSTRINFO_HEADER diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index d4cd89ce590..27c72b41e4f 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 05f93ce5162..a44fae523fe 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" using namespace llvm; diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index a4d9421e08a..6d50369e587 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -323,6 +323,11 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { return 0; } +bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { + EVT VT = TLI->getValueType(DL, DataType); + return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); +} + int SystemZTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 28821a2ca11..4b11a6f0a83 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -62,6 +62,7 @@ public: unsigned getPrefetchDistance() { return 2000; } unsigned getMinPrefetchStride() { return 2048; } + bool hasDivRemOp(Type *DataType, bool IsSigned); bool prefersVectorizedAddressing() { return false; } bool LSRWithInstrQueries() { return true; } bool supportsEfficientVectorElementLoadStore() { return true; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 5dcb89477a3..b24888fa9cb 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -167,6 +167,13 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, if (GV && !GV->isDeclarationForLinker()) return true; + // A symbol marked nonlazybind should not be accessed with a plt. If the + // symbol turns out to be external, the linker will convert a direct + // access to an access via the plt, so don't assume it is local. + const Function *F = dyn_cast_or_null<Function>(GV); + if (F && F->hasFnAttribute(Attribute::NonLazyBind)) + return false; + bool IsTLS = GV && GV->isThreadLocal(); bool IsAccessViaCopyRelocs = Options.MCOptions.MCPIECopyRelocations && GV && isa<GlobalVariable>(GV); diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 211358ad66c..ee60c8f3a7a 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -267,12 +267,11 @@ bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, if (AsmVariant != 0) report_fatal_error("There are no defined alternate asm variants"); - if (!ExtraCode) { - // TODO: For now, we just hard-code 0 as the constant offset; teach - // SelectInlineAsmMemoryOperand how to do address mode matching. - OS << "0(" + regToString(MI->getOperand(OpNo)) + ')'; - return false; - } + // The current approach to inline asm is that "r" constraints are expressed + // as local indices, rather than values on the operand stack. This simplifies + // using "r" as it eliminates the need to push and pop the values in a + // particular order, however it also makes it impossible to have an "m" + // constraint. So we don't support it. return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS); } diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 41249117ae0..e2edb924d4d 100644 --- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -294,6 +294,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { unsigned OldReg = MO.getReg(); + // Inline asm may have a def in the middle of the operands. Our contract + // with inline asm register operands is to provide local indices as + // immediates. + if (MO.isDef()) { + assert(MI.getOpcode() == TargetOpcode::INLINEASM); + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + MRI.removeRegOperandFromUseList(&MO); + MO = MachineOperand::CreateImm(LocalId); + continue; + } + // If we see a stackified register, prepare to insert subsequent // get_locals before the start of its tree. if (MFI.isVRegStackified(OldReg)) { @@ -301,6 +312,15 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { continue; } + // Our contract with inline asm register operands is to provide local + // indices as immediates. + if (MI.getOpcode() == TargetOpcode::INLINEASM) { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + MRI.removeRegOperandFromUseList(&MO); + MO = MachineOperand::CreateImm(LocalId); + continue; + } + // Insert a get_local. unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index f516a6b260d..e67b1c88b58 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -541,7 +541,7 @@ unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) { unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) { MVT::SimpleValueType From = getSimpleType(V->getType()); MVT::SimpleValueType To = getLegalType(From); - return zeroExtend(getRegForValue(V), V, From, To); + return signExtend(getRegForValue(V), V, From, To); } unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V, diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index bf326fce88f..4cc7f5ae058 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -16,7 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class MachineFrameInfo; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index df6c937a364..eb74106336e 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -17,7 +17,7 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H #include "WebAssemblyRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "WebAssemblyGenInstrInfo.inc" diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 9367464c806..5e7ebd19fac 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index b8ea2f01133..2a784c9822a 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2791,7 +2791,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, isParsingIntelSyntax())) { default: llvm_unreachable("Unexpected match result!"); case Match_Success: - if (validateInstruction(Inst, Operands)) + if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) return true; // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the @@ -3082,7 +3082,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, // instruction will already have been filled in correctly, since the failing // matches won't have modified it). if (NumSuccessfulMatches == 1) { - if (validateInstruction(Inst, Operands)) + if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) return true; // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the individual diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 6ff1136cd85..0c99dbbe328 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -54,12 +54,12 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, if (TSFlags & X86II::LOCK) OS << "\tlock\t"; if (!(TSFlags & X86II::LOCK) && Flags & X86::IP_HAS_LOCK) - OS << "\tlock\n"; + OS << "\tlock\t"; if (Flags & X86::IP_HAS_REPEAT_NE) - OS << "\trepne\n"; + OS << "\trepne\t"; else if (Flags & X86::IP_HAS_REPEAT) - OS << "\trep\n"; + OS << "\trep\t"; // Output CALLpcrel32 as "callq" in 64-bit mode. // In Intel annotation it's always emitted as "call". diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 464941a1bab..1f02600a798 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -41,13 +41,13 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, uint64_t TSFlags = Desc.TSFlags; if (TSFlags & X86II::LOCK) - OS << "\tlock\n"; + OS << "\tlock\t"; unsigned Flags = MI->getFlags(); if (Flags & X86::IP_HAS_REPEAT_NE) - OS << "\trepne\n"; + OS << "\trepne\t"; else if (Flags & X86::IP_HAS_REPEAT) - OS << "\trep\n"; + OS << "\trep\t"; printInstruction(MI, OS); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index f4021d7639b..34f2956e0c0 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -116,9 +116,15 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; +def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", + "Enable three-operand fused multiple-add", + [FeatureAVX]>; +def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", + "Support 16-bit floating point conversion instructions", + [FeatureAVX]>; def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", "Enable AVX-512 instructions", - [FeatureAVX2]>; + [FeatureAVX2, FeatureFMA, FeatureF16C]>; def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", "Enable AVX-512 Exponential and Reciprocal Instructions", [FeatureAVX512]>; @@ -154,9 +160,6 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; -def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", - "Enable three-operand fused multiple-add", - [FeatureAVX]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add", [FeatureAVX, FeatureSSE4A]>; @@ -177,9 +180,6 @@ def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">; -def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", - "Support 16-bit floating point conversion instructions", - [FeatureAVX]>; def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", "Support FS/GS Base instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 34e384ba311..d8b5b7d3fc0 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -34,13 +34,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstddef> diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 7beb9c6e357..54f937bcda3 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" @@ -41,7 +42,6 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstdint> diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp index b2cd622b1e8..e75276960cc 100644 --- a/lib/Target/X86/X86CmovConversion.cpp +++ b/lib/Target/X86/X86CmovConversion.cpp @@ -57,6 +57,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCSchedule.h" @@ -64,7 +65,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp index 744510a3a3b..6dd4631a484 100644 --- a/lib/Target/X86/X86EvexToVex.cpp +++ b/lib/Target/X86/X86EvexToVex.cpp @@ -171,7 +171,7 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { case X86::VALIGNDZ128rri: case X86::VALIGNDZ128rmi: case X86::VALIGNQZ128rri: - case X86::VALIGNQZ128rmi: + case X86::VALIGNQZ128rmi: { assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) && "Unexpected new opcode!"); unsigned Scale = (Opc == X86::VALIGNQZ128rri || @@ -180,6 +180,24 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { Imm.setImm(Imm.getImm() * Scale); break; } + case X86::VSHUFF32X4Z256rmi: + case X86::VSHUFF32X4Z256rri: + case X86::VSHUFF64X2Z256rmi: + case X86::VSHUFF64X2Z256rri: + case X86::VSHUFI32X4Z256rmi: + case X86::VSHUFI32X4Z256rri: + case X86::VSHUFI64X2Z256rmi: + case X86::VSHUFI64X2Z256rri: { + assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr || + NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) && + "Unexpected new opcode!"); + MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1); + int64_t ImmVal = Imm.getImm(); + // Set bit 5, move bit 1 to bit 4, copy bit 0. + Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1)); + break; + } + } } diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp index b2b5a78fcdb..9664c931c35 100644 --- a/lib/Target/X86/X86FixupBWInsts.cpp +++ b/lib/Target/X86/X86FixupBWInsts.cpp @@ -55,9 +55,9 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; #define FIXUPBW_DESC "X86 Byte/Word Instruction Fixup" diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 9f649dad8bc..bbc2bffdb70 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; namespace llvm { diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 5582526541b..7bcbe199124 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -37,11 +37,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 988f2967401..86e65b83ffa 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1562,6 +1562,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, bool HasFP = hasFP(MF); uint64_t NumBytes = 0; + bool NeedsDwarfCFI = + (!MF.getTarget().getTargetTriple().isOSDarwin() && + !MF.getTarget().getTargetTriple().isOSWindows()) && + (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); + if (IsFunclet) { assert(HasFP && "EH funclets without FP not yet implemented"); NumBytes = getWinEHFuncletFrameSize(MF); @@ -1584,6 +1589,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) .setMIFlag(MachineInstr::FrameDestroy); + if (NeedsDwarfCFI) { + unsigned DwarfStackPtr = + TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa( + nullptr, DwarfStackPtr, -SlotSize)); + --MBBI; + } } MachineBasicBlock::iterator FirstCSPop = MBBI; @@ -1647,6 +1659,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true); + if (!hasFP(MF) && NeedsDwarfCFI) { + // Define the current CFA rule to use the provided offset. + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset( + nullptr, -CSSize - SlotSize)); + } --MBBI; } @@ -1659,6 +1676,23 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (NeedsWin64CFI && MF.hasWinCFI()) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); + if (!hasFP(MF) && NeedsDwarfCFI) { + MBBI = FirstCSPop; + int64_t Offset = -CSSize - SlotSize; + // Mark callee-saved pop instruction. + // Define the current CFA rule to use the provided offset. + while (MBBI != MBB.end()) { + MachineBasicBlock::iterator PI = MBBI; + unsigned Opc = PI->getOpcode(); + ++MBBI; + if (Opc == X86::POP32r || Opc == X86::POP64r) { + Offset += SlotSize; + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaOffset(nullptr, Offset)); + } + } + } + if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) { // Add the return addr area delta back since we are not tail calling. int Offset = -1 * X86FI->getTCReturnAddrDelta(); @@ -2577,6 +2611,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, unsigned Regs[2]; unsigned FoundRegs = 0; + auto &MRI = MBB.getParent()->getRegInfo(); auto RegMask = Prev->getOperand(1); auto &RegClass = @@ -2590,6 +2625,10 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, if (!RegMask.clobbersPhysReg(Candidate)) continue; + // Don't clobber reserved registers + if (MRI.isReserved(Candidate)) + continue; + bool IsDef = false; for (const MachineOperand &MO : Prev->implicit_operands()) { if (MO.isReg() && MO.isDef() && @@ -2835,6 +2874,15 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( return MBBI; } +int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { + return TRI->getSlotSize(); +} + +unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) + const { + return TRI->getDwarfRegNum(StackPtr, true); +} + namespace { // Struct used by orderFrameObjects to help sort the stack objects. struct X86FrameSortingObject { diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 38ac96e16d4..2bce79262d0 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H #define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { @@ -168,6 +168,10 @@ public: MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP = false) const; + int getInitialCFAOffset(const MachineFunction &MF) const override; + + unsigned getInitialCFARegister(const MachineFunction &MF) const override; + private: uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e43fd508de3..0cbf7601790 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -449,15 +449,15 @@ namespace { // Returns true if this masked compare can be implemented legally with this // type. static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { - if (N->getOpcode() == X86ISD::PCMPEQM || - N->getOpcode() == X86ISD::PCMPGTM || - N->getOpcode() == X86ISD::CMPM || - N->getOpcode() == X86ISD::CMPMU) { + unsigned Opcode = N->getOpcode(); + if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM || + Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM || + Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU) { // We can get 256-bit 8 element types here without VLX being enabled. When // this happens we will use 512-bit operations and the mask will not be // zero extended. - if (N->getOperand(0).getValueType() == MVT::v8i32 || - N->getOperand(0).getValueType() == MVT::v8f32) + EVT OpVT = N->getOperand(0).getValueType(); + if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32) return Subtarget->hasVLX(); return true; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b178ad6c13e..22b4d7997fa 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -380,8 +380,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Special handling for half-precision floating point conversions. // If we don't have F16C support, then lower half float conversions // into library calls. - if (Subtarget.useSoftFloat() || - (!Subtarget.hasF16C() && !Subtarget.hasAVX512())) { + if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } @@ -4998,6 +4997,8 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { switch (Opcode) { default: return false; + case X86ISD::TESTM: + case X86ISD::TESTNM: case X86ISD::PCMPEQM: case X86ISD::PCMPGTM: case X86ISD::CMPM: @@ -6746,6 +6747,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && "Unsupported vector type for broadcast."); + BitVector UndefElements; + SDValue Ld = BVOp->getSplatValue(&UndefElements); + // Attempt to use VBROADCASTM // From this paterrn: // a. t0 = (zext_i64 (bitcast_i8 v2i1 X)) @@ -6753,17 +6757,23 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // // Create (VBROADCASTM v2i1 X) if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) { - MVT EltType; - unsigned NumElts; + MVT EltType = VT.getScalarType(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue BOperand; SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType); - if (ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) { - SDValue BOperand = ZeroExtended.getOperand(0); + if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) || + (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND && + Ld.getOperand(0).getOpcode() == ISD::BITCAST)) { + if (ZeroExtended) + BOperand = ZeroExtended.getOperand(0); + else + BOperand = Ld.getOperand(0).getOperand(0); if (BOperand.getValueType().isVector() && BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) { - if ((EltType == MVT::i64 && - VT.getVectorElementType() == MVT::i8) || // for broadcastmb2q - (EltType == MVT::i32 && - VT.getVectorElementType() == MVT::i16)) { // for broadcastmw2d + if ((EltType == MVT::i64 && (VT.getVectorElementType() == MVT::i8 || + NumElts == 8)) || // for broadcastmb2q + (EltType == MVT::i32 && (VT.getVectorElementType() == MVT::i16 || + NumElts == 16))) { // for broadcastmw2d SDValue Brdcst = DAG.getNode(X86ISD::VBROADCASTM, dl, MVT::getVectorVT(EltType, NumElts), BOperand); @@ -6773,9 +6783,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, } } - BitVector UndefElements; - SDValue Ld = BVOp->getSplatValue(&UndefElements); - // We need a splat of a single value to use broadcast, and it doesn't // make any sense if the value is only in one element of the vector. if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) { @@ -7707,6 +7714,111 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, return SDValue(); } +// Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be +// reasoned to be a permutation of a vector by indices in a non-constant vector. +// (build_vector (extract_elt V, (extract_elt I, 0)), +// (extract_elt V, (extract_elt I, 1)), +// ... +// -> +// (vpermv I, V) +// +// TODO: Handle undefs +// TODO: Utilize pshufb and zero mask blending to support more efficient +// construction of vectors with constant-0 elements. +// TODO: Use smaller-element vectors of same width, and "interpolate" the indices, +// when no native operation available. +static SDValue +LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + // Look for VPERMV and PSHUFB opportunities. + MVT VT = V.getSimpleValueType(); + switch (VT.SimpleTy) { + default: + return SDValue(); + case MVT::v16i8: + if (!Subtarget.hasSSE3()) + return SDValue(); + break; + case MVT::v8f32: + case MVT::v8i32: + if (!Subtarget.hasAVX2()) + return SDValue(); + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget.hasVLX()) + return SDValue(); + break; + case MVT::v16f32: + case MVT::v8f64: + case MVT::v16i32: + case MVT::v8i64: + if (!Subtarget.hasAVX512()) + return SDValue(); + break; + case MVT::v32i16: + if (!Subtarget.hasBWI()) + return SDValue(); + break; + case MVT::v8i16: + case MVT::v16i16: + if (!Subtarget.hasVLX() || !Subtarget.hasBWI()) + return SDValue(); + break; + case MVT::v64i8: + if (!Subtarget.hasVBMI()) + return SDValue(); + break; + case MVT::v32i8: + if (!Subtarget.hasVLX() || !Subtarget.hasVBMI()) + return SDValue(); + break; + } + SDValue SrcVec, IndicesVec; + // Check for a match of the permute source vector and permute index elements. + // This is done by checking that the i-th build_vector operand is of the form: + // (extract_elt SrcVec, (extract_elt IndicesVec, i)). + for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) { + SDValue Op = V.getOperand(Idx); + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + // If this is the first extract encountered in V, set the source vector, + // otherwise verify the extract is from the previously defined source + // vector. + if (!SrcVec) + SrcVec = Op.getOperand(0); + else if (SrcVec != Op.getOperand(0)) + return SDValue(); + SDValue ExtractedIndex = Op->getOperand(1); + // Peek through extends. + if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND || + ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND) + ExtractedIndex = ExtractedIndex.getOperand(0); + if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + // If this is the first extract from the index vector candidate, set the + // indices vector, otherwise verify the extract is from the previously + // defined indices vector. + if (!IndicesVec) + IndicesVec = ExtractedIndex.getOperand(0); + else if (IndicesVec != ExtractedIndex.getOperand(0)) + return SDValue(); + + auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1)); + if (!PermIdx || PermIdx->getZExtValue() != Idx) + return SDValue(); + } + MVT IndicesVT = VT; + if (VT.isFloatingPoint()) + IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), + VT.getVectorNumElements()); + IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT); + return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV, + SDLoc(V), VT, IndicesVec, SrcVec); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -7922,6 +8034,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (IsAllConstants) return SDValue(); + if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget)) + return V; + // See if we can use a vector load to get all of the elements. if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) { SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems); @@ -10716,10 +10831,16 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. - if (Subtarget.hasSSSE3()) + if (Subtarget.hasSSSE3()) { + if (Subtarget.hasVLX()) + if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2, + Mask, Subtarget, DAG)) + return Rotate; + if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; + } // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. @@ -11016,10 +11137,16 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. - if (Subtarget.hasSSSE3()) + if (Subtarget.hasSSSE3()) { + if (Subtarget.hasVLX()) + if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2, + Mask, Subtarget, DAG)) + return Rotate; + if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; + } // Assume that a single SHUFPS is faster than an alternative sequence of // multiple instructions (even if the CPU has a domain penalty). @@ -12372,6 +12499,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); } } + + // Try to use SHUF128 if possible. + if (Subtarget.hasVLX()) { + if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { + unsigned PermMask = ((WidenedMask[0] % 2) << 0) | + ((WidenedMask[1] % 2) << 1); + return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, + DAG.getConstant(PermMask, DL, MVT::i8)); + } + } } // Otherwise form a 128-bit permutation. After accounting for undefs, @@ -13697,10 +13834,6 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - if (SDValue Shuf128 = - lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG)) - return Shuf128; - if (V2.isUndef()) { // When the shuffle is mirrored between the 128-bit lanes of the unit, we // can use lower latency instructions that will operate on all four @@ -13722,6 +13855,10 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG)); } + if (SDValue Shuf128 = + lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG)) + return Shuf128; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -17333,6 +17470,20 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); + + // See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM. + if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) { + SDValue A = peekThroughBitcasts(Op0); + if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) && + ISD::isBuildVectorAllZeros(Op1.getNode())) { + MVT VT0 = Op0.getSimpleValueType(); + SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0)); + SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1)); + return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM, + dl, VT, RHS, LHS); + } + } + if (Opc) return DAG.getNode(Opc, dl, VT, Op0, Op1); Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM; @@ -19838,10 +19989,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, else PassThru = Src1; - SDValue Rnd = Op.getOperand(5); + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(5); + if (!isRoundModeCurDirection(Rnd)) + return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, + Op.getValueType(), Src1, Src2, + Src3, Rnd), + Mask, PassThru, Subtarget, DAG); + } + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src1, Src2, - Src3, Rnd), + Src3), Mask, PassThru, Subtarget, DAG); } case IFMA_OP_MASKZ: @@ -24786,9 +24946,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; - case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS"; case X86ISD::FRCP: return "X86ISD::FRCP"; - case X86ISD::FRCPS: return "X86ISD::FRCPS"; case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; @@ -24942,10 +25100,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; + case X86ISD::FMADDS1: return "X86ISD::FMADDS1"; + case X86ISD::FNMADDS1: return "X86ISD::FNMADDS1"; + case X86ISD::FMSUBS1: return "X86ISD::FMSUBS1"; + case X86ISD::FNMSUBS1: return "X86ISD::FNMSUBS1"; case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND"; case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND"; case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND"; case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND"; + case X86ISD::FMADDS3: return "X86ISD::FMADDS3"; + case X86ISD::FNMADDS3: return "X86ISD::FNMADDS3"; + case X86ISD::FMSUBS3: return "X86ISD::FMSUBS3"; + case X86ISD::FNMSUBS3: return "X86ISD::FNMSUBS3"; case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND"; case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND"; case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND"; @@ -24966,9 +25132,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SELECT: return "X86ISD::SELECT"; case X86ISD::SELECTS: return "X86ISD::SELECTS"; case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; + case X86ISD::RCP14: return "X86ISD::RCP14"; + case X86ISD::RCP14S: return "X86ISD::RCP14S"; case X86ISD::RCP28: return "X86ISD::RCP28"; case X86ISD::RCP28S: return "X86ISD::RCP28S"; case X86ISD::EXP2: return "X86ISD::EXP2"; + case X86ISD::RSQRT14: return "X86ISD::RSQRT14"; + case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; @@ -25006,6 +25176,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND"; case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH"; case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS"; + case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND"; case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI"; case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI"; case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND"; @@ -30314,10 +30485,17 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) + if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget)) return NewOp; - if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget)) + // TODO - Remove this once we can handle the implicit zero-extension of + // X86ISD::PEXTRW/X86ISD::PEXTRB in: + // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and + // combineBasicSADPattern. + if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) return NewOp; SDValue InputVector = N->getOperand(0); @@ -30464,16 +30642,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// TODO - merge with combineExtractVectorElt once it can handle the implicit -// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in: -// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and -// combineBasicSADPattern. -static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { - return combineExtractWithShuffle(N, DAG, DCI, Subtarget); -} - /// If a vector select has an operand that is -1 or 0, try to simplify the /// select to a bitwise logic operation. /// TODO: Move to DAGCombiner, possibly using TargetLowering::hasAndNot()? @@ -30674,26 +30842,6 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, unsigned Opcode = Op.getOpcode(); switch (Opcode) { - case X86ISD::PALIGNR: - // PALIGNR can be converted to VALIGND/Q for 128-bit vectors. - if (!VT.is128BitVector()) - return false; - Opcode = X86ISD::VALIGN; - LLVM_FALLTHROUGH; - case X86ISD::VALIGN: { - if (EltVT != MVT::i32 && EltVT != MVT::i64) - return false; - uint64_t Imm = Op.getConstantOperandVal(2); - MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); - unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits(); - unsigned EltSize = EltVT.getSizeInBits(); - // Make sure we can represent the same shift with the new VT. - if ((ShiftAmt % EltSize) != 0) - return false; - Imm = ShiftAmt / EltSize; - return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1), - DAG.getConstant(Imm, DL, MVT::i8)); - } case X86ISD::SHUF128: { if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64) return false; @@ -34441,8 +34589,9 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// This function transforms vector truncation of 'extended sign-bits' values. -/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS operations. +/// This function transforms vector truncation of 'extended sign-bits' or +/// 'extended zero-bits' values. +/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations. static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -34475,10 +34624,19 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL, // packed/truncated value. e.g. Comparison result, sext_in_reg, etc. unsigned NumSignBits = DAG.ComputeNumSignBits(In); unsigned NumPackedBits = std::min<unsigned>(SVT.getSizeInBits(), 16); - if (NumSignBits <= (InSVT.getSizeInBits() - NumPackedBits)) - return SDValue(); + if (NumSignBits > (InSVT.getSizeInBits() - NumPackedBits)) + return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget); + + // Use PACKUS if the input has zero-bits that extend all the way to the + // packed/truncated value. e.g. masks, zext_in_reg, etc. + KnownBits Known; + DAG.computeKnownBits(In, Known); + unsigned NumLeadingZeroBits = Known.countMinLeadingZeros(); + NumPackedBits = Subtarget.hasSSE41() ? NumPackedBits : 8; + if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedBits)) + return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget); - return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget); + return SDValue(); } static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, @@ -34507,7 +34665,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc); } - // Try to truncate extended sign bits with PACKSS. + // Try to truncate extended sign/zero bits with PACKSS/PACKUS. if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget)) return V; @@ -35341,9 +35499,11 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, // Do not convert the passthru input of scalar intrinsics. // FIXME: We could allow negations of the lower element only. - bool NegA = N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A); + bool NegA = N->getOpcode() != X86ISD::FMADDS1 && + N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A); bool NegB = invertIfNegative(B); - bool NegC = N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C); + bool NegC = N->getOpcode() != X86ISD::FMADDS3 && + N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C); // Negative multiplication when NegA xor NegB bool NegMul = (NegA != NegB); @@ -35371,6 +35531,20 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break; case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break; } + } else if (N->getOpcode() == X86ISD::FMADDS1) { + switch (NewOpcode) { + case ISD::FMA: NewOpcode = X86ISD::FMADDS1; break; + case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1; break; + case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1; break; + case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1; break; + } + } else if (N->getOpcode() == X86ISD::FMADDS3) { + switch (NewOpcode) { + case ISD::FMA: NewOpcode = X86ISD::FMADDS3; break; + case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3; break; + case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3; break; + case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3; break; + } } else if (N->getOpcode() == X86ISD::FMADDS1_RND) { switch (NewOpcode) { case ISD::FMA: NewOpcode = X86ISD::FMADDS1_RND; break; @@ -36590,10 +36764,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return combineExtractVectorElt(N, DAG, DCI, Subtarget); case X86ISD::PEXTRW: case X86ISD::PEXTRB: - return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget); + return combineExtractVectorElt(N, DAG, DCI, Subtarget); case ISD::INSERT_SUBVECTOR: return combineInsertSubvector(N, DAG, DCI, Subtarget); case ISD::EXTRACT_SUBVECTOR: @@ -36689,6 +36862,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FMADD_RND: case X86ISD::FMADDS1_RND: case X86ISD::FMADDS3_RND: + case X86ISD::FMADDS1: + case X86ISD::FMADDS3: case ISD::FMA: return combineFMA(N, DAG, Subtarget); case ISD::MGATHER: case ISD::MSCATTER: return combineGatherScatter(N, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 17cb976a4c7..d1438e59f9b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -254,7 +254,9 @@ namespace llvm { /// Note that these typically require refinement /// in order to obtain suitable precision. FRSQRT, FRCP, - FRSQRTS, FRCPS, + + // AVX-512 reciprocal approximations with a little more precision. + RSQRT14, RSQRT14S, RCP14, RCP14S, // Thread Local Storage. TLSADDR, @@ -487,6 +489,12 @@ namespace llvm { FMADDSUB_RND, FMSUBADD_RND, + // Scalar intrinsic FMA. + FMADDS1, FMADDS3, + FNMADDS1, FNMADDS3, + FMSUBS1, FMSUBS3, + FNMSUBS1, FNMSUBS3, + // Scalar intrinsic FMA with rounding mode. // Two versions, passthru bits on op1 or op3. FMADDS1_RND, FMADDS3_RND, @@ -555,7 +563,7 @@ namespace llvm { RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2, // Conversions between float and half-float. - CVTPS2PH, CVTPH2PS, + CVTPS2PH, CVTPH2PS, CVTPH2PS_RND, // LWP insert record. LWPINS, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index a73ee19423d..84b44ac677b 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6017,16 +6017,17 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in { } multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, - string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1, - SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> { + string OpcodeStr, SDNode OpNode, SDNode OpNodes1, + SDNode OpNodeRnds1, SDNode OpNodes3, + SDNode OpNodeRnds3, X86VectorVTInfo _, + string SUFF> { let ExeDomain = _.ExeDomain in { defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _, // Operands for intrinsic are in 123 order to preserve passthu // semantics. - (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, - (i32 FROUND_CURRENT))), - (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, - _.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))), + (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)), + (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, + _.ScalarIntMemCPat:$src3)), (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc))), (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, @@ -6035,10 +6036,9 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, (_.ScalarLdFrag addr:$src3)))), 0>; defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _, - (_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, - (i32 FROUND_CURRENT))), - (_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3, - _.RC:$src1, (i32 FROUND_CURRENT))), + (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)), + (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3, + _.RC:$src1)), (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, @@ -6050,8 +6050,8 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, // 213 and 231 patterns this helps tablegen's duplicate pattern detection. defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _, (null_frag), - (_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3, - _.RC:$src2, (i32 FROUND_CURRENT))), + (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3, + _.RC:$src2)), (null_frag), (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, _.FRC:$src2))), @@ -6061,26 +6061,29 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, } multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, - string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1, + string OpcodeStr, SDNode OpNode, SDNode OpNodes1, + SDNode OpNodeRnds1, SDNode OpNodes3, SDNode OpNodeRnds3> { let Predicates = [HasAVX512] in { defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, - OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">, + OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3, + f32x_info, "SS">, EVEX_CD8<32, CD8VT1>, VEX_LIG; defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, - OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">, + OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3, + f64x_info, "SD">, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; } } -defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1, - X86FmaddRnds3>; -defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1, - X86FmsubRnds3>; -defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, - X86FnmaddRnds1, X86FnmaddRnds3>; -defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, - X86FnmsubRnds1, X86FnmsubRnds3>; +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1, + X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>; +defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1, + X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>; +defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1, + X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>; +defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1, + X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>; //===----------------------------------------------------------------------===// // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA @@ -6554,7 +6557,7 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), - (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>, + (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, Requires<[HasAVX512]>; def : Pat<(f64 (fpextend (loadf32 addr:$src))), (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, @@ -6569,7 +6572,7 @@ def : Pat<(f64 (extloadf32 addr:$src)), Requires<[HasAVX512, OptForSpeed]>; def : Pat<(f32 (fpround FR64X:$src)), - (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>, + (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, Requires<[HasAVX512]>; def : Pat<(v4f32 (X86Movss @@ -7174,34 +7177,44 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), //===----------------------------------------------------------------------===// multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, X86MemOperand x86memop, PatFrag ld_frag> { - defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), - "vcvtph2ps", "$src", "$src", - (X86cvtph2ps (_src.VT _src.RC:$src), - (i32 FROUND_CURRENT))>, T8PD; - defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), - "vcvtph2ps", "$src", "$src", - (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))), - (i32 FROUND_CURRENT))>, T8PD; + defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD; + defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), + (ins x86memop:$src), "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT + (bitconvert + (ld_frag addr:$src))))>, T8PD; } multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> { - defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), - "vcvtph2ps", "{sae}, $src", "$src, {sae}", - (X86cvtph2ps (_src.VT _src.RC:$src), - (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; + defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), + (ins _src.RC:$src), "vcvtph2ps", + "{sae}, $src", "$src, {sae}", + (X86cvtph2psRnd (_src.VT _src.RC:$src), + (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; } -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512] in defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>, avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; - let Predicates = [HasVLX] in { - defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, - loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; - defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, - loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; - } + +let Predicates = [HasVLX] in { + defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, + loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, + loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + + // Pattern match vcvtph2ps of a scalar i64 load. + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), + (VCVTPH2PSZ128rm addr:$src)>; + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), + (VCVTPH2PSZ128rm addr:$src)>; + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert + (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VCVTPH2PSZ128rm addr:$src)>; } multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, @@ -7212,17 +7225,16 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)), NoItinerary, 0, 0>, AVX512AIi8Base; - def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1), - (i32 imm:$src2))), - addr:$dst)]>; - let hasSideEffects = 0, mayStore = 1 in - def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", - []>, EVEX_K; + let hasSideEffects = 0, mayStore = 1 in { + def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>; + def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", + []>, EVEX_K; + } } multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> { let hasSideEffects = 0 in @@ -7242,6 +7254,19 @@ let Predicates = [HasAVX512] in { defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } + + def : Pat<(store (f64 (extractelt + (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + def : Pat<(store (i64 (extractelt + (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; + def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. @@ -7264,35 +7289,6 @@ let Predicates = [HasVLX] in { (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >; } -// Patterns for matching float to half-float conversion when AVX512 is supported -// but F16C isn't. In that case we have to use 512-bit vectors. -let Predicates = [HasAVX512, NoVLX, NoF16C] in { - def : Pat<(fp_to_f16 FR32X:$src), - (i16 (EXTRACT_SUBREG - (VMOVPDI2DIZrr - (v8i16 (EXTRACT_SUBREG - (VCVTPS2PHZrr - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), - (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), - sub_xmm), 4), sub_xmm))), sub_16bit))>; - - def : Pat<(f16_to_fp GR16:$src), - (f32 (COPY_TO_REGCLASS - (v4f32 (EXTRACT_SUBREG - (VCVTPH2PSZrr - (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), - (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), - sub_xmm)), sub_xmm)), FR32X))>; - - def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))), - (f32 (COPY_TO_REGCLASS - (v4f32 (EXTRACT_SUBREG - (VCVTPH2PSZrr - (VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), - (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), - sub_xmm), 4)), sub_xmm)), FR32X))>; -} - // Unordered/Ordered scalar fp compare with Sea and set EFLAGS multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> { @@ -7362,13 +7358,13 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>, +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd @@ -7414,8 +7410,8 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> { } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, @@ -7582,7 +7578,8 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, } multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, - string SUFF, SDNode OpNode, SDNode OpNodeRnd> { + string SUFF, SDNode OpNode, SDNode OpNodeRnd, + Intrinsic Intr> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, @@ -7618,21 +7615,35 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, } } +let Predicates = [HasAVX512] in { def : Pat<(_.EltVT (OpNode _.FRC:$src)), (!cast<Instruction>(NAME#SUFF#Zr) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; + def : Pat<(Intr VR128X:$src), + (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src, + VR128X:$src)>; +} + +let Predicates = [HasAVX512, OptForSize] in { def : Pat<(_.EltVT (OpNode (load addr:$src))), (!cast<Instruction>(NAME#SUFF#Zm) - (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>; + (_.EltVT (IMPLICIT_DEF)), addr:$src)>; + + def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))), + (!cast<Instruction>(NAME#SUFF#Zm_Int) + (_.VT (IMPLICIT_DEF)), addr:$src2)>; +} + } multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> { defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt, - X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, - NotMemoryFoldable; + X86fsqrtRnds, int_x86_sse_sqrt_ss>, + EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable; defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt, - X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, + X86fsqrtRnds, int_x86_sse2_sqrt_sd>, + EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable; } @@ -7641,19 +7652,6 @@ defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; -let Predicates = [HasAVX512] in { - def : Pat<(f32 (X86frsqrt FR32X:$src)), - (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>; - def : Pat<(f32 (X86frsqrt (load addr:$src))), - (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, - Requires<[OptForSize]>; - def : Pat<(f32 (X86frcp FR32X:$src)), - (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>; - def : Pat<(f32 (X86frcp (load addr:$src))), - (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, - Requires<[OptForSize]>; -} - multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { @@ -8911,6 +8909,123 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" , avx512vl_i8_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; +// Fragments to help convert valignq into masked valignd. Or valignq/valignd +// into vpalignr. +def ValignqImm32XForm : SDNodeXForm<imm, [{ + return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); +}]>; +def ValignqImm8XForm : SDNodeXForm<imm, [{ + return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); +}]>; +def ValigndImm8XForm : SDNodeXForm<imm, [{ + return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); +}]>; + +multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, + X86VectorVTInfo From, X86VectorVTInfo To, + SDNodeXForm ImmXForm> { + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, From.RC:$src2, + imm:$src3))), + To.RC:$src0)), + (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, + To.RC:$src1, To.RC:$src2, + (ImmXForm imm:$src3))>; + + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, From.RC:$src2, + imm:$src3))), + To.ImmAllZerosV)), + (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, + To.RC:$src1, To.RC:$src2, + (ImmXForm imm:$src3))>; + + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (bitconvert (To.LdFrag addr:$src2)), + imm:$src3))), + To.RC:$src0)), + (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, + To.RC:$src1, addr:$src2, + (ImmXForm imm:$src3))>; + + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (bitconvert (To.LdFrag addr:$src2)), + imm:$src3))), + To.ImmAllZerosV)), + (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, + To.RC:$src1, addr:$src2, + (ImmXForm imm:$src3))>; +} + +multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, + X86VectorVTInfo From, + X86VectorVTInfo To, + SDNodeXForm ImmXForm> : + avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { + def : Pat<(From.VT (OpNode From.RC:$src1, + (bitconvert (To.VT (X86VBroadcast + (To.ScalarLdFrag addr:$src2)))), + imm:$src3)), + (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, + (ImmXForm imm:$src3))>; + + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (bitconvert + (To.VT (X86VBroadcast + (To.ScalarLdFrag addr:$src2)))), + imm:$src3))), + To.RC:$src0)), + (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, + To.RC:$src1, addr:$src2, + (ImmXForm imm:$src3))>; + + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (bitconvert + (To.VT (X86VBroadcast + (To.ScalarLdFrag addr:$src2)))), + imm:$src3))), + To.ImmAllZerosV)), + (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, + To.RC:$src1, addr:$src2, + (ImmXForm imm:$src3))>; +} + +let Predicates = [HasAVX512] in { + // For 512-bit we lower to the widest element type we can. So we only need + // to handle converting valignq to valignd. + defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, + v16i32_info, ValignqImm32XForm>; +} + +let Predicates = [HasVLX] in { + // For 128-bit we lower to the widest element type we can. So we only need + // to handle converting valignq to valignd. + defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, + v4i32x_info, ValignqImm32XForm>; + // For 256-bit we lower to the widest element type we can. So we only need + // to handle converting valignq to valignd. + defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, + v8i32x_info, ValignqImm32XForm>; +} + +let Predicates = [HasVLX, HasBWI] in { + // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. + defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, + v16i8x_info, ValignqImm8XForm>; + defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, + v16i8x_info, ValigndImm8XForm>; +} + defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 453dcd83df1..15466c2978f 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -290,8 +290,7 @@ multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, } multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64, - SDNode OpNode> { + string OpStr, SDNode OpNodeIntrin, SDNode OpNode> { let ExeDomain = SSEPackedSingle in defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode, FR32, f32mem>, @@ -309,43 +308,44 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, // This is because src1 is tied to dest, and the scalar intrinsics // require the pass-through values to come from the first source // operand, not the second. - // TODO: Use AVX512 instructions when possible. - let Predicates = [HasFMA] in { - def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), + let Predicates = [HasFMA, NoAVX512] in { + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)), (!cast<Instruction>(NAME#"213SSr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)), (!cast<Instruction>(NAME#"213SDr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3), + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, + sse_load_f32:$src3)), (!cast<Instruction>(NAME#"213SSm_Int") VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; - def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, + sse_load_f64:$src3)), (!cast<Instruction>(NAME#"213SDm_Int") VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; - def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2), + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, sse_load_f32:$src3, + VR128:$src2)), (!cast<Instruction>(NAME#"132SSm_Int") VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; - def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, sse_load_f64:$src3, + VR128:$src2)), (!cast<Instruction>(NAME#"132SDm_Int") VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; } } -defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; -defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; -defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>, + VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>, + VEX_LIG; //===----------------------------------------------------------------------===// @@ -385,26 +385,28 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in } multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic Int> { + ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> { let isCodeGenOnly = 1 in { def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG; + (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W, + VEX_LIG; def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, - mem_cpat:$src3))]>, VEX_W, VEX_LIG; + [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2, + mem_cpat:$src3)))]>, VEX_W, VEX_LIG; def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; + (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>, + VEX_LIG; let hasSideEffects = 0 in def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), @@ -475,19 +477,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { let ExeDomain = SSEPackedSingle in { // Scalar Instructions defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; + fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32, + X86Fmadds1>; defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; + fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32, + X86Fmsubs1>; defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; + fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32, + X86Fnmadds1>; defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; + fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32, + X86Fnmsubs1>; // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; @@ -506,19 +508,19 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { // Scalar Instructions defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; + fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64, + X86Fmadds1>; defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; + fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64, + X86Fmsubs1>; defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; + fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64, + X86Fnmadds1>; defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; + fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64, + X86Fnmsubs1>; // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c4f34bdd37e..d30400836bb 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -56,8 +56,6 @@ def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>; -def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; @@ -482,11 +480,22 @@ def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutat def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>; // Scalar FMA intrinsics with passthru bits in operand 1. +def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>; +def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>; +def X86Fmsubs1 : SDNode<"X86ISD::FMSUBS1", SDTFPTernaryOp>; +def X86Fnmsubs1 : SDNode<"X86ISD::FNMSUBS1", SDTFPTernaryOp>; + +// Scalar FMA intrinsics with passthru bits in operand 1. def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>; def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>; def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>; def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>; +def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp>; +def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp>; +def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp>; +def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp>; + // Scalar FMA intrinsics with passthru bits in operand 3. def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>; def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound, [SDNPCommutative]>; @@ -498,10 +507,14 @@ def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>, def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>; def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>; +def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>; +def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>; def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>; def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>; def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>; +def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>; +def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>; def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>; def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>; def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImmRound>; @@ -578,7 +591,12 @@ def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>; def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>; def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>; + def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", + SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, i16>]> >; + +def X86cvtph2psRnd : SDNode<"X86ISD::CVTPH2PS_RND", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, i16>, SDTCisVT<2, i32>]> >; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index e665ec1f14d..02a09c340ce 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -18,7 +18,7 @@ #include "X86InstrFMA3Info.h" #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "X86GenInstrInfo.inc" diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 559a8fcf107..f00caa130d0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -850,7 +850,6 @@ def HasLWP : Predicate<"Subtarget->hasLWP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; -def NoF16C : Predicate<"!Subtarget->hasF16C()">; def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 451303054f5..955a40ee171 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1514,13 +1514,12 @@ let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + [], IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable; } def : Pat<(f32 (fpround FR64:$src)), - (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>, + (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>, Requires<[UseAVX]>; def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), @@ -1574,20 +1573,18 @@ let hasSideEffects = 0, Predicates = [UseAVX] in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + [], IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable; } def : Pat<(f64 (fpextend FR32:$src)), - (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>; + (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>; def : Pat<(fpextend (loadf32 addr:$src)), (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>; @@ -1899,7 +1896,7 @@ let Predicates = [HasAVX, NoVLX] in { (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))), (VCVTTPD2DQrm addr:$src)>; } -} // Predicates = [HasAVX] +} // Predicates = [HasAVX, NoVLX] def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -3095,7 +3092,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, ValueType vt, ValueType ScalarVT, X86MemOperand x86memop, Intrinsic Intr, SDNode OpNode, Domain d, - OpndItins itins, string Suffix> { + OpndItins itins, Predicate target, string Suffix> { let hasSideEffects = 0 in { def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -3126,21 +3123,17 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, // vrcpss mem, %xmm0, %xmm0 // TODO: In theory, we could fold the load, and avoid the stall caused by // the partial register store, either in ExecutionDepsFix or with smarter RA. - let Predicates = [UseAVX] in { + let Predicates = [target] in { def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)), RC:$src)>; - } - let Predicates = [HasAVX] in { def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int) VR128:$src, VR128:$src)>; } - let Predicates = [HasAVX, OptForSize] in { + let Predicates = [target, OptForSize] in { def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))), (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), addr:$src2)>; - } - let Predicates = [UseAVX, OptForSize] in { def : Pat<(ScalarVT (OpNode (load addr:$src))), (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)), addr:$src)>; @@ -3186,7 +3179,7 @@ let Predicates = prds in { /// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), @@ -3220,41 +3213,41 @@ let Predicates = [HasAVX] in { } multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { + OpndItins itins, Predicate AVXTarget> { defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem, !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode, SSEPackedSingle, itins, UseSSE1, "SS">, XS; defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32, f32mem, !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG, - NotMemoryFoldable; + SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V, + VEX_LIG, VEX_WIG, NotMemoryFoldable; } multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { + OpndItins itins, Predicate AVXTarget> { defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem, !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd), OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD; defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64, f64mem, !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd), - OpNode, SSEPackedDouble, itins, "SD">, + OpNode, SSEPackedDouble, itins, AVXTarget, "SD">, XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; } // Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>, +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS, UseAVX>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX, NoVLX]>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD, UseAVX>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >; -defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>; +defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS, HasAVX>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX]>; +defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS, HasAVX>, + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX]>; // There is no f64 version of the reciprocal approximation instructions. @@ -7692,22 +7685,24 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// -multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { +multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> { def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", - [(set RC:$dst, (Int VR128:$src))]>, + [(set RC:$dst, (X86cvtph2ps VR128:$src))]>, T8PD, VEX, Sched<[WriteCvtF2F]>; let hasSideEffects = 0, mayLoad = 1 in def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX, - Sched<[WriteCvtF2FLd]>; + "vcvtph2ps\t{$src, $dst|$dst, $src}", + [(set RC:$dst, (X86cvtph2ps (bc_v8i16 + (loadv2i64 addr:$src))))]>, + T8PD, VEX, Sched<[WriteCvtF2FLd]>; } -multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { +multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> { def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), (ins RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, + [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>, TAPD, VEX, Sched<[WriteCvtF2F]>; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteCvtF2FLd, WriteRMW] in @@ -7717,32 +7712,31 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { TAPD, VEX; } -let Predicates = [HasF16C] in { - defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>; - defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L; - defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; - defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; +let Predicates = [HasF16C, NoVLX] in { + defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>; + defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L; + defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>; + defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L; // Pattern match vcvtph2ps of a scalar i64 load. - def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)), + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), (VCVTPH2PSrm addr:$src)>; - def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), (VCVTPH2PSrm addr:$src)>; - def : Pat<(int_x86_vcvtph2ps_128 (bitconvert - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert + (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VCVTPH2PSrm addr:$src)>; - def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 - (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), - addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16 - (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), - addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)), - addr:$dst), - (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(store (f64 (extractelt + (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + def : Pat<(store (i64 (extractelt + (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 6f5e41bcdc6..9edac22d5ba 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -422,12 +422,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), - X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0), - X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0), - X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0), - X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0), - X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0), - X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0), X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0), X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0), X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0), @@ -1077,12 +1071,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FSUBS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FSUBS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK, X86ISD::CVTPH2PS, 0), - X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM, - X86ISD::CVTPH2PS, 0), - X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK, X86ISD::CVTPH2PS, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK, + X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND), X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK, X86ISD::CVTPS2PH, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK, @@ -1098,8 +1092,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, ISD::FMA, X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND), X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB, @@ -1220,8 +1214,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, ISD::FMA, X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND), X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB, @@ -1239,8 +1233,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB, X86ISD::FMSUB_RND), - X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND), X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0), @@ -1259,8 +1253,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB, X86ISD::FNMSUB_RND), - X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND), + X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND), X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ, X86ISD::VFIXUPIMM, 0), X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ, @@ -1298,8 +1292,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, ISD::FMA, X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND), X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, @@ -1428,26 +1422,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_ptestnm_w_128, CMP_MASK, X86ISD::TESTNM, 0), X86_INTRINSIC_DATA(avx512_ptestnm_w_256, CMP_MASK, X86ISD::TESTNM, 0), X86_INTRINSIC_DATA(avx512_ptestnm_w_512, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0), X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0), @@ -1468,6 +1462,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0), + X86_INTRINSIC_DATA(fma_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADDS1, 0), + X86_INTRINSIC_DATA(fma_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADDS1, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), @@ -1476,6 +1472,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0), + X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), @@ -1484,10 +1482,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0), + X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0), X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0), + X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0), X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE), @@ -1584,6 +1586,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0), X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), + X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), + X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), + X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0), diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index 98b4863134e..b1438bf7bc0 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -22,6 +22,38 @@ using namespace llvm; using namespace TargetOpcode; +/// FIXME: The following static functions are SizeChangeStrategy functions +/// that are meant to temporarily mimic the behaviour of the old legalization +/// based on doubling/halving non-legal types as closely as possible. This is +/// not entirly possible as only legalizing the types that are exactly a power +/// of 2 times the size of the legal types would require specifying all those +/// sizes explicitly. +/// In practice, not specifying those isn't a problem, and the below functions +/// should disappear quickly as we add support for legalizing non-power-of-2 +/// sized types further. +static void +addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result, + const LegalizerInfo::SizeAndActionsVec &v) { + for (unsigned i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 < v[i].first && i + 1 < v.size() && + v[i + 1].first != v[i].first + 1) + result.push_back({v[i].first + 1, LegalizerInfo::Unsupported}); + } +} + +static LegalizerInfo::SizeAndActionsVec +widen_1(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 1); + LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::WidenScalar}, + {2, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM) : Subtarget(STI), TM(TM) { @@ -37,6 +69,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, setLegalizerInfoAVX512DQ(); setLegalizerInfoAVX512BW(); + setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1); + for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR}) + setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1); + for (unsigned MemOp : {G_LOAD, G_STORE}) + setLegalizeScalarToDifferentSizeStrategy(MemOp, 0, + narrowToSmallerAndWidenToSmallest); + setLegalizeScalarToDifferentSizeStrategy( + G_GEP, 1, widenToLargerTypesUnsupportedOtherwise); + setLegalizeScalarToDifferentSizeStrategy( + G_CONSTANT, 0, widenToLargerTypesAndNarrowToLargest); + computeTables(); } @@ -47,7 +90,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); - const LLT s64 = LLT::scalar(64); for (auto Ty : {p0, s1, s8, s16, s32}) setAction({G_IMPLICIT_DEF, Ty}, Legal); @@ -55,15 +97,10 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { for (auto Ty : {s8, s16, s32, p0}) setAction({G_PHI, Ty}, Legal); - setAction({G_PHI, s1}, WidenScalar); - - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32}) setAction({BinOp, Ty}, Legal); - setAction({BinOp, s1}, WidenScalar); - } - for (unsigned Op : {G_UADDE}) { setAction({Op, s32}, Legal); setAction({Op, 1, s1}, Legal); @@ -73,7 +110,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { for (auto Ty : {s8, s16, s32, p0}) setAction({MemOp, Ty}, Legal); - setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); } @@ -85,9 +121,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({G_GEP, 1, Ty}, WidenScalar); - // Control-flow setAction({G_BRCOND, s1}, Legal); @@ -95,9 +128,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { for (auto Ty : {s8, s16, s32, p0}) setAction({TargetOpcode::G_CONSTANT, Ty}, Legal); - setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); - setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar); - // Extensions for (auto Ty : {s8, s16, s32}) { setAction({G_ZEXT, Ty}, Legal); @@ -105,12 +135,6 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({G_ANYEXT, Ty}, Legal); } - for (auto Ty : {s1, s8, s16}) { - setAction({G_ZEXT, 1, Ty}, Legal); - setAction({G_SEXT, 1, Ty}, Legal); - setAction({G_ANYEXT, 1, Ty}, Legal); - } - // Comparison setAction({G_ICMP, s1}, Legal); @@ -123,7 +147,6 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { if (!Subtarget.is64Bit()) return; - const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); setAction({G_IMPLICIT_DEF, s64}, Legal); @@ -145,7 +168,6 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { // Extensions for (unsigned extOp : {G_ZEXT, G_SEXT, G_ANYEXT}) { setAction({extOp, s64}, Legal); - setAction({extOp, 1, s32}, Legal); } // Comparison diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp index 0dd13077c37..67d95c2233d 100644 --- a/lib/Target/X86/X86MacroFusion.cpp +++ b/lib/Target/X86/X86MacroFusion.cpp @@ -14,8 +14,8 @@ #include "X86MacroFusion.h" #include "X86Subtarget.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/CodeGen/MacroFusion.h" +#include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 3069d1fd349..9b7732c1db8 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -23,10 +23,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1f49650340e..efa0cd2c6bc 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -27,14 +27,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b0ce1335bd3..66fea1e688f 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -161,6 +161,8 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV, // In Regcall calling convention those registers are used for passing // parameters. Thus we need to prevent lazy binding in Regcall. return X86II::MO_GOTPCREL; + if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit()) + return X86II::MO_GOTPCREL; return X86II::MO_PLT; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a8d7f290688..a21d068c7f4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -463,7 +463,7 @@ public: bool hasPCLMUL() const { return HasPCLMUL; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. - bool hasFMA() const { return (HasFMA || hasAVX512()) && !HasFMA4; } + bool hasFMA() const { return HasFMA && !HasFMA4; } bool hasFMA4() const { return HasFMA4; } bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 6e6c724eb0a..11fe84f162d 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -436,4 +436,11 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86FixupLEAs()); addPass(createX86EvexToVexInsts()); } + + // Verify basic block incoming and outgoing cfa offset and register values and + // correct CFA calculation rule where needed by inserting appropriate CFI + // instructions. + const Triple &TT = TM->getTargetTriple(); + if (!TT.isOSDarwin() && !TT.isOSWindows()) + addPass(createCFIInstrInserter()); } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index effbd07fa31..6772d96c799 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1437,7 +1437,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Entry->Cost; } - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, I); } int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, @@ -2644,12 +2644,15 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, { 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8 { 3, MVT::v16i8, 11}, //(load 48i8 and) deinterleave into 3 x 16i8 { 3, MVT::v32i8, 13}, //(load 96i8 and) deinterleave into 3 x 32i8 + { 3, MVT::v8f32, 17 }, //(load 24f32 and)deinterleave into 3 x 8f32 { 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8 { 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8 { 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8 { 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8 - { 4, MVT::v32i8, 80 } //(load 128i8 and) deinterleave into 4 x 32i8 + { 4, MVT::v32i8, 80 }, //(load 128i8 and) deinterleave into 4 x 32i8 + + { 8, MVT::v8f32, 40 } //(load 64f32 and)deinterleave into 8 x 8f32 }; static const CostTblEntry AVX2InterleavedStoreTbl[] = { diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index fb8c2a71c9a..ba01f1e25ba 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -26,13 +26,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cassert> diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp index fc08f1582ad..8a186e94d9c 100644 --- a/lib/Target/X86/X86WinAllocaExpander.cpp +++ b/lib/Target/X86/X86WinAllocaExpander.cpp @@ -25,9 +25,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index 27584f4e2b6..e98e9cda11d 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -15,7 +15,7 @@ #ifndef LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H #define LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index a377784caf4..9d9ee33ce22 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_XCORE_XCOREINSTRINFO_H #include "XCoreRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "XCoreGenInstrInfo.inc" diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index d34e928b14f..a6cf6837009 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 12090bff381..4bb2984e3b4 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -448,9 +448,13 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, for (auto *GVE : GVs) { DIVariable *Var = GVE->getVariable(); DIExpression *Expr = GVE->getExpression(); - if (NumElements > 1) - Expr = DIExpression::createFragmentExpression(Expr, FragmentOffsetInBits, - FragmentSizeInBits); + if (NumElements > 1) { + if (auto E = DIExpression::createFragmentExpression( + Expr, FragmentOffsetInBits, FragmentSizeInBits)) + Expr = *E; + else + return; + } auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); NGV->addDebugInfo(NGVE); } diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 9fa5ed9ab2b..6cef866b7b8 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1401,7 +1401,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( FAlias->takeName(F); if (FAlias->hasName()) F->setName(FAlias->getName() + ".cfi"); - F->replaceAllUsesWith(FAlias); + F->replaceUsesExceptBlockAddr(FAlias); } if (!F->isDeclarationForLinker()) F->setLinkage(GlobalValue::InternalLinkage); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index b5267f75e41..c47d8b78df3 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -931,15 +931,17 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) continue; - ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", - CS.getInstruction()) - << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " - << ore::NV("Caller", CS.getCaller()); - }); + // Construct remark before doing the inlining, as after successful inlining + // the callsite is removed. + OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); + OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " + << ore::NV("Caller", CS.getCaller()); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); - InlineFunction(CS, IFI); + if (!InlineFunction(CS, IFI)) + continue; + + ORE.emit(OR); // Now update the entry count: if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 828eb5eee29..5d373665509 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -467,6 +467,9 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + if (OptLevel > 2) + MPM.add(createCallSiteSplittingPass()); + MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); MPM.add(createGlobalOptimizerPass()); // Optimize out global vars @@ -545,6 +548,9 @@ void PassManagerBuilder::populateModulePassManager( // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes // during ThinLTO and perform the rest of the optimizations afterward. if (PrepareForThinLTO) { + // Ensure we perform any last passes, but do so before renaming anonymous + // globals in case the passes add any. + addExtensionsToPM(EP_OptimizerLast, MPM); // Rename anon globals to be able to export them in the summary. MPM.add(createNameAnonGlobalPass()); return; @@ -703,6 +709,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createInferFunctionAttrsLegacyPass()); if (OptLevel > 1) { + // Split call-site with more constrained arguments. + PM.add(createCallSiteSplittingPass()); + // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 34414f96cca..8930e9b2b95 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -1182,24 +1182,20 @@ void SampleProfileLoader::buildEdges(Function &F) { } } -/// Sorts the CallTargetMap \p M by count in descending order and stores the -/// sorted result in \p Sorted. Returns the total counts. -static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted, - const SampleRecord::CallTargetMap &M) { - Sorted.clear(); - uint64_t Sum = 0; - for (auto I = M.begin(); I != M.end(); ++I) { - Sum += I->getValue(); - Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()}); - } - std::sort(Sorted.begin(), Sorted.end(), +/// Returns the sorted CallTargetMap \p M by count in descending order. +static SmallVector<InstrProfValueData, 2> SortCallTargets( + const SampleRecord::CallTargetMap &M) { + SmallVector<InstrProfValueData, 2> R; + for (auto I = M.begin(); I != M.end(); ++I) + R.push_back({Function::getGUID(I->getKey()), I->getValue()}); + std::sort(R.begin(), R.end(), [](const InstrProfValueData &L, const InstrProfValueData &R) { if (L.Count == R.Count) return L.Value > R.Value; else return L.Count > R.Count; }); - return Sum; + return R; } /// \brief Propagate weights into edges @@ -1292,8 +1288,10 @@ void SampleProfileLoader::propagateWeights(Function &F) { auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); if (!T || T.get().empty()) continue; - SmallVector<InstrProfValueData, 2> SortedCallTargets; - uint64_t Sum = SortCallTargets(SortedCallTargets, T.get()); + SmallVector<InstrProfValueData, 2> SortedCallTargets = + SortCallTargets(T.get()); + uint64_t Sum; + findIndirectCallFunctionSamples(I, Sum); annotateValueSite(*I.getParent()->getParent()->getParent(), I, SortedCallTargets, Sum, IPVK_IndirectCallTarget, SortedCallTargets.size()); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 18b246b5d99..d28d615f47e 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -482,7 +482,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { return nullptr; FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); if (I0) Flags &= I->getFastMathFlags(); if (I1) Flags &= I->getFastMathFlags(); @@ -511,7 +511,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { } Value *FAddCombine::simplify(Instruction *I) { - assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); + assert(I->isFast() && "Expected 'fast' instruction"); // Currently we are not able to handle vector type. if (I->getType()->isVectorTy()) @@ -1386,7 +1386,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS)) return replaceInstUsesWith(I, V); - if (I.hasUnsafeAlgebra()) { + if (I.isFast()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return replaceInstUsesWith(I, V); } @@ -1736,7 +1736,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1)) return replaceInstUsesWith(I, V); - if (I.hasUnsafeAlgebra()) { + if (I.isFast()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return replaceInstUsesWith(I, V); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7a4abc9aca0..a00e6f73ab8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2017,7 +2017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::fmuladd: { // Canonicalize fast fmuladd to the separate fmul + fadd. - if (II->hasUnsafeAlgebra()) { + if (II->isFast()) { BuilderTy::FastMathFlagGuard Guard(Builder); Builder.setFastMathFlags(II->getFastMathFlags()); Value *Mul = Builder.CreateFMul(II->getArgOperand(0), diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index cb4788576c5..2974449830d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -426,8 +426,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Look for an appropriate type: // - The type of Idx if the magic fits - // - The smallest fitting legal type if we have a DataLayout - // - Default to i32 + // - The smallest fitting legal type if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) Ty = Idx->getType(); else diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index e6b97538267..87666360c1a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -487,7 +487,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op); if (!II) return; - if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra()) + if (II->getIntrinsicID() != Intrinsic::log2 || !II->isFast()) return; Log2 = II; @@ -498,7 +498,8 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { Instruction *I = dyn_cast<Instruction>(OpLog2Of); if (!I) return; - if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + + if (I->getOpcode() != Instruction::FMul || !I->isFast()) return; if (match(I->getOperand(0), m_SpecificFP(0.5))) @@ -601,7 +602,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C, } if (R) { - R->setHasUnsafeAlgebra(true); + R->setFast(true); InsertNewInstWith(R, *InsertBefore); } @@ -622,7 +623,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - bool AllowReassociate = I.hasUnsafeAlgebra(); + bool AllowReassociate = I.isFast(); // Simplify mul instructions with a constant RHS. if (isa<Constant>(Op1)) { @@ -1341,7 +1342,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - bool AllowReassociate = I.hasUnsafeAlgebra(); + bool AllowReassociate = I.isFast(); bool AllowReciprocal = I.hasAllowReciprocal(); if (Constant *Op1C = dyn_cast<Constant>(Op1)) { diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 45541c9adc0..44bbb84686a 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -310,6 +310,40 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } } +// If this is a bitwise operator or add with a constant RHS we might be able +// to pull it through a shift. +static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift, + BinaryOperator *BO, + const APInt &C) { + bool IsValid = true; // Valid only for And, Or Xor, + bool HighBitSet = false; // Transform ifhigh bit of constant set? + + switch (BO->getOpcode()) { + default: IsValid = false; break; // Do not perform transform! + case Instruction::Add: + IsValid = Shift.getOpcode() == Instruction::Shl; + break; + case Instruction::Or: + case Instruction::Xor: + HighBitSet = false; + break; + case Instruction::And: + HighBitSet = true; + break; + } + + // If this is a signed shift right, and the high bit is modified + // by the logical operation, do not perform the transformation. + // The HighBitSet boolean indicates the value of the high bit of + // the constant which would cause it to be modified for this + // operation. + // + if (IsValid && Shift.getOpcode() == Instruction::AShr) + IsValid = C.isNegative() == HighBitSet; + + return IsValid; +} + Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; @@ -472,33 +506,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, // shift is the only use, we can pull it out of the shift. const APInt *Op0C; if (match(Op0BO->getOperand(1), m_APInt(Op0C))) { - bool isValid = true; // Valid only for And, Or, Xor - bool highBitSet = false; // Transform if high bit of constant set? - - switch (Op0BO->getOpcode()) { - default: isValid = false; break; // Do not perform transform! - case Instruction::Add: - isValid = isLeftShift; - break; - case Instruction::Or: - case Instruction::Xor: - highBitSet = false; - break; - case Instruction::And: - highBitSet = true; - break; - } - - // If this is a signed shift right, and the high bit is modified - // by the logical operation, do not perform the transformation. - // The highBitSet boolean indicates the value of the high bit of - // the constant which would cause it to be modified for this - // operation. - // - if (isValid && I.getOpcode() == Instruction::AShr) - isValid = Op0C->isNegative() == highBitSet; - - if (isValid) { + if (canShiftBinOpWithConstantRHS(I, Op0BO, *Op0C)) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), cast<Constant>(Op0BO->getOperand(1)), Op1); @@ -525,6 +533,53 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, return BinaryOperator::CreateSub(NewRHS, NewShift); } } + + // If we have a select that conditionally executes some binary operator, + // see if we can pull it the select and operator through the shift. + // + // For example, turning: + // shl (select C, (add X, C1), X), C2 + // Into: + // Y = shl X, C2 + // select C, (add Y, C1 << C2), Y + Value *Cond; + BinaryOperator *TBO; + Value *FalseVal; + if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)), + m_Value(FalseVal)))) { + const APInt *C; + if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal && + match(TBO->getOperand(1), m_APInt(C)) && + canShiftBinOpWithConstantRHS(I, TBO, *C)) { + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), + cast<Constant>(TBO->getOperand(1)), Op1); + + Value *NewShift = + Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1); + Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, + NewRHS); + return SelectInst::Create(Cond, NewOp, NewShift); + } + } + + BinaryOperator *FBO; + Value *TrueVal; + if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal), + m_OneUse(m_BinOp(FBO))))) { + const APInt *C; + if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal && + match(FBO->getOperand(1), m_APInt(C)) && + canShiftBinOpWithConstantRHS(I, FBO, *C)) { + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), + cast<Constant>(FBO->getOperand(1)), Op1); + + Value *NewShift = + Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1); + Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, + NewRHS); + return SelectInst::Create(Cond, NewShift, NewOp); + } + } } return nullptr; diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 11a43e803a9..c92d48396c8 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -844,8 +844,9 @@ public: PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), + BlockFrequencyInfo *BFIin = nullptr) + : F(Func), M(Modu), BFI(BFIin), + FuncInfo(Func, ComdatMembers, false, BPI, BFIin), FreqAttr(FFA_Normal) {} // Read counts for the instrumented BB from profile. @@ -863,6 +864,9 @@ public: // Annotate the value profile call sites for one value kind. void annotateValueSites(uint32_t Kind); + // Annotate the irreducible loop header weights. + void annotateIrrLoopHeaderWeights(); + // The hotness of the function from the profile count. enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; @@ -894,6 +898,7 @@ public: private: Function &F; Module *M; + BlockFrequencyInfo *BFI; // This member stores the shared information with class PGOGenFunc. FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; @@ -1183,6 +1188,18 @@ void PGOUseFunc::setBranchWeights() { } } +void PGOUseFunc::annotateIrrLoopHeaderWeights() { + DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); + // Find irr loop headers + for (auto &BB : F) { + if (BFI->isIrrLoopHeader(&BB)) { + TerminatorInst *TI = BB.getTerminator(); + const UseBBInfo &BBCountInfo = getBBInfo(&BB); + setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); + } + } +} + void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { Module *M = F.getParent(); IRBuilder<> Builder(&SI); @@ -1441,6 +1458,7 @@ static bool annotateAllFunctions( Func.populateCounters(); Func.setBranchWeights(); Func.annotateValueSites(); + Func.annotateIrrLoopHeaderWeights(); PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); if (FreqAttr == PGOUseFunc::FFA_Cold) ColdFunctions.push_back(&F); @@ -1582,6 +1600,12 @@ void llvm::setProfMetadata(Module *M, Instruction *TI, namespace llvm { +void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { + MDBuilder MDB(M->getContext()); + TI->setMetadata(llvm::LLVMContext::MD_irr_loop, + MDB.createIrrLoopHeaderWeight(Count)); +} + template <> struct GraphTraits<PGOUseFunc *> { using NodeRef = const BasicBlock *; using ChildIteratorType = succ_const_iterator; diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index f04d0f05ffc..1e683db5020 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -118,7 +119,8 @@ class AggressiveDeadCodeElimination { PostDominatorTree &PDT; /// Mapping of blocks to associated information, an element in BlockInfoVec. - DenseMap<BasicBlock *, BlockInfoType> BlockInfo; + /// Use MapVector to get deterministic iteration order. + MapVector<BasicBlock *, BlockInfoType> BlockInfo; bool isLive(BasicBlock *BB) { return BlockInfo[BB].Live; } /// Mapping of instructions to associated information. diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d79ae851005..6a27fbca8b7 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp AlignmentFromAssumptions.cpp BDCE.cpp + CallSiteSplitting.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp new file mode 100644 index 00000000000..b70ed8d7d4c --- /dev/null +++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -0,0 +1,492 @@ +//===- CallSiteSplitting.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that tries to split a call-site to pass +// more constrained arguments if its argument is predicated in the control flow +// so that we can expose better context to the later passes (e.g, inliner, jump +// threading, or IPA-CP based function cloning, etc.). +// As of now we support two cases : +// +// 1) If a call site is dominated by an OR condition and if any of its arguments +// are predicated on this OR condition, try to split the condition with more +// constrained arguments. For example, in the code below, we try to split the +// call site since we can predicate the argument(ptr) based on the OR condition. +// +// Split from : +// if (!ptr || c) +// callee(ptr); +// to : +// if (!ptr) +// callee(null) // set the known constant value +// else if (c) +// callee(nonnull ptr) // set non-null attribute in the argument +// +// 2) We can also split a call-site based on constant incoming values of a PHI +// For example, +// from : +// Header: +// %c = icmp eq i32 %i1, %i2 +// br i1 %c, label %Tail, label %TBB +// TBB: +// br label Tail% +// Tail: +// %p = phi i32 [ 0, %Header], [ 1, %TBB] +// call void @bar(i32 %p) +// to +// Header: +// %c = icmp eq i32 %i1, %i2 +// br i1 %c, label %Tail-split0, label %TBB +// TBB: +// br label %Tail-split1 +// Tail-split0: +// call void @bar(i32 0) +// br label %Tail +// Tail-split1: +// call void @bar(i32 1) +// br label %Tail +// Tail: +// %p = phi i32 [ 0, %Tail-split0 ], [ 1, %Tail-split1 ] +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "callsite-splitting" + +STATISTIC(NumCallSiteSplit, "Number of call-site split"); + +static void addNonNullAttribute(Instruction *CallI, Instruction *&NewCallI, + Value *Op) { + if (!NewCallI) + NewCallI = CallI->clone(); + CallSite CS(NewCallI); + unsigned ArgNo = 0; + for (auto &I : CS.args()) { + if (&*I == Op) + CS.addParamAttr(ArgNo, Attribute::NonNull); + ++ArgNo; + } +} + +static void setConstantInArgument(Instruction *CallI, Instruction *&NewCallI, + Value *Op, Constant *ConstValue) { + if (!NewCallI) + NewCallI = CallI->clone(); + CallSite CS(NewCallI); + unsigned ArgNo = 0; + for (auto &I : CS.args()) { + if (&*I == Op) + CS.setArgument(ArgNo, ConstValue); + ++ArgNo; + } +} + +static bool createCallSitesOnOrPredicatedArgument( + CallSite CS, Instruction *&NewCSTakenFromHeader, + Instruction *&NewCSTakenFromNextCond, + SmallVectorImpl<BranchInst *> &BranchInsts, BasicBlock *HeaderBB) { + assert(BranchInsts.size() <= 2 && + "Unexpected number of blocks in the OR predicated condition"); + Instruction *Instr = CS.getInstruction(); + BasicBlock *CallSiteBB = Instr->getParent(); + TerminatorInst *HeaderTI = HeaderBB->getTerminator(); + bool IsCSInTakenPath = CallSiteBB == HeaderTI->getSuccessor(0); + + for (unsigned I = 0, E = BranchInsts.size(); I != E; ++I) { + BranchInst *PBI = BranchInsts[I]; + assert(isa<ICmpInst>(PBI->getCondition()) && + "Unexpected condition in a conditional branch."); + ICmpInst *Cmp = cast<ICmpInst>(PBI->getCondition()); + Value *Arg = Cmp->getOperand(0); + assert(isa<Constant>(Cmp->getOperand(1)) && + "Expected op1 to be a constant."); + Constant *ConstVal = cast<Constant>(Cmp->getOperand(1)); + CmpInst::Predicate Pred = Cmp->getPredicate(); + + if (PBI->getParent() == HeaderBB) { + Instruction *&CallTakenFromHeader = + IsCSInTakenPath ? NewCSTakenFromHeader : NewCSTakenFromNextCond; + Instruction *&CallUntakenFromHeader = + IsCSInTakenPath ? NewCSTakenFromNextCond : NewCSTakenFromHeader; + + assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + "Unexpected predicate in an OR condition"); + + // Set the constant value for agruments in the call predicated based on + // the OR condition. + Instruction *&CallToSetConst = Pred == ICmpInst::ICMP_EQ + ? CallTakenFromHeader + : CallUntakenFromHeader; + setConstantInArgument(Instr, CallToSetConst, Arg, ConstVal); + + // Add the NonNull attribute if compared with the null pointer. + if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) { + Instruction *&CallToSetAttr = Pred == ICmpInst::ICMP_EQ + ? CallUntakenFromHeader + : CallTakenFromHeader; + addNonNullAttribute(Instr, CallToSetAttr, Arg); + } + continue; + } + + if (Pred == ICmpInst::ICMP_EQ) { + if (PBI->getSuccessor(0) == Instr->getParent()) { + // Set the constant value for the call taken from the second block in + // the OR condition. + setConstantInArgument(Instr, NewCSTakenFromNextCond, Arg, ConstVal); + } else { + // Add the NonNull attribute if compared with the null pointer for the + // call taken from the second block in the OR condition. + if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) + addNonNullAttribute(Instr, NewCSTakenFromNextCond, Arg); + } + } else { + if (PBI->getSuccessor(0) == Instr->getParent()) { + // Add the NonNull attribute if compared with the null pointer for the + // call taken from the second block in the OR condition. + if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) + addNonNullAttribute(Instr, NewCSTakenFromNextCond, Arg); + } else if (Pred == ICmpInst::ICMP_NE) { + // Set the constant value for the call in the untaken path from the + // header block. + setConstantInArgument(Instr, NewCSTakenFromNextCond, Arg, ConstVal); + } else + llvm_unreachable("Unexpected condition"); + } + } + return NewCSTakenFromHeader || NewCSTakenFromNextCond; +} + +static bool canSplitCallSite(CallSite CS) { + // FIXME: As of now we handle only CallInst. InvokeInst could be handled + // without too much effort. + Instruction *Instr = CS.getInstruction(); + if (!isa<CallInst>(Instr)) + return false; + + // Allow splitting a call-site only when there is no instruction before the + // call-site in the basic block. Based on this constraint, we only clone the + // call instruction, and we do not move a call-site across any other + // instruction. + BasicBlock *CallSiteBB = Instr->getParent(); + if (Instr != CallSiteBB->getFirstNonPHI()) + return false; + + pred_iterator PII = pred_begin(CallSiteBB); + pred_iterator PIE = pred_end(CallSiteBB); + unsigned NumPreds = std::distance(PII, PIE); + + // Allow only one extra call-site. No more than two from one call-site. + if (NumPreds != 2) + return false; + + // Cannot split an edge from an IndirectBrInst. + BasicBlock *Preds[2] = {*PII++, *PII}; + if (isa<IndirectBrInst>(Preds[0]->getTerminator()) || + isa<IndirectBrInst>(Preds[1]->getTerminator())) + return false; + + return CallSiteBB->canSplitPredecessors(); +} + +/// Return true if the CS is split into its new predecessors which are directly +/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2. +/// Note that PredBB1 and PredBB2 are decided in findPredicatedArgument(), +/// especially for the OR predicated case where PredBB1 will point the header, +/// and PredBB2 will point the the second compare block. CallInst1 and CallInst2 +/// will be the new call-sites placed in the new predecessors split for PredBB1 +/// and PredBB2, repectively. Therefore, CallInst1 will be the call-site placed +/// between Header and Tail, and CallInst2 will be the call-site between TBB and +/// Tail. For example, in the IR below with an OR condition, the call-site can +/// be split +/// +/// from : +/// +/// Header: +/// %c = icmp eq i32* %a, null +/// br i1 %c %Tail, %TBB +/// TBB: +/// %c2 = icmp eq i32* %b, null +/// br i1 %c %Tail, %End +/// Tail: +/// %ca = call i1 @callee (i32* %a, i32* %b) +/// +/// to : +/// +/// Header: // PredBB1 is Header +/// %c = icmp eq i32* %a, null +/// br i1 %c %Tail-split1, %TBB +/// TBB: // PredBB2 is TBB +/// %c2 = icmp eq i32* %b, null +/// br i1 %c %Tail-split2, %End +/// Tail-split1: +/// %ca1 = call @callee (i32* null, i32* %b) // CallInst1 +/// br %Tail +/// Tail-split2: +/// %ca2 = call @callee (i32* nonnull %a, i32* null) // CallInst2 +/// br %Tail +/// Tail: +/// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2] +/// +/// Note that for an OR predicated case, CallInst1 and CallInst2 should be +/// created with more constrained arguments in +/// createCallSitesOnOrPredicatedArgument(). +static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, + Instruction *CallInst1, Instruction *CallInst2) { + Instruction *Instr = CS.getInstruction(); + BasicBlock *TailBB = Instr->getParent(); + assert(Instr == (TailBB->getFirstNonPHI()) && "Unexpected call-site"); + + BasicBlock *SplitBlock1 = + SplitBlockPredecessors(TailBB, PredBB1, ".predBB1.split"); + BasicBlock *SplitBlock2 = + SplitBlockPredecessors(TailBB, PredBB2, ".predBB2.split"); + + assert((SplitBlock1 && SplitBlock2) && "Unexpected new basic block split."); + + if (!CallInst1) + CallInst1 = Instr->clone(); + if (!CallInst2) + CallInst2 = Instr->clone(); + + CallInst1->insertBefore(&*SplitBlock1->getFirstInsertionPt()); + CallInst2->insertBefore(&*SplitBlock2->getFirstInsertionPt()); + + CallSite CS1(CallInst1); + CallSite CS2(CallInst2); + + // Handle PHIs used as arguments in the call-site. + for (auto &PI : *TailBB) { + PHINode *PN = dyn_cast<PHINode>(&PI); + if (!PN) + break; + unsigned ArgNo = 0; + for (auto &CI : CS.args()) { + if (&*CI == PN) { + CS1.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock1)); + CS2.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock2)); + } + ++ArgNo; + } + } + + // Replace users of the original call with a PHI mering call-sites split. + if (Instr->getNumUses()) { + PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call", Instr); + PN->addIncoming(CallInst1, SplitBlock1); + PN->addIncoming(CallInst2, SplitBlock2); + Instr->replaceAllUsesWith(PN); + } + DEBUG(dbgs() << "split call-site : " << *Instr << " into \n"); + DEBUG(dbgs() << " " << *CallInst1 << " in " << SplitBlock1->getName() + << "\n"); + DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName() + << "\n"); + Instr->eraseFromParent(); + NumCallSiteSplit++; +} + +static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallSite CS) { + assert(isa<Constant>(Cmp->getOperand(1)) && "Expected a constant operand."); + Value *Op0 = Cmp->getOperand(0); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) { + // Don't consider constant or arguments that are already known non-null. + if (isa<Constant>(*I) || CS.paramHasAttr(ArgNo, Attribute::NonNull)) + continue; + + if (*I == Op0) + return true; + } + return false; +} + +static void findOrCondRelevantToCallArgument( + CallSite CS, BasicBlock *PredBB, BasicBlock *OtherPredBB, + SmallVectorImpl<BranchInst *> &BranchInsts, BasicBlock *&HeaderBB) { + auto *PBI = dyn_cast<BranchInst>(PredBB->getTerminator()); + if (!PBI || !PBI->isConditional()) + return; + + if (PBI->getSuccessor(0) == OtherPredBB || + PBI->getSuccessor(1) == OtherPredBB) + if (PredBB == OtherPredBB->getSinglePredecessor()) { + assert(!HeaderBB && "Expect to find only a single header block"); + HeaderBB = PredBB; + } + + CmpInst::Predicate Pred; + Value *Cond = PBI->getCondition(); + if (!match(Cond, m_ICmp(Pred, m_Value(), m_Constant()))) + return; + ICmpInst *Cmp = cast<ICmpInst>(Cond); + if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) + if (isCondRelevantToAnyCallArgument(Cmp, CS)) + BranchInsts.push_back(PBI); +} + +// Return true if the call-site has an argument which is a PHI with only +// constant incoming values. +static bool isPredicatedOnPHI(CallSite CS) { + Instruction *Instr = CS.getInstruction(); + BasicBlock *Parent = Instr->getParent(); + if (Instr != Parent->getFirstNonPHI()) + return false; + + for (auto &BI : *Parent) { + if (PHINode *PN = dyn_cast<PHINode>(&BI)) { + for (auto &I : CS.args()) + if (&*I == PN) { + assert(PN->getNumIncomingValues() == 2 && + "Unexpected number of incoming values"); + if (PN->getIncomingBlock(0) == PN->getIncomingBlock(1)) + return false; + if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) + continue; + if (isa<Constant>(PN->getIncomingValue(0)) && + isa<Constant>(PN->getIncomingValue(1))) + return true; + } + } + break; + } + return false; +} + +// Return true if an agument in CS is predicated on an 'or' condition. +// Create new call-site with arguments constrained based on the OR condition. +static bool findPredicatedOnOrCondition(CallSite CS, BasicBlock *PredBB1, + BasicBlock *PredBB2, + Instruction *&NewCallTakenFromHeader, + Instruction *&NewCallTakenFromNextCond, + BasicBlock *&HeaderBB) { + SmallVector<BranchInst *, 4> BranchInsts; + findOrCondRelevantToCallArgument(CS, PredBB1, PredBB2, BranchInsts, HeaderBB); + findOrCondRelevantToCallArgument(CS, PredBB2, PredBB1, BranchInsts, HeaderBB); + if (BranchInsts.empty() || !HeaderBB) + return false; + + // If an OR condition is detected, try to create call sites with constrained + // arguments (e.g., NonNull attribute or constant value). + return createCallSitesOnOrPredicatedArgument(CS, NewCallTakenFromHeader, + NewCallTakenFromNextCond, + BranchInsts, HeaderBB); +} + +static bool findPredicatedArgument(CallSite CS, Instruction *&CallInst1, + Instruction *&CallInst2, + BasicBlock *&PredBB1, BasicBlock *&PredBB2) { + BasicBlock *CallSiteBB = CS.getInstruction()->getParent(); + pred_iterator PII = pred_begin(CallSiteBB); + pred_iterator PIE = pred_end(CallSiteBB); + assert(std::distance(PII, PIE) == 2 && "Expect only two predecessors."); + (void)PIE; + BasicBlock *Preds[2] = {*PII++, *PII}; + BasicBlock *&HeaderBB = PredBB1; + if (!findPredicatedOnOrCondition(CS, Preds[0], Preds[1], CallInst1, CallInst2, + HeaderBB) && + !isPredicatedOnPHI(CS)) + return false; + + if (!PredBB1) + PredBB1 = Preds[0]; + + PredBB2 = PredBB1 == Preds[0] ? Preds[1] : Preds[0]; + return true; +} + +static bool tryToSplitCallSite(CallSite CS) { + if (!CS.arg_size()) + return false; + + BasicBlock *PredBB1 = nullptr; + BasicBlock *PredBB2 = nullptr; + Instruction *CallInst1 = nullptr; + Instruction *CallInst2 = nullptr; + if (!canSplitCallSite(CS) || + !findPredicatedArgument(CS, CallInst1, CallInst2, PredBB1, PredBB2)) { + assert(!CallInst1 && !CallInst2 && "Unexpected new call-sites cloned."); + return false; + } + splitCallSite(CS, PredBB1, PredBB2, CallInst1, CallInst2); + return true; +} + +static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { + bool Changed = false; + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { + BasicBlock &BB = *BI++; + for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) { + Instruction *I = &*II++; + CallSite CS(cast<Value>(I)); + if (!CS || isa<IntrinsicInst>(I) || isInstructionTriviallyDead(I, &TLI)) + continue; + + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + Changed |= tryToSplitCallSite(CS); + } + } + return Changed; +} + +namespace { +struct CallSiteSplittingLegacyPass : public FunctionPass { + static char ID; + CallSiteSplittingLegacyPass() : FunctionPass(ID) { + initializeCallSiteSplittingLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + return doCallSiteSplitting(F, TLI); + } +}; +} // namespace + +char CallSiteSplittingLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting", + "Call-site splitting", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting", + "Call-site splitting", false, false) +FunctionPass *llvm::createCallSiteSplittingPass() { + return new CallSiteSplittingLegacyPass(); +} + +PreservedAnalyses CallSiteSplittingPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + + if (!doCallSiteSplitting(F, TLI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + return PA; +} diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 9ce42a06825..abb50f27f1c 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -1624,6 +1625,15 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { if (DU.NarrowDef->use_empty()) DeadInsts.emplace_back(DU.NarrowDef); } + + // Attach any debug information to the new PHI. Since OrigPhi and WidePHI + // evaluate the same recurrence, we can just copy the debug info over. + SmallVector<DbgValueInst *, 1> DbgValues; + llvm::findDbgValues(DbgValues, OrigPhi); + auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), + ValueAsMetadata::get(WidePhi)); + for (auto &DbgValue : DbgValues) + DbgValue->setOperand(0, MDPhi); return WidePhi; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index ade4fbbcb6f..e6cab3f34cf 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -192,11 +192,12 @@ JumpThreadingPass::JumpThreadingPass(int T) { // P(cond == true ) = P(A) + P(cond == true | B) * P(B) // // which gives us: -// P(A) <= P(c == true), i.e. +// P(A) is less than P(cond == true), i.e. // P(t == true) <= P(cond == true) // -// In other words, if we know P(cond == true), we know that P(t == true) -// can not be greater than 1%. +// In other words, if we know P(cond == true) is unlikely, we know +// that P(t == true) is also unlikely. +// static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); if (!CondBr) diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 6ca8d602302..c60ec9f50f7 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -62,6 +62,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -93,9 +94,8 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); -static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, - const Loop *CurLoop, AliasSetTracker *CurAST, - const LoopSafetyInfo *SafetyInfo, +static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, @@ -394,8 +394,12 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { - ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); + if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) { + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + Changed = true; + } } } } @@ -717,26 +721,6 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, if (!BlockColors.empty() && BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) return false; - - // A PHI node where all of the incoming values are this instruction are - // special -- they can just be RAUW'ed with the instruction and thus - // don't require a use in the predecessor. This is a particular important - // special case because it is the pattern found in LCSSA form. - if (isTriviallyReplacablePHI(*PN, I)) { - if (CurLoop->contains(PN)) - return false; - else - continue; - } - - // Otherwise, PHI node uses occur in predecessor blocks if the incoming - // values. Check for such a use being inside the loop. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == &I) - if (CurLoop->contains(PN->getIncomingBlock(i))) - return false; - - continue; } if (CurLoop->contains(UI)) @@ -806,14 +790,96 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, return New; } +static Instruction *sinkThroughTriviallyReplacablePHI( + PHINode *TPN, Instruction *I, LoopInfo *LI, + SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, + const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { + assert(isTriviallyReplacablePHI(*TPN, *I) && + "Expect only trivially replacalbe PHI"); + BasicBlock *ExitBlock = TPN->getParent(); + Instruction *New; + auto It = SunkCopies.find(ExitBlock); + if (It != SunkCopies.end()) + New = It->second; + else + New = SunkCopies[ExitBlock] = + CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); + return New; +} + +static bool canSplitPredecessors(PHINode *PN) { + BasicBlock *BB = PN->getParent(); + if (!BB->canSplitPredecessors()) + return false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *BBPred = *PI; + if (isa<IndirectBrInst>(BBPred->getTerminator())) + return false; + } + return true; +} + +static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, + LoopInfo *LI, const Loop *CurLoop) { +#ifndef NDEBUG + SmallVector<BasicBlock *, 32> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); +#endif + BasicBlock *ExitBB = PN->getParent(); + assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); + + // Split predecessors of the loop exit to make instructions in the loop are + // exposed to exit blocks through trivially replacable PHIs while keeping the + // loop in the canonical form where each predecessor of each exit block should + // be contained within the loop. For example, this will convert the loop below + // from + // + // LB1: + // %v1 = + // br %LE, %LB2 + // LB2: + // %v2 = + // br %LE, %LB1 + // LE: + // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replacable + // + // to + // + // LB1: + // %v1 = + // br %LE.split, %LB2 + // LB2: + // %v2 = + // br %LE.split2, %LB1 + // LE.split: + // %p1 = phi [%v1, %LB1] <-- trivially replacable + // br %LE + // LE.split2: + // %p2 = phi [%v2, %LB2] <-- trivially replacable + // br %LE + // LE: + // %p = phi [%p1, %LE.split], [%p2, %LE.split2] + // + SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); + while (!PredBBs.empty()) { + BasicBlock *PredBB = *PredBBs.begin(); + assert(CurLoop->contains(PredBB) && + "Expect all predecessors are in the loop"); + if (PN->getBasicBlockIndex(PredBB) >= 0) + SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true); + PredBBs.remove(PredBB); + } +} + /// When an instruction is found to only be used outside of the loop, this /// function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its /// position, and may either delete it or move it to outside of the loop. /// -static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, - const Loop *CurLoop, AliasSetTracker *CurAST, - const LoopSafetyInfo *SafetyInfo, +static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { @@ -828,57 +894,75 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, ++NumSunk; Changed = true; -#ifndef NDEBUG - SmallVector<BasicBlock *, 32> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); - SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); -#endif + // Iterate over users to be ready for actual sinking. Replace users via + // unrechable blocks with undef and make all user PHIs trivially replcable. + SmallPtrSet<Instruction *, 8> VisitedUsers; + for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { + auto *User = cast<Instruction>(*UI); + Use &U = UI.getUse(); + ++UI; - // Clones of this instruction. Don't create more than one per exit block! - SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; + if (VisitedUsers.count(User)) + continue; - // If this instruction is only used outside of the loop, then all users are - // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of - // the instruction. - while (!I.use_empty()) { - Value::user_iterator UI = I.user_begin(); - auto *User = cast<Instruction>(*UI); if (!DT->isReachableFromEntry(User->getParent())) { User->replaceUsesOfWith(&I, UndefValue::get(I.getType())); continue; } + // The user must be a PHI node. PHINode *PN = cast<PHINode>(User); // Surprisingly, instructions can be used outside of loops without any // exits. This can only happen in PHI nodes if the incoming block is // unreachable. - Use &U = UI.getUse(); BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); continue; } - BasicBlock *ExitBlock = PN->getParent(); - assert(ExitBlockSet.count(ExitBlock) && - "The LCSSA PHI is not in an exit block!"); + VisitedUsers.insert(PN); + if (isTriviallyReplacablePHI(*PN, I)) + continue; - Instruction *New; - auto It = SunkCopies.find(ExitBlock); - if (It != SunkCopies.end()) - New = It->second; - else - New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); + if (!canSplitPredecessors(PN)) + return false; + + // Split predecessors of the PHI so that we can make users trivially + // replacable. + splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop); + // Should rebuild the iterators, as they may be invalidated by + // splitPredecessorsOfLoopExit(). + UI = I.user_begin(); + UE = I.user_end(); + } + +#ifndef NDEBUG + SmallVector<BasicBlock *, 32> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); +#endif + + // Clones of this instruction. Don't create more than one per exit block! + SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; + + // If this instruction is only used outside of the loop, then all users are + // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of + // the instruction. + while (!I.use_empty()) { + Value::user_iterator UI = I.user_begin(); + PHINode *PN = cast<PHINode>(*UI); + assert(ExitBlockSet.count(PN->getParent()) && + "The LCSSA PHI is not in an exit block!"); + // The PHI must be trivially replacable. + Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies, + SafetyInfo, CurLoop); PN->replaceAllUsesWith(New); PN->eraseFromParent(); } - - CurAST->deleteValue(&I); - I.eraseFromParent(); return Changed; } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 413fb75d172..eb5f3cc47ce 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1326,9 +1326,9 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, // step 2: detect instructions corresponding to "x.next = x >> 1" if (!DefX || DefX->getOpcode() != Instruction::AShr) return false; - if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1))) - if (!Shft || !Shft->isOne()) - return false; + ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)); + if (!Shft || !Shft->isOne()) + return false; VarX = DefX->getOperand(0); // step 3: Check the recurrence of variable X diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 9a623be234f..52dea3254e7 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -174,6 +174,9 @@ using namespace llvm; +static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation", + cl::Hidden, cl::init(true)); + namespace { class LoopPredication { /// Represents an induction variable check: @@ -186,6 +189,10 @@ class LoopPredication { const SCEV *Limit) : Pred(Pred), IV(IV), Limit(Limit) {} LoopICmp() {} + void dump() { + dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV + << ", Limit = " << *Limit << "\n"; + } }; ScalarEvolution *SE; @@ -195,6 +202,7 @@ class LoopPredication { BasicBlock *Preheader; LoopICmp LatchCheck; + bool isSupportedStep(const SCEV* Step); Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) { return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0), ICI->getOperand(1)); @@ -204,14 +212,36 @@ class LoopPredication { Optional<LoopICmp> parseLoopLatchICmp(); + bool CanExpand(const SCEV* S); Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Instruction *InsertAt); Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, IRBuilder<> &Builder); + Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck, + LoopICmp RangeCheck, + SCEVExpander &Expander, + IRBuilder<> &Builder); + bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); + // When the IV type is wider than the range operand type, we can still do loop + // predication, by generating SCEVs for the range and latch that are of the + // same type. We achieve this by generating a SCEV truncate expression for the + // latch IV. This is done iff truncation of the IV is a safe operation, + // without loss of information. + // Another way to achieve this is by generating a wider type SCEV for the + // range check operand, however, this needs a more involved check that + // operands do not overflow. This can lead to loss of information when the + // range operand is of the form: add i32 %offset, %iv. We need to prove that + // sext(x + y) is same as sext(x) + sext(y). + // This function returns true if we can safely represent the IV type in + // the RangeCheckType without loss of information. + bool isSafeToTruncateWideIVType(Type *RangeCheckType); + // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do + // so. + Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType); public: LoopPredication(ScalarEvolution *SE) : SE(SE){}; bool runOnLoop(Loop *L); @@ -301,53 +331,54 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, return Builder.CreateICmp(Pred, LHSV, RHSV); } -/// If ICI can be widened to a loop invariant condition emits the loop -/// invariant condition in the loop preheader and return it, otherwise -/// returns None. -Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, - SCEVExpander &Expander, - IRBuilder<> &Builder) { - DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); - DEBUG(ICI->dump()); +Optional<LoopPredication::LoopICmp> +LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) { - // parseLoopStructure guarantees that the latch condition is: - // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=. - // We are looking for the range checks of the form: - // i u< guardLimit - auto RangeCheck = parseLoopICmp(ICI); - if (!RangeCheck) { - DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); + auto *LatchType = LatchCheck.IV->getType(); + if (RangeCheckType == LatchType) + return LatchCheck; + // For now, bail out if latch type is narrower than range type. + if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType)) return None; - } - if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { - DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred - << ")!\n"); + if (!isSafeToTruncateWideIVType(RangeCheckType)) return None; - } - auto *RangeCheckIV = RangeCheck->IV; - auto *Ty = RangeCheckIV->getType(); - if (Ty != LatchCheck.IV->getType()) { - DEBUG(dbgs() << "Type mismatch between range check and latch IVs!\n"); + // We can now safely identify the truncated version of the IV and limit for + // RangeCheckType. + LoopICmp NewLatchCheck; + NewLatchCheck.Pred = LatchCheck.Pred; + NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>( + SE->getTruncateExpr(LatchCheck.IV, RangeCheckType)); + if (!NewLatchCheck.IV) return None; - } - if (!RangeCheckIV->isAffine()) { - DEBUG(dbgs() << "Range check IV is not affine!\n"); - return None; - } - auto *Step = RangeCheckIV->getStepRecurrence(*SE); - if (Step != LatchCheck.IV->getStepRecurrence(*SE)) { - DEBUG(dbgs() << "Range check and latch have IVs different steps!\n"); - return None; - } - assert(Step->isOne() && "must be one"); + NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType); + DEBUG(dbgs() << "IV of type: " << *LatchType + << "can be represented as range check type:" << *RangeCheckType + << "\n"); + DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n"); + DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n"); + return NewLatchCheck; +} + +bool LoopPredication::isSupportedStep(const SCEV* Step) { + return Step->isOne(); +} - // Generate the widened condition: +bool LoopPredication::CanExpand(const SCEV* S) { + return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); +} + +Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( + LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, + SCEVExpander &Expander, IRBuilder<> &Builder) { + auto *Ty = RangeCheck.IV->getType(); + // Generate the widened condition for the forward loop: // guardStart u< guardLimit && // latchLimit <pred> guardLimit - 1 - guardStart + latchStart // where <pred> depends on the latch condition predicate. See the file // header comment for the reasoning. - const SCEV *GuardStart = RangeCheckIV->getStart(); - const SCEV *GuardLimit = RangeCheck->Limit; + // guardLimit - guardStart + latchStart - 1 + const SCEV *GuardStart = RangeCheck.IV->getStart(); + const SCEV *GuardLimit = RangeCheck.Limit; const SCEV *LatchStart = LatchCheck.IV->getStart(); const SCEV *LatchLimit = LatchCheck.Limit; @@ -355,7 +386,11 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, const SCEV *RHS = SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart), SE->getMinusSCEV(LatchStart, SE->getOne(Ty))); - + if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || + !CanExpand(LatchLimit) || !CanExpand(RHS)) { + DEBUG(dbgs() << "Can't expand limit check!\n"); + return None; + } ICmpInst::Predicate LimitCheckPred; switch (LatchCheck.Pred) { case ICmpInst::ICMP_ULT: @@ -378,22 +413,68 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, DEBUG(dbgs() << "RHS: " << *RHS << "\n"); DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n"); - auto CanExpand = [this](const SCEV *S) { - return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); - }; - if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || - !CanExpand(LatchLimit) || !CanExpand(RHS)) { - DEBUG(dbgs() << "Can't expand limit check!\n"); - return None; - } - Instruction *InsertAt = Preheader->getTerminator(); auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS, InsertAt); - auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred, + auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred, GuardStart, GuardLimit, InsertAt); return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } +/// If ICI can be widened to a loop invariant condition emits the loop +/// invariant condition in the loop preheader and return it, otherwise +/// returns None. +Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, + SCEVExpander &Expander, + IRBuilder<> &Builder) { + DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); + DEBUG(ICI->dump()); + + // parseLoopStructure guarantees that the latch condition is: + // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=. + // We are looking for the range checks of the form: + // i u< guardLimit + auto RangeCheck = parseLoopICmp(ICI); + if (!RangeCheck) { + DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); + return None; + } + DEBUG(dbgs() << "Guard check:\n"); + DEBUG(RangeCheck->dump()); + if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { + DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred + << ")!\n"); + return None; + } + auto *RangeCheckIV = RangeCheck->IV; + if (!RangeCheckIV->isAffine()) { + DEBUG(dbgs() << "Range check IV is not affine!\n"); + return None; + } + auto *Step = RangeCheckIV->getStepRecurrence(*SE); + // We cannot just compare with latch IV step because the latch and range IVs + // may have different types. + if (!isSupportedStep(Step)) { + DEBUG(dbgs() << "Range check and latch have IVs different steps!\n"); + return None; + } + auto *Ty = RangeCheckIV->getType(); + auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty); + if (!CurrLatchCheckOpt) { + DEBUG(dbgs() << "Failed to generate a loop latch check " + "corresponding to range type: " + << *Ty << "\n"); + return None; + } + + LoopICmp CurrLatchCheck = *CurrLatchCheckOpt; + // At this point the range check step and latch step should have the same + // value and type. + assert(Step == CurrLatchCheck.IV->getStepRecurrence(*SE) && + "Range and latch should have same step recurrence!"); + + return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck, + Expander, Builder); +} bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, SCEVExpander &Expander) { @@ -485,15 +566,6 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() { return None; } - if (Result->Pred != ICmpInst::ICMP_ULT && - Result->Pred != ICmpInst::ICMP_SLT && - Result->Pred != ICmpInst::ICMP_ULE && - Result->Pred != ICmpInst::ICMP_SLE) { - DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred - << ")!\n"); - return None; - } - // Check affine first, so if it's not we don't try to compute the step // recurrence. if (!Result->IV->isAffine()) { @@ -502,14 +574,55 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() { } auto *Step = Result->IV->getStepRecurrence(*SE); - if (!Step->isOne()) { + if (!isSupportedStep(Step)) { DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); return None; } + auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) { + assert(Step->isOne() && "expected Step to be one!"); + return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT && + Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; + }; + + if (IsUnsupportedPredicate(Step, Result->Pred)) { + DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred + << ")!\n"); + return None; + } return Result; } +// Returns true if its safe to truncate the IV to RangeCheckType. +bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) { + if (!EnableIVTruncation) + return false; + assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) > + DL->getTypeSizeInBits(RangeCheckType) && + "Expected latch check IV type to be larger than range check operand " + "type!"); + // The start and end values of the IV should be known. This is to guarantee + // that truncating the wide type will not lose information. + auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit); + auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart()); + if (!Limit || !Start) + return false; + // This check makes sure that the IV does not change sign during loop + // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE, + // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the + // IV wraps around, and the truncation of the IV would lose the range of + // iterations between 2^32 and 2^64. + bool Increasing; + if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing)) + return false; + // The active bits should be less than the bits in the RangeCheckType. This + // guarantees that truncating the latch check to RangeCheckType is a safe + // operation. + auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType); + return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize && + Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize; +} + bool LoopPredication::runOnLoop(Loop *Loop) { L = Loop; @@ -535,6 +648,9 @@ bool LoopPredication::runOnLoop(Loop *Loop) { return false; LatchCheck = *LatchCheckOpt; + DEBUG(dbgs() << "Latch check:\n"); + DEBUG(LatchCheck.dump()); + // Collect all the guards into a vector and process later, so as not // to invalidate the instruction iterator. SmallVector<IntrinsicInst *, 4> Guards; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index bbb179d3790..7f03f2379e7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1037,7 +1037,7 @@ struct LSRFixup { Value *OperandValToReplace = nullptr; /// If this user is to use the post-incremented value of an induction - /// variable, this variable is non-null and holds the loop associated with the + /// variable, this set is non-empty and holds the loops associated with the /// induction variable. PostIncLoopSet PostIncLoops; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index a44ca333fee..1f32f9f24aa 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -145,8 +145,7 @@ XorOpnd::XorOpnd(Value *V) { static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { if (V->hasOneUse() && isa<Instruction>(V) && cast<Instruction>(V)->getOpcode() == Opcode && - (!isa<FPMathOperator>(V) || - cast<Instruction>(V)->hasUnsafeAlgebra())) + (!isa<FPMathOperator>(V) || cast<Instruction>(V)->isFast())) return cast<BinaryOperator>(V); return nullptr; } @@ -156,8 +155,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, if (V->hasOneUse() && isa<Instruction>(V) && (cast<Instruction>(V)->getOpcode() == Opcode1 || cast<Instruction>(V)->getOpcode() == Opcode2) && - (!isa<FPMathOperator>(V) || - cast<Instruction>(V)->hasUnsafeAlgebra())) + (!isa<FPMathOperator>(V) || cast<Instruction>(V)->isFast())) return cast<BinaryOperator>(V); return nullptr; } @@ -565,7 +563,7 @@ static bool LinearizeExprTree(BinaryOperator *I, assert((!isa<Instruction>(Op) || cast<Instruction>(Op)->getOpcode() != Opcode || (isa<FPMathOperator>(Op) && - !cast<Instruction>(Op)->hasUnsafeAlgebra())) && + !cast<Instruction>(Op)->isFast())) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); @@ -2017,8 +2015,8 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (I->isCommutative()) canonicalizeOperands(I); - // Don't optimize floating point instructions that don't have unsafe algebra. - if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra()) + // Don't optimize floating-point instructions unless they are 'fast'. + if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) return; // Do not reassociate boolean (i1) expressions. We want to preserve the diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 1ca77cfec32..44acfc88579 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -125,10 +125,10 @@ struct RewriteStatepointsForGC : public ModulePass { Changed |= runOnFunction(F); if (Changed) { - // stripNonValidAttributesAndMetadata asserts that shouldRewriteStatepointsIn + // stripNonValidData asserts that shouldRewriteStatepointsIn // returns true for at least one function in the module. Since at least // one function changed, we know that the precondition is satisfied. - stripNonValidAttributesAndMetadata(M); + stripNonValidData(M); } return Changed; @@ -146,15 +146,17 @@ struct RewriteStatepointsForGC : public ModulePass { /// metadata implying dereferenceability that are no longer valid/correct after /// RewriteStatepointsForGC has run. This is because semantically, after /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire - /// heap. stripNonValidAttributesAndMetadata (conservatively) restores + /// heap. stripNonValidData (conservatively) restores /// correctness by erasing all attributes in the module that externally imply /// dereferenceability. Similar reasoning also applies to the noalias /// attributes and metadata. gc.statepoint can touch the entire heap including /// noalias objects. - void stripNonValidAttributesAndMetadata(Module &M); + /// Apart from attributes and metadata, we also remove instructions that imply + /// constant physical memory: llvm.invariant.start. + void stripNonValidData(Module &M); - // Helpers for stripNonValidAttributesAndMetadata - void stripNonValidAttributesAndMetadataFromBody(Function &F); + // Helpers for stripNonValidData + void stripNonValidDataFromBody(Function &F); void stripNonValidAttributesFromPrototype(Function &F); // Certain metadata on instructions are invalid after running RS4GC. @@ -2385,14 +2387,30 @@ void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC); } -void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) { +void RewriteStatepointsForGC::stripNonValidDataFromBody(Function &F) { if (F.empty()) return; LLVMContext &Ctx = F.getContext(); MDBuilder Builder(Ctx); + // Set of invariantstart instructions that we need to remove. + // Use this to avoid invalidating the instruction iterator. + SmallVector<IntrinsicInst*, 12> InvariantStartInstructions; + for (Instruction &I : instructions(F)) { + // invariant.start on memory location implies that the referenced memory + // location is constant and unchanging. This is no longer true after + // RewriteStatepointsForGC runs because there can be calls to gc.statepoint + // which frees the entire heap and the presence of invariant.start allows + // the optimizer to sink the load of a memory location past a statepoint, + // which is incorrect. + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + InvariantStartInstructions.push_back(II); + continue; + } + if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); bool IsImmutableTBAA = @@ -2422,6 +2440,12 @@ void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Functio RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex); } } + + // Delete the invariant.start instructions and RAUW undef. + for (auto *II : InvariantStartInstructions) { + II->replaceAllUsesWith(UndefValue::get(II->getType())); + II->eraseFromParent(); + } } /// Returns true if this function should be rewritten by this pass. The main @@ -2438,7 +2462,7 @@ static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) { +void RewriteStatepointsForGC::stripNonValidData(Module &M) { #ifndef NDEBUG assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!"); #endif @@ -2447,7 +2471,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) { stripNonValidAttributesFromPrototype(F); for (Function &F : M) - stripNonValidAttributesAndMetadataFromBody(F); + stripNonValidDataFromBody(F); } bool RewriteStatepointsForGC::runOnFunction(Function &F) { diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index b968cb8c892..6de6c8cce2c 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -4133,8 +4133,10 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { "new fragment is outside of original fragment"); Start -= OrigFragment->OffsetInBits; } - FragmentExpr = - DIExpression::createFragmentExpression(Expr, Start, Size); + if (auto E = DIExpression::createFragmentExpression(Expr, Start, Size)) + FragmentExpr = *E; + else + continue; } // Remove any existing intrinsics describing the same alloca. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index c1034ace206..8a5ae1b8731 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -35,6 +35,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCELegacyPassPass(Registry); initializeBDCELegacyPassPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); + initializeCallSiteSplittingLegacyPassPass(Registry); initializeConstantHoistingLegacyPassPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index fbb61ac1ae9..2e6fc4e8482 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -203,6 +203,23 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, } void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { + + // Check the summaries to see if the symbol gets resolved to a known local + // definition. + if (GV.hasName()) { + ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID()); + if (VI) { + // Need to check all summaries are local in case of hash collisions. + bool IsLocal = VI.getSummaryList().size() && + llvm::all_of(VI.getSummaryList(), + [](const std::unique_ptr<GlobalValueSummary> &Summary) { + return Summary->isDSOLocal(); + }); + if (IsLocal) + GV.setDSOLocal(true); + } + } + bool DoPromote = false; if (GV.hasLocalLinkage() && ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 8c643c93ec4..89dbe4b8fda 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1362,16 +1362,25 @@ void llvm::salvageDebugInfo(Instruction &I) { SmallVector<DbgValueInst *, 1> DbgValues; auto &M = *I.getModule(); - auto MDWrap = [&](Value *V) { + auto wrapMD = [&](Value *V) { return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); }; - if (isa<BitCastInst>(&I)) { + auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) { + auto *DIExpr = DVI->getExpression(); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::WithStackValue); + DVI->setOperand(0, wrapMD(I.getOperand(0))); + DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + }; + + if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) { findDbgValues(DbgValues, &I); for (auto *DVI : DbgValues) { // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value // to use the cast's source. - DVI->setOperand(0, MDWrap(I.getOperand(0))); + DVI->setOperand(0, wrapMD(I.getOperand(0))); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); } } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { @@ -1383,24 +1392,26 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite a constant GEP into a DIExpression. Since we are performing // arithmetic to compute the variable's *value* in the DIExpression, we // need to mark the expression with a DW_OP_stack_value. - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { - auto *DIExpr = DVI->getExpression(); + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) // GEP offsets are i32 and thus always fit into an int64_t. - DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, - Offset.getSExtValue(), - DIExpression::WithStackValue); - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); - } + applyOffset(DVI, Offset.getSExtValue()); } + } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { + if (BI->getOpcode() == Instruction::Add) + if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1))) + if (ConstInt->getBitWidth() <= 64) { + APInt Offset = ConstInt->getValue(); + findDbgValues(DbgValues, &I); + for (auto *DVI : DbgValues) + applyOffset(DVI, Offset.getSExtValue()); + } } else if (isa<LoadInst>(&I)) { findDbgValues(DbgValues, &I); for (auto *DVI : DbgValues) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); - DVI->setOperand(0, MDWrap(I.getOperand(0))); + DVI->setOperand(0, wrapMD(I.getOperand(0))); DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 13c0bfbcb2e..0de6924e635 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -432,7 +432,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); Instruction *UAI = Prev.getUnsafeAlgebraInst(); - if (!UAI && FP && !I->hasUnsafeAlgebra()) + if (!UAI && FP && !I->isFast()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. switch (I->getOpcode()) { @@ -660,11 +660,11 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, break; } - // We only match FP sequences with unsafe algebra, so we can unconditionally + // We only match FP sequences that are 'fast', so we can unconditionally // set it on any generated instructions. IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; - FMF.setUnsafeAlgebra(); + FMF.setFast(); Builder.setFastMathFlags(FMF); Value *Cmp; @@ -768,7 +768,7 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, // Floating point operations had to be 'fast' to enable the induction. FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); Value *MulExp = B.CreateFMul(StepValue, Index); if (isa<Instruction>(MulExp)) @@ -1338,7 +1338,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; @@ -1401,7 +1401,7 @@ Value *llvm::createSimpleTargetReduction( RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // TODO: Support creating ordered reductions. FastMathFlags FMFUnsafe; - FMFUnsafe.setUnsafeAlgebra(); + FMFUnsafe.setFast(); switch (Opcode) { case Instruction::Add: diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3c4dae92ebf..e0045e9f48a 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2901,7 +2901,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, else return false; } - return N <= PHINodeFoldingThreshold; + // The store we want to merge is counted in N, so add 1 to make sure + // we're counting the instructions that would be left. + return N <= (PHINodeFoldingThreshold + 1); }; if (!MergeCondStoresAggressively && diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 33117659489..a29b83717f3 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1111,7 +1111,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). auto *OpC = dyn_cast<CallInst>(Op1); - if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + if (OpC && OpC->isFast() && CI->isFast()) { LibFunc Func; Function *OpCCallee = OpC->getCalledFunction(); if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && @@ -1136,7 +1136,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc_sqrtl)) { // If -ffast-math: // pow(x, -0.5) -> 1.0 / sqrt(x) - if (CI->hasUnsafeAlgebra()) { + if (CI->isFast()) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -1157,7 +1157,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc_sqrtl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (CI->hasUnsafeAlgebra()) { + if (CI->isFast()) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -1196,7 +1196,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (CI->hasUnsafeAlgebra()) { + if (CI->isFast()) { APFloat V = abs(Op2C->getValueAPF()); // We limit to a max of 7 fmul(s). Thus max exponent is 32. // This transformation applies to integer exponents only. @@ -1284,9 +1284,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - if (CI->hasUnsafeAlgebra()) { - // Unsafe algebra sets all fast-math-flags to true. - FMF.setUnsafeAlgebra(); + if (CI->isFast()) { + // If the call is 'fast', then anything we create here will also be 'fast'. + FMF.setFast(); } else { // At a minimum, no-nans-fp-math must be true. if (!CI->hasNoNaNs()) @@ -1317,13 +1317,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { if (UnsafeFPShrink && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->hasUnsafeAlgebra()) + if (!CI->isFast()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - // The earlier call must also be unsafe in order to do these transforms. - if (!OpC || !OpC->hasUnsafeAlgebra()) + // The earlier call must also be 'fast' in order to do these transforms. + if (!OpC || !OpC->isFast()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1333,7 +1333,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - FMF.setUnsafeAlgebra(); + FMF.setFast(); B.setFastMathFlags(FMF); LibFunc Func; @@ -1365,11 +1365,11 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->hasUnsafeAlgebra()) + if (!CI->isFast()) return Ret; Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); - if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) return Ret; // We're looking for a repeated factor in a multiplication tree, @@ -1391,8 +1391,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Value *OtherMul0, *OtherMul1; if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1 && - cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) { // Matched: sqrt((x * x) * z) RepeatOp = OtherMul0; OtherOp = Op1; @@ -1437,8 +1436,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { if (!OpC) return Ret; - // Both calls must allow unsafe optimizations in order to remove them. - if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + // Both calls must be 'fast' in order to remove them. + if (!CI->isFast() || !OpC->isFast()) return Ret; // tan(atan(x)) -> x @@ -2167,10 +2166,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { // Command-line parameter overrides instruction attribute. // This can't be moved to optimizeFloatingPointLibCall() because it may be - // used by the intrinsic optimizations. + // used by the intrinsic optimizations. if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra()) + else if (isa<FPMathOperator>(CI) && CI->isFast()) UnsafeFPShrink = true; // First, check for intrinsics. diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index 07157069518..934a1bd73c2 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -141,15 +141,15 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, } if (GV.hasLocalLinkage()) - addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); - }; - - std::for_each(M->begin(), M->end(), recordGVSet); - std::for_each(M->global_begin(), M->global_end(), recordGVSet); - std::for_each(M->alias_begin(), M->alias_end(), recordGVSet); - - // Assigned all GVs to merged clusters while balancing number of objects in - // each. + addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
+ };
+
+ llvm::for_each(M->functions(), recordGVSet);
+ llvm::for_each(M->globals(), recordGVSet);
+ llvm::for_each(M->aliases(), recordGVSet);
+
+ // Assigned all GVs to merged clusters while balancing number of objects in
+ // each.
auto CompareClusters = [](const std::pair<unsigned, unsigned> &a, const std::pair<unsigned, unsigned> &b) { if (a.second || b.second) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index ca2f5a178e0..ed29ca0b573 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -385,7 +385,7 @@ static unsigned getReciprocalPredBlockProb() { return 2; } static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; @@ -2720,7 +2720,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step, // Floating point operations had to be 'fast' to enable the induction. FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); Value *MulOp = Builder.CreateFMul(Cv, Step); if (isa<Instruction>(MulOp)) @@ -5396,7 +5396,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // operations, shuffles, or casts, as they don't change precision or // semantics. } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && - !I.hasUnsafeAlgebra()) { + !I.isFast()) { DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); Hints->setPotentiallyUnsafe(); } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5dcf5528ac9..4232252af36 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4880,7 +4880,7 @@ class HorizontalReduction { case RK_Min: case RK_Max: return Opcode == Instruction::ICmp || - cast<Instruction>(I->getOperand(0))->hasUnsafeAlgebra(); + cast<Instruction>(I->getOperand(0))->isFast(); case RK_UMin: case RK_UMax: assert(Opcode == Instruction::ICmp && @@ -5232,7 +5232,7 @@ public: Value *VectorizedTree = nullptr; IRBuilder<> Builder(ReductionRoot); FastMathFlags Unsafe; - Unsafe.setUnsafeAlgebra(); + Unsafe.setFast(); Builder.setFastMathFlags(Unsafe); unsigned i = 0; |