git: 70be2f0deb0f - stable/13 - Merge llvm-project release/18.x llvmorg-18.1.6-0-g1118c2e05e67
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 28 May 2024 05:28:37 UTC
The branch stable/13 has been updated by dim: URL: https://cgit.FreeBSD.org/src/commit/?id=70be2f0deb0f49e1f454b6d8e80cf56a24322a54 commit 70be2f0deb0f49e1f454b6d8e80cf56a24322a54 Author: Dimitry Andric <dim@FreeBSD.org> AuthorDate: 2024-05-24 15:51:19 +0000 Commit: Dimitry Andric <dim@FreeBSD.org> CommitDate: 2024-05-28 05:26:45 +0000 Merge llvm-project release/18.x llvmorg-18.1.6-0-g1118c2e05e67 This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project release/18.x llvmorg-18.1.6-0-g1118c2e05e67. PR: 276104 MFC after: 3 days (cherry picked from commit 3a0793336edfc21cb6d4c8c5c5d7f1665f3e6c5a) --- .../clang/lib/CodeGen/CodeGenModule.cpp | 14 +++++ .../clang/lib/Driver/ToolChains/OpenBSD.cpp | 3 +- .../clang/lib/Format/UnwrappedLineParser.cpp | 7 ++- .../clang/lib/Format/WhitespaceManager.cpp | 2 +- .../clang/lib/Interpreter/IncrementalParser.cpp | 24 ++++++-- .../clang/lib/Interpreter/IncrementalParser.h | 5 ++ .../llvm-project/clang/lib/Sema/SemaTemplate.cpp | 25 +++++++-- contrib/llvm-project/libcxx/src/atomic.cpp | 16 +++++- contrib/llvm-project/libcxx/src/chrono.cpp | 4 +- contrib/llvm-project/lld/ELF/Relocations.cpp | 5 +- .../llvm/include/llvm/CodeGen/MachineFrameInfo.h | 7 +++ .../llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++ .../lib/CodeGen/InterleavedLoadCombinePass.cpp | 3 + .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +- .../Target/AArch64/AArch64Arm64ECCallLowering.cpp | 16 ++++-- .../lib/Target/AArch64/AArch64ISelLowering.cpp | 3 +- .../AArch64/GISel/AArch64GlobalISelUtils.cpp | 6 ++ .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 1 + .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 5 +- .../llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +- .../llvm/lib/Target/AMDGPU/SIInstrInfo.h | 11 ++++ .../llvm/lib/Target/AMDGPU/SOPInstructions.td | 2 +- .../llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp | 57 ++++++------------- .../Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 8 +-- .../Target/RISCV/MCTargetDesc/RISCVELFStreamer.h | 1 - .../lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp | 2 - .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 5 ++ .../RISCV/MCTargetDesc/RISCVTargetStreamer.h | 5 ++ .../llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 32 +++++++++-- .../lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 5 +- .../llvm/lib/Target/RISCV/RISCVFeatures.td | 5 ++ .../llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++- .../llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 65 +++++++++++++--------- .../llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +- .../llvm/lib/Target/X86/X86InstrAVX512.td | 42 +++++++------- .../llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 7 ++- .../llvm/lib/Transforms/IPO/GlobalOpt.cpp | 3 + .../Transforms/InstCombine/InstCombineSelect.cpp | 14 ++++- .../lib/Transforms/Vectorize/SLPVectorizer.cpp | 21 +------ lib/clang/include/VCSVersion.inc | 6 +- lib/clang/include/clang/Basic/Version.inc | 6 +- lib/clang/include/lld/Common/Version.inc | 2 +- lib/clang/include/lldb/Version/Version.inc | 6 +- lib/clang/include/llvm/Config/config.h | 4 +- lib/clang/include/llvm/Config/llvm-config.h | 4 +- lib/clang/include/llvm/Support/VCSRevision.h | 2 +- 46 files changed, 316 insertions(+), 169 deletions(-) diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp index 1280bcd36de9..eb13cd40eb8a 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/RISCVISAInfo.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/Triple.h" @@ -1059,6 +1060,19 @@ void CodeGenModule::Release() { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag(llvm::Module::Error, "target-abi", llvm::MDString::get(Ctx, ABIStr)); + + // Add the canonical ISA string as metadata so the backend can set the ELF + // attributes correctly. We use AppendUnique so LTO will keep all of the + // unique ISA strings that were linked together. + const std::vector<std::string> &Features = + getTarget().getTargetOpts().Features; + auto ParseResult = llvm::RISCVISAInfo::parseFeatures( + Arch == llvm::Triple::riscv64 ? 64 : 32, Features); + if (!errorToBool(ParseResult.takeError())) + getModule().addModuleFlag( + llvm::Module::AppendUnique, "riscv-isa", + llvm::MDNode::get( + Ctx, llvm::MDString::get(Ctx, (*ParseResult)->toString()))); } if (CodeGenOpts.SanitizeCfiCrossDso) { diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp index fd6aa4d7e684..00b6c520fcdd 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -371,7 +371,8 @@ std::string OpenBSD::getCompilerRT(const ArgList &Args, StringRef Component, if (Component == "builtins") { SmallString<128> Path(getDriver().SysRoot); llvm::sys::path::append(Path, "/usr/lib/libcompiler_rt.a"); - return std::string(Path); + if (getVFS().exists(Path)) + return std::string(Path); } SmallString<128> P(getDriver().ResourceDir); std::string CRTBasename = diff --git a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp index a6eb18bb2b32..f70affb732a0 100644 --- a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp @@ -2510,6 +2510,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { assert(FormatTok->is(tok::l_paren) && "'(' expected."); auto *LeftParen = FormatTok; bool SeenEqual = false; + bool MightBeFoldExpr = false; const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); nextToken(); do { @@ -2521,7 +2522,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseChildBlock(); break; case tok::r_paren: - if (!MightBeStmtExpr && !Line->InMacroBody && + if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Prev = LeftParen->Previous; const auto *Next = Tokens->peekNextToken(); @@ -2564,6 +2565,10 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseBracedList(); } break; + case tok::ellipsis: + MightBeFoldExpr = true; + nextToken(); + break; case tok::equal: SeenEqual = true; if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) diff --git a/contrib/llvm-project/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm-project/clang/lib/Format/WhitespaceManager.cpp index df84f97a8e8a..7525e6ee650b 100644 --- a/contrib/llvm-project/clang/lib/Format/WhitespaceManager.cpp +++ b/contrib/llvm-project/clang/lib/Format/WhitespaceManager.cpp @@ -1466,7 +1466,7 @@ WhitespaceManager::CellDescriptions WhitespaceManager::getCells(unsigned Start, : Cell); // Go to the next non-comment and ensure there is a break in front const auto *NextNonComment = C.Tok->getNextNonComment(); - while (NextNonComment->is(tok::comma)) + while (NextNonComment && NextNonComment->is(tok::comma)) NextNonComment = NextNonComment->getNextNonComment(); auto j = i; while (Changes[j].Tok != NextNonComment && j < End) diff --git a/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.cpp b/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.cpp index 370bcbfee8b0..f5f32b9f3924 100644 --- a/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.cpp +++ b/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.cpp @@ -209,6 +209,10 @@ IncrementalParser::IncrementalParser(Interpreter &Interp, if (Err) return; CI->ExecuteAction(*Act); + + if (getCodeGen()) + CachedInCodeGenModule = GenModule(); + std::unique_ptr<ASTConsumer> IncrConsumer = std::make_unique<IncrementalASTConsumer>(Interp, CI->takeASTConsumer()); CI->setASTConsumer(std::move(IncrConsumer)); @@ -224,11 +228,8 @@ IncrementalParser::IncrementalParser(Interpreter &Interp, return; // PTU.takeError(); } - if (CodeGenerator *CG = getCodeGen()) { - std::unique_ptr<llvm::Module> M(CG->ReleaseModule()); - CG->StartModule("incr_module_" + std::to_string(PTUs.size()), - M->getContext()); - PTU->TheModule = std::move(M); + if (getCodeGen()) { + PTU->TheModule = GenModule(); assert(PTU->TheModule && "Failed to create initial PTU"); } } @@ -364,6 +365,19 @@ IncrementalParser::Parse(llvm::StringRef input) { std::unique_ptr<llvm::Module> IncrementalParser::GenModule() { static unsigned ID = 0; if (CodeGenerator *CG = getCodeGen()) { + // Clang's CodeGen is designed to work with a single llvm::Module. In many + // cases for convenience various CodeGen parts have a reference to the + // llvm::Module (TheModule or Module) which does not change when a new + // module is pushed. However, the execution engine wants to take ownership + // of the module which does not map well to CodeGen's design. To work this + // around we created an empty module to make CodeGen happy. We should make + // sure it always stays empty. + assert((!CachedInCodeGenModule || + (CachedInCodeGenModule->empty() && + CachedInCodeGenModule->global_empty() && + CachedInCodeGenModule->alias_empty() && + CachedInCodeGenModule->ifunc_empty())) && + "CodeGen wrote to a readonly module"); std::unique_ptr<llvm::Module> M(CG->ReleaseModule()); CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext()); return M; diff --git a/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.h b/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.h index e13b74c7f659..f63bce50acd3 100644 --- a/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.h +++ b/contrib/llvm-project/clang/lib/Interpreter/IncrementalParser.h @@ -24,6 +24,7 @@ #include <memory> namespace llvm { class LLVMContext; +class Module; } // namespace llvm namespace clang { @@ -57,6 +58,10 @@ protected: /// of code. std::list<PartialTranslationUnit> PTUs; + /// When CodeGen is created the first llvm::Module gets cached in many places + /// and we must keep it alive. + std::unique_ptr<llvm::Module> CachedInCodeGenModule; + IncrementalParser(); public: diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp index b619f5d729e8..a12a64939c46 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp @@ -2404,9 +2404,6 @@ struct ConvertConstructorToDeductionGuideTransform { Args.addOuterRetainedLevel(); } - if (NestedPattern) - Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); - FunctionProtoTypeLoc FPTL = CD->getTypeSourceInfo()->getTypeLoc() .getAsAdjusted<FunctionProtoTypeLoc>(); assert(FPTL && "no prototype for constructor declaration"); @@ -2526,11 +2523,27 @@ private: // -- The types of the function parameters are those of the constructor. for (auto *OldParam : TL.getParams()) { - ParmVarDecl *NewParam = - transformFunctionTypeParam(OldParam, Args, MaterializedTypedefs); - if (NestedPattern && NewParam) + ParmVarDecl *NewParam = OldParam; + // Given + // template <class T> struct C { + // template <class U> struct D { + // template <class V> D(U, V); + // }; + // }; + // First, transform all the references to template parameters that are + // defined outside of the surrounding class template. That is T in the + // above example. + if (NestedPattern) { NewParam = transformFunctionTypeParam(NewParam, OuterInstantiationArgs, MaterializedTypedefs); + if (!NewParam) + return QualType(); + } + // Then, transform all the references to template parameters that are + // defined at the class template and the constructor. In this example, + // they're U and V, respectively. + NewParam = + transformFunctionTypeParam(NewParam, Args, MaterializedTypedefs); if (!NewParam) return QualType(); ParamTypes.push_back(NewParam->getType()); diff --git a/contrib/llvm-project/libcxx/src/atomic.cpp b/contrib/llvm-project/libcxx/src/atomic.cpp index 6c7fa1206cf4..98b7f0372369 100644 --- a/contrib/llvm-project/libcxx/src/atomic.cpp +++ b/contrib/llvm-project/libcxx/src/atomic.cpp @@ -25,16 +25,28 @@ # if !defined(SYS_futex) && defined(SYS_futex_time64) # define SYS_futex SYS_futex_time64 # endif +# define _LIBCPP_FUTEX(...) syscall(SYS_futex, __VA_ARGS__) #elif defined(__FreeBSD__) # include <sys/types.h> # include <sys/umtx.h> +# define _LIBCPP_FUTEX(...) syscall(SYS_futex, __VA_ARGS__) + +#elif defined(__OpenBSD__) + +# include <sys/futex.h> + +// OpenBSD has no indirect syscalls +# define _LIBCPP_FUTEX(...) futex(__VA_ARGS__) + #else // <- Add other operating systems here // Baseline needs no new headers +# define _LIBCPP_FUTEX(...) syscall(SYS_futex, __VA_ARGS__) + #endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -44,11 +56,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD static void __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) { static constexpr timespec __timeout = {2, 0}; - syscall(SYS_futex, __ptr, FUTEX_WAIT_PRIVATE, __val, &__timeout, 0, 0); + _LIBCPP_FUTEX(__ptr, FUTEX_WAIT_PRIVATE, __val, &__timeout, 0, 0); } static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) { - syscall(SYS_futex, __ptr, FUTEX_WAKE_PRIVATE, __notify_one ? 1 : INT_MAX, 0, 0, 0); + _LIBCPP_FUTEX(__ptr, FUTEX_WAKE_PRIVATE, __notify_one ? 1 : INT_MAX, 0, 0, 0); } #elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK) diff --git a/contrib/llvm-project/libcxx/src/chrono.cpp b/contrib/llvm-project/libcxx/src/chrono.cpp index c5e827c0cb59..e7d6dfbc2292 100644 --- a/contrib/llvm-project/libcxx/src/chrono.cpp +++ b/contrib/llvm-project/libcxx/src/chrono.cpp @@ -31,7 +31,9 @@ # include <sys/time.h> // for gettimeofday and timeval #endif -#if defined(__APPLE__) || defined(__gnu_hurd__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) +// OpenBSD does not have a fully conformant suite of POSIX timers, but +// it does have clock_gettime and CLOCK_MONOTONIC which is all we need. +#if defined(__APPLE__) || defined(__gnu_hurd__) || defined(__OpenBSD__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) # define _LIBCPP_HAS_CLOCK_GETTIME #endif diff --git a/contrib/llvm-project/lld/ELF/Relocations.cpp b/contrib/llvm-project/lld/ELF/Relocations.cpp index 619fbaf5dc54..92a1b9baaca3 100644 --- a/contrib/llvm-project/lld/ELF/Relocations.cpp +++ b/contrib/llvm-project/lld/ELF/Relocations.cpp @@ -1480,7 +1480,10 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) { // Process TLS relocations, including TLS optimizations. Note that // R_TPREL and R_TPREL_NEG relocations are resolved in processAux. - if (sym.isTls()) { + // + // Some RISCV TLSDESC relocations reference a local NOTYPE symbol, + // but we need to process them in handleTlsRelocation. + if (sym.isTls() || oneof<R_TLSDESC_PC, R_TLSDESC_CALL>(expr)) { if (unsigned processed = handleTlsRelocation(type, sym, *sec, offset, addend, expr)) { i += processed - 1; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 7d11d63d4066..c35faac09c4d 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -697,6 +697,13 @@ public: return Objects[ObjectIdx+NumFixedObjects].isAliased; } + /// Set "maybe pointed to by an LLVM IR value" for an object. + void setIsAliasedObjectIndex(int ObjectIdx, bool IsAliased) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].isAliased = IsAliased; + } + /// Returns true if the specified index corresponds to an immutable object. bool isImmutableObjectIndex(int ObjectIdx) const { // Tail calling functions can clobber their function arguments. diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 72b6dfa181e8..8dcffe45c644 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4322,6 +4322,10 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (match(I, m_Intrinsic<Intrinsic::is_constant>())) return nullptr; + // Don't simplify freeze. + if (isa<FreezeInst>(I)) + return nullptr; + // Replace Op with RepOp in instruction operands. SmallVector<Value *, 8> NewOps; bool AnyReplaced = false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index f2d5c3c867c2..bbb0b654dc67 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -877,6 +877,9 @@ public: if (LI->isAtomic()) return false; + if (!DL.typeSizeEqualsStoreSize(Result.VTy->getElementType())) + return false; + // Get the base polynomial computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5ce1013f30fd..7406a8ac1611 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -10888,7 +10888,7 @@ static void tryToElideArgumentCopy( } // Perform the elision. Delete the old stack object and replace its only use - // in the variable info map. Mark the stack object as mutable. + // in the variable info map. Mark the stack object as mutable and aliased. LLVM_DEBUG({ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' << " Replacing frame index " << OldIndex << " with " << FixedIndex @@ -10896,6 +10896,7 @@ static void tryToElideArgumentCopy( }); MFI.RemoveStackObject(OldIndex); MFI.setIsImmutableObjectIndex(FixedIndex, false); + MFI.setIsAliasedObjectIndex(FixedIndex, true); AllocaIndex = FixedIndex; ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex}); for (SDValue ArgVal : ArgVals) diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 55c5bbc66a3f..862aefe46193 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -181,13 +181,14 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes( } for (unsigned E = FT->getNumParams(); I != E; ++I) { - Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne(); #if 0 // FIXME: Need more information about argument size; see // https://reviews.llvm.org/D132926 uint64_t ArgSizeBytes = AttrList.getParamArm64ECArgSizeBytes(I); + Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne(); #else uint64_t ArgSizeBytes = 0; + Align ParamAlign = Align(); #endif Type *Arm64Ty, *X64Ty; canonicalizeThunkType(FT->getParamType(I), ParamAlign, @@ -297,7 +298,7 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType( uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes; if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) { Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes; - if (Alignment.value() >= 8 && !T->isPointerTy()) + if (Alignment.value() >= 16 && !Ret) Out << "a" << Alignment.value(); Arm64Ty = T; if (TotalSizeBytes <= 8) { @@ -328,7 +329,7 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType( Out << "m"; if (TypeSize != 4) Out << TypeSize; - if (Alignment.value() >= 8 && !T->isPointerTy()) + if (Alignment.value() >= 16 && !Ret) Out << "a" << Alignment.value(); // FIXME: Try to canonicalize Arm64Ty more thoroughly? Arm64Ty = T; @@ -513,7 +514,14 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { // Call the function passed to the thunk. Value *Callee = Thunk->getArg(0); Callee = IRB.CreateBitCast(Callee, PtrTy); - Value *Call = IRB.CreateCall(Arm64Ty, Callee, Args); + CallInst *Call = IRB.CreateCall(Arm64Ty, Callee, Args); + + auto SRetAttr = F->getAttributes().getParamAttr(0, Attribute::StructRet); + auto InRegAttr = F->getAttributes().getParamAttr(0, Attribute::InReg); + if (SRetAttr.isValid() && !InRegAttr.isValid()) { + Thunk->addParamAttr(1, SRetAttr); + Call->addParamAttr(0, SRetAttr); + } Value *RetVal = Call; if (TransformDirectToSRet) { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 95d8ab95b2c0..bcfd0253e73c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22122,7 +22122,8 @@ SDValue performCONDCombine(SDNode *N, SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); unsigned CondOpcode = SubsNode->getOpcode(); - if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0)) + if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) || + !SubsNode->hasOneUse()) return SDValue(); // There is a SUBS feeding this condition. Is it fed by a mask we can diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp index 92db89cc0915..80fe4bcb8b58 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC( case CmpInst::FCMP_UNE: CondCode = AArch64CC::NE; break; + case CmpInst::FCMP_TRUE: + CondCode = AArch64CC::AL; + break; + case CmpInst::FCMP_FALSE: + CondCode = AArch64CC::NV; + break; } } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 4b9d549e7911..de3c89e925a2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -877,6 +877,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64})) + .moreElementsToNextPow2(0) .widenVectorEltsToVectorMinSize(0, 64); getActionDefinitionsBuilder(G_BUILD_VECTOR) diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index b8e5e7bbdaba..06cdd7e4ef48 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -623,8 +623,11 @@ bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const { EltTy = GV->getValueType(); // Look at the first element of the struct to determine the type we are // loading - while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) + while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) { + if (StructEltTy->getNumElements() == 0) + break; EltTy = StructEltTy->getTypeAtIndex(0U); + } // Look at the first element of the array to determine its type if (isa<ArrayType>(EltTy)) EltTy = EltTy->getArrayElementType(); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 6ecb1c8bf6e1..7a3198612f86 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1832,7 +1832,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // not, we need to ensure the subtarget is capable of backing off barrier // instructions in case there are any outstanding memory operations that may // cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here. - if (MI.getOpcode() == AMDGPU::S_BARRIER && + if (TII->isBarrierStart(MI.getOpcode()) && !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) { Wait = Wait.combined( AMDGPU::Waitcnt::allZero(ST->hasExtendedWaitCounts(), ST->hasVscnt())); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1c9dacc09f81..626d903c0c69 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -908,6 +908,17 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform; } + // Check to see if opcode is for a barrier start. Pre gfx12 this is just the + // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want + // to check for the barrier start (S_BARRIER_SIGNAL*) + bool isBarrierStart(unsigned Opcode) const { + return Opcode == AMDGPU::S_BARRIER || + Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 || + Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 || + Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM || + Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM; + } + static bool doesNotReadTiedSource(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td index ae5ef0541929..5762efde73f0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1786,7 +1786,7 @@ def : GCNPat< let SubtargetPredicate = isNotGFX12Plus in def : GCNPat <(int_amdgcn_s_wait_event_export_ready), (S_WAIT_EVENT (i16 0))>; let SubtargetPredicate = isGFX12Plus in - def : GCNPat <(int_amdgcn_s_wait_event_export_ready), (S_WAIT_EVENT (i16 1))>; + def : GCNPat <(int_amdgcn_s_wait_event_export_ready), (S_WAIT_EVENT (i16 2))>; // The first 10 bits of the mode register are the core FP mode on all // subtargets. diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp index d9465e86d896..ebd876d50c44 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" @@ -116,9 +117,20 @@ private: // sure that they can be replaced. static bool hasReplaceableUsers(GlobalVariable &GV) { for (User *CurrentUser : GV.users()) { - // Instruction users are always valid. - if (isa<Instruction>(CurrentUser)) + if (auto *I = dyn_cast<Instruction>(CurrentUser)) { + // Do not merge globals in exception pads. + if (I->isEHPad()) + return false; + + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + // Some intrinsics require a plain global. + if (II->getIntrinsicID() == Intrinsic::eh_typeid_for) + return false; + } + + // Other instruction users are always valid. continue; + } // We cannot replace GlobalValue users because they are not just nodes // in IR. To replace a user like this we would need to create a new @@ -302,14 +314,6 @@ void PPCMergeStringPool::replaceUsesWithGEP(GlobalVariable *GlobalToReplace, Users.push_back(CurrentUser); for (User *CurrentUser : Users) { - Instruction *UserInstruction = dyn_cast<Instruction>(CurrentUser); - Constant *UserConstant = dyn_cast<Constant>(CurrentUser); - - // At this point we expect that the user is either an instruction or a - // constant. - assert((UserConstant || UserInstruction) && - "Expected the user to be an instruction or a constant."); - // The user was not found so it must have been replaced earlier. if (!userHasOperand(CurrentUser, GlobalToReplace)) continue; @@ -318,38 +322,13 @@ void PPCMergeStringPool::replaceUsesWithGEP(GlobalVariable *GlobalToReplace, if (isa<GlobalValue>(CurrentUser)) continue; - if (!UserInstruction) { - // User is a constant type. - Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr( - PooledStructType, GPool, Indices); - UserConstant->handleOperandChange(GlobalToReplace, ConstGEP); - continue; - } - - if (PHINode *UserPHI = dyn_cast<PHINode>(UserInstruction)) { - // GEP instructions cannot be added before PHI nodes. - // With getInBoundsGetElementPtr we create the GEP and then replace it - // inline into the PHI. - Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr( - PooledStructType, GPool, Indices); - UserPHI->replaceUsesOfWith(GlobalToReplace, ConstGEP); - continue; - } - // The user is a valid instruction that is not a PHINode. - GetElementPtrInst *GEPInst = - GetElementPtrInst::Create(PooledStructType, GPool, Indices); - GEPInst->insertBefore(UserInstruction); - - LLVM_DEBUG(dbgs() << "Inserting GEP before:\n"); - LLVM_DEBUG(UserInstruction->dump()); - + Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr( + PooledStructType, GPool, Indices); LLVM_DEBUG(dbgs() << "Replacing this global:\n"); LLVM_DEBUG(GlobalToReplace->dump()); LLVM_DEBUG(dbgs() << "with this:\n"); - LLVM_DEBUG(GEPInst->dump()); - - // After the GEP is inserted the GV can be replaced. - CurrentUser->replaceUsesOfWith(GlobalToReplace, GEPInst); + LLVM_DEBUG(ConstGEP->dump()); + GlobalToReplace->replaceAllUsesWith(ConstGEP); } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 961b8f0afe22..cdf7c048a4bf 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -31,12 +31,13 @@ using namespace llvm; // This part is for ELF object output. RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) - : RISCVTargetStreamer(S), CurrentVendor("riscv"), STI(STI) { + : RISCVTargetStreamer(S), CurrentVendor("riscv") { MCAssembler &MCA = getStreamer().getAssembler(); const FeatureBitset &Features = STI.getFeatureBits(); auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend()); setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features, MAB.getTargetOptions().getABIName())); + setFlagsFromFeatures(STI); // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a // relocation to ensure the jump target is correct after linking. This is due // to a limitation that shouldForceRelocation has to make the decision upfront @@ -87,14 +88,13 @@ void RISCVTargetELFStreamer::finishAttributeSection() { void RISCVTargetELFStreamer::finish() { RISCVTargetStreamer::finish(); MCAssembler &MCA = getStreamer().getAssembler(); - const FeatureBitset &Features = STI.getFeatureBits(); RISCVABI::ABI ABI = getTargetABI(); unsigned EFlags = MCA.getELFHeaderEFlags(); - if (Features[RISCV::FeatureStdExtC]) + if (hasRVC()) EFlags |= ELF::EF_RISCV_RVC; - if (Features[RISCV::FeatureStdExtZtso]) + if (hasTSO()) EFlags |= ELF::EF_RISCV_TSO; switch (ABI) { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index a6f54bf67b5d..e8f29cd8449b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -46,7 +46,6 @@ private: StringRef CurrentVendor; MCSection *AttributeSection = nullptr; - const MCSubtargetInfo &STI; void emitAttribute(unsigned Attribute, unsigned Value) override; void emitTextAttribute(unsigned Attribute, StringRef String) override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index 254a9a4bc0ef..b8e0f3a867f4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -207,8 +207,6 @@ void RISCVMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { case VK_RISCV_TLS_GOT_HI: case VK_RISCV_TLS_GD_HI: case VK_RISCV_TLSDESC_HI: - case VK_RISCV_TLSDESC_ADD_LO: - case VK_RISCV_TLSDESC_LOAD_LO: break; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index ac4861bf113e..eee78a8c161f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -48,6 +48,11 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) { TargetABI = ABI; } +void RISCVTargetStreamer::setFlagsFromFeatures(const MCSubtargetInfo &STI) { + HasRVC = STI.hasFeature(RISCV::FeatureStdExtC); + HasTSO = STI.hasFeature(RISCV::FeatureStdExtZtso); +} + void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign) { if (EmitStackAlign) { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h index 070e72fb157a..cb8bc21cb635 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h @@ -33,6 +33,8 @@ struct RISCVOptionArchArg { class RISCVTargetStreamer : public MCTargetStreamer { RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; + bool HasRVC = false; + bool HasTSO = false; public: RISCVTargetStreamer(MCStreamer &S); @@ -58,6 +60,9 @@ public: void emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign); void setTargetABI(RISCVABI::ABI ABI); RISCVABI::ABI getTargetABI() const { return TargetABI; } + void setFlagsFromFeatures(const MCSubtargetInfo &STI); + bool hasRVC() const { return HasRVC; } + bool hasTSO() const { return HasTSO; } }; // This part is for ascii assembly output diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index b2e9cd87373b..87bd9b4048cd 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -100,7 +100,7 @@ public: bool emitDirectiveOptionArch(); private: - void emitAttributes(); + void emitAttributes(const MCSubtargetInfo &SubtargetInfo); void emitNTLHint(const MachineInstr *MI); @@ -385,8 +385,32 @@ void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) { if (const MDString *ModuleTargetABI = dyn_cast_or_null<MDString>(M.getModuleFlag("target-abi"))) RTS.setTargetABI(RISCVABI::getTargetABI(ModuleTargetABI->getString())); + + MCSubtargetInfo SubtargetInfo = *TM.getMCSubtargetInfo(); + + // Use module flag to update feature bits. + if (auto *MD = dyn_cast_or_null<MDNode>(M.getModuleFlag("riscv-isa"))) { + for (auto &ISA : MD->operands()) { + if (auto *ISAString = dyn_cast_or_null<MDString>(ISA)) { + auto ParseResult = llvm::RISCVISAInfo::parseArchString( + ISAString->getString(), /*EnableExperimentalExtension=*/true, + /*ExperimentalExtensionVersionCheck=*/true); + if (!errorToBool(ParseResult.takeError())) { + auto &ISAInfo = *ParseResult; + for (const auto &Feature : RISCVFeatureKV) { + if (ISAInfo->hasExtension(Feature.Key) && + !SubtargetInfo.hasFeature(Feature.Value)) + SubtargetInfo.ToggleFeature(Feature.Key); + } + } + } + } + + RTS.setFlagsFromFeatures(SubtargetInfo); + } + if (TM.getTargetTriple().isOSBinFormatELF()) - emitAttributes(); + emitAttributes(SubtargetInfo); } void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) { @@ -398,13 +422,13 @@ void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) { EmitHwasanMemaccessSymbols(M); } -void RISCVAsmPrinter::emitAttributes() { +void RISCVAsmPrinter::emitAttributes(const MCSubtargetInfo &SubtargetInfo) { RISCVTargetStreamer &RTS = static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer()); // Use MCSubtargetInfo from TargetMachine. Individual functions may have // attributes that differ from other functions in the module and we have no // way to know which function is correct. - RTS.emitTargetAttributes(*TM.getMCSubtargetInfo(), /*EmitStackAlign*/ true); + RTS.emitTargetAttributes(SubtargetInfo, /*EmitStackAlign*/ true); } void RISCVAsmPrinter::emitFunctionEntryLabel() { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 0a314fdd41cb..89207640ee54 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -317,8 +317,9 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, .addReg(MBBI->getOperand(1).getReg()) .add(MBBI->getOperand(2)); if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { - // FIXME: Zdinx RV32 can not work on unaligned memory. - assert(!STI->hasFastUnalignedAccess()); + // FIXME: Zdinx RV32 can not work on unaligned scalar memory. + assert(!STI->hasFastUnalignedAccess() && + !STI->enableUnalignedScalarMem()); assert(MBBI->getOperand(2).getOffset() % 8 == 0); MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td index 26451c80f57b..1bb6b6a561f4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1025,6 +1025,11 @@ def FeatureFastUnalignedAccess "true", "Has reasonably performant unaligned " "loads and stores (both scalar and vector)">; +def FeatureUnalignedScalarMem + : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", + "true", "Has reasonably performant unaligned scalar " + "loads and stores">; + def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d46093b9e260..3fe7ddfdd427 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1883,7 +1883,8 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, // replace. If we don't support unaligned scalar mem, prefer the constant // pool. // TODO: Can the caller pass down the alignment? - if (!Subtarget.hasFastUnalignedAccess()) + if (!Subtarget.hasFastUnalignedAccess() && + !Subtarget.enableUnalignedScalarMem()) return true; // Prefer to keep the load if it would require many instructions. @@ -19772,8 +19773,10 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( unsigned *Fast) const { if (!VT.isVector()) { if (Fast) - *Fast = Subtarget.hasFastUnalignedAccess(); - return Subtarget.hasFastUnalignedAccess(); + *Fast = Subtarget.hasFastUnalignedAccess() || + Subtarget.enableUnalignedScalarMem(); + return Subtarget.hasFastUnalignedAccess() || + Subtarget.enableUnalignedScalarMem(); } // All vector implementations must support element alignment diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index bf6547cc87ec..2f2dc6b80792 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -70,49 +70,62 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); MachineFunction &MF = *MBB->getParent(); - // Get two load or store instructions. Use the original instruction for one - // of them (arbitrarily the second here) and create a clone for the other. - MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI); - MBB->insert(MI, EarlierMI); + // Get two load or store instructions. Use the original instruction for + // one of them and create a clone for the other. + MachineInstr *HighPartMI = MF.CloneMachineInstr(&*MI); + MachineInstr *LowPartMI = &*MI; + MBB->insert(LowPartMI, HighPartMI); // Set up the two 64-bit registers and remember super reg and its flags. - MachineOperand &HighRegOp = EarlierMI->getOperand(0); - MachineOperand &LowRegOp = MI->getOperand(0); + MachineOperand &HighRegOp = HighPartMI->getOperand(0); + MachineOperand &LowRegOp = LowPartMI->getOperand(0); Register Reg128 = LowRegOp.getReg(); unsigned Reg128Killed = getKillRegState(LowRegOp.isKill()); unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef()); HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64)); - if (MI->mayStore()) { - // Add implicit uses of the super register in case one of the subregs is - // undefined. We could track liveness and skip storing an undefined - // subreg, but this is hopefully rare (discovered with llvm-stress). - // If Reg128 was killed, set kill flag on MI. - unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit); - MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl); - MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed)); - } - // The address in the first (high) instruction is already correct. // Adjust the offset in the second (low) instruction. - MachineOperand &HighOffsetOp = EarlierMI->getOperand(2); - MachineOperand &LowOffsetOp = MI->getOperand(2); + MachineOperand &HighOffsetOp = HighPartMI->getOperand(2); + MachineOperand &LowOffsetOp = LowPartMI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); - // Clear the kill flags on the registers in the first instruction. - if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse()) - EarlierMI->getOperand(0).setIsKill(false); - EarlierMI->getOperand(1).setIsKill(false); - EarlierMI->getOperand(3).setIsKill(false); - // Set the opcodes. unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm()); unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm()); assert(HighOpcode && LowOpcode && "Both offsets should be in range"); + HighPartMI->setDesc(get(HighOpcode)); + LowPartMI->setDesc(get(LowOpcode)); + + MachineInstr *FirstMI = HighPartMI; + if (MI->mayStore()) { + FirstMI->getOperand(0).setIsKill(false); + // Add implicit uses of the super register in case one of the subregs is + // undefined. We could track liveness and skip storing an undefined + // subreg, but this is hopefully rare (discovered with llvm-stress). + // If Reg128 was killed, set kill flag on MI. + unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit); + MachineInstrBuilder(MF, HighPartMI).addReg(Reg128, Reg128UndefImpl); + MachineInstrBuilder(MF, LowPartMI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed)); + } else { + // If HighPartMI clobbers any of the address registers, it needs to come + // after LowPartMI. + auto overlapsAddressReg = [&](Register Reg) -> bool { + return RI.regsOverlap(Reg, MI->getOperand(1).getReg()) || + RI.regsOverlap(Reg, MI->getOperand(3).getReg()); + }; + if (overlapsAddressReg(HighRegOp.getReg())) { + assert(!overlapsAddressReg(LowRegOp.getReg()) && + "Both loads clobber address!"); + MBB->splice(HighPartMI, MBB, LowPartMI); + FirstMI = LowPartMI; + } + } - EarlierMI->setDesc(get(HighOpcode)); - MI->setDesc(get(LowOpcode)); *** 421 LINES SKIPPED ***