git: e10324587a6a - stable/12 - Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp release/11.x llvmorg-11.0.0-rc1-25-g903c872b169.

From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 22 Dec 2021 10:05:24 UTC
The branch stable/12 has been updated by dim:

URL: https://cgit.FreeBSD.org/src/commit/?id=e10324587a6a63cf75a36b5275dd3220d5d102bf

commit e10324587a6a63cf75a36b5275dd3220d5d102bf
Author:     Dimitry Andric <dim@FreeBSD.org>
AuthorDate: 2020-07-31 22:23:32 +0000
Commit:     Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2021-12-22 09:58:10 +0000

    Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp
    release/11.x llvmorg-11.0.0-rc1-25-g903c872b169.
    
    (cherry picked from commit 979e22ff1ac2a50acbf94e28576a058db89003b5)
---
 contrib/llvm-project/clang/lib/Sema/SemaOpenMP.cpp |   6 +-
 contrib/llvm-project/lld/COFF/Config.h             |   1 +
 contrib/llvm-project/lld/COFF/Driver.cpp           |   7 +-
 contrib/llvm-project/lld/COFF/InputFiles.cpp       |   8 +-
 contrib/llvm-project/lld/COFF/MinGW.cpp            |   9 +
 contrib/llvm-project/lld/COFF/Options.td           |   1 +
 contrib/llvm-project/lld/COFF/Writer.cpp           |   2 +-
 .../include/llvm/CodeGen/TargetFrameLowering.h     |   6 +
 .../llvm/lib/Analysis/BasicAliasAnalysis.cpp       |  55 +--
 .../llvm/lib/CodeGen/LocalStackSlotAllocation.cpp  |   4 +
 .../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  |  24 +-
 .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp   |  67 ++--
 .../llvm/lib/MC/WinCOFFObjectWriter.cpp            |   1 +
 .../lib/Target/AArch64/AArch64FrameLowering.cpp    |  47 +--
 .../llvm/lib/Target/AArch64/AArch64FrameLowering.h |   6 +
 .../lib/Target/AArch64/AArch64ISelDAGToDAG.cpp     | 131 ++++---
 .../lib/Target/AArch64/AArch64ISelLowering.cpp     |  23 +-
 .../llvm/lib/Target/AArch64/AArch64InstrFormats.td |   5 +-
 .../llvm/lib/Target/AArch64/AArch64InstrInfo.cpp   |  29 ++
 .../lib/Target/AArch64/AArch64RegisterInfo.cpp     |  28 +-
 .../llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td |  22 ++
 .../llvm/lib/Target/AArch64/SVEInstrFormats.td     |  10 +-
 .../llvm/lib/Target/PowerPC/PPCISelLowering.cpp    |  27 +-
 .../llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp    | 324 ++++++++++++++++
 .../llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h      |   9 +
 .../llvm/lib/Target/RISCV/RISCVISelLowering.cpp    |  27 +-
 .../llvm/lib/Target/RISCV/RISCVInstrInfoB.td       | 429 +++++++++++++++++++++
 .../llvm/lib/Target/X86/X86ISelLowering.cpp        | 131 +++++--
 .../llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp    |   6 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp    |  11 +-
 .../InstCombine/InstructionCombining.cpp           |   2 +-
 .../llvm/lib/Transforms/Scalar/JumpThreading.cpp   |   8 +
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp     |  13 +-
 .../openmp/runtime/src/kmp_ftn_entry.h             |   8 +-
 contrib/llvm-project/openmp/runtime/src/kmp_os.h   |  10 +-
 .../openmp/runtime/src/ompt-specific.cpp           |   2 +-
 36 files changed, 1265 insertions(+), 234 deletions(-)

diff --git a/contrib/llvm-project/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm-project/clang/lib/Sema/SemaOpenMP.cpp
index 8bf605e5e76b..533c5b1f6ff0 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaOpenMP.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaOpenMP.cpp
@@ -2244,7 +2244,11 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level,
           [](OpenMPDirectiveKind K) { return isOpenMPTaskingDirective(K); },
           Level)) {
     bool IsTriviallyCopyable =
-        D->getType().getNonReferenceType().isTriviallyCopyableType(Context);
+        D->getType().getNonReferenceType().isTriviallyCopyableType(Context) &&
+        !D->getType()
+             .getNonReferenceType()
+             .getCanonicalType()
+             ->getAsCXXRecordDecl();
     OpenMPDirectiveKind DKind = DSAStack->getDirective(Level);
     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
     getOpenMPCaptureRegions(CaptureRegions, DKind);
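The Sema change above makes the OpenMP privatization check treat any C++ record as non-trivially-copyable, even when the type formally is. A hedged sketch of source affected by this (the struct and pragma usage are illustrative, not taken from the commit):

  // Point is trivially copyable but still a CXXRecordDecl, so after this
  // change it no longer passes the IsTriviallyCopyable test used for
  // implicit privatization in tasking regions; a plain int is unaffected.
  struct Point { int x, y; };

  void work(Point p, int n) {
  #pragma omp task
    { p.x += n; }
  }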
diff --git a/contrib/llvm-project/lld/COFF/Config.h b/contrib/llvm-project/lld/COFF/Config.h
index 72d826b8bd17..7c439176f3a4 100644
--- a/contrib/llvm-project/lld/COFF/Config.h
+++ b/contrib/llvm-project/lld/COFF/Config.h
@@ -140,6 +140,7 @@ struct Configuration {
   bool safeSEH = false;
   Symbol *sehTable = nullptr;
   Symbol *sehCount = nullptr;
+  bool noSEH = false;
 
   // Used for /opt:lldlto=N
   unsigned ltoo = 2;
diff --git a/contrib/llvm-project/lld/COFF/Driver.cpp b/contrib/llvm-project/lld/COFF/Driver.cpp
index 7372505bb616..9ceccef86779 100644
--- a/contrib/llvm-project/lld/COFF/Driver.cpp
+++ b/contrib/llvm-project/lld/COFF/Driver.cpp
@@ -1700,9 +1700,10 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   config->wordsize = config->is64() ? 8 : 4;
 
   // Handle /safeseh, x86 only, on by default, except for mingw.
-  if (config->machine == I386 &&
-      args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw))
-    config->safeSEH = true;
+  if (config->machine == I386) {
+    config->safeSEH = args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw);
+    config->noSEH = args.hasArg(OPT_noseh);
+  }
 
   // Handle /functionpadmin
   for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt))
diff --git a/contrib/llvm-project/lld/COFF/InputFiles.cpp b/contrib/llvm-project/lld/COFF/InputFiles.cpp
index 0adc2b91bd99..4346b3a2ffa7 100644
--- a/contrib/llvm-project/lld/COFF/InputFiles.cpp
+++ b/contrib/llvm-project/lld/COFF/InputFiles.cpp
@@ -348,13 +348,13 @@ void ObjFile::recordPrevailingSymbolForMingw(
   // of the section chunk we actually include instead of discarding it,
   // add the symbol to a map to allow using it for implicitly
   // associating .[px]data$<func> sections to it.
+  // Use the suffix from the .text$<func> section name instead of the leader
+  // symbol name, for cases where the names differ (i386 mangling/decorations,
+  // cases where the leader is a weak symbol named .weak.func.default*).
   int32_t sectionNumber = sym.getSectionNumber();
   SectionChunk *sc = sparseChunks[sectionNumber];
   if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
-    StringRef name;
-    name = check(coffObj->getSymbolName(sym));
-    if (getMachineType() == I386)
-      name.consume_front("_");
+    StringRef name = sc->getSectionName().split('$').second;
     prevailingSectionMap[name] = sectionNumber;
   }
 }
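The rewritten mapping keys on the section-name suffix rather than the symbol name; a minimal standalone sketch of the extraction, assuming LLVM's StringRef API:

  #include "llvm/ADT/StringRef.h"
  #include <cassert>

  int main() {
    // split('$') yields the pair (".text", "myfunc"); the suffix is stable
    // across i386 underscore decoration and .weak.<func>.default* leaders.
    llvm::StringRef suffix = llvm::StringRef(".text$myfunc").split('$').second;
    assert(suffix == "myfunc");
  }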
diff --git a/contrib/llvm-project/lld/COFF/MinGW.cpp b/contrib/llvm-project/lld/COFF/MinGW.cpp
index bded985f04d0..e24cdca6ee34 100644
--- a/contrib/llvm-project/lld/COFF/MinGW.cpp
+++ b/contrib/llvm-project/lld/COFF/MinGW.cpp
@@ -34,6 +34,11 @@ AutoExporter::AutoExporter() {
       "libclang_rt.builtins-arm",
       "libclang_rt.builtins-i386",
       "libclang_rt.builtins-x86_64",
+      "libclang_rt.profile",
+      "libclang_rt.profile-aarch64",
+      "libclang_rt.profile-arm",
+      "libclang_rt.profile-i386",
+      "libclang_rt.profile-x86_64",
       "libc++",
       "libc++abi",
       "libunwind",
@@ -57,6 +62,10 @@ AutoExporter::AutoExporter() {
       "__builtin_",
       // Artificial symbols such as .refptr
       ".",
+      // Profile generation symbols
+      "__profc_",
+      "__profd_",
+      "__profvp_",
   };
 
   excludeSymbolSuffixes = {
diff --git a/contrib/llvm-project/lld/COFF/Options.td b/contrib/llvm-project/lld/COFF/Options.td
index 212879e1d60b..087d53b5d2dd 100644
--- a/contrib/llvm-project/lld/COFF/Options.td
+++ b/contrib/llvm-project/lld/COFF/Options.td
@@ -204,6 +204,7 @@ def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">,
     HelpText<"Add symbol as undefined, but allow it to remain undefined">;
 def kill_at : F<"kill-at">;
 def lldmingw : F<"lldmingw">;
+def noseh : F<"noseh">;
 def output_def : Joined<["/", "-", "/?", "-?"], "output-def:">;
 def pdb_source_path : P<"pdbsourcepath",
     "Base path used to make relative source file path absolute in PDB">;
diff --git a/contrib/llvm-project/lld/COFF/Writer.cpp b/contrib/llvm-project/lld/COFF/Writer.cpp
index 3bcc1777f7ac..082de5b8c1d6 100644
--- a/contrib/llvm-project/lld/COFF/Writer.cpp
+++ b/contrib/llvm-project/lld/COFF/Writer.cpp
@@ -1393,7 +1393,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF;
   if (config->integrityCheck)
     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY;
-  if (setNoSEHCharacteristic)
+  if (setNoSEHCharacteristic || config->noSEH)
     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH;
   if (config->terminalServerAware)
     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
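Taken together, the /noseh pieces reduce to one extra condition when the PE header is written. A standalone sketch using LLVM's COFF constants (the function and its parameters are illustrative):

  #include "llvm/BinaryFormat/COFF.h"
  #include <cstdint>

  uint16_t dllCharacteristics(bool setNoSEHCharacteristic, bool noSEH) {
    uint16_t chars = 0;
    // The new flag forces IMAGE_DLL_CHARACTERISTICS_NO_SEH even when the
    // writer's own analysis would not set it.
    if (setNoSEHCharacteristic || noSEH)
      chars |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_SEH;
    return chars;
  }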
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index c3a11b199675..d6580430daf7 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -134,6 +134,12 @@ public:
   /// was called).
   virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;
 
+  /// This method returns whether or not it is safe for an object with the
+  /// given stack id to be bundled into the local area.
+  virtual bool isStackIdSafeForLocalArea(unsigned StackId) const {
+    return true;
+  }
+
   /// getOffsetOfLocalArea - This method returns the offset of the local area
   /// from the stack pointer on entrance to a function.
   ///
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 74664098ce1d..33f122728d2a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1648,8 +1648,32 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
     }
 
   SmallVector<Value *, 4> V1Srcs;
+  // For a recursive phi that recurses through a constant gep, we can perform
+  // aliasing calculations using the other phi operands with an unknown size to
+  // specify that an unknown number of elements after the initial value are
+  // potentially accessed.
   bool isRecursive = false;
-  if (PV)  {
+  auto CheckForRecPhi = [&](Value *PV) {
+    if (!EnableRecPhiAnalysis)
+      return false;
+    if (GEPOperator *PVGEP = dyn_cast<GEPOperator>(PV)) {
+      // Check whether the incoming value is a GEP that advances the pointer
+      // result of this PHI node (e.g. in a loop). If this is the case, we
+      // would recurse and always get a MayAlias. Handle this case specially
+      // below. We need to ensure that the phi is inbounds and has a constant
+      // positive operand so that we can check for alias with the initial value
+      // and an unknown but positive size.
+      if (PVGEP->getPointerOperand() == PN && PVGEP->isInBounds() &&
+          PVGEP->getNumIndices() == 1 && isa<ConstantInt>(PVGEP->idx_begin()) &&
+          !cast<ConstantInt>(PVGEP->idx_begin())->isNegative()) {
+        isRecursive = true;
+        return true;
+      }
+    }
+    return false;
+  };
+
+  if (PV) {
     // If we have PhiValues then use it to get the underlying phi values.
     const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN);
     // If we have more phi values than the search depth then return MayAlias
@@ -1660,19 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
       return MayAlias;
     // Add the values to V1Srcs
     for (Value *PV1 : PhiValueSet) {
-      if (EnableRecPhiAnalysis) {
-        if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
-          // Check whether the incoming value is a GEP that advances the pointer
-          // result of this PHI node (e.g. in a loop). If this is the case, we
-          // would recurse and always get a MayAlias. Handle this case specially
-          // below.
-          if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
-              isa<ConstantInt>(PV1GEP->idx_begin())) {
-            isRecursive = true;
-            continue;
-          }
-        }
-      }
+      if (CheckForRecPhi(PV1))
+        continue;
       V1Srcs.push_back(PV1);
     }
   } else {
@@ -1687,18 +1700,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
         // and 'n' are the number of PHI sources.
         return MayAlias;
 
-      if (EnableRecPhiAnalysis)
-        if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
-          // Check whether the incoming value is a GEP that advances the pointer
-          // result of this PHI node (e.g. in a loop). If this is the case, we
-          // would recurse and always get a MayAlias. Handle this case specially
-          // below.
-          if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
-              isa<ConstantInt>(PV1GEP->idx_begin())) {
-            isRecursive = true;
-            continue;
-          }
-        }
+      if (CheckForRecPhi(PV1))
+        continue;
 
       if (UniqueSrc.insert(PV1).second)
         V1Srcs.push_back(PV1);
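The pattern the new CheckForRecPhi lambda recognizes arises from ordinary pointer-increment loops. A hedged example of C++ source that produces a PHI recursing through a constant inbounds GEP:

  // Compiled to IR, 'p' becomes a PHI whose incoming values are 'base' and
  // an inbounds GEP of the PHI itself with constant index 1 -- the recursive
  // case now answered by comparing against the initial value with an
  // unknown-but-positive size instead of returning MayAlias.
  int sum(const int *base, int n) {
    int s = 0;
    for (const int *p = base; p != base + n; ++p)
      s += *p;
    return s;
  }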
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 6c5ef0255a08..204fb556d810 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -220,6 +220,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
         continue;
       if (StackProtectorFI == (int)i)
         continue;
+      if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
+        continue;
 
       switch (MFI.getObjectSSPLayout(i)) {
       case MachineFrameInfo::SSPLK_None:
@@ -254,6 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
       continue;
     if (ProtectedObjs.count(i))
       continue;
+    if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
+      continue;
 
     AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
   }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f14b3dba4f31..ec384d2a7c56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11372,9 +11372,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       // Stop if more than one members are non-undef.
       if (NumDefs > 1)
         break;
+
       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                      VT.getVectorElementType(),
-                                     X.getValueType().getVectorNumElements()));
+                                     X.getValueType().getVectorElementCount()));
     }
 
     if (NumDefs == 0)
@@ -18795,6 +18796,11 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
   EVT OpVT = N->getOperand(0).getValueType();
+
+  // We currently can't generate an appropriate shuffle for a scalable vector.
+  if (VT.isScalableVector())
+    return SDValue();
+
   int NumElts = VT.getVectorNumElements();
   int NumOpElts = OpVT.getVectorNumElements();
 
@@ -19055,11 +19061,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
     return V;
 
   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
-  // nodes often generate nop CONCAT_VECTOR nodes.
-  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
-  // place the incoming vectors at the exact same location.
+  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
+  // operands and look for CONCAT operations that place the incoming vectors
+  // at the exact same location.
+  //
+  // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
   SDValue SingleSource = SDValue();
-  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+  unsigned PartNumElem =
+      N->getOperand(0).getValueType().getVectorMinNumElements();
 
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     SDValue Op = N->getOperand(i);
@@ -19181,7 +19190,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
 
   // The binop must be a vector type, so we can extract some fraction of it.
   EVT WideBVT = BinOp.getValueType();
-  if (!WideBVT.isVector())
+  // The optimisations below currently assume we are dealing with fixed length
+  // vectors. It is possible to add support for scalable vectors, but at the
+  // moment we've done no analysis to prove whether they are profitable or not.
+  if (!WideBVT.isFixedLengthVector())
     return SDValue();
 
   EVT VT = Extract->getValueType(0);
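Several of these DAGCombiner fixes swap getVectorNumElements() for queries that remain valid on scalable types. A minimal sketch of the distinction, assuming LLVM's EVT API:

  #include "llvm/CodeGen/ValueTypes.h"

  unsigned knownMinElements(llvm::EVT VT) {
    // For a fixed vector such as v4i32 this is the exact lane count; for a
    // scalable vector such as nxv4i32 it is the vscale-scaled minimum.
    // getVectorNumElements() is only meaningful in the fixed case.
    return VT.getVectorMinNumElements();
  }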
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 414ba25ffd5f..c81d03cac81b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2151,7 +2151,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   EVT InVT = Lo.getValueType();
 
   EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
-                               InVT.getVectorNumElements());
+                               InVT.getVectorElementCount());
 
   if (N->isStrictFPOpcode()) {
     Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, 
@@ -2197,13 +2197,19 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   SDValue Idx = N->getOperand(1);
   SDLoc dl(N);
   SDValue Lo, Hi;
+
+  if (SubVT.isScalableVector() !=
+      N->getOperand(0).getValueType().isScalableVector())
+    report_fatal_error("Extracting a fixed-length vector from an illegal "
+                       "scalable vector is not yet supported");
+
   GetSplitVector(N->getOperand(0), Lo, Hi);
 
-  uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+  uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
   uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
 
   if (IdxVal < LoElts) {
-    assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+    assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts &&
            "Extracted subvector crosses vector split!");
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
   } else {
@@ -2559,13 +2565,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
   SDValue InVec = N->getOperand(OpNo);
   EVT InVT = InVec->getValueType(0);
   EVT OutVT = N->getValueType(0);
-  unsigned NumElements = OutVT.getVectorNumElements();
+  ElementCount NumElements = OutVT.getVectorElementCount();
   bool IsFloat = OutVT.isFloatingPoint();
 
-  // Widening should have already made sure this is a power-two vector
-  // if we're trying to split it at all. assert() that's true, just in case.
-  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
-
   unsigned InElementSize = InVT.getScalarSizeInBits();
   unsigned OutElementSize = OutVT.getScalarSizeInBits();
 
@@ -2595,6 +2597,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
   GetSplitVector(InVec, InLoVec, InHiVec);
 
   // Truncate them to 1/2 the element size.
+  //
+  // This assumes the number of elements is a power of two; any vector that
+  // isn't should be widened, not split.
   EVT HalfElementVT = IsFloat ?
     EVT::getFloatingPointVT(InElementSize/2) :
     EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
@@ -3605,16 +3610,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
   EVT InVT = N->getOperand(0).getValueType();
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDLoc dl(N);
-  unsigned WidenNumElts = WidenVT.getVectorNumElements();
-  unsigned NumInElts = InVT.getVectorNumElements();
   unsigned NumOperands = N->getNumOperands();
 
   bool InputWidened = false; // Indicates we need to widen the input.
   if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
-    if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+    unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+    unsigned NumInElts = InVT.getVectorMinNumElements();
+    if (WidenNumElts % NumInElts == 0) {
       // Add undef vectors to widen to correct length.
-      unsigned NumConcat = WidenVT.getVectorNumElements() /
-                           InVT.getVectorNumElements();
+      unsigned NumConcat = WidenNumElts / NumInElts;
       SDValue UndefVal = DAG.getUNDEF(InVT);
       SmallVector<SDValue, 16> Ops(NumConcat);
       for (unsigned i=0; i < NumOperands; ++i)
@@ -3638,6 +3642,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
         return GetWidenedVector(N->getOperand(0));
 
       if (NumOperands == 2) {
+        assert(!WidenVT.isScalableVector() &&
+               "Cannot use vector shuffles to widen CONCAT_VECTOR result");
+        unsigned WidenNumElts = WidenVT.getVectorNumElements();
+        unsigned NumInElts = InVT.getVectorNumElements();
+
         // Replace concat of two operands with a shuffle.
         SmallVector<int, 16> MaskOps(WidenNumElts, -1);
         for (unsigned i = 0; i < NumInElts; ++i) {
@@ -3652,6 +3661,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
     }
   }
 
+  assert(!WidenVT.isScalableVector() &&
+         "Cannot use build vectors to widen CONCAT_VECTOR result");
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumInElts = InVT.getVectorNumElements();
+
   // Fall back to use extracts and build vector.
   EVT EltVT = WidenVT.getVectorElementType();
   SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -4913,7 +4927,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
 
   int LdWidth = LdVT.getSizeInBits();
   int WidthDiff = WidenWidth - LdWidth;
-  // Allow wider loads.
+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
   unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
 
   // Find the vector type that can load from.
@@ -4965,19 +4980,6 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
                       LD->getPointerInfo().getWithOffset(Offset),
                       LD->getOriginalAlign(), MMOFlags, AAInfo);
       LdChain.push_back(L.getValue(1));
-      if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
-        // Later code assumes the vector loads produced will be mergeable, so we
-        // must pad the final entry up to the previous width. Scalars are
-        // combined separately.
-        SmallVector<SDValue, 16> Loads;
-        Loads.push_back(L);
-        unsigned size = L->getValueSizeInBits(0);
-        while (size < LdOp->getValueSizeInBits(0)) {
-          Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
-          size += L->getValueSizeInBits(0);
-        }
-        L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
-      }
     } else {
       L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
                       LD->getPointerInfo().getWithOffset(Offset),
@@ -5018,8 +5020,17 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
     EVT NewLdTy = LdOps[i].getValueType();
     if (NewLdTy != LdTy) {
       // Create a larger vector.
+      unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
+      assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
+      SmallVector<SDValue, 16> WidenOps(NumOps);
+      unsigned j = 0;
+      for (; j != End-Idx; ++j)
+        WidenOps[j] = ConcatOps[Idx+j];
+      for (; j != NumOps; ++j)
+        WidenOps[j] = DAG.getUNDEF(LdTy);
+
       ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
-                                     makeArrayRef(&ConcatOps[Idx], End - Idx));
+                                     WidenOps);
       Idx = End - 1;
       LdTy = NewLdTy;
     }
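The GenWidenVectorLoads change moves the UNDEF padding into the concatenation step: the operands gathered so far are topped up with UNDEF of the narrow type until the wider CONCAT_VECTORS has the operand count it needs. A generic sketch of that padding (the helper and types are hypothetical):

  #include <vector>

  // Pad the operand list up to numOps entries with a placeholder 'undef'
  // value, mirroring how WidenOps is filled with DAG.getUNDEF(LdTy).
  template <typename V>
  std::vector<V> padOps(std::vector<V> ops, unsigned numOps, V undef) {
    while (ops.size() < numOps)
      ops.push_back(undef);
    return ops;
  }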
diff --git a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 4796ef531054..8e7bf1eb0169 100644
--- a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -375,6 +375,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
   COFFSymbol *Local = nullptr;
   if (cast<MCSymbolCOFF>(MCSym).isWeakExternal()) {
     Sym->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+    Sym->Section = nullptr;
 
     COFFSymbol *WeakDefault = getLinkedSymbol(MCSym);
     if (!WeakDefault) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index efa3fd5ca9ce..4789a9f02937 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1192,7 +1192,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
 
   // Process the SVE callee-saves to determine what space needs to be
   // allocated.
-  if (AFI->getSVECalleeSavedStackSize()) {
+  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
     // Find callee save instructions in frame.
     CalleeSavesBegin = MBBI;
     assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
@@ -1200,11 +1200,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       ++MBBI;
     CalleeSavesEnd = MBBI;
 
-    int64_t OffsetToFirstCalleeSaveFromSP =
-        MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
-    StackOffset OffsetToCalleeSavesFromSP =
-        StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
-    AllocateBefore -= OffsetToCalleeSavesFromSP;
+    AllocateBefore = {CalleeSavedSize, MVT::nxv1i8};
     AllocateAfter = SVEStackSize - AllocateBefore;
   }
 
@@ -1582,7 +1578,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // deallocated.
   StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
   MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
-  if (AFI->getSVECalleeSavedStackSize()) {
+  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
     RestoreBegin = std::prev(RestoreEnd);
     while (IsSVECalleeSave(RestoreBegin) &&
            RestoreBegin != MBB.begin())
@@ -1592,23 +1588,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     assert(IsSVECalleeSave(RestoreBegin) &&
            IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
 
-    int64_t OffsetToFirstCalleeSaveFromSP =
-        MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
-    StackOffset OffsetToCalleeSavesFromSP =
-        StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
-    DeallocateBefore = OffsetToCalleeSavesFromSP;
-    DeallocateAfter = SVEStackSize - DeallocateBefore;
+    StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8};
+    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
+    DeallocateAfter = CalleeSavedSizeAsOffset;
   }
 
   // Deallocate the SVE area.
   if (SVEStackSize) {
     if (AFI->isStackRealigned()) {
-      if (AFI->getSVECalleeSavedStackSize())
-        // Set SP to start of SVE area, from which the callee-save reloads
-        // can be done. The code below will deallocate the stack space
+      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
+        // Set SP to start of SVE callee-save area from which they can
+        // be reloaded. The code below will deallocate the stack
         // space by moving FP -> SP.
         emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-                        -SVEStackSize, TII, MachineInstr::FrameDestroy);
+                        {-CalleeSavedSize, MVT::nxv1i8}, TII,
+                        MachineInstr::FrameDestroy);
     } else {
       if (AFI->getSVECalleeSavedStackSize()) {
         // Deallocate the non-SVE locals first before we can deallocate (and
@@ -2595,25 +2589,23 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
                                               int &MinCSFrameIndex,
                                               int &MaxCSFrameIndex,
                                               bool AssignOffsets) {
+#ifndef NDEBUG
   // First process all fixed stack objects.
-  int64_t Offset = 0;
   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
-    if (MFI.getStackID(I) == TargetStackID::SVEVector) {
-      int64_t FixedOffset = -MFI.getObjectOffset(I);
-      if (FixedOffset > Offset)
-        Offset = FixedOffset;
-    }
+    assert(MFI.getStackID(I) != TargetStackID::SVEVector &&
+           "SVE vectors should never be passed on the stack by value, only by "
+           "reference.");
+#endif
 
   auto Assign = [&MFI](int FI, int64_t Offset) {
     LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
     MFI.setObjectOffset(FI, Offset);
   };
 
+  int64_t Offset = 0;
+
   // Then process all callee saved slots.
   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
-    // Make sure to align the last callee save slot.
-    MFI.setObjectAlignment(MaxCSFrameIndex, Align(16));
-
     // Assign offsets to the callee save slots.
     for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
       Offset += MFI.getObjectSize(I);
@@ -2623,6 +2615,9 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
     }
   }
 
+  // Ensure that the callee-save area is aligned to 16 bytes.
+  Offset = alignTo(Offset, Align(16U));
+
   // Create a buffer of SVE objects to allocate and sort it.
   SmallVector<int, 8> ObjectsToAllocate;
   for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
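The switch from over-aligning the last callee-save slot to aligning the running offset can be checked in isolation; a sketch with LLVM's Alignment helpers:

  #include "llvm/Support/Alignment.h"
  #include <cassert>
  #include <cstdint>

  int main() {
    // Three SVE callee-save slots of 16, 16 and 8 bytes: the running offset
    // of 40 is rounded up to 48 so the objects that follow stay 16-byte
    // aligned, without touching the last slot's own alignment.
    uint64_t Offset = 16 + 16 + 8;
    Offset = llvm::alignTo(Offset, llvm::Align(16));
    assert(Offset == 48);
  }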
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 9d0a6d9eaf25..444740cb50ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -105,6 +105,12 @@ public:
     }
   }
 
+  bool isStackIdSafeForLocalArea(unsigned StackId) const override {
+    // We don't support putting SVE objects into the pre-allocated local
+    // frame block at the moment.
+    return StackId != TargetStackID::SVEVector;
+  }
+
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 10c477853353..7799ebfbd68e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -245,7 +245,8 @@ public:
                          unsigned SubRegIdx);
   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
-  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, const unsigned Opc);
+  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
+                            unsigned Opc_rr, unsigned Opc_ri);
 
   bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
   /// SVE Reg+Imm addressing mode.
@@ -262,14 +263,12 @@ public:
   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
-  template <unsigned Scale>
-  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr,
-                             const unsigned Opc_ri);
-  template <unsigned Scale>
+  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
+                             unsigned Opc_rr, unsigned Opc_ri);
   std::tuple<unsigned, SDValue, SDValue>
-  findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
-                           const unsigned Opc_ri, const SDValue &OldBase,
-                           const SDValue &OldOffset);
+  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
+                           const SDValue &OldBase, const SDValue &OldOffset,
+                           unsigned Scale);
 
   bool tryBitfieldExtractOp(SDNode *N);
   bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -1414,12 +1413,12 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
 /// new Base and an SDValue representing the new offset.
-template <unsigned Scale>
 std::tuple<unsigned, SDValue, SDValue>
-AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
-                                              const unsigned Opc_ri,
+AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
+                                              unsigned Opc_ri,
                                               const SDValue &OldBase,
-                                              const SDValue &OldOffset) {
+                                              const SDValue &OldOffset,
+                                              unsigned Scale) {
   SDValue NewBase = OldBase;
   SDValue NewOffset = OldOffset;
   // Detect a possible Reg+Imm addressing mode.
@@ -1429,21 +1428,30 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
   // Detect a possible reg+reg addressing mode, but only if we haven't already
   // detected a Reg+Imm one.
   const bool IsRegReg =
-      !IsRegImm && SelectSVERegRegAddrMode<Scale>(OldBase, NewBase, NewOffset);
+      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
 
   // Select the instruction.
   return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
 }
 
 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
-                                               const unsigned Opc) {
+                                               unsigned Scale, unsigned Opc_ri,
+                                               unsigned Opc_rr) {
+  assert(Scale < 4 && "Invalid scaling value.");
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
   SDValue Chain = N->getOperand(0);
 
+  // Optimize addressing mode.
+  SDValue Base, Offset;
+  unsigned Opc;
+  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
+      N, Opc_rr, Opc_ri, N->getOperand(2),
+      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
+
   SDValue Ops[] = {N->getOperand(1), // Predicate
-                   N->getOperand(2), // Memory operand
-                   CurDAG->getTargetConstant(0, DL, MVT::i64), Chain};
+                   Base,             // Memory operand
+                   Offset, Chain};
 
   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
 
@@ -1479,10 +1487,9 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
   ReplaceNode(N, St);
 }
 
-template <unsigned Scale>
 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
-                                                const unsigned Opc_rr,
-                                                const unsigned Opc_ri) {
+                                                unsigned Scale, unsigned Opc_rr,
+                                                unsigned Opc_ri) {
   SDLoc dl(N);
 
   // Form a REG_SEQUENCE to force register allocation.
@@ -1492,9 +1499,9 @@ void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
   // Optimize addressing mode.
   unsigned Opc;
   SDValue Offset, Base;
-  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore<Scale>(
+  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
       N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
-      CurDAG->getTargetConstant(0, dl, MVT::i64));
+      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
 
   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
                    Base,                               // address
@@ -4085,63 +4092,51 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     }
     case Intrinsic::aarch64_sve_st2: {
       if (VT == MVT::nxv16i8) {
-        SelectPredicatedStore</*Scale=*/0>(Node, 2, AArch64::ST2B,
-                                           AArch64::ST2B_IMM);
+        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
         return;
       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
-        SelectPredicatedStore</*Scale=*/1>(Node, 2, AArch64::ST2H,
-                                           AArch64::ST2H_IMM);
+        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
         return;
       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
-        SelectPredicatedStore</*Scale=*/2>(Node, 2, AArch64::ST2W,
-                                           AArch64::ST2W_IMM);
+        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
         return;
       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
-        SelectPredicatedStore</*Scale=*/3>(Node, 2, AArch64::ST2D,
-                                           AArch64::ST2D_IMM);
+        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
         return;
       }
       break;
     }
     case Intrinsic::aarch64_sve_st3: {
       if (VT == MVT::nxv16i8) {
-        SelectPredicatedStore</*Scale=*/0>(Node, 3, AArch64::ST3B,
-                                           AArch64::ST3B_IMM);
+        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
         return;
       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
-        SelectPredicatedStore</*Scale=*/1>(Node, 3, AArch64::ST3H,
-                                           AArch64::ST3H_IMM);
+        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
         return;
       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
-        SelectPredicatedStore</*Scale=*/2>(Node, 3, AArch64::ST3W,
-                                           AArch64::ST3W_IMM);
+        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
         return;
       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
-        SelectPredicatedStore</*Scale=*/3>(Node, 3, AArch64::ST3D,
-                                           AArch64::ST3D_IMM);
+        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
         return;
       }
       break;
     }
     case Intrinsic::aarch64_sve_st4: {
       if (VT == MVT::nxv16i8) {
-        SelectPredicatedStore</*Scale=*/0>(Node, 4, AArch64::ST4B,
-                                           AArch64::ST4B_IMM);
+        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
         return;
       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
-        SelectPredicatedStore</*Scale=*/1>(Node, 4, AArch64::ST4H,
-                                           AArch64::ST4H_IMM);
+        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
         return;
       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
-        SelectPredicatedStore</*Scale=*/2>(Node, 4, AArch64::ST4W,
-                                           AArch64::ST4W_IMM);
+        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
         return;
       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
-        SelectPredicatedStore</*Scale=*/3>(Node, 4, AArch64::ST4D,
-                                           AArch64::ST4D_IMM);
+        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
         return;
       }
       break;
@@ -4741,51 +4736,51 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
   }
   case AArch64ISD::SVE_LD2_MERGE_ZERO: {
     if (VT == MVT::nxv16i8) {
-      SelectPredicatedLoad(Node, 2, AArch64::LD2B_IMM);
+      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
       return;
     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
-      SelectPredicatedLoad(Node, 2, AArch64::LD2H_IMM);
+      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
       return;
     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
-      SelectPredicatedLoad(Node, 2, AArch64::LD2W_IMM);
+      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
       return;
     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
-      SelectPredicatedLoad(Node, 2, AArch64::LD2D_IMM);
+      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
       return;
     }
     break;
   }
   case AArch64ISD::SVE_LD3_MERGE_ZERO: {
     if (VT == MVT::nxv16i8) {
-      SelectPredicatedLoad(Node, 3, AArch64::LD3B_IMM);
+      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
       return;
     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
-      SelectPredicatedLoad(Node, 3, AArch64::LD3H_IMM);
+      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
       return;
     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
-      SelectPredicatedLoad(Node, 3, AArch64::LD3W_IMM);
+      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
       return;
     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
-      SelectPredicatedLoad(Node, 3, AArch64::LD3D_IMM);
+      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
       return;
     }
     break;
   }
   case AArch64ISD::SVE_LD4_MERGE_ZERO: {
     if (VT == MVT::nxv16i8) {
-      SelectPredicatedLoad(Node, 4, AArch64::LD4B_IMM);
+      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
       return;
     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
-      SelectPredicatedLoad(Node, 4, AArch64::LD4H_IMM);
+      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
       return;
     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
-      SelectPredicatedLoad(Node, 4, AArch64::LD4W_IMM);
+      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
       return;
     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
-      SelectPredicatedLoad(Node, 4, AArch64::LD4D_IMM);
+      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
       return;
     }
     break;
@@ -4805,10 +4800,14 @@ FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
 
 /// When \p PredVT is a scalable vector predicate in the form
 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
-/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. If the input
+/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
+/// structured vectors (NumVec > 1), the output data type is
+/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
 /// EVT.
-static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
+static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
+                                                unsigned NumVec) {
+  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
   if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
     return EVT();
 
@@ -4818,7 +4817,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
 
   ElementCount EC = PredVT.getVectorElementCount();
   EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
-  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC);
+  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
+
   return MemVT;
 }
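A hedged arithmetic sketch of the NumVec extension to getPackedVectorTypeFromPredicateType: for a predicate MVT::nx<M>xi1 each data lane is 128/M bits, and a structured LD2/LD3/LD4 multiplies the lane count by NumVec, so an LD2 governed by an nxv4i1 predicate is treated as reading nxv8i32:

  // Illustrative helpers, not part of the patch.
  unsigned laneBits(unsigned M) { return 128 / M; }          // M=4 -> 32-bit lanes
  unsigned memLanes(unsigned M, unsigned NumVec) { return M * NumVec; } // 4*2 = 8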
 
@@ -4842,6 +4842,15 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
     return cast<VTSDNode>(Root->getOperand(3))->getVT();
   case AArch64ISD::ST1_PRED:
     return cast<VTSDNode>(Root->getOperand(4))->getVT();
+  case AArch64ISD::SVE_LD2_MERGE_ZERO:
+    return getPackedVectorTypeFromPredicateType(
+        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
+  case AArch64ISD::SVE_LD3_MERGE_ZERO:
+    return getPackedVectorTypeFromPredicateType(
+        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
+  case AArch64ISD::SVE_LD4_MERGE_ZERO:
+    return getPackedVectorTypeFromPredicateType(
+        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
   default:
     break;
   }
@@ -4857,7 +4866,7 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
   // We are using an SVE prefetch intrinsic. Type must be inferred
   // from the width of the predicate.
   return getPackedVectorTypeFromPredicateType(
-      Ctx, Root->getOperand(2)->getValueType(0));
+      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
 }
 
 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 85db14ab66fe..1500da2fdfc7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -932,8 +932,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::SHL, VT, Custom);
         setOperationAction(ISD::SRL, VT, Custom);
         setOperationAction(ISD::SRA, VT, Custom);
-        if (VT.getScalarType() == MVT::i1)
+        if (VT.getScalarType() == MVT::i1) {
           setOperationAction(ISD::SETCC, VT, Custom);
+          setOperationAction(ISD::TRUNCATE, VT, Custom);
+          setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+        }
       }
     }
 
@@ -8858,6 +8861,16 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
                                              SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
 
+  if (VT.getScalarType() == MVT::i1) {
+    // Lower i1 truncate to `(x & 1) != 0`.
+    SDLoc dl(Op);
+    EVT OpVT = Op.getOperand(0).getValueType();
+    SDValue Zero = DAG.getConstant(0, dl, OpVT);
+    SDValue One = DAG.getConstant(1, dl, OpVT);
+    SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
+    return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
+  }
+
   if (!VT.isVector() || VT.isScalableVector())
     return Op;
 
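In scalar terms, the new i1 path rewrites truncation as a test of bit zero, which the existing SETCC lowering for predicate vectors can handle. A sketch of the semantics (not the DAG code itself):

  #include <cstdint>

  // trunc iN -> i1 keeps only the low bit, hence '(x & 1) != 0'.
  bool truncToI1(uint64_t x) { return (x & 1) != 0; }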
@@ -12288,6 +12301,9 @@ static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
                 "Unsupported opcode.");
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
*** 1496 LINES SKIPPED ***