svn commit: r338000 - in vendor/llvm/dist-release_70: cmake/modules docs lib/Analysis lib/CodeGen/SelectionDAG lib/Target/PowerPC lib/Target/X86 lib/Transforms/Vectorize test/Analysis/BasicAA test/...

Dimitry Andric dim at FreeBSD.org
Sat Aug 18 08:26:52 UTC 2018


Author: dim
Date: Sat Aug 18 08:26:46 2018
New Revision: 338000
URL: https://svnweb.freebsd.org/changeset/base/338000

Log:
  Vendor import of llvm release_70 branch r339999:
  https://llvm.org/svn/llvm-project/llvm/branches/release_70@339999

Added:
  vendor/llvm/dist-release_70/test/Analysis/BasicAA/tail-byval.ll
  vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
  vendor/llvm/dist-release_70/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll
  vendor/llvm/dist-release_70/test/CodeGen/PowerPC/pr38087.ll
  vendor/llvm/dist-release_70/test/CodeGen/X86/pr38533.ll
  vendor/llvm/dist-release_70/test/CodeGen/X86/pr38539.ll
  vendor/llvm/dist-release_70/test/Transforms/DeadStoreElimination/tail-byval.ll
Deleted:
  vendor/llvm/dist-release_70/test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
Modified:
  vendor/llvm/dist-release_70/cmake/modules/HandleLLVMOptions.cmake
  vendor/llvm/dist-release_70/docs/ReleaseNotes.rst
  vendor/llvm/dist-release_70/lib/Analysis/BasicAliasAnalysis.cpp
  vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp
  vendor/llvm/dist-release_70/lib/Analysis/MemorySSA.cpp
  vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
  vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
  vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp
  vendor/llvm/dist-release_70/lib/Target/X86/X86FlagsCopyLowering.cpp
  vendor/llvm/dist-release_70/lib/Transforms/Vectorize/SLPVectorizer.cpp
  vendor/llvm/dist-release_70/test/BugPoint/compile-custom.ll
  vendor/llvm/dist-release_70/test/BugPoint/unsymbolized.ll
  vendor/llvm/dist-release_70/test/CodeGen/X86/flags-copy-lowering.mir
  vendor/llvm/dist-release_70/test/Other/opt-bisect-legacy-pass-manager.ll
  vendor/llvm/dist-release_70/test/TableGen/JSON.td
  vendor/llvm/dist-release_70/test/ThinLTO/X86/cache.ll
  vendor/llvm/dist-release_70/test/Transforms/EarlyCSE/memoryssa.ll
  vendor/llvm/dist-release_70/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
  vendor/llvm/dist-release_70/test/tools/gold/X86/common.ll
  vendor/llvm/dist-release_70/test/tools/gold/X86/v1.16/wrap-1.ll
  vendor/llvm/dist-release_70/test/tools/gold/X86/v1.16/wrap-2.ll
  vendor/llvm/dist-release_70/test/tools/llvm-cov/showLineExecutionCounts.cpp
  vendor/llvm/dist-release_70/test/tools/llvm-objcopy/auto-remove-shndx.test
  vendor/llvm/dist-release_70/test/tools/llvm-objcopy/many-sections.test
  vendor/llvm/dist-release_70/test/tools/llvm-objcopy/remove-shndx.test
  vendor/llvm/dist-release_70/test/tools/llvm-objcopy/strict-no-add.test
  vendor/llvm/dist-release_70/test/tools/llvm-symbolizer/pdb/pdb.test
  vendor/llvm/dist-release_70/test/tools/llvm-symbolizer/ppc64.test
  vendor/llvm/dist-release_70/unittests/Analysis/MemorySSA.cpp
  vendor/llvm/dist-release_70/utils/lit/lit/Test.py
  vendor/llvm/dist-release_70/utils/lit/lit/llvm/config.py
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-env/lit.cfg
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_bad_encoding.txt
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-format/lit.cfg
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-shell/dev-null.txt
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-shell/lit.cfg
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-shell/redirects.txt
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
  vendor/llvm/dist-release_70/utils/lit/tests/Inputs/shtest-timeout/lit.cfg
  vendor/llvm/dist-release_70/utils/lit/tests/lit.cfg

Modified: vendor/llvm/dist-release_70/cmake/modules/HandleLLVMOptions.cmake
==============================================================================
--- vendor/llvm/dist-release_70/cmake/modules/HandleLLVMOptions.cmake	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/cmake/modules/HandleLLVMOptions.cmake	Sat Aug 18 08:26:46 2018	(r338000)
@@ -149,6 +149,7 @@ endif()
 # is unloaded.
 if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
   set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,nodelete")
+  set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,nodelete")
 endif()
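
For context: -Wl,-z,nodelete sets the DF_1_NODELETE flag on the resulting
DSO, so the dynamic linker keeps it mapped even after dlclose(); this hunk
extends to MODULE libraries what was already done for SHARED ones. A minimal
sketch of the effect from the consumer side, assuming a module built with
this flag (library and symbol names below are hypothetical):

  #include <dlfcn.h>
  #include <cstdio>

  int main() {
    void *h = dlopen("./libexample-plugin.so", RTLD_NOW); // hypothetical module
    if (!h) { std::fprintf(stderr, "%s\n", dlerror()); return 1; }
    void *sym = dlsym(h, "plugin_entry");                 // hypothetical symbol
    dlclose(h); // with -z nodelete, the library stays mapped after this
    // Without nodelete, touching 'sym' past this point could be
    // use-after-unmap; with it, the module's code and data remain valid.
    return sym != nullptr ? 0 : 1;
  }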
 
 

Modified: vendor/llvm/dist-release_70/docs/ReleaseNotes.rst
==============================================================================
--- vendor/llvm/dist-release_70/docs/ReleaseNotes.rst	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/docs/ReleaseNotes.rst	Sat Aug 18 08:26:46 2018	(r338000)
@@ -121,6 +121,16 @@ Non-comprehensive list of changes in this release
   not to be compliant, and higher optimization levels will still emit some
   information in v4 format.
 
+* Added support for the ``.rva`` assembler directive for COFF targets.
+
+* The :program:`llvm-rc` tool (Windows Resource Compiler) has been improved
+  somewhat. There are still known missing features, but it is generally usable
+  in many cases. (The tool still doesn't preprocess input files automatically,
+  but it can now handle leftover C declarations in preprocessor output, when
+  given the output of an external preprocessor run.)
+
+* CodeView debug info can now be emitted for MinGW configurations, if requested.
+
 * Note..
 
 .. NOTE
@@ -144,12 +154,36 @@ Changes to the LLVM IR
 
 * invariant.group metadata can now refer only to empty metadata nodes.
 
-Changes to the ARM Backend
---------------------------
+Changes to the AArch64 Target
+-----------------------------
 
- During this release ...
+* The ``.inst`` assembler directive is now usable on both COFF and Mach-O
+  targets, in addition to ELF.
 
+* Support for most remaining COFF relocations has been added.
 
+* Support for TLS on Windows has been added.
+
+Changes to the ARM Target
+-------------------------
+
+* The ``.inst`` assembler directive is now usable on both COFF and Mach-O
+  targets, in addition to ELF. For Thumb, it can now also automatically
+  deduce the instruction size, without having to specify it with
+  e.g. ``.inst.w`` as before.
+
+Changes to the Hexagon Target
+-----------------------------
+
+* Hexagon now supports auto-vectorization for HVX. It is disabled by default
+  and can be turned on with ``-fvectorize``. For auto-vectorization to take
+  effect, code generation for HVX needs to be enabled with ``-mhvx``.
+  The complete set of options should include ``-fvectorize``, ``-mhvx``,
+  and ``-mhvx-length={64b|128b}``; a sketch follows at the end of this
+  section.
+
+* The support for Hexagon ISA V4 is deprecated and will be removed in the
+  next release.
+
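
A hedged sketch of the Hexagon note above (not part of the release notes):
a simple loop the auto-vectorizer could map onto HVX, using the flags from
the note; the file and function names are hypothetical.

  // Compile with (hypothetical invocation):
  //   clang --target=hexagon -O2 -fvectorize -mhvx -mhvx-length=128b -c axpy.cpp
  void axpy(int n, const int *x, int *y) {
    for (int i = 0; i < n; ++i)
      y[i] += 2 * x[i]; // elementwise update, a vectorization candidate
  }
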
 Changes to the MIPS Target
 --------------------------
 
@@ -184,7 +218,13 @@ During this release the SystemZ target has:
 Changes to the X86 Target
 -------------------------
 
- During this release ...
+* The calling convention for the ``f80`` data type on MinGW targets has been
+  fixed. Normally, the calling convention for this type is handled within
+  clang, but if an intrinsic that LLVM expands into a libcall is used, the
+  proper calling convention needs to be supported in LLVM as well. (Note that
+  on Windows this data type is only used for long doubles in MinGW
+  environments; in MSVC environments, long doubles are the same size as
+  normal doubles.)
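
As an illustration of when this matters (not from the release notes): on
x86_64 MinGW, ``long double`` is the 80-bit x87 type, and a math routine that
clang lowers through an LLVM intrinsic may later be expanded back into a
libcall such as ``sqrtl``, which then needs the corrected ``f80`` convention.
A hedged C++ sketch:

  // Hypothetical example for an x86_64-w64-mingw32 target: 'long double'
  // here is f80, and std::sqrt may round-trip through an LLVM sqrt
  // intrinsic and come back out as a 'sqrtl' libcall.
  #include <cmath>

  long double scaled_sqrt(long double x) {
    return std::sqrt(x) * 2.0L;
  }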
 
 Changes to the AMDGPU Target
 -----------------------------

Modified: vendor/llvm/dist-release_70/lib/Analysis/BasicAliasAnalysis.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Analysis/BasicAliasAnalysis.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/Analysis/BasicAliasAnalysis.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -801,14 +801,15 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallS
 
   const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
 
-  // If this is a tail call and Loc.Ptr points to a stack location, we know that
-  // the tail call cannot access or modify the local stack.
-  // We cannot exclude byval arguments here; these belong to the caller of
-  // the current function not to the current function, and a tail callee
-  // may reference them.
+  // Calls marked 'tail' cannot read or write allocas from the current frame
+  // because the current frame might be destroyed by the time they run.
+  // However, a tail call may pass an alloca as a byval argument: byval copies
+  // the contents of the alloca into argument registers or stack slots, so
+  // there is no lifetime issue.
   if (isa<AllocaInst>(Object))
     if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
-      if (CI->isTailCall())
+      if (CI->isTailCall() &&
+          !CI->getAttributes().hasAttrSomewhere(Attribute::ByVal))
         return ModRefInfo::NoModRef;
 
   // If the pointer is to a locally allocated object that does not escape,

Modified: vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -1338,7 +1338,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1,
     const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
     const unsigned Width = Op0->getType()->getScalarSizeInBits();
     const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
-    if (EffWidthY <= ShRAmt->getZExtValue())
+    if (ShRAmt->uge(EffWidthY))
       return X;
   }
 
@@ -1878,9 +1878,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, 
       match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
                                      m_Value(XShifted)),
                         m_Value(Y)))) {
-    const unsigned ShftCnt = ShAmt->getZExtValue();
-    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
     const unsigned Width = Op0->getType()->getScalarSizeInBits();
+    const unsigned ShftCnt = ShAmt->getLimitedValue(Width);
+    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
     const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
     if (EffWidthY <= ShftCnt) {
       const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
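
Both hunks above replace uses of APInt::getZExtValue(), which asserts when
the value does not fit in a uint64_t (e.g. a shift amount of an i128), with
the width-safe uge() and getLimitedValue(). A small sketch, assuming LLVM's
ADT headers are available:

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  void wide_shift_amount() {
    llvm::APInt ShAmt(128, 0);
    ShAmt.setBit(100); // value 2^100, far larger than UINT64_MAX
    // ShAmt.getZExtValue() would assert here ("Too many bits for uint64_t!").
    assert(ShAmt.uge(16));                     // safe unsigned comparison
    assert(ShAmt.getLimitedValue(128) == 128); // clamps instead of asserting
  }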

Modified: vendor/llvm/dist-release_70/lib/Analysis/MemorySSA.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Analysis/MemorySSA.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/Analysis/MemorySSA.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -258,13 +258,18 @@ static ClobberAlias instructionClobbersQuery(MemoryDef
 
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
     // These intrinsics will show up as affecting memory, but they are just
-    // markers.
+    // markers, mostly.
+    //
+    // FIXME: We probably don't actually want MemorySSA to model these at all
+    // (including creating MemoryAccesses for them): we just end up inventing
+    // clobbers where they don't really exist at all. Please see D43269 for
+    // context.
     switch (II->getIntrinsicID()) {
     case Intrinsic::lifetime_start:
       if (UseCS)
         return {false, NoAlias};
       AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc);
-      return {AR == MustAlias, AR};
+      return {AR != NoAlias, AR};
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
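
The behavioral change is in the return for lifetime_start: a clobber is now
reported whenever the locations are not provably NoAlias, where previously
only MustAlias counted and MayAlias cases slipped through. A simplified
sketch (LLVM's real AliasResult also has PartialAlias, elided here):

  enum AliasResult { NoAlias, MayAlias, MustAlias };

  bool lifetimeStartClobbers(AliasResult AR) {
    return AR != NoAlias; // was: AR == MustAlias
  }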

Modified: vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -1778,15 +1778,16 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNod
   SDValue Op = N->getOperand(0);
   EVT OpVT = Op->getValueType(0);
 
-  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
-  assert (IVT == N->getValueType(0) && "Bitcast to type of different size");
-
   SDValue Promoted = GetPromotedFloat(N->getOperand(0));
   EVT PromotedVT = Promoted->getValueType(0);
 
   // Convert the promoted float value to the desired IVT.
-  return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT,
-                     Promoted);
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+  SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N),
+                                IVT, Promoted);
+  // The final result type might not be a scalar, so we need a bitcast. The
+  // bitcast will be further legalized if needed.
+  return DAG.getBitcast(N->getValueType(0), Convert);
 }
 
 // Promote Operand 1 of FCOPYSIGN.  Operand 0 ought to be handled by
@@ -1941,8 +1942,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, u
 SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-  return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT,
-                     N->getOperand(0));
+  // The input type isn't guaranteed to be a scalar integer, so bitcast if it
+  // isn't. The bitcast will be legalized further if necessary.
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+                              N->getOperand(0).getValueType().getSizeInBits());
+  SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
+  return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
 }
 
 SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {

Modified: vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -269,8 +269,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode
     return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
   case TargetLowering::TypePromoteFloat: {
     // Convert the promoted float by hand.
-    SDValue PromotedOp = GetPromotedFloat(InOp);
-    return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
+    if (!NOutVT.isVector())
+      return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp));
     break;
   }
   case TargetLowering::TypeExpandInteger:

Modified: vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAG.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAG.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -2374,7 +2374,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownB
     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
       // Offset the demanded elts by the subvector index.
       uint64_t Idx = SubIdx->getZExtValue();
-      APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+      APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
       computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
     } else {
       computeKnownBits(Src, Known, Depth + 1);
@@ -3533,7 +3533,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, 
     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
       // Offset the demanded elts by the subvector index.
       uint64_t Idx = SubIdx->getZExtValue();
-      APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+      APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
       return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
     }
     return ComputeNumSignBits(Src, Depth + 1);
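
In this LLVM version, APInt::zext() asserts that the new width is strictly
larger than the current one, so when an EXTRACT_SUBVECTOR covers its entire
source (the demanded-elements mask is already source-width, as in the AMDGPU
test added below), the old code asserted; zextOrSelf() returns the value
unchanged in that case. A sketch:

  #include "llvm/ADT/APInt.h"

  llvm::APInt demandedSrc(const llvm::APInt &DemandedElts,
                          unsigned NumSrcElts, unsigned Idx) {
    // zext(NumSrcElts) asserts when DemandedElts is already NumSrcElts bits
    // wide; zextOrSelf is a no-op in that case.
    return DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
  }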

Modified: vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -7768,10 +7768,29 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCall
     SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
                                              Chain, &Flag, CS.getInstruction());
 
-    // FIXME: Why don't we do this for inline asms with MRVs?
-    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
-      EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
-
+    llvm::Type *CSResultType = CS.getType();
+    unsigned numRet;
+    ArrayRef<Type *> ResultTypes;
+    SmallVector<SDValue, 1> ResultValues(1);
+    if (CSResultType->isSingleValueType()) {
+      numRet = 1;
+      ResultValues[0] = Val;
+      ResultTypes = makeArrayRef(CSResultType);
+    } else {
+      numRet = CSResultType->getNumContainedTypes();
+      assert(Val->getNumOperands() == numRet &&
+             "Mismatch in number of output operands in asm result");
+      ResultTypes = CSResultType->subtypes();
+      ArrayRef<SDUse> ValueUses = Val->ops();
+      ResultValues.resize(numRet);
+      std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
+                     [](const SDUse &u) -> SDValue { return u.get(); });
+    }
+    SmallVector<EVT, 1> ResultVTs(numRet);
+    for (unsigned i = 0; i < numRet; i++) {
+      EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
+      SDValue Val = ResultValues[i];
+      assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
       // If the type of the inline asm call site return value is different but
       // has same size as the type of the asm output bitcast it.  One example
       // of this is for vectors with different width / number of elements.
@@ -7782,22 +7801,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCall
       // This can also happen for a return value that disagrees with the
       // register class it is put in, eg. a double in a general-purpose
       // register on a 32-bit machine.
-      if (ResultType != Val.getValueType() &&
-          ResultType.getSizeInBits() == Val.getValueSizeInBits()) {
-        Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
-                          ResultType, Val);
-
-      } else if (ResultType != Val.getValueType() &&
-                 ResultType.isInteger() && Val.getValueType().isInteger()) {
-        // If a result value was tied to an input value, the computed result may
-        // have a wider width than the expected result.  Extract the relevant
-        // portion.
-        Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val);
+      if (ResultVT != Val.getValueType() &&
+          ResultVT.getSizeInBits() == Val.getValueSizeInBits())
+        Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
+      else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
+               Val.getValueType().isInteger()) {
+        // If a result value was tied to an input value, the computed result
+        // may have a wider width than the expected result.  Extract the
+        // relevant portion.
+        Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
       }
 
-      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+      assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
+      ResultVTs[i] = ResultVT;
+      ResultValues[i] = Val;
     }
 
+    Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+                      DAG.getVTList(ResultVTs), ResultValues);
     setValue(CS.getInstruction(), Val);
     // Don't need to use this as a chain in this case.
     if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
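
This change makes visitInlineAsm apply the bitcast/truncate fixups to every
element of a struct-typed (multiple-return-value) asm result and merge them
back with MERGE_VALUES, instead of handling only single-value results. At the
source level that corresponds to extended asm with several outputs; a hedged
x86 illustration (the ARM tests added below exercise the same path for floats
and doubles in GPRs):

  // Two outputs become an IR asm call returning { i32, i32 }, the
  // multi-result case this change handles.
  unsigned two_outputs() {
    unsigned a, b;
    asm("movl $1, %0\n\tmovl $2, %1" : "=r"(a), "=r"(b));
    return a + b;
  }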

Modified: vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -12007,10 +12007,15 @@ static SDValue combineBVOfVecSExt(SDNode *N, Selection
   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
     if (!Op)
       return false;
-    if (Op.getOpcode() != ISD::SIGN_EXTEND)
+    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
+        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
       return false;
 
+    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
+    // of the right width.
     SDValue Extract = Op.getOperand(0);
+    if (Extract.getOpcode() == ISD::ANY_EXTEND)
+      Extract = Extract.getOperand(0);
     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
       return false;
 
@@ -12098,8 +12103,10 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNod
     return Reduced;
 
   // If we're building a vector out of extended elements from another vector
-  // we have P9 vector integer extend instructions.
-  if (Subtarget.hasP9Altivec()) {
+  // we have P9 vector integer extend instructions. The code assumes legal
+  // input types (i.e. it can't handle things like v4i16) so do not run before
+  // legalization.
+  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
     Reduced = combineBVOfVecSExt(N, DAG);
     if (Reduced)
       return Reduced;

Modified: vendor/llvm/dist-release_70/lib/Target/X86/X86FlagsCopyLowering.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/X86/X86FlagsCopyLowering.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/Target/X86/X86FlagsCopyLowering.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -97,6 +97,7 @@ class X86FlagsCopyLoweringPass : public MachineFunctio
 
 private:
   MachineRegisterInfo *MRI;
+  const X86Subtarget *Subtarget;
   const X86InstrInfo *TII;
   const TargetRegisterInfo *TRI;
   const TargetRegisterClass *PromoteRC;
@@ -346,10 +347,10 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(Ma
   LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                     << " **********\n");
 
-  auto &Subtarget = MF.getSubtarget<X86Subtarget>();
+  Subtarget = &MF.getSubtarget<X86Subtarget>();
   MRI = &MF.getRegInfo();
-  TII = Subtarget.getInstrInfo();
-  TRI = Subtarget.getRegisterInfo();
+  TII = Subtarget->getInstrInfo();
+  TRI = Subtarget->getRegisterInfo();
   MDT = &getAnalysis<MachineDominatorTree>();
   PromoteRC = &X86::GR8RegClass;
 
@@ -960,10 +961,14 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended
           .addReg(Reg)
           .addImm(SubRegIdx[OrigRegSize]);
     } else if (OrigRegSize > TargetRegSize) {
-      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
+      if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
+        // Need to constrain the register class: in 32-bit mode, only
+        // EAX/EBX/ECX/EDX (GR32_ABCD) have an addressable 8-bit subregister.
+        MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
+      }
+
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
               NewReg)
-          .addReg(Reg)
-          .addImm(SubRegIdx[TargetRegSize]);
+          .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
     } else {
       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
           .addReg(Reg);

Modified: vendor/llvm/dist-release_70/lib/Transforms/Vectorize/SLPVectorizer.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Transforms/Vectorize/SLPVectorizer.cpp	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/lib/Transforms/Vectorize/SLPVectorizer.cpp	Sat Aug 18 08:26:46 2018	(r338000)
@@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         }
         if (NeedToShuffleReuses) {
           // TODO: Merge this shuffle with the ReorderShuffleMask.
-          if (!E->ReorderIndices.empty())
+          if (E->ReorderIndices.empty())
             Builder.SetInsertPoint(VL0);
-          else if (auto *I = dyn_cast<Instruction>(V))
-            Builder.SetInsertPoint(I->getParent(),
-                                   std::next(I->getIterator()));
-          else
-            Builder.SetInsertPoint(&F->getEntryBlock(),
-                                   F->getEntryBlock().getFirstInsertionPt());
           V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
                                           E->ReuseShuffleIndices, "shuffle");
         }

Added: vendor/llvm/dist-release_70/test/Analysis/BasicAA/tail-byval.ll
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/llvm/dist-release_70/test/Analysis/BasicAA/tail-byval.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -0,0 +1,15 @@
+; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+
+declare void @takebyval(i32* byval %p)
+
+define i32 @tailbyval() {
+entry:
+  %p = alloca i32
+  store i32 42, i32* %p
+  tail call void @takebyval(i32* byval %p)
+  %rv = load i32, i32* %p
+  ret i32 %rv
+}
+; FIXME: This should be Just Ref.
+; CHECK-LABEL: Function: tailbyval: 1 pointers, 1 call sites
+; CHECK-NEXT:   Both ModRef:  Ptr: i32* %p       <->  tail call void @takebyval(i32* byval %p)

Modified: vendor/llvm/dist-release_70/test/BugPoint/compile-custom.ll
==============================================================================
--- vendor/llvm/dist-release_70/test/BugPoint/compile-custom.ll	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/test/BugPoint/compile-custom.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -1,4 +1,4 @@
-; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext --compile-custom --compile-command="%python %/s.py arg1 arg2" --opt-command opt --output-prefix %t %s | FileCheck %s
+; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext --compile-custom --compile-command="%/s.py arg1 arg2" --opt-command opt --output-prefix %t %s | FileCheck %s
 ; REQUIRES: loadable_module
 
 ; Test that arguments are correctly passed in --compile-command.  The output

Modified: vendor/llvm/dist-release_70/test/BugPoint/unsymbolized.ll
==============================================================================
--- vendor/llvm/dist-release_70/test/BugPoint/unsymbolized.ll	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/test/BugPoint/unsymbolized.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -2,7 +2,7 @@
 ; RUN: echo "import sys" > %t.py
 ; RUN: echo "print('args = ' + str(sys.argv))" >> %t.py
 ; RUN: echo "exit(1)" >> %t.py
-; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command="%python" -opt-args %t.py | FileCheck %s
+; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command=%python -opt-args %t.py | FileCheck %s
 ; RUN: not --crash opt -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s
 ; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command=%t.non.existent.opt.binary -opt-args %t.py 2>&1 | FileCheck %s --check-prefix=BAD-OPT
 

Added: vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s
+
+; Test for ICE in SelectionDAG::computeKnownBits when visiting EXTRACT_SUBVECTOR
+; with DemandedElts already as wide as the source vector.
+
+define <3 x i32> @quux() #0 {
+; CHECK-LABEL: quux:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 1
+; CHECK-NEXT:    v_mov_b32_e32 v2, 1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %tmp1 = extractelement <3 x i8> %tmp, i64 0
+  %tmp2 = zext i8 %tmp1 to i32
+  %tmp3 = insertelement <3 x i32> undef, i32 %tmp2, i32 0
+  %tmp4 = extractelement <3 x i8> %tmp, i64 1
+  %tmp5 = zext i8 %tmp4 to i32
+  %tmp6 = insertelement <3 x i32> %tmp3, i32 %tmp5, i32 1
+  %tmp7 = extractelement <3 x i8> %tmp, i64 2
+  %tmp8 = zext i8 %tmp7 to i32
+  %tmp9 = insertelement <3 x i32> %tmp6, i32 %tmp8, i32 2
+  %tmp10 = lshr <3 x i32> %tmp9, <i32 1, i32 1, i32 1>
+  ret <3 x i32> %tmp10
+}
+
+attributes #0 = { noinline optnone }

Added: vendor/llvm/dist-release_70/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/llvm/dist-release_70/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -0,0 +1,307 @@
+; RUN: llc -mtriple armv7-arm-linux-gnueabihf -O2 -mcpu=cortex-a7 < %s | FileCheck %s
+
+%struct.twofloat = type { float, float }
+%struct.twodouble = type { double, double }
+
+; Check support for returning a float in GPR with soft float ABI
+define arm_aapcscc float @zerobits_float_soft() #0 {
+; CHECK-LABEL: zerobits_float_soft
+; CHECK: mov r0, #0
+  %1 = tail call float asm "mov ${0}, #0", "=&r"()
+  ret float %1
+}
+
+; Check support for returning a double in GPR with soft float ABI
+define arm_aapcscc double @zerobits_double_soft() #0 {
+; CHECK-LABEL: zerobits_double_soft
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+  %1 = tail call double asm "mov ${0:Q}, #0\0Amov ${0:R}, #0", "=&r"()
+  ret double %1
+}
+
+; Check support for returning a float in GPR with matching float input with
+; soft float ABI
+define arm_aapcscc float @flt_gpr_matching_in_op_soft(float %f) #0 {
+; CHECK-LABEL: flt_gpr_matching_in_op_soft
+; CHECK: mov r0, r0
+  %1 = call float asm "mov $0, $1", "=&r,0"(float %f)
+  ret float %1
+}
+
+; Check support for returning a double in GPR with matching double input with
+; soft float ABI
+define arm_aapcscc double @dbl_gpr_matching_in_op_soft(double %d) #0 {
+; CHECK-LABEL: dbl_gpr_matching_in_op_soft
+; CHECK: mov r1, r0
+  %1 = call double asm "mov ${0:R}, ${1:Q}", "=&r,0"(double %d)
+  ret double %1
+}
+
+; Check support for returning a float in specific GPR with matching float input
+; with soft float ABI
+define arm_aapcscc float @flt_gpr_matching_spec_reg_in_op_soft(float %f) #0 {
+; CHECK-LABEL: flt_gpr_matching_spec_reg_in_op_soft
+; CHECK: mov r3, r3
+  %1 = call float asm "mov $0, $1", "=&{r3},0"(float %f)
+  ret float %1
+}
+
+; Check support for returning a double in specific GPR with matching double
+; input with soft float ABI
+define arm_aapcscc double @dbl_gpr_matching_spec_reg_in_op_soft(double %d) #0 {
+; CHECK-LABEL: dbl_gpr_matching_spec_reg_in_op_soft
+; CHECK: mov r3, r2
+  %1 = call double asm "mov ${0:R}, ${1:Q}", "=&{r2},0"(double %d)
+  ret double %1
+}
+
+; Check support for returning several floats in GPRs
+define arm_aapcscc float @zerobits_float_convoluted_soft() #0 {
+; CHECK-LABEL: zerobits_float_convoluted_soft
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+  %1 = call { float, float } asm "mov $0, #0; mov $1, #0", "=r,=r"()
+  %asmresult = extractvalue { float, float } %1, 0
+  %asmresult1 = extractvalue { float, float } %1, 1
+  %add = fadd float %asmresult, %asmresult1
+  ret float %add
+}
+
+; Check support for returning several doubles in GPRs
+define double @zerobits_double_convoluted_soft() #0 {
+; CHECK-LABEL: zerobits_double_convoluted_soft
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+  %1 = call { double, double } asm "mov ${0:Q}, #0; mov ${0:R}, #0; mov ${1:Q}, #0; mov ${1:R}, #0", "=r,=r"()
+  %asmresult = extractvalue { double, double } %1, 0
+  %asmresult1 = extractvalue { double, double } %1, 1
+  %add = fadd double %asmresult, %asmresult1
+  ret double %add
+}
+
+; Check support for returning several floats in GPRs with matching float inputs
+; with soft float ABI
+define arm_aapcscc float @flt_gprs_matching_in_op_soft(float %f1, float %f2) #0 {
+; CHECK-LABEL: flt_gprs_matching_in_op_soft
+; CHECK: mov r0, r0
+; CHECK-NEXT: mov r1, r1
+  %1 = call { float, float } asm "mov $0, $2; mov $1, $3", "=&r,=&r,0,1"(float %f1, float %f2)
+  %asmresult1 = extractvalue { float, float } %1, 0
+  %asmresult2 = extractvalue { float, float } %1, 1
+  %add = fadd float %asmresult1, %asmresult2
+  ret float %add
+}
+
+; Check support for returning several doubles in GPRs with matching double inputs
+; with soft float ABI
+define arm_aapcscc double @dbl_gprs_matching_in_op_soft(double %d1, double %d2) #0 {
+; CHECK-LABEL: dbl_gprs_matching_in_op_soft
+; CHECK: mov r1, r0
+; CHECK-NEXT: mov r3, r2
+  %1 = call { double, double } asm "mov ${0:R}, ${2:Q}; mov ${1:R}, ${3:Q}", "=&r,=&r,0,1"(double %d1, double %d2)
+  %asmresult1 = extractvalue { double, double } %1, 0
+  %asmresult2 = extractvalue { double, double } %1, 1
+  %add = fadd double %asmresult1, %asmresult2
+  ret double %add
+}
+
+; Check support for returning several floats in specific GPRs with matching
+; float inputs with soft float ABI
+define arm_aapcscc float @flt_gprs_matching_spec_reg_in_op_soft(float %f1, float %f2) #0 {
+; CHECK-LABEL: flt_gprs_matching_spec_reg_in_op_soft
+; CHECK: mov r3, r3
+; CHECK-NEXT: mov r4, r4
+  %1 = call { float, float } asm "mov $0, $2; mov $1, $3", "=&{r3},=&{r4},0,1"(float %f1, float %f2)
+  %asmresult1 = extractvalue { float, float } %1, 0
+  %asmresult2 = extractvalue { float, float } %1, 1
+  %add = fadd float %asmresult1, %asmresult2
+  ret float %add
+}
+
+; Check support for returning several doubles in specific GPRs with matching
+; double inputs with soft float ABI
+define arm_aapcscc double @dbl_gprs_matching_spec_reg_in_op_soft(double %d1, double %d2) #0 {
+; CHECK-LABEL: dbl_gprs_matching_spec_reg_in_op_soft
+; CHECK: mov r3, r2
+; CHECK-NEXT: mov r5, r4
+  %1 = call { double, double } asm "mov ${0:R}, ${2:Q}; mov ${1:R}, ${3:Q}", "=&{r2},=&{r4},0,1"(double %d1, double %d2)
+  %asmresult1 = extractvalue { double, double } %1, 0
+  %asmresult2 = extractvalue { double, double } %1, 1
+  %add = fadd double %asmresult1, %asmresult2
+  ret double %add
+}
+
+attributes #0 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="true" }
+
+
+; Check support for returning a float in GPR with hard float ABI
+define float @zerobits_float_hard() #1 {
+; CHECK-LABEL: zerobits_float_hard
+; CHECK: mov r0, #0
+; CHECK: vmov s0, r0
+  %1 = tail call float asm "mov ${0}, #0", "=&r"()
+  ret float %1
+}
+
+; Check support for returning a double in GPR with hard float ABI
+define double @zerobits_double_hard() #1 {
+; CHECK-LABEL: zerobits_double_hard
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK: vmov d0, r0, r1
+  %1 = tail call double asm "mov ${0:Q}, #0\0Amov ${0:R}, #0", "=&r"()
+  ret double %1
+}
+
+; Check support for returning a float in GPR with matching float input with
+; hard float ABI
+define float @flt_gpr_matching_in_op_hard(float %f) #1 {
+; CHECK-LABEL: flt_gpr_matching_in_op_hard
+; CHECK: vmov r0, s0
+; CHECK: mov r0, r0
+; CHECK: vmov s0, r0
+  %1 = call float asm "mov $0, $1", "=&r,0"(float %f)
+  ret float %1
+}
+
+; Check support for returning a double in GPR with matching double input with
+; hard float ABI
+define double @dbl_gpr_matching_in_op_hard(double %d) #1 {
+; CHECK-LABEL: dbl_gpr_matching_in_op_hard
+; CHECK: vmov r0, r1, d0
+; CHECK: mov r1, r0
+; CHECK: vmov d0, r0, r1
+  %1 = call double asm "mov ${0:R}, ${1:Q}", "=&r,0"(double %d)
+  ret double %1
+}
+
+; Check support for returning a float in specific GPR with matching float
+; input with hard float ABI
+define float @flt_gpr_matching_spec_reg_in_op_hard(float %f) #1 {
+; CHECK-LABEL: flt_gpr_matching_spec_reg_in_op_hard
+; CHECK: vmov r3, s0
+; CHECK: mov r3, r3
+; CHECK: vmov s0, r3
+  %1 = call float asm "mov $0, $1", "=&{r3},0"(float %f)
+  ret float %1
+}
+
+; Check support for returning a double in specific GPR with matching double
+; input with hard float ABI
+define double @dbl_gpr_matching_spec_reg_in_op_hard(double %d) #1 {
+; CHECK-LABEL: dbl_gpr_matching_spec_reg_in_op_hard
+; CHECK: vmov r2, r3, d0
+; CHECK: mov r3, r2
+; CHECK: vmov d0, r2, r3
+  %1 = call double asm "mov ${0:R}, ${1:Q}", "=&{r2},0"(double %d)
+  ret double %1
+}
+
+; Check support for returning several floats in GPRs
+define %struct.twofloat @zerobits_float_convoluted_hard() #1 {
+; CHECK-LABEL: zerobits_float_convoluted_hard
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK: vmov s0, r0
+; CHECK-NEXT: vmov s1, r1
+  %1 = call { float, float } asm "mov $0, #0; mov $1, #0", "=r,=r"()
+  %asmresult1 = extractvalue { float, float } %1, 0
+  %asmresult2 = extractvalue { float, float } %1, 1
+  %partialres = insertvalue %struct.twofloat undef, float %asmresult1, 0
+  %res = insertvalue %struct.twofloat %partialres, float %asmresult2, 1
+  ret %struct.twofloat %res
+}
+
+; Check support for returning several doubles in GPRs
+define %struct.twodouble @zerobits_double_convoluted_hard() #1 {
+; CHECK-LABEL: zerobits_double_convoluted_hard
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK: vmov d0, r0, r1
+; CHECK-NEXT: vmov d1, r2, r3
+  %1 = call { double, double } asm "mov ${0:Q}, #0; mov ${0:R}, #0; mov ${1:Q}, #0; mov ${1:R}, #0", "=r,=r"()
+  %asmresult1 = extractvalue { double, double } %1, 0
+  %asmresult2 = extractvalue { double, double } %1, 1
+  %partialres = insertvalue %struct.twodouble undef, double %asmresult1, 0
+  %res = insertvalue %struct.twodouble %partialres, double %asmresult2, 1
+  ret %struct.twodouble %res
+}
+
+; Check support for returning several floats in GPRs with matching float inputs
+; with hard float ABI
+define %struct.twofloat @flt_gprs_matching_in_op_hard(float %f1, float %f2) #1 {
+; CHECK-LABEL: flt_gprs_matching_in_op_hard
+; CHECK: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK: mov r0, r0
+; CHECK-NEXT: mov r1, r1
+; CHECK: vmov s0, r0
+; CHECK-NEXT: vmov s1, r1
+  %1 = call { float, float } asm "mov $0, $2; mov $1, $3", "=&r,=&r,0,1"(float %f1, float %f2)
+  %asmresult1 = extractvalue { float, float } %1, 0
+  %asmresult2 = extractvalue { float, float } %1, 1
+  %partialres = insertvalue %struct.twofloat undef, float %asmresult1, 0
+  %res = insertvalue %struct.twofloat %partialres, float %asmresult2, 1
+  ret %struct.twofloat %res
+}
+
+; Check support for returning several doubles in GPRs with matching double inputs
+; with hard float ABI
+define %struct.twodouble @dbl_gprs_matching_in_op_hard(double %d1, double %d2) #1 {
+; CHECK-LABEL: dbl_gprs_matching_in_op_hard
+; CHECK: vmov r0, r1, d0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK: mov r1, r0
+; CHECK-NEXT: mov r3, r2
+; CHECK: vmov d0, r0, r1
+; CHECK-NEXT: vmov d1, r2, r3
+  %1 = call { double, double } asm "mov ${0:R}, ${2:Q}; mov ${1:R}, ${3:Q}", "=&r,=&r,0,1"(double %d1, double %d2)
+  %asmresult1 = extractvalue { double, double } %1, 0
+  %asmresult2 = extractvalue { double, double } %1, 1
+  %partialres = insertvalue %struct.twodouble undef, double %asmresult1, 0
+  %res = insertvalue %struct.twodouble %partialres, double %asmresult2, 1
+  ret %struct.twodouble %res
+}
+
+; Check support for returning several floats in specific GPRs with matching
+; float inputs with hard float ABI
+define %struct.twofloat @flt_gprs_matching_spec_reg_in_op_hard(float %f1, float %f2) #1 {
+; CHECK-LABEL: flt_gprs_matching_spec_reg_in_op_hard
+; CHECK: vmov r3, s0
+; CHECK-NEXT: vmov r4, s1
+; CHECK: mov r3, r3
+; CHECK-NEXT: mov r4, r4
+; CHECK: vmov s0, r3
+; CHECK-NEXT: vmov s1, r4
+  %1 = call { float, float } asm "mov $0, $2; mov $1, $3", "=&{r3},=&{r4},0,1"(float %f1, float %f2)
+  %asmresult1 = extractvalue { float, float } %1, 0
+  %asmresult2 = extractvalue { float, float } %1, 1
+  %partialres = insertvalue %struct.twofloat undef, float %asmresult1, 0
+  %res = insertvalue %struct.twofloat %partialres, float %asmresult2, 1
+  ret %struct.twofloat %res
+}
+
+; Check support for returning several doubles in specific GPRs with matching
+; double inputs with hard float ABI
+define %struct.twodouble @dbl_gprs_matching_spec_reg_in_op_hard(double %d1, double %d2) #1 {
+; CHECK-LABEL: dbl_gprs_matching_spec_reg_in_op_hard
+; CHECK: vmov r2, r3, d0
+; CHECK-NEXT: vmov r4, r5, d1
+; CHECK: mov r3, r2
+; CHECK-NEXT: mov r5, r4
+; CHECK: vmov d0, r2, r3
+; CHECK-NEXT: vmov d1, r4, r5
+  %1 = call { double, double } asm "mov ${0:R}, ${2:Q}; mov ${1:R}, ${3:Q}", "=&{r2},=&{r4},0,1"(double %d1, double %d2)
+  %asmresult1 = extractvalue { double, double } %1, 0
+  %asmresult2 = extractvalue { double, double } %1, 1
+  %partialres = insertvalue %struct.twodouble undef, double %asmresult1, 0
+  %res = insertvalue %struct.twodouble %partialres, double %asmresult2, 1
+  ret %struct.twodouble %res
+}
+
+attributes #1 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="false" }

Added: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/pr38087.ll
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/pr38087.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names < %s | \
+; RUN:   FileCheck %s
+; Function Attrs: nounwind readnone speculatable
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
+
+; Function Attrs: nounwind readnone speculatable
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
+
+define void @draw_llvm_vs_variant0() {
+; CHECK-LABEL: draw_llvm_vs_variant0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ldx r3, 0, r3
+; CHECK-NEXT:    mtvsrd f0, r3
+; CHECK-NEXT:    xxswapd v2, vs0
+; CHECK-NEXT:    vmrglh v2, v2, v2
+; CHECK-NEXT:    vextsh2w v2, v2
+; CHECK-NEXT:    xvcvsxwsp vs0, v2
+; CHECK-NEXT:    xxspltw vs0, vs0, 2
+; CHECK-NEXT:    xvmaddasp vs0, vs0, vs0
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %.size = load i32, i32* undef
+  %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %.size, i32 7)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %1, i32 0)
+  %3 = extractvalue { i32, i1 } %2, 0
+  %4 = select i1 false, i32 0, i32 %3
+  %5 = xor i1 false, true
+  %6 = sext i1 %5 to i32
+  %7 = load <4 x i16>, <4 x i16>* undef, align 2
+  %8 = extractelement <4 x i16> %7, i32 0
+  %9 = sext i16 %8 to i32
+  %10 = insertelement <4 x i32> undef, i32 %9, i32 0
+  %11 = extractelement <4 x i16> %7, i32 1
+  %12 = sext i16 %11 to i32
+  %13 = insertelement <4 x i32> %10, i32 %12, i32 1
+  %14 = extractelement <4 x i16> %7, i32 2
+  %15 = sext i16 %14 to i32
+  %16 = insertelement <4 x i32> %13, i32 %15, i32 2
+  %17 = extractelement <4 x i16> %7, i32 3
+  %18 = sext i16 %17 to i32
+  %19 = insertelement <4 x i32> %16, i32 %18, i32 3
+  %20 = sitofp <4 x i32> %19 to <4 x float>
+  %21 = insertelement <4 x i32> undef, i32 %6, i32 0
+  %22 = shufflevector <4 x i32> %21, <4 x i32> undef, <4 x i32> zeroinitializer
+  %23 = bitcast <4 x float> %20 to <4 x i32>
+  %24 = and <4 x i32> %23, %22
+  %25 = bitcast <4 x i32> %24 to <4 x float>
+  %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %27 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %26)
+  store <4 x float> %27, <4 x float>* undef
+  ret void
+}

Modified: vendor/llvm/dist-release_70/test/CodeGen/X86/flags-copy-lowering.mir
==============================================================================
--- vendor/llvm/dist-release_70/test/CodeGen/X86/flags-copy-lowering.mir	Sat Aug 18 06:33:51 2018	(r337999)
+++ vendor/llvm/dist-release_70/test/CodeGen/X86/flags-copy-lowering.mir	Sat Aug 18 08:26:46 2018	(r338000)
@@ -545,7 +545,7 @@ body:             |
     MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
   ; CHECK-NOT:     $eflags =
   ; CHECK:         %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
-  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
+  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr8 = COPY %[[ZERO]].sub_8bit
   ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
   ; CHECK-NEXT:    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
 
@@ -554,9 +554,9 @@ body:             |
     MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
   ; CHECK-NOT:     $eflags =
   ; CHECK:         %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
-  ; CHECK-NEXT:    %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
+  ; CHECK-NEXT:    %[[CF_TRUNC:[^:]*]]:gr16 = COPY %[[CF_EXT]].sub_16bit
   ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
-  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
+  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr16 = COPY %[[ZERO]].sub_16bit
   ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
   ; CHECK-NEXT:    MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
 

Added: vendor/llvm/dist-release_70/test/CodeGen/X86/pr38533.ll
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/llvm/dist-release_70/test/CodeGen/X86/pr38533.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+
+; This test makes sure that a vector that needs to be promoted and is bitcast to fp16 is legalized correctly without causing a width mismatch.
+define void @constant_fold_vector_to_half() {
+; CHECK-LABEL: constant_fold_vector_to_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movw $16384, (%rax) # imm = 0x4000
+; CHECK-NEXT:    retq
+  store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), half* undef
+  ret void
+}
+
+; Similarly this makes sure that the opposite bitcast of the above is also legalized without crashing.
+define void @pr38533_2(half %x) {
+; SSE-LABEL: pr38533_2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    callq __gnu_f2h_ieee
+; SSE-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movw %ax, (%rax)
+; SSE-NEXT:    popq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX512-LABEL: pr38533_2:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; AVX512-NEXT:    movw %ax, (%rax)
+; AVX512-NEXT:    retq
+  %a = bitcast half %x to <4 x i4>
+  store volatile <4 x i4> %a, <4 x i4>* undef
+  ret void
+}
+
+; This case is a bitcast from fp16 to a 16-bit wide legal vector type, where the result type is already legal when the bitcast gets type legalized.
+define void @pr38533_3(half %x) {
+; SSE-LABEL: pr38533_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    callq __gnu_f2h_ieee
+; SSE-NEXT:    movw %ax, (%rsp)
+; SSE-NEXT:    movzwl (%rsp), %eax
+; SSE-NEXT:    movw %ax, (%rax)
+; SSE-NEXT:    popq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX512-LABEL: pr38533_3:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    movw %ax, (%rax)
+; AVX512-NEXT:    retq
+  %a = bitcast half %x to <16 x i1>
+  store volatile <16 x i1> %a, <16 x i1>* undef
+  ret void
+}

Added: vendor/llvm/dist-release_70/test/CodeGen/X86/pr38539.ll
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/llvm/dist-release_70/test/CodeGen/X86/pr38539.ll	Sat Aug 18 08:26:46 2018	(r338000)
@@ -0,0 +1,314 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86
+
+; This test is targeted at 64-bit mode. It used to crash due to the creation of an EXTRACT_SUBREG after the peephole pass had run.
+define void @f() {
+; X64-LABEL: f:
+; X64:       # %bb.0: # %BB
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    subq $16, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 48
+; X64-NEXT:    .cfi_offset %rbx, -32
+; X64-NEXT:    .cfi_offset %r14, -24
+; X64-NEXT:    .cfi_offset %rbp, -16
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    shlq $62, %rcx
+; X64-NEXT:    sarq $62, %rcx
+; X64-NEXT:    movq (%rsp), %r14
+; X64-NEXT:    movb (%rax), %bpl
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    xorl %esi, %esi
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    callq __modti3
+; X64-NEXT:    andl $3, %edx
+; X64-NEXT:    cmpq %rax, %r14
+; X64-NEXT:    sbbq %rdx, %rbx
+; X64-NEXT:    setb %sil
+; X64-NEXT:    setae %bl
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    setne %dl
+; X64-NEXT:    setne (%rax)
+; X64-NEXT:    movzbl %bpl, %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    subb %sil, %cl
+; X64-NEXT:    # kill: def $eax killed $eax def $ax
+; X64-NEXT:    divb %al
+; X64-NEXT:    negb %bl
+; X64-NEXT:    cmpb %al, %al
+; X64-NEXT:    setle %al
+; X64-NEXT:    negb %al
+; X64-NEXT:    cbtw
+; X64-NEXT:    idivb %bl
+; X64-NEXT:    movsbl %ah, %eax
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    shlq $4, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    negb %dl
+; X64-NEXT:    leaq -16(%rsp,%rax), %rax
+; X64-NEXT:    movq %rax, (%rax)
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    cbtw
+; X64-NEXT:    idivb %dl
+; X64-NEXT:    movsbl %ah, %eax
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    movb %al, (%rax)
+; X64-NEXT:    addq $16, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    popq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X86-LABEL: f:
+; X86:       # %bb.0: # %BB
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

