svn commit: r337631 - in vendor/llvm/dist-release_70: . docs lib/Analysis lib/CodeGen/SelectionDAG lib/MC/MCParser lib/Target/AMDGPU lib/Target/NVPTX lib/Transforms/Instrumentation test/CodeGen/AAr...
Dimitry Andric
dim at FreeBSD.org
Sat Aug 11 16:29:30 UTC 2018
Author: dim
Date: Sat Aug 11 16:29:25 2018
New Revision: 337631
URL: https://svnweb.freebsd.org/changeset/base/337631
Log:
Vendor import of llvm release_70 branch r339355:
https://llvm.org/svn/llvm-project/llvm/branches/release_70@339355
Added:
vendor/llvm/dist-release_70/test/CodeGen/NVPTX/load-store.ll
vendor/llvm/dist-release_70/test/Instrumentation/BoundsChecking/many-traps-2.ll
Deleted:
vendor/llvm/dist-release_70/test/MC/ELF/extra-section-flags.s
Modified:
vendor/llvm/dist-release_70/CMakeLists.txt
vendor/llvm/dist-release_70/docs/ReleaseNotes.rst
vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp
vendor/llvm/dist-release_70/lib/Analysis/ValueTracking.cpp
vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
vendor/llvm/dist-release_70/lib/MC/MCParser/ELFAsmParser.cpp
vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPU.td
vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPUFeatures.td
vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.cpp
vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.h
vendor/llvm/dist-release_70/lib/Target/AMDGPU/VOP3Instructions.td
vendor/llvm/dist-release_70/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
vendor/llvm/dist-release_70/lib/Transforms/Instrumentation/BoundsChecking.cpp
vendor/llvm/dist-release_70/test/CodeGen/AArch64/fcopysign.ll
vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/kernel-args.ll
vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/mad_uint24.ll
vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
vendor/llvm/dist-release_70/test/CodeGen/X86/masked_memop.ll
vendor/llvm/dist-release_70/test/Transforms/InstSimplify/AndOrXor.ll
vendor/llvm/dist-release_70/test/Transforms/InstSimplify/floating-point-compare.ll
vendor/llvm/dist-release_70/test/Transforms/NewGVN/pair_jumpthread.ll
Modified: vendor/llvm/dist-release_70/CMakeLists.txt
==============================================================================
--- vendor/llvm/dist-release_70/CMakeLists.txt Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/CMakeLists.txt Sat Aug 11 16:29:25 2018 (r337631)
@@ -1037,6 +1037,6 @@ if(LLVM_DISTRIBUTION_COMPONENTS)
endif()
# This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake
-if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
+if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows" AND CMAKE_INSTALL_UCRT_LIBRARIES)
include(InstallRequiredSystemLibraries)
endif()
Modified: vendor/llvm/dist-release_70/docs/ReleaseNotes.rst
==============================================================================
--- vendor/llvm/dist-release_70/docs/ReleaseNotes.rst Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/docs/ReleaseNotes.rst Sat Aug 11 16:29:25 2018 (r337631)
@@ -40,6 +40,12 @@ Non-comprehensive list of changes in this release
functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
+* The Windows installer no longer includes a Visual Studio integration.
+ Instead, a new
+ `LLVM Compiler Toolchain Visual Studio extension <https://marketplace.visualstudio.com/items?itemName=LLVMExtensions.llvm-toolchain>`_
+ is available on the Visual Studio Marketplace. The new integration includes
+ support for Visual Studio 2017.
+
* Libraries have been renamed from 7.0 to 7. This change also impacts
downstream libraries like lldb.
@@ -155,6 +161,26 @@ Changes to the PowerPC Target
During this release ...
+Changes to the SystemZ Target
+-----------------------------
+
+During this release the SystemZ target has:
+
+* Added support for vector registers in inline asm statements.
+
+* Added support for stackmaps, patchpoints, and the anyregcc
+ calling convention.
+
+* Changed the default function alignment to 16 bytes.
+
+* Improved codegen for condition code handling.
+
+* Improved instruction scheduling and microarchitecture tuning for z13/z14.
+
+* Fixed support for generating GCOV coverage data.
+
+* Fixed some codegen bugs.
+
Changes to the X86 Target
-------------------------
@@ -191,6 +217,10 @@ Changes to the DAG infrastructure
should use UADDO/ADDCARRY/USUBO/SUBCARRY instead of the deprecated opcodes.
* The SETCCE opcode has now been removed in favor of SETCCCARRY.
+
+* TableGen now supports multi-alternative pattern fragments via the PatFrags
+ class. PatFrag is now derived from PatFrags, which may require minor
+ changes to backends that directly access PatFrag members.
External Open Source Projects Using LLVM 7
==========================================
Modified: vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Analysis/InstructionSimplify.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -1863,6 +1863,40 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1,
MaxRecurse))
return V;
+ // Assuming the effective width of Y is not larger than A, i.e. all bits
+ // from X and Y are disjoint in (X << A) | Y,
+ // if the mask of this AND op covers all bits of X or Y, while it covers
+ // no bits from the other, we can bypass this AND op. E.g.,
+ // ((X << A) | Y) & Mask -> Y,
+ // if Mask = ((1 << effective_width_of(Y)) - 1)
+ // ((X << A) | Y) & Mask -> X << A,
+ // if Mask = ((1 << effective_width_of(X)) - 1) << A
+ // SimplifyDemandedBits in InstCombine can optimize the general case.
+ // This pattern aims to help other passes for a common case.
+ Value *Y, *XShifted;
+ if (match(Op1, m_APInt(Mask)) &&
+ match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
+ m_Value(XShifted)),
+ m_Value(Y)))) {
+ const unsigned ShftCnt = ShAmt->getZExtValue();
+ const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const unsigned Width = Op0->getType()->getScalarSizeInBits();
+ const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ if (EffWidthY <= ShftCnt) {
+ const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
+ Q.DT);
+ const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+ const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
+ const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
+ // If the mask is extracting all bits from X or Y as is, we can skip
+ // this AND op.
+ if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
+ return Y;
+ if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
+ return XShifted;
+ }
+ }
+
return nullptr;
}
Modified: vendor/llvm/dist-release_70/lib/Analysis/ValueTracking.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Analysis/ValueTracking.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Analysis/ValueTracking.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -2817,10 +2817,13 @@ static bool cannotBeOrderedLessThanZeroImpl(const Valu
default:
break;
case Intrinsic::maxnum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1) ||
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
- Depth + 1);
+ return (isKnownNeverNaN(I->getOperand(0)) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
+ SignBitOnly, Depth + 1)) ||
+ (isKnownNeverNaN(I->getOperand(1)) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
+ SignBitOnly, Depth + 1));
+
case Intrinsic::minnum:
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1) &&
Modified: vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -1489,24 +1489,20 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *
// Get the signbit at the right position for MagAsInt.
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
+ EVT ShiftVT = IntVT;
+ if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+ ShiftVT = MagVT;
+ }
+ if (ShiftAmount > 0) {
+ SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst);
+ } else if (ShiftAmount < 0) {
+ SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
+ }
if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
- if (ShiftAmount > 0) {
- SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
- SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
- } else if (ShiftAmount < 0) {
- SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
- SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
- }
SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
- } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
- SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
- if (ShiftAmount > 0) {
- SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
- SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
- } else if (ShiftAmount < 0) {
- SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
- SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
- }
}
// Store the part with the modified sign and convert back to float.
Modified: vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -3641,26 +3641,43 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N)
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert(OpNo == 3 && "Can widen only data operand of mstore");
+ assert((OpNo == 2 || OpNo == 3) &&
+ "Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
SDValue StVal = MST->getValue();
- // Widen the value
- SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
- // The mask should be widened as well.
- EVT WideVT = WideVal.getValueType();
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(),
- WideVT.getVectorNumElements());
- Mask = ModifyToType(Mask, WideMaskVT, true);
+ if (OpNo == 3) {
+ // Widen the value
+ StVal = GetWidenedVector(StVal);
+ // The mask should be widened as well.
+ EVT WideVT = StVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else {
+ EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ EVT ValueVT = StVal.getValueType();
+ if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
+ StVal = GetWidenedVector(StVal);
+ else {
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
+ }
+ }
+
assert(Mask.getValueType().getVectorNumElements() ==
- WideVal.getValueType().getVectorNumElements() &&
+ StVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
- return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
false, MST->isCompressingStore());
}
Modified: vendor/llvm/dist-release_70/lib/MC/MCParser/ELFAsmParser.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/MC/MCParser/ELFAsmParser.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/MC/MCParser/ELFAsmParser.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -481,34 +481,6 @@ static bool hasPrefix(StringRef SectionName, StringRef
return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
}
-// Return a set of section flags based on the section name that can then
-// be augmented later, otherwise return 0 if we don't have any reasonable
-// defaults.
-static unsigned defaultSectionFlags(StringRef SectionName) {
-
- if (hasPrefix(SectionName, ".rodata.cst"))
- return ELF::SHF_ALLOC | ELF::SHF_MERGE;
-
- if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
- return ELF::SHF_ALLOC;
-
- if (SectionName == ".fini" || SectionName == ".init" ||
- hasPrefix(SectionName, ".text."))
- return ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
-
- if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
- hasPrefix(SectionName, ".bss.") ||
- hasPrefix(SectionName, ".init_array.") ||
- hasPrefix(SectionName, ".fini_array.") ||
- hasPrefix(SectionName, ".preinit_array."))
- return ELF::SHF_ALLOC | ELF::SHF_WRITE;
-
- if (hasPrefix(SectionName, ".tdata.") || hasPrefix(SectionName, ".tbss."))
- return ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;
-
- return 0;
-}
-
bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
StringRef SectionName;
@@ -518,13 +490,27 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush,
StringRef TypeName;
int64_t Size = 0;
StringRef GroupName;
+ unsigned Flags = 0;
const MCExpr *Subsection = nullptr;
bool UseLastGroup = false;
MCSymbolELF *Associated = nullptr;
int64_t UniqueID = ~0;
- // Set the default section flags first in case no others are given.
- unsigned Flags = defaultSectionFlags(SectionName);
+ // Set the defaults first.
+ if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
+ Flags |= ELF::SHF_ALLOC;
+ else if (SectionName == ".fini" || SectionName == ".init" ||
+ hasPrefix(SectionName, ".text."))
+ Flags |= ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
+ else if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
+ hasPrefix(SectionName, ".bss.") ||
+ hasPrefix(SectionName, ".init_array.") ||
+ hasPrefix(SectionName, ".fini_array.") ||
+ hasPrefix(SectionName, ".preinit_array."))
+ Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE;
+ else if (hasPrefix(SectionName, ".tdata.") ||
+ hasPrefix(SectionName, ".tbss."))
+ Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;
if (getLexer().is(AsmToken::Comma)) {
Lex();
@@ -552,12 +538,6 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush,
if (extraFlags == -1U)
return TokError("unknown flag");
-
- // If we found additional section flags on a known section then give a
- // warning.
- if (Flags && Flags != extraFlags)
- Warning(loc, "setting incorrect section attributes for " + SectionName);
-
Flags |= extraFlags;
bool Mergeable = Flags & ELF::SHF_MERGE;
Modified: vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPU.td
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPU.td Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPU.td Sat Aug 11 16:29:25 2018 (r337631)
@@ -267,15 +267,6 @@ def FeatureD16PreservesUnusedBits : SubtargetFeature<
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
- "FP32Denormals",
- "true",
- "Enable single precision denormal handling"
->;
-
// Denormal handling for fp64 and fp16 is controlled by the same
// config register when fp16 supported.
// TODO: Do we need a separate f16 setting when not legal?
Modified: vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPUFeatures.td
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPUFeatures.td Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Target/AMDGPU/AMDGPUFeatures.td Sat Aug 11 16:29:25 2018 (r337631)
@@ -19,6 +19,15 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+ "FP32Denormals",
+ "true",
+ "Enable single precision denormal handling"
+>;
+
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
Modified: vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(Sel
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.CONSTANT_BUFFER_0);
+ AMDGPUASI.PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -1457,33 +1457,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, Sele
return scalarizeVectorLoad(LoadNode, DAG);
}
+ // This is still used for explicit load from addrspace(8)
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
- isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
+ if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
isa<ConstantSDNode>(Ptr)) {
- SDValue Slots[4];
- for (unsigned i = 0; i < 4; i++) {
- // We want Const position encoded with the following formula :
- // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
- // const_index is Ptr computed by llvm using an alignment of 16.
- // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
- // then div by 4 at the ISel step
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
- Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
- }
- EVT NewVT = MVT::v4i32;
- unsigned NumElements = 4;
- if (VT.isVector()) {
- NewVT = VT;
- NumElements = VT.getVectorNumElements();
- }
- Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+ return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
} else {
+ //TODO: Does this even work?
// non-constant ptr can't be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
@@ -1622,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.CONSTANT_BUFFER_0);
+ AMDGPUASI.PARAM_I_ADDRESS);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1646,17 +1630,17 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
+ unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
ISD::UNINDEXED, Ext, VT, DL, Chain,
DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
PtrInfo,
- MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+ MemVT, Alignment, MachineMemOperand::MONonTemporal |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
- // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
@@ -1804,6 +1788,52 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue Bu
return BuildVector;
}
+SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
+ SelectionDAG &DAG) const {
+ SDLoc DL(LoadNode);
+ EVT VT = LoadNode->getValueType(0);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = LoadNode->getBasePtr();
+ assert (isa<ConstantSDNode>(Ptr));
+
+ //TODO: Support smaller loads
+ if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
+ return SDValue();
+
+ if (LoadNode->getAlignment() < 4)
+ return SDValue();
+
+ int ConstantBlock = ConstantAddressBlock(Block);
+
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want Const position encoded with the following formula :
+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // const_index is Ptr computed by llvm using an alignment of 16.
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
+ // then div by 4 at the ISel step
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ EVT NewVT = MVT::v4i32;
+ unsigned NumElements = 4;
+ if (VT.isVector()) {
+ NewVT = VT;
+ NumElements = VT.getVectorNumElements();
+ }
+ SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, DL);
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
@@ -2022,6 +2052,16 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
}
+
+ case ISD::LOAD: {
+ LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+ SDValue Ptr = LoadNode->getBasePtr();
+ if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
+ isa<ConstantSDNode>(Ptr))
+ return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
+ break;
+ }
+
default: break;
}
Modified: vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.h
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.h Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Target/AMDGPU/R600ISelLowering.h Sat Aug 11 16:29:25 2018 (r337631)
@@ -98,9 +98,11 @@ class R600TargetLowering final : public AMDGPUTargetLo
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
- bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
- SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
- SelectionDAG &DAG) const;
+ bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
+ SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
+ SelectionDAG &DAG) const;
+ SDValue constBufferLoad(LoadSDNode *LoadNode, int Block,
+ SelectionDAG &DAG) const;
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
Modified: vendor/llvm/dist-release_70/lib/Target/AMDGPU/VOP3Instructions.td
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/AMDGPU/VOP3Instructions.td Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Target/AMDGPU/VOP3Instructions.td Sat Aug 11 16:29:25 2018 (r337631)
@@ -461,17 +461,6 @@ def : GCNPat <
(inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
>;
-def : GCNPat<
- (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
- (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
->;
-
-def : GCNPat<
- (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
- (REG_SEQUENCE VReg_64,
- (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0,
- (V_MOV_B32_e32 (i32 0)), sub1)
->;
}
defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
Modified: vendor/llvm/dist-release_70/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -16,6 +16,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -81,10 +82,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
case ISD::LOAD:
+ case ISD::ATOMIC_LOAD:
if (tryLoad(N))
return;
break;
case ISD::STORE:
+ case ISD::ATOMIC_STORE:
if (tryStore(N))
return;
break;
@@ -834,17 +837,27 @@ static Optional<unsigned> pickOpcodeForVT(
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDLoc dl(N);
- LoadSDNode *LD = cast<LoadSDNode>(N);
+ MemSDNode *LD = cast<MemSDNode>(N);
+ assert(LD->readMem() && "Expected load");
+ LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
SDNode *NVPTXLD = nullptr;
// do not support pre/post inc/dec
- if (LD->isIndexed())
+ if (PlainLoad && PlainLoad->isIndexed())
return false;
if (!LoadedVT.isSimple())
return false;
+ AtomicOrdering Ordering = LD->getOrdering();
+ // In order to lower atomic loads with stronger guarantees we would need to
+ // use load.acquire or insert fences. However these features were only added
+ // with PTX ISA 6.0 / sm_70.
+ // TODO: Check if we can actually use the new instructions and implement them.
+ if (isStrongerThanMonotonic(Ordering))
+ return false;
+
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
@@ -855,8 +868,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
// Volatile Setting
- // - .volatile is only availalble for .global and .shared
- bool isVolatile = LD->isVolatile();
+ // - .volatile is only available for .global and .shared
+ // - .volatile has the same memory synchronization semantics as .relaxed.sys
+ bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@@ -882,7 +896,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
fromTypeWidth = 32;
}
- if ((LD->getExtensionType() == ISD::SEXTLOAD))
+ if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
// f16 uses .b16 as its storage type.
@@ -1691,25 +1705,38 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
SDLoc dl(N);
- StoreSDNode *ST = cast<StoreSDNode>(N);
+ MemSDNode *ST = cast<MemSDNode>(N);
+ assert(ST->writeMem() && "Expected store");
+ StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
+ AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
+ assert((PlainStore || AtomicStore) && "Expected store");
EVT StoreVT = ST->getMemoryVT();
SDNode *NVPTXST = nullptr;
// do not support pre/post inc/dec
- if (ST->isIndexed())
+ if (PlainStore && PlainStore->isIndexed())
return false;
if (!StoreVT.isSimple())
return false;
+ AtomicOrdering Ordering = ST->getOrdering();
+ // In order to lower atomic stores with stronger guarantees we would need to
+ // use store.release or insert fences. However these features were only added
+ // with PTX ISA 6.0 / sm_70.
+ // TODO: Check if we can actually use the new instructions and implement them.
+ if (isStrongerThanMonotonic(Ordering))
+ return false;
+
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
// Volatile Setting
- // - .volatile is only availalble for .global and .shared
- bool isVolatile = ST->isVolatile();
+ // - .volatile is only available for .global and .shared
+ // - .volatile has the same memory synchronization semantics as .relaxed.sys
+ bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@@ -1739,41 +1766,53 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
toType = NVPTX::PTXLdStInstCode::Unsigned;
// Create the machine instruction DAG
- SDValue Chain = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
+ SDValue Chain = ST->getChain();
+ SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
+ SDValue BasePtr = ST->getBasePtr();
SDValue Addr;
SDValue Offset, Base;
Optional<unsigned> Opcode;
- MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
+ MVT::SimpleValueType SourceVT =
+ Value.getNode()->getSimpleValueType(0).SimpleTy;
- if (SelectDirectAddr(N2, Addr)) {
+ if (SelectDirectAddr(BasePtr, Addr)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
- getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
- Chain };
+ SDValue Ops[] = {Value,
+ getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(toType, dl),
+ getI32Imm(toTypeWidth, dl),
+ Addr,
+ Chain};
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
- } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (PointerSize == 64
+ ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
+ : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
- getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
- Offset, Chain };
+ SDValue Ops[] = {Value,
+ getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(toType, dl),
+ getI32Imm(toTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
- } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ } else if (PointerSize == 64
+ ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
+ : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
if (PointerSize == 64)
Opcode = pickOpcodeForVT(
SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
@@ -1787,10 +1826,15 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
- SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
- getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
- Offset, Chain };
+ SDValue Ops[] = {Value,
+ getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(toType, dl),
+ getI32Imm(toTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else {
if (PointerSize == 64)
@@ -1806,10 +1850,14 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
- getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
- getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
- Chain };
+ SDValue Ops[] = {Value,
+ getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(toType, dl),
+ getI32Imm(toTypeWidth, dl),
+ BasePtr,
+ Chain};
NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
}
Modified: vendor/llvm/dist-release_70/lib/Transforms/Instrumentation/BoundsChecking.cpp
==============================================================================
--- vendor/llvm/dist-release_70/lib/Transforms/Instrumentation/BoundsChecking.cpp Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/lib/Transforms/Instrumentation/BoundsChecking.cpp Sat Aug 11 16:29:25 2018 (r337631)
@@ -47,21 +47,17 @@ STATISTIC(ChecksUnable, "Bounds checks unable to add")
using BuilderTy = IRBuilder<TargetFolder>;
-/// Adds run-time bounds checks to memory accessing instructions.
+/// Gets the conditions under which memory accessing instructions will overflow.
///
/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
/// the result from the load or the value being stored. It is used to determine
/// the size of memory block that is touched.
///
-/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
-///
-/// Returns true if any change was made to the IR, false otherwise.
-template <typename GetTrapBBT>
-static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
- const DataLayout &DL, TargetLibraryInfo &TLI,
- ObjectSizeOffsetEvaluator &ObjSizeEval,
- BuilderTy &IRB, GetTrapBBT GetTrapBB,
- ScalarEvolution &SE) {
+/// Returns the condition under which the access will overflow.
+static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
+ const DataLayout &DL, TargetLibraryInfo &TLI,
+ ObjectSizeOffsetEvaluator &ObjSizeEval,
+ BuilderTy &IRB, ScalarEvolution &SE) {
uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
@@ -70,7 +66,7 @@ static bool instrumentMemAccess(Value *Ptr, Value *Ins
if (!ObjSizeEval.bothKnown(SizeOffset)) {
++ChecksUnable;
- return false;
+ return nullptr;
}
Value *Size = SizeOffset.first;
@@ -107,13 +103,23 @@ static bool instrumentMemAccess(Value *Ptr, Value *Ins
Or = IRB.CreateOr(Cmp1, Or);
}
+ return Or;
+}
+
+/// Adds run-time bounds checks to memory accessing instructions.
+///
+/// \p Or is the condition that should guard the trap.
+///
+/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
+template <typename GetTrapBBT>
+static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
// check if the comparison is always false
ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
if (C) {
++ChecksSkipped;
// If non-zero, nothing to do.
if (!C->getZExtValue())
- return true;
+ return;
}
++ChecksAdded;
@@ -127,12 +133,11 @@ static bool instrumentMemAccess(Value *Ptr, Value *Ins
// FIXME: We should really handle this differently to bypass the splitting
// the block.
BranchInst::Create(GetTrapBB(IRB), OldBB);
- return true;
+ return;
}
// Create the conditional branch.
BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
- return true;
}
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
@@ -143,11 +148,25 @@ static bool addBoundsChecking(Function &F, TargetLibra
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
// touching instructions
- std::vector<Instruction *> WorkList;
+ SmallVector<std::pair<Instruction *, Value *>, 4> TrapInfo;
for (Instruction &I : instructions(F)) {
- if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
- isa<AtomicRMWInst>(I))
- WorkList.push_back(&I);
+ Value *Or = nullptr;
+ BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
+ ObjSizeEval, IRB, SE);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
+ DL, TLI, ObjSizeEval, IRB, SE);
+ } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
+ DL, TLI, ObjSizeEval, IRB, SE);
+ } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+ Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), DL,
+ TLI, ObjSizeEval, IRB, SE);
+ }
+ if (Or)
+ TrapInfo.push_back(std::make_pair(&I, Or));
}
// Create a trapping basic block on demand using a callback. Depending on
@@ -176,29 +195,14 @@ static bool addBoundsChecking(Function &F, TargetLibra
return TrapBB;
};
- bool MadeChange = false;
- for (Instruction *Inst : WorkList) {
+ // Add the checks.
+ for (const auto &Entry : TrapInfo) {
+ Instruction *Inst = Entry.first;
BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL));
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- MadeChange |= instrumentMemAccess(LI->getPointerOperand(), LI, DL, TLI,
- ObjSizeEval, IRB, GetTrapBB, SE);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- MadeChange |=
- instrumentMemAccess(SI->getPointerOperand(), SI->getValueOperand(),
- DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE);
- } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
- MadeChange |=
- instrumentMemAccess(AI->getPointerOperand(), AI->getCompareOperand(),
- DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE);
- } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
- MadeChange |=
- instrumentMemAccess(AI->getPointerOperand(), AI->getValOperand(), DL,
- TLI, ObjSizeEval, IRB, GetTrapBB, SE);
- } else {
- llvm_unreachable("unknown Instruction type");
- }
+ insertBoundsCheck(Entry.second, IRB, GetTrapBB);
}
- return MadeChange;
+
+ return !TrapInfo.empty();
}
PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) {
Modified: vendor/llvm/dist-release_70/test/CodeGen/AArch64/fcopysign.ll
==============================================================================
--- vendor/llvm/dist-release_70/test/CodeGen/AArch64/fcopysign.ll Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/test/CodeGen/AArch64/fcopysign.ll Sat Aug 11 16:29:25 2018 (r337631)
@@ -5,10 +5,12 @@ target triple = "aarch64--"
declare fp128 @llvm.copysign.f128(fp128, fp128)
-@val = global double zeroinitializer, align 8
+@val_float = global float zeroinitializer, align 4
+@val_double = global double zeroinitializer, align 8
+@val_fp128 = global fp128 zeroinitializer, align 16
; CHECK-LABEL: copysign0
-; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val]
+; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val_double]
; CHECK: and [[ANDREG:x[0-9]+]], [[REG]], #0x8000000000000000
; CHECK: lsr x[[LSRREGNUM:[0-9]+]], [[ANDREG]], #56
; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7
@@ -16,8 +18,25 @@ declare fp128 @llvm.copysign.f128(fp128, fp128)
; CHECK: ldr q{{[0-9]+}},
define fp128 @copysign0() {
entry:
- %v = load double, double* @val, align 8
+ %v = load double, double* @val_double, align 8
%conv = fpext double %v to fp128
%call = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000007FFF000000000000, fp128 %conv) #2
+ ret fp128 %call
+}
+
+; CHECK-LABEL: copysign1
+; CHECK-DAG: ldr [[REG:q[0-9]+]], [x8, :lo12:val_fp128]
+; CHECK-DAG: ldr [[REG:w[0-9]+]], [x8, :lo12:val_float]
+; CHECK: and [[ANDREG:w[0-9]+]], [[REG]], #0x80000000
+; CHECK: lsr w[[LSRREGNUM:[0-9]+]], [[ANDREG]], #24
+; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7
+; CHECK: strb w[[LSRREGNUM]],
+; CHECK: ldr q{{[0-9]+}},
+define fp128@copysign1() {
+entry:
+ %v0 = load fp128, fp128* @val_fp128, align 16
+ %v1 = load float, float* @val_float, align 4
+ %conv = fpext float %v1 to fp128
+ %call = tail call fp128 @llvm.copysign.f128(fp128 %v0, fp128 %conv)
ret fp128 %call
}
Modified: vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/kernel-args.ll
==============================================================================
--- vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/kernel-args.ll Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/kernel-args.ll Sat Aug 11 16:29:25 2018 (r337631)
@@ -16,13 +16,8 @@
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
-; EG: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.X, KC0[2].Z,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-
-; CM: LSHR * T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: MOV * T1.X, KC0[2].Z,
+; EGCM: VTX_READ_8{{.*}} #3
+; EGCM: KC0[2].Y
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -92,14 +87,8 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-VI: flat_store_dword
-
-; EG: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.X, KC0[2].Z,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-
-; CM: LSHR * T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: MOV * T1.X, KC0[2].Z,
+; EGCM: VTX_READ_16
+; EGCM: KC0[2].Y
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
Modified: vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/mad_uint24.ll
==============================================================================
--- vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/mad_uint24.ll Sat Aug 11 16:12:23 2018 (r337630)
+++ vendor/llvm/dist-release_70/test/CodeGen/AMDGPU/mad_uint24.ll Sat Aug 11 16:29:25 2018 (r337631)
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -136,5 +136,92 @@ bb4: ; pr
bb18: ; preds = %bb4
store i32 %tmp16, i32 addrspace(1)* %arg
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i8_mad_sat_16:
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
+; The result must be sign-extended
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 8
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-vendor
mailing list