git: 78d3648e73d1 - main - Merge commit 55c466da2f2f from llvm-project (by Benjamin Kramer):
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 11 Apr 2024 21:17:48 UTC
The branch main has been updated by dim:
URL: https://cgit.FreeBSD.org/src/commit/?id=78d3648e73d11c5a4dbcc0392907f0723bf1df1c
commit 78d3648e73d11c5a4dbcc0392907f0723bf1df1c
Author: Dimitry Andric <dim@FreeBSD.org>
AuthorDate: 2024-04-11 21:12:42 +0000
Commit: Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2024-04-11 21:16:36 +0000
Merge commit 55c466da2f2f from llvm-project (by Benjamin Kramer):
[X86][AVX512BF16] Add a few missing insert/extract patterns
These are really the same as the f16 (and i16) instructions, but we need
insert/extract patterns for every vector type that can occur.
Merge commit 2e4e04c59043 from llvm-project (by Phoebe Wang):
[X86][BF16] Do not lower to VCVTNEPS2BF16 without AVX512VL (#86395)
Fixes: #86305
These two merged commits should fix the "fatal error: error in backend:
Cannot select: t71: v32bf16 = insert_subvector t67, t64, Constant:i32<16>"
failure seen when building the misc/ncnn port.
PR: 278305
Reported by: yuri
MFC after: 1 month
---
contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++--
contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td | 12 ++++++++++++
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9e64726fb6ff..96bbd981ff24 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21420,7 +21420,9 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
}
if (VT.getScalarType() == MVT::bf16) {
- if (SVT.getScalarType() == MVT::f32 && isTypeLegal(VT))
+ if (SVT.getScalarType() == MVT::f32 &&
+ ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
+ Subtarget.hasAVXNECONVERT()))
return Op;
return SDValue();
}
@@ -21527,7 +21529,8 @@ SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op,
SDLoc DL(Op);
MVT SVT = Op.getOperand(0).getSimpleValueType();
- if (SVT == MVT::f32 && (Subtarget.hasBF16() || Subtarget.hasAVXNECONVERT())) {
+ if (SVT == MVT::f32 && ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
+ Subtarget.hasAVXNECONVERT())) {
SDValue Res;
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, Op.getOperand(0));
Res = DAG.getNode(X86ISD::CVTNEPS2BF16, DL, MVT::v8bf16, Res);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index bb5e22c71427..fdca58141f0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -494,6 +494,8 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
@@ -501,6 +503,8 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
@@ -508,6 +512,8 @@ defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
@@ -795,6 +801,8 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
@@ -803,6 +811,8 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
@@ -810,6 +820,8 @@ defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a