git: 471c6e3367a1 - stable/12 - Apply fix for LLVM PR51957 (Miscompilation in Botan's SHA3)

From: Dimitry Andric <dim_at_FreeBSD.org>
Date: Sat, 25 Dec 2021 11:56:27 UTC
The branch stable/12 has been updated by dim:

URL: https://cgit.FreeBSD.org/src/commit/?id=471c6e3367a11d2d8a9bf49ca98faa1a452f77ac

commit 471c6e3367a11d2d8a9bf49ca98faa1a452f77ac
Author:     Dimitry Andric <dim@FreeBSD.org>
AuthorDate: 2021-11-10 18:38:23 +0000
Commit:     Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2021-12-25 11:51:41 +0000

    Apply fix for LLVM PR51957 (Miscompilation in Botan's SHA3)
    
    Merge commit e27a6db5298f from llvm git (by Jameson Nash):
    
      Bad SLPVectorization shufflevector replacement, resulting in write to wrong memory location
    
      We see that it might otherwise do:
    
        %10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4>
        %11 = bitcast <2 x {}***> %10 to <2 x i64*>
      ...
        %27 = extractelement <2 x i64*> %11, i32 0
        %28 = bitcast i64* %27 to <2 x i64>*
        store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2
    
      This is an out-of-bounds store: the extractelement reads lane 0 (offset
      10) instead of lane 1 (offset 4) as intended. With the fix, we correctly
      generate the extractelement for i32 1, which produces correct code.
    
      Differential Revision: https://reviews.llvm.org/D106613
    
    (cherry picked from commit 397a8ba05313cc3815d219c9d1b2de1372fcb561)
---
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp     | 25 ++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc3f5c7d4b48..1d06bc7d79a7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5430,8 +5430,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         // The pointer operand uses an in-tree scalar so we add the new BitCast
         // to ExternalUses list to make sure that an extract will be generated
         // in the future.
-        if (getTreeEntry(PO))
-          ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
+        if (TreeEntry *Entry = getTreeEntry(PO)) {
+          // Find which lane we need to extract.
+          unsigned FoundLane = Entry->findLaneForValue(PO);
+          ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+        }
 
         NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
       } else {
@@ -5474,8 +5477,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The pointer operand uses an in-tree scalar, so add the new BitCast to
       // ExternalUses to make sure that an extract will be generated in the
       // future.
-      if (getTreeEntry(ScalarPtr))
-        ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+      if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
+        // Find which lane we need to extract.
+        unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
+        ExternalUses.push_back(
+            ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+      }
 
       Value *V = propagateMetadata(ST, E->Scalars);
 
@@ -5577,8 +5584,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The scalar argument uses an in-tree scalar so we add the new vectorized
       // call to ExternalUses list to make sure that an extract will be
       // generated in the future.
-      if (ScalarArg && getTreeEntry(ScalarArg))
-        ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+      if (ScalarArg) {
+        if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
+          // Find which lane we need to extract.
+          unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
+          ExternalUses.push_back(
+              ExternalUser(ScalarArg, cast<User>(V), FoundLane));
+        }
+      }
 
       propagateIRFlags(V, E->Scalars, VL0);
       ShuffleBuilder.addMask(E->ReuseShuffleIndices);