Blame 0001-InstCombine-Fix-big-endian-miscompile-of-bitcast-zex.patch

ca4f417
From f8e146f3430de3a6cd904f3f3f7aa1bfaefee14c Mon Sep 17 00:00:00 2001
9b85888
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
9b85888
Date: Thu, 28 Nov 2019 23:18:28 +0100
9b85888
Subject: [PATCH] [InstCombine] Fix big-endian miscompile of (bitcast
9b85888
 (zext/trunc (bitcast)))
9b85888
9b85888
Summary:
9b85888
optimizeVectorResize is rewriting patterns like:
9b85888
  %1 = bitcast vector %src to integer
9b85888
  %2 = trunc/zext %1
9b85888
  %dst = bitcast %2 to vector
9b85888
9b85888
Since bitcasting between integer and vector types gives
9b85888
different integer values depending on endianness, we need
9b85888
to take endianness into account. As it happens the old
9b85888
implementation only produced the correct result for little
9b85888
endian targets.
9b85888
9b85888
Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178
9b85888
9b85888
Reviewers: spatel, lattner, lebedev.ri
9b85888
9b85888
Reviewed By: spatel, lebedev.ri
9b85888
9b85888
Subscribers: lebedev.ri, hiraditya, uabelho, llvm-commits
9b85888
9b85888
Tags: #llvm
9b85888
9b85888
Differential Revision: https://reviews.llvm.org/D70844
ca4f417
ca4f417
(cherry picked from commit a9d6b0e5444741d08ff1df7cf71d1559e7fefc1f)
9b85888
---
9b85888
 .../InstCombine/InstCombineCasts.cpp          | 79 +++++++++++++------
ca4f417
 llvm/test/Transforms/InstCombine/cast.ll      |  6 +-
ca4f417
 2 files changed, 60 insertions(+), 25 deletions(-)
9b85888
9b85888
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
ca4f417
index 2c9ba203fbf3..0af3de300e77 100644
9b85888
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
9b85888
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
9b85888
@@ -18,6 +18,7 @@
9b85888
 #include "llvm/IR/DIBuilder.h"
9b85888
 #include "llvm/IR/PatternMatch.h"
9b85888
 #include "llvm/Support/KnownBits.h"
9b85888
+#include <numeric>
9b85888
 using namespace llvm;
9b85888
 using namespace PatternMatch;
9b85888
 
9b85888
@@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
9b85888
 }
9b85888
 
9b85888
 /// This input value (which is known to have vector type) is being zero extended
9b85888
-/// or truncated to the specified vector type.
9b85888
+/// or truncated to the specified vector type. Since the zext/trunc is done
9b85888
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
9b85888
+/// endianness will impact which end of the vector is extended or
9b85888
+/// truncated.
9b85888
+///
9b85888
+/// A vector is always stored with index 0 at the lowest address, which
9b85888
+/// corresponds to the most significant bits for a big endian stored integer and
9b85888
+/// the least significant bits for little endian. A trunc/zext of an integer
9b85888
+/// impacts the big end of the integer. Thus, we need to add/remove elements at
9b85888
+/// the front of the vector for big endian targets, and the back of the vector
9b85888
+/// for little endian targets.
9b85888
+///
9b85888
 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
9b85888
 ///
9b85888
 /// The source and destination vector types may have different element types.
9b85888
-static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
9b85888
-                                         InstCombiner &IC) {
9b85888
+static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
9b85888
+                                                            VectorType *DestTy,
9b85888
+                                                            InstCombiner &IC) {
9b85888
   // We can only do this optimization if the output is a multiple of the input
9b85888
   // element size, or the input is a multiple of the output element size.
9b85888
   // Convert the input type to have the same element type as the output.
9b85888
@@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
9b85888
     InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
9b85888
   }
9b85888
 
9b85888
+  bool IsBigEndian = IC.getDataLayout().isBigEndian();
9b85888
+  unsigned SrcElts = SrcTy->getNumElements();
9b85888
+  unsigned DestElts = DestTy->getNumElements();
9b85888
+
9b85888
+  assert(SrcElts != DestElts && "Element counts should be different.");
9b85888
+
9b85888
   // Now that the element types match, get the shuffle mask and RHS of the
9b85888
   // shuffle to use, which depends on whether we're increasing or decreasing the
9b85888
   // size of the input.
9b85888
-  SmallVector<uint32_t, 16> ShuffleMask;
9b85888
+  SmallVector<uint32_t, 16> ShuffleMaskStorage;
9b85888
+  ArrayRef<uint32_t> ShuffleMask;
9b85888
   Value *V2;
9b85888
 
9b85888
-  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
9b85888
-    // If we're shrinking the number of elements, just shuffle in the low
9b85888
-    // elements from the input and use undef as the second shuffle input.
9b85888
-    V2 = UndefValue::get(SrcTy);
9b85888
-    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
9b85888
-      ShuffleMask.push_back(i);
9b85888
+  // Produce an identity shuffle mask for the src vector.
9b85888
+  ShuffleMaskStorage.resize(SrcElts);
9b85888
+  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
9b85888
 
9b85888
+  if (SrcElts > DestElts) {
9b85888
+    // If we're shrinking the number of elements (rewriting an integer
9b85888
+    // truncate), just shuffle in the elements corresponding to the least
9b85888
+    // significant bits from the input and use undef as the second shuffle
9b85888
+    // input.
9b85888
+    V2 = UndefValue::get(SrcTy);
9b85888
+    // Make sure the shuffle mask selects the "least significant bits" by
9b85888
+    // keeping elements from back of the src vector for big endian, and from the
9b85888
+    // front for little endian.
9b85888
+    ShuffleMask = ShuffleMaskStorage;
9b85888
+    if (IsBigEndian)
9b85888
+      ShuffleMask = ShuffleMask.take_back(DestElts);
9b85888
+    else
9b85888
+      ShuffleMask = ShuffleMask.take_front(DestElts);
9b85888
   } else {
9b85888
-    // If we're increasing the number of elements, shuffle in all of the
9b85888
-    // elements from InVal and fill the rest of the result elements with zeros
9b85888
-    // from a constant zero.
9b85888
+    // If we're increasing the number of elements (rewriting an integer zext),
9b85888
+    // shuffle in all of the elements from InVal. Fill the rest of the result
9b85888
+    // elements with zeros from a constant zero.
9b85888
     V2 = Constant::getNullValue(SrcTy);
9b85888
-    unsigned SrcElts = SrcTy->getNumElements();
9b85888
-    for (unsigned i = 0, e = SrcElts; i != e; ++i)
9b85888
-      ShuffleMask.push_back(i);
9b85888
-
9b85888
-    // The excess elements reference the first element of the zero input.
9b85888
-    for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
9b85888
-      ShuffleMask.push_back(SrcElts);
9b85888
+    // Use first elt from V2 when indicating zero in the shuffle mask.
9b85888
+    uint32_t NullElt = SrcElts;
9b85888
+    // Extend with null values in the "most significant bits" by adding elements
9b85888
+    // in front of the src vector for big endian, and at the back for little
9b85888
+    // endian.
9b85888
+    unsigned DeltaElts = DestElts - SrcElts;
9b85888
+    if (IsBigEndian)
9b85888
+      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
9b85888
+    else
9b85888
+      ShuffleMaskStorage.append(DeltaElts, NullElt);
9b85888
+    ShuffleMask = ShuffleMaskStorage;
9b85888
   }
9b85888
 
9b85888
   return new ShuffleVectorInst(InVal, V2,
ca4f417
@@ -2359,8 +2394,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
9b85888
         CastInst *SrcCast = cast<CastInst>(Src);
9b85888
         if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
9b85888
           if (isa<VectorType>(BCIn->getOperand(0)->getType()))
9b85888
-            if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
9b85888
-                                               cast<VectorType>(DestTy), *this))
9b85888
+            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
9b85888
+                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
9b85888
               return I;
9b85888
       }
9b85888
 
ca4f417
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
ca4f417
index b6d1eda0601d..3ce8de033422 100644
ca4f417
--- a/llvm/test/Transforms/InstCombine/cast.ll
ca4f417
+++ b/llvm/test/Transforms/InstCombine/cast.ll
ca4f417
@@ -824,7 +824,7 @@ define i64 @test59(i8 %A, i8 %B) {
ca4f417
 
ca4f417
 define <3 x i32> @test60(<4 x i32> %call4) {
ca4f417
 ; CHECK-LABEL: @test60(
ca4f417
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
ca4f417
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
ca4f417
 ; CHECK-NEXT:    ret <3 x i32> [[P10]]
ca4f417
 ;
ca4f417
   %p11 = bitcast <4 x i32> %call4 to i128
ca4f417
@@ -836,7 +836,7 @@ define <3 x i32> @test60(<4 x i32> %call4) {
ca4f417
 
ca4f417
 define <4 x i32> @test61(<3 x i32> %call4) {
ca4f417
 ; CHECK-LABEL: @test61(
ca4f417
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ca4f417
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
ca4f417
 ; CHECK-NEXT:    ret <4 x i32> [[P10]]
ca4f417
 ;
ca4f417
   %p11 = bitcast <3 x i32> %call4 to i96
ca4f417
@@ -848,7 +848,7 @@ define <4 x i32> @test61(<3 x i32> %call4) {
ca4f417
 define <4 x i32> @test62(<3 x float> %call4) {
ca4f417
 ; CHECK-LABEL: @test62(
ca4f417
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32>
ca4f417
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ca4f417
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
ca4f417
 ; CHECK-NEXT:    ret <4 x i32> [[P10]]
ca4f417
 ;
ca4f417
   %p11 = bitcast <3 x float> %call4 to i96
9b85888
-- 
9b85888
2.26.2
9b85888