sergesanspaille / rpms / llvm

Forked from rpms/llvm 5 years ago
Clone
93d2074
From 2ac90db51fc323d183aabe744e57f4feca6d3008 Mon Sep 17 00:00:00 2001
93d2074
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
93d2074
Date: Wed, 1 Aug 2018 11:57:58 +0000
93d2074
Subject: [PATCH] [SystemZ, TableGen] Fix shift count handling
93d2074
93d2074
*Backport of this patch from trunk without the TableGen fix and modified
93d2074
to work with LLVM 6.0 TableGen.*
93d2074
93d2074
The DAG combiner logic to simplify AND masks in shift counts is invalid.
93d2074
While it is true that the SystemZ shift instructions ignore all but the
93d2074
low 6 bits of the shift count, it is still invalid to simplify the AND
93d2074
masks while the DAG still uses the standard shift operators (which are
93d2074
*not* defined to match the SystemZ instruction behavior).
93d2074
93d2074
Instead, this patch performs equivalent operations during instruction
93d2074
selection. For completely removing the AND, this now happens via
93d2074
additional DAG match patterns implemented by a multi-alternative
93d2074
PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG
93d2074
patterns were already mostly OK, they just needed an output XForm to
93d2074
actually truncate the immediate value.
93d2074
93d2074
Unfortunately, the latter change also exposed a bug in TableGen: it
93d2074
seems XForms are currently only handled correctly for direct operands of
93d2074
the outermost operation node. This patch also fixes that bug by simply
93d2074
recursing through the whole pattern. This should be NFC for all other
93d2074
targets.
93d2074
93d2074
Differential Revision: https://reviews.llvm.org/D50096
93d2074
93d2074
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338521 91177308-0d34-0410-b5e6-96231b3b80d8
93d2074
---
93d2074
 lib/Target/SystemZ/SystemZISelLowering.cpp | 78 ------------------------------
93d2074
 lib/Target/SystemZ/SystemZISelLowering.h   |  1 -
93d2074
 lib/Target/SystemZ/SystemZInstrInfo.td     | 49 +++++++++++++------
93d2074
 lib/Target/SystemZ/SystemZOperands.td      |  1 +
93d2074
 lib/Target/SystemZ/SystemZOperators.td     |  6 +++
93d2074
 test/CodeGen/SystemZ/shift-12.ll           | 12 +++++
93d2074
 utils/TableGen/CodeGenDAGPatterns.cpp      | 39 ++++++++-------
93d2074
 7 files changed, 71 insertions(+), 115 deletions(-)
93d2074
93d2074
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
93d2074
index adf3683..505b143 100644
93d2074
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
93d2074
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
93d2074
@@ -522,10 +522,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
93d2074
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
93d2074
   setTargetDAGCombine(ISD::FP_ROUND);
93d2074
   setTargetDAGCombine(ISD::BSWAP);
93d2074
-  setTargetDAGCombine(ISD::SHL);
93d2074
-  setTargetDAGCombine(ISD::SRA);
93d2074
-  setTargetDAGCombine(ISD::SRL);
93d2074
-  setTargetDAGCombine(ISD::ROTL);
93d2074
 
93d2074
   // Handle intrinsics.
93d2074
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
93d2074
@@ -5405,76 +5401,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
93d2074
   return SDValue();
93d2074
 }
93d2074
 
93d2074
-SDValue SystemZTargetLowering::combineSHIFTROT(
93d2074
-    SDNode *N, DAGCombinerInfo &DCI) const {
93d2074
-
93d2074
-  SelectionDAG &DAG = DCI.DAG;
93d2074
-
93d2074
-  // Shift/rotate instructions only use the last 6 bits of the second operand
93d2074
-  // register. If the second operand is the result of an AND with an immediate
93d2074
-  // value that has its last 6 bits set, we can safely remove the AND operation.
93d2074
-  //
93d2074
-  // If the AND operation doesn't have the last 6 bits set, we can't remove it
93d2074
-  // entirely, but we can still truncate it to a 16-bit value. This prevents
93d2074
-  // us from ending up with a NILL with a signed operand, which will cause the
93d2074
-  // instruction printer to abort.
93d2074
-  SDValue N1 = N->getOperand(1);
93d2074
-  if (N1.getOpcode() == ISD::AND) {
93d2074
-    SDValue AndMaskOp = N1->getOperand(1);
93d2074
-    auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
93d2074
-
93d2074
-    // The AND mask is constant
93d2074
-    if (AndMask) {
93d2074
-      auto AmtVal = AndMask->getZExtValue();
93d2074
-      
93d2074
-      // Bottom 6 bits are set
93d2074
-      if ((AmtVal & 0x3f) == 0x3f) {
93d2074
-        SDValue AndOp = N1->getOperand(0);
93d2074
-
93d2074
-        // This is the only use, so remove the node
93d2074
-        if (N1.hasOneUse()) {
93d2074
-          // Combine the AND away
93d2074
-          DCI.CombineTo(N1.getNode(), AndOp);
93d2074
-
93d2074
-          // Return N so it isn't rechecked
93d2074
-          return SDValue(N, 0);
93d2074
-
93d2074
-        // The node will be reused, so create a new node for this one use
93d2074
-        } else {
93d2074
-          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
93d2074
-                                        N->getValueType(0), N->getOperand(0),
93d2074
-                                        AndOp);
93d2074
-          DCI.AddToWorklist(Replace.getNode());
93d2074
-
93d2074
-          return Replace;
93d2074
-        }
93d2074
-
93d2074
-      // We can't remove the AND, but we can use NILL here (normally we would
93d2074
-      // use NILF). Only keep the last 16 bits of the mask. The actual
93d2074
-      // transformation will be handled by .td definitions.
93d2074
-      } else if (AmtVal >> 16 != 0) {
93d2074
-        SDValue AndOp = N1->getOperand(0);
93d2074
-
93d2074
-        auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
93d2074
-                                       SDLoc(AndMaskOp),
93d2074
-                                       AndMaskOp.getValueType());
93d2074
-
93d2074
-        auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
93d2074
-                                  AndOp, NewMask);
93d2074
-
93d2074
-        SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
93d2074
-                                      N->getValueType(0), N->getOperand(0),
93d2074
-                                      NewAnd);
93d2074
-        DCI.AddToWorklist(Replace.getNode());
93d2074
-
93d2074
-        return Replace;
93d2074
-      }
93d2074
-    }
93d2074
-  }
93d2074
-
93d2074
-  return SDValue();
93d2074
-}
93d2074
-
93d2074
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
93d2074
                                                  DAGCombinerInfo &DCI) const {
93d2074
   switch(N->getOpcode()) {
93d2074
@@ -5487,10 +5413,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
93d2074
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
93d2074
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
93d2074
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
93d2074
-  case ISD::SHL:
93d2074
-  case ISD::SRA:
93d2074
-  case ISD::SRL:
93d2074
-  case ISD::ROTL:               return combineSHIFTROT(N, DCI);
93d2074
   }
93d2074
 
93d2074
   return SDValue();
93d2074
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
93d2074
index 2cdc88d..1918d45 100644
93d2074
--- a/lib/Target/SystemZ/SystemZISelLowering.h
93d2074
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
93d2074
@@ -570,7 +570,6 @@ private:
93d2074
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
93d2074
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
93d2074
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
93d2074
-  SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
93d2074
 
93d2074
   // If the last instruction before MBBI in MBB was some form of COMPARE,
93d2074
   // try to replace it with a COMPARE AND BRANCH just before MBBI.
93d2074
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
93d2074
index abb8045..fb40cb4 100644
93d2074
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
93d2074
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
93d2074
@@ -1318,9 +1318,20 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
93d2074
 // Shifts
93d2074
 //===----------------------------------------------------------------------===//
93d2074
 
93d2074
+// Complexity is 8 so we match it before the NILL patterns below.
93d2074
+let AddedComplexity = 8 in {
93d2074
+
93d2074
+class ShiftAndPat <SDNode node, Instruction inst, ValueType vt> : Pat <
93d2074
+  (node vt:$val, (and i32:$count, imm32bottom6set)),
93d2074
+  (inst vt:$val, i32:$count, 0)
93d2074
+>;
93d2074
+}
93d2074
+
93d2074
 // Logical shift left.
93d2074
 defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
93d2074
+def : ShiftAndPat <shl, SLL, i32>;
93d2074
 def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
93d2074
+def : ShiftAndPat <shl, SLLG, i64>;
93d2074
 def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
93d2074
 
93d2074
 // Arithmetic shift left.
93d2074
@@ -1332,7 +1343,9 @@ let Defs = [CC] in {
93d2074
 
93d2074
 // Logical shift right.
93d2074
 defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
93d2074
+def : ShiftAndPat <srl, SRL, i32>;
93d2074
 def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
93d2074
+def : ShiftAndPat <srl, SRLG, i64>;
93d2074
 def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
93d2074
 
93d2074
 // Arithmetic shift right.
93d2074
@@ -1341,10 +1354,14 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
93d2074
   def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
93d2074
   def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
93d2074
 }
93d2074
+def : ShiftAndPat <sra, SRA, i32>;
93d2074
+def : ShiftAndPat <sra, SRAG, i64>;
93d2074
 
93d2074
 // Rotate left.
93d2074
 def RLL  : BinaryRSY<"rll",  0xEB1D, rotl, GR32>;
93d2074
+def : ShiftAndPat <rotl, RLL, i32>;
93d2074
 def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
93d2074
+def : ShiftAndPat <rotl, RLLG, i64>;
93d2074
 
93d2074
 // Rotate second operand left and inserted selected bits into first operand.
93d2074
 // These can act like 32-bit operands provided that the constant start and
93d2074
@@ -2154,29 +2171,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
93d2074
 // Complexity is added so that we match this before we match NILF on the AND
93d2074
 // operation alone.
93d2074
 let AddedComplexity = 4 in {
93d2074
-  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 
93d2074
-  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
93d2074
-            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
93d2074
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
93d2074
+            (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
93d2074
 }
93d2074
 
93d2074
 // Peepholes for turning scalar operations into block operations.
93d2074
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
93d2074
index 7136121..61a1124 100644
93d2074
--- a/lib/Target/SystemZ/SystemZOperands.td
93d2074
+++ b/lib/Target/SystemZ/SystemZOperands.td
93d2074
@@ -341,6 +341,7 @@ def imm32zx16 : Immediate
93d2074
 }], UIMM16, "U16Imm">;
93d2074
 
93d2074
 def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
93d2074
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
93d2074
 
93d2074
 // Full 32-bit immediates.  we need both signed and unsigned versions
93d2074
 // because the assembler is picky.  E.g. AFI requires signed operands
93d2074
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
93d2074
index d067f33..269c3d0 100644
93d2074
--- a/lib/Target/SystemZ/SystemZOperators.td
93d2074
+++ b/lib/Target/SystemZ/SystemZOperators.td
93d2074
@@ -611,6 +611,12 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
93d2074
   : PatFrag<(ops node:$addr),
93d2074
             (store (operator), node:$addr)>;
93d2074
 
93d2074
+// Immediate leaf matching a 32-bit AND mask whose bottom 6 bits are all
93d2074
+// set; shift patterns use it to ignore such an AND of the shift count.
93d2074
+def imm32bottom6set : PatLeaf<(i32 imm), [{
93d2074
+  return (N->getZExtValue() & 0x3f) == 0x3f;
93d2074
+}]>;
93d2074
+
93d2074
 // Vector representation of all-zeros and all-ones.
93d2074
 def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
93d2074
 def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
93d2074
diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll
93d2074
index 4ebc42b..53d3d53 100644
93d2074
--- a/test/CodeGen/SystemZ/shift-12.ll
93d2074
+++ b/test/CodeGen/SystemZ/shift-12.ll
93d2074
@@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) {
93d2074
   %reuse = add i32 %and, %shift
93d2074
   ret i32 %reuse
93d2074
 }
93d2074
+
93d2074
+; Test that AND is not removed for i128 (which calls __ashlti3)
93d2074
+define i128 @f11(i128 %a, i32 %sh) {
93d2074
+; CHECK-LABEL: f11:
93d2074
+; CHECK: risbg %r4, %r4, 57, 191, 0
93d2074
+; CHECK: brasl %r14, __ashlti3@PLT
93d2074
+  %and = and i32 %sh, 127
93d2074
+  %ext = zext i32 %and to i128
93d2074
+  %shift = shl i128 %a, %ext
93d2074
+  ret i128 %shift
93d2074
+}
93d2074
+
93d2074
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
93d2074
index 493066e..74af62b 100644
93d2074
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
93d2074
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
93d2074
@@ -3919,6 +3919,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
93d2074
   return false;
93d2074
 }
93d2074
 
93d2074
+// Promote xform function to be an explicit node wherever set.
93d2074
+static TreePatternNode* PromoteXForms(TreePatternNode* N) {
93d2074
+  if (Record *Xform = N->getTransformFn()) {
93d2074
+      N->setTransformFn(nullptr);
93d2074
+      std::vector<TreePatternNode*> Children;
93d2074
+      Children.push_back(PromoteXForms(N));
93d2074
+      return new TreePatternNode(Xform, std::move(Children),
93d2074
+                                               N->getNumTypes());
93d2074
+  }
93d2074
+
93d2074
+  if (!N->isLeaf())
93d2074
+    for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
93d2074
+      TreePatternNode* Child = N->getChild(i);
93d2074
+      N->setChild(i, std::move(PromoteXForms(Child)));
93d2074
+    }
93d2074
+  return N;
93d2074
+}
93d2074
+
93d2074
 void CodeGenDAGPatterns::ParsePatterns() {
93d2074
   std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");
93d2074
 
93d2074
@@ -4009,26 +4027,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
93d2074
                                   InstImpResults);
93d2074
 
93d2074
     // Promote the xform function to be an explicit node if set.
93d2074
-    TreePatternNode *DstPattern = Result.getOnlyTree();
93d2074
-    std::vector<TreePatternNode*> ResultNodeOperands;
93d2074
-    for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
93d2074
-      TreePatternNode *OpNode = DstPattern->getChild(ii);
93d2074
-      if (Record *Xform = OpNode->getTransformFn()) {
93d2074
-        OpNode->setTransformFn(nullptr);
93d2074
-        std::vector<TreePatternNode*> Children;
93d2074
-        Children.push_back(OpNode);
93d2074
-        OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
93d2074
-      }
93d2074
-      ResultNodeOperands.push_back(OpNode);
93d2074
-    }
93d2074
-    DstPattern = Result.getOnlyTree();
93d2074
-    if (!DstPattern->isLeaf())
93d2074
-      DstPattern = new TreePatternNode(DstPattern->getOperator(),
93d2074
-                                       ResultNodeOperands,
93d2074
-                                       DstPattern->getNumTypes());
93d2074
-
93d2074
-    for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
93d2074
-      DstPattern->setType(i, Result.getOnlyTree()->getExtType(i));
93d2074
+    TreePatternNode* DstPattern = PromoteXForms(Result.getOnlyTree());
93d2074
 
93d2074
     TreePattern Temp(Result.getRecord(), DstPattern, false, *this);
93d2074
     Temp.InferAllTypes();
93d2074
-- 
93d2074
1.8.3.1
93d2074