Add target hook to allow merging stores of nonzero constants

On GPU targets, materializing constants is cheap and stores are expensive, so restricting vector store merging to zero constants was silly. Most of the new test cases are not yet merged optimally; they are there for later improvements.

llvm-svn: 238108
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2015-05-24 00:51:27 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2015-05-24 00:51:27 +0000
commit: 65ad1602b038549dfff4e8e2fe30dac15e45189d (patch)
tree: c03cf718e5e9fdc2cfc6eb633fbde3d35f134e89 /llvm/lib
parent: dc4c87f051871ddd4abd252a9755ffd26c9ce565 (diff)
download: bcm5719-llvm-65ad1602b038549dfff4e8e2fe30dac15e45189d.tar.gz
bcm5719-llvm-65ad1602b038549dfff4e8e2fe30dac15e45189d.zip
3 files changed, 20 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 77e648c152c..2c2dc859816 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10892,10 +10892,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
       }
     }
 
-    // We only use vectors if the constant is known to be zero and the
-    // function is not marked with the noimplicitfloat attribute.
-    if (NonZero || NoVectors)
+
+    // We only use vectors if the constant is known to be zero or the target
+    // allows it and the function is not marked with the noimplicitfloat
+    // attribute.
+    if (NoVectors) {
+      LastLegalVectorType = 0;
+    } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
+                                                            LastLegalVectorType,
+                                                            FirstStoreAS)) {
       LastLegalVectorType = 0;
+    }
 
     // Check if we found a legal integer type to store.
     if (LastLegalType == 0 && LastLegalVectorType == 0)
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index b304488142a..880240c51a8 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -509,6 +509,12 @@ bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
   return VT == MVT::f32 || VT == MVT::f64;
 }
 
+bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
+                                                         unsigned NumElem,
+                                                         unsigned AS) const {
+  return true; // Always reports cheap; MemVT, NumElem and AS are not consulted.
+}
+
 bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
   // Truncate is just accessing a subregister.
   return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 8507cb3c745..c9f198129ef 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -133,6 +133,10 @@ public:
                              EVT ExtVT) const override;
 
   bool isLoadBitCastBeneficial(EVT, EVT) const override;
+
+  bool storeOfVectorConstantIsCheap(EVT MemVT,
+                                    unsigned NumElem,
+                                    unsigned AS) const override;
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2015-05-24 00:51:27 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2015-05-24 00:51:27 +0000
commit	65ad1602b038549dfff4e8e2fe30dac15e45189d (patch)
tree	c03cf718e5e9fdc2cfc6eb633fbde3d35f134e89 /llvm/lib
parent	dc4c87f051871ddd4abd252a9755ffd26c9ce565 (diff)
download	bcm5719-llvm-65ad1602b038549dfff4e8e2fe30dac15e45189d.tar.gz bcm5719-llvm-65ad1602b038549dfff4e8e2fe30dac15e45189d.zip