summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-02-07 00:21:34 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-02-07 00:21:34 +0000
commit: a18b3bcf51870e177586d70b02565d4ca13ab83a (patch)
tree: 369b1f7504d7dc5bd7ee802fb8135e02841542ba /llvm/lib
parent: 258f059f88daf5ddb081c56c1daee71f2de81fad (diff)
download: bcm5719-llvm-a18b3bcf51870e177586d70b02565d4ca13ab83a.tar.gz
bcm5719-llvm-a18b3bcf51870e177586d70b02565d4ca13ab83a.zip
AMDGPU: Select BFI patterns with 64-bit ints
llvm-svn: 324431
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 47
-rw-r--r-- llvm/lib/Target/AMDGPU/EvergreenInstructions.td | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td | 3
3 files changed, 46 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 76d35469027..4f28d6fa430 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -570,6 +570,18 @@ multiclass BFIPatterns <Instruction BFI_INT,
(BFI_INT $x, $y, $z)
>;
+ // 64-bit version
+ def : AMDGPUPat <
+ (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
+ (REG_SEQUENCE RC64,
+ (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
+ (i32 (EXTRACT_SUBREG $y, sub0)),
+ (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
+ (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
+ (i32 (EXTRACT_SUBREG $y, sub1)),
+ (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
+ >;
+
// SHA-256 Ch function
// z ^ (x & (y ^ z))
def : AMDGPUPat <
@@ -577,6 +589,18 @@ multiclass BFIPatterns <Instruction BFI_INT,
(BFI_INT $x, $y, $z)
>;
+ // 64-bit version
+ def : AMDGPUPat <
+ (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
+ (REG_SEQUENCE RC64,
+ (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
+ (i32 (EXTRACT_SUBREG $y, sub0)),
+ (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
+ (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
+ (i32 (EXTRACT_SUBREG $y, sub1)),
+ (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
+ >;
+
def : AMDGPUPat <
(fcopysign f32:$src0, f32:$src1),
(BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
@@ -610,10 +634,25 @@ multiclass BFIPatterns <Instruction BFI_INT,
// SHA-256 Ma patterns
// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
-class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : AMDGPUPat <
- (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
- (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
->;
+multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
+ def : AMDGPUPat <
+ (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
+ (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
+ >;
+
+ def : AMDGPUPat <
+ (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
+ (REG_SEQUENCE RC64,
+ (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
+ (i32 (EXTRACT_SUBREG $y, sub0))),
+ (i32 (EXTRACT_SUBREG $z, sub0)),
+ (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
+ (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
+ (i32 (EXTRACT_SUBREG $y, sub1))),
+ (i32 (EXTRACT_SUBREG $z, sub1)),
+ (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
+ >;
+}
// Bitfield extract patterns
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 5e26f97b0c8..148b45ba6bb 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -693,7 +693,7 @@ def : EGOrCaymanPat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
def : EGOrCaymanPat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
// SHA-256 Patterns
-def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
+defm : SHA256MaPattern <BFI_INT_eg, XOR_INT, R600_Reg64>;
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f87b4f4fda6..2a77bfed8dc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1098,6 +1098,7 @@ let SubtargetPredicate = isGCN in {
def : IMad24Pat<V_MAD_I32_I24, 1>;
def : UMad24Pat<V_MAD_U32_U24, 1>;
+// FIXME: This should only be done for VALU inputs
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
@@ -1487,7 +1488,7 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
+defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
OpenPOWER on IntegriCloud