diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-07-30 12:16:58 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-07-30 12:16:58 +0000 |
commit | de496c32a4939056e60886f0cb343b2301484b38 (patch) | |
tree | b43043e313e51353d080f11b0d34a9e44091234b /llvm/lib | |
parent | f3c9a34def73e18eaa97ae7c43fd12a80fd405be (diff) | |
download | bcm5719-llvm-de496c32a4939056e60886f0cb343b2301484b38.tar.gz bcm5719-llvm-de496c32a4939056e60886f0cb343b2301484b38.zip |
AMDGPU: Reduce code size with fcanonicalize (fneg x)
When fcanonicalize is lowered to a mul, we can
use -1.0 for free and avoid the cost of the bigger
encoding for source modifers.
llvm-svn: 338244
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 |
2 files changed, 11 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 9426df39959..c9c932ef2f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -567,6 +567,7 @@ int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding int FP16_ONE = 0x3C00; +int FP16_NEG_ONE = 0xBC00; int V2FP16_ONE = 0x3C003C00; int FP32_ONE = 0x3f800000; int FP32_NEG_ONE = 0xbf800000; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index c3f8bfb53ef..5c10646161b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1387,6 +1387,11 @@ def : GCNPat< >; def : GCNPat< + (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), + (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0) +>; + +def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) >; @@ -1411,6 +1416,11 @@ def : GCNPat< (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0) >; + +def : GCNPat< + (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))), + (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0) +>; } let OtherPredicates = [FP32Denormals] in { |