diff options
| author | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2018-02-15 10:33:07 +0000 |
|---|---|---|
| committer | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2018-02-15 10:33:07 +0000 |
| commit | 9430c8cd1c6fd35777604a2abc055075bc29d9b8 (patch) | |
| tree | 2f3181f7dcca00fc6c850b4d184ccab989c47ca3 | |
| parent | 4500001905805be5437cfe21e9f65820202c8966 (diff) | |
| download | bcm5719-llvm-9430c8cd1c6fd35777604a2abc055075bc29d9b8.tar.gz bcm5719-llvm-9430c8cd1c6fd35777604a2abc055075bc29d9b8.zip | |
[ARM] f16 vcmp fixes
This adds f16 VCMP match rules and fixes the test cases.
Differential Revision: https://reviews.llvm.org/D43291
llvm-svn: 325228
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrVFP.td | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/fp16-instructions.ll | 74 |
2 files changed, 54 insertions, 28 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 6e805475698..9efc4f9c343 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -594,9 +594,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
 }

 def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
-                   (outs), (ins SPR:$Sd),
+                   (outs), (ins HPR:$Sd),
                    IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
-                   []> {
+                   [(arm_cmpfp0 HPR:$Sd, (i32 1))]> {
   let Inst{3-0} = 0b0000;
   let Inst{5} = 0;
 }
@@ -622,9 +622,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
 }

 def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
-                  (outs), (ins SPR:$Sd),
+                  (outs), (ins HPR:$Sd),
                   IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
-                  []> {
+                  [(arm_cmpfp0 HPR:$Sd, (i32 0))]> {
   let Inst{3-0} = 0b0000;
   let Inst{5} = 0;
 }
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index e0c6e5c1f90..3d8dbe80321 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -98,7 +98,7 @@ entry:
 }

 ; 3. VCMP
-define zeroext i1 @VCMP(float %F.coerce, float %G.coerce) {
+define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
 entry:
   %0 = bitcast float %F.coerce to i32
   %tmp.0.extract.trunc = trunc i32 %0 to i16
@@ -106,45 +106,66 @@ entry:
   %2 = bitcast float %G.coerce to i32
   %tmp1.0.extract.trunc = trunc i32 %2 to i16
   %3 = bitcast i16 %tmp1.0.extract.trunc to half
-  %cmp = fcmp ogt half %1, %3
+  %cmp = fcmp une half %1, %3
   ret i1 %cmp

-; CHECK-LABEL: VCMP:
+; CHECK-LABEL: VCMP1:

-; CHECK-SOFT: bl __aeabi_fcmpgt
+; CHECK-SOFT: bl __aeabi_fcmpeq

 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
-; CHECK-SOFTFP-VFP3: vcmpe.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
-; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
-; CHECK-SOFTFP-FULLFP16: vcmpe.f16 [[S2]], [[S0]]
+; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

-; CHECK-SOFTFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
-; CHECK-SOFTFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s1
+; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
+; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
+; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
+}
+
+; Check VCMPZH
+define zeroext i1 @VCMP2(float %F.coerce) {
+entry:
+  %0 = bitcast float %F.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %cmp = fcmp une half %1, 0.000000e+00
+  ret i1 %cmp
+
+; CHECK-LABEL: VCMP2:
+
+; CHECK-SOFT: bl __aeabi_fcmpeq
+; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
+; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
+; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
 }

 ; 4. VCMPE
+define i32 @VCMPE1(float %F.coerce) {
+entry:
+  %0 = bitcast float %F.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %tmp = fcmp olt half %1, 0.000000e+00
+  %tmp1 = zext i1 %tmp to i32
+  ret i32 %tmp1

-; FIXME: enable when constant pool is fixed
-;
-;define i32 @VCMPE_IMM(float %F.coerce) {
-;entry:
-; %0 = bitcast float %F.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %tmp = fcmp olt half %1, 1.000000e+00
-; %tmp1 = zext i1 %tmp to i32
-; ret i32 %tmp1
-;}
-
-define i32 @VCMPE(float %F.coerce, float %G.coerce) {
+; CHECK-LABEL: VCMPE1:
+
+; CHECK-SOFT: bl __aeabi_fcmplt
+; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
+; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
+}
+
+define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
 entry:
   %0 = bitcast float %F.coerce to i32
   %tmp.0.extract.trunc = trunc i32 %0 to i16
@@ -156,7 +177,12 @@ entry:
   %tmp1 = zext i1 %tmp to i32
   ret i32 %tmp1

-; CHECK-LABEL: VCMPE:
+; CHECK-LABEL: VCMPE2:
+
+; CHECK-SOFT: bl __aeabi_fcmplt
+; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
 }

 ; 5. VCVT (between floating-point and fixed-point)
```

