diff options
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 87 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Hexagon/extload-combine.ll | 80 | 
2 files changed, 167 insertions, 0 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 022a7f61365..fee83fb8110 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -3188,6 +3188,93 @@ def STriw_offset_ext_V4 : STInst<(outs),
                     (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
             Requires<[HasV4T]>;
 
+def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))),
+          (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>,
+          Requires<[HasV4T]>;
+
+def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))),
+          (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>,
+          Requires<[HasV4T]>;
+
+
+// i8 -> i64 loads
+// We need a complexity of 120 here to override preceding handling of
+// zextloadi8.
+let Predicates = [HasV4T], AddedComplexity = 120 in {
+def:  Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>;
+
+def:  Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>;
+
+def:  Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>;
+
+def:  Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)),
+      (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>;
+
+def:  Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)),
+      (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>;
+
+def:  Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)),
+      (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>;
+}
+// i16 -> i64 loads
+// We need a complexity of 120 here to override preceding handling of
+// zextloadi16.
+let AddedComplexity = 120 in {
+def:  Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)),
+      (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)),
+      (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)),
+      (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>,
+      Requires<[HasV4T]>;
+}
+// i32->i64 loads
+// We need a complexity of 120 here to override preceding handling of
+// zextloadi32.
+let AddedComplexity = 120 in {
+def:  Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
+      (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)),
+      (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)),
+      (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+      Requires<[HasV4T]>;
+
+def:  Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)),
+      (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+      Requires<[HasV4T]>;
+}
 
 // Indexed store double word - global address.
 // memw(Rs+#u6:2)=#S8
diff --git a/llvm/test/CodeGen/Hexagon/extload-combine.ll b/llvm/test/CodeGen/Hexagon/extload-combine.ll
new file mode 100644
index 00000000000..b3b8bf07032
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/extload-combine.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; Check that the combine/sxtw instructions are being generated.
+; In case of combine one of the operands should be 0 and the other should be
+; the output of absolute addressing load instruction.
+
+@a = external global i16
+@b = external global i16
+@c = external global i16
+@char_a = external global i8
+@char_b = external global i8
+@char_c = external global i8
+@int_a = external global i32
+@int_b = external global i32
+@int_c = external global i32
+
+; Function Attrs: nounwind
+define i64 @short_test1() #0 {
+; CHECK: [[VAR:r[0-9]+]]{{ *}}={{ *}}memuh(##
+; CHECK: combine(#0, [[VAR]])
+entry:
+  store i16 0, i16* @a, align 2
+  %0 = load i16* @b, align 2
+  %conv2 = zext i16 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @short_test2() #0 {
+; CHECK: [[VAR1:r[0-9]+]]{{ *}}={{ *}}memh(##
+; CHECK: sxtw([[VAR1]])
+entry:
+  store i16 0, i16* @a, align 2
+  %0 = load i16* @c, align 2
+  %conv2 = sext i16 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @char_test1() #0 {
+; CHECK: [[VAR2:r[0-9]+]]{{ *}}={{ *}}memub(##
+; CHECK: combine(#0, [[VAR2]])
+entry:
+  store i8 0, i8* @char_a, align 1
+  %0 = load i8* @char_b, align 1
+  %conv2 = zext i8 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @char_test2() #0 {
+; CHECK: [[VAR3:r[0-9]+]]{{ *}}={{ *}}memb(##
+; CHECK: sxtw([[VAR3]])
+entry:
+  store i8 0, i8* @char_a, align 1
+  %0 = load i8* @char_c, align 1
+  %conv2 = sext i8 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @int_test1() #0 {
+; CHECK: [[VAR4:r[0-9]+]]{{ *}}={{ *}}memw(##
+; CHECK: combine(#0, [[VAR4]])
+entry:
+  store i32 0, i32* @int_a, align 4
+  %0 = load i32* @int_b, align 4
+  %conv = zext i32 %0 to i64
+  ret i64 %conv
+}
+
+; Function Attrs: nounwind
+define i64 @int_test2() #0 {
+; CHECK: [[VAR5:r[0-9]+]]{{ *}}={{ *}}memw(##
+; CHECK: sxtw([[VAR5]])
+entry:
+  store i32 0, i32* @int_a, align 4
+  %0 = load i32* @int_c, align 4
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
 | 

