summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp37
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h1
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td10
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td8
-rw-r--r--llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll263
-rw-r--r--llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt12
-rw-r--r--llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt12
-rw-r--r--llvm/test/MC/PowerPC/ppc64-encoding.s13
8 files changed, 351 insertions, 5 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c78d3fa45c5..8880e9d88fb 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -204,11 +204,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
- // PowerPC has no SREM/UREM instructions
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+ // PowerPC has no SREM/UREM instructions unless we are on P9
+ // On P9 we may use a hardware instruction to compute the remainder.
+ // The instructions are not legalized directly because in the cases where the
+ // result of both the remainder and the division is required it is more
+ // efficient to compute the remainder from the result of the division rather
+ // than use the remainder instruction.
+ if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::SREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Custom);
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+ } else {
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ }
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
@@ -8394,6 +8406,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue();
}
+// Custom lowering hook for ISD::SREM and ISD::UREM.
+//
+// Looks through every user of the divisor for a division with the same
+// signedness (SDIV for SREM, UDIV for UREM) whose two operands are exactly the
+// operands of this remainder. If one is found an empty SDValue is returned;
+// otherwise Op itself is returned unchanged. DAG is not used.
+SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
+  const unsigned RemOpc = Op.getOpcode();
+  const SDValue Dividend = Op.getOperand(0);
+  const SDValue Divisor = Op.getOperand(1);
+
+  for (auto User : Divisor->uses()) {
+    const unsigned UserOpc = User->getOpcode();
+    const bool IsMatchingDiv =
+        (RemOpc == ISD::SREM && UserOpc == ISD::SDIV) ||
+        (RemOpc == ISD::UREM && UserOpc == ISD::UDIV);
+    // Only a division of the same signedness is of interest.
+    if (!IsMatchingDiv)
+      continue;
+    // Same dividend and same divisor: a DIV of these operands exists.
+    if (User->getOperand(0) == Dividend && User->getOperand(1) == Divisor)
+      return SDValue();
+  }
+
+  // No matching division was found among the divisor's users.
+  return Op;
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8862,6 +8886,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::SREM:
+ case ISD::UREM:
+ return LowerREM(Op, DAG);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7982a4a9e9f..678d92898a0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -918,6 +918,7 @@ namespace llvm {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 295590b2acf..70536a6039b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -683,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divde $rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
isPPC64, Requires<[HasExtDiv]>;
+
+let Predicates = [IsISA3_0] in { // 64-bit modulo instructions, guarded by the IsISA3_0 predicate
+def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), // primary opcode 31, extended opcode 777
+ "modsd $rT, $rA, $rB", IIC_IntDivW, // NOTE(review): DIVDE above uses IIC_IntDivD; confirm IIC_IntDivW is intended for a 64-bit op
+ [(set i64:$rT, (srem i64:$rA, i64:$rB))]>; // selection pattern: signed i64 remainder
+def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), // primary opcode 31, extended opcode 265
+ "modud $rT, $rA, $rB", IIC_IntDivW, // NOTE(review): same itinerary question as MODSD
+ [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; // selection pattern: unsigned i64 remainder
+}
+
let Defs = [CR0] in
def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divde. $rT, $rA, $rB", IIC_IntDivD,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 8223aa655e3..37506239c58 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2544,6 +2544,14 @@ let Uses = [RM] in {
"mffs. $rT", IIC_IntMFFS, []>, isDOT;
}
+let Predicates = [IsISA3_0] in { // 32-bit modulo instructions, guarded by the IsISA3_0 predicate
+def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), // primary opcode 31, extended opcode 779
+ "modsw $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; // selection pattern: signed i32 remainder
+def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), // primary opcode 31, extended opcode 267
+ "moduw $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; // selection pattern: unsigned i32 remainder
+}
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll
new file mode 100644
index 00000000000..46e347becbb
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll
@@ -0,0 +1,263 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not "mod{{[us][wd]}}"
+
+@mod_resultsw = common local_unnamed_addr global i32 0, align 4
+@mod_resultud = common local_unnamed_addr global i64 0, align 8
+@div_resultsw = common local_unnamed_addr global i32 0, align 4
+@mod_resultuw = common local_unnamed_addr global i32 0, align 4
+@div_resultuw = common local_unnamed_addr global i32 0, align 4
+@div_resultsd = common local_unnamed_addr global i64 0, align 8
+@mod_resultsd = common local_unnamed_addr global i64 0, align 8
+@div_resultud = common local_unnamed_addr global i64 0, align 8
+
+; Function Attrs: norecurse nounwind
+define void @modulo_sw(i32 signext %a, i32 signext %b) local_unnamed_addr { ; lone srem i32: modsw expected with pwr9, div/mull/sub with pwr8
+entry:
+ %rem = srem i32 %a, %b
+ store i32 %rem, i32* @mod_resultsw, align 4
+ ret void
+; CHECK-LABEL: modulo_sw
+; CHECK: modsw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i32 @modulo_uw(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr { ; lone urem i32: moduw expected with pwr9, div/mull/sub with pwr8
+entry:
+ %rem = urem i32 %a, %b
+ ret i32 %rem
+; CHECK-LABEL: modulo_uw
+; CHECK: moduw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_uw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @modulo_sd(i64 %a, i64 %b) local_unnamed_addr { ; lone srem i64: modsd expected with pwr9, div/mull/sub with pwr8
+entry:
+ %rem = srem i64 %a, %b
+ ret i64 %rem
+; CHECK-LABEL: modulo_sd
+; CHECK: modsd {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_sd
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_ud(i64 %a, i64 %b) local_unnamed_addr { ; lone urem i64: modud expected with pwr9, div/mull/sub with pwr8
+entry:
+ %rem = urem i64 %a, %b
+ store i64 %rem, i64* @mod_resultud, align 8
+ ret void
+; CHECK-LABEL: modulo_ud
+; CHECK: modud {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_ud
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_sw(i32 signext %a, i32 signext %b) local_unnamed_addr { ; srem and sdiv of the same operands: no modsw expected, div/mull/sub checked instead
+entry:
+ %rem = srem i32 %a, %b
+ store i32 %rem, i32* @mod_resultsw, align 4
+ %div = sdiv i32 %a, %b
+ store i32 %div, i32* @div_resultsw, align 4
+ ret void
+; CHECK-LABEL: modulo_div_sw
+; CHECK-NOT: modsw
+; CHECK: div
+; CHECK-NOT: modsw
+; CHECK: mull
+; CHECK-NOT: modsw
+; CHECK: sub
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_abc_sw(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr { ; srem %a,%c and sdiv %b,%c share only the divisor: modsw still expected with pwr9
+entry:
+ %rem = srem i32 %a, %c
+ store i32 %rem, i32* @mod_resultsw, align 4
+ %div = sdiv i32 %b, %c
+ store i32 %div, i32* @div_resultsw, align 4
+ ret void
+; CHECK-LABEL: modulo_div_abc_sw
+; CHECK: modsw {{[0-9]+}}, 3, 5
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_abc_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_uw(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr { ; urem and udiv of the same operands: no moduw expected, div/mull/sub checked instead
+entry:
+ %rem = urem i32 %a, %b
+ store i32 %rem, i32* @mod_resultuw, align 4
+ %div = udiv i32 %a, %b
+ store i32 %div, i32* @div_resultuw, align 4
+ ret void
+; CHECK-LABEL: modulo_div_uw
+; CHECK-NOT: moduw
+; CHECK: div
+; CHECK-NOT: moduw
+; CHECK: mull
+; CHECK-NOT: moduw
+; CHECK: sub
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_uw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_swuw(i32 signext %a, i32 signext %b) local_unnamed_addr { ; srem paired with udiv (signedness differs): modsw still expected with pwr9
+entry:
+ %rem = srem i32 %a, %b
+ store i32 %rem, i32* @mod_resultsw, align 4
+ %div = udiv i32 %a, %b
+ store i32 %div, i32* @div_resultsw, align 4
+ ret void
+; CHECK-LABEL: modulo_div_swuw
+; CHECK: modsw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_swuw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_div_udsd(i64 %a, i64 %b) local_unnamed_addr { ; urem paired with sdiv (signedness differs): modud still expected with pwr9
+entry:
+ %rem = urem i64 %a, %b
+ store i64 %rem, i64* @mod_resultud, align 8
+ %div = sdiv i64 %a, %b
+ store i64 %div, i64* @div_resultsd, align 8
+ ret void
+; CHECK-LABEL: modulo_div_udsd
+; CHECK: modud {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_div_udsd
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @modulo_const32_sw(i32 signext %a) local_unnamed_addr { ; srem by the constant 32: no modsw expected, srawi/addze/slwi/subf checked on both CPUs
+entry:
+ %rem = srem i32 %a, 32
+ store i32 %rem, i32* @mod_resultsw, align 4
+ ret void
+; CHECK-LABEL: modulo_const32_sw
+; CHECK-NOT: modsw
+; CHECK: srawi
+; CHECK-NOT: modsw
+; CHECK: addze
+; CHECK-NOT: modsw
+; CHECK: slwi
+; CHECK-NOT: modsw
+; CHECK: subf
+; CHECK-NOT: modsw
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_const32_sw
+; CHECK-PWR8: srawi
+; CHECK-PWR8: addze
+; CHECK-PWR8: slwi
+; CHECK-PWR8: subf
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @modulo_const3_sw(i32 signext %a) local_unnamed_addr { ; srem by the constant 3: no modsw expected, mull/sub checked on both CPUs
+entry:
+ %rem = srem i32 %a, 3
+ ret i32 %rem
+; CHECK-LABEL: modulo_const3_sw
+; CHECK-NOT: modsw
+; CHECK: mull
+; CHECK-NOT: modsw
+; CHECK: sub
+; CHECK-NOT: modsw
+; CHECK: blr
+; CHECK-PWR8-LABEL: modulo_const3_sw
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @const2_modulo_sw(i32 signext %a) local_unnamed_addr { ; constant dividend 2 with a variable divisor: modsw expected with pwr9
+entry:
+ %rem = srem i32 2, %a
+ ret i32 %rem
+; CHECK-LABEL: const2_modulo_sw
+; CHECK: modsw {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: const2_modulo_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind
+; FIXME On power 9 this test will still produce modsw because the divide is in
+; a different block than the remainder. Due to the nature of the SDAG we cannot
+; see the div in the other block.
+define void @blocks_modulo_div_sw(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr { ; sdiv in entry, srem of the same operands in if.then: modsw still expected with pwr9
+entry:
+ %div = sdiv i32 %a, %b
+ store i32 %div, i32* @div_resultsw, align 4
+ %cmp = icmp sgt i32 %c, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %rem = srem i32 %a, %b
+ store i32 %rem, i32* @mod_resultsw, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+; CHECK-LABEL: blocks_modulo_div_sw
+; CHECK: div
+; CHECK: modsw {{[0-9]+}}, 3, 4
+; CHECK: blr
+; CHECK-PWR8-LABEL: blocks_modulo_div_sw
+; CHECK-PWR8: div
+; CHECK-PWR8: mull
+; CHECK-PWR8: sub
+; CHECK-PWR8: blr
+}
+
+
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
index a6d079297bc..25ed35fcb1c 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
@@ -352,6 +352,18 @@
# CHECK: divweu. 2, 3, 4
0x7c 0x43 0x23 0x17
+# CHECK: modsw 2, 3, 4
+0x7c 0x43 0x26 0x16
+
+# CHECK: moduw 2, 3, 4
+0x7c 0x43 0x22 0x16
+
+# CHECK: modsd 2, 3, 4
+0x7c 0x43 0x26 0x12
+
+# CHECK: modud 2, 3, 4
+0x7c 0x43 0x22 0x12
+
# CHECK: mulld 2, 3, 4
0x7c 0x43 0x21 0xd2
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
index 9ddc286d8aa..9dc99401055 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
@@ -349,6 +349,18 @@
# CHECK: divweu. 2, 3, 4
0x17 0x23 0x43 0x7c
+# CHECK: modsw 2, 3, 4
+0x16 0x26 0x43 0x7c
+
+# CHECK: moduw 2, 3, 4
+0x16 0x22 0x43 0x7c
+
+# CHECK: modsd 2, 3, 4
+0x12 0x26 0x43 0x7c
+
+# CHECK: modud 2, 3, 4
+0x12 0x22 0x43 0x7c
+
# CHECK: mulld 2, 3, 4
0xd2 0x21 0x43 0x7c
diff --git a/llvm/test/MC/PowerPC/ppc64-encoding.s b/llvm/test/MC/PowerPC/ppc64-encoding.s
index a772ca44986..237dd5cfd72 100644
--- a/llvm/test/MC/PowerPC/ppc64-encoding.s
+++ b/llvm/test/MC/PowerPC/ppc64-encoding.s
@@ -493,6 +493,19 @@
# FIXME: divweuo 2, 3, 4
# FIXME: divweuo. 2, 3, 4
+# CHECK-BE: modsw 2, 3, 4 # encoding: [0x7c,0x43,0x26,0x16]
+# CHECK-LE: modsw 2, 3, 4 # encoding: [0x16,0x26,0x43,0x7c]
+ modsw 2, 3, 4
+# CHECK-BE: moduw 2, 3, 4 # encoding: [0x7c,0x43,0x22,0x16]
+# CHECK-LE: moduw 2, 3, 4 # encoding: [0x16,0x22,0x43,0x7c]
+ moduw 2, 3, 4
+# CHECK-BE: modsd 2, 3, 4 # encoding: [0x7c,0x43,0x26,0x12]
+# CHECK-LE: modsd 2, 3, 4 # encoding: [0x12,0x26,0x43,0x7c]
+ modsd 2, 3, 4
+# CHECK-BE: modud 2, 3, 4 # encoding: [0x7c,0x43,0x22,0x12]
+# CHECK-LE: modud 2, 3, 4 # encoding: [0x12,0x22,0x43,0x7c]
+ modud 2, 3, 4
+
# CHECK-BE: mulld 2, 3, 4 # encoding: [0x7c,0x43,0x21,0xd2]
# CHECK-LE: mulld 2, 3, 4 # encoding: [0xd2,0x21,0x43,0x7c]
mulld 2, 3, 4
OpenPOWER on IntegriCloud