-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp |  68
-rw-r--r--  llvm/test/CodeGen/RISCV/addcarry.ll                    |   4
-rw-r--r--  llvm/test/CodeGen/X86/smul_fix.ll                      |  15
-rw-r--r--  llvm/test/CodeGen/X86/smul_fix_sat.ll                  | 128
-rw-r--r--  llvm/test/CodeGen/X86/umul_fix.ll                      |  20
5 files changed, 99 insertions(+), 136 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5db61436129..83d4249fcac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2857,11 +2857,6 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
"the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
-  SDValue ResultLL = Result[0];
-  SDValue ResultLH = Result[1];
-  SDValue ResultHL = Result[2];
-  SDValue ResultHH = Result[3];
-
  // After getting the multiplication result in 4 parts, we need to perform a
  // shift right by the amount of the scale to get the result in that scale.
  //
@@ -2876,50 +2871,22 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
  //
  //  |NVTSize-|
  //
-  // The resulting Lo and Hi will only need to be one of these 32-bit parts
-  // after shifting.
-  if (Scale < NVTSize) {
-    // If the scale is less than the size of the VT we expand to, the Hi and
-    // Lo of the result will be in the first 2 parts of the result after
-    // shifting right. This only requires shifting by the scale as far as the
-    // third part in the result (ResultHL).
-    SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
-    SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
-    Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
-    Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
-                     DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
-    Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
-    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
-                     DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
-  } else if (Scale == NVTSize) {
-    // If the scales are equal, Lo and Hi are ResultLH and ResultHL,
-    // respectively. Avoid shifting to prevent undefined behavior.
-    Lo = ResultLH;
-    Hi = ResultHL;
-  } else if (Scale < VTSize) {
-    // If the scale is instead less than the old VT size, but greater than or
-    // equal to the expanded VT size, the first part of the result (ResultLL)
-    // is no longer a part of Lo because it would be scaled out anyway.
-    // Instead we can start shifting right from the fourth part (ResultHH) to
-    // the second part (ResultLH), and ResultLH will be the new Lo.
-    SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
-    SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
-    Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
-    Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
-                     DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
-    Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
-    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
-                     DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
-  } else if (Scale == VTSize) {
-    assert(
-        !Signed &&
-        "Only unsigned types can have a scale equal to the operand bit width");
-
-    Lo = ResultHL;
-    Hi = ResultHH;
-  } else
-    llvm_unreachable("Expected the scale to be less than or equal to the "
-                     "width of the operands");
+  // The resulting Lo and Hi would normally be in LL and LH after the shift.
+  // But to avoid unnecessary shifting of all 4 parts, we can adjust the
+  // shift amount and get Lo and Hi using two funnel shifts. Or, for the
+  // special case when Scale is a multiple of NVTSize, we can just pick the
+  // result parts without shifting.
+  uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+  if (Scale % NVTSize) {
+    SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+    Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+                     ShiftAmount);
+    Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+                     ShiftAmount);
+  } else {
+    Lo = Result[Part0];
+    Hi = Result[Part0 + 1];
+  }
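
For readers skimming the change, here is a minimal scalar model of what the two ISD::FSHR nodes compute and how Part0 selects the parts, with NVTSize fixed at 32. The names fshr32 and extractScaled are illustrative only, not LLVM APIs:

    #include <cassert>
    #include <cstdint>

    // Model of ISD::FSHR on 32-bit parts: treat Hi:Lo as one 64-bit value,
    // shift it right by Amt, keep the low 32 bits (valid for 0 < Amt < 32).
    static uint32_t fshr32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      assert(Amt > 0 && Amt < 32 && "degenerate amounts take the other path");
      return (Hi << (32 - Amt)) | (Lo >> Amt);
    }

    // Extract the scaled result from the four product parts LL, LH, HL, HH
    // (Result[0..3]), mirroring the new legalization code above.
    static void extractScaled(const uint32_t Result[4], unsigned Scale,
                              uint32_t &Lo, uint32_t &Hi) {
      unsigned Part0 = Scale / 32; // Part holding the lowest bit needed.
      if (unsigned Amt = Scale % 32) {
        Lo = fshr32(Result[Part0 + 1], Result[Part0], Amt);
        Hi = fshr32(Result[Part0 + 2], Result[Part0 + 1], Amt);
      } else { // Scale is a multiple of the part size: no shifting at all.
        Lo = Result[Part0];
        Hi = Result[Part0 + 1];
      }
    }

With Scale == 32 this returns Result[1] and Result[2], and with Scale == 64 it returns Result[2] and Result[3], matching the removed Scale == NVTSize and Scale == VTSize special cases.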
  // Unless saturation is requested we are done. The result is in <Hi,Lo>.
  if (!Saturating)
@@ -2934,6 +2901,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
  // highest bit of HH determines saturation direction in the event of
  // saturation.
+  SDValue ResultHL = Result[2];
+  SDValue ResultHH = Result[3];
+
  SDValue SatMax, SatMin;
  SDValue NVTZero = DAG.getConstant(0, dl, NVT);
  SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
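
As a semantic reference for the saturation checks this code feeds, llvm.smul.fix.sat computed at a width where the whole product fits in a native type reduces to the sketch below. smulFixSat32 is an illustrative name, the tests exercise the expanded i64 path, and the arithmetic right shift of a negative value is an assumption (guaranteed from C++20, universal in practice):

    #include <cstdint>

    // Scalar model of llvm.smul.fix.sat.i32: exact product, arithmetic
    // shift by the scale, then clamp to the i32 range.
    static int32_t smulFixSat32(int32_t X, int32_t Y, unsigned Scale) {
      int64_t Prod = ((int64_t)X * Y) >> Scale; // assumes arithmetic >>
      if (Prod > INT32_MAX)
        return INT32_MAX; // positive overflow (HH sign bit clear): max
      if (Prod < INT32_MIN)
        return INT32_MIN; // negative overflow (HH sign bit set): min
      return (int32_t)Prod;
    }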
diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll
index 15fe53e9f24..5a25fb98509 100644
--- a/llvm/test/CodeGen/RISCV/addcarry.ll
+++ b/llvm/test/CodeGen/RISCV/addcarry.ll
@@ -34,10 +34,10 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind {
; RISCV32-NEXT: mul a0, a0, a2
; RISCV32-NEXT: srli a0, a0, 2
; RISCV32-NEXT: slli a1, a6, 30
-; RISCV32-NEXT: or a0, a0, a1
+; RISCV32-NEXT: or a0, a1, a0
; RISCV32-NEXT: srli a1, a6, 2
; RISCV32-NEXT: slli a2, a5, 30
-; RISCV32-NEXT: or a1, a1, a2
+; RISCV32-NEXT: or a1, a2, a1
; RISCV32-NEXT: ret
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2);
ret i64 %tmp;
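
The only change in the checks above is the operand order of the two or instructions; the computed value is identical. A hedged sketch of the connection: fshr(hi, lo, 2), once expanded for a target without a funnel-shift instruction, plausibly becomes or(shl(hi, 30), srl(lo, 2)) with the shl result first, which is exactly the new operand order:

    #include <cstdint>

    // Illustrative expansion of a 32-bit funnel shift right by 2 (an
    // assumption about node order, matching the CHECK lines above).
    static uint32_t fshr_by2(uint32_t Hi, uint32_t Lo) {
      uint32_t Srl = Lo >> 2;  // srli a0, a0, 2
      uint32_t Shl = Hi << 30; // slli a1, a6, 30
      return Shl | Srl;        // or   a0, a1, a0
    }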
diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll
index 80ed91fee17..c34730b1125 100644
--- a/llvm/test/CodeGen/X86/smul_fix.ll
+++ b/llvm/test/CodeGen/X86/smul_fix.ll
@@ -374,26 +374,25 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: addl %edx, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: imull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movl %ebp, %esi
; X86-NEXT: sbbl $0, %esi
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnsl %ebx, %esi
+; X86-NEXT: cmovnsl %ebp, %esi
; X86-NEXT: cmovnsl %edx, %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll
index d1f864c5731..284f51d7422 100644
--- a/llvm/test/CodeGen/X86/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll
@@ -60,7 +60,6 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
@@ -69,64 +68,61 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ebx, %eax
; X86-NEXT: imull %esi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: sbbl $0, %edi
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovnsl %ecx, %edi
-; X86-NEXT: cmovnsl %edx, %esi
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sbbl $0, %ebx
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %esi, %ebx
+; X86-NEXT: cmovnsl %edx, %edi
; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: sbbl $0, %ebp
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: sbbl $0, %esi
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %ebx, %esi
; X86-NEXT: cmovnsl %edi, %ebp
-; X86-NEXT: cmovnsl %esi, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setg %bh
-; X86-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl $1, %ecx
-; X86-NEXT: seta %bl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: shldl $30, %eax, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $30, %esi, %eax
-; X86-NEXT: andb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
-; X86-NEXT: orb %bh, %bl
-; X86-NEXT: testb %bl, %bl
-; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
-; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovnel %esi, %eax
-; X86-NEXT: cmpl $-1, %ebp
-; X86-NEXT: setl %bl
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: setg %bl
; X86-NEXT: sete %bh
-; X86-NEXT: cmpl $-2, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: andb %bh, %cl
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: orb %bl, %cl
-; X86-NEXT: cmovnel %esi, %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: addl $8, %esp
+; X86-NEXT: cmpl $1, %ebp
+; X86-NEXT: seta %dl
+; X86-NEXT: andb %bh, %dl
+; X86-NEXT: orb %bl, %dl
+; X86-NEXT: shrdl $2, %eax, %ecx
+; X86-NEXT: shrdl $2, %ebp, %eax
+; X86-NEXT: testb %dl, %dl
+; X86-NEXT: movl $2147483647, %edi # imm = 0x7FFFFFFF
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: setl %al
+; X86-NEXT: sete %dl
+; X86-NEXT: cmpl $-2, %ebp
+; X86-NEXT: setb %ah
+; X86-NEXT: andb %dl, %ah
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: orb %al, %ah
+; X86-NEXT: cmovnel %edx, %ecx
+; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT: cmovel %edi, %edx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -688,44 +684,42 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: addl %edx, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: imull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movl %ebp, %esi
; X86-NEXT: sbbl $0, %esi
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnsl %ebx, %esi
+; X86-NEXT: cmovnsl %ebp, %esi
; X86-NEXT: cmovnsl %edx, %ecx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: sbbl $0, %ebx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl $0, %edi
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnsl %esi, %ebx
-; X86-NEXT: cmovnsl %ecx, %edi
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: shldl $1, %edi, %edx
-; X86-NEXT: shrdl $31, %edi, %eax
-; X86-NEXT: cmpl $1073741823, %ebx # imm = 0x3FFFFFFF
+; X86-NEXT: cmovnsl %esi, %edi
+; X86-NEXT: cmovnsl %ecx, %edx
+; X86-NEXT: shrdl $31, %edx, %eax
+; X86-NEXT: shrdl $31, %edi, %edx
+; X86-NEXT: cmpl $1073741823, %edi # imm = 0x3FFFFFFF
; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-NEXT: cmovgl %ecx, %edx
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl $-1073741824, %ebx # imm = 0xC0000000
+; X86-NEXT: cmpl $-1073741824, %edi # imm = 0xC0000000
; X86-NEXT: cmovll %ecx, %eax
; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-NEXT: cmovll %ecx, %edx
diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll
index e4277deea14..8481fe4ac6b 100644
--- a/llvm/test/CodeGen/X86/umul_fix.ll
+++ b/llvm/test/CodeGen/X86/umul_fix.ll
@@ -60,9 +60,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: shldl $30, %eax, %edx
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: shldl $30, %eax, %esi
; X86-NEXT: shldl $30, %ecx, %eax
+; X86-NEXT: movl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -318,23 +319,22 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: addl %edx, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %ecx
; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: shldl $1, %edx, %ebx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: shldl $1, %edx, %ecx
; X86-NEXT: shrdl $31, %edx, %eax
-; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: movl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
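
For completeness, the unsigned semantics these umul.fix checks pin down, modeled at a directly computable width. umulFix32 is an illustrative name; note that the shrdl $31 instructions above realize a 32-bit funnel shift right by 31, i.e. Scale % 32:

    #include <cstdint>

    // Scalar model of llvm.umul.fix.i32: exact double-width product,
    // logical shift right by the scale, truncate to the result width.
    static uint32_t umulFix32(uint32_t X, uint32_t Y, unsigned Scale) {
      uint64_t Prod = (uint64_t)X * Y;  // never overflows 64 bits
      return (uint32_t)(Prod >> Scale); // drop the Scale fractional bits
    }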