summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-12-11 07:41:06 +0000
committerCraig Topper <craig.topper@intel.com>2018-12-11 07:41:06 +0000
commit4bd93fa5bbe91bafbac5a9f606375ddf629001c0 (patch)
tree20cbade90d4ab88e577111180ae00619eac990ce
parenta64aa485e1568dfa7ef141d832535dc3797ee373 (diff)
downloadbcm5719-llvm-4bd93fa5bbe91bafbac5a9f606375ddf629001c0.tar.gz
bcm5719-llvm-4bd93fa5bbe91bafbac5a9f606375ddf629001c0.zip
[X86] Switch the 64-bit mulx schedule test to use inline assembly.
I'm not sure we should always prefer MULX over MUL. So make the MULX guaranteed with inline assembly. llvm-svn: 348833
-rw-r--r--llvm/test/CodeGen/X86/bmi2-schedule.ll70
1 file changed, 27 insertions, 43 deletions
diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll
index 5232e51ac73..f235e793ab9 100644
--- a/llvm/test/CodeGen/X86/bmi2-schedule.ll
+++ b/llvm/test/CodeGen/X86/bmi2-schedule.ll
@@ -158,72 +158,56 @@ define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize {
ret void
}
-define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
+define void @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; GENERIC-LABEL: test_mulx_i64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq %rdx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rdx # sched: [1:0.33]
-; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT: #APP
+; GENERIC-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; GENERIC-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_mulx_i64:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; HASWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT: #APP
+; HASWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; HASWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_mulx_i64:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; BROADWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT: #APP
+; BROADWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; BROADWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_mulx_i64:
; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; SKYLAKE-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT: #APP
+; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; SKYLAKE-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_mulx_i64:
; KNL: # %bb.0:
-; KNL-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; KNL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; KNL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; KNL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; KNL-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; KNL-NEXT: #APP
+; KNL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; KNL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_mulx_i64:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; ZNVER1-NEXT: mulxq %rsi, %rsi, %rcx # sched: [3:1.00]
-; ZNVER1-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00]
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT: #APP
+; ZNVER1-NEXT: mulxq %rsi, %rsi, %rdi # sched: [3:1.00]
+; ZNVER1-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [8:1.00]
+; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = zext i64 %a0 to i128
- %3 = zext i64 %a1 to i128
- %4 = zext i64 %1 to i128
- %5 = mul i128 %2, %3
- %6 = mul i128 %2, %4
- %7 = lshr i128 %5, 64
- %8 = lshr i128 %6, 64
- %9 = trunc i128 %7 to i64
- %10 = trunc i128 %8 to i64
- %11 = or i64 %9, %10
- ret i64 %11
+ tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i64 %a0, i64 %a1, i64* %a2) nounwind
+ ret void
}
define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
OpenPOWER on IntegriCloud