author     Craig Topper <craig.topper@intel.com>   2018-12-11 07:41:06 +0000
committer  Craig Topper <craig.topper@intel.com>   2018-12-11 07:41:06 +0000
commit     4bd93fa5bbe91bafbac5a9f606375ddf629001c0
tree       20cbade90d4ab88e577111180ae00619eac990ce
parent     a64aa485e1568dfa7ef141d832535dc3797ee373
[X86] Switch the 64-bit mulx schedule test to use inline assembly.
I'm not sure we should always prefer MULX over MUL, so make the MULX guaranteed by using inline assembly.
llvm-svn: 348833
-rw-r--r--   llvm/test/CodeGen/X86/bmi2-schedule.ll   70
1 file changed, 27 insertions, 43 deletions
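The core of the change is that test_mulx_i64 no longer relies on the compiler choosing MULX when lowering a widening 128-bit multiply; the test body now calls module-level inline assembly, so both MULX forms are always present between the #APP/#NO_APP markers regardless of how the backend would otherwise lower the multiply. Below is a minimal sketch of the resulting IR, lifted from the new test body in the hunk that follows; the comments are explanatory additions and are not part of the test:

; Sketch of the new test body (see the full diff below).
; The "r,r,*m" constraint string maps $0 and $1 to registers and $2 to an
; indirect memory operand, so the template emits one register-form and one
; memory-form MULX; MULX also implicitly reads its first source from RDX.
define void @test_mulx_i64(i64 %a0, i64 %a1, i64* %a2) {
  tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i64 %a0, i64 %a1, i64* %a2) nounwind
  ret void
}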
diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll
index 5232e51ac73..f235e793ab9 100644
--- a/llvm/test/CodeGen/X86/bmi2-schedule.ll
+++ b/llvm/test/CodeGen/X86/bmi2-schedule.ll
@@ -158,72 +158,56 @@ define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize {
   ret void
 }
 
-define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
+define void @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: test_mulx_i64:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq %rdx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rdx # sched: [1:0.33]
-; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT: #APP
+; GENERIC-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; GENERIC-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; GENERIC-NEXT: #NO_APP
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulx_i64:
 ; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; HASWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT: #APP
+; HASWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; HASWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; HASWELL-NEXT: #NO_APP
 ; HASWELL-NEXT: retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_mulx_i64:
 ; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; BROADWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT: #APP
+; BROADWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; BROADWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; BROADWELL-NEXT: #NO_APP
 ; BROADWELL-NEXT: retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_mulx_i64:
 ; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; SKYLAKE-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT: #APP
+; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; SKYLAKE-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; SKYLAKE-NEXT: #NO_APP
 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
 ;
 ; KNL-LABEL: test_mulx_i64:
 ; KNL: # %bb.0:
-; KNL-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; KNL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; KNL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
-; KNL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
-; KNL-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; KNL-NEXT: #APP
+; KNL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
+; KNL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
+; KNL-NEXT: #NO_APP
 ; KNL-NEXT: retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_mulx_i64:
 ; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq %rdx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: movq %rdi, %rdx # sched: [1:0.25]
-; ZNVER1-NEXT: mulxq %rsi, %rsi, %rcx # sched: [3:1.00]
-; ZNVER1-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00]
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT: #APP
+; ZNVER1-NEXT: mulxq %rsi, %rsi, %rdi # sched: [3:1.00]
+; ZNVER1-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [8:1.00]
+; ZNVER1-NEXT: #NO_APP
 ; ZNVER1-NEXT: retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a2
-  %2 = zext i64 %a0 to i128
-  %3 = zext i64 %a1 to i128
-  %4 = zext i64 %1 to i128
-  %5 = mul i128 %2, %3
-  %6 = mul i128 %2, %4
-  %7 = lshr i128 %5, 64
-  %8 = lshr i128 %6, 64
-  %9 = trunc i128 %7 to i64
-  %10 = trunc i128 %8 to i64
-  %11 = or i64 %9, %10
-  ret i64 %11
+  tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i64 %a0, i64 %a1, i64* %a2) nounwind
+  ret void
 }
 
 define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
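For reference, the "# sched: [latency:throughput]" annotations being checked come from printing per-CPU scheduling information when the test is fed through llc, and the CHECK lines in these *-schedule.ll tests are normally regenerated with utils/update_llc_test_checks.py. The RUN lines are outside this hunk; a hypothetical invocation for the HASWELL prefix (flags assumed here, not taken from this diff) would look roughly like:

; Hypothetical RUN line; the real ones sit at the top of bmi2-schedule.ll.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=HASWELL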