| author | Nemanja Ivanovic <nemanjai@ca.ibm.com> | 2019-11-25 09:32:28 -0600 |
|---|---|---|
| committer | Nemanja Ivanovic <nemanjai@ca.ibm.com> | 2019-11-25 11:41:34 -0600 |
| commit | 7fbaa8097ecc4309fec49db14fadac731ce53079 (patch) | |
| tree | f4ca18f5e419994788a996906120fb4c4f591ef5 /llvm/test | |
| parent | e85d2e4981b9db98798ce3e15078775eb50be854 (diff) | |
[PowerPC] Fix VSX clobbers of CSR registers
If an inline asm statement clobbers a VSX register that overlaps with a
callee-saved Altivec register or FPR, we will not record the clobber and will
therefore violate the ABI: the overlapped callee-saved register is never saved
and restored by the containing function. This is clearly a bug, so this patch fixes it.
Differential revision: https://reviews.llvm.org/D68576
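
For context, VSX registers vs32-vs63 overlap the Altivec registers v0-v31 (vs54 is the same physical register as v22), and vs0-vs31 overlap the FPRs f0-f31; under the ELFv2 ABI, v20-v31 and f14-f31 are callee-saved. A minimal LLVM IR sketch of the offending pattern is shown below; it mirrors the new test added by this patch, and the function name is only illustrative:

```llvm
; Clobbering vs54 must be treated as clobbering the callee-saved Altivec
; register v22, so the compiler has to spill and reload v22 around the asm.
define dso_local void @clobber_vs54() {
entry:
  tail call void asm sideeffect "nop", "~{vs54}"()
  ret void
}
```

Before this fix, the aliased v22 was not recorded as clobbered, so no spill and reload were emitted and the caller's value in v22 could be silently destroyed; with the fix, the CHECK lines in the tests below show the expected save and restore.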
Diffstat (limited to 'llvm/test')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/CSR-fit.ll | 38 |
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/inline-asm-vsx-clobbers.ll | 32 |
2 files changed, 63 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/PowerPC/CSR-fit.ll b/llvm/test/CodeGen/PowerPC/CSR-fit.ll
index 20438a45a25..49bfc79eb4c 100644
--- a/llvm/test/CodeGen/PowerPC/CSR-fit.ll
+++ b/llvm/test/CodeGen/PowerPC/CSR-fit.ll
@@ -178,16 +178,26 @@ define dso_local signext i32 @caller4(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8: # %bb.0: # %entry
 ; CHECK-PWR8-NEXT: mflr r0
 ; CHECK-PWR8-NEXT: std r0, 16(r1)
-; CHECK-PWR8-NEXT: stdu r1, -32(r1)
-; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 32
+; CHECK-PWR8-NEXT: stdu r1, -240(r1)
+; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 240
 ; CHECK-PWR8-NEXT: .cfi_offset lr, 16
+; CHECK-PWR8-NEXT: .cfi_offset v20, -192
+; CHECK-PWR8-NEXT: .cfi_offset v21, -176
+; CHECK-PWR8-NEXT: li r5, 48
+; CHECK-PWR8-NEXT: stxvd2x v20, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: li r5, 64
+; CHECK-PWR8-NEXT: stxvd2x v21, r1, r5 # 16-byte Folded Spill
 ; CHECK-PWR8-NEXT: #APP
 ; CHECK-PWR8-NEXT: add r3, r3, r4
 ; CHECK-PWR8-NEXT: #NO_APP
 ; CHECK-PWR8-NEXT: extsw r3, r3
 ; CHECK-PWR8-NEXT: bl callee
 ; CHECK-PWR8-NEXT: nop
-; CHECK-PWR8-NEXT: addi r1, r1, 32
+; CHECK-PWR8-NEXT: li r4, 64
+; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: li r4, 48
+; CHECK-PWR8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: addi r1, r1, 240
 ; CHECK-PWR8-NEXT: ld r0, 16(r1)
 ; CHECK-PWR8-NEXT: mtlr r0
 ; CHECK-PWR8-NEXT: blr
@@ -196,16 +206,22 @@ define dso_local signext i32 @caller4(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR9: # %bb.0: # %entry
 ; CHECK-PWR9-NEXT: mflr r0
 ; CHECK-PWR9-NEXT: std r0, 16(r1)
-; CHECK-PWR9-NEXT: stdu r1, -32(r1)
-; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 32
+; CHECK-PWR9-NEXT: stdu r1, -224(r1)
+; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 224
 ; CHECK-PWR9-NEXT: .cfi_offset lr, 16
+; CHECK-PWR9-NEXT: .cfi_offset v20, -192
+; CHECK-PWR9-NEXT: .cfi_offset v21, -176
+; CHECK-PWR9-NEXT: stxv v20, 32(r1) # 16-byte Folded Spill
+; CHECK-PWR9-NEXT: stxv v21, 48(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT: #APP
 ; CHECK-PWR9-NEXT: add r3, r3, r4
 ; CHECK-PWR9-NEXT: #NO_APP
 ; CHECK-PWR9-NEXT: extsw r3, r3
 ; CHECK-PWR9-NEXT: bl callee
 ; CHECK-PWR9-NEXT: nop
-; CHECK-PWR9-NEXT: addi r1, r1, 32
+; CHECK-PWR9-NEXT: lxv v21, 48(r1) # 16-byte Folded Reload
+; CHECK-PWR9-NEXT: lxv v20, 32(r1) # 16-byte Folded Reload
+; CHECK-PWR9-NEXT: addi r1, r1, 224
 ; CHECK-PWR9-NEXT: ld r0, 16(r1)
 ; CHECK-PWR9-NEXT: mtlr r0
 ; CHECK-PWR9-NEXT: blr
@@ -226,19 +242,24 @@ define dso_local signext i32 @caller_mixed(i32 signext %a, i32 signext %b) local
 ; CHECK-PWR8-NEXT: .cfi_offset r14, -288
 ; CHECK-PWR8-NEXT: .cfi_offset f14, -144
 ; CHECK-PWR8-NEXT: .cfi_offset v20, -480
+; CHECK-PWR8-NEXT: .cfi_offset v21, -464
 ; CHECK-PWR8-NEXT: li r5, 48
 ; CHECK-PWR8-NEXT: std r14, 240(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT: stfd f14, 384(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT: stxvd2x v20, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: li r5, 64
+; CHECK-PWR8-NEXT: stxvd2x v21, r1, r5 # 16-byte Folded Spill
 ; CHECK-PWR8-NEXT: #APP
 ; CHECK-PWR8-NEXT: add r3, r3, r4
 ; CHECK-PWR8-NEXT: #NO_APP
 ; CHECK-PWR8-NEXT: extsw r3, r3
 ; CHECK-PWR8-NEXT: bl callee
 ; CHECK-PWR8-NEXT: nop
-; CHECK-PWR8-NEXT: li r4, 48
+; CHECK-PWR8-NEXT: li r4, 64
 ; CHECK-PWR8-NEXT: lfd f14, 384(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT: ld r14, 240(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: li r4, 48
 ; CHECK-PWR8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload
 ; CHECK-PWR8-NEXT: addi r1, r1, 528
 ; CHECK-PWR8-NEXT: ld r0, 16(r1)
@@ -255,15 +276,18 @@ define dso_local signext i32 @caller_mixed(i32 signext %a, i32 signext %b) local
 ; CHECK-PWR9-NEXT: .cfi_offset r14, -288
 ; CHECK-PWR9-NEXT: .cfi_offset f14, -144
 ; CHECK-PWR9-NEXT: .cfi_offset v20, -480
+; CHECK-PWR9-NEXT: .cfi_offset v21, -464
 ; CHECK-PWR9-NEXT: std r14, 224(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT: stfd f14, 368(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT: stxv v20, 32(r1) # 16-byte Folded Spill
+; CHECK-PWR9-NEXT: stxv v21, 48(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT: #APP
 ; CHECK-PWR9-NEXT: add r3, r3, r4
 ; CHECK-PWR9-NEXT: #NO_APP
 ; CHECK-PWR9-NEXT: extsw r3, r3
 ; CHECK-PWR9-NEXT: bl callee
 ; CHECK-PWR9-NEXT: nop
+; CHECK-PWR9-NEXT: lxv v21, 48(r1) # 16-byte Folded Reload
 ; CHECK-PWR9-NEXT: lxv v20, 32(r1) # 16-byte Folded Reload
 ; CHECK-PWR9-NEXT: lfd f14, 368(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT: ld r14, 224(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-vsx-clobbers.ll b/llvm/test/CodeGen/PowerPC/inline-asm-vsx-clobbers.ll
new file mode 100644
index 00000000000..3cf0ce781ce
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/inline-asm-vsx-clobbers.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s
+
+define dso_local void @clobberVR(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
+; CHECK-LABEL: clobberVR:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: stxv v22, -160(r1) # 16-byte Folded Spill
+; CHECK-NEXT: #APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lxv v22, -160(r1) # 16-byte Folded Reload
+; CHECK-NEXT: blr
+entry:
+  tail call void asm sideeffect "nop", "~{vs54}"()
+  ret void
+}
+
+define dso_local void @clobberFPR(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
+; CHECK-LABEL: clobberFPR:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: #APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload
+; CHECK-NEXT: blr
+entry:
+  tail call void asm sideeffect "nop", "~{vs14}"()
+  ret void
+}

