Diffstat (limited to 'llvm/test')
31 files changed, 1834 insertions, 160 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll index 79a25c1e3b6..7887facb9ac 100644 --- a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll @@ -25,7 +25,7 @@ define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) { @var64 = global i64 0, align 8 ; Check stack slots are 64-bit at all times. -define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, +define void @test_stack_slots([8 x i64], i1 %bool, i8 %char, i16 %short, i32 %int, i64 %long) { ; CHECK-LABEL: test_stack_slots: ; CHECK-DAG: ldr w[[ext1:[0-9]+]], [sp, #24] diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll index 727c189721f..05f467e1934 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,5 @@ ; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64_32-apple-watchos -O3 -aarch64-enable-collect-loh | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chain ; contains instructions that are filtered out. ; diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll index 773286ef1d7..962e36ddb61 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,5 @@ ; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64_32-apple-ios -O2 | FileCheck %s ; Test case for <rdar://problem/15942912>. ; AdrpAddStr cannot be used when the store uses the same ; register as address and value.
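As an editorial aside (a sketch, not part of the diff; register and symbol names invented): the AdrpAddStr restriction above exists because the linker hint rewrites the add+str pair, which is only sound when the stored value lives in a different register from the address:

;   Lloh0:  adrp x8, _var@PAGE
;   Lloh1:  add  x8, x8, _var@PAGEOFF
;   Lloh2:  str  x8, [x8]        ; x8 is both the address and the stored value
; A ".loh AdrpAddStr Lloh0, Lloh1, Lloh2" hint would let the linker delete the
; add and fold the offset into the str, silently changing the value stored
; from x8, so the hint must not be emitted for this pattern.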
diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll index eb3607dd437..816e5a7cc6f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,4 +1,5 @@ ; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64_32-apple-watchos -O2 | FileCheck %s ; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh @@ -60,9 +61,9 @@ if.end4: ; preds = %if.then2, %if.then, ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i32 @getC() { @@ -76,9 +77,9 @@ define i32 @getC() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsw x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsw x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExtC() { @@ -94,10 +95,10 @@ define i64 @getSExtC() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] -; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], [x[[LDRGOT_REG]]] ; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 -; CHECK-NEXT: str [[ADD]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str [[ADD]], [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define void @getSeveralC(i32 %t) { @@ -114,9 +115,9 @@ entry: ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define void @setC(i32 %t) { @@ -142,7 +143,7 @@ entry: ; CHECK-NEXT: ret ; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] define i32 @getInternalCPlus4() { - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 %res = load i32, i32* %addr, align 4 ret i32 %res } @@ -159,7 +160,7 @@ define i32 @getInternalCPlus4() { ; CHECK-NEXT: ret ; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExtInternalCPlus4() { - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr =
getelementptr inbounds i32, i32* @InternalC, i32 4 %res = load i32, i32* %addr, align 4 %sextres = sext i32 %res to i64 ret i64 %sextres @@ -180,7 +181,7 @@ define i64 @getSExtInternalCPlus4() { ; CHECK: .loh AdrpAdd [[ADRP_LABEL]], [[ADDGOT_LABEL]] define void @getSeveralInternalCPlus4(i32 %t) { entry: - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 %tmp = load i32, i32* %addr, align 4 %add = add nsw i32 %tmp, %t store i32 %add, i32* %addr, align 4 @@ -200,7 +201,7 @@ entry: ; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] define void @setInternalCPlus4(i32 %t) { entry: - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 store i32 %t, i32* %addr, align 4 ret void } @@ -276,8 +277,8 @@ entry: ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] -; CHECK-NEXT: ldrb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldrb w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define i8 @getD() { @@ -289,9 +290,9 @@ define i8 @getD() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: strb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: strb w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setD(i8 %t) { @@ -305,9 +306,9 @@ define void @setD(i8 %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsb w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i32 @getSExtD() { @@ -322,9 +323,9 @@ define i32 @getSExtD() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsb x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsb x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExt64D() { @@ -341,8 +342,8 @@ define i64 @getSExt64D() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] -; CHECK-NEXT: ldrh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldrh w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh 
AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define i16 @getE() { @@ -356,9 +357,9 @@ define i16 @getE() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsh w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i32 @getSExtE() { @@ -371,9 +372,9 @@ define i32 @getSExtE() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: strh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: strh w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setE(i16 %t) { @@ -387,9 +388,9 @@ define void @setE(i16 %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsh x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsh x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExt64E() { @@ -406,9 +407,9 @@ define i64 @getSExt64E() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getF() { @@ -420,9 +421,9 @@ define i64 @getF() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setF(i64 %t) { @@ -438,9 +439,9 @@ define void @setF(i64 %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr s0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], 
[[LDR_LABEL]] define float @getG() { @@ -452,9 +453,9 @@ define float @getG() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str s0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setG(float %t) { @@ -470,9 +471,9 @@ define void @setG(float %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr h0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define half @getH() { @@ -484,9 +485,9 @@ define half @getH() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str h0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setH(half %t) { @@ -502,9 +503,9 @@ define void @setH(half %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define double @getI() { @@ -516,9 +517,9 @@ define double @getI() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setI(double %t) { @@ -534,9 +535,9 @@ define void @setI(double %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define <2 x i32> 
@getJ() { @@ -548,9 +549,9 @@ define <2 x i32> @getJ() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setJ(<2 x i32> %t) { @@ -566,9 +567,9 @@ define void @setJ(<2 x i32> %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr q0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define <4 x i32> @getK() { @@ -580,9 +581,9 @@ define <4 x i32> @getK() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str q0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setK(<4 x i32> %t) { @@ -598,9 +599,9 @@ define void @setK(<4 x i32> %t) { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr b0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr b0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define <1 x i8> @getL() { @@ -612,11 +613,11 @@ define <1 x i8> @getL() { ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] ; CHECK-NEXT: ; kill ; Ultimately we should generate str b0, but right now, we match the vector ; variant which does not allow folding the immediate into the store.
-; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: st1.b { v0 }[0], [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define void @setL(<1 x i8> %t) { diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index 7dcd6e25ae1..018a1143fc3 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-redzone | FileCheck %s +; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) { ; CHECK-LABEL: store64: diff --git a/llvm/test/CodeGen/AArch64/arm64-stacksave.ll b/llvm/test/CodeGen/AArch64/arm64-stacksave.ll index a79e99ba323..13d4ae23db6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-stacksave.ll +++ b/llvm/test/CodeGen/AArch64/arm64-stacksave.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -verify-coalescing +; RUN: llc -mtriple=arm64-apple-macosx10.8.0 < %s -verify-coalescing +; RUN: llc -mtriple=arm64_32-apple-ios9.0 < %s -verify-coalescing ; <rdar://problem/11522048> -target triple = "arm64-apple-macosx10.8.0" ; Verify that we can handle spilling the stack pointer without attempting ; spilling it directly. diff --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll new file mode 100644 index 00000000000..5995de2942e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +; If %base < 96 then the sum will not wrap (in an unsigned sense), but "ldr w0, +; [x0, #-96]" would. +define i32 @test_valid_wrap(i32 %base) { +; CHECK-LABEL: test_valid_wrap: +; CHECK: sub w[[ADDR:[0-9]+]], w0, #96 +; CHECK: ldr w0, [x[[ADDR]]] + + %newaddr = add nuw i32 %base, -96 + %ptr = inttoptr i32 %newaddr to i32* + %val = load i32, i32* %ptr + ret i32 %val +} + +define i8 @test_valid_wrap_optimizable(i8* %base) { +; CHECK-LABEL: test_valid_wrap_optimizable: +; CHECK: ldurb w0, [x0, #-96] + + %newaddr = getelementptr inbounds i8, i8* %base, i32 -96 + %val = load i8, i8* %newaddr + ret i8 %val +} + +define i8 @test_valid_wrap_optimizable1(i8* %base, i32 %offset) { +; CHECK-LABEL: test_valid_wrap_optimizable1: +; CHECK: ldrb w0, [x0, w1, sxtw] + + %newaddr = getelementptr inbounds i8, i8* %base, i32 %offset + %val = load i8, i8* %newaddr + ret i8 %val +} + +; +define i8 @test_valid_wrap_optimizable2(i8* %base, i32 %offset) { +; CHECK-LABEL: test_valid_wrap_optimizable2: +; CHECK: sxtw x[[OFFSET:[0-9]+]], w1 +; CHECK: mov w[[BASE:[0-9]+]], #-100 +; CHECK: ldrb w0, [x[[OFFSET]], x[[BASE]]] + + %newaddr = getelementptr inbounds i8, i8* inttoptr(i32 -100 to i8*), i32 %offset + %val = load i8, i8* %newaddr + ret i8 %val +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll new file mode 100644 index 00000000000..34682e82f3d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll @@ -0,0 +1,261 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 -o - %s | FileCheck %s + +define i8 @test_load_8(i8* %addr) { +; CHECK-LABEL: test_load_8: +; CHECK: ldarb w0, [x0] + %val = load atomic i8, i8* %addr seq_cst, align 1 + ret i8 %val +} + +define i16 @test_load_16(i16* %addr) { +; CHECK-LABEL: test_load_16: +; CHECK: ldarh w0, [x0] + %val = load atomic i16, i16* %addr acquire, align 2 + ret i16 %val +} + +define i32 @test_load_32(i32* %addr) {
+; CHECK-LABEL: test_load_32: +; CHECK: ldar w0, [x0] + %val = load atomic i32, i32* %addr seq_cst, align 4 + ret i32 %val +} + +define i64 @test_load_64(i64* %addr) { +; CHECK-LABEL: test_load_64: +; CHECK: ldar x0, [x0] + %val = load atomic i64, i64* %addr seq_cst, align 8 + ret i64 %val +} + +define i8* @test_load_ptr(i8** %addr) { +; CHECK-LABEL: test_load_ptr: +; CHECK: ldar w0, [x0] + %val = load atomic i8*, i8** %addr seq_cst, align 8 + ret i8* %val +} + +define void @test_store_8(i8* %addr) { +; CHECK-LABEL: test_store_8: +; CHECK: stlrb wzr, [x0] + store atomic i8 0, i8* %addr seq_cst, align 1 + ret void +} + +define void @test_store_16(i16* %addr) { +; CHECK-LABEL: test_store_16: +; CHECK: stlrh wzr, [x0] + store atomic i16 0, i16* %addr seq_cst, align 2 + ret void +} + +define void @test_store_32(i32* %addr) { +; CHECK-LABEL: test_store_32: +; CHECK: stlr wzr, [x0] + store atomic i32 0, i32* %addr seq_cst, align 4 + ret void +} + +define void @test_store_64(i64* %addr) { +; CHECK-LABEL: test_store_64: +; CHECK: stlr xzr, [x0] + store atomic i64 0, i64* %addr seq_cst, align 8 + ret void +} + +define void @test_store_ptr(i8** %addr) { +; CHECK-LABEL: test_store_ptr: +; CHECK: stlr wzr, [x0] + store atomic i8* null, i8** %addr seq_cst, align 8 + ret void +} + +declare i64 @llvm.aarch64.ldxr.p0i8(i8* %addr) +declare i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) +declare i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) +declare i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) + +define i8 @test_ldxr_8(i8* %addr) { +; CHECK-LABEL: test_ldxr_8: +; CHECK: ldxrb w0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr) + %val8 = trunc i64 %val to i8 + ret i8 %val8 +} + +define i16 @test_ldxr_16(i16* %addr) { +; CHECK-LABEL: test_ldxr_16: +; CHECK: ldxrh w0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) + %val16 = trunc i64 %val to i16 + ret i16 %val16 +} + +define i32 @test_ldxr_32(i32* %addr) { +; CHECK-LABEL: test_ldxr_32: +; CHECK: ldxr w0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) + %val32 = trunc i64 %val to i32 + ret i32 %val32 +} + +define i64 @test_ldxr_64(i64* %addr) { +; CHECK-LABEL: test_ldxr_64: +; CHECK: ldxr x0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) + ret i64 %val +} + +declare i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) +declare i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) +declare i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) +declare i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) + +define i8 @test_ldaxr_8(i8* %addr) { +; CHECK-LABEL: test_ldaxr_8: +; CHECK: ldaxrb w0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) + %val8 = trunc i64 %val to i8 + ret i8 %val8 +} + +define i16 @test_ldaxr_16(i16* %addr) { +; CHECK-LABEL: test_ldaxr_16: +; CHECK: ldaxrh w0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) + %val16 = trunc i64 %val to i16 + ret i16 %val16 +} + +define i32 @test_ldaxr_32(i32* %addr) { +; CHECK-LABEL: test_ldaxr_32: +; CHECK: ldaxr w0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) + %val32 = trunc i64 %val to i32 + ret i32 %val32 +} + +define i64 @test_ldaxr_64(i64* %addr) { +; CHECK-LABEL: test_ldaxr_64: +; CHECK: ldaxr x0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) + ret i64 %val +} + +declare i32 @llvm.aarch64.stxr.p0i8(i64, i8*) +declare i32 @llvm.aarch64.stxr.p0i16(i64, i16*) +declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*) +declare i32 @llvm.aarch64.stxr.p0i64(i64, i64*) + +define i32 @test_stxr_8(i8* %addr, i8 %val) { +; CHECK-LABEL: test_stxr_8: +; CHECK:
stxrb [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i8 %val to i64 + %success = call i32 @llvm.aarch64.stxr.p0i8(i64 %extval, i8* %addr) + ret i32 %success +} + +define i32 @test_stxr_16(i16* %addr, i16 %val) { +; CHECK-LABEL: test_stxr_16: +; CHECK: stxrh [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i16 %val to i64 + %success = call i32 @llvm.aarch64.stxr.p0i16(i64 %extval, i16* %addr) + ret i32 %success +} + +define i32 @test_stxr_32(i32* %addr, i32 %val) { +; CHECK-LABEL: test_stxr_32: +; CHECK: stxr [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i32 %val to i64 + %success = call i32 @llvm.aarch64.stxr.p0i32(i64 %extval, i32* %addr) + ret i32 %success +} + +define i32 @test_stxr_64(i64* %addr, i64 %val) { +; CHECK-LABEL: test_stxr_64: +; CHECK: stxr [[TMP:w[0-9]+]], x1, [x0] +; CHECK: mov w0, [[TMP]] + + %success = call i32 @llvm.aarch64.stxr.p0i64(i64 %val, i64* %addr) + ret i32 %success +} + +declare i32 @llvm.aarch64.stlxr.p0i8(i64, i8*) +declare i32 @llvm.aarch64.stlxr.p0i16(i64, i16*) +declare i32 @llvm.aarch64.stlxr.p0i32(i64, i32*) +declare i32 @llvm.aarch64.stlxr.p0i64(i64, i64*) + +define i32 @test_stlxr_8(i8* %addr, i8 %val) { +; CHECK-LABEL: test_stlxr_8: +; CHECK: stlxrb [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i8 %val to i64 + %success = call i32 @llvm.aarch64.stlxr.p0i8(i64 %extval, i8* %addr) + ret i32 %success +} + +define i32 @test_stlxr_16(i16* %addr, i16 %val) { +; CHECK-LABEL: test_stlxr_16: +; CHECK: stlxrh [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i16 %val to i64 + %success = call i32 @llvm.aarch64.stlxr.p0i16(i64 %extval, i16* %addr) + ret i32 %success +} + +define i32 @test_stlxr_32(i32* %addr, i32 %val) { +; CHECK-LABEL: test_stlxr_32: +; CHECK: stlxr [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i32 %val to i64 + %success = call i32 @llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr) + ret i32 %success +} + +define i32 @test_stlxr_64(i64* %addr, i64 %val) { +; CHECK-LABEL: test_stlxr_64: +; CHECK: stlxr [[TMP:w[0-9]+]], x1, [x0] +; CHECK: mov w0, [[TMP]] + + %success = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr) + ret i32 %success +} + +define {i8*, i1} @test_cmpxchg_ptr(i8** %addr, i8* %cmp, i8* %new) { +; CHECK-LABEL: test_cmpxchg_ptr: +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] +; CHECK: cmp [[OLD]], w1 +; CHECK: b.ne [[DONE:LBB[0-9]+_[0-9]+]] +; CHECK: stlxr [[SUCCESS:w[0-9]+]], w2, [x0] +; CHECK: cbnz [[SUCCESS]], [[LOOP]] + +; CHECK: mov w1, #1 +; CHECK: mov w0, [[OLD]] +; CHECK: ret + +; CHECK: [[DONE]]: +; CHECK: clrex +; CHECK: mov w1, wzr +; CHECK: mov w0, [[OLD]] +; CHECK: ret + %res = cmpxchg i8** %addr, i8* %cmp, i8* %new acq_rel acquire + ret {i8*, i1} %res +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll new file mode 100644 index 00000000000..15baad215a1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=arm64_32-apple-ios -O0 -fast-isel %s -o - | FileCheck %s +@var = global i8* null + +define void @test_store_release_ptr() { +; CHECK-LABEL: test_store_release_ptr +; CHECK: mov [[ZERO:w[0-9]+]], wzr +; CHECK: stlr [[ZERO]] + store atomic i8* null, i8** @var release, align 4 + br label %next + +next: + ret void +} + +declare [2 x i32] @callee() + +define void @test_struct_return(i32* %addr) { +; CHECK-LABEL: test_struct_return: +; 
CHECK: bl _callee +; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32 +; CHECK-DAG: str w0 + %res = call [2 x i32] @callee() + %res.0 = extractvalue [2 x i32] %res, 0 + store i32 %res.0, i32* %addr + %res.1 = extractvalue [2 x i32] %res, 1 + store i32 %res.1, i32* %addr + ret void +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-frame-pointers.ll b/llvm/test/CodeGen/AArch64/arm64_32-frame-pointers.ll new file mode 100644 index 00000000000..34f5d9b3160 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-frame-pointers.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=arm64_32-apple-ios8.0 %s -o - | FileCheck %s + +; We're provoking LocalStackSlotAllocation to create some shared frame bases +; here: it wants multiple <fi#N> using instructions that can be satisfied by a +; single base, but not within the addressing-mode. +; +; When that happens it's important that we don't mix our pointer sizes +; (e.g. try to create an ldr from a w-register base). +define i8 @test_register_wrangling() { +; CHECK-LABEL: test_register_wrangling: +; CHECK: add [[TMP:x[0-9]+]], sp, +; CHECK: add x[[BASE:[0-9]+]], [[TMP]], +; CHECK: ldrb {{w[0-9]+}}, [x[[BASE]], #1] +; CHECK: ldrb {{w[0-9]+}}, [x[[BASE]]] + + %var1 = alloca i8, i32 4100 + %var3 = alloca i8 + %dummy = alloca i8, i32 4100 + + %var1p1 = getelementptr i8, i8* %var1, i32 1 + %val1 = load i8, i8* %var1 + %val2 = load i8, i8* %var3 + + %sum = add i8 %val1, %val2 + ret i8 %sum +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll b/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll new file mode 100644 index 00000000000..21c49d38877 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll @@ -0,0 +1,61 @@ +; RUN: opt -codegenprepare -mtriple=arm64_32-apple-ios %s -S -o - | FileCheck %s + +define void @test_simple_sink(i1* %base, i64 %offset) { +; CHECK-LABEL: @test_simple_sink +; CHECK: next: +; CHECK: [[BASE8:%.*]] = bitcast i1* %base to i8* +; CHECK: [[ADDR8:%.*]] = getelementptr i8, i8* [[BASE8]], i64 %offset +; CHECK: [[ADDR:%.*]] = bitcast i8* [[ADDR8]] to i1* +; CHECK: load volatile i1, i1* [[ADDR]] + %addr = getelementptr i1, i1* %base, i64 %offset + %tst = load i1, i1* %addr + br i1 %tst, label %next, label %end + +next: + load volatile i1, i1* %addr + ret void + +end: + ret void +} + +define void @test_inbounds_sink(i1* %base, i64 %offset) { +; CHECK-LABEL: @test_inbounds_sink +; CHECK: next: +; CHECK: [[BASE8:%.*]] = bitcast i1* %base to i8* +; CHECK: [[ADDR8:%.*]] = getelementptr inbounds i8, i8* [[BASE8]], i64 %offset +; CHECK: [[ADDR:%.*]] = bitcast i8* [[ADDR8]] to i1* +; CHECK: load volatile i1, i1* [[ADDR]] + %addr = getelementptr inbounds i1, i1* %base, i64 %offset + %tst = load i1, i1* %addr + br i1 %tst, label %next, label %end + +next: + load volatile i1, i1* %addr + ret void + +end: + ret void +} + +; No address derived via an add can be guaranteed inbounds +define void @test_add_sink(i1* %base, i64 %offset) { +; CHECK-LABEL: @test_add_sink +; CHECK: next: +; CHECK: [[BASE8:%.*]] = bitcast i1* %base to i8* +; CHECK: [[ADDR8:%.*]] = getelementptr i8, i8* [[BASE8]], i64 %offset +; CHECK: [[ADDR:%.*]] = bitcast i8* [[ADDR8]] to i1* +; CHECK: load volatile i1, i1* [[ADDR]] + %base64 = ptrtoint i1* %base to i64 + %addr64 = add nsw nuw i64 %base64, %offset + %addr = inttoptr i64 %addr64 to i1* + %tst = load i1, i1* %addr + br i1 %tst, label %next, label %end + +next: + load volatile i1, i1* %addr + ret void + +end: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll b/llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll new 
file mode 100644 index 00000000000..f484a2fe651 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll @@ -0,0 +1,66 @@ +; RUN: llc -mtriple=arm64_32-apple-ios9.0 -o - %s | FileCheck %s + +define i64 @test_memcpy(i64* %addr, i8* %src, i1 %tst) minsize { +; CHECK-LABEL: test_memcpy: +; CHECK: ldr [[VAL64:x[0-9]+]], [x0] +; [...] +; CHECK: and x0, [[VAL64]], #0xffffffff +; CHECK: bl _memcpy + + %val64 = load i64, i64* %addr + br i1 %tst, label %true, label %false + +true: + ret i64 %val64 + +false: + %val32 = trunc i64 %val64 to i32 + %val.ptr = inttoptr i32 %val32 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %val.ptr, i8* %src, i32 128, i32 0, i1 1) + ret i64 undef +} + +define i64 @test_memmove(i64* %addr, i8* %src, i1 %tst) minsize { +; CHECK-LABEL: test_memmove: +; CHECK: ldr [[VAL64:x[0-9]+]], [x0] +; [...] +; CHECK: and x0, [[VAL64]], #0xffffffff +; CHECK: bl _memmove + + %val64 = load i64, i64* %addr + br i1 %tst, label %true, label %false + +true: + ret i64 %val64 + +false: + %val32 = trunc i64 %val64 to i32 + %val.ptr = inttoptr i32 %val32 to i8* + call void @llvm.memmove.p0i8.p0i8.i32(i8* %val.ptr, i8* %src, i32 128, i32 0, i1 1) + ret i64 undef +} + +define i64 @test_memset(i64* %addr, i8* %src, i1 %tst) minsize { +; CHECK-LABEL: test_memset: +; CHECK: ldr [[VAL64:x[0-9]+]], [x0] +; [...] +; CHECK: and x0, [[VAL64]], #0xffffffff +; CHECK: bl _memset + + %val64 = load i64, i64* %addr + br i1 %tst, label %true, label %false + +true: + ret i64 %val64 + +false: + %val32 = trunc i64 %val64 to i32 + %val.ptr = inttoptr i32 %val32 to i8* + call void @llvm.memset.p0i8.i32(i8* %val.ptr, i8 42, i32 256, i32 0, i1 1) + ret i64 undef +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) + diff --git a/llvm/test/CodeGen/AArch64/arm64_32-neon.ll b/llvm/test/CodeGen/AArch64/arm64_32-neon.ll new file mode 100644 index 00000000000..9a1ecb2bc16 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-neon.ll @@ -0,0 +1,198 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s + +define <2 x double> @test_insert_elt(<2 x double> %vec, double %val) { +; CHECK-LABEL: test_insert_elt: +; CHECK: mov.d v0[0], v1[0] + %res = insertelement <2 x double> %vec, double %val, i32 0 + ret <2 x double> %res +} + +define void @test_split_16B(<4 x float> %val, <4 x float>* %addr) { +; CHECK-LABEL: test_split_16B: +; CHECK: str q0, [x0] + store <4 x float> %val, <4 x float>* %addr, align 8 + ret void +} + +define void @test_split_16B_splat(<4 x i32>, <4 x i32>* %addr) { +; CHECK-LABEL: test_split_16B_splat: +; CHECK: str {{q[0-9]+}} + + %vec.tmp0 = insertelement <4 x i32> undef, i32 42, i32 0 + %vec.tmp1 = insertelement <4 x i32> %vec.tmp0, i32 42, i32 1 + %vec.tmp2 = insertelement <4 x i32> %vec.tmp1, i32 42, i32 2 + %vec = insertelement <4 x i32> %vec.tmp2, i32 42, i32 3 + + store <4 x i32> %vec, <4 x i32>* %addr, align 8 + ret void +} + + +%vec = type <2 x double> + +declare {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8*) +define {%vec, %vec} @test_neon_load(i8* %addr) { +; CHECK-LABEL: test_neon_load: +; CHECK: ld2r.2d { v0, v1 }, [x0] + %res = call {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8* %addr) + ret {%vec, %vec} %res +} + +declare {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec, %vec, i64, i8*) +define {%vec, %vec} @test_neon_load_lane(i8* %addr, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_load_lane: 
+; CHECK: ld2.d { v0, v1 }[0], [x0] + %res = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr) + ret {%vec, %vec} %res +} + +declare void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec, %vec, i8*) +define void @test_neon_store(i8* %addr, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store: +; CHECK: st2.2d { v0, v1 }, [x0] + call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr) + ret void +} + +declare void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec, %vec, i64, i8*) +define void @test_neon_store_lane(i8* %addr, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store_lane: +; CHECK: st2.d { v0, v1 }[1], [x0] + call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr) + ret void +} + +declare {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8*) +define {{%vec, %vec}, i8*} @test_neon_load_post(i8* %addr, i32 %offset) { +; CHECK-LABEL: test_neon_load_post: +; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: ld2.2d { v0, v1 }, [x0], [[OFFSET]] + + %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0 + %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1 + ret {{%vec, %vec}, i8*} %res +} + +define {{%vec, %vec}, i8*} @test_neon_load_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_load_post_lane: +; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: ld2.d { v0, v1 }[1], [x0], [[OFFSET]] + + %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0 + %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1 + ret {{%vec, %vec}, i8*} %res +} + +define i8* @test_neon_store_post(i8* %addr, i32 %offset, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store_post: +; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: st2.2d { v0, v1 }, [x0], [[OFFSET]] + + call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + ret i8* %addr.new +} + +define i8* @test_neon_store_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store_post_lane: +; CHECK: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: st2.d { v0, v1 }[0], [x0], [[OFFSET]] + + call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + ret i8* %addr.new +} + +; ld1 is slightly different because it goes via ISelLowering of normal IR ops +; rather than an intrinsic. 
+define {%vec, double*} @test_neon_ld1_post_lane(double* %addr, i32 %offset, %vec %in) { +; CHECK-LABEL: test_neon_ld1_post_lane: +; CHECK: sbfiz [[OFFSET:x[0-9]+]], x1, #3, #32 +; CHECK: ld1.d { v0 }[0], [x0], [[OFFSET]] + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr inbounds double, double* %addr, i32 %offset + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} + +define {{%vec, %vec}, i8*} @test_neon_load_post_exact(i8* %addr) { +; CHECK-LABEL: test_neon_load_post_exact: +; CHECK: ld2.2d { v0, v1 }, [x0], #32 + + %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 32 + + %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0 + %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1 + ret {{%vec, %vec}, i8*} %res +} + +define {%vec, double*} @test_neon_ld1_post_lane_exact(double* %addr, %vec %in) { +; CHECK-LABEL: test_neon_ld1_post_lane_exact: +; CHECK: ld1.d { v0 }[0], [x0], #8 + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr inbounds double, double* %addr, i32 1 + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} + +; As in the general load/store case, this GEP has defined semantics when the +; address wraps. We cannot use post-indexed addressing. +define {%vec, double*} @test_neon_ld1_notpost_lane_exact(double* %addr, %vec %in) { +; CHECK-LABEL: test_neon_ld1_notpost_lane_exact: +; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], #8 +; CHECK: add w0, w0, #8 +; CHECK: ret + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr double, double* %addr, i32 1 + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} + +define {%vec, double*} @test_neon_ld1_notpost_lane(double* %addr, i32 %offset, %vec %in) { +; CHECK-LABEL: test_neon_ld1_notpost_lane: +; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], {{x[0-9]+|sp}} +; CHECK: add w0, w0, w1, lsl #3 +; CHECK: ret + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr double, double* %addr, i32 %offset + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-null.ll b/llvm/test/CodeGen/AArch64/arm64_32-null.ll new file mode 100644 index 00000000000..9d62c56248b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-null.ll @@ -0,0 +1,28 @@ +; RUN: llc -fast-isel=true -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s +; RUN: llc -fast-isel=false -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +define void @test_store(i8** %p) { +; CHECK-LABEL: test_store: +; CHECK: mov [[R1:w[0-9]+]], wzr +; CHECK: str [[R1]], [x0] + + store i8* null, i8** %p + ret void +} + +define void @test_phi(i8** %p) { +; CHECK-LABEL: test_phi: +; CHECK: mov [[R1:x[0-9]+]], xzr +; 
CHECK: str [[R1]], [sp] +; CHECK: b [[BB:LBB[0-9_]+]] +; CHECK: [[BB]]: +; CHECK: ldr x0, [sp] +; CHECK: str w0, [x{{.*}}] + +bb0: + br label %bb1 +bb1: + %tmp0 = phi i8* [ null, %bb0 ] + store i8* %tmp0, i8** %p + ret void +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-pointer-extend.ll b/llvm/test/CodeGen/AArch64/arm64_32-pointer-extend.ll new file mode 100644 index 00000000000..74b88305b57 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-pointer-extend.ll @@ -0,0 +1,49 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - | FileCheck %s + +define void @pass_pointer(i64 %in) { +; CHECK-LABEL: pass_pointer: +; CHECK: and x0, x0, #0xffffffff +; CHECK: bl _take_pointer + + %in32 = trunc i64 %in to i32 + %ptr = inttoptr i32 %in32 to i8* + call i64 @take_pointer(i8* %ptr) + ret void +} + +define i64 @take_pointer(i8* %ptr) nounwind { +; CHECK-LABEL: take_pointer: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: ret + + %val = ptrtoint i8* %ptr to i32 + %res = zext i32 %val to i64 + ret i64 %res +} + +define i32 @callee_ptr_stack_slot([8 x i64], i8*, i32 %val) { +; CHECK-LABEL: callee_ptr_stack_slot: +; CHECK: ldr w0, [sp, #4] + + ret i32 %val +} + +define void @caller_ptr_stack_slot(i8* %ptr) { +; CHECK-LABEL: caller_ptr_stack_slot: +; CHECK-DAG: mov [[VAL:w[0-9]]], #42 +; CHECK: stp w0, [[VAL]], [sp] + + call i32 @callee_ptr_stack_slot([8 x i64] undef, i8* %ptr, i32 42) + ret void +} + +define i8* @return_ptr(i64 %in, i64 %r) { +; CHECK-LABEL: return_ptr: +; CHECK: sdiv [[VAL64:x[0-9]+]], x0, x1 +; CHECK: and x0, [[VAL64]], #0xffffffff + + %sum = sdiv i64 %in, %r + %sum32 = trunc i64 %sum to i32 + %res = inttoptr i32 %sum32 to i8* + ret i8* %res +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-stack-pointers.ll b/llvm/test/CodeGen/AArch64/arm64_32-stack-pointers.ll new file mode 100644 index 00000000000..a233e3416c1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-stack-pointers.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=arm64_32-apple-ios9.0 -o - %s | FileCheck %s + +declare void @callee([8 x i64], i8*, i8*) + +; Make sure we don't accidentally store X0 or XZR, which might well +; clobber other arguments or data. 
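Spelling out the hazard (an editorial note, not part of the diff; the stp below is the code the test expects):

;   On arm64_32 each i8* argument occupies a 4-byte stack slot, so:
;   str xzr, [sp]       ; wrong: a 64-bit store also clobbers %in's slot at [sp, #4]
;   stp wzr, w0, [sp]   ; right: two 32-bit stores, one per argument slot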
+define void @test_stack_ptr_32bits(i8* %in) { +; CHECK-LABEL: test_stack_ptr_32bits: +; CHECK-DAG: stp wzr, w0, [sp] + + call void @callee([8 x i64] undef, i8* null, i8* %in) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/arm64_32-tls.ll b/llvm/test/CodeGen/AArch64/arm64_32-tls.ll new file mode 100644 index 00000000000..fada715304c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-tls.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +define i32 @test_thread_local() { +; CHECK-LABEL: test_thread_local: +; CHECK: adrp x[[TMP:[0-9]+]], _var@TLVPPAGE +; CHECK: ldr w0, [x[[TMP]], _var@TLVPPAGEOFF] +; CHECK: ldr w[[DEST:[0-9]+]], [x0] +; CHECK: blr x[[DEST]] + + %val = load i32, i32* @var + ret i32 %val +} + +@var = thread_local global i32 zeroinitializer + +; CHECK: .tbss _var$tlv$init, 4, 2 + +; CHECK-LABEL: __DATA,__thread_vars +; CHECK: _var: +; CHECK: .long __tlv_bootstrap +; CHECK: .long 0 +; CHECK: .long _var$tlv$init diff --git a/llvm/test/CodeGen/AArch64/arm64_32-va.ll b/llvm/test/CodeGen/AArch64/arm64_32-va.ll new file mode 100644 index 00000000000..94ff4716139 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32-va.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +define void @test_va_copy(i8* %dst, i8* %src) { +; CHECK-LABEL: test_va_copy: +; CHECK: ldr [[PTR:w[0-9]+]], [x1] +; CHECK: str [[PTR]], [x0] + + call void @llvm.va_copy(i8* %dst, i8* %src) + ret void +} + +define void @test_va_start(i32, ...) { +; CHECK-LABEL: test_va_start +; CHECK: add x[[LIST:[0-9]+]], sp, #16 +; CHECK: str w[[LIST]], + %slot = alloca i8*, align 4 + %list = bitcast i8** %slot to i8* + call void @llvm.va_start(i8* %list) + ret void +} + +define void @test_va_start_odd([8 x i64], i32, ...) 
{ +; CHECK-LABEL: test_va_start_odd: +; CHECK: add x[[LIST:[0-9]+]], sp, #20 +; CHECK: str w[[LIST]], + %slot = alloca i8*, align 4 + %list = bitcast i8** %slot to i8* + call void @llvm.va_start(i8* %list) + ret void +} + +define i8* @test_va_arg(i8** %list) { +; CHECK-LABEL: test_va_arg: +; CHECK: ldr w[[LOC:[0-9]+]], [x0] +; CHECK: add [[NEXTLOC:w[0-9]+]], w[[LOC]], #4 +; CHECK: str [[NEXTLOC]], [x0] +; CHECK: ldr w0, [x[[LOC]]] + %res = va_arg i8** %list, i8* + ret i8* %res +} + +define i8* @really_test_va_arg(i8** %list, i1 %tst) { +; CHECK-LABEL: really_test_va_arg: +; CHECK: ldr w[[LOC:[0-9]+]], [x0] +; CHECK: add [[NEXTLOC:w[0-9]+]], w[[LOC]], #4 +; CHECK: str [[NEXTLOC]], [x0] +; CHECK: ldr w[[VAARG:[0-9]+]], [x[[LOC]]] +; CHECK: csel x0, x[[VAARG]], xzr + %tmp = va_arg i8** %list, i8* + %res = select i1 %tst, i8* %tmp, i8* null + ret i8* %res +} + +declare void @llvm.va_start(i8*) + +declare void @llvm.va_copy(i8*, i8*) diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll new file mode 100644 index 00000000000..5fd619409a1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -0,0 +1,715 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -filetype=obj -o - -disable-post-ra -frame-pointer=all | \ +; RUN: llvm-objdump -private-headers - | \ +; RUN: FileCheck %s --check-prefix=CHECK-MACHO +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -fast-isel -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST + +; CHECK-MACHO: Mach header +; CHECK-MACHO: MH_MAGIC ARM64_32 V8 + +@var64 = global i64 zeroinitializer, align 8 +@var32 = global i32 zeroinitializer, align 4 + +@var_got = external global i8 + +define i32* @test_global_addr() { +; CHECK-LABEL: test_global_addr: +; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE +; CHECK: add x0, [[PAGE]], _var32@PAGEOFF + ret i32* @var32 +} + +; ADRP is necessarily 64-bit. The important point to check is that, however that +; gets truncated to 32-bits, it's free. No need to zero out higher bits of that +; register. +define i64 @test_global_addr_extension() { +; CHECK-LABEL: test_global_addr_extension: +; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE +; CHECK: add x0, [[PAGE]], _var32@PAGEOFF +; CHECK-NOT: and +; CHECK: ret + + ret i64 ptrtoint(i32* @var32 to i64) +} + +define i32 @test_global_value() { +; CHECK-LABEL: test_global_value: +; CHECK: adrp x[[PAGE:[0-9]+]], _var32@PAGE +; CHECK: ldr w0, [x[[PAGE]], _var32@PAGEOFF] + %val = load i32, i32* @var32, align 4 + ret i32 %val +} + +; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here. +define i32 @test_unsafe_indexed_add() { +; CHECK-LABEL: test_unsafe_indexed_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_32 = add i32 %addr_int, 32 + %addr = inttoptr i32 %addr_plus_32 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +; Since we've promised there is no unsigned overflow, @var32 must be at least +; 32-bytes below 2^32, and we can use the load this time. 
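An editorial gloss on the wrap-safety comments in this file (not part of the diff):

;   The AArch64 addressing modes compute a 64-bit sum: zext(base) + offset.
;   "add nuw" promises base + offset does not wrap past 2^32, so
;   zext(base + offset) == zext(base) + offset and folding the offset into
;   the load computes the same address. "add nsw" only constrains the
;   sign-extended sum, which says nothing about the zero-extended one,
;   hence the "unsafe" cases below.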
+define i32 @test_safe_indexed_add() { +; CHECK-LABEL: test_safe_indexed_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i64 + %addr_plus_32 = add nuw i64 %addr_int, 32 + %addr = inttoptr i64 %addr_plus_32 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +define i32 @test_safe_indexed_or(i32 %in) { +; CHECK-LABEL: test_safe_indexed_or: +; CHECK: and [[TMP:w[0-9]+]], {{w[0-9]+}}, #0xfffffff0 +; CHECK: orr w[[ADDR:[0-9]+]], [[TMP]], #0x4 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = and i32 %in, -16 + %addr_plus_4 = or i32 %addr_int, 4 + %addr = inttoptr i32 %addr_plus_4 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + + +; Promising nsw is not sufficient because the addressing mode basically +; calculates "zext(base) + zext(offset)" and nsw only guarantees +; "sext(base) + sext(offset) == base + offset". +define i32 @test_unsafe_nsw_indexed_add() { +; CHECK-LABEL: test_unsafe_nsw_indexed_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 +; CHECK-NOT: ubfx +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_32 = add nsw i32 %addr_int, 32 + %addr = inttoptr i32 %addr_plus_32 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here. +define i32 @test_unsafe_unscaled_add() { +; CHECK-LABEL: test_unsafe_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_3 = add i32 %addr_int, 3 + %addr = inttoptr i32 %addr_plus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +; Since we've promised there is no unsigned overflow, @var32 must be at least +; 32-bytes below 2^32, and we can use the load this time. +define i32 @test_safe_unscaled_add() { +; CHECK-LABEL: test_safe_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_3 = add nuw i32 %addr_int, 3 + %addr = inttoptr i32 %addr_plus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +; Promising nsw is not sufficient because the addressing mode basically +; calculates "zext(base) + zext(offset)" and nsw only guarantees +; "sext(base) + sext(offset) == base + offset". +define i32 @test_unsafe_nsw_unscaled_add() { +; CHECK-LABEL: test_unsafe_nsw_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK-NOT: ubfx +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_3 = add nsw i32 %addr_int, 3 + %addr = inttoptr i32 %addr_plus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +; Because the addition may wrap, it is not safe to use "ldur w0, [xN, #-3]" +; here. 
+define i32 @test_unsafe_negative_unscaled_add() { +; CHECK-LABEL: test_unsafe_negative_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: sub w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_minus_3 = add i32 %addr_int, -3 + %addr = inttoptr i32 %addr_minus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +define i8* @test_got_addr() { +; CHECK-LABEL: test_got_addr: +; CHECK: adrp x[[PAGE:[0-9]+]], _var_got@GOTPAGE +; CHECK: ldr w0, [x[[PAGE]], _var_got@GOTPAGEOFF] + ret i8* @var_got +} + +define float @test_va_arg_f32(i8** %list) { +; CHECK-LABEL: test_va_arg_f32: + +; CHECK: ldr w[[START:[0-9]+]], [x0] +; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #8 +; CHECK: str [[AFTER]], [x0] + + ; Floating point arguments get promoted to double as per C99. +; CHECK: ldr [[DBL:d[0-9]+]], [x[[START]]] +; CHECK: fcvt s0, [[DBL]] + %res = va_arg i8** %list, float + ret float %res +} + +; Interesting point is that the slot is 4 bytes. +define i8 @test_va_arg_i8(i8** %list) { +; CHECK-LABEL: test_va_arg_i8: + +; CHECK: ldr w[[START:[0-9]+]], [x0] +; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #4 +; CHECK: str [[AFTER]], [x0] + + ; i8 gets promoted to int (again, as per C99). +; CHECK: ldr w0, [x[[START]]] + + %res = va_arg i8** %list, i8 + ret i8 %res +} + +; Interesting point is that the slot needs aligning (again, min size is 4 +; bytes). +define i64 @test_va_arg_i64(i64** %list) { +; CHECK-LABEL: test_va_arg_i64: + + ; Update the list for the next user (minimum slot size is 4, but the actual + ; argument is 8 which had better be reflected!) +; CHECK: ldr w[[UNALIGNED_START:[0-9]+]], [x0] +; CHECK: add [[ALIGN_TMP:x[0-9]+]], x[[UNALIGNED_START]], #7 +; CHECK: and x[[START:[0-9]+]], [[ALIGN_TMP]], #0x1fffffff8 +; CHECK: add w[[AFTER:[0-9]+]], w[[START]], #8 +; CHECK: str w[[AFTER]], [x0] + +; CHECK: ldr x0, [x[[START]]] + + %res = va_arg i64** %list, i64 + ret i64 %res +} + +declare void @bar(...) +define void @test_va_call(i8 %l, i8 %r, float %in, i8* %ptr) { +; CHECK-LABEL: test_va_call: +; CHECK: add [[SUM:w[0-9]+]], {{w[0-9]+}}, w1 + +; CHECK-DAG: str w2, [sp, #32] +; CHECK-DAG: str xzr, [sp, #24] +; CHECK-DAG: str s0, [sp, #16] +; CHECK-DAG: str xzr, [sp, #8] +; CHECK-DAG: str [[SUM]], [sp] + + ; Add them to ensure real promotion occurs. + %sum = add i8 %l, %r + call void(...) 
@bar(i8 %sum, i64 0, float %in, double 0.0, i8* %ptr) ret void } + +declare i8* @llvm.frameaddress(i32) + +define i8* @test_frameaddr() { +; CHECK-LABEL: test_frameaddr: +; CHECK: ldr {{w0|x0}}, [x29] + %val = call i8* @llvm.frameaddress(i32 1) + ret i8* %val +} + +declare i8* @llvm.returnaddress(i32) + +define i8* @test_toplevel_returnaddr() { +; CHECK-LABEL: test_toplevel_returnaddr: +; CHECK: mov x0, x30 + %val = call i8* @llvm.returnaddress(i32 0) + ret i8* %val +} + +define i8* @test_deep_returnaddr() { +; CHECK-LABEL: test_deep_returnaddr: +; CHECK: ldr x[[FRAME_REC:[0-9]+]], [x29] +; CHECK: ldr x0, [x[[FRAME_REC]], #8] + %val = call i8* @llvm.returnaddress(i32 1) + ret i8* %val +} + +define void @test_indirect_call(void()* %func) { +; CHECK-LABEL: test_indirect_call: +; CHECK: blr x0 + call void() %func() + ret void +} + +; Safe to use the unextended address here +define void @test_indirect_safe_call(i32* %weird_funcs) { +; CHECK-LABEL: test_indirect_safe_call: +; CHECK: add w[[ADDR32:[0-9]+]], w0, #4 +; CHECK-OPT-NOT: ubfx +; CHECK: blr x[[ADDR32]] + %addr = getelementptr i32, i32* %weird_funcs, i32 1 + %func = bitcast i32* %addr to void()* + call void() %func() + ret void +} + +declare void @simple() +define void @test_simple_tail_call() { +; CHECK-LABEL: test_simple_tail_call: +; CHECK: b _simple + tail call void @simple() + ret void +} + +define void @test_indirect_tail_call(void()* %func) { +; CHECK-LABEL: test_indirect_tail_call: +; CHECK: br x0 + tail call void() %func() + ret void +} + +; Safe to use the unextended address here +define void @test_indirect_safe_tail_call(i32* %weird_funcs) { +; CHECK-LABEL: test_indirect_safe_tail_call: +; CHECK: add w[[ADDR32:[0-9]+]], w0, #4 +; CHECK-OPT-NOT: ubfx +; CHECK-OPT: br x[[ADDR32]] + %addr = getelementptr i32, i32* %weird_funcs, i32 1 + %func = bitcast i32* %addr to void()* + tail call void() %func() + ret void +} + +; For the "armv7k" slice, Clang will be emitting some small structs as [N x +; i32]. For ABI compatibility with arm64_32 these need to be passed in *X* +; registers (e.g. [2 x i32] would be packed into a single register). + +define i32 @test_in_smallstruct_low([3 x i32] %in) { +; CHECK-LABEL: test_in_smallstruct_low: +; CHECK: mov x0, x1 + %val = extractvalue [3 x i32] %in, 2 + ret i32 %val +} + +define i32 @test_in_smallstruct_high([3 x i32] %in) { +; CHECK-LABEL: test_in_smallstruct_high: +; CHECK: lsr x0, x0, #32 + %val = extractvalue [3 x i32] %in, 1 + ret i32 %val +} + +; The 64-bit DarwinPCS ABI has the quirk that structs on the stack are always +; 64-bit aligned. This must not happen for arm64_32 since otherwise va_arg will +; be incompatible with the armv7k ABI.
+define i32 @test_in_smallstruct_stack([8 x i64], i32, [3 x i32] %in) { +; CHECK-LABEL: test_in_smallstruct_stack: +; CHECK: ldr w0, [sp, #4] + %val = extractvalue [3 x i32] %in, 0 + ret i32 %val +} + +define [2 x i32] @test_ret_smallstruct([3 x i32] %in) { +; CHECK-LABEL: test_ret_smallstruct: +; CHECK: mov x0, #1 +; CHECK: movk x0, #2, lsl #32 + + ret [2 x i32] [i32 1, i32 2] +} + +declare void @smallstruct_callee([4 x i32]) +define void @test_call_smallstruct() { +; CHECK-LABEL: test_call_smallstruct: +; CHECK: mov x0, #1 +; CHECK: movk x0, #2, lsl #32 +; CHECK: mov x1, #3 +; CHECK: movk x1, #4, lsl #32 +; CHECK: bl _smallstruct_callee + + call void @smallstruct_callee([4 x i32] [i32 1, i32 2, i32 3, i32 4]) + ret void +} + +declare void @smallstruct_callee_stack([8 x i64], i32, [2 x i32]) +define void @test_call_smallstruct_stack() { +; CHECK-LABEL: test_call_smallstruct_stack: +; CHECK: mov [[VAL:x[0-9]+]], #1 +; CHECK: movk [[VAL]], #2, lsl #32 +; CHECK: stur [[VAL]], [sp, #4] + + call void @smallstruct_callee_stack([8 x i64] undef, i32 undef, [2 x i32] [i32 1, i32 2]) + ret void +} + +declare [3 x i32] @returns_smallstruct() +define i32 @test_use_smallstruct_low() { +; CHECK-LABEL: test_use_smallstruct_low: +; CHECK: bl _returns_smallstruct +; CHECK: mov x0, x1 + + %struct = call [3 x i32] @returns_smallstruct() + %val = extractvalue [3 x i32] %struct, 2 + ret i32 %val +} + +define i32 @test_use_smallstruct_high() { +; CHECK-LABEL: test_use_smallstruct_high: +; CHECK: bl _returns_smallstruct +; CHECK: lsr x0, x0, #32 + + %struct = call [3 x i32] @returns_smallstruct() + %val = extractvalue [3 x i32] %struct, 1 + ret i32 %val +} + +; If a small struct can't be allocated to x0-x7, the remaining registers should +; be marked as unavailable and subsequent GPR arguments should also be on the +; stack. Obviously the struct itself should be passed entirely on the stack. 
+define i32 @test_smallstruct_padding([7 x i64], [4 x i32] %struct, i32 %in) { +; CHECK-LABEL: test_smallstruct_padding: +; CHECK-DAG: ldr [[IN:w[0-9]+]], [sp, #16] +; CHECK-DAG: ldr [[LHS:w[0-9]+]], [sp] +; CHECK: add w0, [[LHS]], [[IN]] + %lhs = extractvalue [4 x i32] %struct, 0 + %sum = add i32 %lhs, %in + ret i32 %sum +} + +declare void @take_small_smallstruct(i64, [1 x i32]) +define void @test_small_smallstruct() { +; CHECK-LABEL: test_small_smallstruct: +; CHECK-DAG: mov w0, #1 +; CHECK-DAG: mov w1, #2 +; CHECK: bl _take_small_smallstruct + call void @take_small_smallstruct(i64 1, [1 x i32] [i32 2]) + ret void +} + +define void @test_bare_frameaddr(i8** %addr) { +; CHECK-LABEL: test_bare_frameaddr: +; CHECK: add x[[LOCAL:[0-9]+]], sp, #{{[0-9]+}} +; CHECK: str w[[LOCAL]], + + %ptr = alloca i8 + store i8* %ptr, i8** %addr, align 4 + ret void +} + +define void @test_sret_use([8 x i64]* sret %out) { +; CHECK-LABEL: test_sret_use: +; CHECK: str xzr, [x8] + %addr = getelementptr [8 x i64], [8 x i64]* %out, i32 0, i32 0 + store i64 0, i64* %addr + ret void +} + +define i64 @test_sret_call() { +; CHECK-LABEL: test_sret_call: +; CHECK: mov x8, sp +; CHECK: bl _test_sret_use + %arr = alloca [8 x i64] + call void @test_sret_use([8 x i64]* sret %arr) + + %addr = getelementptr [8 x i64], [8 x i64]* %arr, i32 0, i32 0 + %val = load i64, i64* %addr + ret i64 %val +} + +define double @test_constpool() { +; CHECK-LABEL: test_constpool: +; CHECK: adrp x[[PAGE:[0-9]+]], [[POOL:lCPI[0-9]+_[0-9]+]]@PAGE +; CHECK: ldr d0, [x[[PAGE]], [[POOL]]@PAGEOFF] + ret double 1.0e-6 +} + +define i8* @test_blockaddress() { +; CHECK-LABEL: test_blockaddress: +; CHECK: [[BLOCK:Ltmp[0-9]+]]: +; CHECK: adrp [[PAGE:x[0-9]+]], [[BLOCK]]@PAGE +; CHECK: add x0, [[PAGE]], [[BLOCK]]@PAGEOFF + br label %dest +dest: + ret i8* blockaddress(@test_blockaddress, %dest) +} + +define i8* @test_indirectbr(i8* %dest) { +; CHECK-LABEL: test_indirectbr: +; CHECK: br x0 + indirectbr i8* %dest, [label %true, label %false] + +true: + ret i8* blockaddress(@test_indirectbr, %true) +false: + ret i8* blockaddress(@test_indirectbr, %false) +} + +; ISelDAGToDAG tries to fold an offset FI load (in this case var+4) into the +; actual load instruction. This needs to be done slightly carefully since we +; claim the FI in the process -- it doesn't need extending. 
+define float @test_frameindex_offset_load() { +; CHECK-LABEL: test_frameindex_offset_load: +; CHECK: ldr s0, [sp, #4] + %arr = alloca float, i32 4, align 8 + %addr = getelementptr inbounds float, float* %arr, i32 1 + + %val = load float, float* %addr, align 4 + ret float %val +} + +define void @test_unaligned_frameindex_offset_store() { +; CHECK-LABEL: test_unaligned_frameindex_offset_store: +; CHECK: mov x[[TMP:[0-9]+]], sp +; CHECK: orr w[[ADDR:[0-9]+]], w[[TMP]], #0x2 +; CHECK: mov [[VAL:w[0-9]+]], #42 +; CHECK: str [[VAL]], [x[[ADDR]]] + %arr = alloca [4 x i32] + + %addr.int = ptrtoint [4 x i32]* %arr to i32 + %addr.nextint = add nuw i32 %addr.int, 2 + %addr.next = inttoptr i32 %addr.nextint to i32* + store i32 42, i32* %addr.next + ret void +} + + +define {i64, i64*} @test_pre_idx(i64* %addr) { +; CHECK-LABEL: test_pre_idx: + +; CHECK: add w[[ADDR:[0-9]+]], w0, #8 +; CHECK: ldr x0, [x[[ADDR]]] + %addr.int = ptrtoint i64* %addr to i32 + %addr.next.int = add nuw i32 %addr.int, 8 + %addr.next = inttoptr i32 %addr.next.int to i64* + %val = load i64, i64* %addr.next + + %tmp = insertvalue {i64, i64*} undef, i64 %val, 0 + %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1 + + ret {i64, i64*} %res +} + +; Forming a post-indexed load is invalid here since the GEP needs to work when +; %addr wraps round to 0. +define {i64, i64*} @test_invalid_pre_idx(i64* %addr) { +; CHECK-LABEL: test_invalid_pre_idx: +; CHECK: add w1, w0, #8 +; CHECK: ldr x0, [x1] + %addr.next = getelementptr i64, i64* %addr, i32 1 + %val = load i64, i64* %addr.next + + %tmp = insertvalue {i64, i64*} undef, i64 %val, 0 + %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1 + + ret {i64, i64*} %res +} + +declare void @callee([8 x i32]*) +define void @test_stack_guard() ssp { +; CHECK-LABEL: test_stack_guard: +; CHECK: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE +; CHECK: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF] +; CHECK: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]] +; CHECK: stur [[GUARD_VAL]], [x29, #[[GUARD_OFFSET:-[0-9]+]]] + +; CHECK: add x0, sp, #{{[0-9]+}} +; CHECK: bl _callee + +; CHECK-OPT: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE +; CHECK-OPT: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF] +; CHECK-OPT: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]] +; CHECK-OPT: ldur [[NEW_VAL:w[0-9]+]], [x29, #[[GUARD_OFFSET]]] +; CHECK-OPT: cmp [[GUARD_VAL]], [[NEW_VAL]] +; CHECK-OPT: b.ne [[FAIL:LBB[0-9]+_[0-9]+]] + +; CHECK-OPT: [[FAIL]]: +; CHECK-OPT-NEXT: bl ___stack_chk_fail + %arr = alloca [8 x i32] + call void @callee([8 x i32]* %arr) + ret void +} + +declare i32 @__gxx_personality_v0(...) 
+declare void @eat_landingpad_args(i32, i8*, i32) +@_ZTI8Whatever = external global i8 +define void @test_landingpad_marshalling() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: test_landingpad_marshalling: +; CHECK-OPT: mov x2, x1 +; CHECK-OPT: mov x1, x0 +; CHECK: bl _eat_landingpad_args + invoke void @callee([8 x i32]* undef) to label %done unwind label %lpad + +lpad: ; preds = %entry + %exc = landingpad { i8*, i32 } + catch i8* @_ZTI8Whatever + %pointer = extractvalue { i8*, i32 } %exc, 0 + %selector = extractvalue { i8*, i32 } %exc, 1 + call void @eat_landingpad_args(i32 undef, i8* %pointer, i32 %selector) + ret void + +done: + ret void +} + +define void @test_dynamic_stackalloc() { +; CHECK-LABEL: test_dynamic_stackalloc: +; CHECK: sub [[REG:x[0-9]+]], sp, #32 +; CHECK: mov sp, [[REG]] +; CHECK-OPT-NOT: ubfx +; CHECK: bl _callee + br label %next + +next: + %val = alloca [8 x i32] + call void @callee([8 x i32]* %val) + ret void +} + +define void @test_asm_memory(i32* %base.addr) { +; CHECK-LABEL: test_asm_memory: +; CHECK: add w[[ADDR:[0-9]+]], w0, #4 +; CHECK: str wzr, [x[[ADDR]] + %addr = getelementptr i32, i32* %base.addr, i32 1 + call void asm sideeffect "str wzr, $0", "*m"(i32* %addr) + ret void +} + +define void @test_unsafe_asm_memory(i64 %val) { +; CHECK-LABEL: test_unsafe_asm_memory: +; CHECK: and x[[ADDR:[0-9]+]], x0, #0xffffffff +; CHECK: str wzr, [x[[ADDR]]] + %addr_int = trunc i64 %val to i32 + %addr = inttoptr i32 %addr_int to i32* + call void asm sideeffect "str wzr, $0", "*m"(i32* %addr) + ret void +} + +define [9 x i8*] @test_demoted_return(i8* %in) { +; CHECK-LABEL: test_demoted_return: +; CHECK: str w0, [x8, #32] + %res = insertvalue [9 x i8*] undef, i8* %in, 8 + ret [9 x i8*] %res +} + +define i8* @test_inttoptr(i64 %in) { +; CHECK-LABEL: test_inttoptr: +; CHECK: and x0, x0, #0xffffffff + %res = inttoptr i64 %in to i8* + ret i8* %res +} + +declare i32 @llvm.get.dynamic.area.offset.i32() +define i32 @test_dynamic_area() { +; CHECK-LABEL: test_dynamic_area: +; CHECK: mov w0, wzr + %res = call i32 @llvm.get.dynamic.area.offset.i32() + ret i32 %res +} + +define void @test_pointer_vec_store(<2 x i8*>* %addr) { +; CHECK-LABEL: test_pointer_vec_store: +; CHECK: str xzr, [x0] +; CHECK-NOT: str +; CHECK-NOT: stp + + store <2 x i8*> zeroinitializer, <2 x i8*>* %addr, align 16 + ret void +} + +define <2 x i8*> @test_pointer_vec_load(<2 x i8*>* %addr) { +; CHECK-LABEL: test_pointer_vec_load: +; CHECK: ldr d[[TMP:[0-9]+]], [x0] +; CHECK: ushll.2d v0, v[[TMP]], #0 + %val = load <2 x i8*>, <2 x i8*>* %addr, align 16 + ret <2 x i8*> %val +} + +define void @test_inline_asm_mem_pointer(i32* %in) { +; CHECK-LABEL: test_inline_asm_mem_pointer: +; CHECK: str w0, + tail call void asm sideeffect "ldr x0, $0", "rm"(i32* %in) + ret void +} + + +define void @test_struct_hi(i32 %hi) nounwind { +; CHECK-LABEL: test_struct_hi: +; CHECK: mov w[[IN:[0-9]+]], w0 +; CHECK: bl _get_int +; CHECK-NEXT: bfi x0, x[[IN]], #32, #32 +; CHECK-NEXT: bl _take_pair + %val.64 = call i64 @get_int() + %val.32 = trunc i64 %val.64 to i32 + + %pair.0 = insertvalue [2 x i32] undef, i32 %val.32, 0 + %pair.1 = insertvalue [2 x i32] %pair.0, i32 %hi, 1 + call void @take_pair([2 x i32] %pair.1) + + ret void +} +declare void @take_pair([2 x i32]) +declare i64 @get_int() + +define i1 @test_icmp_ptr(i8* %in) { +; CHECK-LABEL: test_icmp_ptr +; CHECK: ubfx x0, x0, #31, #1 + %res = icmp slt i8* %in, null + ret i1 %res +} + +define void @test_multiple_icmp_ptr(i8* %l, i8* %r) { +; 
CHECK-LABEL: test_multiple_icmp_ptr: +; CHECK: tbnz w0, #31, [[FALSEBB:LBB[0-9]+_[0-9]+]] +; CHECK: tbnz w1, #31, [[FALSEBB]] + %tst1 = icmp sgt i8* %l, inttoptr (i32 -1 to i8*) + %tst2 = icmp sgt i8* %r, inttoptr (i32 -1 to i8*) + %tst = and i1 %tst1, %tst2 + br i1 %tst, label %true, label %false + +true: + call void(...) @bar() + ret void + +false: + ret void +} + +define { [18 x i8] }* @test_gep_nonpow2({ [18 x i8] }* %a0, i32 %a1) { +; CHECK-LABEL: test_gep_nonpow2: +; CHECK: mov w[[SIZE:[0-9]+]], #18 +; CHECK-NEXT: smaddl x0, w1, w[[SIZE]], x0 +; CHECK-NEXT: ret + %tmp0 = getelementptr inbounds { [18 x i8] }, { [18 x i8] }* %a0, i32 %a1 + ret { [18 x i8] }* %tmp0 +} + +define void @test_bzero(i64 %in) { +; CHECK-LABEL: test_bzero: +; CHECK-DAG: lsr x1, x0, #32 +; CHECK-DAG: and x0, x0, #0xffffffff +; CHECK: bl _bzero + + %ptr.i32 = trunc i64 %in to i32 + %size.64 = lshr i64 %in, 32 + %size = trunc i64 %size.64 to i32 + %ptr = inttoptr i32 %ptr.i32 to i8* + tail call void @llvm.memset.p0i8.i32(i8* align 4 %ptr, i8 0, i32 %size, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) diff --git a/llvm/test/CodeGen/AArch64/fastcc-reserved.ll b/llvm/test/CodeGen/AArch64/fastcc-reserved.ll index b5e03f08280..a463e622179 100644 --- a/llvm/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/llvm/test/CodeGen/AArch64/fastcc-reserved.ll @@ -4,7 +4,7 @@ ; call-frame is not reserved (hence disable-fp-elim), but where ; callee-pop can occur (hence tailcallopt). -declare fastcc void @will_pop([8 x i32], i32 %val) +declare fastcc void @will_pop([8 x i64], i32 %val) define fastcc void @foo(i32 %in) { ; CHECK-LABEL: foo: @@ -18,7 +18,7 @@ define fastcc void @foo(i32 %in) { ; Reserve space for call-frame: ; CHECK: str w{{[0-9]+}}, [sp, #-16]! - call fastcc void @will_pop([8 x i32] undef, i32 42) + call fastcc void @will_pop([8 x i64] undef, i32 42) ; CHECK: bl will_pop ; Since @will_pop is fastcc with tailcallopt, it will put the stack @@ -31,7 +31,7 @@ define fastcc void @foo(i32 %in) { ret void } -declare void @wont_pop([8 x i32], i32 %val) +declare void @wont_pop([8 x i64], i32 %val) define void @foo1(i32 %in) { ; CHECK-LABEL: foo1: @@ -44,7 +44,7 @@ define void @foo1(i32 %in) { ; Reserve space for call-frame ; CHECK: str w{{[0-9]+}}, [sp, #-16]! - call void @wont_pop([8 x i32] undef, i32 42) + call void @wont_pop([8 x i64] undef, i32 42) ; CHECK: bl wont_pop ; This time we *do* need to unreserve the call-frame diff --git a/llvm/test/CodeGen/AArch64/fastcc.ll b/llvm/test/CodeGen/AArch64/fastcc.ll index d4e116134cd..fbdbf60ac8f 100644 --- a/llvm/test/CodeGen/AArch64/fastcc.ll +++ b/llvm/test/CodeGen/AArch64/fastcc.ll @@ -18,7 +18,7 @@ define fastcc void @func_stack0() { ; CHECK-TAIL: str w{{[0-9]+}}, [sp] - call fastcc void @func_stack8([8 x i32] undef, i32 42) + call fastcc void @func_stack8([8 x i64] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, ; CHECK-NOT: [sp, #{{[-0-9]+}}]! @@ -28,7 +28,7 @@ define fastcc void @func_stack0() { ; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! 
- call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) + call fastcc void @func_stack32([8 x i64] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, @@ -56,7 +56,7 @@ define fastcc void @func_stack0() { ; CHECK-TAIL-NEXT: ret } -define fastcc void @func_stack8([8 x i32], i32 %stacked) { +define fastcc void @func_stack8([8 x i64], i32 %stacked) { ; CHECK-LABEL: func_stack8: ; CHECK: sub sp, sp, #48 ; CHECK: stp x29, x30, [sp, #32] @@ -71,7 +71,7 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-TAIL: str w{{[0-9]+}}, [sp] - call fastcc void @func_stack8([8 x i32] undef, i32 42) + call fastcc void @func_stack8([8 x i64] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, ; CHECK-NOT: [sp, #{{[-0-9]+}}]! @@ -82,7 +82,7 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) + call fastcc void @func_stack32([8 x i64] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, @@ -109,7 +109,7 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-TAIL-NEXT: ret } -define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32: ; CHECK: add x29, sp, #32 @@ -117,7 +117,7 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { ; CHECK-TAIL: add x29, sp, #32 - call fastcc void @func_stack8([8 x i32] undef, i32 42) + call fastcc void @func_stack8([8 x i64] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, ; CHECK-NOT: [sp, #{{[-0-9]+}}]! @@ -127,7 +127,7 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { ; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) + call fastcc void @func_stack32([8 x i64] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, @@ -155,7 +155,7 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { } ; Check that arg stack pop is done after callee-save restore when no frame pointer is used. -define fastcc void @func_stack32_leaf([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32_leaf([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf: ; CHECK: str x20, [sp, #-16]! ; CHECK: nop @@ -186,7 +186,7 @@ define fastcc void @func_stack32_leaf([8 x i32], i128 %stacked0, i128 %stacked1) } ; Check that arg stack pop is done after callee-save restore when no frame pointer is used. -define fastcc void @func_stack32_leaf_local([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32_leaf_local([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf_local: ; CHECK: sub sp, sp, #32 ; CHECK-NEXT: str x20, [sp, #16] @@ -222,7 +222,7 @@ define fastcc void @func_stack32_leaf_local([8 x i32], i128 %stacked0, i128 %sta } ; Check that arg stack pop is done after callee-save restore when no frame pointer is used. 
-define fastcc void @func_stack32_leaf_local_nocs([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32_leaf_local_nocs([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf_local_nocs: ; CHECK: sub sp, sp, #16 ; CHECK: add sp, sp, #16 diff --git a/llvm/test/CodeGen/AArch64/jump-table-32.ll b/llvm/test/CodeGen/AArch64/jump-table-32.ll new file mode 100644 index 00000000000..339a44fc95a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/jump-table-32.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s + +define i32 @test_jumptable(i32 %in) { +; CHECK: test_jumptable + + switch i32 %in, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] +; CHECK: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE +; CHECK: mov w[[INDEX:[0-9]+]], w0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], LJTI0_0@PAGEOFF +; CHECK: adr [[BASE_BLOCK:x[0-9]+]], LBB0_2 +; CHECK: ldrb w[[OFFSET:[0-9]+]], [x[[JT]], x[[INDEX]]] +; CHECK: add [[DEST:x[0-9]+]], [[BASE_BLOCK]], x[[OFFSET]], lsl #2 +; CHECK: br [[DEST]] + +def: + ret i32 0 + +lbl1: + ret i32 1 + +lbl2: + ret i32 2 + +lbl3: + ret i32 4 + +lbl4: + ret i32 8 + +} + +; CHECK: LJTI0_0: +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte diff --git a/llvm/test/CodeGen/AArch64/sibling-call.ll b/llvm/test/CodeGen/AArch64/sibling-call.ll index be59f27fa85..a9e0225187e 100644 --- a/llvm/test/CodeGen/AArch64/sibling-call.ll +++ b/llvm/test/CodeGen/AArch64/sibling-call.ll @@ -1,8 +1,8 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-ldst-opt=0 | FileCheck %s declare void @callee_stack0() -declare void @callee_stack8([8 x i32], i64) -declare void @callee_stack16([8 x i32], i64, i64) +declare void @callee_stack8([8 x i64], i64) +declare void @callee_stack16([8 x i64], i64, i64) define void @caller_to0_from0() nounwind { ; CHECK-LABEL: caller_to0_from0: @@ -12,7 +12,7 @@ define void @caller_to0_from0() nounwind { ; CHECK-NEXT: b callee_stack0 } -define void @caller_to0_from8([8 x i32], i64) nounwind{ +define void @caller_to0_from8([8 x i64], i64) nounwind{ ; CHECK-LABEL: caller_to0_from8: ; CHECK-NEXT: // %bb. @@ -26,51 +26,51 @@ define void @caller_to8_from0() { ; Caller isn't going to clean up any extra stack we allocate, so it ; can't be a tail call. - tail call void @callee_stack8([8 x i32] undef, i64 42) + tail call void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: bl callee_stack8 } -define void @caller_to8_from8([8 x i32], i64 %a) { +define void @caller_to8_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to8_from8: ; CHECK-NOT: sub sp, sp, ; This should reuse our stack area for the 42 - tail call void @callee_stack8([8 x i32] undef, i64 42) + tail call void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp] ; CHECK-NEXT: b callee_stack8 } -define void @caller_to16_from8([8 x i32], i64 %a) { +define void @caller_to16_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to16_from8: ; Shouldn't be a tail call: we can't use SP+8 because our caller might ; have something there. This may sound obvious but the implementation does ; some funky aligning. 
- tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef) + tail call void @callee_stack16([8 x i64] undef, i64 undef, i64 undef) ; CHECK: bl callee_stack16 ret void } -define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +define void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: caller_to8_from24: ; CHECK-NOT: sub sp, sp ; Reuse our area, putting "42" at incoming sp - tail call void @callee_stack8([8 x i32] undef, i64 42) + tail call void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp] ; CHECK-NEXT: b callee_stack8 } -define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +define void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { ; CHECK-LABEL: caller_to16_from16: ; CHECK-NOT: sub sp, sp, ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. - tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + tail call void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) ret void ; CHECK: ldr [[VAL0:x[0-9]+]], diff --git a/llvm/test/CodeGen/AArch64/swift-return.ll b/llvm/test/CodeGen/AArch64/swift-return.ll index 3d0bed4c934..2bf5e379b37 100644 --- a/llvm/test/CodeGen/AArch64/swift-return.ll +++ b/llvm/test/CodeGen/AArch64/swift-return.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 ; CHECK-LABEL: test1 ; CHECK: bl _gen @@ -8,7 +10,7 @@ ; CHECK-O0-LABEL: test1 ; CHECK-O0: bl _gen ; CHECK-O0: sxth [[TMP:w.*]], w0 -; CHECK-O0: add w8, [[TMP]], w1, sxtb +; CHECK-O0: add {{w[0-9]+}}, [[TMP]], w1, sxtb define i16 @test1(i32) { entry: %call = call swiftcc { i16, i8 } @gen(i32 %0) diff --git a/llvm/test/CodeGen/AArch64/swiftcc.ll b/llvm/test/CodeGen/AArch64/swiftcc.ll index 43249542715..fb74fe4a6b1 100644 --- a/llvm/test/CodeGen/AArch64/swiftcc.ll +++ b/llvm/test/CodeGen/AArch64/swiftcc.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s ; CHECK: t1 ; CHECK: fadd s0, s0, s1 diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll index 823599d5de6..93529d1ee86 100644 --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -1,5 +1,7 @@ -; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE %s -; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap 
--check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-AARCH64 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-AARCH64 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-ARM64_32 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -O0 -fast-isel < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-ARM64_32 %s declare i8* @malloc(i64) declare void @free(i8*) @@ -40,7 +42,8 @@ define float @caller(i8* %error_ref) { ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo ; CHECK-APPLE: mov x0, x21 -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: cbnz x21 +; CHECK-APPLE-ARM64_32: cbnz w0 ; Access part of the error object and save it to error_ref ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] @@ -50,7 +53,8 @@ define float @caller(i8* %error_ref) { ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[ID:x[0-9]+]], x21 -; CHECK-O0: cbnz x21 +; CHECK-O0-AARCH64: cbnz x21 +; CHECK-O0-ARM64_32: cmp x21, #0 entry: %error_ptr_ref = alloca swifterror %swift_error* store %swift_error* null, %swift_error** %error_ptr_ref @@ -76,7 +80,8 @@ define float @caller2(i8* %error_ref) { ; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: cbnz x21 +; CHECK-APPLE-ARM64_32: cbnz w21 ; CHECK-APPLE: fcmp s0, [[CMP]] ; CHECK-APPLE: b.le ; Access part of the error object and save it to error_ref @@ -89,7 +94,8 @@ define float @caller2(i8* %error_ref) { ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[ID:x[0-9]+]], x21 -; CHECK-O0: cbnz x21 +; CHECK-O0-AARCH64: cbnz x21 +; CHECK-O0-ARM64_32: cmp x21, #0 entry: %error_ptr_ref = alloca swifterror %swift_error* br label %bb_loop @@ -171,29 +177,52 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE: ret -; CHECK-O0-LABEL: foo_loop: +; CHECK-O0-AARCH64-LABEL: foo_loop: ; spill x21 -; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]] -; CHECK-O0: b [[BB1:[A-Za-z0-9_]*]] -; CHECK-O0: [[BB1]]: -; CHECK-O0: ldr x0, [sp, [[SLOT]]] -; CHECK-O0: str x0, [sp, [[SLOT2:#[0-9]+]]] -; CHECK-O0: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] -; CHECK-O0: mov w{{.*}}, #16 -; CHECK-O0: malloc -; CHECK-O0: mov [[ID:x[0-9]+]], x0 -; CHECK-O0: strb w{{.*}}, [{{.*}}[[ID]], #8] +; CHECK-O0-AARCH64: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]] +; CHECK-O0-AARCH64: [[BB1]]: +; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT]]] +; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2:#[0-9]+]]] +; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] +; CHECK-O0-AARCH64: mov w{{.*}}, #16 +; CHECK-O0-AARCH64: malloc +; CHECK-O0-AARCH64: mov [[ID:x[0-9]+]], x0 +; CHECK-O0-AARCH64: strb w{{.*}}, [{{.*}}[[ID]], #8] ; spill x0 -; CHECK-O0: str x0, [sp, [[SLOT2]]] -; CHECK-O0:[[BB2]]: -; CHECK-O0: ldr x0, [sp, [[SLOT2]]] -; CHECK-O0: fcmp -; CHECK-O0: str x0, [sp] -; CHECK-O0: b.le [[BB1]] +; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2]]] +; CHECK-O0-AARCH64:[[BB2]]: +; CHECK-O0-AARCH64: ldr x0, [sp, 
[[SLOT2]]] +; CHECK-O0-AARCH64: fcmp +; CHECK-O0-AARCH64: str x0, [sp] +; CHECK-O0-AARCH64: b.le [[BB1]] ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp] -; CHECK-O0: mov x21, [[ID3]] -; CHECK-O0: ret +; CHECK-O0-AARCH64: ldr [[ID3:x[0-9]+]], [sp] +; CHECK-O0-AARCH64: mov x21, [[ID3]] +; CHECK-O0-AARCH64: ret + +; CHECK-O0-ARM64_32-LABEL: foo_loop: +; spill x21 +; CHECK-O0-ARM64_32: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0-ARM64_32: b [[BB1:[A-Za-z0-9_]*]] +; CHECK-O0-ARM64_32: [[BB1]]: +; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT]]] +; CHECK-O0-ARM64_32: str x0, [sp, [[SLOT2:#[0-9]+]]] +; CHECK-O0-ARM64_32: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] +; CHECK-O0-ARM64_32: mov w{{.*}}, #16 +; CHECK-O0-ARM64_32: malloc +; CHECK-O0-ARM64_32: mov {{.*}}, x0 +; CHECK-O0-ARM64_32: strb w{{.*}}, +; CHECK-O0-ARM64_32:[[BB2]]: +; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT2]]] +; CHECK-O0-ARM64_32: fcmp +; CHECK-O0-ARM64_32: str x0, [sp[[OFFSET:.*]]] +; CHECK-O0-ARM64_32: b.le [[BB1]] +; reload from stack +; CHECK-O0-ARM64_32: ldr [[ID3:x[0-9]+]], [sp[[OFFSET]]] +; CHECK-O0-ARM64_32: mov x21, [[ID3]] +; CHECK-O0-ARM64_32: ret + entry: br label %bb_loop @@ -261,7 +290,8 @@ define float @caller3(i8* %error_ref) { ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_sret ; CHECK-APPLE: mov x0, x21 -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: cbnz x21 +; CHECK-APPLE-ARM64_32: cbnz w0 ; Access part of the error object and save it to error_ref ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] @@ -273,7 +303,8 @@ define float @caller3(i8* %error_ref) { ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:x[0-9]+]], x21 -; CHECK-O0: cbnz x21 +; CHECK-O0-AARCH64: cbnz x21 +; CHECK-O0-ARM64_32: cmp x21, #0 ; Access part of the error object and save it to error_ref ; reload from stack ; CHECK-O0: ldrb [[CODE:w[0-9]+]] @@ -306,20 +337,22 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) 
{ ; CHECK-APPLE-LABEL: foo_vararg: ; CHECK-APPLE: mov w0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE-DAG: mov [[ID:w[0-9]+]], #1 -; CHECK-APPLE-DAG: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 -; CHECK-APPLE-DAG: strb [[ID]], [x0, #8] ; First vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16] +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16] +; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1 +; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16 +; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8] ; Second vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] -; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16 +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] ; Third vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] + +; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1 +; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 +; CHECK-APPLE-ARM64_32: strb [[ID]], [x0, #8] + -; CHECK-APPLE: mov x21, x0 -; CHECK-APPLE-NOT: x21 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -347,18 +380,18 @@ entry: define float @caller4(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller4: -; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 -; CHECK-APPLE: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] -; CHECK-APPLE: str {{x[0-9]+}}, [sp] +; CHECK-APPLE-AARCH64: mov [[ID:x[0-9]+]], x0 +; CHECK-APPLE-AARCH64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] +; CHECK-APPLE-AARCH64: str {{x[0-9]+}}, [sp] -; CHECK-APPLE: mov x21, xzr -; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: mov x0, x21 -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: mov x21, xzr +; CHECK-APPLE-AARCH64: bl {{.*}}foo_vararg +; CHECK-APPLE-AARCH64: mov x0, x21 +; CHECK-APPLE-AARCH64: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] -; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: bl {{.*}}free +; CHECK-APPLE-AARCH64: ldrb [[CODE:w[0-9]+]], [x0, #8] +; CHECK-APPLE-AARCH64: strb [[CODE]], [{{.*}}[[ID]]] +; CHECK-APPLE-AARCH64: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* store %swift_error* null, %swift_error** %error_ptr_ref diff --git a/llvm/test/CodeGen/AArch64/swiftself.ll b/llvm/test/CodeGen/AArch64/swiftself.ll index 063085636b3..a13fbb8d5a6 100644 --- a/llvm/test/CodeGen/AArch64/swiftself.ll +++ b/llvm/test/CodeGen/AArch64/swiftself.ll @@ -1,6 +1,7 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s ; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTARM64_32 %s ; Parameter with swiftself should be allocated to x20. ; CHECK-LABEL: swiftself_param: @@ -48,8 +49,9 @@ define void @swiftself_passthrough(i8* swiftself %addr0) { ; We can use a tail call if the callee swiftself is the same as the caller one. 
; This should also work with fast-isel. ; CHECK-LABEL: swiftself_tail: -; CHECK: b {{_?}}swiftself_param -; CHECK-NOT: ret +; OPTAARCH64: b {{_?}}swiftself_param +; OPTAARCH64-NOT: ret +; OPTARM64_32: bl {{_?}}swiftself_param define i8* @swiftself_tail(i8* swiftself %addr0) { call void asm sideeffect "", "~{x20}"() %res = tail call i8* @swiftself_param(i8* swiftself %addr0) @@ -71,12 +73,19 @@ define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { ; we normally would. We marked the first parameter with swiftself which means it ; will no longer be passed in x0. declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself) -; OPT-LABEL: swiftself_nothisreturn: -; OPT-DAG: ldr x20, [x20] -; OPT-DAG: mov [[CSREG:x[1-9].*]], x8 -; OPT: bl {{_?}}thisreturn_attribute -; OPT: str x0, {{\[}}[[CSREG]] -; OPT: ret +; OPTAARCH64-LABEL: swiftself_nothisreturn: +; OPTAARCH64-DAG: ldr x20, [x20] +; OPTAARCH64-DAG: mov [[CSREG:x[1-9].*]], x8 +; OPTAARCH64: bl {{_?}}thisreturn_attribute +; OPTAARCH64: str x0, {{\[}}[[CSREG]] +; OPTAARCH64: ret + +; OPTARM64_32-LABEL: swiftself_nothisreturn: +; OPTARM64_32-DAG: ldr w20, [x20] +; OPTARM64_32-DAG: mov [[CSREG:x[1-9].*]], x8 +; OPTARM64_32: bl {{_?}}thisreturn_attribute +; OPTARM64_32: str w0, {{\[}}[[CSREG]] +; OPTARM64_32: ret define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret, i8** noalias nocapture readonly swiftself) { entry: %2 = load i8*, i8** %1, align 8 diff --git a/llvm/test/CodeGen/AArch64/tail-call.ll b/llvm/test/CodeGen/AArch64/tail-call.ll index fe3e7d7c6a6..fafad4b0c58 100644 --- a/llvm/test/CodeGen/AArch64/tail-call.ll +++ b/llvm/test/CodeGen/AArch64/tail-call.ll @@ -2,8 +2,8 @@ ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON declare fastcc void @callee_stack0() -declare fastcc void @callee_stack8([8 x i32], i64) -declare fastcc void @callee_stack16([8 x i32], i64, i64) +declare fastcc void @callee_stack8([8 x i64], i64) +declare fastcc void @callee_stack16([8 x i64], i64, i64) declare extern_weak fastcc void @callee_weak() define fastcc void @caller_to0_from0() nounwind { @@ -16,7 +16,7 @@ define fastcc void @caller_to0_from0() nounwind { ; COMMON-NEXT: b callee_stack0 } -define fastcc void @caller_to0_from8([8 x i32], i64) { +define fastcc void @caller_to0_from8([8 x i64], i64) { ; COMMON-LABEL: caller_to0_from8: tail call fastcc void @callee_stack0() @@ -32,33 +32,33 @@ define fastcc void @caller_to8_from0() { ; Key point is that the "42" should go #16 below incoming stack ; pointer (we didn't have arg space to reuse). - tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void ; COMMON: str {{x[0-9]+}}, [sp, #16]! ; COMMON-NEXT: b callee_stack8 } -define fastcc void @caller_to8_from8([8 x i32], i64 %a) { +define fastcc void @caller_to8_from8([8 x i64], i64 %a) { ; COMMON-LABEL: caller_to8_from8: ; COMMON: sub sp, sp, #16 ; Key point is that the "%a" should go at SP on entry. - tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void ; COMMON: str {{x[0-9]+}}, [sp, #16]! 
; COMMON-NEXT: b callee_stack8 } -define fastcc void @caller_to16_from8([8 x i32], i64 %a) { +define fastcc void @caller_to16_from8([8 x i64], i64 %a) { ; COMMON-LABEL: caller_to16_from8: ; COMMON: sub sp, sp, #16 ; Important point is that the call reuses the "dead" argument space ; above %a on the stack. If it tries to go below incoming-SP then the ; callee will not deallocate the space, even in fastcc. - tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) + tail call fastcc void @callee_stack16([8 x i64] undef, i64 42, i64 2) ; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! ; COMMON-NEXT: b callee_stack16 @@ -66,12 +66,12 @@ define fastcc void @caller_to16_from8([8 x i32], i64 %a) { } -define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +define fastcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { ; COMMON-LABEL: caller_to8_from24: ; COMMON: sub sp, sp, #16 ; Key point is that the "%a" should go at #16 above SP on entry. - tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void ; COMMON: str {{x[0-9]+}}, [sp, #32]! @@ -79,13 +79,13 @@ define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { } -define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +define fastcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { ; COMMON-LABEL: caller_to16_from16: ; COMMON: sub sp, sp, #16 ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. - tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + tail call fastcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) ret void ; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll index 51522e1d12e..8edd867ff16 100644 --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -27,8 +27,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; AARCH-NEXT: orr w10, w10, w11 ; AARCH-NEXT: orr w9, w10, w9 ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-NEXT: mov x1, x8 -; AARCH-NEXT: mov w2, w9 +; AARCH-DAG: mov x1, x8 +; AARCH-DAG: mov w2, w9 ; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll index 50f7bf5907b..38775f2874f 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll @@ -261,11 +261,11 @@ define i32 @snprintf(i8*, i64, i8*, ...) 
local_unnamed_addr #5 { ; CHECK-DAG: mov w6, w3 ; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 ; CHECK: mov w2, w1 -; CHECK: str w4, [sp] ; CHECK: fmov x1, d0 ; CHECK: fmov x3, d1 ; CHECK: fmov x5, d2 ; CHECK: fmov x7, d3 +; CHECK: str w4, [sp] ; CHECK: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] diff --git a/llvm/test/MC/AArch64/arm64_32-compact-unwind.s b/llvm/test/MC/AArch64/arm64_32-compact-unwind.s new file mode 100644 index 00000000000..59d882ae3a5 --- /dev/null +++ b/llvm/test/MC/AArch64/arm64_32-compact-unwind.s @@ -0,0 +1,15 @@ +; RUN: llvm-mc -triple=arm64_32-ios7.0 -filetype=obj %s -o %t +; RUN: llvm-objdump -s %t | FileCheck %s + +; The compact unwind format in ILP32 mode is pretty much the same, except +; references to addresses (function, personality, LSDA) are pointer-sized. + +; CHECK: Contents of section __compact_unwind: +; CHECK-NEXT: 0004 00000000 04000000 00000002 00000000 +; CHECK-NEXT: 0014 00000000 + .globl _test_compact_unwind + .align 2 +_test_compact_unwind: + .cfi_startproc + ret + .cfi_endproc
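
; The common thread in the tests above: under arm64_32 (ILP32), pointers are
; 32 bits wide, so pointer values live in W registers and a pointer built from
; a 64-bit integer must have its top half cleared before use. A minimal
; standalone sketch of that rule, based on the lowering the test_inttoptr
; check above expects; the function names here are illustrative, not part of
; the patch.

define i8* @sketch_ptr_from_i64(i64 %in) {
  ; Expected arm64_32 lowering (cf. test_inttoptr): and x0, x0, #0xffffffff
  %p = inttoptr i64 %in to i8*
  ret i8* %p
}

define i32 @sketch_int_from_ptr(i8* %p) {
  ; In the other direction the W register already holds the full 32-bit
  ; pointer, so no masking should be needed.
  %v = ptrtoint i8* %p to i32
  ret i32 %v
}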

