3 files changed, 43 insertions, 91 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index f74a9fa3bb0..280e5fc2b50 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3794,34 +3794,41 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
     return false;
   bool SrcIsKill = hasTrivialKill(Op);
 
-  // If we're truncating from i64/i32 to a smaller non-legal type then generate
-  // an AND.
-  uint64_t Mask = 0;
-  switch (DestVT.SimpleTy) {
-  default:
-    // Trunc i64 to i32 is handled by the target-independent fast-isel.
-    return false;
-  case MVT::i1:
-    Mask = 0x1;
-    break;
-  case MVT::i8:
-    Mask = 0xff;
-    break;
-  case MVT::i16:
-    Mask = 0xffff;
-    break;
-  }
+  // If we're truncating from i64 to a smaller non-legal type then generate an
+  // AND. Otherwise, we know the high bits are undefined and a truncate only
+  // generates a COPY. We cannot also mark the source register as the result
+  // register, because this can incorrectly transfer the kill flag onto the
+  // source register.
+  unsigned ResultReg;
   if (SrcVT == MVT::i64) {
+    uint64_t Mask = 0;
+    switch (DestVT.SimpleTy) {
+    default:
+      // Trunc i64 to i32 is handled by the target-independent fast-isel.
+      return false;
+    case MVT::i1:
+      Mask = 0x1;
+      break;
+    case MVT::i8:
+      Mask = 0xff;
+      break;
+    case MVT::i16:
+      Mask = 0xffff;
+      break;
+    }
     // Issue an extract_subreg to get the lower 32-bits.
-    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
-                                        AArch64::sub_32);
-    SrcIsKill = true;
+    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+                                                AArch64::sub_32);
+    // Create the AND instruction which performs the actual truncation.
+    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+    assert(ResultReg && "Unexpected AND instruction emission failure.");
+  } else {
+    ResultReg = createResultReg(&AArch64::GPR32RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(SrcReg, getKillRegState(SrcIsKill));
   }
 
-  // Create the AND instruction which performs the actual truncation.
-  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, SrcIsKill, Mask);
-  assert(ResultReg && "Unexpected AND instruction emission failure.");
-
   updateValueMap(I, ResultReg);
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index a1b7ab02dbb..1b688652331 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -363,8 +363,7 @@ entry:
 define i32 @i64_trunc_i32(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i32
-; CHECK:       mov [[REG:x[0-9]+]], x0
-; CHECK-NEXT:  mov x0, [[REG]]
+; CHECK: mov {{x[0-9]+}}, x0
   %conv = trunc i64 %a to i32
   ret i32 %conv
 }
@@ -372,9 +371,9 @@ entry:
 define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i16
-; CHECK:       mov x[[REG:[0-9]+]], x0
-; CHECK-NEXT:  and [[REG2:w[0-9]+]], w[[REG]], #0xffff
-; CHECK-NEXT:  uxth w0, [[REG2]]
+; CHECK: mov x[[REG:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xffff
+; CHECK: uxth w0, [[REG2]]
   %conv = trunc i64 %a to i16
   ret i16 %conv
 }
@@ -382,9 +381,9 @@ entry:
 define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i8
-; CHECK:       mov x[[REG:[0-9]+]], x0
-; CHECK-NEXT:  and [[REG2:w[0-9]+]], w[[REG]], #0xff
-; CHECK-NEXT:  uxtb w0, [[REG2]]
+; CHECK: mov x[[REG:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xff
+; CHECK: uxtb w0, [[REG2]]
   %conv = trunc i64 %a to i8
   ret i8 %conv
 }
@@ -392,67 +391,13 @@ entry:
 define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i1
-; CHECK:       mov x[[REG:[0-9]+]], x0
-; CHECK-NEXT:  and [[REG2:w[0-9]+]], w[[REG]], #0x1
-; CHECK-NEXT:  and w0, [[REG2]], #0x1
+; CHECK: mov x[[REG:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0x1
+; CHECK: and w0, [[REG2]], #0x1
   %conv = trunc i64 %a to i1
   ret i1 %conv
 }
 
-define zeroext i16 @i32_trunc_i16(i32 %a) nounwind ssp {
-entry:
-; CHECK-LABEL: i32_trunc_i16
-; CHECK:       and [[REG:w[0-9]+]], w0, #0xffff
-; CHECK-NEXT:  uxth w0, [[REG]]
-  %conv = trunc i32 %a to i16
-  ret i16 %conv
-}
-
-define zeroext i8 @i32_trunc_i8(i32 %a) nounwind ssp {
-entry:
-; CHECK-LABEL: i32_trunc_i8
-; CHECK:       and [[REG:w[0-9]+]], w0, #0xff
-; CHECK-NEXT:  uxtb w0, [[REG]]
-  %conv = trunc i32 %a to i8
-  ret i8 %conv
-}
-
-define zeroext i1 @i32_trunc_i1(i32 %a) nounwind ssp {
-entry:
-; CHECK-LABEL: i32_trunc_i1
-; CHECK:       and [[REG:w[0-9]+]], w0, #0x1
-; CHECK-NEXT:  and w0, [[REG]], #0x1
-  %conv = trunc i32 %a to i1
-  ret i1 %conv
-}
-
-define zeroext i8 @i16_trunc_i8(i16 zeroext %a) nounwind ssp {
-entry:
-; CHECK-LABEL: i16_trunc_i8
-; CHECK:       and [[REG:w[0-9]+]], w0, #0xff
-; CHECK-NEXT:  uxtb w0, [[REG]]
-  %conv = trunc i16 %a to i8
-  ret i8 %conv
-}
-
-define zeroext i1 @i16_trunc_i1(i16 zeroext %a) nounwind ssp {
-entry:
-; CHECK-LABEL: i16_trunc_i1
-; CHECK:       and [[REG:w[0-9]+]], w0, #0x1
-; CHECK-NEXT:  and w0, [[REG]], #0x1
-  %conv = trunc i16 %a to i1
-  ret i1 %conv
-}
-
-define zeroext i1 @i8_trunc_i1(i8 zeroext %a) nounwind ssp {
-entry:
-; CHECK-LABEL: i8_trunc_i1
-; CHECK:       and [[REG:w[0-9]+]], w0, #0x1
-; CHECK-NEXT:  and w0, [[REG]], #0x1
-  %conv = trunc i8 %a to i1
-  ret i1 %conv
-}
-
 ; rdar://15101939
 define void @stack_trunc() nounwind {
 ; CHECK-LABEL: stack_trunc
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-address-extends.ll b/llvm/test/CodeGen/AArch64/fast-isel-address-extends.ll
index f6f79fd1a70..6a17ec502a0 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-address-extends.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-address-extends.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - -O2 -verify-machineinstrs -fast-isel=true | FileCheck %s
+; RUN: llc %s -o - -O0 -verify-machineinstrs -fast-isel=true | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios8.0.0"
@@ -7,7 +7,7 @@ target triple = "arm64-apple-ios8.0.0"
 ; This was incorrect as %.mux isn't available in the last bb.
 
 ; CHECK: sxtw [[REG:x[0-9]+]]
-; CHECK: strh wzr, {{\[}}{{.*}}, [[REG]], lsl #1]
+; CHECK: strh wzr, {{\[}}[[REG]], {{.*}}, lsl #1]
 
 ; Function Attrs: nounwind optsize ssp
 define void @EdgeLoop(i32 %dir, i32 %edge, i32 %width, i16* %tmp89, i32 %tmp136, i16 %tmp144) #0 {