2 files changed, 87 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 7353c2c010c..a474601d5f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -441,7 +441,11 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
     LLT OrigTy = MRI.getType(OrigReg);
     LLT NewTy = MRI.getType(NewReg);
     if (OrigTy != NewTy) {
-      assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
+      // The default mapping is not supposed to change the size of
+      // the storage. However, right now we don't necessarily bump all
+      // the types to the storage size. For instance, we can consider
+      // s16 G_AND legal whereas the storage size is going to be 32 bits.
+      assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
              "Types with difference size cannot be handled by the default "
              "mapping");
       DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
index fb021893869..a5df93b4926 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
@@ -97,6 +97,19 @@
   define void @fp16Ext32() { ret void }
   define void @fp16Ext64() { ret void }
   define void @fp32Ext64() { ret void }
+
+  define half @passFp16(half %p) {
+  entry:
+    ret half %p
+  }
+
+  define half @passFp16ViaAllocas(half %p) {
+  entry:
+    %p.addr = alloca half, align 2
+    store half %p, half* %p.addr, align 2
+    %0 = load half, half* %p.addr, align 2
+    ret half %0
+  }
 ...
 
 ---
@@ -875,3 +888,72 @@ body:             |
     RET_ReallyLR implicit %d0
 
 ...
+
+---
+# Make sure that values passed via the FP16 ABI are mapped to the FPR register bank.
+# CHECK-LABEL: name: passFp16
+# CHECK: registers:
+# CHECK:  - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:  %0:fpr(s16) = COPY %h0
+# CHECK-NEXT: %h0 = COPY %0(s16)
+name:            passFp16
+alignment:       2
+legalized:       true
+registers:
+  - { id: 0, class: _ }
+body:             |
+  bb.1.entry:
+    liveins: %h0
+
+    %0(s16) = COPY %h0
+    %h0 = COPY %0(s16)
+    RET_ReallyLR implicit %h0
+
+...
+---
+# This test mixes 16-bit types on fpr with 32-bit types on gpr.
+# The difficulty is that switching from fpr to gpr requires more than
+# just a plain COPY.
+# In this specific case, we currently map the ABI copy from h0 to fpr;
+# the fast mapping then picks gpr for the store, and the size of the
+# storage gets bumped to 32 bits.
+# CHECK-LABEL: name: passFp16ViaAllocas
+# CHECK: registers:
+# CHECK:  - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 3, class: gpr, preferred-register: '' }
+#
+# CHECK:  %0:fpr(s16) = COPY %h0
+# CHECK-NEXT: %1:gpr(p0) = G_FRAME_INDEX %stack.0.p.addr
+# Currently, the default mapping we provide for a store does not
+# consider fpr for s16 values unless they are produced by a
+# floating-point operation. Thus, we have to repair the assignment.
+# CHECK-NEXT: %3:gpr(s16) = COPY %0(s16)
+# CHECK-NEXT: G_STORE %3(s16), %1(p0) :: (store 2 into %ir.p.addr)
+# CHECK-NEXT: %2:gpr(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr)
+# CHECK-NEXT: %h0 = COPY %2(s16)
+name:            passFp16ViaAllocas
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+frameInfo:
+  maxAlignment:    2
+stack:
+  - { id: 0, name: p.addr, size: 2, alignment: 2, stack-id: 0 }
+body:             |
+  bb.1.entry:
+    liveins: %h0
+
+    %0(s16) = COPY %h0
+    %1(p0) = G_FRAME_INDEX %stack.0.p.addr
+    G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p.addr)
+    %2(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr)
+    %h0 = COPY %2(s16)
+    RET_ReallyLR implicit %h0
+
+...