diff options
| author | Bill Wendling <isanbard@gmail.com> | 2011-03-14 23:02:38 +0000 | 
|---|---|---|
| committer | Bill Wendling <isanbard@gmail.com> | 2011-03-14 23:02:38 +0000 | 
| commit | e1fd78f2bcb843c95e2da2c91bf82c995081692b (patch) | |
| tree | e104616a4cf6d16511552028a2a4304a36850dbf /llvm/lib/Target/ARM/ARMISelLowering.h | |
| parent | e2eb8076016f3cc30082d815469532b5ff22bb5d (diff) | |
| download | bcm5719-llvm-e1fd78f2bcb843c95e2da2c91bf82c995081692b.tar.gz bcm5719-llvm-e1fd78f2bcb843c95e2da2c91bf82c995081692b.zip | |
Generate a VTBL instruction instead of a series of loads and stores when we
can. As Nate pointed out, VTBL isn't super performant, but it *has* to be better
than this:
_shuf:
@ BB#0:       @ %entry
  push        {r4, r7, lr}
  add         r7, sp, #4
  sub         sp, #12
  mov         r4, sp
  bic         r4, r4, #7
  mov         sp, r4
  mov         r2, sp
  vmov        d16, r0, r1
  orr         r0, r2, #6
  orr         r3, r2, #7
  vst1.8      {d16[0]}, [r3]
  vst1.8      {d16[5]}, [r0]
  subs        r4, r7, #4
  orr         r0, r2, #5
  vst1.8      {d16[4]}, [r0]
  orr         r0, r2, #4
  vst1.8      {d16[4]}, [r0]
  orr         r0, r2, #3
  vst1.8      {d16[0]}, [r0]
  orr         r0, r2, #2
  vst1.8      {d16[2]}, [r0]
  orr         r0, r2, #1
  vst1.8      {d16[1]}, [r0]
  vst1.8      {d16[3]}, [r2]
  vldr.64     d16, [sp]
  vmov        r0, r1, d16
  mov         sp, r4
  pop         {r4, r7, pc}
The "illegal" testcase in vext.ll is no longer illegal.
<rdar://problem/9078775>
llvm-svn: 127630
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.h')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 4 | 
1 file changed, 4 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0f56201bcb8..8eb4525b82f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -153,6 +153,10 @@ namespace llvm {
       VZIP,         // zip (interleave)
       VUZP,         // unzip (deinterleave)
       VTRN,         // transpose
+      VTBL1,        // 1-register shuffle with mask
+      VTBL2,        // 2-register shuffle with mask
+      VTBL3,        // 3-register shuffle with mask
+      VTBL4,        // 4-register shuffle with mask
 
       // Vector multiply long:
       VMULLs,       // ...signed
```

