diff options
Diffstat (limited to 'clang/lib/CodeGen')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 025b34e809c..741f36b095d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9325,6 +9325,57 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                        makeArrayRef(Indices, NumElts),
                                        "blend");
   }
+  case X86::BI__builtin_ia32_pshuflw:
+  case X86::BI__builtin_ia32_pshuflw256:
+  case X86::BI__builtin_ia32_pshuflw512: {
+    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+    llvm::Type *Ty = Ops[0]->getType();
+    unsigned NumElts = Ty->getVectorNumElements();
+
+    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+    Imm = (Imm & 0xff) * 0x01010101;
+
+    uint32_t Indices[32];
+    for (unsigned l = 0; l != NumElts; l += 8) {
+      for (unsigned i = 0; i != 4; ++i) {
+        Indices[l + i] = l + (Imm & 3);
+        Imm >>= 2;
+      }
+      for (unsigned i = 4; i != 8; ++i)
+        Indices[l + i] = l + i;
+    }
+
+    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+                                       makeArrayRef(Indices, NumElts),
+                                       "pshuflw");
+  }
+  case X86::BI__builtin_ia32_pshufhw:
+  case X86::BI__builtin_ia32_pshufhw256:
+  case X86::BI__builtin_ia32_pshufhw512: {
+    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+    llvm::Type *Ty = Ops[0]->getType();
+    unsigned NumElts = Ty->getVectorNumElements();
+
+    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+    Imm = (Imm & 0xff) * 0x01010101;
+
+    uint32_t Indices[32];
+    for (unsigned l = 0; l != NumElts; l += 8) {
+      for (unsigned i = 0; i != 4; ++i)
+        Indices[l + i] = l + i;
+      for (unsigned i = 4; i != 8; ++i) {
+        Indices[l + i] = l + 4 + (Imm & 3);
+        Imm >>= 2;
+      }
+    }
+
+    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+                                       makeArrayRef(Indices, NumElts),
+                                       "pshufhw");
+  }
+  case X86::BI__builtin_ia32_pshufd:
+  case X86::BI__builtin_ia32_pshufd256:
+  case X86::BI__builtin_ia32_pshufd512:
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilps:
   case X86::BI__builtin_ia32_vpermilpd256:

