diff options
| author | Vincent Lejeune <vljn@ovi.com> | 2013-10-13 17:56:10 +0000 |
|---|---|---|
| committer | Vincent Lejeune <vljn@ovi.com> | 2013-10-13 17:56:10 +0000 |
| commit | fa58a5fb609add6c1ff6cfa7b2bf8bdc32b3c9ca (patch) | |
| tree | e333458d3259ef8e24ddc31faf4d429f806a26ff | |
| parent | 301beb80d42e4aebcbfb9836210ecf61873143cd (diff) | |
| download | bcm5719-llvm-fa58a5fb609add6c1ff6cfa7b2bf8bdc32b3c9ca.tar.gz bcm5719-llvm-fa58a5fb609add6c1ff6cfa7b2bf8bdc32b3c9ca.zip | |
R600: Use masked read sel for texture instructions
llvm-svn: 192554
| -rw-r--r-- | llvm/lib/Target/R600/R600ISelLowering.cpp | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/swizzle-export.ll | 15 |
2 files changed, 12 insertions, 8 deletions
diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp index 81a28be104a..3c2e3888e08 100644 --- a/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -1379,6 +1379,11 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, }; for (unsigned i = 0; i < 4; i++) { + if (NewBldVec[i].getOpcode() == ISD::UNDEF) + // We mask write here to teach later passes that the ith element of this + // vector is undef. Thus we can use it to reduce 128 bits reg usage, + // break false dependencies and additionnaly make assembly easier to read. + RemapSwizzle[i] = 7; // SEL_MASK_WRITE if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) { if (C->isZero()) { RemapSwizzle[i] = 4; // SEL_0 diff --git a/llvm/test/CodeGen/R600/swizzle-export.ll b/llvm/test/CodeGen/R600/swizzle-export.ll index 02fe13a720e..9a58f667f0d 100644 --- a/llvm/test/CodeGen/R600/swizzle-export.ll +++ b/llvm/test/CodeGen/R600/swizzle-export.ll @@ -93,6 +93,7 @@ main_body: } ; EG-CHECK: @main2 +; EG-CHECK: T{{[0-9]+}}.XY__ ; EG-CHECK: T{{[0-9]+}}.YXZ0 define void @main2() #0 { @@ -110,14 +111,12 @@ main_body: %10 = extractelement <4 x float> %9, i32 1 %11 = insertelement <4 x float> undef, float %0, i32 0 %12 = insertelement <4 x float> %11, float %1, i32 1 - %13 = insertelement <4 x float> %12, float %2, i32 2 - %14 = insertelement <4 x float> %13, float %3, i32 3 - call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1) - %15 = insertelement <4 x float> undef, float %6, i32 0 - %16 = insertelement <4 x float> %15, float %8, i32 1 - %17 = insertelement <4 x float> %16, float %10, i32 2 - %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3 - call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2) + call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1) + %13 = insertelement <4 x float> undef, float %6, i32 0 + %14 = insertelement <4 x float> %13, float %8, i32 1 + %15 = insertelement <4 x float> %14, float %10, i32 2 + %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2) ret void } |

