diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2018-06-29 13:44:06 +0000 | 
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-06-29 13:44:06 +0000 | 
| commit | da66753e0112f667441097e9bfd38dea111cd8df (patch) | |
| tree | d0de1c36c7584259a20ea38cc8861cbf38260a6b /llvm/test/Transforms/InstCombine/shuffle_select.ll | |
| parent | f9e1dc002364efe0f06bad68254c8d4f6b08a435 (diff) | |
| download | bcm5719-llvm-da66753e0112f667441097e9bfd38dea111cd8df.tar.gz bcm5719-llvm-da66753e0112f667441097e9bfd38dea111cd8df.zip  | |
[InstCombine] enhance shuffle-of-binops to allow different variable ops (PR37806)
This was discussed in D48401 as another improvement for:
https://bugs.llvm.org/show_bug.cgi?id=37806
If we have 2 different variable values, then we shuffle (select) those lanes, 
shuffle (select) the constants, and then perform the binop. This eliminates a binop.
The new shuffle uses the same shuffle mask as the existing shuffle, so there's no 
danger of creating a difficult shuffle.
All of the earlier constraints still apply, but we also check for extra uses to 
avoid creating more instructions than we'll remove.
Additionally, we're disallowing the fold for div/rem because that could expose a
UB hole.
Differential Revision: https://reviews.llvm.org/D48678
llvm-svn: 335974
Diffstat (limited to 'llvm/test/Transforms/InstCombine/shuffle_select.ll')
| -rw-r--r-- | llvm/test/Transforms/InstCombine/shuffle_select.ll | 63 | 
1 files changed, 28 insertions, 35 deletions
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index ecc1d92fea8..a19dd6297ac 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -241,9 +241,8 @@ define <4 x double> @frem(<4 x double> %v0) {  define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @add_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 undef, i32 3, i32 undef> -; CHECK-NEXT:    [[T2:%.*]] = add <4 x i32> [[V1:%.*]], <i32 undef, i32 6, i32 undef, i32 8> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>  ; CHECK-NEXT:    ret <4 x i32> [[T3]]  ;    %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4> @@ -256,9 +255,8 @@ define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @sub_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 undef>, [[V0:%.*]] -; CHECK-NEXT:    [[T2:%.*]] = sub <4 x i32> <i32 undef, i32 undef, i32 undef, i32 8>, [[V1:%.*]] -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]  ; CHECK-NEXT:    ret <4 x i32> [[T3]]  ;    %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0 @@ -272,9 +270,8 @@ define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @mul_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 undef, i32 3, i32 undef> -; CHECK-NEXT:    [[T2:%.*]] = mul <4 x i32> [[V1:%.*]], <i32 undef, i32 6, i32 undef, i32 8> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 undef, i32 6, i32 3, i32 8>  ; CHECK-NEXT:    ret <4 x i32> [[T3]]  ;    %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4> @@ -287,9 +284,8 @@ define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @shl_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4> -; CHECK-NEXT:    [[T2:%.*]] = shl nsw <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef> +; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 undef, i32 6, i32 3, i32 undef>  ; CHECK-NEXT:    ret <4 x i32> [[T3]]  ;    %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4> @@ -302,9 +298,8 @@ define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @lshr_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]] -; CHECK-NEXT:    [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]] -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 2, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]  ; CHECK-NEXT:    ret <4 x i32> [[T3]]  ;    %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0 @@ -317,9 +312,8 @@ define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {  ; CHECK-LABEL: @ashr_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3> -; CHECK-NEXT:    [[T2:%.*]] = ashr <3 x i32> [[V1:%.*]], <i32 4, i32 5, i32 6> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <3 x i32> [[T1]], <3 x i32> [[T2]], <3 x i32> <i32 3, i32 1, i32 2> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[V0:%.*]], <3 x i32> [[V1:%.*]], <3 x i32> <i32 3, i32 1, i32 2> +; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>  ; CHECK-NEXT:    ret <3 x i32> [[T3]]  ;    %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3> @@ -330,9 +324,8 @@ define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {  define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {  ; CHECK-LABEL: @and_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 undef, i42 undef> -; CHECK-NEXT:    [[T2:%.*]] = and <3 x i42> [[V1:%.*]], <i42 undef, i42 5, i42 undef> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <3 x i42> [[T1]], <3 x i42> [[T2]], <3 x i32> <i32 0, i32 4, i32 undef> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 undef> +; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>  ; CHECK-NEXT:    ret <3 x i42> [[T3]]  ;    %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3> @@ -346,8 +339,8 @@ define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {  define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @or_2_vars(  ; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4> -; CHECK-NEXT:    [[T2:%.*]] = or <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 undef, i32 undef> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>  ; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])  ; CHECK-NEXT:    ret <4 x i32> [[T3]]  ; @@ -392,6 +385,8 @@ define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {    ret <4 x i32> %t3  } +; TODO: If the shuffle has no undefs, it's safe to shuffle the variables first. +  define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @sdiv_2_vars(  ; CHECK-NEXT:    [[T1:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4> @@ -405,6 +400,8 @@ define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {    ret <4 x i32> %t3  } +; TODO: If the shuffle has no undefs, it's safe to shuffle the variables first. +  define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  ; CHECK-LABEL: @urem_2_vars(  ; CHECK-NEXT:    [[T1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]] @@ -435,9 +432,8 @@ define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {  define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {  ; CHECK-LABEL: @fadd_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> -; CHECK-NEXT:    [[T2:%.*]] = fadd <4 x float> [[V1:%.*]], <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x float> [[T1]], <4 x float> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>  ; CHECK-NEXT:    ret <4 x float> [[T3]]  ;    %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0> @@ -448,9 +444,8 @@ define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {  define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {  ; CHECK-LABEL: @fsub_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = fsub <4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>, [[V0:%.*]] -; CHECK-NEXT:    [[T2:%.*]] = fsub <4 x double> <double 5.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V1:%.*]] -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]  ; CHECK-NEXT:    ret <4 x double> [[T3]]  ;    %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0 @@ -463,9 +458,8 @@ define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {  define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {  ; CHECK-LABEL: @fmul_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc nsz <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> -; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc nsz <4 x float> [[V1:%.*]], <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00> -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x float> [[T1]], <4 x float> [[T2]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>  ; CHECK-NEXT:    ret <4 x float> [[T3]]  ;    %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0> @@ -476,9 +470,8 @@ define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {  define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {  ; CHECK-LABEL: @frem_2_vars( -; CHECK-NEXT:    [[T1:%.*]] = frem nnan ninf <4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>, [[V0:%.*]] -; CHECK-NEXT:    [[T2:%.*]] = frem nnan arcp <4 x double> <double 5.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V1:%.*]] -; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7> +; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7> +; CHECK-NEXT:    [[T3:%.*]] = frem nnan <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]  ; CHECK-NEXT:    ret <4 x double> [[T3]]  ;    %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0  | 

