diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 74 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-arith.ll | 56 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-logic.ll | 26 |
3 files changed, 140 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 486d7255dbe..0f4736bb760 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7315,6 +7315,80 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = { X86::VPXORQZrrk, X86::VPXORDZrrk }, { X86::VXORPSZrrkz, X86::VXORPDZrrkz, X86::VPXORQZrrkz, X86::VPXORDZrrkz }, + // Broadcast loads can be handled the same as masked operations to avoid + // changing element size. + { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb, + X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb }, + { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb, + X86::VPANDQZ128rmb, X86::VPANDDZ128rmb }, + { X86::VORPSZ128rmb, X86::VORPDZ128rmb, + X86::VPORQZ128rmb, X86::VPORDZ128rmb }, + { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb, + X86::VPXORQZ128rmb, X86::VPXORDZ128rmb }, + { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb, + X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb }, + { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb, + X86::VPANDQZ256rmb, X86::VPANDDZ256rmb }, + { X86::VORPSZ256rmb, X86::VORPDZ256rmb, + X86::VPORQZ256rmb, X86::VPORDZ256rmb }, + { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb, + X86::VPXORQZ256rmb, X86::VPXORDZ256rmb }, + { X86::VANDNPSZrmb, X86::VANDNPDZrmb, + X86::VPANDNQZrmb, X86::VPANDNDZrmb }, + { X86::VANDPSZrmb, X86::VANDPDZrmb, + X86::VPANDQZrmb, X86::VPANDDZrmb }, + { X86::VORPSZrmb, X86::VORPDZrmb, + X86::VPORQZrmb, X86::VPORDZrmb }, + { X86::VXORPSZrmb, X86::VXORPDZrmb, + X86::VPXORQZrmb, X86::VPXORDZrmb }, + { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk, + X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk }, + { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk, + X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk }, + { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk, + X86::VPORQZ128rmbk, X86::VPORDZ128rmbk }, + { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk, + X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk }, + { X86::VANDNPSZ256rmbk, 
X86::VANDNPDZ256rmbk, + X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk }, + { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk, + X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk }, + { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk, + X86::VPORQZ256rmbk, X86::VPORDZ256rmbk }, + { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk, + X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk }, + { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk, + X86::VPANDNQZrmbk, X86::VPANDNDZrmbk }, + { X86::VANDPSZrmbk, X86::VANDPDZrmbk, + X86::VPANDQZrmbk, X86::VPANDDZrmbk }, + { X86::VORPSZrmbk, X86::VORPDZrmbk, + X86::VPORQZrmbk, X86::VPORDZrmbk }, + { X86::VXORPSZrmbk, X86::VXORPDZrmbk, + X86::VPXORQZrmbk, X86::VPXORDZrmbk }, + { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz, + X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz}, + { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz, + X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz }, + { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz, + X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz }, + { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz, + X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz }, + { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz, + X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz}, + { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz, + X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz }, + { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz, + X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz }, + { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz, + X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz }, + { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz, + X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz }, + { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz, + X86::VPANDQZrmbkz, X86::VPANDDZrmbkz }, + { X86::VORPSZrmbkz, X86::VORPDZrmbkz, + X86::VPORQZrmbkz, X86::VPORDZrmbkz }, + { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz, + X86::VPXORQZrmbkz, X86::VPXORDZrmbkz }, }; // FIXME: Some shuffle and unpack instructions have equivalents in 
different diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll index e24a13e32b1..a739ee3ac0f 100644 --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -594,10 +594,30 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { } define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { -; CHECK-LABEL: orq_broadcast: -; CHECK: ## BB#0: -; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq +; AVX512F-LABEL: orq_broadcast: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: orq_broadcast: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: orq_broadcast: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: orq_broadcast: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; SKX-LABEL: orq_broadcast: +; SKX: ## BB#0: +; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> ret <8 x i64> %b } @@ -634,10 +654,30 @@ entry: } define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { -; CHECK-LABEL: andqbrst: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq +; AVX512F-LABEL: andqbrst: +; AVX512F: ## BB#0: ## %entry +; AVX512F-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: andqbrst: +; AVX512VL: ## BB#0: ## %entry +; AVX512VL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: andqbrst: +; AVX512BW: ## BB#0: ## %entry +; AVX512BW-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: andqbrst: +; AVX512DQ: ## BB#0: ## %entry +; AVX512DQ-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 +; 
AVX512DQ-NEXT: retq +; +; SKX-LABEL: andqbrst: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq entry: %a = load i64, i64* %ap, align 8 %b = insertelement <8 x i64> undef, i64 %a, i32 0 diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index ca603b11fbb..119e03dc19d 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -116,10 +116,15 @@ entry: define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { -; ALL-LABEL: orq_broadcast: -; ALL: ## BB#0: -; ALL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; ALL-NEXT: retq +; KNL-LABEL: orq_broadcast: +; KNL: ## BB#0: +; KNL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: orq_broadcast: +; SKX: ## BB#0: +; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> ret <8 x i64> %b } @@ -141,10 +146,15 @@ entry: } define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { -; ALL-LABEL: andqbrst: -; ALL: ## BB#0: ## %entry -; ALL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 -; ALL-NEXT: retq +; KNL-LABEL: andqbrst: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: andqbrst: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq entry: %a = load i64, i64* %ap, align 8 %b = insertelement <8 x i64> undef, i64 %a, i32 0 |