summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp 20
-rw-r--r-- clang/test/CodeGen/sse4a-builtins.c 8
2 files changed, 25 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 43892389013..23f62ed60a1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6848,6 +6848,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
SI->setAlignment(1);
return SI;
}
+ case X86::BI__builtin_ia32_movntsd:
+ case X86::BI__builtin_ia32_movntss: {
+ llvm::MDNode *Node = llvm::MDNode::get(
+ getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+
+ // Extract the 0'th element of the source vector.
+ Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Scl->getType()),
+ "cast");
+
+ // Unaligned nontemporal store of the scalar value.
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
+ SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ SI->setAlignment(1);
+ return SI;
+ }
+
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/test/CodeGen/sse4a-builtins.c b/clang/test/CodeGen/sse4a-builtins.c
index 48e7c662c57..0604423fe17 100644
--- a/clang/test/CodeGen/sse4a-builtins.c
+++ b/clang/test/CodeGen/sse4a-builtins.c
@@ -33,12 +33,14 @@ __m128i test_mm_insert_si64(__m128i x, __m128i y) {
void test_mm_stream_sd(double *p, __m128d a) {
// CHECK-LABEL: test_mm_stream_sd
- // CHECK: call void @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
- _mm_stream_sd(p, a);
+ // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: store double %{{.*}}, double* %{{.*}}, align 1, !nontemporal
+ _mm_stream_sd(p, a);
}
void test_mm_stream_ss(float *p, __m128 a) {
// CHECK-LABEL: test_mm_stream_ss
- // CHECK: call void @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
+ // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: store float %{{.*}}, float* %{{.*}}, align 1, !nontemporal
_mm_stream_ss(p, a);
}
OpenPOWER on IntegriCloud