summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Nate Begeman <natebegeman@mac.com> 2008-02-03 07:18:54 +0000
committer Nate Begeman <natebegeman@mac.com> 2008-02-03 07:18:54 +0000
commit e14fdfaecd87eb7e588022aa34891b9204367bb1 (patch)
tree 764845edc5eba66049d16c4294f576e72285a4d7 /llvm/lib
parent 62f67ea73aa9b727cffc8a99fdf469521bbccec5 (diff)
download bcm5719-llvm-e14fdfaecd87eb7e588022aa34891b9204367bb1.tar.gz
bcm5719-llvm-e14fdfaecd87eb7e588022aa34891b9204367bb1.zip
SSE 4.1 Intrinsics and detection
llvm-svn: 46681
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86.td 7
-rw-r--r-- llvm/lib/Target/X86/X86Instr64bit.td 10
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.td 2
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 95
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.cpp 2
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.h 4
6 files changed, 119 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 905c704e752..9347310ef3d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -34,6 +34,12 @@ def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions">;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
@@ -66,6 +72,7 @@ def : Proc<"yonah", [FeatureSSE3]>;
def : Proc<"prescott", [FeatureSSE3]>;
def : Proc<"nocona", [FeatureSSE3]>;
def : Proc<"core2", [FeatureSSSE3]>;
+def : Proc<"penryn", [FeatureSSE41]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
diff --git a/llvm/lib/Target/X86/X86Instr64bit.td b/llvm/lib/Target/X86/X86Instr64bit.td
index a55e73130c7..9528dbd2c93 100644
--- a/llvm/lib/Target/X86/X86Instr64bit.td
+++ b/llvm/lib/Target/X86/X86Instr64bit.td
@@ -1266,3 +1266,13 @@ def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+// PEXTRB, unary, TA, 0x14, REX.W
+// PEXTRW, unary, TA, 0x15, REX.W
+// PEXTRQ, unary, TA, 0x16, REX.W
+// EXTRACTPS, unary, TA, 0x17, REX.W
+// PINSRQ, 2addr, binary, TA, 0x22, REX.W
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 478007b2073..a79947b5c0c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -166,6 +166,8 @@ def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 30d088c4ad5..5cf9a9c4e1c 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3038,3 +3038,98 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+// SSE4.1 Instruction Templates:
+//
+// SS418I - SSE 4.1 instructions with T8 prefix.
+// SS41AI - SSE 4.1 instructions with TA prefix.
+//
+class SS418I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+class SS41AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+
+
+multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
+ bits<8> opcsd, bits<8> opcpd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS41AI<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, imm:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SS41AI<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins ssmem:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, reg
+ def PSr_Int : SS41AI<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : SS41AI<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int (load addr:$src1), imm:$src2))]>;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS41AI<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, imm:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SS41AI<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins sdmem:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : SS41AI<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : SS41AI<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int (load addr:$src1), imm:$src2))]>;
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+defm ROUND : sse41_fp_unop_rm<0x0A, 0x08, 0x0B, 0x09, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_ps,
+ int_x86_sse41_round_sd, int_x86_sse41_round_pd>;
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 1480332ef0c..35a83e4ef78 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -114,6 +114,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
if (ECX & 0x1) X86SSELevel = SSE3;
if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
+ if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
+ if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
memcmp(text.c, "AuthenticAMD", 12) == 0) {
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index ee193cf519a..c2687265af9 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -38,7 +38,7 @@ public:
};
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
};
enum X863DNowEnum {
@@ -127,6 +127,8 @@ public:
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
OpenPOWER on IntegriCloud