summaryrefslogtreecommitdiffstats
path: root/clang/lib/Basic/SourceManager.cpp
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2012-04-06 20:49:55 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2012-04-06 20:49:55 +0000
commit543036a447e9731beab8250d63be94ebcdc22f7e (patch)
treef3e970bbfc73af9500ded1a22f55002e17119040 /clang/lib/Basic/SourceManager.cpp
parentd2863dab852e7c87366ea65d930f30463bb57ef4 (diff)
downloadbcm5719-llvm-543036a447e9731beab8250d63be94ebcdc22f7e.tar.gz
bcm5719-llvm-543036a447e9731beab8250d63be94ebcdc22f7e.zip
SourceManager: Vectorize ComputeLineNumbers for SSE2.
This method is very hot: it is called when emitting diagnostics, in -E mode, and for many #pragma handlers. It scans through the whole source file to count newlines, and records and caches them in a vector. The speedup from vectorization isn't very large, as we fall back to bytewise scanning when we hit a newline. There might be a way to avoid leaving the SSE loop, but everything I tried didn't work out because a call to push_back clobbers xmm registers. About 2% speedup on average on "clang -E > /dev/null" of all .cpp files in clang's lib/Sema. llvm-svn: 154204
Diffstat (limited to 'clang/lib/Basic/SourceManager.cpp')
-rw-r--r--clang/lib/Basic/SourceManager.cpp41
1 files changed, 39 insertions, 2 deletions
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index a2540bc18af..cef091c5983 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -1037,6 +1037,10 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc,
return getPresumedLoc(Loc).getColumn();
}
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
static LLVM_ATTRIBUTE_NOINLINE void
ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
llvm::BumpPtrAllocator &Alloc,
@@ -1062,11 +1066,44 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
unsigned Offs = 0;
while (1) {
// Skip over the contents of the line.
- // TODO: Vectorize this? This is very performance sensitive for programs
- // with lots of diagnostics and in -E mode.
const unsigned char *NextBuf = (const unsigned char *)Buf;
+
+#ifdef __SSE2__
+ // Try to skip to the next newline using SSE instructions. This is very
+ // performance sensitive for programs with lots of diagnostics and in -E
+ // mode.
+ __m128i CRs = _mm_set1_epi8('\r');
+ __m128i LFs = _mm_set1_epi8('\n');
+
+ // First fix up the alignment to 16 bytes.
+ while (((uintptr_t)NextBuf & 0xF) != 0) {
+ if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0')
+ goto FoundSpecialChar;
+ ++NextBuf;
+ }
+
+ // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'.
+ while (NextBuf+16 <= End) {
+ __m128i Chunk = *(__m128i*)NextBuf;
+ __m128i Cmp = _mm_or_si128(_mm_cmpeq_epi8(Chunk, CRs),
+ _mm_cmpeq_epi8(Chunk, LFs));
+ unsigned Mask = _mm_movemask_epi8(Cmp);
+
+ // If we found a newline, adjust the pointer and jump to the handling code.
+ if (Mask != 0) {
+ NextBuf += llvm::CountTrailingZeros_32(Mask);
+ goto FoundSpecialChar;
+ }
+ NextBuf += 16;
+ }
+#endif
+
while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
++NextBuf;
+
+#ifdef __SSE2__
+FoundSpecialChar:
+#endif
Offs += NextBuf-Buf;
Buf = NextBuf;
OpenPOWER on IntegriCloud