summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Support/YAMLTraits.h109
-rw-r--r--llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt9
-rw-r--r--llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp19
-rw-r--r--llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp47
-rw-r--r--llvm/unittests/Support/YAMLIOTest.cpp83
5 files changed, 234 insertions, 33 deletions
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 4b8c4e95828..fa8caed1a9c 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -27,6 +27,7 @@
#include <cctype>
#include <cstddef>
#include <cstdint>
+#include <iterator>
#include <map>
#include <memory>
#include <new>
@@ -449,46 +450,100 @@ public:
static bool const value = (sizeof(test<DocumentListTraits<T>>(nullptr))==1);
};
-inline bool isNumber(StringRef S) {
- static const char OctalChars[] = "01234567";
- if (S.startswith("0") &&
- S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
- return true;
+inline bool isNumeric(StringRef S) {
+ const static auto skipDigits = [](StringRef Input) {
+ return Input.drop_front(
+ std::min(Input.find_first_not_of("0123456789"), Input.size()));
+ };
- if (S.startswith("0o") &&
- S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
- return true;
+ // Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls
+ // safe.
+ if (S.empty() || S.equals("+") || S.equals("-"))
+ return false;
- static const char HexChars[] = "0123456789abcdefABCDEF";
- if (S.startswith("0x") &&
- S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
+ if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
return true;
- static const char DecChars[] = "0123456789";
- if (S.find_first_not_of(DecChars) == StringRef::npos)
- return true;
+ // Infinity and decimal numbers can be prefixed with sign.
+ StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
- if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
+ // Check for infinity first, because checking for hex and oct numbers is more
+ // expensive.
+ if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
return true;
- Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
- if (FloatMatcher.match(S))
- return true;
+ // Section 10.3.2 Tag Resolution
+ // YAML 1.2 Specification prohibits Base 8 and Base 16 numbers prefixed with
+ // [-+], so S should be used instead of Tail.
+ if (S.startswith("0o"))
+ return S.size() > 2 &&
+ S.drop_front(2).find_first_not_of("01234567") == StringRef::npos;
+
+ if (S.startswith("0x"))
+ return S.size() > 2 && S.drop_front(2).find_first_not_of(
+ "0123456789abcdefABCDEF") == StringRef::npos;
+
+ // Parse float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+ S = Tail;
+
+ // Handle cases when the number starts with '.' and hence needs at least one
+ // digit after dot (as opposed by number which has digits before the dot), but
+ // doesn't have one.
+ if (S.startswith(".") &&
+ (S.equals(".") ||
+ (S.size() > 1 && std::strchr("0123456789", S[1]) == nullptr)))
+ return false;
+
+ if (S.startswith("E") || S.startswith("e"))
+ return false;
+
+ enum ParseState {
+ Default,
+ FoundDot,
+ FoundExponent,
+ };
+ ParseState State = Default;
- return false;
-}
+ S = skipDigits(S);
-inline bool isNumeric(StringRef S) {
- if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
+ // Accept decimal integer.
+ if (S.empty())
return true;
- if (isNumber(S))
- return true;
+ if (S.front() == '.') {
+ State = FoundDot;
+ S = S.drop_front();
+ } else if (S.front() == 'e' || S.front() == 'E') {
+ State = FoundExponent;
+ S = S.drop_front();
+ } else {
+ return false;
+ }
- if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
- return true;
+ if (State == FoundDot) {
+ S = skipDigits(S);
+ if (S.empty())
+ return true;
+
+ if (S.front() == 'e' || S.front() == 'E') {
+ State = FoundExponent;
+ S = S.drop_front();
+ } else {
+ return false;
+ }
+ }
+
+ assert(FoundExponent && "Should have found exponent at this point.");
+ if (S.empty())
+ return false;
+
+ if (S.front() == '+' || S.front() == '-') {
+ S = S.drop_front();
+ if (S.empty())
+ return false;
+ }
- return false;
+ return skipDigits(S).empty();
}
inline bool isNull(StringRef S) {
diff --git a/llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt b/llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
new file mode 100644
index 00000000000..34027431697
--- /dev/null
+++ b/llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ FuzzMutate
+)
+
+add_llvm_fuzzer(llvm-yaml-numeric-parser-fuzzer
+ yaml-numeric-parser-fuzzer.cpp
+ DUMMY_MAIN DummyYAMLNumericParserFuzzer.cpp
+ )
diff --git a/llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp b/llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
new file mode 100644
index 00000000000..3396168acec
--- /dev/null
+++ b/llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
@@ -0,0 +1,19 @@
+//===--- DummyYAMLNumericParserFuzzer.cpp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of main so we can build and test without linking libFuzzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+int main(int argc, char *argv[]) {
+ return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput);
+}
diff --git a/llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp b/llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
new file mode 100644
index 00000000000..1510aeb9a37
--- /dev/null
+++ b/llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
@@ -0,0 +1,47 @@
+//===--- special-case-list-fuzzer.cpp - Fuzzer for special case lists -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cassert>
+#include <string>
+
+llvm::Regex Infinity("^[-+]?(\\.inf|\\.Inf|\\.INF)$");
+llvm::Regex Base8("^0o[0-7]+$");
+llvm::Regex Base16("^0x[0-9a-fA-F]+$");
+llvm::Regex Float("^[-+]?(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
+
+inline bool isNumericRegex(llvm::StringRef S) {
+
+ if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+ return true;
+
+ if (Infinity.match(S))
+ return true;
+
+ if (Base8.match(S))
+ return true;
+
+ if (Base16.match(S))
+ return true;
+
+ if (Float.match(S))
+ return true;
+
+ return false;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ std::string Input(reinterpret_cast<const char *>(Data), Size);
+ Input.erase(std::remove(Input.begin(), Input.end(), 0), Input.end());
+ if (!Input.empty() && llvm::yaml::isNumeric(Input) != isNumericRegex(Input))
+ __builtin_trap();
+ return 0;
+}
diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp
index 13364806524..4530482ec80 100644
--- a/llvm/unittests/Support/YAMLIOTest.cpp
+++ b/llvm/unittests/Support/YAMLIOTest.cpp
@@ -16,16 +16,17 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+using llvm::yaml::Hex16;
+using llvm::yaml::Hex32;
+using llvm::yaml::Hex64;
+using llvm::yaml::Hex8;
using llvm::yaml::Input;
-using llvm::yaml::Output;
using llvm::yaml::IO;
-using llvm::yaml::MappingTraits;
+using llvm::yaml::isNumeric;
using llvm::yaml::MappingNormalization;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::Output;
using llvm::yaml::ScalarTraits;
-using llvm::yaml::Hex8;
-using llvm::yaml::Hex16;
-using llvm::yaml::Hex32;
-using llvm::yaml::Hex64;
using ::testing::StartsWith;
@@ -2569,3 +2570,73 @@ TEST(YAMLIO, TestEscaped) {
TestEscaped((char const *)foobar, "\"foo\\u200Bbar\"");
}
}
+
+TEST(YAMLIO, Numeric) {
+ EXPECT_TRUE(isNumeric(".inf"));
+ EXPECT_TRUE(isNumeric(".INF"));
+ EXPECT_TRUE(isNumeric(".Inf"));
+ EXPECT_TRUE(isNumeric("-.inf"));
+ EXPECT_TRUE(isNumeric("+.inf"));
+
+ EXPECT_TRUE(isNumeric(".nan"));
+ EXPECT_TRUE(isNumeric(".NaN"));
+ EXPECT_TRUE(isNumeric(".NAN"));
+
+ EXPECT_TRUE(isNumeric("0"));
+ EXPECT_TRUE(isNumeric("0."));
+ EXPECT_TRUE(isNumeric("0.0"));
+ EXPECT_TRUE(isNumeric("-0.0"));
+ EXPECT_TRUE(isNumeric("+0.0"));
+
+ EXPECT_TRUE(isNumeric("12345"));
+ EXPECT_TRUE(isNumeric("012345"));
+ EXPECT_TRUE(isNumeric("+12.0"));
+ EXPECT_TRUE(isNumeric(".5"));
+ EXPECT_TRUE(isNumeric("+.5"));
+ EXPECT_TRUE(isNumeric("-1.0"));
+
+ EXPECT_TRUE(isNumeric("2.3e4"));
+ EXPECT_TRUE(isNumeric("-2E+05"));
+ EXPECT_TRUE(isNumeric("+12e03"));
+ EXPECT_TRUE(isNumeric("6.8523015e+5"));
+
+ EXPECT_TRUE(isNumeric("1.e+1"));
+ EXPECT_TRUE(isNumeric(".0e+1"));
+
+ EXPECT_TRUE(isNumeric("0x2aF3"));
+ EXPECT_TRUE(isNumeric("0o01234567"));
+
+ EXPECT_FALSE(isNumeric("not a number"));
+ EXPECT_FALSE(isNumeric("."));
+ EXPECT_FALSE(isNumeric(".e+1"));
+ EXPECT_FALSE(isNumeric(".1e"));
+ EXPECT_FALSE(isNumeric(".1e+"));
+ EXPECT_FALSE(isNumeric(".1e++1"));
+
+ EXPECT_FALSE(isNumeric("ABCD"));
+ EXPECT_FALSE(isNumeric("+0x2AF3"));
+ EXPECT_FALSE(isNumeric("-0x2AF3"));
+ EXPECT_FALSE(isNumeric("0x2AF3Z"));
+ EXPECT_FALSE(isNumeric("0o012345678"));
+ EXPECT_FALSE(isNumeric("0xZ"));
+ EXPECT_FALSE(isNumeric("-0o012345678"));
+ EXPECT_FALSE(isNumeric("000003A8229434B839616A25C16B0291F77A438B"));
+
+ EXPECT_FALSE(isNumeric(""));
+ EXPECT_FALSE(isNumeric("."));
+ EXPECT_FALSE(isNumeric(".e+1"));
+ EXPECT_FALSE(isNumeric(".e+"));
+ EXPECT_FALSE(isNumeric(".e"));
+ EXPECT_FALSE(isNumeric("e1"));
+
+ // Deprecated formats: as for YAML 1.2 specification, the following are not
+ // valid numbers anymore:
+ //
+ // * Sexagecimal numbers
+ // * Decimal numbers with comma s the delimiter
+ // * "inf", "nan" without '.' prefix
+ EXPECT_FALSE(isNumeric("3:25:45"));
+ EXPECT_FALSE(isNumeric("+12,345"));
+ EXPECT_FALSE(isNumeric("-inf"));
+ EXPECT_FALSE(isNumeric("1,230.15"));
+}
OpenPOWER on IntegriCloud