diff options
author | Justin Bogner <mail@justinbogner.com> | 2017-08-21 22:57:06 +0000 |
---|---|---|
committer | Justin Bogner <mail@justinbogner.com> | 2017-08-21 22:57:06 +0000 |
commit | 7d449d31a4d2fe393e32be7c478c18b16b301428 (patch) | |
tree | 6deed6bee8fd3f5cc1ce4b77505a53f51326009a /llvm/lib/FuzzMutate | |
parent | 4056e80719317ec1719dde7c2b08d1720a4ad2a4 (diff) | |
download | bcm5719-llvm-7d449d31a4d2fe393e32be7c478c18b16b301428.tar.gz bcm5719-llvm-7d449d31a4d2fe393e32be7c478c18b16b301428.zip |
Re-apply "Introduce FuzzMutate library"
Same as r311392 with some fixes for library dependencies. Thanks to
Chapuni for helping work those out!
Original commit message:
This introduces the FuzzMutate library, which provides structured
fuzzing for LLVM IR, as described in my EuroLLVM 2017 talk. Most of
the basic mutators to inject and delete IR are provided, with support
for most basic operations.
llvm-svn: 311402
Diffstat (limited to 'llvm/lib/FuzzMutate')
-rw-r--r-- | llvm/lib/FuzzMutate/CMakeLists.txt | 12 | ||||
-rw-r--r-- | llvm/lib/FuzzMutate/IRMutator.cpp | 183 | ||||
-rw-r--r-- | llvm/lib/FuzzMutate/LLVMBuild.txt | 22 | ||||
-rw-r--r-- | llvm/lib/FuzzMutate/OpDescriptor.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/FuzzMutate/Operations.cpp | 312 | ||||
-rw-r--r-- | llvm/lib/FuzzMutate/RandomIRBuilder.cpp | 140 |
6 files changed, 707 insertions, 0 deletions
diff --git a/llvm/lib/FuzzMutate/CMakeLists.txt b/llvm/lib/FuzzMutate/CMakeLists.txt new file mode 100644 index 00000000000..4aec682b01f --- /dev/null +++ b/llvm/lib/FuzzMutate/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(LLVMFuzzMutate + IRMutator.cpp + OpDescriptor.cpp + Operations.cpp + RandomIRBuilder.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/FuzzMutate + + DEPENDS + intrinsics_gen + ) diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp new file mode 100644 index 00000000000..6545446a984 --- /dev/null +++ b/llvm/lib/FuzzMutate/IRMutator.cpp @@ -0,0 +1,183 @@ +//===-- IRMutator.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/IRMutator.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/FuzzMutate/Operations.h" +#include "llvm/FuzzMutate/Random.h" +#include "llvm/FuzzMutate/RandomIRBuilder.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Scalar/DCE.h" + +using namespace llvm; + +static void createEmptyFunction(Module &M) { + // TODO: Some arguments and a return value would probably be more interesting. + LLVMContext &Context = M.getContext(); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), {}, + /*isVarArg=*/false), + GlobalValue::ExternalLinkage, "f", &M); + BasicBlock *BB = BasicBlock::Create(Context, "BB", F); + ReturnInst::Create(Context, BB); +} + +void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) { + if (M.empty()) + createEmptyFunction(M); + + auto RS = makeSampler<Function *>(IB.Rand); + for (Function &F : M) + if (!F.isDeclaration()) + RS.sample(&F, /*Weight=*/1); + mutate(*RS.getSelection(), IB); +} + +void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) { + mutate(*makeSampler(IB.Rand, make_pointer_range(F)).getSelection(), IB); +} + +void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { + mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB); +} + +void IRMutator::mutateModule(Module &M, int Seed, size_t CurSize, + size_t MaxSize) { + std::vector<Type *> Types; + for (const auto &Getter : AllowedTypes) + Types.push_back(Getter(M.getContext())); + RandomIRBuilder IB(Seed, Types); + + auto RS = makeSampler<IRMutationStrategy *>(IB.Rand); + for (const auto &Strategy : Strategies) + RS.sample(Strategy.get(), + Strategy->getWeight(CurSize, MaxSize, RS.totalWeight())); + auto Strategy = RS.getSelection(); + + Strategy->mutate(M, IB); +} + +static void eliminateDeadCode(Function &F) { + FunctionPassManager FPM; + FPM.addPass(DCEPass()); + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TargetLibraryAnalysis(); }); + FPM.run(F, FAM); +} + +void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { + IRMutationStrategy::mutate(F, IB); + eliminateDeadCode(F); +} + +std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() { + std::vector<fuzzerop::OpDescriptor> Ops; + describeFuzzerIntOps(Ops); + describeFuzzerFloatOps(Ops); + describeFuzzerControlFlowOps(Ops); + describeFuzzerPointerOps(Ops); + describeFuzzerAggregateOps(Ops); + describeFuzzerVectorOps(Ops); + return Ops; +} + +fuzzerop::OpDescriptor +InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) { + auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) { + return Op.SourcePreds[0].matches({}, Src); + }; + auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred)); + if (RS.isEmpty()) + report_fatal_error("No available operations for src type"); + return *RS; +} + +void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { + SmallVector<Instruction *, 32> Insts; + for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I) + Insts.push_back(&*I); + + // Choose an insertion point for our new instruction. + size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1); + + auto InstsBefore = makeArrayRef(Insts).slice(0, IP); + auto InstsAfter = makeArrayRef(Insts).slice(IP); + + // Choose a source, which will be used to constrain the operation selection. + SmallVector<Value *, 2> Srcs; + Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore)); + + // Choose an operation that's constrained to be valid for the type of the + // source, collect any other sources it needs, and then build it. + fuzzerop::OpDescriptor OpDesc = chooseOperation(Srcs[0], IB); + for (const auto &Pred : makeArrayRef(OpDesc.SourcePreds).slice(1)) + Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred)); + if (Value *Op = OpDesc.BuilderFunc(Srcs, Insts[IP])) { + // Find a sink and wire up the results of the operation. + IB.connectToSink(BB, InstsAfter, Op); + } +} + +uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize, + uint64_t CurrentWeight) { + // If we have less than 200 bytes, panic and try to always delete. + if (CurrentSize > MaxSize - 200) + return CurrentWeight ? CurrentWeight * 100 : 1; + // Draw a line starting from when we only have 1k left and increasing linearly + // to double the current weight. + int Line = (-2 * CurrentWeight) * (MaxSize - CurrentSize + 1000); + // Clamp negative weights to zero. + if (Line < 0) + return 0; + return Line; +} + +void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { + auto RS = makeSampler<Instruction *>(IB.Rand); + // Avoid terminators so we don't have to worry about keeping the CFG coherent. + for (Instruction &Inst : instructions(F)) + if (!Inst.isTerminator()) + RS.sample(&Inst, /*Weight=*/1); + assert(!RS.isEmpty() && "No instructions to delete"); + // Delete the instruction. + mutate(*RS.getSelection(), IB); + // Clean up any dead code that's left over after removing the instruction. + eliminateDeadCode(F); +} + +void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) { + assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG"); + + if (Inst.getType()->isVoidTy()) { + // Instructions with void type (ie, store) have no uses to worry about. Just + // erase it and move on. + Inst.eraseFromParent(); + return; + } + + // Otherwise we need to find some other value with the right type to keep the + // users happy. + auto Pred = fuzzerop::onlyType(Inst.getType()); + auto RS = makeSampler<Value *>(IB.Rand); + SmallVector<Instruction *, 32> InstsBefore; + BasicBlock *BB = Inst.getParent(); + for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E; + ++I) { + if (Pred.matches({}, &*I)) + RS.sample(&*I, /*Weight=*/1); + InstsBefore.push_back(&*I); + } + if (!RS) + RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1); + + Inst.replaceAllUsesWith(RS.getSelection()); +} diff --git a/llvm/lib/FuzzMutate/LLVMBuild.txt b/llvm/lib/FuzzMutate/LLVMBuild.txt new file mode 100644 index 00000000000..9e85540f037 --- /dev/null +++ b/llvm/lib/FuzzMutate/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/FuzzMutate/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = FuzzMutate +parent = Libraries +required_libraries = Analysis Core Scalar Support Target diff --git a/llvm/lib/FuzzMutate/OpDescriptor.cpp b/llvm/lib/FuzzMutate/OpDescriptor.cpp new file mode 100644 index 00000000000..1c5d8f606ae --- /dev/null +++ b/llvm/lib/FuzzMutate/OpDescriptor.cpp @@ -0,0 +1,38 @@ +//===-- OpDescriptor.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/OpDescriptor.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; +using namespace fuzzerop; + +void fuzzerop::makeConstantsWithType(Type *T, std::vector<Constant *> &Cs) { + if (auto *IntTy = dyn_cast<IntegerType>(T)) { + uint64_t W = IntTy->getBitWidth(); + Cs.push_back(ConstantInt::get(IntTy, APInt::getMaxValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getMinValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getSignedMaxValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getSignedMinValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getOneBitSet(W, W / 2))); + } else if (T->isFloatingPointTy()) { + auto &Ctx = T->getContext(); + auto &Sem = T->getFltSemantics(); + Cs.push_back(ConstantFP::get(Ctx, APFloat::getZero(Sem))); + Cs.push_back(ConstantFP::get(Ctx, APFloat::getLargest(Sem))); + Cs.push_back(ConstantFP::get(Ctx, APFloat::getSmallest(Sem))); + } else + Cs.push_back(UndefValue::get(T)); +} + +std::vector<Constant *> fuzzerop::makeConstantsWithType(Type *T) { + std::vector<Constant *> Result; + makeConstantsWithType(T, Result); + return Result; +} diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp new file mode 100644 index 00000000000..083d9aa039e --- /dev/null +++ b/llvm/lib/FuzzMutate/Operations.cpp @@ -0,0 +1,312 @@ +//===-- Operations.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/Operations.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; +using namespace fuzzerop; + +void llvm::describeFuzzerIntOps(std::vector<fuzzerop::OpDescriptor> &Ops) { + Ops.push_back(binOpDescriptor(1, Instruction::Add)); + Ops.push_back(binOpDescriptor(1, Instruction::Sub)); + Ops.push_back(binOpDescriptor(1, Instruction::Mul)); + Ops.push_back(binOpDescriptor(1, Instruction::SDiv)); + Ops.push_back(binOpDescriptor(1, Instruction::UDiv)); + Ops.push_back(binOpDescriptor(1, Instruction::SRem)); + Ops.push_back(binOpDescriptor(1, Instruction::URem)); + Ops.push_back(binOpDescriptor(1, Instruction::Shl)); + Ops.push_back(binOpDescriptor(1, Instruction::LShr)); + Ops.push_back(binOpDescriptor(1, Instruction::AShr)); + Ops.push_back(binOpDescriptor(1, Instruction::And)); + Ops.push_back(binOpDescriptor(1, Instruction::Or)); + Ops.push_back(binOpDescriptor(1, Instruction::Xor)); + + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_EQ)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_NE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLE)); +} + +void llvm::describeFuzzerFloatOps(std::vector<fuzzerop::OpDescriptor> &Ops) { + Ops.push_back(binOpDescriptor(1, Instruction::FAdd)); + Ops.push_back(binOpDescriptor(1, Instruction::FSub)); + Ops.push_back(binOpDescriptor(1, Instruction::FMul)); + Ops.push_back(binOpDescriptor(1, Instruction::FDiv)); + Ops.push_back(binOpDescriptor(1, Instruction::FRem)); + + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_FALSE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OEQ)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ONE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ORD)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNO)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UEQ)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_TRUE)); +} + +void llvm::describeFuzzerControlFlowOps( + std::vector<fuzzerop::OpDescriptor> &Ops) { + Ops.push_back(splitBlockDescriptor(1)); +} + +void llvm::describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops) { + Ops.push_back(gepDescriptor(1)); +} + +void llvm::describeFuzzerAggregateOps( + std::vector<fuzzerop::OpDescriptor> &Ops) { + Ops.push_back(extractValueDescriptor(1)); + Ops.push_back(insertValueDescriptor(1)); +} + +void llvm::describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops) { + Ops.push_back(extractElementDescriptor(1)); + Ops.push_back(insertElementDescriptor(1)); + Ops.push_back(shuffleVectorDescriptor(1)); +} + +OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight, + Instruction::BinaryOps Op) { + auto buildOp = [Op](ArrayRef<Value *> Srcs, Instruction *Inst) { + return BinaryOperator::Create(Op, Srcs[0], Srcs[1], "B", Inst); + }; + switch (Op) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return {Weight, {anyIntType(), matchFirstType()}, buildOp}; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return {Weight, {anyFloatType(), matchFirstType()}, buildOp}; + case Instruction::BinaryOpsEnd: + llvm_unreachable("Value out of range of enum"); + } + llvm_unreachable("Covered switch"); +} + +OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight, + Instruction::OtherOps CmpOp, + CmpInst::Predicate Pred) { + auto buildOp = [CmpOp, Pred](ArrayRef<Value *> Srcs, Instruction *Inst) { + return CmpInst::Create(CmpOp, Pred, Srcs[0], Srcs[1], "C", Inst); + }; + + switch (CmpOp) { + case Instruction::ICmp: + return {Weight, {anyIntType(), matchFirstType()}, buildOp}; + case Instruction::FCmp: + return {Weight, {anyFloatType(), matchFirstType()}, buildOp}; + default: + llvm_unreachable("CmpOp must be ICmp or FCmp"); + } +} + +OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) { + auto buildSplitBlock = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + BasicBlock *Block = Inst->getParent(); + BasicBlock *Next = Block->splitBasicBlock(Inst, "BB"); + if (Block != &Block->getParent()->getEntryBlock()) { + // Loop back on this block by replacing the unconditional forward branch + // with a conditional with a backedge. + BranchInst::Create(Block, Next, Srcs[0], Block->getTerminator()); + Block->getTerminator()->eraseFromParent(); + + // We need values for each phi in the block. Since there isn't a good way + // to do a variable number of input values currently, we just fill them + // with undef. + for (PHINode &PHI : Block->phis()) + PHI.addIncoming(UndefValue::get(PHI.getType()), Block); + } + return nullptr; + }; + SourcePred isInt1Ty{[](ArrayRef<Value *>, const Value *V) { + return V->getType()->isIntegerTy(1); + }, + None}; + return {Weight, {isInt1Ty}, buildSplitBlock}; +} + +OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) { + auto buildGEP = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + Type *Ty = cast<PointerType>(Srcs[0]->getType())->getElementType(); + auto Indices = makeArrayRef(Srcs).drop_front(1); + return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst); + }; + // TODO: Handle aggregates and vectors + // TODO: Support multiple indices. + // TODO: Try to avoid meaningless accesses. + return {Weight, {anyPtrType(), anyIntType()}, buildGEP}; +} + +static uint64_t getAggregateNumElements(Type *T) { + assert(T->isAggregateType() && "Not a struct or array"); + if (isa<StructType>(T)) + return T->getStructNumElements(); + return T->getArrayNumElements(); +} + +static SourcePred validExtractValueIndex() { + auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { + if (auto *CI = dyn_cast<ConstantInt>(V)) + if (!CI->uge(getAggregateNumElements(Cur[0]->getType()))) + return true; + return false; + }; + auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { + std::vector<Constant *> Result; + auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); + uint64_t N = getAggregateNumElements(Cur[0]->getType()); + // Create indices at the start, end, and middle, but avoid dups. + Result.push_back(ConstantInt::get(Int32Ty, 0)); + if (N > 1) + Result.push_back(ConstantInt::get(Int32Ty, N - 1)); + if (N > 2) + Result.push_back(ConstantInt::get(Int32Ty, N / 2)); + return Result; + }; + return {Pred, Make}; +} + +OpDescriptor llvm::fuzzerop::extractValueDescriptor(unsigned Weight) { + auto buildExtract = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + // TODO: It's pretty inefficient to shuffle this all through constants. + unsigned Idx = cast<ConstantInt>(Srcs[1])->getZExtValue(); + return ExtractValueInst::Create(Srcs[0], {Idx}, "E", Inst); + }; + // TODO: Should we handle multiple indices? + return {Weight, {anyAggregateType(), validExtractValueIndex()}, buildExtract}; +} + +static SourcePred matchScalarInAggregate() { + auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { + if (isa<ArrayType>(Cur[0]->getType())) + return V->getType() == Cur[0]->getType(); + auto *STy = cast<StructType>(Cur[0]->getType()); + for (int I = 0, E = STy->getNumElements(); I < E; ++I) + if (STy->getTypeAtIndex(I) == V->getType()) + return true; + return false; + }; + auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *>) { + if (isa<ArrayType>(Cur[0]->getType())) + return makeConstantsWithType(Cur[0]->getType()); + std::vector<Constant *> Result; + auto *STy = cast<StructType>(Cur[0]->getType()); + for (int I = 0, E = STy->getNumElements(); I < E; ++I) + makeConstantsWithType(STy->getTypeAtIndex(I), Result); + return Result; + }; + return {Pred, Make}; +} + +static SourcePred validInsertValueIndex() { + auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { + auto *CTy = cast<CompositeType>(Cur[0]->getType()); + if (auto *CI = dyn_cast<ConstantInt>(V)) + if (CI->getBitWidth() == 32) + if (CTy->getTypeAtIndex(CI->getZExtValue()) == V->getType()) + return true; + return false; + }; + auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { + std::vector<Constant *> Result; + auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); + auto *CTy = cast<CompositeType>(Cur[0]->getType()); + for (int I = 0, E = getAggregateNumElements(CTy); I < E; ++I) + if (CTy->getTypeAtIndex(I) == Cur[1]->getType()) + Result.push_back(ConstantInt::get(Int32Ty, I)); + return Result; + }; + return {Pred, Make}; +} + +OpDescriptor llvm::fuzzerop::insertValueDescriptor(unsigned Weight) { + auto buildInsert = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + // TODO: It's pretty inefficient to shuffle this all through constants. + unsigned Idx = cast<ConstantInt>(Srcs[2])->getZExtValue(); + return InsertValueInst::Create(Srcs[0], Srcs[1], {Idx}, "I", Inst); + }; + return { + Weight, + {anyAggregateType(), matchScalarInAggregate(), validInsertValueIndex()}, + buildInsert}; +} + +OpDescriptor llvm::fuzzerop::extractElementDescriptor(unsigned Weight) { + auto buildExtract = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + return ExtractElementInst::Create(Srcs[0], Srcs[1], "E", Inst); + }; + // TODO: Try to avoid undefined accesses. + return {Weight, {anyVectorType(), anyIntType()}, buildExtract}; +} + +OpDescriptor llvm::fuzzerop::insertElementDescriptor(unsigned Weight) { + auto buildInsert = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + return InsertElementInst::Create(Srcs[0], Srcs[1], Srcs[2], "I", Inst); + }; + // TODO: Try to avoid undefined accesses. + return {Weight, + {anyVectorType(), matchScalarOfFirstType(), anyIntType()}, + buildInsert}; +} + +static SourcePred validShuffleVectorIndex() { + auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { + return ShuffleVectorInst::isValidOperands(Cur[0], Cur[1], V); + }; + auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { + auto *FirstTy = cast<VectorType>(Cur[0]->getType()); + auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); + // TODO: It's straighforward to make up reasonable values, but listing them + // exhaustively would be insane. Come up with a couple of sensible ones. + return std::vector<Constant *>{ + UndefValue::get(VectorType::get(Int32Ty, FirstTy->getNumElements()))}; + }; + return {Pred, Make}; +} + +OpDescriptor llvm::fuzzerop::shuffleVectorDescriptor(unsigned Weight) { + auto buildShuffle = [](ArrayRef<Value *> Srcs, Instruction *Inst) { + return new ShuffleVectorInst(Srcs[0], Srcs[1], Srcs[2], "S", Inst); + }; + return {Weight, + {anyVectorType(), matchFirstType(), validShuffleVectorIndex()}, + buildShuffle}; +} diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp new file mode 100644 index 00000000000..42e30464b0d --- /dev/null +++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp @@ -0,0 +1,140 @@ +//===-- RandomIRBuilder.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/RandomIRBuilder.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/FuzzMutate/Random.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; +using namespace fuzzerop; + +Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, + ArrayRef<Instruction *> Insts) { + return findOrCreateSource(BB, Insts, {}, anyType()); +} + +Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, + ArrayRef<Instruction *> Insts, + ArrayRef<Value *> Srcs, + SourcePred Pred) { + auto MatchesPred = [&Srcs, &Pred](Instruction *Inst) { + return Pred.matches(Srcs, Inst); + }; + auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); + // Also consider choosing no source, meaning we want a new one. + RS.sample(nullptr, /*Weight=*/1); + if (Instruction *Src = RS.getSelection()) + return Src; + return newSource(BB, Insts, Srcs, Pred); +} + +Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts, + ArrayRef<Value *> Srcs, SourcePred Pred) { + // Generate some constants to choose from. + auto RS = makeSampler<Value *>(Rand); + RS.sample(Pred.generate(Srcs, KnownTypes)); + assert(!RS.isEmpty() && "Failed to generate sources"); + + // If we can find a pointer to load from, use it half the time. + Value *Ptr = findPointer(BB, Insts, Srcs, Pred); + if (Ptr) + RS.sample(Ptr, RS.totalWeight()); + + Value *Result = RS.getSelection(); + if (Result != Ptr) + return Result; + + // If we choose the pointer, we need to create a load. + auto IP = BB.getFirstInsertionPt(); + if (auto *I = dyn_cast<Instruction>(Ptr)) + IP = ++I->getIterator(); + return new LoadInst(Ptr, "L", &*IP); +} + +static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, + const Value *Replacement) { + if (Operand->getType() != Replacement->getType()) + return false; + switch (I->getOpcode()) { + case Instruction::GetElementPtr: + case Instruction::ExtractElement: + case Instruction::ExtractValue: + // TODO: We could potentially validate these, but for now just leave indices + // alone. + if (Operand.getOperandNo() > 1) + return false; + break; + case Instruction::InsertValue: + case Instruction::InsertElement: + if (Operand.getOperandNo() > 2) + return false; + break; + default: + break; + } + return true; +} + +void RandomIRBuilder::connectToSink(BasicBlock &BB, + ArrayRef<Instruction *> Insts, Value *V) { + auto RS = makeSampler<Use *>(Rand); + for (auto &I : Insts) { + if (isa<IntrinsicInst>(I)) + // TODO: Replacing operands of intrinsics would be interesting, but + // there's no easy way to verify that a given replacement is valid given + // that intrinsics can impose arbitrary constraints. + continue; + for (Use &U : I->operands()) + if (isCompatibleReplacement(I, U, V)) + RS.sample(&U, 1); + } + // Also consider choosing no sink, meaning we want a new one. + RS.sample(nullptr, /*Weight=*/1); + + if (Use *Sink = RS.getSelection()) { + User *U = Sink->getUser(); + unsigned OpNo = Sink->getOperandNo(); + U->setOperand(OpNo, V); + return; + } + newSink(BB, Insts, V); +} + +void RandomIRBuilder::newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, + Value *V) { + Value *Ptr = findPointer(BB, Insts, {V}, matchFirstType()); + if (!Ptr) { + if (uniform(Rand, 0, 1)) + Ptr = new AllocaInst(V->getType(), 0, "A", &*BB.getFirstInsertionPt()); + else + Ptr = UndefValue::get(PointerType::get(V->getType(), 0)); + } + + new StoreInst(V, Ptr, Insts.back()); +} + +Value *RandomIRBuilder::findPointer(BasicBlock &BB, + ArrayRef<Instruction *> Insts, + ArrayRef<Value *> Srcs, SourcePred Pred) { + auto IsMatchingPtr = [&Srcs, &Pred](Instruction *Inst) { + if (auto PtrTy = dyn_cast<PointerType>(Inst->getType())) + // TODO: Check if this is horribly expensive. + return Pred.matches(Srcs, UndefValue::get(PtrTy->getElementType())); + return false; + }; + if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) + return RS.getSelection(); + return nullptr; +} |