summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/IR/Instruction.cpp2
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp1
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Transforms/Instrumentation/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp283
5 files changed, 288 insertions, 0 deletions
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 42a7a1cce7f..ba5629d1662 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -138,8 +138,10 @@ void Instruction::dropPoisonGeneratingFlags() {
cast<GetElementPtrInst>(this)->setIsInBounds(false);
break;
}
+ // TODO: FastMathFlags!
}
+
bool Instruction::isExact() const {
return cast<PossiblyExactOperator>(this)->isExact();
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index bd7ec65052a..e2b2a2b2526 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -100,6 +100,7 @@
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 6e7f4177e56..e785558d5a7 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -86,6 +86,7 @@ MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("poison-checking", PoisonCheckingPass())
#undef MODULE_PASS
#ifndef CGSCC_ANALYSIS
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 969529c21c5..78b697f7f94 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMInstrumentation
InstrProfiling.cpp
PGOInstrumentation.cpp
PGOMemOPSizeOpt.cpp
+ PoisonChecking.cpp
SanitizerCoverage.cpp
ThreadSanitizer.cpp
HWAddressSanitizer.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
new file mode 100644
index 00000000000..8406eb5d396
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -0,0 +1,283 @@
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit. That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef. There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language. If you're looking for something to see if your C/C++ contains
+// UB, this is not it.
+//
+// The rewritten semantics of each instruction will include the following
+// components:
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+// state of each input to whether the dynamic output of the instruction
+// produces poison.
+// 3) A flag validation rule which validates any poison producing flags on the
+// instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+// execute undefined behavior given the poison state of it's inputs.
+//
+// At the moment, the UB detection is done in a best effort manner; that is,
+// the resulting code may produce a false negative result (not report UB when
+// it actually exists according to the LangRef spec), but should never produce
+// a false positive (report UB where it doesn't exist). The intention is to
+// eventually support a "strict" mode which never dynamically reports a false
+// negative at the cost of rejecting some valid inputs to translation.
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+// from the LangRef.
+// - Validating the output of a IR fuzzer to ensure that all programs produced
+// are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+// status of an input/IR pair is the same before and after an optimization
+// transform.
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+// exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+// - Most validation rules not yet implemented for instructions with poison
+// relavant flags. At the moment, only nsw/nuw on add/sub are supported.
+// - UB which is control dependent on a branch on poison is not yet
+// reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled. As such,
+// storing poison to memory and then reloading it will cause a false negative
+// as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled. At the
+// moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled. In particular, the optimizer's freedom to pick
+// concrete values for undef bits so as to maximize potential for producing
+// poison is not modeled.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+ cl::init(false),
+ cl::desc("Check that returns are non-poison (for testing)"));
+
+
+static bool isConstantFalse(Value* V) {
+ assert(V->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(V))
+ return CI->isZero();
+ return false;
+}
+
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+ if (Ops.size() == 0)
+ return B.getFalse();
+ unsigned i = 0;
+ for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
+ if (i == Ops.size())
+ return B.getFalse();
+ Value *Accum = Ops[i++];
+ for (; i < Ops.size(); i++)
+ if (!isConstantFalse(Ops[i]))
+ Accum = B.CreateOr(Accum, Ops[i]);
+ return Accum;
+}
+
+static void generatePoisonChecksForBinOp(Instruction &I,
+ SmallVector<Value*, 2> &Checks) {
+ assert(isa<BinaryOperator>(I));
+
+ IRBuilder<> B(&I);
+ Value *LHS = I.getOperand(0);
+ Value *RHS = I.getOperand(1);
+ switch (I.getOpcode()) {
+ default:
+ return;
+ case Instruction::Add: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Sub: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Mul: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ };
+}
+
+static Value* generatePoisonChecks(Instruction &I) {
+ IRBuilder<> B(&I);
+ SmallVector<Value*, 2> Checks;
+ if (isa<BinaryOperator>(I))
+ generatePoisonChecksForBinOp(I, Checks);
+ return buildOrChain(B, Checks);
+}
+
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+ auto Itr = ValToPoison.find(V);
+ if (Itr != ValToPoison.end())
+ return Itr->second;
+ if (isa<Constant>(V)) {
+ return ConstantInt::getFalse(V->getContext());
+ }
+ // Return false for unknwon values - this implements a non-strict mode where
+ // unhandled IR constructs are simply considered to never produce poison. At
+ // some point in the future, we probably want a "strict mode" for testing if
+ // nothing else.
+ return ConstantInt::getFalse(V->getContext());
+}
+
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ if (CI->isAllOnesValue())
+ return;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ M->getOrInsertFunction("__poison_checker_assert",
+ Type::getVoidTy(M->getContext()),
+ Type::getInt1Ty(M->getContext()));
+ Function *TrapFunc = M->getFunction("__poison_checker_assert");
+ B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ CreateAssert(B, B.CreateNot(Cond));
+}
+
+static bool rewrite(Function &F) {
+ auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+ DenseMap<Value *, Value *> ValToPoison;
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ auto *NewPHI = PHINode::Create(Int1Ty,
+ OldPHI->getNumIncomingValues());
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+ NewPHI->addIncoming(UndefValue::get(Int1Ty),
+ OldPHI->getIncomingBlock(i));
+ NewPHI->insertBefore(OldPHI);
+ ValToPoison[OldPHI] = NewPHI;
+ }
+
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB) {
+ if (isa<PHINode>(I)) continue;
+
+ IRBuilder<> B(cast<Instruction>(&I));
+ if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+
+ if (LocalCheck)
+ if (auto *RI = dyn_cast<ReturnInst>(&I))
+ if (RI->getNumOperands() != 0) {
+ Value *Op = RI->getOperand(0);
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+ }
+
+ SmallVector<Value*, 4> Checks;
+ if (propagatesFullPoison(&I))
+ for (Value *V : I.operands())
+ Checks.push_back(getPoisonFor(ValToPoison, V));
+
+ if (auto *Check = generatePoisonChecks(I))
+ Checks.push_back(Check);
+ ValToPoison[&I] = buildOrChain(B, Checks);
+ }
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ if (!ValToPoison.count(OldPHI))
+ continue; // skip the newly inserted phis
+ auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+ auto *OldVal = OldPHI->getIncomingValue(i);
+ NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+ }
+ }
+ return true;
+}
+
+
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ bool Changed = false;
+ for (auto &F : M)
+ Changed |= rewrite(F);
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+/* Major TODO Items:
+ - Control dependent poison UB
+ - Strict mode - (i.e. must analyze every operand)
+ - Poison through memory
+ - Function ABIs
+
+ Minor TODO items:
+ - Add propagation rules for and/or instructions
+ - Add hasPoisonFlags predicate to ValueTracking
+ - Add poison check rules for:
+ - exact flags, out of bounds operands
+ - inbounds (can't be strict due to unknown allocation sizes)
+ - fmf and fp casts
+ */
OpenPOWER on IntegriCloud