22 files changed, 310 insertions, 319 deletions
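The change below is mechanical: op classes such as AffineForOp and AffineApplyOp are now held and passed by value, so their members are reached with `.` instead of `->`; only the underlying `Instruction *` obtained from `getInstruction()` keeps pointer syntax. A minimal sketch of the call-site pattern, using only accessors that appear in this diff (the helper function itself is hypothetical and not part of the commit):

    #include "mlir/AffineOps/AffineOps.h"
    #include "llvm/Support/Debug.h"

    using namespace mlir;

    // Hypothetical helper, for illustration only: scales a loop's step.
    static void scaleStep(AffineForOp forOp, int64_t factor) {
      // Old style (before this commit): the op handle acted like a pointer.
      //   forOp->setStep(forOp->getStep() * factor);

      // New style: the op class has value semantics, so use '.'.
      int64_t step = forOp.getStep();
      forOp.setStep(step * factor);

      // The wrapped Instruction is still a raw pointer and keeps '->'.
      forOp.getInstruction()->print(llvm::dbgs());
    }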
diff --git a/mlir/include/mlir/AffineOps/AffineOps.h b/mlir/include/mlir/AffineOps/AffineOps.h
index d8e34dc7248..4f949231674 100644
--- a/mlir/include/mlir/AffineOps/AffineOps.h
+++ b/mlir/include/mlir/AffineOps/AffineOps.h
@@ -253,18 +253,14 @@ public:
   unsigned getNumOperands() { return opEnd - opStart; }
   Value *getOperand(unsigned idx) {
-    return inst->getInstruction()->getOperand(opStart + idx);
+    return inst.getInstruction()->getOperand(opStart + idx);
   }
   using operand_iterator = AffineForOp::operand_iterator;
   using operand_range = AffineForOp::operand_range;
-  operand_iterator operand_begin() {
-    return inst->getInstruction()->operand_begin() + opStart;
-  }
-  operand_iterator operand_end() {
-    return inst->getInstruction()->operand_begin() + opEnd;
-  }
+  operand_iterator operand_begin() { return inst.operand_begin() + opStart; }
+  operand_iterator operand_end() { return inst.operand_begin() + opEnd; }
   operand_range getOperands() { return {operand_begin(), operand_end()}; }
 private:
diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h
index 38d3bf32dbc..8a186c28476 100644
--- a/mlir/include/mlir/EDSC/Builders.h
+++ b/mlir/include/mlir/EDSC/Builders.h
@@ -433,8 +433,8 @@ ValueHandle ValueHandle::create(Args... args) {
     return ValueHandle(inst->getResult(0));
   } else if (inst->getNumResults() == 0) {
     if (auto f = inst->dyn_cast<AffineForOp>()) {
-      f->createBody();
-      return ValueHandle(f->getInductionVar());
+      f.createBody();
+      return ValueHandle(f.getInductionVar());
     }
   }
   llvm_unreachable("unsupported instruction, use an InstructionHandle instead");
diff --git a/mlir/lib/AffineOps/AffineOps.cpp b/mlir/lib/AffineOps/AffineOps.cpp
index 92035489e21..2901d815032 100644
--- a/mlir/lib/AffineOps/AffineOps.cpp
+++ b/mlir/lib/AffineOps/AffineOps.cpp
@@ -62,7 +62,7 @@ bool mlir::isValidDim(Value *value) {
     return true;
   // Affine apply operation is ok if all of its operands are ok.
   if (auto op = inst->dyn_cast<AffineApplyOp>())
-    return op->isValidDim();
+    return op.isValidDim();
   // The dim op is okay if its operand memref/tensor is defined at the top
   // level.
   if (auto dimOp = inst->dyn_cast<DimOp>())
@@ -488,12 +488,11 @@ AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
           : AffineApplyOp();
   if (affineApply) {
     // a. Compose affine.apply instructions.
-    LLVM_DEBUG(affineApply->getInstruction()->print(
+    LLVM_DEBUG(affineApply.getInstruction()->print(
         dbgs() << "\nCompose AffineApplyOp recursively: "));
-    AffineMap affineApplyMap = affineApply->getAffineMap();
+    AffineMap affineApplyMap = affineApply.getAffineMap();
     SmallVector<Value *, 8> affineApplyOperands(
-        affineApply->getOperands().begin(),
-        affineApply->getOperands().end());
+        affineApply.getOperands().begin(), affineApply.getOperands().end());
     AffineApplyNormalizer normalizer(affineApplyMap, affineApplyOperands);
     LLVM_DEBUG(normalizer.affineMap.print(
@@ -684,10 +683,10 @@ void mlir::canonicalizeMapAndOperands(
 PatternMatchResult SimplifyAffineApply::match(Instruction *op) const {
   auto apply = op->cast<AffineApplyOp>();
-  auto map = apply->getAffineMap();
+  auto map = apply.getAffineMap();
   AffineMap oldMap = map;
-  SmallVector<Value *, 8> resultOperands(apply->getOperands());
+  SmallVector<Value *, 8> resultOperands(apply.getOperands());
   composeAffineMapAndOperands(&map, &resultOperands);
   if (map != oldMap)
     return matchSuccess(
@@ -997,7 +996,7 @@ struct AffineForLoopBoundFolder : public RewritePattern {
     auto forOp = op->cast<AffineForOp>();
     // If the loop has non-constant bounds, it may be foldable.
-    if (!forOp->hasConstantBounds())
+    if (!forOp.hasConstantBounds())
       return matchSuccess();
     return matchFailure();
@@ -1009,8 +1008,8 @@ struct AffineForLoopBoundFolder : public RewritePattern {
     // Check to see if each of the operands is the result of a constant. If
     // so, get the value. If not, ignore it.
     SmallVector<Attribute, 8> operandConstants;
-    auto boundOperands = lower ? forOp->getLowerBoundOperands()
-                               : forOp->getUpperBoundOperands();
+    auto boundOperands =
+        lower ? forOp.getLowerBoundOperands() : forOp.getUpperBoundOperands();
     for (auto *operand : boundOperands) {
       Attribute operandCst;
       matchPattern(operand, m_Constant(&operandCst));
     }
     AffineMap boundMap =
-        lower ? forOp->getLowerBoundMap() : forOp->getUpperBoundMap();
+        lower ? forOp.getLowerBoundMap() : forOp.getUpperBoundMap();
     assert(boundMap.getNumResults() >= 1 &&
            "bound maps should have at least one result");
     SmallVector<Attribute, 4> foldedResults;
@@ -1034,16 +1033,16 @@ struct AffineForLoopBoundFolder : public RewritePattern {
       maxOrMin = lower ? llvm::APIntOps::smax(maxOrMin, foldedResult)
                        : llvm::APIntOps::smin(maxOrMin, foldedResult);
     }
-    lower ? forOp->setConstantLowerBound(maxOrMin.getSExtValue())
-          : forOp->setConstantUpperBound(maxOrMin.getSExtValue());
+    lower ? forOp.setConstantLowerBound(maxOrMin.getSExtValue())
+          : forOp.setConstantUpperBound(maxOrMin.getSExtValue());
   };
   // Try to fold the lower bound.
-  if (!forOp->hasConstantLowerBound())
+  if (!forOp.hasConstantLowerBound())
     foldLowerOrUpperBound(/*lower=*/true);
   // Try to fold the upper bound.
-  if (!forOp->hasConstantUpperBound())
+  if (!forOp.hasConstantUpperBound())
     foldLowerOrUpperBound(/*lower=*/false);
   rewriter.updatedRootInPlace(op);
@@ -1196,7 +1195,7 @@ void mlir::extractForInductionVars(ArrayRef<AffineForOp> forInsts,
                                    SmallVectorImpl<Value *> *ivs) {
   ivs->reserve(forInsts.size());
   for (auto forInst : forInsts)
-    ivs->push_back(forInst->getInductionVar());
+    ivs->push_back(forInst.getInductionVar());
 }
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index e2e9ef68b17..a2d511cf965 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -546,7 +546,7 @@ static Block *getCommonBlock(const MemRefAccess &srcAccess,
   auto *commonForValue = srcDomain.getIdValue(numCommonLoops - 1);
   auto forOp = getForInductionVarOwner(commonForValue);
   assert(forOp && "commonForValue was not an induction variable");
-  return forOp->getBody();
+  return forOp.getBody();
 }
 // Returns true if the ancestor operation instruction of 'srcAccess' appears
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp
index 64d1809922c..64f18e325de 100644
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@@ -218,9 +218,9 @@ AffineValueMap::AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
       results(results.begin(), results.end()) {}
 AffineValueMap::AffineValueMap(AffineApplyOp applyOp)
-    : map(applyOp->getAffineMap()),
-      operands(applyOp->operand_begin(), applyOp->operand_end()) {
-  results.push_back(applyOp->getResult());
+    : map(applyOp.getAffineMap()),
+      operands(applyOp.operand_begin(), applyOp.operand_end()) {
+  results.push_back(applyOp.getResult());
 }
 AffineValueMap::AffineValueMap(AffineBound bound)
@@ -721,7 +721,7 @@ void FlatAffineConstraints::addInductionVarOrTerminalSymbol(Value *id) {
     addDimId(getNumDimIds(), id);
     if (failed(this->addAffineForOpDomain(loop)))
       LLVM_DEBUG(
-          loop->emitWarning("failed to add domain info to constraint system"));
+          loop.emitWarning("failed to add domain info to constraint system"));
     return;
   }
   // Add top level symbol.
@@ -737,15 +737,15 @@ void FlatAffineConstraints::addInductionVarOrTerminalSymbol(Value *id) {
 LogicalResult FlatAffineConstraints::addAffineForOpDomain(AffineForOp forOp) {
   unsigned pos;
   // Pre-condition for this method.
-  if (!findId(*forOp->getInductionVar(), &pos)) {
+  if (!findId(*forOp.getInductionVar(), &pos)) {
     assert(false && "Value not found");
     return failure();
   }
-  int64_t step = forOp->getStep();
+  int64_t step = forOp.getStep();
   if (step != 1) {
-    if (!forOp->hasConstantLowerBound())
-      forOp->emitWarning("domain conservatively approximated");
+    if (!forOp.hasConstantLowerBound())
+      forOp.emitWarning("domain conservatively approximated");
     else {
       // Add constraints for the stride.
       // (iv - lb) % step = 0 can be written as:
@@ -753,7 +753,7 @@ LogicalResult FlatAffineConstraints::addAffineForOpDomain(AffineForOp forOp) {
       // Add local variable 'q' and add the above equality.
       // The first constraint is q = (iv - lb) floordiv step
       SmallVector<int64_t, 8> dividend(getNumCols(), 0);
-      int64_t lb = forOp->getConstantLowerBound();
+      int64_t lb = forOp.getConstantLowerBound();
       dividend[pos] = 1;
       dividend.back() -= lb;
       addLocalFloorDiv(dividend, step);
@@ -767,25 +767,25 @@ LogicalResult FlatAffineConstraints::addAffineForOpDomain(AffineForOp forOp) {
     }
   }
-  if (forOp->hasConstantLowerBound()) {
-    addConstantLowerBound(pos, forOp->getConstantLowerBound());
+  if (forOp.hasConstantLowerBound()) {
+    addConstantLowerBound(pos, forOp.getConstantLowerBound());
   } else {
     // Non-constant lower bound case.
-    SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands().begin(),
-                                       forOp->getLowerBoundOperands().end());
-    if (failed(addLowerOrUpperBound(pos, forOp->getLowerBoundMap(), lbOperands,
+    SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands().begin(),
+                                       forOp.getLowerBoundOperands().end());
+    if (failed(addLowerOrUpperBound(pos, forOp.getLowerBoundMap(), lbOperands,
                                     /*eq=*/false, /*lower=*/true)))
       return failure();
   }
-  if (forOp->hasConstantUpperBound()) {
-    addConstantUpperBound(pos, forOp->getConstantUpperBound() - 1);
+  if (forOp.hasConstantUpperBound()) {
+    addConstantUpperBound(pos, forOp.getConstantUpperBound() - 1);
     return success();
   }
   // Non-constant upper bound case.
-  SmallVector<Value *, 4> ubOperands(forOp->getUpperBoundOperands().begin(),
-                                     forOp->getUpperBoundOperands().end());
-  return addLowerOrUpperBound(pos, forOp->getUpperBoundMap(), ubOperands,
+  SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands().begin(),
+                                     forOp.getUpperBoundOperands().end());
+  return addLowerOrUpperBound(pos, forOp.getUpperBoundMap(), ubOperands,
                               /*eq=*/false, /*lower=*/false);
 }
diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp
index ba9a29177fe..bf8e265dbb8 100644
--- a/mlir/lib/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Analysis/LoopAnalysis.cpp
@@ -53,12 +53,12 @@ void mlir::buildTripCountMapAndOperands(
     SmallVectorImpl<Value *> *tripCountOperands) {
   int64_t loopSpan;
-  int64_t step = forOp->getStep();
-  FuncBuilder b(forOp->getInstruction());
+  int64_t step = forOp.getStep();
+  FuncBuilder b(forOp.getInstruction());
-  if (forOp->hasConstantBounds()) {
-    int64_t lb = forOp->getConstantLowerBound();
-    int64_t ub = forOp->getConstantUpperBound();
+  if (forOp.hasConstantBounds()) {
+    int64_t lb = forOp.getConstantLowerBound();
+    int64_t ub = forOp.getConstantUpperBound();
     loopSpan = ub - lb;
     if (loopSpan < 0)
       loopSpan = 0;
@@ -66,20 +66,20 @@ void mlir::buildTripCountMapAndOperands(
     tripCountOperands->clear();
     return;
   }
-  auto lbMap = forOp->getLowerBoundMap();
-  auto ubMap = forOp->getUpperBoundMap();
+  auto lbMap = forOp.getLowerBoundMap();
+  auto ubMap = forOp.getUpperBoundMap();
   if (lbMap.getNumResults() != 1) {
     *map = AffineMap();
     return;
   }
-  SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands());
-  SmallVector<Value *, 4> ubOperands(forOp->getUpperBoundOperands());
-  auto lb = b.create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);
+  SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
+  SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
+  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
   SmallVector<Value *, 4> ubs;
   ubs.reserve(ubMap.getNumResults());
   for (auto ubExpr : ubMap.getResults())
     ubs.push_back(b.create<AffineApplyOp>(
-        forOp->getLoc(),
+        forOp.getLoc(),
         b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr},
                        {}),
         ubOperands));
@@ -102,8 +102,8 @@ void mlir::buildTripCountMapAndOperands(
   for (auto *v : ubs)
     if (v->use_empty())
       v->getDefiningInst()->erase();
-  if (lb->use_empty())
-    lb->erase();
+  if (lb.use_empty())
+    lb.erase();
 }
 /// Returns the trip count of the loop if it's a constant, None otherwise. This
@@ -280,7 +280,7 @@ using VectorizableInstFun = std::function<bool(AffineForOp, Instruction &)>;
 static bool isVectorizableLoopWithCond(AffineForOp loop,
                                        VectorizableInstFun isVectorizableInst) {
-  auto *forInst = loop->getInstruction();
+  auto *forInst = loop.getInstruction();
   if (!matcher::isParallelLoop(*forInst) &&
       !matcher::isReductionLoop(*forInst)) {
     return false;
   }
@@ -339,9 +339,9 @@ bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
       [fastestVaryingDim](AffineForOp loop, Instruction &op) {
         auto load = op.dyn_cast<LoadOp>();
         auto store = op.dyn_cast<StoreOp>();
-        return load ? isContiguousAccess(*loop->getInductionVar(), load,
+        return load ? isContiguousAccess(*loop.getInductionVar(), load,
                                          fastestVaryingDim)
-                    : isContiguousAccess(*loop->getInductionVar(), store,
+                    : isContiguousAccess(*loop.getInductionVar(), store,
                                          fastestVaryingDim);
       });
   return isVectorizableLoopWithCond(loop, fun);
@@ -360,7 +360,7 @@ bool mlir::isVectorizableLoop(AffineForOp loop) {
 // TODO(mlir-team): extend this to check for memory-based dependence
 // violation when we have the support.
 bool mlir::isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
-  auto *forBody = forOp->getBody();
+  auto *forBody = forOp.getBody();
   assert(shifts.size() == forBody->getInstructions().size());
   // Work backwards over the body of the block so that the shift of a use's
diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp
index 4b599c4d4df..878b713ded1 100644
--- a/mlir/lib/Analysis/SliceAnalysis.cpp
+++ b/mlir/lib/Analysis/SliceAnalysis.cpp
@@ -53,7 +53,7 @@ static void getForwardSliceImpl(Instruction *inst,
   }
   if (auto forOp = inst->dyn_cast<AffineForOp>()) {
-    for (auto &u : forOp->getInductionVar()->getUses()) {
+    for (auto &u : forOp.getInductionVar()->getUses()) {
       auto *ownerInst = u.getOwner();
       if (forwardSlice->count(ownerInst) == 0) {
         getForwardSliceImpl(ownerInst, forwardSlice, filter);
diff --git a/mlir/lib/Analysis/TestParallelismDetection.cpp b/mlir/lib/Analysis/TestParallelismDetection.cpp
index 7ed59b403cd..af112e5b02c 100644
--- a/mlir/lib/Analysis/TestParallelismDetection.cpp
+++ b/mlir/lib/Analysis/TestParallelismDetection.cpp
@@ -47,7 +47,7 @@ void TestParallelismDetection::runOnFunction() {
   FuncBuilder b(f);
   f->walk<AffineForOp>([&](AffineForOp forOp) {
     if (isLoopParallel(forOp))
-      forOp->emitNote("parallel loop");
+      forOp.emitNote("parallel loop");
   });
 }
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index 6bc395c46bd..2ac4ee9000f 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -374,7 +374,7 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
                     std::is_same<LoadOrStoreOpPointer, StoreOp>::value,
                 "argument should be either a LoadOp or a StoreOp");
-  Instruction *opInst = loadOrStoreOp->getInstruction();
+  Instruction *opInst = loadOrStoreOp.getInstruction();
   MemRefRegion region(opInst->getLoc());
   if (failed(region.compute(opInst, /*loopDepth=*/0)))
@@ -458,7 +458,7 @@ static Instruction *getInstAtPosition(ArrayRef<unsigned> positions,
       return &inst;
     if (auto childAffineForOp = inst.dyn_cast<AffineForOp>())
       return getInstAtPosition(positions, level + 1,
-                               childAffineForOp->getBody());
+                               childAffineForOp.getBody());
     for (auto &region : inst.getRegions()) {
       for (auto &b : region)
@@ -537,9 +537,9 @@ LogicalResult mlir::getBackwardComputationSliceState(
   // TODO(andydavis, bondhugula) Use MemRef read/write regions instead of
   // using 'kSliceFusionBarrierAttrName'.
   for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
-    Value *iv = srcLoopIVs[i]->getInductionVar();
+    Value *iv = srcLoopIVs[i].getInductionVar();
     if (sequentialLoops.count(iv) == 0 &&
-        srcLoopIVs[i]->getAttr(kSliceFusionBarrierAttrName) == nullptr)
+        srcLoopIVs[i].getAttr(kSliceFusionBarrierAttrName) == nullptr)
       continue;
     for (unsigned j = i; j < numSrcLoopIVs; ++j) {
       sliceState->lbs[j] = AffineMap();
@@ -583,18 +583,18 @@ AffineForOp mlir::insertBackwardComputationSlice(
   // Find the inst block positions of 'srcOpInst' within 'srcLoopIVs'.
   SmallVector<unsigned, 4> positions;
   // TODO(andydavis): This code is incorrect since srcLoopIVs can be 0-d.
-  findInstPosition(srcOpInst, srcLoopIVs[0]->getInstruction()->getBlock(),
+  findInstPosition(srcOpInst, srcLoopIVs[0].getInstruction()->getBlock(),
                    &positions);
   // Clone src loop nest and insert it a the beginning of the instruction block
   // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
   auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
-  FuncBuilder b(dstAffineForOp->getBody(), dstAffineForOp->getBody()->begin());
+  FuncBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
   auto sliceLoopNest =
-      b.clone(*srcLoopIVs[0]->getInstruction())->cast<AffineForOp>();
+      b.clone(*srcLoopIVs[0].getInstruction())->cast<AffineForOp>();
   Instruction *sliceInst =
-      getInstAtPosition(positions, /*level=*/0, sliceLoopNest->getBody());
+      getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
   // Get loop nest surrounding 'sliceInst'.
   SmallVector<AffineForOp, 4> sliceSurroundingLoops;
   getLoopIVs(*sliceInst, &sliceSurroundingLoops);
@@ -611,9 +611,9 @@ AffineForOp mlir::insertBackwardComputationSlice(
   for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
     auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
     if (AffineMap lbMap = sliceState->lbs[i])
-      forOp->setLowerBound(sliceState->lbOperands[i], lbMap);
+      forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
     if (AffineMap ubMap = sliceState->ubs[i])
-      forOp->setUpperBound(sliceState->ubOperands[i], ubMap);
+      forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
   }
   return sliceLoopNest;
 }
@@ -670,7 +670,7 @@ unsigned mlir::getNumCommonSurroundingLoops(Instruction &A, Instruction &B) {
   unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
   unsigned numCommonLoops = 0;
   for (unsigned i = 0; i < minNumLoops; ++i) {
-    if (loopsA[i]->getInstruction() != loopsB[i]->getInstruction())
+    if (loopsA[i].getInstruction() != loopsB[i].getInstruction())
       break;
     ++numCommonLoops;
   }
@@ -727,7 +727,7 @@ static Optional<int64_t> getMemoryFootprintBytes(Block &block,
 Optional<int64_t> mlir::getMemoryFootprintBytes(AffineForOp forOp,
                                                 int memorySpace) {
-  auto *forInst = forOp->getInstruction();
+  auto *forInst = forOp.getInstruction();
   return ::getMemoryFootprintBytes(
       *forInst->getBlock(), Block::iterator(forInst),
       std::next(Block::iterator(forInst)), memorySpace);
@@ -737,10 +737,10 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(AffineForOp forOp,
 /// at 'forOp'.
 void mlir::getSequentialLoops(
     AffineForOp forOp, llvm::SmallDenseSet<Value *, 8> *sequentialLoops) {
-  forOp->getInstruction()->walk([&](Instruction *inst) {
+  forOp.getInstruction()->walk([&](Instruction *inst) {
     if (auto innerFor = inst->dyn_cast<AffineForOp>())
       if (!isLoopParallel(innerFor))
-        sequentialLoops->insert(innerFor->getInductionVar());
+        sequentialLoops->insert(innerFor.getInductionVar());
   });
 }
@@ -748,13 +748,13 @@ void mlir::getSequentialLoops(
 bool mlir::isLoopParallel(AffineForOp forOp) {
   // Collect all load and store ops in loop nest rooted at 'forOp'.
   SmallVector<Instruction *, 8> loadAndStoreOpInsts;
-  forOp->getInstruction()->walk([&](Instruction *opInst) {
+  forOp.getInstruction()->walk([&](Instruction *opInst) {
     if (opInst->isa<LoadOp>() || opInst->isa<StoreOp>())
       loadAndStoreOpInsts.push_back(opInst);
   });
   // Dep check depth would be number of enclosing loops + 1.
-  unsigned depth = getNestingDepth(*forOp->getInstruction()) + 1;
+  unsigned depth = getNestingDepth(*forOp.getInstruction()) + 1;
   // Check dependences between all pairs of ops in 'loadAndStoreOpInsts'.
   for (auto *srcOpInst : loadAndStoreOpInsts) {
diff --git a/mlir/lib/Analysis/VectorAnalysis.cpp b/mlir/lib/Analysis/VectorAnalysis.cpp
index 5df31affe31..32543c8d975 100644
--- a/mlir/lib/Analysis/VectorAnalysis.cpp
+++ b/mlir/lib/Analysis/VectorAnalysis.cpp
@@ -115,7 +115,7 @@ static AffineMap makePermutationMap(
   for (auto kvp : enclosingLoopToVectorDim) {
     assert(kvp.second < perm.size());
     auto invariants = getInvariantAccesses(
-        *kvp.first->cast<AffineForOp>()->getInductionVar(), unwrappedIndices);
+        *kvp.first->cast<AffineForOp>().getInductionVar(), unwrappedIndices);
     unsigned numIndices = unwrappedIndices.size();
     unsigned countInvariantIndices = 0;
     for (unsigned dim = 0; dim < numIndices; ++dim) {
diff --git a/mlir/lib/EDSC/Builders.cpp b/mlir/lib/EDSC/Builders.cpp
index 595141af84e..5cf5cb6cfff 100644
--- a/mlir/lib/EDSC/Builders.cpp
+++ b/mlir/lib/EDSC/Builders.cpp
@@ -87,7 +87,7 @@ mlir::edsc::ValueHandle::createComposedAffineApply(AffineMap map,
   Instruction *inst =
       makeComposedAffineApply(ScopedContext::getBuilder(),
                               ScopedContext::getLocation(), map, operands)
-          ->getInstruction();
+          .getInstruction();
   assert(inst->getNumResults() == 1 && "Not a single result AffineApply");
   return ValueHandle(inst->getResult(0));
 }
@@ -103,8 +103,8 @@ ValueHandle ValueHandle::create(StringRef name, ArrayRef<ValueHandle> operands,
   if (auto f = inst->dyn_cast<AffineForOp>()) {
     // Immediately create the loop body so we can just insert instructions right
     // away.
-    f->createBody();
-    return ValueHandle(f->getInductionVar());
+    f.createBody();
+    return ValueHandle(f.getInductionVar());
   }
   llvm_unreachable("unsupported instruction, use an InstructionHandle instead");
 }
@@ -173,7 +173,7 @@ mlir::edsc::LoopBuilder::LoopBuilder(ValueHandle *iv,
         ubs, ScopedContext::getBuilder()->getMultiDimIdentityMap(ubs.size()),
         step);
   }
-  auto *body = getForInductionVarOwner(iv->getValue())->getBody();
+  auto *body = getForInductionVarOwner(iv->getValue()).getBody();
   enter(body);
 }
diff --git a/mlir/lib/EDSC/MLIREmitter.cpp b/mlir/lib/EDSC/MLIREmitter.cpp
index 89c66b08941..97f8bd75b36 100644
--- a/mlir/lib/EDSC/MLIREmitter.cpp
+++ b/mlir/lib/EDSC/MLIREmitter.cpp
@@ -52,7 +52,7 @@ static void printDefininingStatement(llvm::raw_ostream &os, Value &v) {
     return;
   }
   if (auto forInst = getForInductionVarOwner(&v)) {
-    forInst->getInstruction()->print(os);
+    forInst.getInstruction()->print(os);
   } else if (auto *bbArg = dyn_cast<BlockArgument>(&v)) {
     os << "block_argument";
   } else {
@@ -176,8 +176,8 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) {
       forOp = builder->create<AffineForOp>(
           location, lbs, builder->getMultiDimIdentityMap(lbs.size()), ubs,
           builder->getMultiDimIdentityMap(ubs.size()), step);
-    forOp->createBody();
-    res = forOp->getInductionVar();
+    forOp.createBody();
+    res = forOp.getInductionVar();
   }
 }
@@ -236,7 +236,7 @@ mlir::edsc::MLIREmitter &mlir::edsc::MLIREmitter::emitStmt(const Stmt &stmt) {
   bind(Bindable(stmt.getLHS()), val);
   if (stmt.getRHS().getKind() == ExprKind::For) {
     // Step into the loop.
-    builder->setInsertionPointToStart(getForInductionVarOwner(val)->getBody());
+    builder->setInsertionPointToStart(getForInductionVarOwner(val).getBody());
   }
   emitStmts(stmt.getEnclosedStmts());
   builder->setInsertionPoint(block, ip);
diff --git a/mlir/lib/EDSC/Types.cpp b/mlir/lib/EDSC/Types.cpp
index ac8b98e38c3..a516f9617ac 100644
--- a/mlir/lib/EDSC/Types.cpp
+++ b/mlir/lib/EDSC/Types.cpp
@@ -209,7 +209,7 @@ Expr::build(FuncBuilder &b, const llvm::DenseMap<Expr, Value *> &ssaBindings,
     auto affInstr = makeComposedAffineApply(
         &b, b.getUnknownLoc(),
         getAttribute("map").cast<AffineMapAttr>().getValue(), operandValues);
-    return {affInstr->getResult()};
+    return {affInstr.getResult()};
   }
   auto state = OperationState(b.getContext(), b.getUnknownLoc(), getName());
diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp
index 4fa040d73eb..5a1af03d299 100644
--- a/mlir/lib/Transforms/DmaGeneration.cpp
+++ b/mlir/lib/Transforms/DmaGeneration.cpp
@@ -403,7 +403,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
                                 zeroIndex, stride, numEltPerStride);
     // Since new ops are being appended (for outgoing DMAs), adjust the end to
     // mark end of range of the original.
-    *nEnd = Block::iterator(op->getInstruction());
+    *nEnd = Block::iterator(op.getInstruction());
   }
   // Matching DMA wait to block on completion; tag always has a 0 index.
@@ -414,7 +414,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
   if (*nEnd == end)
     // Since new ops are being appended (for outgoing DMAs), adjust the end to
     // mark end of range of the original.
-    *nEnd = Block::iterator(tagDeallocOp->getInstruction());
+    *nEnd = Block::iterator(tagDeallocOp.getInstruction());
   // Generate dealloc for the DMA buffer.
   if (!existingBuf)
@@ -500,13 +500,13 @@ bool DmaGeneration::runOnBlock(Block *block) {
       // the footprint can't be calculated, we assume for now it fits. Recurse
       // inside if footprint for 'forOp' exceeds capacity, or when
       // clSkipNonUnitStrideLoop is set and the step size is not one.
-      bool recurseInner = clSkipNonUnitStrideLoop ? forOp->getStep() != 1
+      bool recurseInner = clSkipNonUnitStrideLoop ? forOp.getStep() != 1
                                                   : exceedsCapacity(forOp);
       if (recurseInner) {
         // We'll recurse and do the DMAs at an inner level for 'forInst'.
         runOnBlock(/*begin=*/curBegin, /*end=*/it);
         // Recurse onto the body of this loop.
-        runOnBlock(forOp->getBody());
+        runOnBlock(forOp.getBody());
         // The next region starts right after the 'affine.for' instruction.
         curBegin = std::next(it);
       } else {
@@ -561,15 +561,15 @@ findHighestBlockForPlacement(const MemRefRegion &region, Block &block,
   for (auto e = enclosingFors.rend(); it != e; ++it) {
     // TODO(bondhugula): also need to be checking this for regions symbols that
     // aren't loop IVs, whether we are within their resp. defs' dominance scope.
-    if (llvm::is_contained(symbols, (*it)->getInductionVar()))
+    if (llvm::is_contained(symbols, it->getInductionVar()))
       break;
   }
   if (it != enclosingFors.rbegin()) {
     auto lastInvariantIV = *std::prev(it);
-    *dmaPlacementReadStart = Block::iterator(lastInvariantIV->getInstruction());
+    *dmaPlacementReadStart = Block::iterator(lastInvariantIV.getInstruction());
     *dmaPlacementWriteStart = std::next(*dmaPlacementReadStart);
-    *dmaPlacementBlock = lastInvariantIV->getInstruction()->getBlock();
+    *dmaPlacementBlock = lastInvariantIV.getInstruction()->getBlock();
   } else {
     *dmaPlacementReadStart = begin;
     *dmaPlacementWriteStart = end;
@@ -737,9 +737,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
   AffineForOp forOp;
   uint64_t sizeInKib = llvm::divideCeil(totalDmaBuffersSizeInBytes, 1024);
   if (llvm::DebugFlag && (forOp = begin->dyn_cast<AffineForOp>())) {
-    forOp->emitNote(
-        Twine(sizeInKib) +
-        " KiB of DMA buffers in fast memory space for this block\n");
+    forOp.emitNote(Twine(sizeInKib) +
+                   " KiB of DMA buffers in fast memory space for this block\n");
   }
   if (totalDmaBuffersSizeInBytes > fastMemCapacityBytes) {
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 84644bf11a0..c757ea8e58b 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -696,8 +696,8 @@ bool MemRefDependenceGraph::init(Function *f) {
         getLoopIVs(*use.getOwner(), &loops);
         if (loops.empty())
           continue;
-        assert(forToNodeMap.count(loops[0]->getInstruction()) > 0);
-        unsigned userLoopNestId = forToNodeMap[loops[0]->getInstruction()];
+        assert(forToNodeMap.count(loops[0].getInstruction()) > 0);
+        unsigned userLoopNestId = forToNodeMap[loops[0].getInstruction()];
         addEdge(node.id, userLoopNestId, value);
       }
     }
@@ -745,8 +745,8 @@ struct LoopNestStatsCollector {
   void collect(Instruction *inst) {
     inst->walk<AffineForOp>([&](AffineForOp forOp) {
-      auto *forInst = forOp->getInstruction();
-      auto *parentInst = forOp->getInstruction()->getParentInst();
+      auto *forInst = forOp.getInstruction();
+      auto *parentInst = forOp.getInstruction()->getParentInst();
       if (parentInst != nullptr) {
         assert(parentInst->isa<AffineForOp>() && "Expected parent AffineForOp");
         // Add mapping to 'forOp' from its parent AffineForOp.
@@ -756,7 +756,7 @@ struct LoopNestStatsCollector {
       // Record the number of op instructions in the body of 'forOp'.
       unsigned count = 0;
       stats->opCountMap[forInst] = 0;
-      for (auto &inst : *forOp->getBody()) {
+      for (auto &inst : *forOp.getBody()) {
         if (!inst.isa<AffineForOp>() && !inst.isa<AffineIfOp>())
           ++count;
       }
@@ -796,7 +796,7 @@ static int64_t getComputeCost(
   int64_t opCount = stats->opCountMap[forInst];
   if (stats->loopMap.count(forInst) > 0) {
     for (auto childForOp : stats->loopMap[forInst]) {
-      opCount += getComputeCost(childForOp->getInstruction(), stats,
+      opCount += getComputeCost(childForOp.getInstruction(), stats,
                                 tripCountOverrideMap, computeCostMap);
     }
   }
@@ -854,11 +854,11 @@ static bool buildSliceTripCountMap(
     AffineMap ubMap = sliceState->ubs[i];
     if (lbMap == AffineMap() || ubMap == AffineMap()) {
       // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
-      if (srcLoopIVs[i]->hasConstantLowerBound() &&
-          srcLoopIVs[i]->hasConstantUpperBound()) {
-        (*tripCountMap)[srcLoopIVs[i]->getInstruction()] =
-            srcLoopIVs[i]->getConstantUpperBound() -
-            srcLoopIVs[i]->getConstantLowerBound();
+      if (srcLoopIVs[i].hasConstantLowerBound() &&
+          srcLoopIVs[i].hasConstantUpperBound()) {
+        (*tripCountMap)[srcLoopIVs[i].getInstruction()] =
+            srcLoopIVs[i].getConstantUpperBound() -
+            srcLoopIVs[i].getConstantLowerBound();
         continue;
       }
       return false;
     }
@@ -866,7 +866,7 @@ static bool buildSliceTripCountMap(
     Optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
     if (!tripCount.hasValue())
       return false;
-    (*tripCountMap)[srcLoopIVs[i]->getInstruction()] = tripCount.getValue();
+    (*tripCountMap)[srcLoopIVs[i].getInstruction()] = tripCount.getValue();
   }
   return true;
 }
@@ -1060,12 +1060,12 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
   SmallVector<AffineForOp, 4> loops;
   AffineForOp curr = node->inst->cast<AffineForOp>();
   loops.push_back(curr);
-  auto *currBody = curr->getBody();
+  auto *currBody = curr.getBody();
   while (!currBody->empty() &&
          std::next(currBody->begin()) == currBody->end() &&
-         (curr = curr->getBody()->front().dyn_cast<AffineForOp>())) {
+         (curr = curr.getBody()->front().dyn_cast<AffineForOp>())) {
     loops.push_back(curr);
-    currBody = curr->getBody();
+    currBody = curr.getBody();
   }
   if (loops.size() < 2)
     return;
@@ -1091,7 +1091,7 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
     }
   }
   assert(loopNestRootIndex != -1 && "invalid root index");
-  node->inst = loops[loopNestRootIndex]->getInstruction();
+  node->inst = loops[loopNestRootIndex].getInstruction();
 }
 // TODO(mlir-team): improve/complete this when we have target data.
@@ -1119,7 +1119,7 @@ static Value *createPrivateMemRef(AffineForOp forOp,
                                   unsigned dstLoopDepth,
                                   Optional<unsigned> fastMemorySpace,
                                   uint64_t localBufSizeThreshold) {
-  auto *forInst = forOp->getInstruction();
+  auto *forInst = forOp.getInstruction();
   // Create builder to insert alloc op just before 'forOp'.
   FuncBuilder b(forInst);
@@ -1187,7 +1187,7 @@ static Value *createPrivateMemRef(AffineForOp forOp,
   for (auto dimSize : oldMemRefType.getShape()) {
     if (dimSize == -1)
       allocOperands.push_back(
-          top.create<DimOp>(forOp->getLoc(), oldMemRef, dynamicDimCount++));
+          top.create<DimOp>(forOp.getLoc(), oldMemRef, dynamicDimCount++));
   }
   // Create new private memref for fused loop 'forOp'.
@@ -1196,7 +1196,7 @@ static Value *createPrivateMemRef(AffineForOp forOp,
   // at the beginning of the function, because loop nests can be reordered
   // during the fusion pass.
   Value *newMemRef =
-      top.create<AllocOp>(forOp->getLoc(), newMemRefType, allocOperands);
+      top.create<AllocOp>(forOp.getLoc(), newMemRefType, allocOperands);
   // Build an AffineMap to remap access functions based on lower bound offsets.
   SmallVector<AffineExpr, 4> remapExprs;
@@ -1220,7 +1220,7 @@ static Value *createPrivateMemRef(AffineForOp forOp,
   bool ret =
       replaceAllMemRefUsesWith(oldMemRef, newMemRef, {}, indexRemap,
                                /*extraOperands=*/outerIVs,
-                               /*domInstFilter=*/&*forOp->getBody()->begin());
+                               /*domInstFilter=*/&*forOp.getBody()->begin());
   assert(ret && "replaceAllMemrefUsesWith should always succeed here");
   (void)ret;
   return newMemRef;
@@ -1437,7 +1437,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
   // Walk src loop nest and collect stats.
   LoopNestStats srcLoopNestStats;
   LoopNestStatsCollector srcStatsCollector(&srcLoopNestStats);
-  srcStatsCollector.collect(srcLoopIVs[0]->getInstruction());
+  srcStatsCollector.collect(srcLoopIVs[0].getInstruction());
   // Currently only constant trip count loop nests are supported.
   if (srcStatsCollector.hasLoopWithNonConstTripCount) {
     LLVM_DEBUG(llvm::dbgs() << "Non-constant trip count loops unsupported.\n");
     return false;
   }
@@ -1449,7 +1449,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
   LoopNestStats dstLoopNestStats;
   LoopNestStatsCollector dstStatsCollector(&dstLoopNestStats);
-  dstStatsCollector.collect(dstLoopIVs[0]->getInstruction());
+  dstStatsCollector.collect(dstLoopIVs[0].getInstruction());
   // Currently only constant trip count loop nests are supported.
   if (dstStatsCollector.hasLoopWithNonConstTripCount) {
     LLVM_DEBUG(llvm::dbgs() << "Non-constant trip count loops unsupported.\n");
     return false;
   }
@@ -1484,7 +1484,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
   // Compute op instance count for the src loop nest without iteration slicing.
   uint64_t srcLoopNestCost =
-      getComputeCost(srcLoopIVs[0]->getInstruction(), &srcLoopNestStats,
+      getComputeCost(srcLoopIVs[0].getInstruction(), &srcLoopNestStats,
                      /*tripCountOverrideMap=*/nullptr,
                      /*computeCostMap=*/nullptr);
@@ -1504,7 +1504,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
   // Compute op instance count for the src loop nest.
   uint64_t dstLoopNestCost =
-      getComputeCost(dstLoopIVs[0]->getInstruction(), &dstLoopNestStats,
+      getComputeCost(dstLoopIVs[0].getInstruction(), &dstLoopNestStats,
                      /*tripCountOverrideMap=*/nullptr,
                      /*computeCostMap=*/nullptr);
@@ -1543,7 +1543,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
   // TODO(andydavis) Add load coalescing to memref data flow opt pass.
   if (storeLoadFwdGuaranteed) {
     // A single store disappears: -1 for that.
-    computeCostMap[srcLoopIVs[numSrcLoopIVs - 1]->getInstruction()] = -1;
+    computeCostMap[srcLoopIVs[numSrcLoopIVs - 1].getInstruction()] = -1;
     for (auto *loadOp : dstLoadOpInsts) {
       auto *parentInst = loadOp->getParentInst();
       if (parentInst && parentInst->isa<AffineForOp>())
@@ -1553,15 +1553,15 @@ static bool isFusionProfitable(Instruction *srcOpInst,
   // Compute op instance count for the src loop nest with iteration slicing.
   int64_t sliceComputeCost =
-      getComputeCost(srcLoopIVs[0]->getInstruction(), &srcLoopNestStats,
+      getComputeCost(srcLoopIVs[0].getInstruction(), &srcLoopNestStats,
                      /*tripCountOverrideMap=*/&sliceTripCountMap,
                      /*computeCostMap=*/&computeCostMap);
   // Compute cost of fusion for this depth.
-  computeCostMap[dstLoopIVs[i - 1]->getInstruction()] = sliceComputeCost;
+  computeCostMap[dstLoopIVs[i - 1].getInstruction()] = sliceComputeCost;
   int64_t fusedLoopNestComputeCost =
-      getComputeCost(dstLoopIVs[0]->getInstruction(), &dstLoopNestStats,
+      getComputeCost(dstLoopIVs[0].getInstruction(), &dstLoopNestStats,
                      /*tripCountOverrideMap=*/nullptr, &computeCostMap);
   double additionalComputeFraction =
@@ -1935,20 +1935,19 @@ public:
       auto sliceLoopNest = mlir::insertBackwardComputationSlice(
           srcStoreOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);
       if (sliceLoopNest) {
-        LLVM_DEBUG(llvm::dbgs()
-                   << "\tslice loop nest:\n"
-                   << *sliceLoopNest->getInstruction() << "\n");
+        LLVM_DEBUG(llvm::dbgs() << "\tslice loop nest:\n"
+                                << *sliceLoopNest.getInstruction() << "\n");
         // Move 'dstAffineForOp' before 'insertPointInst' if needed.
         auto dstAffineForOp = dstNode->inst->cast<AffineForOp>();
-        if (insertPointInst != dstAffineForOp->getInstruction()) {
-          dstAffineForOp->getInstruction()->moveBefore(insertPointInst);
+        if (insertPointInst != dstAffineForOp.getInstruction()) {
+          dstAffineForOp.getInstruction()->moveBefore(insertPointInst);
         }
         // Update edges between 'srcNode' and 'dstNode'.
         mdg->updateEdges(srcNode->id, dstNode->id, memref);
         // Collect slice loop stats.
         LoopNestStateCollector sliceCollector;
-        sliceCollector.collect(sliceLoopNest->getInstruction());
+        sliceCollector.collect(sliceLoopNest.getInstruction());
         // Promote single iteration slice loops to single IV value.
         for (auto forOp : sliceCollector.forOps) {
           promoteIfSingleIteration(forOp);
         }
@@ -1974,7 +1973,7 @@ public:
         // Collect dst loop stats after memref privatizaton transformation.
         LoopNestStateCollector dstLoopCollector;
-        dstLoopCollector.collect(dstAffineForOp->getInstruction());
+        dstLoopCollector.collect(dstAffineForOp.getInstruction());
         // Add new load ops to current Node load op list 'loads' to
         // continue fusing based on new operands.
@@ -2097,8 +2096,8 @@ public:
       if (sliceLoopNest != nullptr) {
         auto dstForInst = dstNode->inst->cast<AffineForOp>();
         // Update instruction position of fused loop nest (if needed).
-        if (insertPointInst != dstForInst->getInstruction()) {
-          dstForInst->getInstruction()->moveBefore(insertPointInst);
+        if (insertPointInst != dstForInst.getInstruction()) {
+          dstForInst.getInstruction()->moveBefore(insertPointInst);
         }
         // Update data dependence graph state post fusion.
         updateStateAfterSiblingFusion(sliceLoopNest, sibNode, dstNode);
@@ -2190,7 +2189,7 @@ public:
     // Collect slice loop stats.
     LoopNestStateCollector sliceCollector;
-    sliceCollector.collect(sliceLoopNest->getInstruction());
+    sliceCollector.collect(sliceLoopNest.getInstruction());
     // Promote single iteration slice loops to single IV value.
     for (auto forOp : sliceCollector.forOps) {
       promoteIfSingleIteration(forOp);
     }
@@ -2199,7 +2198,7 @@ public:
     // Collect dst loop stats after memref privatizaton transformation.
     auto dstForInst = dstNode->inst->cast<AffineForOp>();
     LoopNestStateCollector dstLoopCollector;
-    dstLoopCollector.collect(dstForInst->getInstruction());
+    dstLoopCollector.collect(dstForInst.getInstruction());
     // Clear and add back loads and stores
     mdg->clearNodeLoadAndStores(dstNode->id);
     mdg->addToNode(dstNode->id, dstLoopCollector.loadOpInsts,
@@ -2209,7 +2208,7 @@ public:
     // function.
     if (mdg->getOutEdgeCount(sibNode->id) == 0) {
       mdg->removeNode(sibNode->id);
-      sibNode->inst->cast<AffineForOp>()->erase();
+      sibNode->inst->cast<AffineForOp>().erase();
     }
   }
diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp
index 314864d3f3c..2dbdf689f02 100644
--- a/mlir/lib/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Transforms/LoopTiling.cpp
@@ -92,14 +92,14 @@ FunctionPassBase *mlir::createLoopTilingPass(uint64_t cacheSizeBytes) {
 // location in destination's body.
 static inline void moveLoopBody(AffineForOp src, AffineForOp dest,
                                 Block::iterator loc) {
-  dest->getBody()->getInstructions().splice(loc,
-                                            src->getBody()->getInstructions());
+  dest.getBody()->getInstructions().splice(loc,
+                                           src.getBody()->getInstructions());
 }
 // Move the loop body of AffineForOp 'src' from 'src' to the start of dest's
 // body.
 static inline void moveLoopBody(AffineForOp src, AffineForOp dest) {
-  moveLoopBody(src, dest, dest->getBody()->begin());
+  moveLoopBody(src, dest, dest.getBody()->begin());
 }
 /// Constructs and sets new loop bounds after tiling for the case of
@@ -114,18 +114,18 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
   assert(!origLoops.empty());
   assert(origLoops.size() == tileSizes.size());
-  FuncBuilder b(origLoops[0]->getInstruction());
+  FuncBuilder b(origLoops[0].getInstruction());
   unsigned width = origLoops.size();
   // Bounds for tile space loops.
   for (unsigned i = 0; i < width; i++) {
-    auto lbOperands = origLoops[i]->getLowerBoundOperands();
-    auto ubOperands = origLoops[i]->getUpperBoundOperands();
+    auto lbOperands = origLoops[i].getLowerBoundOperands();
+    auto ubOperands = origLoops[i].getUpperBoundOperands();
     SmallVector<Value *, 4> newLbOperands(lbOperands);
     SmallVector<Value *, 4> newUbOperands(ubOperands);
-    newLoops[i]->setLowerBound(newLbOperands, origLoops[i]->getLowerBoundMap());
-    newLoops[i]->setUpperBound(newUbOperands, origLoops[i]->getUpperBoundMap());
-    newLoops[i]->setStep(tileSizes[i]);
+    newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
+    newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
+    newLoops[i].setStep(tileSizes[i]);
   }
   // Bounds for intra-tile loops.
   for (unsigned i = 0; i < width; i++) {
@@ -133,24 +133,24 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
     auto mayBeConstantCount = getConstantTripCount(origLoops[i]);
     // The lower bound is just the tile-space loop.
     AffineMap lbMap = b.getDimIdentityMap();
-    newLoops[width + i]->setLowerBound(
-        /*operands=*/newLoops[i]->getInductionVar(), lbMap);
+    newLoops[width + i].setLowerBound(
+        /*operands=*/newLoops[i].getInductionVar(), lbMap);
     // Set the upper bound.
     if (mayBeConstantCount.hasValue() &&
         mayBeConstantCount.getValue() < tileSizes[i]) {
       // Trip count is less than tile size; upper bound is the trip count.
       auto ubMap = b.getConstantAffineMap(mayBeConstantCount.getValue());
-      newLoops[width + i]->setUpperBoundMap(ubMap);
+      newLoops[width + i].setUpperBoundMap(ubMap);
     } else if (largestDiv % tileSizes[i] != 0) {
       // Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
       // Construct the upper bound map; the operands are the original operands
       // with 'i' (tile-space loop) appended to it. The new upper bound map is
       // the original one with an additional expression i + tileSize appended.
-      SmallVector<Value *, 4> ubOperands(origLoops[i]->getUpperBoundOperands());
-      ubOperands.push_back(newLoops[i]->getInductionVar());
+      SmallVector<Value *, 4> ubOperands(origLoops[i].getUpperBoundOperands());
+      ubOperands.push_back(newLoops[i].getInductionVar());
-      auto origUbMap = origLoops[i]->getUpperBoundMap();
+      auto origUbMap = origLoops[i].getUpperBoundMap();
       SmallVector<AffineExpr, 4> boundExprs;
       boundExprs.reserve(1 + origUbMap.getNumResults());
       auto dim = b.getAffineDimExpr(origUbMap.getNumInputs());
       boundExprs.append(origUbMap.getResults().begin(),
                         origUbMap.getResults().end());
       auto ubMap =
           b.getAffineMap(origUbMap.getNumInputs() + 1, 0, boundExprs, {});
-      newLoops[width + i]->setUpperBound(/*operands=*/ubOperands, ubMap);
+      newLoops[width + i].setUpperBound(/*operands=*/ubOperands, ubMap);
     } else {
       // No need of the min expression.
       auto dim = b.getAffineDimExpr(0);
       auto ubMap = b.getAffineMap(1, 0, dim + tileSizes[i], {});
-      newLoops[width + i]->setUpperBound(newLoops[i]->getInductionVar(), ubMap);
+      newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
     }
   }
 }
@@ -181,14 +181,14 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
   // Check if the supplied for inst's are all successively nested.
   for (unsigned i = 1, e = band.size(); i < e; i++) {
-    assert(band[i]->getInstruction()->getParentInst() ==
-           band[i - 1]->getInstruction());
+    assert(band[i].getInstruction()->getParentInst() ==
+           band[i - 1].getInstruction());
   }
   auto origLoops = band;
   AffineForOp rootAffineForOp = origLoops[0];
-  auto loc = rootAffineForOp->getLoc();
+  auto loc = rootAffineForOp.getLoc();
   // Note that width is at least one since band isn't empty.
   unsigned width = band.size();
@@ -196,19 +196,19 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
   AffineForOp innermostPointLoop;
   // The outermost among the loops as we add more..
-  auto *topLoop = rootAffineForOp->getInstruction();
+  auto *topLoop = rootAffineForOp.getInstruction();
   // Add intra-tile (or point) loops.
   for (unsigned i = 0; i < width; i++) {
     FuncBuilder b(topLoop);
     // Loop bounds will be set later.
     auto pointLoop = b.create<AffineForOp>(loc, 0, 0);
-    pointLoop->createBody();
-    pointLoop->getBody()->getInstructions().splice(
-        pointLoop->getBody()->begin(), topLoop->getBlock()->getInstructions(),
+    pointLoop.createBody();
+    pointLoop.getBody()->getInstructions().splice(
+        pointLoop.getBody()->begin(), topLoop->getBlock()->getInstructions(),
         topLoop);
     newLoops[2 * width - 1 - i] = pointLoop;
-    topLoop = pointLoop->getInstruction();
+    topLoop = pointLoop.getInstruction();
     if (i == 0)
       innermostPointLoop = pointLoop;
   }
@@ -218,12 +218,12 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
     FuncBuilder b(topLoop);
     // Loop bounds will be set later.
     auto tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
-    tileSpaceLoop->createBody();
-    tileSpaceLoop->getBody()->getInstructions().splice(
-        tileSpaceLoop->getBody()->begin(),
+    tileSpaceLoop.createBody();
+    tileSpaceLoop.getBody()->getInstructions().splice(
+        tileSpaceLoop.getBody()->begin(),
         topLoop->getBlock()->getInstructions(), topLoop);
     newLoops[2 * width - i - 1] = tileSpaceLoop;
-    topLoop = tileSpaceLoop->getInstruction();
+    topLoop = tileSpaceLoop.getInstruction();
   }
   // Move the loop body of the original nest to the new one.
@@ -236,19 +236,19 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
   getIndexSet(band, &cst);
   if (!cst.isHyperRectangular(0, width)) {
-    rootAffineForOp->emitError("tiled code generation unimplemented for the "
-                               "non-hyperrectangular case");
+    rootAffineForOp.emitError("tiled code generation unimplemented for the "
+                              "non-hyperrectangular case");
     return failure();
   }
   constructTiledIndexSetHyperRect(origLoops, newLoops, tileSizes);
   // In this case, the point loop IVs just replace the original ones.
   for (unsigned i = 0; i < width; i++) {
-    origLoopIVs[i]->replaceAllUsesWith(newLoops[i + width]->getInductionVar());
+    origLoopIVs[i]->replaceAllUsesWith(newLoops[i + width].getInductionVar());
   }
   // Erase the old loop nest.
-  rootAffineForOp->erase();
+  rootAffineForOp.erase();
   return success();
 }
@@ -265,8 +265,8 @@ static void getTileableBands(Function *f,
     AffineForOp currInst = root;
     do {
       band.push_back(currInst);
-    } while (currInst->getBody()->getInstructions().size() == 1 &&
-             (currInst = currInst->getBody()->front().dyn_cast<AffineForOp>()));
+    } while (currInst.getBody()->getInstructions().size() == 1 &&
+             (currInst = currInst.getBody()->front().dyn_cast<AffineForOp>()));
     bands->push_back(band);
   };
@@ -341,8 +341,8 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
     if (avoidMaxMinBounds)
       adjustToDivisorsOfTripCounts(band, tileSizes);
     LLVM_DEBUG(
-        rootForOp->emitWarning("memory footprint unknown: using default tile "
-                               "sizes adjusted to trip count divisors"));
+        rootForOp.emitWarning("memory footprint unknown: using default tile "
                              "sizes adjusted to trip count divisors"));
     return;
   }
@@ -398,7 +398,7 @@ void LoopTiling::runOnFunction() {
       msg << tSize << " ";
     msg << "]\n";
     auto rootForOp = band[0];
-    rootForOp->emitNote(msg.str());
+    rootForOp.emitNote(msg.str());
   }
   if (failed(tileCodeGen(band, tileSizes)))
     return signalPassFailure();
diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
index 240f3960488..0822ddf37e3 100644
--- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
@@ -155,7 +155,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
   if (unrollJamFactor == 1)
     return promoteIfSingleIteration(forOp);
-  if (forOp->getBody()->empty())
+  if (forOp.getBody()->empty())
     return failure();
   // Loops where both lower and upper bounds are multi-result maps won't be
@@ -164,7 +164,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
   // TODO(mlir-team): this may not be common, but we could support the case
   // where the lower bound is a multi-result map and the ub is a single result
   // one.
-  if (forOp->getLowerBoundMap().getNumResults() != 1)
+  if (forOp.getLowerBoundMap().getNumResults() != 1)
     return failure();
   Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
   if (mayBeConstantTripCount.hasValue() &&
       mayBeConstantTripCount.getValue() < unrollJamFactor)
     return failure();
-  auto *forInst = forOp->getInstruction();
+  auto *forInst = forOp.getInstruction();
   // Gather all sub-blocks to jam upon the loop being unrolled.
   JamBlockGatherer jbg;
@@ -193,21 +193,21 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
     SmallVector<Value *, 4> cleanupOperands;
     getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
                              &cleanupOperands, &builder);
-    cleanupAffineForOp->setLowerBound(cleanupOperands, cleanupMap);
+    cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
     // Promote the cleanup loop if it has turned into a single iteration loop.
     promoteIfSingleIteration(cleanupAffineForOp);
     // Adjust the upper bound of the original loop - it will be the same as the
     // cleanup loop's lower bound. Its lower bound remains unchanged.
-    forOp->setUpperBound(cleanupOperands, cleanupMap);
+    forOp.setUpperBound(cleanupOperands, cleanupMap);
   }
   // Scale the step of loop being unroll-jammed by the unroll-jam factor.
-  int64_t step = forOp->getStep();
-  forOp->setStep(step * unrollJamFactor);
+  int64_t step = forOp.getStep();
+  forOp.setStep(step * unrollJamFactor);
-  auto *forOpIV = forOp->getInductionVar();
+  auto *forOpIV = forOp.getInductionVar();
   for (auto &subBlock : subBlocks) {
     // Builder to insert unroll-jammed bodies. Insert right at the end of
     // sub-block.
diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp
index cb65720cee3..93197c30cb2 100644
--- a/mlir/lib/Transforms/LowerAffine.cpp
+++ b/mlir/lib/Transforms/LowerAffine.cpp
@@ -51,7 +51,7 @@ public:
     if (!lhs || !rhs)
       return nullptr;
     auto op = builder.create<OpTy>(loc, lhs, rhs);
-    return op->getResult();
+    return op.getResult();
   }
   Value *visitAddExpr(AffineBinaryOpExpr expr) {
@@ -189,7 +189,7 @@ public:
         builder.getIntegerAttr(builder.getIndexType(), expr.getValue());
     auto op =
         builder.create<ConstantOp>(loc, builder.getIndexType(), valueAttr);
-    return op->getResult();
+    return op.getResult();
   }
   Value *visitDimExpr(AffineDimExpr expr) {
@@ -270,7 +270,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
   Value *value = *valueIt++;
   for (; valueIt != values.end(); ++valueIt) {
     auto cmpOp = builder.create<CmpIOp>(loc, predicate, value, *valueIt);
-    value = builder.create<SelectOp>(loc, cmpOp->getResult(), value, *valueIt);
+    value = builder.create<SelectOp>(loc, cmpOp.getResult(), value, *valueIt);
   }
   return value;
@@ -320,8 +320,8 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
 //      +--------------------------------+
 //
 bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
-  auto loc = forOp->getLoc();
-  auto *forInst = forOp->getInstruction();
+  auto loc = forOp.getLoc();
+  auto *forInst = forOp.getInstruction();
   // Start by splitting the block containing the 'affine.for' into two parts.
   // The part before will get the init code, the part after will be the end
@@ -339,19 +339,19 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
   auto *bodyBlock = new Block();
   bodyBlock->insertBefore(endBlock);
-  auto *oldBody = forOp->getBody();
+  auto *oldBody = forOp.getBody();
   bodyBlock->getInstructions().splice(bodyBlock->begin(),
                                       oldBody->getInstructions(),
                                       oldBody->begin(), oldBody->end());
   // The code in the body of the forOp now uses 'iv' as its indvar.
-  forOp->getInductionVar()->replaceAllUsesWith(iv);
+  forOp.getInductionVar()->replaceAllUsesWith(iv);
   // Append the induction variable stepping logic and branch back to the exit
   // condition block. Construct an affine expression f : (x -> x+step) and
   // apply this expression to the induction variable.
   FuncBuilder builder(bodyBlock);
-  auto affStep = builder.getAffineConstantExpr(forOp->getStep());
+  auto affStep = builder.getAffineConstantExpr(forOp.getStep());
   auto affDim = builder.getAffineDimExpr(0);
   auto stepped = expandAffineExpr(&builder, loc, affDim + affStep, iv, {});
   if (!stepped)
@@ -364,18 +364,18 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
   builder.setInsertionPointToEnd(initBlock);
   // Compute loop bounds.
-  SmallVector<Value *, 8> operands(forOp->getLowerBoundOperands());
+  SmallVector<Value *, 8> operands(forOp.getLowerBoundOperands());
   auto lbValues = expandAffineMap(&builder, forInst->getLoc(),
-                                  forOp->getLowerBoundMap(), operands);
+                                  forOp.getLowerBoundMap(), operands);
   if (!lbValues)
     return true;
   Value *lowerBound =
       buildMinMaxReductionSeq(loc, CmpIPredicate::SGT, *lbValues, builder);
-  operands.assign(forOp->getUpperBoundOperands().begin(),
-                  forOp->getUpperBoundOperands().end());
+  operands.assign(forOp.getUpperBoundOperands().begin(),
+                  forOp.getUpperBoundOperands().end());
   auto ubValues = expandAffineMap(&builder, forInst->getLoc(),
-                                  forOp->getUpperBoundMap(), operands);
+                                  forOp.getUpperBoundMap(), operands);
   if (!ubValues)
     return true;
   Value *upperBound =
@@ -390,7 +390,7 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
                        endBlock, ArrayRef<Value *>());
   // Ok, we're done!
-  forOp->erase();
+  forOp.erase();
   return false;
 }
@@ -454,7 +454,7 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
 //      +--------------------------------+
 //
 bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
-  auto *ifInst = ifOp->getInstruction();
+  auto *ifInst = ifOp.getInstruction();
   auto loc = ifInst->getLoc();
   // Start by splitting the block containing the 'affine.if' into two parts. The
@@ -470,7 +470,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
   thenBlock->insertBefore(continueBlock);
   // If the 'then' block is not empty, then splice the instructions.
-  auto &oldThenBlocks = ifOp->getThenBlocks();
+  auto &oldThenBlocks = ifOp.getThenBlocks();
   if (!oldThenBlocks.empty()) {
     // We currently only handle one 'then' block.
     if (std::next(oldThenBlocks.begin()) != oldThenBlocks.end())
@@ -489,7 +489,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
   // Handle the 'else' block the same way, but we skip it if we have no else
   // code.
   Block *elseBlock = continueBlock;
-  auto &oldElseBlocks = ifOp->getElseBlocks();
+  auto &oldElseBlocks = ifOp.getElseBlocks();
   if (!oldElseBlocks.empty()) {
     // We currently only handle one 'else' block.
     if (std::next(oldElseBlocks.begin()) != oldElseBlocks.end())
@@ -507,7 +507,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
   }
   // Ok, now we just have to handle the condition logic.
-  auto integerSet = ifOp->getIntegerSet();
+  auto integerSet = ifOp.getIntegerSet();
   // Implement short-circuit logic. For each affine expression in the
   // 'affine.if' condition, convert it into an affine map and call
@@ -545,7 +545,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
   auto comparisonOp = builder.create<CmpIOp>(
       loc, isEquality ? CmpIPredicate::EQ : CmpIPredicate::SGE, affResult,
       zeroConstant);
-  builder.create<CondBranchOp>(loc, comparisonOp->getResult(), nextBlock,
+  builder.create<CondBranchOp>(loc, comparisonOp.getResult(), nextBlock,
                                /*trueArgs*/ ArrayRef<Value *>(), elseBlock,
                                /*falseArgs*/ ArrayRef<Value *>());
   builder.setInsertionPointToEnd(nextBlock);
@@ -570,19 +570,19 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
 // Convert an "affine.apply" operation into a sequence of arithmetic
 // instructions using the StandardOps dialect. Return true on error.
 bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) {
-  FuncBuilder builder(op->getInstruction());
+  FuncBuilder builder(op.getInstruction());
   auto maybeExpandedMap =
-      expandAffineMap(&builder, op->getLoc(), op->getAffineMap(),
-                      llvm::to_vector<8>(op->getOperands()));
+      expandAffineMap(&builder, op.getLoc(), op.getAffineMap(),
+                      llvm::to_vector<8>(op.getOperands()));
   if (!maybeExpandedMap)
     return true;
-  Value *original = op->getResult();
+  Value *original = op.getResult();
   Value *expanded = (*maybeExpandedMap)[0];
   if (!expanded)
     return true;
   original->replaceAllUsesWith(expanded);
-  op->erase();
+  op.erase();
   return false;
 }
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index b59071aa9fe..a92e2d5960c 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -72,7 +72,7 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) {
 /// added dimension by the loop IV of the specified 'affine.for' instruction
 /// modulo 2. Returns false if such a replacement cannot be performed.
 static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
-  auto *forBody = forOp->getBody();
+  auto *forBody = forOp.getBody();
   FuncBuilder bInner(forBody, forBody->begin());
   bInner.setInsertionPoint(forBody, forBody->begin());
@@ -93,7 +93,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
   auto newMemRefType = doubleShape(oldMemRefType);
   // The double buffer is allocated right before 'forInst'.
-  auto *forInst = forOp->getInstruction();
+  auto *forInst = forOp.getInstruction();
   FuncBuilder bOuter(forInst);
   // Put together alloc operands for any dynamic dimensions of the memref.
   SmallVector<Value *, 4> allocOperands;
@@ -110,21 +110,21 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
   // Create 'iv mod 2' value to index the leading dimension.
   auto d0 = bInner.getAffineDimExpr(0);
-  int64_t step = forOp->getStep();
+  int64_t step = forOp.getStep();
   auto modTwoMap = bInner.getAffineMap(/*dimCount=*/1, /*symbolCount=*/0,
                                        {d0.floorDiv(step) % 2}, {});
-  auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp->getLoc(), modTwoMap,
-                                                 forOp->getInductionVar());
+  auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp.getLoc(), modTwoMap,
+                                                 forOp.getInductionVar());
   // replaceAllMemRefUsesWith will always succeed unless the forOp body has
   // non-deferencing uses of the memref (dealloc's are fine though).
-  if (!replaceAllMemRefUsesWith(
-          oldMemRef, newMemRef, /*extraIndices=*/{ivModTwoOp},
-          /*indexRemap=*/AffineMap(),
-          /*extraOperands=*/{},
-          /*domInstFilter=*/&*forOp->getBody()->begin())) {
+  if (!replaceAllMemRefUsesWith(oldMemRef, newMemRef,
+                                /*extraIndices=*/{ivModTwoOp},
+                                /*indexRemap=*/AffineMap(),
+                                /*extraOperands=*/{},
+                                /*domInstFilter=*/&*forOp.getBody()->begin())) {
     LLVM_DEBUG(
-        forOp->emitError("memref replacement for double buffering failed"));
+        forOp.emitError("memref replacement for double buffering failed"));
     ivModTwoOp->getInstruction()->erase();
     return false;
   }
@@ -180,14 +180,14 @@ static void findMatchingStartFinishInsts(
   // Collect outgoing DMA instructions - needed to check for dependences below.
   SmallVector<DmaStartOp, 4> outgoingDmaOps;
-  for (auto &inst : *forOp->getBody()) {
+  for (auto &inst : *forOp.getBody()) {
     auto dmaStartOp = inst.dyn_cast<DmaStartOp>();
     if (dmaStartOp && dmaStartOp->isSrcMemorySpaceFaster())
       outgoingDmaOps.push_back(dmaStartOp);
   }
   SmallVector<Instruction *, 4> dmaStartInsts, dmaFinishInsts;
-  for (auto &inst : *forOp->getBody()) {
+  for (auto &inst : *forOp.getBody()) {
     // Collect DMA finish instructions.
     if (inst.isa<DmaWaitOp>()) {
       dmaFinishInsts.push_back(&inst);
@@ -220,7 +220,7 @@ static void findMatchingStartFinishInsts(
       // We can double buffer regardless of deallocs outside the loop.
       if (use.getOwner()->isa<DeallocOp>())
         continue;
-      if (!forOp->getBody()->findAncestorInstInBlock(*use.getOwner())) {
+      if (!forOp.getBody()->findAncestorInstInBlock(*use.getOwner())) {
         LLVM_DEBUG(llvm::dbgs()
                    << "can't pipeline: buffer is live out of loop\n";);
         escapingUses = true;
@@ -249,8 +249,7 @@ static void findMatchingStartFinishInsts(
 void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
   auto mayBeConstTripCount = getConstantTripCount(forOp);
   if (!mayBeConstTripCount.hasValue()) {
-    LLVM_DEBUG(
-        forOp->emitNote("won't pipeline due to unknown trip count loop"));
+    LLVM_DEBUG(forOp.emitNote("won't pipeline due to unknown trip count loop"));
     return;
   }
@@ -258,7 +257,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
   findMatchingStartFinishInsts(forOp, startWaitPairs);
   if (startWaitPairs.empty()) {
-    LLVM_DEBUG(forOp->emitNote("No dma start/finish pairs\n"));
+    LLVM_DEBUG(forOp.emitNote("No dma start/finish pairs\n"));
     return;
   }
@@ -332,7 +331,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
       mlir::createAffineComputationSlice(dmaStartInst, &sliceOps);
       if (!sliceOps.empty()) {
         for (auto sliceOp : sliceOps) {
-          instShiftMap[sliceOp->getInstruction()] = 0;
+          instShiftMap[sliceOp.getInstruction()] = 0;
         }
       } else {
         // If a slice wasn't created, the reachable affine.apply ops from its
@@ -346,16 +345,16 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
     }
   }
   // Everything else (including compute ops and dma finish) is shifted by one.
-  for (auto &inst : *forOp->getBody()) {
+  for (auto &inst : *forOp.getBody()) {
     if (instShiftMap.find(&inst) == instShiftMap.end()) {
       instShiftMap[&inst] = 1;
     }
   }
   // Get shifts stored in map.
-  std::vector<uint64_t> shifts(forOp->getBody()->getInstructions().size());
+  std::vector<uint64_t> shifts(forOp.getBody()->getInstructions().size());
   unsigned s = 0;
-  for (auto &inst : *forOp->getBody()) {
+  for (auto &inst : *forOp.getBody()) {
     assert(instShiftMap.find(&inst) != instShiftMap.end());
     shifts[s++] = instShiftMap[&inst];
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index bf0c3ced2e2..918bd5b9e21 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -47,7 +47,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
                                     AffineMap *map,
                                     SmallVectorImpl<Value *> *operands,
                                     FuncBuilder *b) {
-  auto lbMap = forOp->getLowerBoundMap();
+  auto lbMap = forOp.getLowerBoundMap();
   // Single result lower bound map only.
   if (lbMap.getNumResults() != 1) {
@@ -65,10 +65,10 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
     return;
   }
-  unsigned step = forOp->getStep();
+  unsigned step = forOp.getStep();
-  SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands());
-  auto lb = b->create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);
+  SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
+  auto lb = b->create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
   // For each upper bound expr, get the range.
   // Eg: affine.for %i = lb to min (ub1, ub2),
@@ -84,7 +84,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
         b->getAffineMap(tripCountMap.getNumDims(),
                         tripCountMap.getNumSymbols(), bumpExprs[i], {});
     bumpValues[i] =
-        b->create<AffineApplyOp>(forOp->getLoc(), bumpMap, tripCountOperands);
+        b->create<AffineApplyOp>(forOp.getLoc(), bumpMap, tripCountOperands);
   }
   SmallVector<AffineExpr, 4> newUbExprs(tripCountMap.getNumResults());
@@ -105,8 +105,8 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
       v->getDefiningInst()->erase();
     }
   }
-  if (lb->use_empty())
-    lb->erase();
+  if (lb.use_empty())
+    lb.erase();
 }
 /// Promotes the loop body of a forOp to its containing block if the forOp
@@ -118,21 +118,21 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
     return failure();
   // TODO(mlir-team): there is no builder for a max.
-  if (forOp->getLowerBoundMap().getNumResults() != 1)
+  if (forOp.getLowerBoundMap().getNumResults() != 1)
     return failure();
   // Replaces all IV uses with its single iteration value.
-  auto *iv = forOp->getInductionVar();
-  Instruction *forInst = forOp->getInstruction();
+  auto *iv = forOp.getInductionVar();
+  Instruction *forInst = forOp.getInstruction();
   if (!iv->use_empty()) {
-    if (forOp->hasConstantLowerBound()) {
+    if (forOp.hasConstantLowerBound()) {
       auto *mlFunc = forInst->getFunction();
       FuncBuilder topBuilder(mlFunc);
       auto constOp = topBuilder.create<ConstantIndexOp>(
-          forOp->getLoc(), forOp->getConstantLowerBound());
+          forOp.getLoc(), forOp.getConstantLowerBound());
       iv->replaceAllUsesWith(constOp);
     } else {
-      AffineBound lb = forOp->getLowerBound();
+      AffineBound lb = forOp.getLowerBound();
       SmallVector<Value *, 4> lbOperands(lb.operand_begin(), lb.operand_end());
       FuncBuilder builder(forInst->getBlock(), Block::iterator(forInst));
       if (lb.getMap() == builder.getDimIdentityMap()) {
@@ -148,8 +148,8 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
   // Move the loop body instructions to the loop's containing block.
   auto *block = forInst->getBlock();
   block->getInstructions().splice(Block::iterator(forInst),
-                                  forOp->getBody()->getInstructions());
-  forOp->erase();
+                                  forOp.getBody()->getInstructions());
+  forOp.erase();
   return success();
 }
@@ -173,18 +173,18 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
     const std::vector<std::pair<uint64_t, ArrayRef<Instruction *>>>
         &instGroupQueue,
     unsigned offset, AffineForOp srcForInst, FuncBuilder *b) {
-  SmallVector<Value *, 4> lbOperands(srcForInst->getLowerBoundOperands());
-  SmallVector<Value *, 4> ubOperands(srcForInst->getUpperBoundOperands());
+  SmallVector<Value *, 4> lbOperands(srcForInst.getLowerBoundOperands());
+  SmallVector<Value *, 4> ubOperands(srcForInst.getUpperBoundOperands());
   assert(lbMap.getNumInputs() == lbOperands.size());
   assert(ubMap.getNumInputs() == ubOperands.size());
   auto loopChunk =
-      b->create<AffineForOp>(srcForInst->getLoc(), lbOperands, lbMap,
-                             ubOperands, ubMap, srcForInst->getStep());
-  loopChunk->createBody();
-  auto *loopChunkIV = loopChunk->getInductionVar();
-  auto *srcIV = srcForInst->getInductionVar();
+      b->create<AffineForOp>(srcForInst.getLoc(), lbOperands, lbMap, ubOperands,
+                             ubMap, srcForInst.getStep());
+  loopChunk.createBody();
+  auto *loopChunkIV = loopChunk.getInductionVar();
+  auto *srcIV = srcForInst.getInductionVar();
   BlockAndValueMapping operandMap;
@@ -197,18 +197,18 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
     // Generate the remapping if the shift is not zero: remappedIV = newIV -
     // shift.
     if (!srcIV->use_empty() && shift != 0) {
-      FuncBuilder b(loopChunk->getBody());
+      FuncBuilder b(loopChunk.getBody());
       auto ivRemap = b.create<AffineApplyOp>(
-          srcForInst->getLoc(),
+          srcForInst.getLoc(),
           b.getSingleDimShiftAffineMap(
-              -static_cast<int64_t>(srcForInst->getStep() * shift)),
+              -static_cast<int64_t>(srcForInst.getStep() * shift)),
           loopChunkIV);
       operandMap.map(srcIV, ivRemap);
     } else {
       operandMap.map(srcIV, loopChunkIV);
     }
     for (auto *inst : insts) {
-      loopChunk->getBody()->push_back(inst->clone(operandMap, b->getContext()));
+      loopChunk.getBody()->push_back(inst->clone(operandMap, b->getContext()));
     }
   }
   if (succeeded(promoteIfSingleIteration(loopChunk)))
@@ -233,7 +233,7 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
 // method.
 LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
                                  bool unrollPrologueEpilogue) {
-  if (forOp->getBody()->empty())
+  if (forOp.getBody()->empty())
     return success();
   // If the trip counts aren't constant, we would need versioning and
   // conditional guards (or context information to prevent such versioning). The
   // better way to pipeline for such loops is to first tile them and extract
   // constant trip count "full tiles" before applying this.
   auto mayBeConstTripCount = getConstantTripCount(forOp);
   if (!mayBeConstTripCount.hasValue()) {
-    LLVM_DEBUG(forOp->emitNote("non-constant trip count loop not handled"));
+    LLVM_DEBUG(forOp.emitNote("non-constant trip count loop not handled"));
     return success();
   }
   uint64_t tripCount = mayBeConstTripCount.getValue();
@@ -250,9 +250,9 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
   assert(isInstwiseShiftValid(forOp, shifts) &&
          "shifts will lead to an invalid transformation\n");
-  int64_t step = forOp->getStep();
+  int64_t step = forOp.getStep();
-  unsigned numChildInsts = forOp->getBody()->getInstructions().size();
+  unsigned numChildInsts = forOp.getBody()->getInstructions().size();
   // Do a linear time (counting) sort for the shifts.
   uint64_t maxShift = 0;
@@ -261,7 +261,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
   }
   // Such large shifts are not the typical use case.
   if (maxShift >= numChildInsts) {
-    forOp->emitWarning("not shifting because shifts are unrealistically large");
+    forOp.emitWarning("not shifting because shifts are unrealistically large");
     return success();
   }
@@ -270,7 +270,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
   // body of the 'affine.for' inst.
   std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
   unsigned pos = 0;
-  for (auto &inst : *forOp->getBody()) {
+  for (auto &inst : *forOp.getBody()) {
     auto shift = shifts[pos++];
     sortedInstGroups[shift].push_back(&inst);
   }
@@ -288,9 +288,9 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
   // of instructions is paired with its shift.
   std::vector<std::pair<uint64_t, ArrayRef<Instruction *>>> instGroupQueue;
-  auto origLbMap = forOp->getLowerBoundMap();
+  auto origLbMap = forOp.getLowerBoundMap();
   uint64_t lbShift = 0;
-  FuncBuilder b(forOp->getInstruction());
+  FuncBuilder b(forOp.getInstruction());
   for (uint64_t d = 0, e = sortedInstGroups.size(); d < e; ++d) {
     // If nothing is shifted by d, continue.
     if (sortedInstGroups[d].empty())
@@ -340,12 +340,12 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
   }
   // Erase the original for inst.
-  forOp->erase();
+  forOp.erase();
   if (unrollPrologueEpilogue && prologue)
     loopUnrollFull(prologue);
   if (unrollPrologueEpilogue && !epilogue &&
-      epilogue->getInstruction() != prologue->getInstruction())
+      epilogue.getInstruction() != prologue.getInstruction())
     loopUnrollFull(epilogue);
   return success();
 }
@@ -385,7 +385,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
   if (unrollFactor == 1)
     return promoteIfSingleIteration(forOp);
-  if (forOp->getBody()->empty())
+  if (forOp.getBody()->empty())
     return failure();
   // Loops where the lower bound is a max expression aren't supported for
@@ -393,7 +393,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
   // both the lower bound and the upper bound are multi-result maps. However,
   // one meaningful way to do such unrolling would be to specialize the loop for
   // the 'hotspot' case and unroll that hotspot.
-  if (forOp->getLowerBoundMap().getNumResults() != 1)
+  if (forOp.getLowerBoundMap().getNumResults() != 1)
     return failure();
   // If the trip count is lower than the unroll factor, no unrolled body.
@@ -404,7 +404,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
     return failure();
   // Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
-  Instruction *forInst = forOp->getInstruction();
+  Instruction *forInst = forOp.getInstruction();
   if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
     FuncBuilder builder(forInst->getBlock(), ++Block::iterator(forInst));
     auto cleanupForInst = builder.clone(*forInst)->cast<AffineForOp>();
@@ -415,29 +415,29 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
     assert(cleanupMap &&
            "cleanup loop lower bound map for single result lower bound maps "
            "can always be determined");
-    cleanupForInst->setLowerBound(cleanupOperands, cleanupMap);
+    cleanupForInst.setLowerBound(cleanupOperands, cleanupMap);
     // Promote the loop body up if this has turned into a single iteration loop.
     promoteIfSingleIteration(cleanupForInst);
     // Adjust upper bound of the original loop; this is the same as the lower
     // bound of the cleanup loop.
-    forOp->setUpperBound(cleanupOperands, cleanupMap);
+    forOp.setUpperBound(cleanupOperands, cleanupMap);
   }
   // Scale the step of loop being unrolled by unroll factor.
-  int64_t step = forOp->getStep();
-  forOp->setStep(step * unrollFactor);
+  int64_t step = forOp.getStep();
+  forOp.setStep(step * unrollFactor);
   // Builder to insert unrolled bodies right after the last instruction in the
   // body of 'forOp'.
-  FuncBuilder builder(forOp->getBody(), forOp->getBody()->end());
+  FuncBuilder builder(forOp.getBody(), forOp.getBody()->end());
   // Keep a pointer to the last instruction in the original block so that we
   // know what to clone (since we are doing this in-place).
-  Block::iterator srcBlockEnd = std::prev(forOp->getBody()->end());
+  Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end());
   // Unroll the contents of 'forOp' (append unrollFactor-1 additional copies).
-  auto *forOpIV = forOp->getInductionVar();
+  auto *forOpIV = forOp.getInductionVar();
   for (unsigned i = 1; i < unrollFactor; i++) {
     BlockAndValueMapping operandMap;
@@ -448,12 +448,12 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
       auto d0 = builder.getAffineDimExpr(0);
       auto bumpMap = builder.getAffineMap(1, 0, {d0 + i * step}, {});
       auto ivUnroll =
-          builder.create<AffineApplyOp>(forOp->getLoc(), bumpMap, forOpIV);
+          builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
       operandMap.map(forOpIV, ivUnroll);
     }
     // Clone the original body of 'forOp'.
-    for (auto it = forOp->getBody()->begin(); it != std::next(srcBlockEnd);
+    for (auto it = forOp.getBody()->begin(); it != std::next(srcBlockEnd);
          it++) {
       builder.clone(*it, operandMap);
     }
@@ -467,20 +467,20 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
 /// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
 /// nested within 'forOpA' as the only instruction in its block.
 void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
-  auto *forOpAInst = forOpA->getInstruction();
+  auto *forOpAInst = forOpA.getInstruction();
   // 1) Slice forOpA's instruction list (which is just forOpB) just before
   // forOpA (in forOpA's parent's block); this should leave 'forOpA's
   // instruction list empty (because it's perfectly nested).
-  assert(&*forOpA->getBody()->begin() == forOpB->getInstruction());
+  assert(&*forOpA.getBody()->begin() == forOpB.getInstruction());
   forOpAInst->getBlock()->getInstructions().splice(
-      Block::iterator(forOpAInst), forOpA->getBody()->getInstructions());
+      Block::iterator(forOpAInst), forOpA.getBody()->getInstructions());
   // 2) Slice forOpB's instruction list into forOpA's instruction list (this
   // leaves forOpB's instruction list empty).
-  forOpA->getBody()->getInstructions().splice(
-      forOpA->getBody()->begin(), forOpB->getBody()->getInstructions());
+  forOpA.getBody()->getInstructions().splice(
+      forOpA.getBody()->begin(), forOpB.getBody()->getInstructions());
   // 3) Slice forOpA into forOpB's instruction list.
-  forOpB->getBody()->getInstructions().splice(
-      forOpB->getBody()->begin(), forOpAInst->getBlock()->getInstructions(),
+  forOpB.getBody()->getInstructions().splice(
+      forOpB.getBody()->begin(), forOpAInst->getBlock()->getInstructions(),
       Block::iterator(forOpAInst));
 }
@@ -488,8 +488,8 @@ void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
 /// deeper in the loop nest.
 void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) {
   for (unsigned i = 0; i < loopDepth; ++i) {
-    assert(forOp->getBody()->front().isa<AffineForOp>());
-    AffineForOp nextForOp = forOp->getBody()->front().cast<AffineForOp>();
+    assert(forOp.getBody()->front().isa<AffineForOp>());
+    AffineForOp nextForOp = forOp.getBody()->front().cast<AffineForOp>();
     interchangeLoops(forOp, nextForOp);
   }
 }
@@ -521,12 +521,12 @@ static void augmentMapAndBounds(FuncBuilder *b, Value *iv, AffineMap *map,
 static void cloneLoopBodyInto(AffineForOp forOp, Value *oldIv,
                               AffineForOp newForOp) {
   BlockAndValueMapping map;
-  map.map(oldIv, newForOp->getInductionVar());
-  FuncBuilder b(newForOp->getBody(), newForOp->getBody()->end());
-  for (auto it = forOp->getBody()->begin(), end = forOp->getBody()->end();
+  map.map(oldIv, newForOp.getInductionVar());
+  FuncBuilder b(newForOp.getBody(), newForOp.getBody()->end());
+  for (auto it = forOp.getBody()->begin(), end = forOp.getBody()->end();
        it != end; ++it) {
     // Step over newForOp in case it is nested under forOp.
-    if (&*it == newForOp->getInstruction()) {
+    if (&*it == newForOp.getInstruction()) {
       continue;
     }
     auto *inst = b.clone(*it, map);
@@ -554,35 +554,35 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
   // forOp and that targets are not nested under each other when DominanceInfo
   // exposes the capability. It seems overkill to construct a whole function
   // dominance tree at this point.
-  auto originalStep = forOp->getStep();
+  auto originalStep = forOp.getStep();
   auto scaledStep = originalStep * factor;
-  forOp->setStep(scaledStep);
+  forOp.setStep(scaledStep);
-  auto *forInst = forOp->getInstruction();
+  auto *forInst = forOp.getInstruction();
   FuncBuilder b(forInst->getBlock(), ++Block::iterator(forInst));
   // Lower-bound map creation.
-  auto lbMap = forOp->getLowerBoundMap();
-  SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands());
-  augmentMapAndBounds(&b, forOp->getInductionVar(), &lbMap, &lbOperands);
+  auto lbMap = forOp.getLowerBoundMap();
+  SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
+  augmentMapAndBounds(&b, forOp.getInductionVar(), &lbMap, &lbOperands);
   // Upper-bound map creation.
-  auto ubMap = forOp->getUpperBoundMap();
-  SmallVector<Value *, 4> ubOperands(forOp->getUpperBoundOperands());
-  augmentMapAndBounds(&b, forOp->getInductionVar(), &ubMap, &ubOperands,
+  auto ubMap = forOp.getUpperBoundMap();
+  SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
+  augmentMapAndBounds(&b, forOp.getInductionVar(), &ubMap, &ubOperands,
                       /*offset=*/scaledStep);
   SmallVector<AffineForOp, 8> innerLoops;
   for (auto t : targets) {
     // Insert newForOp at the end of `t`.
-    FuncBuilder b(t->getBody(), t->getBody()->end());
-    auto newForOp = b.create<AffineForOp>(t->getLoc(), lbOperands, lbMap,
+    FuncBuilder b(t.getBody(), t.getBody()->end());
+    auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
                                           ubOperands, ubMap, originalStep);
-    newForOp->createBody();
-    cloneLoopBodyInto(t, forOp->getInductionVar(), newForOp);
+    newForOp.createBody();
+    cloneLoopBodyInto(t, forOp.getInductionVar(), newForOp);
     // Remove all instructions from `t` except `newForOp`.
-    auto rit = ++newForOp->getInstruction()->getReverseIterator();
-    auto re = t->getBody()->rend();
+    auto rit = ++newForOp.getInstruction()->getReverseIterator();
+    auto re = t.getBody()->rend();
     for (auto &inst : llvm::make_early_inc_range(llvm::make_range(rit, re))) {
       inst.erase();
     }
diff --git a/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp b/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp
index 9c9f8593f31..f57a53d3670 100644
--- a/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp
+++ b/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp
@@ -231,7 +231,7 @@ static bool affineApplyOp(Instruction &inst) {
 static bool singleResultAffineApplyOpWithoutUses(Instruction &inst) {
   auto app = inst.dyn_cast<AffineApplyOp>();
-  return app && app->use_empty();
+  return app && app.use_empty();
 }
 void VectorizerTestPass::testNormalizeMaps(Function *f) {
@@ -249,8 +249,8 @@ void VectorizerTestPass::testNormalizeMaps(Function *f) {
   for (auto m : matches) {
     auto app = m.getMatchedInstruction()->cast<AffineApplyOp>();
     FuncBuilder b(m.getMatchedInstruction());
-    SmallVector<Value *, 8> operands(app->getOperands());
-    makeComposedAffineApply(&b, app->getLoc(), app->getAffineMap(), operands);
+    SmallVector<Value *, 8> operands(app.getOperands());
+    makeComposedAffineApply(&b, app.getLoc(), app.getAffineMap(), operands);
   }
 }
 // We should now be able to erase everything in reverse order in this test.
diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp
index a52129ed0d6..362cad352fb 100644
--- a/mlir/lib/Transforms/Vectorize.cpp
+++ b/mlir/lib/Transforms/Vectorize.cpp
@@ -856,7 +856,7 @@ static LogicalResult vectorizeRootOrTerminal(Value *iv,
 static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step,
                                           VectorizationState *state) {
   using namespace functional;
-  loop->setStep(step);
+  loop.setStep(step);
   FilterFunctionType notVectorizedThisPattern = [state](Instruction &inst) {
     if (!matcher::isLoadOrStore(inst)) {
@@ -868,15 +868,15 @@ static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step,
   };
   auto loadAndStores = matcher::Op(notVectorizedThisPattern);
   SmallVector<NestedMatch, 8> loadAndStoresMatches;
-  loadAndStores.match(loop->getInstruction(), &loadAndStoresMatches);
+  loadAndStores.match(loop.getInstruction(), &loadAndStoresMatches);
   for (auto ls : loadAndStoresMatches) {
     auto *opInst = ls.getMatchedInstruction();
     auto load = opInst->dyn_cast<LoadOp>();
     auto store = opInst->dyn_cast<StoreOp>();
     LLVM_DEBUG(opInst->print(dbgs()));
     LogicalResult result =
-        load ? vectorizeRootOrTerminal(loop->getInductionVar(), load, state)
-             : vectorizeRootOrTerminal(loop->getInductionVar(), store, state);
+        load ? vectorizeRootOrTerminal(loop.getInductionVar(), load, state)
+             : vectorizeRootOrTerminal(loop.getInductionVar(), store, state);
     if (failed(result)) {
       return failure();
     }
@@ -1164,18 +1164,17 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
   /// Sets up error handling for this root loop. This is how the root match
   /// maintains a clone for handling failure and restores the proper state via
   /// RAII.
-  auto *loopInst = loop->getInstruction();
+  auto *loopInst = loop.getInstruction();
   FuncBuilder builder(loopInst);
   auto clonedLoop = builder.clone(*loopInst)->cast<AffineForOp>();
   struct Guard {
     LogicalResult failure() {
-      loop->getInductionVar()->replaceAllUsesWith(
-          clonedLoop->getInductionVar());
-      loop->erase();
+      loop.getInductionVar()->replaceAllUsesWith(clonedLoop.getInductionVar());
+      loop.erase();
       return mlir::failure();
     }
     LogicalResult success() {
-      clonedLoop->erase();
+      clonedLoop.erase();
       return mlir::success();
     }
     AffineForOp loop;
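The change repeated in every hunk above is a single API migration: op classes such as AffineForOp, AffineApplyOp, and AffineIfOp now behave as lightweight value types wrapping the underlying Instruction*, so they are stored and passed by value and their members are reached with '.', while the raw Instruction obtained from getInstruction() keeps its pointer semantics. A minimal sketch of the resulting convention; the helper doubleStep is hypothetical and purely illustrative, but every member call it makes appears in the patch:

    #include "mlir/AffineOps/AffineOps.h"

    using namespace mlir;

    // Hypothetical helper, not part of the patch: the op wrapper is taken by
    // value and used with '.', the wrapped Instruction still through '->'.
    static void doubleStep(AffineForOp forOp) {
      forOp.setStep(forOp.getStep() * 2);          // wrapper: value semantics
      Instruction *inst = forOp.getInstruction();  // underlying instruction
      (void)inst->getBlock();                      // pointer semantics as before
    }

Because the wrapper holds nothing but the instruction pointer, copies are cheap, and a copy remains usable across splices that move the instruction (interchangeLoops and sinkLoop above rely on this); it only goes stale once the instruction itself is erased.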

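As a reading aid for the doubleBuffer() hunk in PipelineDataTransfer.cpp: modTwoMap is (d0) -> ((d0 floordiv step) mod 2), applied to the loop induction variable. A worked example, assuming step == 1:

    // ivModTwoOp = (iv floordiv 1) mod 2 indexes the doubled leading dimension:
    //   iv = 0, 1, 2, 3, ...  ->  buffer slice 0, 1, 0, 1, ...
    // Consecutive iterations alternate slices, so the DMA issued in iteration
    // i + 1 fills the slice that iteration i's compute is not reading.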

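For orientation, the LoopUtils entry points edited above compose directly under the new syntax. A hypothetical driver follows, assuming a perfectly nested loop at least two deep and that the declarations are reachable through mlir/Transforms/LoopUtils.h; sinkAndUnroll itself is illustrative only and not part of the patch:

    #include "mlir/Transforms/LoopUtils.h"

    using namespace mlir;

    // Sink 'forOp' two levels down its perfect nest by repeated interchange,
    // then unroll it in place by a factor of 4. 'forOp' stays valid because
    // the splices move its instruction without erasing it.
    static void sinkAndUnroll(AffineForOp forOp) {
      sinkLoop(forOp, /*loopDepth=*/2);
      if (failed(loopUnrollByFactor(forOp, /*unrollFactor=*/4)))
        return; // e.g. empty body or a multi-result lower bound map
      // On success, loopUnrollByFactor has already built any cleanup loop via
      // getCleanupLoopLowerBound and promoted it if it became single-iteration.
    }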